diff --git "a/checkpoint-13602/trainer_state.json" "b/checkpoint-13602/trainer_state.json" new file mode 100644--- /dev/null +++ "b/checkpoint-13602/trainer_state.json" @@ -0,0 +1,95247 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0, + "eval_steps": 500, + "global_step": 13602, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 7.351860020585208e-05, + "grad_norm": 2.7453043460845947, + "learning_rate": 5.0000000000000004e-08, + "loss": 0.9882, + "step": 1 + }, + { + "epoch": 0.00014703720041170417, + "grad_norm": 2.726182222366333, + "learning_rate": 1.0000000000000001e-07, + "loss": 1.0532, + "step": 2 + }, + { + "epoch": 0.00022055580061755624, + "grad_norm": 2.7549703121185303, + "learning_rate": 1.5000000000000002e-07, + "loss": 1.0044, + "step": 3 + }, + { + "epoch": 0.00029407440082340834, + "grad_norm": 2.8669583797454834, + "learning_rate": 2.0000000000000002e-07, + "loss": 1.0539, + "step": 4 + }, + { + "epoch": 0.0003675930010292604, + "grad_norm": 2.7171525955200195, + "learning_rate": 2.5000000000000004e-07, + "loss": 1.0581, + "step": 5 + }, + { + "epoch": 0.0004411116012351125, + "grad_norm": 2.9201908111572266, + "learning_rate": 3.0000000000000004e-07, + "loss": 1.0992, + "step": 6 + }, + { + "epoch": 0.0005146302014409646, + "grad_norm": 2.7603566646575928, + "learning_rate": 3.5000000000000004e-07, + "loss": 1.0895, + "step": 7 + }, + { + "epoch": 0.0005881488016468167, + "grad_norm": 2.755460739135742, + "learning_rate": 4.0000000000000003e-07, + "loss": 1.0655, + "step": 8 + }, + { + "epoch": 0.0006616674018526688, + "grad_norm": 2.7305805683135986, + "learning_rate": 4.5000000000000003e-07, + "loss": 1.0685, + "step": 9 + }, + { + "epoch": 0.0007351860020585208, + "grad_norm": 2.8166794776916504, + "learning_rate": 5.000000000000001e-07, + "loss": 1.0462, + "step": 10 + }, + { + "epoch": 0.0008087046022643729, + "grad_norm": 2.5454304218292236, + "learning_rate": 5.5e-07, + "loss": 0.9612, + "step": 11 + }, + { + "epoch": 0.000882223202470225, + "grad_norm": 2.8109467029571533, + "learning_rate": 6.000000000000001e-07, + "loss": 1.0261, + "step": 12 + }, + { + "epoch": 0.000955741802676077, + "grad_norm": 2.6827304363250732, + "learning_rate": 6.5e-07, + "loss": 1.045, + "step": 13 + }, + { + "epoch": 0.0010292604028819291, + "grad_norm": 2.670419454574585, + "learning_rate": 7.000000000000001e-07, + "loss": 1.0623, + "step": 14 + }, + { + "epoch": 0.0011027790030877812, + "grad_norm": 2.6076502799987793, + "learning_rate": 7.5e-07, + "loss": 1.0422, + "step": 15 + }, + { + "epoch": 0.0011762976032936333, + "grad_norm": 2.59539532661438, + "learning_rate": 8.000000000000001e-07, + "loss": 0.9873, + "step": 16 + }, + { + "epoch": 0.0012498162034994854, + "grad_norm": 2.8516616821289062, + "learning_rate": 8.500000000000001e-07, + "loss": 1.0707, + "step": 17 + }, + { + "epoch": 0.0013233348037053375, + "grad_norm": 2.6040961742401123, + "learning_rate": 9.000000000000001e-07, + "loss": 1.0069, + "step": 18 + }, + { + "epoch": 0.0013968534039111896, + "grad_norm": 2.38720965385437, + "learning_rate": 9.500000000000001e-07, + "loss": 0.9712, + "step": 19 + }, + { + "epoch": 0.0014703720041170415, + "grad_norm": 2.5276007652282715, + "learning_rate": 1.0000000000000002e-06, + "loss": 0.9616, + "step": 20 + }, + { + "epoch": 0.0015438906043228936, + "grad_norm": 2.4871504306793213, + "learning_rate": 1.0500000000000001e-06, + "loss": 1.0097, + "step": 21 + }, + { + "epoch": 0.0016174092045287457, + "grad_norm": 2.499459743499756, + "learning_rate": 1.1e-06, + "loss": 1.0623, + "step": 22 + }, + { + "epoch": 0.0016909278047345978, + "grad_norm": 2.2660908699035645, + "learning_rate": 1.1500000000000002e-06, + "loss": 1.0279, + "step": 23 + }, + { + "epoch": 0.00176444640494045, + "grad_norm": 2.0320932865142822, + "learning_rate": 1.2000000000000002e-06, + "loss": 0.932, + "step": 24 + }, + { + "epoch": 0.001837965005146302, + "grad_norm": 2.3445448875427246, + "learning_rate": 1.25e-06, + "loss": 1.0477, + "step": 25 + }, + { + "epoch": 0.001911483605352154, + "grad_norm": 2.001228094100952, + "learning_rate": 1.3e-06, + "loss": 0.9733, + "step": 26 + }, + { + "epoch": 0.001985002205558006, + "grad_norm": 1.8723827600479126, + "learning_rate": 1.3500000000000002e-06, + "loss": 0.9529, + "step": 27 + }, + { + "epoch": 0.0020585208057638583, + "grad_norm": 1.835139274597168, + "learning_rate": 1.4000000000000001e-06, + "loss": 0.9742, + "step": 28 + }, + { + "epoch": 0.00213203940596971, + "grad_norm": 1.833482027053833, + "learning_rate": 1.45e-06, + "loss": 0.9683, + "step": 29 + }, + { + "epoch": 0.0022055580061755625, + "grad_norm": 1.6917386054992676, + "learning_rate": 1.5e-06, + "loss": 0.9799, + "step": 30 + }, + { + "epoch": 0.0022790766063814144, + "grad_norm": 1.7054415941238403, + "learning_rate": 1.5500000000000002e-06, + "loss": 0.9953, + "step": 31 + }, + { + "epoch": 0.0023525952065872667, + "grad_norm": 1.5337066650390625, + "learning_rate": 1.6000000000000001e-06, + "loss": 0.9558, + "step": 32 + }, + { + "epoch": 0.0024261138067931186, + "grad_norm": 1.4918959140777588, + "learning_rate": 1.6500000000000003e-06, + "loss": 0.9345, + "step": 33 + }, + { + "epoch": 0.002499632406998971, + "grad_norm": 1.5619447231292725, + "learning_rate": 1.7000000000000002e-06, + "loss": 0.9831, + "step": 34 + }, + { + "epoch": 0.0025731510072048228, + "grad_norm": 1.4436485767364502, + "learning_rate": 1.75e-06, + "loss": 0.9715, + "step": 35 + }, + { + "epoch": 0.002646669607410675, + "grad_norm": 1.486677646636963, + "learning_rate": 1.8000000000000001e-06, + "loss": 0.9248, + "step": 36 + }, + { + "epoch": 0.002720188207616527, + "grad_norm": 1.4541383981704712, + "learning_rate": 1.85e-06, + "loss": 0.9865, + "step": 37 + }, + { + "epoch": 0.0027937068078223793, + "grad_norm": 1.3482491970062256, + "learning_rate": 1.9000000000000002e-06, + "loss": 0.9094, + "step": 38 + }, + { + "epoch": 0.002867225408028231, + "grad_norm": 1.3040587902069092, + "learning_rate": 1.9500000000000004e-06, + "loss": 0.9425, + "step": 39 + }, + { + "epoch": 0.002940744008234083, + "grad_norm": 1.329936146736145, + "learning_rate": 2.0000000000000003e-06, + "loss": 0.9698, + "step": 40 + }, + { + "epoch": 0.0030142626084399353, + "grad_norm": 1.3117034435272217, + "learning_rate": 2.05e-06, + "loss": 0.9588, + "step": 41 + }, + { + "epoch": 0.0030877812086457872, + "grad_norm": 1.3280625343322754, + "learning_rate": 2.1000000000000002e-06, + "loss": 0.9729, + "step": 42 + }, + { + "epoch": 0.0031612998088516395, + "grad_norm": 1.2921775579452515, + "learning_rate": 2.15e-06, + "loss": 0.9033, + "step": 43 + }, + { + "epoch": 0.0032348184090574914, + "grad_norm": 1.2548478841781616, + "learning_rate": 2.2e-06, + "loss": 0.9125, + "step": 44 + }, + { + "epoch": 0.0033083370092633437, + "grad_norm": 1.218864917755127, + "learning_rate": 2.25e-06, + "loss": 0.9118, + "step": 45 + }, + { + "epoch": 0.0033818556094691956, + "grad_norm": 1.3572269678115845, + "learning_rate": 2.3000000000000004e-06, + "loss": 0.969, + "step": 46 + }, + { + "epoch": 0.003455374209675048, + "grad_norm": 1.300675868988037, + "learning_rate": 2.35e-06, + "loss": 0.8731, + "step": 47 + }, + { + "epoch": 0.0035288928098809, + "grad_norm": 1.3196706771850586, + "learning_rate": 2.4000000000000003e-06, + "loss": 0.9161, + "step": 48 + }, + { + "epoch": 0.003602411410086752, + "grad_norm": 1.2298756837844849, + "learning_rate": 2.4500000000000003e-06, + "loss": 0.8976, + "step": 49 + }, + { + "epoch": 0.003675930010292604, + "grad_norm": 1.2790957689285278, + "learning_rate": 2.5e-06, + "loss": 0.8716, + "step": 50 + }, + { + "epoch": 0.0037494486104984563, + "grad_norm": 1.1932955980300903, + "learning_rate": 2.55e-06, + "loss": 0.8177, + "step": 51 + }, + { + "epoch": 0.003822967210704308, + "grad_norm": 1.0490808486938477, + "learning_rate": 2.6e-06, + "loss": 0.7913, + "step": 52 + }, + { + "epoch": 0.00389648581091016, + "grad_norm": 1.0988456010818481, + "learning_rate": 2.6500000000000005e-06, + "loss": 0.867, + "step": 53 + }, + { + "epoch": 0.003970004411116012, + "grad_norm": 1.148760199546814, + "learning_rate": 2.7000000000000004e-06, + "loss": 0.8972, + "step": 54 + }, + { + "epoch": 0.004043523011321865, + "grad_norm": 1.1270138025283813, + "learning_rate": 2.7500000000000004e-06, + "loss": 0.8791, + "step": 55 + }, + { + "epoch": 0.004117041611527717, + "grad_norm": 1.1520501375198364, + "learning_rate": 2.8000000000000003e-06, + "loss": 0.8627, + "step": 56 + }, + { + "epoch": 0.0041905602117335685, + "grad_norm": 1.1164336204528809, + "learning_rate": 2.85e-06, + "loss": 0.8349, + "step": 57 + }, + { + "epoch": 0.00426407881193942, + "grad_norm": 1.0426692962646484, + "learning_rate": 2.9e-06, + "loss": 0.8442, + "step": 58 + }, + { + "epoch": 0.004337597412145273, + "grad_norm": 0.9961422085762024, + "learning_rate": 2.95e-06, + "loss": 0.7771, + "step": 59 + }, + { + "epoch": 0.004411116012351125, + "grad_norm": 1.1183439493179321, + "learning_rate": 3e-06, + "loss": 0.8612, + "step": 60 + }, + { + "epoch": 0.004484634612556977, + "grad_norm": 1.0372838973999023, + "learning_rate": 3.05e-06, + "loss": 0.844, + "step": 61 + }, + { + "epoch": 0.004558153212762829, + "grad_norm": 1.1020818948745728, + "learning_rate": 3.1000000000000004e-06, + "loss": 0.8357, + "step": 62 + }, + { + "epoch": 0.0046316718129686815, + "grad_norm": 1.0636529922485352, + "learning_rate": 3.1500000000000003e-06, + "loss": 0.8104, + "step": 63 + }, + { + "epoch": 0.004705190413174533, + "grad_norm": 1.0429540872573853, + "learning_rate": 3.2000000000000003e-06, + "loss": 0.8827, + "step": 64 + }, + { + "epoch": 0.004778709013380385, + "grad_norm": 1.0222766399383545, + "learning_rate": 3.2500000000000002e-06, + "loss": 0.7935, + "step": 65 + }, + { + "epoch": 0.004852227613586237, + "grad_norm": 1.104155421257019, + "learning_rate": 3.3000000000000006e-06, + "loss": 0.8354, + "step": 66 + }, + { + "epoch": 0.004925746213792089, + "grad_norm": 0.9883831739425659, + "learning_rate": 3.3500000000000005e-06, + "loss": 0.7898, + "step": 67 + }, + { + "epoch": 0.004999264813997942, + "grad_norm": 1.014737844467163, + "learning_rate": 3.4000000000000005e-06, + "loss": 0.8202, + "step": 68 + }, + { + "epoch": 0.005072783414203794, + "grad_norm": 1.0809258222579956, + "learning_rate": 3.45e-06, + "loss": 0.8395, + "step": 69 + }, + { + "epoch": 0.0051463020144096455, + "grad_norm": 0.9706105589866638, + "learning_rate": 3.5e-06, + "loss": 0.8406, + "step": 70 + }, + { + "epoch": 0.005219820614615497, + "grad_norm": 1.0306090116500854, + "learning_rate": 3.5500000000000003e-06, + "loss": 0.8133, + "step": 71 + }, + { + "epoch": 0.00529333921482135, + "grad_norm": 0.955964982509613, + "learning_rate": 3.6000000000000003e-06, + "loss": 0.7914, + "step": 72 + }, + { + "epoch": 0.005366857815027202, + "grad_norm": 0.9542300701141357, + "learning_rate": 3.65e-06, + "loss": 0.8443, + "step": 73 + }, + { + "epoch": 0.005440376415233054, + "grad_norm": 0.9511138200759888, + "learning_rate": 3.7e-06, + "loss": 0.8077, + "step": 74 + }, + { + "epoch": 0.005513895015438906, + "grad_norm": 1.0469529628753662, + "learning_rate": 3.7500000000000005e-06, + "loss": 0.8347, + "step": 75 + }, + { + "epoch": 0.0055874136156447585, + "grad_norm": 0.9994472861289978, + "learning_rate": 3.8000000000000005e-06, + "loss": 0.8151, + "step": 76 + }, + { + "epoch": 0.00566093221585061, + "grad_norm": 0.9512090086936951, + "learning_rate": 3.85e-06, + "loss": 0.7751, + "step": 77 + }, + { + "epoch": 0.005734450816056462, + "grad_norm": 0.9878868460655212, + "learning_rate": 3.900000000000001e-06, + "loss": 0.8301, + "step": 78 + }, + { + "epoch": 0.005807969416262314, + "grad_norm": 0.9258867502212524, + "learning_rate": 3.95e-06, + "loss": 0.8091, + "step": 79 + }, + { + "epoch": 0.005881488016468166, + "grad_norm": 1.0198184251785278, + "learning_rate": 4.000000000000001e-06, + "loss": 0.7624, + "step": 80 + }, + { + "epoch": 0.005955006616674019, + "grad_norm": 0.9681070446968079, + "learning_rate": 4.05e-06, + "loss": 0.8066, + "step": 81 + }, + { + "epoch": 0.006028525216879871, + "grad_norm": 0.9898563623428345, + "learning_rate": 4.1e-06, + "loss": 0.8437, + "step": 82 + }, + { + "epoch": 0.006102043817085723, + "grad_norm": 0.9947354197502136, + "learning_rate": 4.15e-06, + "loss": 0.8024, + "step": 83 + }, + { + "epoch": 0.0061755624172915745, + "grad_norm": 0.9315201640129089, + "learning_rate": 4.2000000000000004e-06, + "loss": 0.7863, + "step": 84 + }, + { + "epoch": 0.006249081017497427, + "grad_norm": 0.9657160639762878, + "learning_rate": 4.25e-06, + "loss": 0.8054, + "step": 85 + }, + { + "epoch": 0.006322599617703279, + "grad_norm": 0.996770977973938, + "learning_rate": 4.3e-06, + "loss": 0.794, + "step": 86 + }, + { + "epoch": 0.006396118217909131, + "grad_norm": 0.9504892826080322, + "learning_rate": 4.350000000000001e-06, + "loss": 0.8063, + "step": 87 + }, + { + "epoch": 0.006469636818114983, + "grad_norm": 0.9791470170021057, + "learning_rate": 4.4e-06, + "loss": 0.8103, + "step": 88 + }, + { + "epoch": 0.006543155418320836, + "grad_norm": 0.9856845140457153, + "learning_rate": 4.450000000000001e-06, + "loss": 0.7487, + "step": 89 + }, + { + "epoch": 0.0066166740185266875, + "grad_norm": 0.9999904632568359, + "learning_rate": 4.5e-06, + "loss": 0.783, + "step": 90 + }, + { + "epoch": 0.006690192618732539, + "grad_norm": 0.9612404108047485, + "learning_rate": 4.5500000000000005e-06, + "loss": 0.7633, + "step": 91 + }, + { + "epoch": 0.006763711218938391, + "grad_norm": 0.9946274161338806, + "learning_rate": 4.600000000000001e-06, + "loss": 0.7361, + "step": 92 + }, + { + "epoch": 0.006837229819144243, + "grad_norm": 1.0638426542282104, + "learning_rate": 4.65e-06, + "loss": 0.7888, + "step": 93 + }, + { + "epoch": 0.006910748419350096, + "grad_norm": 0.914172351360321, + "learning_rate": 4.7e-06, + "loss": 0.7507, + "step": 94 + }, + { + "epoch": 0.006984267019555948, + "grad_norm": 0.9401671886444092, + "learning_rate": 4.75e-06, + "loss": 0.8279, + "step": 95 + }, + { + "epoch": 0.0070577856197618, + "grad_norm": 0.9406688809394836, + "learning_rate": 4.800000000000001e-06, + "loss": 0.7661, + "step": 96 + }, + { + "epoch": 0.0071313042199676515, + "grad_norm": 0.940018355846405, + "learning_rate": 4.85e-06, + "loss": 0.7722, + "step": 97 + }, + { + "epoch": 0.007204822820173504, + "grad_norm": 0.9887327551841736, + "learning_rate": 4.9000000000000005e-06, + "loss": 0.8133, + "step": 98 + }, + { + "epoch": 0.007278341420379356, + "grad_norm": 1.0041906833648682, + "learning_rate": 4.95e-06, + "loss": 0.7866, + "step": 99 + }, + { + "epoch": 0.007351860020585208, + "grad_norm": 0.9543432593345642, + "learning_rate": 5e-06, + "loss": 0.7433, + "step": 100 + }, + { + "epoch": 0.00742537862079106, + "grad_norm": 0.935296356678009, + "learning_rate": 4.999999998143193e-06, + "loss": 0.7811, + "step": 101 + }, + { + "epoch": 0.007498897220996913, + "grad_norm": 0.9257735013961792, + "learning_rate": 4.999999992572773e-06, + "loss": 0.7988, + "step": 102 + }, + { + "epoch": 0.0075724158212027645, + "grad_norm": 1.0122402906417847, + "learning_rate": 4.999999983288742e-06, + "loss": 0.754, + "step": 103 + }, + { + "epoch": 0.007645934421408616, + "grad_norm": 1.0093228816986084, + "learning_rate": 4.999999970291096e-06, + "loss": 0.7736, + "step": 104 + }, + { + "epoch": 0.007719453021614468, + "grad_norm": 0.9187529683113098, + "learning_rate": 4.999999953579837e-06, + "loss": 0.7616, + "step": 105 + }, + { + "epoch": 0.00779297162182032, + "grad_norm": 0.9086087942123413, + "learning_rate": 4.999999933154965e-06, + "loss": 0.7463, + "step": 106 + }, + { + "epoch": 0.007866490222026172, + "grad_norm": 0.8989643454551697, + "learning_rate": 4.99999990901648e-06, + "loss": 0.786, + "step": 107 + }, + { + "epoch": 0.007940008822232024, + "grad_norm": 0.9673314094543457, + "learning_rate": 4.999999881164382e-06, + "loss": 0.7796, + "step": 108 + }, + { + "epoch": 0.008013527422437878, + "grad_norm": 0.9537264704704285, + "learning_rate": 4.999999849598671e-06, + "loss": 0.7936, + "step": 109 + }, + { + "epoch": 0.00808704602264373, + "grad_norm": 0.9616097807884216, + "learning_rate": 4.9999998143193475e-06, + "loss": 0.7475, + "step": 110 + }, + { + "epoch": 0.008160564622849581, + "grad_norm": 0.9634294509887695, + "learning_rate": 4.999999775326411e-06, + "loss": 0.7607, + "step": 111 + }, + { + "epoch": 0.008234083223055433, + "grad_norm": 0.966712474822998, + "learning_rate": 4.999999732619861e-06, + "loss": 0.7191, + "step": 112 + }, + { + "epoch": 0.008307601823261285, + "grad_norm": 0.9415963888168335, + "learning_rate": 4.999999686199699e-06, + "loss": 0.7606, + "step": 113 + }, + { + "epoch": 0.008381120423467137, + "grad_norm": 0.9189338088035583, + "learning_rate": 4.999999636065925e-06, + "loss": 0.7355, + "step": 114 + }, + { + "epoch": 0.008454639023672989, + "grad_norm": 0.8791027069091797, + "learning_rate": 4.999999582218538e-06, + "loss": 0.736, + "step": 115 + }, + { + "epoch": 0.00852815762387884, + "grad_norm": 0.974856436252594, + "learning_rate": 4.999999524657538e-06, + "loss": 0.7488, + "step": 116 + }, + { + "epoch": 0.008601676224084693, + "grad_norm": 0.9251773357391357, + "learning_rate": 4.9999994633829265e-06, + "loss": 0.7351, + "step": 117 + }, + { + "epoch": 0.008675194824290546, + "grad_norm": 0.9398778676986694, + "learning_rate": 4.999999398394701e-06, + "loss": 0.7296, + "step": 118 + }, + { + "epoch": 0.008748713424496398, + "grad_norm": 0.9316085577011108, + "learning_rate": 4.999999329692865e-06, + "loss": 0.7409, + "step": 119 + }, + { + "epoch": 0.00882223202470225, + "grad_norm": 0.8969900012016296, + "learning_rate": 4.9999992572774166e-06, + "loss": 0.7091, + "step": 120 + }, + { + "epoch": 0.008895750624908102, + "grad_norm": 0.9218995571136475, + "learning_rate": 4.999999181148356e-06, + "loss": 0.7739, + "step": 121 + }, + { + "epoch": 0.008969269225113954, + "grad_norm": 0.9443301558494568, + "learning_rate": 4.999999101305683e-06, + "loss": 0.7515, + "step": 122 + }, + { + "epoch": 0.009042787825319806, + "grad_norm": 0.9461542963981628, + "learning_rate": 4.9999990177493985e-06, + "loss": 0.7384, + "step": 123 + }, + { + "epoch": 0.009116306425525657, + "grad_norm": 0.9097594022750854, + "learning_rate": 4.9999989304795025e-06, + "loss": 0.7494, + "step": 124 + }, + { + "epoch": 0.00918982502573151, + "grad_norm": 0.9125921130180359, + "learning_rate": 4.9999988394959955e-06, + "loss": 0.75, + "step": 125 + }, + { + "epoch": 0.009263343625937363, + "grad_norm": 0.8836398720741272, + "learning_rate": 4.999998744798877e-06, + "loss": 0.7339, + "step": 126 + }, + { + "epoch": 0.009336862226143215, + "grad_norm": 0.9500898122787476, + "learning_rate": 4.999998646388146e-06, + "loss": 0.7299, + "step": 127 + }, + { + "epoch": 0.009410380826349067, + "grad_norm": 0.8739739656448364, + "learning_rate": 4.999998544263805e-06, + "loss": 0.7532, + "step": 128 + }, + { + "epoch": 0.009483899426554919, + "grad_norm": 0.8998104929924011, + "learning_rate": 4.999998438425853e-06, + "loss": 0.7503, + "step": 129 + }, + { + "epoch": 0.00955741802676077, + "grad_norm": 0.9872859716415405, + "learning_rate": 4.99999832887429e-06, + "loss": 0.7647, + "step": 130 + }, + { + "epoch": 0.009630936626966622, + "grad_norm": 0.9128516316413879, + "learning_rate": 4.999998215609116e-06, + "loss": 0.7343, + "step": 131 + }, + { + "epoch": 0.009704455227172474, + "grad_norm": 0.8700780868530273, + "learning_rate": 4.999998098630332e-06, + "loss": 0.7666, + "step": 132 + }, + { + "epoch": 0.009777973827378326, + "grad_norm": 0.9218820929527283, + "learning_rate": 4.999997977937938e-06, + "loss": 0.7389, + "step": 133 + }, + { + "epoch": 0.009851492427584178, + "grad_norm": 0.9427739977836609, + "learning_rate": 4.9999978535319334e-06, + "loss": 0.7797, + "step": 134 + }, + { + "epoch": 0.009925011027790032, + "grad_norm": 0.9559230208396912, + "learning_rate": 4.999997725412319e-06, + "loss": 0.7876, + "step": 135 + }, + { + "epoch": 0.009998529627995884, + "grad_norm": 0.9200750589370728, + "learning_rate": 4.999997593579095e-06, + "loss": 0.7348, + "step": 136 + }, + { + "epoch": 0.010072048228201735, + "grad_norm": 0.9572651982307434, + "learning_rate": 4.999997458032261e-06, + "loss": 0.7504, + "step": 137 + }, + { + "epoch": 0.010145566828407587, + "grad_norm": 0.8785117268562317, + "learning_rate": 4.999997318771818e-06, + "loss": 0.7439, + "step": 138 + }, + { + "epoch": 0.01021908542861344, + "grad_norm": 0.9233011603355408, + "learning_rate": 4.999997175797766e-06, + "loss": 0.7205, + "step": 139 + }, + { + "epoch": 0.010292604028819291, + "grad_norm": 0.85040682554245, + "learning_rate": 4.9999970291101055e-06, + "loss": 0.7287, + "step": 140 + }, + { + "epoch": 0.010366122629025143, + "grad_norm": 0.9641652703285217, + "learning_rate": 4.999996878708836e-06, + "loss": 0.7597, + "step": 141 + }, + { + "epoch": 0.010439641229230995, + "grad_norm": 0.9080432653427124, + "learning_rate": 4.999996724593958e-06, + "loss": 0.7411, + "step": 142 + }, + { + "epoch": 0.010513159829436847, + "grad_norm": 0.9182582497596741, + "learning_rate": 4.999996566765471e-06, + "loss": 0.7013, + "step": 143 + }, + { + "epoch": 0.0105866784296427, + "grad_norm": 0.9291180372238159, + "learning_rate": 4.999996405223377e-06, + "loss": 0.7497, + "step": 144 + }, + { + "epoch": 0.010660197029848552, + "grad_norm": 0.94748854637146, + "learning_rate": 4.999996239967675e-06, + "loss": 0.744, + "step": 145 + }, + { + "epoch": 0.010733715630054404, + "grad_norm": 0.891032874584198, + "learning_rate": 4.9999960709983655e-06, + "loss": 0.7106, + "step": 146 + }, + { + "epoch": 0.010807234230260256, + "grad_norm": 0.8822353482246399, + "learning_rate": 4.999995898315448e-06, + "loss": 0.7628, + "step": 147 + }, + { + "epoch": 0.010880752830466108, + "grad_norm": 0.9697872400283813, + "learning_rate": 4.9999957219189245e-06, + "loss": 0.7648, + "step": 148 + }, + { + "epoch": 0.01095427143067196, + "grad_norm": 0.9625885486602783, + "learning_rate": 4.999995541808794e-06, + "loss": 0.7273, + "step": 149 + }, + { + "epoch": 0.011027790030877812, + "grad_norm": 0.8682568669319153, + "learning_rate": 4.9999953579850565e-06, + "loss": 0.7525, + "step": 150 + }, + { + "epoch": 0.011101308631083663, + "grad_norm": 0.8519266247749329, + "learning_rate": 4.999995170447713e-06, + "loss": 0.7232, + "step": 151 + }, + { + "epoch": 0.011174827231289517, + "grad_norm": 0.9366410970687866, + "learning_rate": 4.999994979196764e-06, + "loss": 0.7169, + "step": 152 + }, + { + "epoch": 0.011248345831495369, + "grad_norm": 0.9287764430046082, + "learning_rate": 4.999994784232209e-06, + "loss": 0.7104, + "step": 153 + }, + { + "epoch": 0.01132186443170122, + "grad_norm": 0.8807492256164551, + "learning_rate": 4.999994585554049e-06, + "loss": 0.7253, + "step": 154 + }, + { + "epoch": 0.011395383031907073, + "grad_norm": 0.891542911529541, + "learning_rate": 4.999994383162284e-06, + "loss": 0.6875, + "step": 155 + }, + { + "epoch": 0.011468901632112925, + "grad_norm": 0.9400976300239563, + "learning_rate": 4.999994177056914e-06, + "loss": 0.7089, + "step": 156 + }, + { + "epoch": 0.011542420232318776, + "grad_norm": 0.9032735228538513, + "learning_rate": 4.999993967237939e-06, + "loss": 0.705, + "step": 157 + }, + { + "epoch": 0.011615938832524628, + "grad_norm": 0.9477034211158752, + "learning_rate": 4.99999375370536e-06, + "loss": 0.6992, + "step": 158 + }, + { + "epoch": 0.01168945743273048, + "grad_norm": 0.8804504871368408, + "learning_rate": 4.999993536459178e-06, + "loss": 0.7132, + "step": 159 + }, + { + "epoch": 0.011762976032936332, + "grad_norm": 0.8735411763191223, + "learning_rate": 4.999993315499392e-06, + "loss": 0.7442, + "step": 160 + }, + { + "epoch": 0.011836494633142186, + "grad_norm": 0.8803058862686157, + "learning_rate": 4.999993090826003e-06, + "loss": 0.6935, + "step": 161 + }, + { + "epoch": 0.011910013233348038, + "grad_norm": 0.9732248783111572, + "learning_rate": 4.9999928624390105e-06, + "loss": 0.7247, + "step": 162 + }, + { + "epoch": 0.01198353183355389, + "grad_norm": 0.9447747468948364, + "learning_rate": 4.999992630338416e-06, + "loss": 0.7347, + "step": 163 + }, + { + "epoch": 0.012057050433759741, + "grad_norm": 0.9365608096122742, + "learning_rate": 4.999992394524219e-06, + "loss": 0.7688, + "step": 164 + }, + { + "epoch": 0.012130569033965593, + "grad_norm": 0.9859951138496399, + "learning_rate": 4.999992154996421e-06, + "loss": 0.7056, + "step": 165 + }, + { + "epoch": 0.012204087634171445, + "grad_norm": 0.9221821427345276, + "learning_rate": 4.999991911755021e-06, + "loss": 0.7069, + "step": 166 + }, + { + "epoch": 0.012277606234377297, + "grad_norm": 1.0242071151733398, + "learning_rate": 4.99999166480002e-06, + "loss": 0.7023, + "step": 167 + }, + { + "epoch": 0.012351124834583149, + "grad_norm": 0.9036085605621338, + "learning_rate": 4.999991414131418e-06, + "loss": 0.7401, + "step": 168 + }, + { + "epoch": 0.012424643434789, + "grad_norm": 0.8491998910903931, + "learning_rate": 4.999991159749216e-06, + "loss": 0.7017, + "step": 169 + }, + { + "epoch": 0.012498162034994854, + "grad_norm": 0.9317228198051453, + "learning_rate": 4.999990901653414e-06, + "loss": 0.7149, + "step": 170 + }, + { + "epoch": 0.012571680635200706, + "grad_norm": 0.9174526333808899, + "learning_rate": 4.9999906398440124e-06, + "loss": 0.7295, + "step": 171 + }, + { + "epoch": 0.012645199235406558, + "grad_norm": 0.8997306823730469, + "learning_rate": 4.9999903743210115e-06, + "loss": 0.7201, + "step": 172 + }, + { + "epoch": 0.01271871783561241, + "grad_norm": 0.9166220426559448, + "learning_rate": 4.9999901050844115e-06, + "loss": 0.7195, + "step": 173 + }, + { + "epoch": 0.012792236435818262, + "grad_norm": 1.0067110061645508, + "learning_rate": 4.999989832134214e-06, + "loss": 0.7892, + "step": 174 + }, + { + "epoch": 0.012865755036024114, + "grad_norm": 0.8526698350906372, + "learning_rate": 4.999989555470418e-06, + "loss": 0.7432, + "step": 175 + }, + { + "epoch": 0.012939273636229966, + "grad_norm": 0.9225575923919678, + "learning_rate": 4.999989275093024e-06, + "loss": 0.7251, + "step": 176 + }, + { + "epoch": 0.013012792236435818, + "grad_norm": 0.9577476382255554, + "learning_rate": 4.999988991002034e-06, + "loss": 0.7253, + "step": 177 + }, + { + "epoch": 0.013086310836641671, + "grad_norm": 0.9230891466140747, + "learning_rate": 4.999988703197446e-06, + "loss": 0.7009, + "step": 178 + }, + { + "epoch": 0.013159829436847523, + "grad_norm": 0.9513611197471619, + "learning_rate": 4.999988411679262e-06, + "loss": 0.7559, + "step": 179 + }, + { + "epoch": 0.013233348037053375, + "grad_norm": 0.9068126082420349, + "learning_rate": 4.999988116447482e-06, + "loss": 0.7367, + "step": 180 + }, + { + "epoch": 0.013306866637259227, + "grad_norm": 0.9084961414337158, + "learning_rate": 4.9999878175021065e-06, + "loss": 0.7481, + "step": 181 + }, + { + "epoch": 0.013380385237465079, + "grad_norm": 0.8727425932884216, + "learning_rate": 4.999987514843136e-06, + "loss": 0.7366, + "step": 182 + }, + { + "epoch": 0.01345390383767093, + "grad_norm": 0.8604572415351868, + "learning_rate": 4.9999872084705714e-06, + "loss": 0.7187, + "step": 183 + }, + { + "epoch": 0.013527422437876782, + "grad_norm": 0.8866093158721924, + "learning_rate": 4.9999868983844125e-06, + "loss": 0.7217, + "step": 184 + }, + { + "epoch": 0.013600941038082634, + "grad_norm": 0.9599839448928833, + "learning_rate": 4.99998658458466e-06, + "loss": 0.6829, + "step": 185 + }, + { + "epoch": 0.013674459638288486, + "grad_norm": 0.9125257134437561, + "learning_rate": 4.999986267071314e-06, + "loss": 0.709, + "step": 186 + }, + { + "epoch": 0.01374797823849434, + "grad_norm": 0.9279797673225403, + "learning_rate": 4.9999859458443755e-06, + "loss": 0.7451, + "step": 187 + }, + { + "epoch": 0.013821496838700192, + "grad_norm": 0.9264232516288757, + "learning_rate": 4.9999856209038455e-06, + "loss": 0.7469, + "step": 188 + }, + { + "epoch": 0.013895015438906044, + "grad_norm": 0.8535299301147461, + "learning_rate": 4.999985292249723e-06, + "loss": 0.6981, + "step": 189 + }, + { + "epoch": 0.013968534039111895, + "grad_norm": 0.8957895636558533, + "learning_rate": 4.999984959882009e-06, + "loss": 0.7333, + "step": 190 + }, + { + "epoch": 0.014042052639317747, + "grad_norm": 1.000357747077942, + "learning_rate": 4.999984623800705e-06, + "loss": 0.7545, + "step": 191 + }, + { + "epoch": 0.0141155712395236, + "grad_norm": 0.87474524974823, + "learning_rate": 4.9999842840058095e-06, + "loss": 0.7012, + "step": 192 + }, + { + "epoch": 0.014189089839729451, + "grad_norm": 0.9511388540267944, + "learning_rate": 4.999983940497325e-06, + "loss": 0.7157, + "step": 193 + }, + { + "epoch": 0.014262608439935303, + "grad_norm": 0.9488825798034668, + "learning_rate": 4.9999835932752515e-06, + "loss": 0.7059, + "step": 194 + }, + { + "epoch": 0.014336127040141155, + "grad_norm": 0.9168820977210999, + "learning_rate": 4.9999832423395895e-06, + "loss": 0.7204, + "step": 195 + }, + { + "epoch": 0.014409645640347008, + "grad_norm": 0.9075759649276733, + "learning_rate": 4.999982887690339e-06, + "loss": 0.7206, + "step": 196 + }, + { + "epoch": 0.01448316424055286, + "grad_norm": 0.8828081488609314, + "learning_rate": 4.9999825293275e-06, + "loss": 0.7099, + "step": 197 + }, + { + "epoch": 0.014556682840758712, + "grad_norm": 1.0223312377929688, + "learning_rate": 4.999982167251075e-06, + "loss": 0.7258, + "step": 198 + }, + { + "epoch": 0.014630201440964564, + "grad_norm": 0.8753028512001038, + "learning_rate": 4.999981801461063e-06, + "loss": 0.7144, + "step": 199 + }, + { + "epoch": 0.014703720041170416, + "grad_norm": 0.9368354678153992, + "learning_rate": 4.9999814319574645e-06, + "loss": 0.7412, + "step": 200 + }, + { + "epoch": 0.014777238641376268, + "grad_norm": 0.898521363735199, + "learning_rate": 4.999981058740281e-06, + "loss": 0.7233, + "step": 201 + }, + { + "epoch": 0.01485075724158212, + "grad_norm": 0.8689929842948914, + "learning_rate": 4.999980681809512e-06, + "loss": 0.6961, + "step": 202 + }, + { + "epoch": 0.014924275841787972, + "grad_norm": 0.9280577301979065, + "learning_rate": 4.9999803011651596e-06, + "loss": 0.7009, + "step": 203 + }, + { + "epoch": 0.014997794441993825, + "grad_norm": 0.923763632774353, + "learning_rate": 4.999979916807222e-06, + "loss": 0.7196, + "step": 204 + }, + { + "epoch": 0.015071313042199677, + "grad_norm": 0.9146960377693176, + "learning_rate": 4.999979528735702e-06, + "loss": 0.7694, + "step": 205 + }, + { + "epoch": 0.015144831642405529, + "grad_norm": 0.8919402360916138, + "learning_rate": 4.999979136950599e-06, + "loss": 0.7052, + "step": 206 + }, + { + "epoch": 0.015218350242611381, + "grad_norm": 0.917793333530426, + "learning_rate": 4.999978741451914e-06, + "loss": 0.6897, + "step": 207 + }, + { + "epoch": 0.015291868842817233, + "grad_norm": 0.9201381206512451, + "learning_rate": 4.999978342239648e-06, + "loss": 0.6888, + "step": 208 + }, + { + "epoch": 0.015365387443023085, + "grad_norm": 0.9635261297225952, + "learning_rate": 4.9999779393138e-06, + "loss": 0.7314, + "step": 209 + }, + { + "epoch": 0.015438906043228937, + "grad_norm": 0.8976085186004639, + "learning_rate": 4.999977532674373e-06, + "loss": 0.7096, + "step": 210 + }, + { + "epoch": 0.015512424643434788, + "grad_norm": 0.8891539573669434, + "learning_rate": 4.999977122321365e-06, + "loss": 0.7142, + "step": 211 + }, + { + "epoch": 0.01558594324364064, + "grad_norm": 0.9370412230491638, + "learning_rate": 4.999976708254779e-06, + "loss": 0.7388, + "step": 212 + }, + { + "epoch": 0.015659461843846492, + "grad_norm": 0.943287193775177, + "learning_rate": 4.9999762904746135e-06, + "loss": 0.6643, + "step": 213 + }, + { + "epoch": 0.015732980444052344, + "grad_norm": 0.9693775773048401, + "learning_rate": 4.999975868980871e-06, + "loss": 0.7295, + "step": 214 + }, + { + "epoch": 0.015806499044258196, + "grad_norm": 0.886859655380249, + "learning_rate": 4.999975443773551e-06, + "loss": 0.7047, + "step": 215 + }, + { + "epoch": 0.015880017644464048, + "grad_norm": 0.9756608605384827, + "learning_rate": 4.999975014852654e-06, + "loss": 0.7071, + "step": 216 + }, + { + "epoch": 0.015953536244669903, + "grad_norm": 0.8882419466972351, + "learning_rate": 4.99997458221818e-06, + "loss": 0.6824, + "step": 217 + }, + { + "epoch": 0.016027054844875755, + "grad_norm": 0.9146257638931274, + "learning_rate": 4.9999741458701325e-06, + "loss": 0.728, + "step": 218 + }, + { + "epoch": 0.016100573445081607, + "grad_norm": 0.9347761273384094, + "learning_rate": 4.999973705808509e-06, + "loss": 0.7073, + "step": 219 + }, + { + "epoch": 0.01617409204528746, + "grad_norm": 0.9639213681221008, + "learning_rate": 4.999973262033312e-06, + "loss": 0.7192, + "step": 220 + }, + { + "epoch": 0.01624761064549331, + "grad_norm": 0.9313254952430725, + "learning_rate": 4.999972814544542e-06, + "loss": 0.7228, + "step": 221 + }, + { + "epoch": 0.016321129245699163, + "grad_norm": 0.8674330115318298, + "learning_rate": 4.999972363342199e-06, + "loss": 0.7069, + "step": 222 + }, + { + "epoch": 0.016394647845905014, + "grad_norm": 0.8882086873054504, + "learning_rate": 4.9999719084262835e-06, + "loss": 0.7189, + "step": 223 + }, + { + "epoch": 0.016468166446110866, + "grad_norm": 0.9171630144119263, + "learning_rate": 4.999971449796797e-06, + "loss": 0.7005, + "step": 224 + }, + { + "epoch": 0.016541685046316718, + "grad_norm": 0.9635277986526489, + "learning_rate": 4.99997098745374e-06, + "loss": 0.7239, + "step": 225 + }, + { + "epoch": 0.01661520364652257, + "grad_norm": 0.894798755645752, + "learning_rate": 4.9999705213971124e-06, + "loss": 0.7261, + "step": 226 + }, + { + "epoch": 0.016688722246728422, + "grad_norm": 0.8845874071121216, + "learning_rate": 4.9999700516269165e-06, + "loss": 0.7036, + "step": 227 + }, + { + "epoch": 0.016762240846934274, + "grad_norm": 0.8949581980705261, + "learning_rate": 4.999969578143151e-06, + "loss": 0.6885, + "step": 228 + }, + { + "epoch": 0.016835759447140126, + "grad_norm": 0.9564293622970581, + "learning_rate": 4.999969100945819e-06, + "loss": 0.7193, + "step": 229 + }, + { + "epoch": 0.016909278047345978, + "grad_norm": 0.8525534868240356, + "learning_rate": 4.999968620034918e-06, + "loss": 0.7085, + "step": 230 + }, + { + "epoch": 0.01698279664755183, + "grad_norm": 0.9157596826553345, + "learning_rate": 4.9999681354104515e-06, + "loss": 0.6944, + "step": 231 + }, + { + "epoch": 0.01705631524775768, + "grad_norm": 0.9612221121788025, + "learning_rate": 4.999967647072418e-06, + "loss": 0.6975, + "step": 232 + }, + { + "epoch": 0.017129833847963533, + "grad_norm": 0.8573952317237854, + "learning_rate": 4.999967155020821e-06, + "loss": 0.6917, + "step": 233 + }, + { + "epoch": 0.017203352448169385, + "grad_norm": 0.8602647185325623, + "learning_rate": 4.999966659255659e-06, + "loss": 0.7136, + "step": 234 + }, + { + "epoch": 0.01727687104837524, + "grad_norm": 0.9700285196304321, + "learning_rate": 4.9999661597769335e-06, + "loss": 0.6805, + "step": 235 + }, + { + "epoch": 0.017350389648581092, + "grad_norm": 0.9724011421203613, + "learning_rate": 4.999965656584645e-06, + "loss": 0.7253, + "step": 236 + }, + { + "epoch": 0.017423908248786944, + "grad_norm": 0.905944287776947, + "learning_rate": 4.9999651496787945e-06, + "loss": 0.7058, + "step": 237 + }, + { + "epoch": 0.017497426848992796, + "grad_norm": 0.9061954617500305, + "learning_rate": 4.9999646390593835e-06, + "loss": 0.6814, + "step": 238 + }, + { + "epoch": 0.017570945449198648, + "grad_norm": 0.9528065323829651, + "learning_rate": 4.999964124726411e-06, + "loss": 0.7161, + "step": 239 + }, + { + "epoch": 0.0176444640494045, + "grad_norm": 0.8746179342269897, + "learning_rate": 4.9999636066798785e-06, + "loss": 0.7111, + "step": 240 + }, + { + "epoch": 0.017717982649610352, + "grad_norm": 0.8267335295677185, + "learning_rate": 4.999963084919788e-06, + "loss": 0.646, + "step": 241 + }, + { + "epoch": 0.017791501249816204, + "grad_norm": 0.8658010959625244, + "learning_rate": 4.999962559446138e-06, + "loss": 0.7246, + "step": 242 + }, + { + "epoch": 0.017865019850022056, + "grad_norm": 0.9030390977859497, + "learning_rate": 4.999962030258931e-06, + "loss": 0.697, + "step": 243 + }, + { + "epoch": 0.017938538450227907, + "grad_norm": 0.8551895618438721, + "learning_rate": 4.999961497358168e-06, + "loss": 0.7057, + "step": 244 + }, + { + "epoch": 0.01801205705043376, + "grad_norm": 0.949202835559845, + "learning_rate": 4.9999609607438485e-06, + "loss": 0.6978, + "step": 245 + }, + { + "epoch": 0.01808557565063961, + "grad_norm": 0.9080657362937927, + "learning_rate": 4.999960420415973e-06, + "loss": 0.6972, + "step": 246 + }, + { + "epoch": 0.018159094250845463, + "grad_norm": 0.8521149158477783, + "learning_rate": 4.9999598763745445e-06, + "loss": 0.7272, + "step": 247 + }, + { + "epoch": 0.018232612851051315, + "grad_norm": 0.9263285994529724, + "learning_rate": 4.9999593286195625e-06, + "loss": 0.6759, + "step": 248 + }, + { + "epoch": 0.018306131451257167, + "grad_norm": 0.9313548803329468, + "learning_rate": 4.999958777151027e-06, + "loss": 0.7009, + "step": 249 + }, + { + "epoch": 0.01837965005146302, + "grad_norm": 0.921577513217926, + "learning_rate": 4.99995822196894e-06, + "loss": 0.6993, + "step": 250 + }, + { + "epoch": 0.01845316865166887, + "grad_norm": 0.9597542881965637, + "learning_rate": 4.999957663073302e-06, + "loss": 0.697, + "step": 251 + }, + { + "epoch": 0.018526687251874726, + "grad_norm": 0.899789035320282, + "learning_rate": 4.999957100464113e-06, + "loss": 0.7109, + "step": 252 + }, + { + "epoch": 0.018600205852080578, + "grad_norm": 0.8826919794082642, + "learning_rate": 4.999956534141376e-06, + "loss": 0.7085, + "step": 253 + }, + { + "epoch": 0.01867372445228643, + "grad_norm": 0.8850485682487488, + "learning_rate": 4.99995596410509e-06, + "loss": 0.6787, + "step": 254 + }, + { + "epoch": 0.01874724305249228, + "grad_norm": 0.8478387594223022, + "learning_rate": 4.999955390355256e-06, + "loss": 0.6759, + "step": 255 + }, + { + "epoch": 0.018820761652698133, + "grad_norm": 0.9246669411659241, + "learning_rate": 4.999954812891875e-06, + "loss": 0.707, + "step": 256 + }, + { + "epoch": 0.018894280252903985, + "grad_norm": 0.8764976263046265, + "learning_rate": 4.999954231714948e-06, + "loss": 0.6768, + "step": 257 + }, + { + "epoch": 0.018967798853109837, + "grad_norm": 0.9326310157775879, + "learning_rate": 4.999953646824477e-06, + "loss": 0.68, + "step": 258 + }, + { + "epoch": 0.01904131745331569, + "grad_norm": 0.8816811442375183, + "learning_rate": 4.999953058220461e-06, + "loss": 0.678, + "step": 259 + }, + { + "epoch": 0.01911483605352154, + "grad_norm": 0.9235605597496033, + "learning_rate": 4.999952465902902e-06, + "loss": 0.6976, + "step": 260 + }, + { + "epoch": 0.019188354653727393, + "grad_norm": 0.8637455105781555, + "learning_rate": 4.9999518698717995e-06, + "loss": 0.6856, + "step": 261 + }, + { + "epoch": 0.019261873253933245, + "grad_norm": 0.8961496353149414, + "learning_rate": 4.999951270127156e-06, + "loss": 0.7007, + "step": 262 + }, + { + "epoch": 0.019335391854139097, + "grad_norm": 0.9232350587844849, + "learning_rate": 4.999950666668972e-06, + "loss": 0.7347, + "step": 263 + }, + { + "epoch": 0.01940891045434495, + "grad_norm": 0.8908945918083191, + "learning_rate": 4.999950059497247e-06, + "loss": 0.6445, + "step": 264 + }, + { + "epoch": 0.0194824290545508, + "grad_norm": 0.9378383159637451, + "learning_rate": 4.999949448611985e-06, + "loss": 0.7034, + "step": 265 + }, + { + "epoch": 0.019555947654756652, + "grad_norm": 0.9045705795288086, + "learning_rate": 4.999948834013183e-06, + "loss": 0.7267, + "step": 266 + }, + { + "epoch": 0.019629466254962504, + "grad_norm": 0.9436831474304199, + "learning_rate": 4.9999482157008455e-06, + "loss": 0.7353, + "step": 267 + }, + { + "epoch": 0.019702984855168356, + "grad_norm": 0.9156989455223083, + "learning_rate": 4.999947593674971e-06, + "loss": 0.6815, + "step": 268 + }, + { + "epoch": 0.01977650345537421, + "grad_norm": 0.973416805267334, + "learning_rate": 4.9999469679355614e-06, + "loss": 0.7209, + "step": 269 + }, + { + "epoch": 0.019850022055580063, + "grad_norm": 0.8488795161247253, + "learning_rate": 4.999946338482618e-06, + "loss": 0.71, + "step": 270 + }, + { + "epoch": 0.019923540655785915, + "grad_norm": 0.8830358386039734, + "learning_rate": 4.99994570531614e-06, + "loss": 0.708, + "step": 271 + }, + { + "epoch": 0.019997059255991767, + "grad_norm": 0.9228744506835938, + "learning_rate": 4.99994506843613e-06, + "loss": 0.696, + "step": 272 + }, + { + "epoch": 0.02007057785619762, + "grad_norm": 0.9539812803268433, + "learning_rate": 4.99994442784259e-06, + "loss": 0.6858, + "step": 273 + }, + { + "epoch": 0.02014409645640347, + "grad_norm": 0.918371856212616, + "learning_rate": 4.999943783535518e-06, + "loss": 0.7065, + "step": 274 + }, + { + "epoch": 0.020217615056609323, + "grad_norm": 0.8969374895095825, + "learning_rate": 4.999943135514916e-06, + "loss": 0.6603, + "step": 275 + }, + { + "epoch": 0.020291133656815175, + "grad_norm": 0.8966051340103149, + "learning_rate": 4.999942483780787e-06, + "loss": 0.6767, + "step": 276 + }, + { + "epoch": 0.020364652257021026, + "grad_norm": 0.8767815828323364, + "learning_rate": 4.9999418283331295e-06, + "loss": 0.6775, + "step": 277 + }, + { + "epoch": 0.02043817085722688, + "grad_norm": 0.8562202453613281, + "learning_rate": 4.999941169171944e-06, + "loss": 0.7048, + "step": 278 + }, + { + "epoch": 0.02051168945743273, + "grad_norm": 0.8932065367698669, + "learning_rate": 4.9999405062972336e-06, + "loss": 0.6837, + "step": 279 + }, + { + "epoch": 0.020585208057638582, + "grad_norm": 0.9029339551925659, + "learning_rate": 4.999939839708999e-06, + "loss": 0.6906, + "step": 280 + }, + { + "epoch": 0.020658726657844434, + "grad_norm": 0.9631369709968567, + "learning_rate": 4.999939169407241e-06, + "loss": 0.7132, + "step": 281 + }, + { + "epoch": 0.020732245258050286, + "grad_norm": 0.9100865721702576, + "learning_rate": 4.999938495391959e-06, + "loss": 0.6593, + "step": 282 + }, + { + "epoch": 0.020805763858256138, + "grad_norm": 0.8769522309303284, + "learning_rate": 4.999937817663156e-06, + "loss": 0.7286, + "step": 283 + }, + { + "epoch": 0.02087928245846199, + "grad_norm": 0.8595074415206909, + "learning_rate": 4.999937136220833e-06, + "loss": 0.6816, + "step": 284 + }, + { + "epoch": 0.02095280105866784, + "grad_norm": 0.944084882736206, + "learning_rate": 4.99993645106499e-06, + "loss": 0.6728, + "step": 285 + }, + { + "epoch": 0.021026319658873693, + "grad_norm": 0.9207298755645752, + "learning_rate": 4.999935762195627e-06, + "loss": 0.6882, + "step": 286 + }, + { + "epoch": 0.02109983825907955, + "grad_norm": 0.8795613646507263, + "learning_rate": 4.999935069612747e-06, + "loss": 0.6825, + "step": 287 + }, + { + "epoch": 0.0211733568592854, + "grad_norm": 0.9323369264602661, + "learning_rate": 4.999934373316351e-06, + "loss": 0.6672, + "step": 288 + }, + { + "epoch": 0.021246875459491252, + "grad_norm": 0.9060819745063782, + "learning_rate": 4.9999336733064386e-06, + "loss": 0.6614, + "step": 289 + }, + { + "epoch": 0.021320394059697104, + "grad_norm": 0.8755649924278259, + "learning_rate": 4.999932969583013e-06, + "loss": 0.6807, + "step": 290 + }, + { + "epoch": 0.021393912659902956, + "grad_norm": 0.9596628546714783, + "learning_rate": 4.999932262146071e-06, + "loss": 0.6655, + "step": 291 + }, + { + "epoch": 0.021467431260108808, + "grad_norm": 0.870047390460968, + "learning_rate": 4.999931550995619e-06, + "loss": 0.6895, + "step": 292 + }, + { + "epoch": 0.02154094986031466, + "grad_norm": 0.9329373836517334, + "learning_rate": 4.999930836131654e-06, + "loss": 0.6928, + "step": 293 + }, + { + "epoch": 0.021614468460520512, + "grad_norm": 0.8921118974685669, + "learning_rate": 4.999930117554181e-06, + "loss": 0.6418, + "step": 294 + }, + { + "epoch": 0.021687987060726364, + "grad_norm": 0.9320242404937744, + "learning_rate": 4.999929395263197e-06, + "loss": 0.6818, + "step": 295 + }, + { + "epoch": 0.021761505660932216, + "grad_norm": 0.8811293840408325, + "learning_rate": 4.999928669258705e-06, + "loss": 0.6801, + "step": 296 + }, + { + "epoch": 0.021835024261138068, + "grad_norm": 0.9170669317245483, + "learning_rate": 4.999927939540705e-06, + "loss": 0.7143, + "step": 297 + }, + { + "epoch": 0.02190854286134392, + "grad_norm": 0.8889912962913513, + "learning_rate": 4.9999272061091995e-06, + "loss": 0.6827, + "step": 298 + }, + { + "epoch": 0.02198206146154977, + "grad_norm": 0.9072418808937073, + "learning_rate": 4.9999264689641895e-06, + "loss": 0.7022, + "step": 299 + }, + { + "epoch": 0.022055580061755623, + "grad_norm": 0.8905680179595947, + "learning_rate": 4.999925728105675e-06, + "loss": 0.6993, + "step": 300 + }, + { + "epoch": 0.022129098661961475, + "grad_norm": 0.8498929738998413, + "learning_rate": 4.9999249835336586e-06, + "loss": 0.6523, + "step": 301 + }, + { + "epoch": 0.022202617262167327, + "grad_norm": 0.9485750198364258, + "learning_rate": 4.99992423524814e-06, + "loss": 0.7004, + "step": 302 + }, + { + "epoch": 0.02227613586237318, + "grad_norm": 0.8848434090614319, + "learning_rate": 4.999923483249121e-06, + "loss": 0.6759, + "step": 303 + }, + { + "epoch": 0.022349654462579034, + "grad_norm": 0.8883751034736633, + "learning_rate": 4.999922727536602e-06, + "loss": 0.6889, + "step": 304 + }, + { + "epoch": 0.022423173062784886, + "grad_norm": 0.9160410761833191, + "learning_rate": 4.999921968110586e-06, + "loss": 0.707, + "step": 305 + }, + { + "epoch": 0.022496691662990738, + "grad_norm": 0.9714613556861877, + "learning_rate": 4.999921204971071e-06, + "loss": 0.6781, + "step": 306 + }, + { + "epoch": 0.02257021026319659, + "grad_norm": 0.9077397584915161, + "learning_rate": 4.999920438118061e-06, + "loss": 0.7265, + "step": 307 + }, + { + "epoch": 0.02264372886340244, + "grad_norm": 0.9424301981925964, + "learning_rate": 4.999919667551556e-06, + "loss": 0.7303, + "step": 308 + }, + { + "epoch": 0.022717247463608294, + "grad_norm": 0.863412618637085, + "learning_rate": 4.999918893271558e-06, + "loss": 0.6716, + "step": 309 + }, + { + "epoch": 0.022790766063814145, + "grad_norm": 0.9365112781524658, + "learning_rate": 4.9999181152780665e-06, + "loss": 0.7012, + "step": 310 + }, + { + "epoch": 0.022864284664019997, + "grad_norm": 0.9169602990150452, + "learning_rate": 4.9999173335710834e-06, + "loss": 0.7106, + "step": 311 + }, + { + "epoch": 0.02293780326422585, + "grad_norm": 0.8940392732620239, + "learning_rate": 4.99991654815061e-06, + "loss": 0.6624, + "step": 312 + }, + { + "epoch": 0.0230113218644317, + "grad_norm": 0.8787896633148193, + "learning_rate": 4.999915759016648e-06, + "loss": 0.7037, + "step": 313 + }, + { + "epoch": 0.023084840464637553, + "grad_norm": 0.9231352210044861, + "learning_rate": 4.999914966169197e-06, + "loss": 0.6839, + "step": 314 + }, + { + "epoch": 0.023158359064843405, + "grad_norm": 0.8559814095497131, + "learning_rate": 4.99991416960826e-06, + "loss": 0.655, + "step": 315 + }, + { + "epoch": 0.023231877665049257, + "grad_norm": 0.9028724431991577, + "learning_rate": 4.999913369333838e-06, + "loss": 0.7122, + "step": 316 + }, + { + "epoch": 0.02330539626525511, + "grad_norm": 0.9281487464904785, + "learning_rate": 4.99991256534593e-06, + "loss": 0.6738, + "step": 317 + }, + { + "epoch": 0.02337891486546096, + "grad_norm": 0.915011465549469, + "learning_rate": 4.99991175764454e-06, + "loss": 0.6655, + "step": 318 + }, + { + "epoch": 0.023452433465666812, + "grad_norm": 0.8986821174621582, + "learning_rate": 4.999910946229668e-06, + "loss": 0.667, + "step": 319 + }, + { + "epoch": 0.023525952065872664, + "grad_norm": 0.8805340528488159, + "learning_rate": 4.999910131101314e-06, + "loss": 0.7001, + "step": 320 + }, + { + "epoch": 0.02359947066607852, + "grad_norm": 0.9429108500480652, + "learning_rate": 4.999909312259481e-06, + "loss": 0.6835, + "step": 321 + }, + { + "epoch": 0.02367298926628437, + "grad_norm": 0.8529578447341919, + "learning_rate": 4.999908489704171e-06, + "loss": 0.6992, + "step": 322 + }, + { + "epoch": 0.023746507866490223, + "grad_norm": 0.895702064037323, + "learning_rate": 4.9999076634353814e-06, + "loss": 0.6713, + "step": 323 + }, + { + "epoch": 0.023820026466696075, + "grad_norm": 0.9275758862495422, + "learning_rate": 4.999906833453118e-06, + "loss": 0.6615, + "step": 324 + }, + { + "epoch": 0.023893545066901927, + "grad_norm": 0.8796506524085999, + "learning_rate": 4.999905999757378e-06, + "loss": 0.7007, + "step": 325 + }, + { + "epoch": 0.02396706366710778, + "grad_norm": 0.8811155557632446, + "learning_rate": 4.999905162348166e-06, + "loss": 0.7195, + "step": 326 + }, + { + "epoch": 0.02404058226731363, + "grad_norm": 0.9049225449562073, + "learning_rate": 4.999904321225481e-06, + "loss": 0.7008, + "step": 327 + }, + { + "epoch": 0.024114100867519483, + "grad_norm": 0.9164134860038757, + "learning_rate": 4.999903476389325e-06, + "loss": 0.6651, + "step": 328 + }, + { + "epoch": 0.024187619467725335, + "grad_norm": 0.9474919438362122, + "learning_rate": 4.9999026278397e-06, + "loss": 0.6854, + "step": 329 + }, + { + "epoch": 0.024261138067931187, + "grad_norm": 0.9634822010993958, + "learning_rate": 4.999901775576606e-06, + "loss": 0.7104, + "step": 330 + }, + { + "epoch": 0.02433465666813704, + "grad_norm": 0.9221928715705872, + "learning_rate": 4.999900919600045e-06, + "loss": 0.6676, + "step": 331 + }, + { + "epoch": 0.02440817526834289, + "grad_norm": 0.9026092290878296, + "learning_rate": 4.999900059910018e-06, + "loss": 0.7219, + "step": 332 + }, + { + "epoch": 0.024481693868548742, + "grad_norm": 0.8906301856040955, + "learning_rate": 4.999899196506526e-06, + "loss": 0.7073, + "step": 333 + }, + { + "epoch": 0.024555212468754594, + "grad_norm": 0.8687638640403748, + "learning_rate": 4.999898329389571e-06, + "loss": 0.6342, + "step": 334 + }, + { + "epoch": 0.024628731068960446, + "grad_norm": 0.8734484314918518, + "learning_rate": 4.9998974585591545e-06, + "loss": 0.7122, + "step": 335 + }, + { + "epoch": 0.024702249669166298, + "grad_norm": 0.8814878463745117, + "learning_rate": 4.999896584015277e-06, + "loss": 0.6739, + "step": 336 + }, + { + "epoch": 0.02477576826937215, + "grad_norm": 0.8827154040336609, + "learning_rate": 4.999895705757939e-06, + "loss": 0.6791, + "step": 337 + }, + { + "epoch": 0.024849286869578, + "grad_norm": 0.8957892060279846, + "learning_rate": 4.999894823787144e-06, + "loss": 0.6928, + "step": 338 + }, + { + "epoch": 0.024922805469783857, + "grad_norm": 0.9330945014953613, + "learning_rate": 4.999893938102891e-06, + "loss": 0.7179, + "step": 339 + }, + { + "epoch": 0.02499632406998971, + "grad_norm": 0.9021344184875488, + "learning_rate": 4.999893048705182e-06, + "loss": 0.6938, + "step": 340 + }, + { + "epoch": 0.02506984267019556, + "grad_norm": 0.8941838145256042, + "learning_rate": 4.999892155594021e-06, + "loss": 0.7076, + "step": 341 + }, + { + "epoch": 0.025143361270401413, + "grad_norm": 0.9637088179588318, + "learning_rate": 4.999891258769406e-06, + "loss": 0.7186, + "step": 342 + }, + { + "epoch": 0.025216879870607264, + "grad_norm": 0.9006240367889404, + "learning_rate": 4.999890358231338e-06, + "loss": 0.6888, + "step": 343 + }, + { + "epoch": 0.025290398470813116, + "grad_norm": 0.8861478567123413, + "learning_rate": 4.999889453979821e-06, + "loss": 0.6586, + "step": 344 + }, + { + "epoch": 0.025363917071018968, + "grad_norm": 0.9023501873016357, + "learning_rate": 4.999888546014856e-06, + "loss": 0.6671, + "step": 345 + }, + { + "epoch": 0.02543743567122482, + "grad_norm": 0.9167267680168152, + "learning_rate": 4.999887634336442e-06, + "loss": 0.6855, + "step": 346 + }, + { + "epoch": 0.025510954271430672, + "grad_norm": 0.8984218239784241, + "learning_rate": 4.999886718944583e-06, + "loss": 0.6626, + "step": 347 + }, + { + "epoch": 0.025584472871636524, + "grad_norm": 0.97298663854599, + "learning_rate": 4.999885799839279e-06, + "loss": 0.6706, + "step": 348 + }, + { + "epoch": 0.025657991471842376, + "grad_norm": 0.8641842007637024, + "learning_rate": 4.999884877020531e-06, + "loss": 0.6915, + "step": 349 + }, + { + "epoch": 0.025731510072048228, + "grad_norm": 0.9565679430961609, + "learning_rate": 4.999883950488341e-06, + "loss": 0.6759, + "step": 350 + }, + { + "epoch": 0.02580502867225408, + "grad_norm": 0.9897817373275757, + "learning_rate": 4.9998830202427105e-06, + "loss": 0.6769, + "step": 351 + }, + { + "epoch": 0.02587854727245993, + "grad_norm": 0.9851171374320984, + "learning_rate": 4.999882086283641e-06, + "loss": 0.7002, + "step": 352 + }, + { + "epoch": 0.025952065872665783, + "grad_norm": 0.9364323019981384, + "learning_rate": 4.999881148611132e-06, + "loss": 0.693, + "step": 353 + }, + { + "epoch": 0.026025584472871635, + "grad_norm": 0.9576888084411621, + "learning_rate": 4.999880207225188e-06, + "loss": 0.6696, + "step": 354 + }, + { + "epoch": 0.026099103073077487, + "grad_norm": 0.9219179153442383, + "learning_rate": 4.999879262125808e-06, + "loss": 0.6952, + "step": 355 + }, + { + "epoch": 0.026172621673283342, + "grad_norm": 0.9090839624404907, + "learning_rate": 4.9998783133129945e-06, + "loss": 0.7061, + "step": 356 + }, + { + "epoch": 0.026246140273489194, + "grad_norm": 0.8953468203544617, + "learning_rate": 4.999877360786749e-06, + "loss": 0.7083, + "step": 357 + }, + { + "epoch": 0.026319658873695046, + "grad_norm": 0.9064217805862427, + "learning_rate": 4.999876404547072e-06, + "loss": 0.618, + "step": 358 + }, + { + "epoch": 0.026393177473900898, + "grad_norm": 0.903666079044342, + "learning_rate": 4.999875444593965e-06, + "loss": 0.6526, + "step": 359 + }, + { + "epoch": 0.02646669607410675, + "grad_norm": 0.9834994673728943, + "learning_rate": 4.9998744809274316e-06, + "loss": 0.6628, + "step": 360 + }, + { + "epoch": 0.026540214674312602, + "grad_norm": 0.9108104109764099, + "learning_rate": 4.9998735135474706e-06, + "loss": 0.6895, + "step": 361 + }, + { + "epoch": 0.026613733274518454, + "grad_norm": 0.8865970373153687, + "learning_rate": 4.999872542454084e-06, + "loss": 0.6719, + "step": 362 + }, + { + "epoch": 0.026687251874724306, + "grad_norm": 0.8531721234321594, + "learning_rate": 4.999871567647274e-06, + "loss": 0.6928, + "step": 363 + }, + { + "epoch": 0.026760770474930157, + "grad_norm": 0.9684659838676453, + "learning_rate": 4.999870589127042e-06, + "loss": 0.7589, + "step": 364 + }, + { + "epoch": 0.02683428907513601, + "grad_norm": 0.8807981014251709, + "learning_rate": 4.99986960689339e-06, + "loss": 0.6859, + "step": 365 + }, + { + "epoch": 0.02690780767534186, + "grad_norm": 0.8829328417778015, + "learning_rate": 4.9998686209463165e-06, + "loss": 0.6784, + "step": 366 + }, + { + "epoch": 0.026981326275547713, + "grad_norm": 0.9220550656318665, + "learning_rate": 4.999867631285826e-06, + "loss": 0.6981, + "step": 367 + }, + { + "epoch": 0.027054844875753565, + "grad_norm": 0.8738634586334229, + "learning_rate": 4.999866637911919e-06, + "loss": 0.6495, + "step": 368 + }, + { + "epoch": 0.027128363475959417, + "grad_norm": 0.9013907313346863, + "learning_rate": 4.999865640824597e-06, + "loss": 0.6792, + "step": 369 + }, + { + "epoch": 0.02720188207616527, + "grad_norm": 0.8495463132858276, + "learning_rate": 4.999864640023862e-06, + "loss": 0.6667, + "step": 370 + }, + { + "epoch": 0.02727540067637112, + "grad_norm": 0.9115763306617737, + "learning_rate": 4.9998636355097145e-06, + "loss": 0.6666, + "step": 371 + }, + { + "epoch": 0.027348919276576972, + "grad_norm": 0.9425170421600342, + "learning_rate": 4.999862627282156e-06, + "loss": 0.6827, + "step": 372 + }, + { + "epoch": 0.027422437876782824, + "grad_norm": 0.89332515001297, + "learning_rate": 4.999861615341189e-06, + "loss": 0.7093, + "step": 373 + }, + { + "epoch": 0.02749595647698868, + "grad_norm": 0.9390074610710144, + "learning_rate": 4.999860599686814e-06, + "loss": 0.7335, + "step": 374 + }, + { + "epoch": 0.02756947507719453, + "grad_norm": 0.9444466233253479, + "learning_rate": 4.999859580319034e-06, + "loss": 0.6939, + "step": 375 + }, + { + "epoch": 0.027642993677400383, + "grad_norm": 1.0425325632095337, + "learning_rate": 4.999858557237848e-06, + "loss": 0.7093, + "step": 376 + }, + { + "epoch": 0.027716512277606235, + "grad_norm": 0.8692599534988403, + "learning_rate": 4.999857530443261e-06, + "loss": 0.6799, + "step": 377 + }, + { + "epoch": 0.027790030877812087, + "grad_norm": 0.9205524921417236, + "learning_rate": 4.999856499935271e-06, + "loss": 0.7003, + "step": 378 + }, + { + "epoch": 0.02786354947801794, + "grad_norm": 0.8993099331855774, + "learning_rate": 4.999855465713881e-06, + "loss": 0.6751, + "step": 379 + }, + { + "epoch": 0.02793706807822379, + "grad_norm": 0.929251492023468, + "learning_rate": 4.999854427779093e-06, + "loss": 0.6544, + "step": 380 + }, + { + "epoch": 0.028010586678429643, + "grad_norm": 0.9287191033363342, + "learning_rate": 4.999853386130908e-06, + "loss": 0.702, + "step": 381 + }, + { + "epoch": 0.028084105278635495, + "grad_norm": 0.8940449357032776, + "learning_rate": 4.9998523407693275e-06, + "loss": 0.6728, + "step": 382 + }, + { + "epoch": 0.028157623878841347, + "grad_norm": 0.9249378442764282, + "learning_rate": 4.999851291694354e-06, + "loss": 0.6749, + "step": 383 + }, + { + "epoch": 0.0282311424790472, + "grad_norm": 0.9225497245788574, + "learning_rate": 4.999850238905988e-06, + "loss": 0.6909, + "step": 384 + }, + { + "epoch": 0.02830466107925305, + "grad_norm": 0.9008486866950989, + "learning_rate": 4.99984918240423e-06, + "loss": 0.6921, + "step": 385 + }, + { + "epoch": 0.028378179679458902, + "grad_norm": 0.8985016345977783, + "learning_rate": 4.999848122189085e-06, + "loss": 0.6915, + "step": 386 + }, + { + "epoch": 0.028451698279664754, + "grad_norm": 0.857316255569458, + "learning_rate": 4.999847058260552e-06, + "loss": 0.7165, + "step": 387 + }, + { + "epoch": 0.028525216879870606, + "grad_norm": 0.8692685961723328, + "learning_rate": 4.999845990618632e-06, + "loss": 0.6917, + "step": 388 + }, + { + "epoch": 0.028598735480076458, + "grad_norm": 0.9247520565986633, + "learning_rate": 4.999844919263329e-06, + "loss": 0.6831, + "step": 389 + }, + { + "epoch": 0.02867225408028231, + "grad_norm": 0.8977326154708862, + "learning_rate": 4.9998438441946424e-06, + "loss": 0.6784, + "step": 390 + }, + { + "epoch": 0.028745772680488165, + "grad_norm": 0.8422284722328186, + "learning_rate": 4.9998427654125745e-06, + "loss": 0.6545, + "step": 391 + }, + { + "epoch": 0.028819291280694017, + "grad_norm": 0.8973318934440613, + "learning_rate": 4.9998416829171285e-06, + "loss": 0.6758, + "step": 392 + }, + { + "epoch": 0.02889280988089987, + "grad_norm": 0.9890840649604797, + "learning_rate": 4.999840596708303e-06, + "loss": 0.713, + "step": 393 + }, + { + "epoch": 0.02896632848110572, + "grad_norm": 0.8983599543571472, + "learning_rate": 4.9998395067861016e-06, + "loss": 0.7066, + "step": 394 + }, + { + "epoch": 0.029039847081311573, + "grad_norm": 0.8973932862281799, + "learning_rate": 4.999838413150525e-06, + "loss": 0.6946, + "step": 395 + }, + { + "epoch": 0.029113365681517425, + "grad_norm": 0.8959775567054749, + "learning_rate": 4.999837315801577e-06, + "loss": 0.6829, + "step": 396 + }, + { + "epoch": 0.029186884281723276, + "grad_norm": 0.9069240689277649, + "learning_rate": 4.999836214739256e-06, + "loss": 0.7174, + "step": 397 + }, + { + "epoch": 0.02926040288192913, + "grad_norm": 0.9082890152931213, + "learning_rate": 4.999835109963565e-06, + "loss": 0.6759, + "step": 398 + }, + { + "epoch": 0.02933392148213498, + "grad_norm": 0.9052538871765137, + "learning_rate": 4.999834001474507e-06, + "loss": 0.6512, + "step": 399 + }, + { + "epoch": 0.029407440082340832, + "grad_norm": 0.8763680458068848, + "learning_rate": 4.999832889272082e-06, + "loss": 0.6994, + "step": 400 + }, + { + "epoch": 0.029480958682546684, + "grad_norm": 0.9308213591575623, + "learning_rate": 4.999831773356292e-06, + "loss": 0.667, + "step": 401 + }, + { + "epoch": 0.029554477282752536, + "grad_norm": 0.9311529994010925, + "learning_rate": 4.999830653727139e-06, + "loss": 0.6815, + "step": 402 + }, + { + "epoch": 0.029627995882958388, + "grad_norm": 0.86879563331604, + "learning_rate": 4.999829530384624e-06, + "loss": 0.6719, + "step": 403 + }, + { + "epoch": 0.02970151448316424, + "grad_norm": 0.8749473690986633, + "learning_rate": 4.999828403328749e-06, + "loss": 0.6572, + "step": 404 + }, + { + "epoch": 0.02977503308337009, + "grad_norm": 0.9151558876037598, + "learning_rate": 4.999827272559517e-06, + "loss": 0.6927, + "step": 405 + }, + { + "epoch": 0.029848551683575943, + "grad_norm": 0.8939521908760071, + "learning_rate": 4.999826138076927e-06, + "loss": 0.6579, + "step": 406 + }, + { + "epoch": 0.029922070283781795, + "grad_norm": 0.9026974439620972, + "learning_rate": 4.9998249998809834e-06, + "loss": 0.6759, + "step": 407 + }, + { + "epoch": 0.02999558888398765, + "grad_norm": 0.8809598088264465, + "learning_rate": 4.999823857971686e-06, + "loss": 0.6614, + "step": 408 + }, + { + "epoch": 0.030069107484193502, + "grad_norm": 1.038047432899475, + "learning_rate": 4.999822712349037e-06, + "loss": 0.6795, + "step": 409 + }, + { + "epoch": 0.030142626084399354, + "grad_norm": 0.9165703058242798, + "learning_rate": 4.99982156301304e-06, + "loss": 0.7206, + "step": 410 + }, + { + "epoch": 0.030216144684605206, + "grad_norm": 0.8753448128700256, + "learning_rate": 4.999820409963693e-06, + "loss": 0.6606, + "step": 411 + }, + { + "epoch": 0.030289663284811058, + "grad_norm": 0.9012455344200134, + "learning_rate": 4.9998192532009995e-06, + "loss": 0.6611, + "step": 412 + }, + { + "epoch": 0.03036318188501691, + "grad_norm": 0.9593011736869812, + "learning_rate": 4.999818092724962e-06, + "loss": 0.6896, + "step": 413 + }, + { + "epoch": 0.030436700485222762, + "grad_norm": 0.9049318432807922, + "learning_rate": 4.999816928535583e-06, + "loss": 0.686, + "step": 414 + }, + { + "epoch": 0.030510219085428614, + "grad_norm": 0.9025911688804626, + "learning_rate": 4.99981576063286e-06, + "loss": 0.6753, + "step": 415 + }, + { + "epoch": 0.030583737685634466, + "grad_norm": 0.868049681186676, + "learning_rate": 4.9998145890167995e-06, + "loss": 0.6431, + "step": 416 + }, + { + "epoch": 0.030657256285840317, + "grad_norm": 0.9877146482467651, + "learning_rate": 4.999813413687401e-06, + "loss": 0.6959, + "step": 417 + }, + { + "epoch": 0.03073077488604617, + "grad_norm": 0.9567207098007202, + "learning_rate": 4.999812234644667e-06, + "loss": 0.6768, + "step": 418 + }, + { + "epoch": 0.03080429348625202, + "grad_norm": 0.952367901802063, + "learning_rate": 4.999811051888598e-06, + "loss": 0.6474, + "step": 419 + }, + { + "epoch": 0.030877812086457873, + "grad_norm": 0.8720830082893372, + "learning_rate": 4.999809865419196e-06, + "loss": 0.6839, + "step": 420 + }, + { + "epoch": 0.030951330686663725, + "grad_norm": 0.9080668091773987, + "learning_rate": 4.999808675236465e-06, + "loss": 0.6653, + "step": 421 + }, + { + "epoch": 0.031024849286869577, + "grad_norm": 0.8704681992530823, + "learning_rate": 4.999807481340404e-06, + "loss": 0.6639, + "step": 422 + }, + { + "epoch": 0.03109836788707543, + "grad_norm": 0.9167091250419617, + "learning_rate": 4.999806283731016e-06, + "loss": 0.6576, + "step": 423 + }, + { + "epoch": 0.03117188648728128, + "grad_norm": 0.9165781140327454, + "learning_rate": 4.9998050824083025e-06, + "loss": 0.6433, + "step": 424 + }, + { + "epoch": 0.031245405087487133, + "grad_norm": 0.8376379013061523, + "learning_rate": 4.9998038773722654e-06, + "loss": 0.6514, + "step": 425 + }, + { + "epoch": 0.031318923687692984, + "grad_norm": 0.880710780620575, + "learning_rate": 4.999802668622907e-06, + "loss": 0.6586, + "step": 426 + }, + { + "epoch": 0.031392442287898836, + "grad_norm": 0.9744524359703064, + "learning_rate": 4.999801456160228e-06, + "loss": 0.7038, + "step": 427 + }, + { + "epoch": 0.03146596088810469, + "grad_norm": 0.90839022397995, + "learning_rate": 4.9998002399842325e-06, + "loss": 0.6602, + "step": 428 + }, + { + "epoch": 0.03153947948831054, + "grad_norm": 0.9204235076904297, + "learning_rate": 4.999799020094919e-06, + "loss": 0.6353, + "step": 429 + }, + { + "epoch": 0.03161299808851639, + "grad_norm": 0.8932419419288635, + "learning_rate": 4.999797796492291e-06, + "loss": 0.6419, + "step": 430 + }, + { + "epoch": 0.031686516688722244, + "grad_norm": 0.9409022331237793, + "learning_rate": 4.99979656917635e-06, + "loss": 0.6832, + "step": 431 + }, + { + "epoch": 0.031760035288928096, + "grad_norm": 0.8973593711853027, + "learning_rate": 4.9997953381471e-06, + "loss": 0.6118, + "step": 432 + }, + { + "epoch": 0.03183355388913395, + "grad_norm": 0.8936858177185059, + "learning_rate": 4.99979410340454e-06, + "loss": 0.6819, + "step": 433 + }, + { + "epoch": 0.031907072489339806, + "grad_norm": 0.9579751491546631, + "learning_rate": 4.999792864948671e-06, + "loss": 0.6791, + "step": 434 + }, + { + "epoch": 0.03198059108954566, + "grad_norm": 0.9322174191474915, + "learning_rate": 4.9997916227794984e-06, + "loss": 0.6712, + "step": 435 + }, + { + "epoch": 0.03205410968975151, + "grad_norm": 0.9393752217292786, + "learning_rate": 4.999790376897022e-06, + "loss": 0.6879, + "step": 436 + }, + { + "epoch": 0.03212762828995736, + "grad_norm": 0.9040606617927551, + "learning_rate": 4.999789127301243e-06, + "loss": 0.6649, + "step": 437 + }, + { + "epoch": 0.032201146890163214, + "grad_norm": 0.9439957737922668, + "learning_rate": 4.999787873992165e-06, + "loss": 0.6852, + "step": 438 + }, + { + "epoch": 0.032274665490369066, + "grad_norm": 0.8809225559234619, + "learning_rate": 4.999786616969789e-06, + "loss": 0.6402, + "step": 439 + }, + { + "epoch": 0.03234818409057492, + "grad_norm": 0.8732386827468872, + "learning_rate": 4.999785356234117e-06, + "loss": 0.6637, + "step": 440 + }, + { + "epoch": 0.03242170269078077, + "grad_norm": 0.8830150365829468, + "learning_rate": 4.99978409178515e-06, + "loss": 0.6747, + "step": 441 + }, + { + "epoch": 0.03249522129098662, + "grad_norm": 0.924486517906189, + "learning_rate": 4.999782823622891e-06, + "loss": 0.6864, + "step": 442 + }, + { + "epoch": 0.03256873989119247, + "grad_norm": 0.9726548790931702, + "learning_rate": 4.999781551747341e-06, + "loss": 0.6731, + "step": 443 + }, + { + "epoch": 0.032642258491398325, + "grad_norm": 0.9313679933547974, + "learning_rate": 4.999780276158504e-06, + "loss": 0.6372, + "step": 444 + }, + { + "epoch": 0.03271577709160418, + "grad_norm": 0.9105954766273499, + "learning_rate": 4.999778996856378e-06, + "loss": 0.6709, + "step": 445 + }, + { + "epoch": 0.03278929569181003, + "grad_norm": 0.8583987951278687, + "learning_rate": 4.999777713840969e-06, + "loss": 0.7205, + "step": 446 + }, + { + "epoch": 0.03286281429201588, + "grad_norm": 0.8993404507637024, + "learning_rate": 4.999776427112276e-06, + "loss": 0.6373, + "step": 447 + }, + { + "epoch": 0.03293633289222173, + "grad_norm": 0.863158643245697, + "learning_rate": 4.999775136670303e-06, + "loss": 0.6767, + "step": 448 + }, + { + "epoch": 0.033009851492427585, + "grad_norm": 0.9031382203102112, + "learning_rate": 4.99977384251505e-06, + "loss": 0.6654, + "step": 449 + }, + { + "epoch": 0.033083370092633436, + "grad_norm": 0.9319318532943726, + "learning_rate": 4.99977254464652e-06, + "loss": 0.6814, + "step": 450 + }, + { + "epoch": 0.03315688869283929, + "grad_norm": 0.9106870293617249, + "learning_rate": 4.999771243064715e-06, + "loss": 0.6766, + "step": 451 + }, + { + "epoch": 0.03323040729304514, + "grad_norm": 0.8962377905845642, + "learning_rate": 4.999769937769637e-06, + "loss": 0.71, + "step": 452 + }, + { + "epoch": 0.03330392589325099, + "grad_norm": 0.9740825891494751, + "learning_rate": 4.999768628761287e-06, + "loss": 0.7001, + "step": 453 + }, + { + "epoch": 0.033377444493456844, + "grad_norm": 0.9701704978942871, + "learning_rate": 4.999767316039668e-06, + "loss": 0.6818, + "step": 454 + }, + { + "epoch": 0.033450963093662696, + "grad_norm": 0.9088112711906433, + "learning_rate": 4.9997659996047815e-06, + "loss": 0.6656, + "step": 455 + }, + { + "epoch": 0.03352448169386855, + "grad_norm": 0.9223311543464661, + "learning_rate": 4.99976467945663e-06, + "loss": 0.6707, + "step": 456 + }, + { + "epoch": 0.0335980002940744, + "grad_norm": 0.8492006063461304, + "learning_rate": 4.999763355595214e-06, + "loss": 0.7089, + "step": 457 + }, + { + "epoch": 0.03367151889428025, + "grad_norm": 0.8770394921302795, + "learning_rate": 4.999762028020537e-06, + "loss": 0.6716, + "step": 458 + }, + { + "epoch": 0.0337450374944861, + "grad_norm": 0.9929648041725159, + "learning_rate": 4.999760696732599e-06, + "loss": 0.7144, + "step": 459 + }, + { + "epoch": 0.033818556094691955, + "grad_norm": 0.9752563834190369, + "learning_rate": 4.999759361731406e-06, + "loss": 0.6924, + "step": 460 + }, + { + "epoch": 0.03389207469489781, + "grad_norm": 0.959294855594635, + "learning_rate": 4.999758023016955e-06, + "loss": 0.7378, + "step": 461 + }, + { + "epoch": 0.03396559329510366, + "grad_norm": 1.0037825107574463, + "learning_rate": 4.9997566805892516e-06, + "loss": 0.7291, + "step": 462 + }, + { + "epoch": 0.03403911189530951, + "grad_norm": 0.9078593254089355, + "learning_rate": 4.999755334448296e-06, + "loss": 0.6254, + "step": 463 + }, + { + "epoch": 0.03411263049551536, + "grad_norm": 0.981401801109314, + "learning_rate": 4.999753984594091e-06, + "loss": 0.6968, + "step": 464 + }, + { + "epoch": 0.034186149095721215, + "grad_norm": 0.866899311542511, + "learning_rate": 4.999752631026638e-06, + "loss": 0.65, + "step": 465 + }, + { + "epoch": 0.03425966769592707, + "grad_norm": 0.8927156329154968, + "learning_rate": 4.99975127374594e-06, + "loss": 0.6923, + "step": 466 + }, + { + "epoch": 0.03433318629613292, + "grad_norm": 0.969243586063385, + "learning_rate": 4.999749912751998e-06, + "loss": 0.6813, + "step": 467 + }, + { + "epoch": 0.03440670489633877, + "grad_norm": 0.9190050363540649, + "learning_rate": 4.999748548044814e-06, + "loss": 0.6706, + "step": 468 + }, + { + "epoch": 0.03448022349654463, + "grad_norm": 0.9287043809890747, + "learning_rate": 4.999747179624391e-06, + "loss": 0.694, + "step": 469 + }, + { + "epoch": 0.03455374209675048, + "grad_norm": 0.8335980772972107, + "learning_rate": 4.99974580749073e-06, + "loss": 0.6232, + "step": 470 + }, + { + "epoch": 0.03462726069695633, + "grad_norm": 0.8925533890724182, + "learning_rate": 4.999744431643834e-06, + "loss": 0.6747, + "step": 471 + }, + { + "epoch": 0.034700779297162185, + "grad_norm": 0.8895329236984253, + "learning_rate": 4.999743052083704e-06, + "loss": 0.6419, + "step": 472 + }, + { + "epoch": 0.03477429789736804, + "grad_norm": 0.8715165853500366, + "learning_rate": 4.9997416688103426e-06, + "loss": 0.678, + "step": 473 + }, + { + "epoch": 0.03484781649757389, + "grad_norm": 0.9329585433006287, + "learning_rate": 4.999740281823752e-06, + "loss": 0.6376, + "step": 474 + }, + { + "epoch": 0.03492133509777974, + "grad_norm": 0.8514848947525024, + "learning_rate": 4.999738891123934e-06, + "loss": 0.689, + "step": 475 + }, + { + "epoch": 0.03499485369798559, + "grad_norm": 0.8834176063537598, + "learning_rate": 4.999737496710891e-06, + "loss": 0.6554, + "step": 476 + }, + { + "epoch": 0.035068372298191444, + "grad_norm": 0.898615300655365, + "learning_rate": 4.999736098584625e-06, + "loss": 0.6685, + "step": 477 + }, + { + "epoch": 0.035141890898397296, + "grad_norm": 0.943239688873291, + "learning_rate": 4.999734696745138e-06, + "loss": 0.6658, + "step": 478 + }, + { + "epoch": 0.03521540949860315, + "grad_norm": 0.88676917552948, + "learning_rate": 4.999733291192431e-06, + "loss": 0.6685, + "step": 479 + }, + { + "epoch": 0.035288928098809, + "grad_norm": 0.9091856479644775, + "learning_rate": 4.999731881926508e-06, + "loss": 0.6729, + "step": 480 + }, + { + "epoch": 0.03536244669901485, + "grad_norm": 0.9539706707000732, + "learning_rate": 4.99973046894737e-06, + "loss": 0.6676, + "step": 481 + }, + { + "epoch": 0.035435965299220704, + "grad_norm": 0.9328814148902893, + "learning_rate": 4.999729052255019e-06, + "loss": 0.6986, + "step": 482 + }, + { + "epoch": 0.035509483899426555, + "grad_norm": 0.937873125076294, + "learning_rate": 4.999727631849458e-06, + "loss": 0.6851, + "step": 483 + }, + { + "epoch": 0.03558300249963241, + "grad_norm": 0.9150975942611694, + "learning_rate": 4.9997262077306875e-06, + "loss": 0.6786, + "step": 484 + }, + { + "epoch": 0.03565652109983826, + "grad_norm": 0.9646447896957397, + "learning_rate": 4.999724779898711e-06, + "loss": 0.6503, + "step": 485 + }, + { + "epoch": 0.03573003970004411, + "grad_norm": 0.9805817604064941, + "learning_rate": 4.9997233483535305e-06, + "loss": 0.6951, + "step": 486 + }, + { + "epoch": 0.03580355830024996, + "grad_norm": 0.9047462344169617, + "learning_rate": 4.999721913095147e-06, + "loss": 0.668, + "step": 487 + }, + { + "epoch": 0.035877076900455815, + "grad_norm": 0.9185648560523987, + "learning_rate": 4.999720474123565e-06, + "loss": 0.6547, + "step": 488 + }, + { + "epoch": 0.03595059550066167, + "grad_norm": 0.9536022543907166, + "learning_rate": 4.999719031438784e-06, + "loss": 0.6598, + "step": 489 + }, + { + "epoch": 0.03602411410086752, + "grad_norm": 0.9068762063980103, + "learning_rate": 4.999717585040808e-06, + "loss": 0.656, + "step": 490 + }, + { + "epoch": 0.03609763270107337, + "grad_norm": 0.9148703813552856, + "learning_rate": 4.999716134929637e-06, + "loss": 0.6712, + "step": 491 + }, + { + "epoch": 0.03617115130127922, + "grad_norm": 0.8662840127944946, + "learning_rate": 4.999714681105275e-06, + "loss": 0.6327, + "step": 492 + }, + { + "epoch": 0.036244669901485074, + "grad_norm": 0.9038741588592529, + "learning_rate": 4.999713223567725e-06, + "loss": 0.6729, + "step": 493 + }, + { + "epoch": 0.036318188501690926, + "grad_norm": 0.9072089195251465, + "learning_rate": 4.9997117623169875e-06, + "loss": 0.6828, + "step": 494 + }, + { + "epoch": 0.03639170710189678, + "grad_norm": 0.8825056552886963, + "learning_rate": 4.999710297353064e-06, + "loss": 0.6091, + "step": 495 + }, + { + "epoch": 0.03646522570210263, + "grad_norm": 0.8527052402496338, + "learning_rate": 4.9997088286759586e-06, + "loss": 0.6986, + "step": 496 + }, + { + "epoch": 0.03653874430230848, + "grad_norm": 0.9165689945220947, + "learning_rate": 4.999707356285673e-06, + "loss": 0.678, + "step": 497 + }, + { + "epoch": 0.036612262902514334, + "grad_norm": 1.0216366052627563, + "learning_rate": 4.999705880182208e-06, + "loss": 0.6974, + "step": 498 + }, + { + "epoch": 0.036685781502720186, + "grad_norm": 0.8511521220207214, + "learning_rate": 4.999704400365567e-06, + "loss": 0.6499, + "step": 499 + }, + { + "epoch": 0.03675930010292604, + "grad_norm": 0.9159859418869019, + "learning_rate": 4.999702916835753e-06, + "loss": 0.652, + "step": 500 + }, + { + "epoch": 0.03683281870313189, + "grad_norm": 0.8968804478645325, + "learning_rate": 4.999701429592766e-06, + "loss": 0.6833, + "step": 501 + }, + { + "epoch": 0.03690633730333774, + "grad_norm": 0.9367195963859558, + "learning_rate": 4.99969993863661e-06, + "loss": 0.7232, + "step": 502 + }, + { + "epoch": 0.03697985590354359, + "grad_norm": 0.9010957479476929, + "learning_rate": 4.9996984439672855e-06, + "loss": 0.679, + "step": 503 + }, + { + "epoch": 0.03705337450374945, + "grad_norm": 0.8776184916496277, + "learning_rate": 4.9996969455847975e-06, + "loss": 0.6669, + "step": 504 + }, + { + "epoch": 0.037126893103955304, + "grad_norm": 0.873222291469574, + "learning_rate": 4.999695443489146e-06, + "loss": 0.6104, + "step": 505 + }, + { + "epoch": 0.037200411704161156, + "grad_norm": 0.9053163528442383, + "learning_rate": 4.9996939376803336e-06, + "loss": 0.6742, + "step": 506 + }, + { + "epoch": 0.03727393030436701, + "grad_norm": 0.906057596206665, + "learning_rate": 4.999692428158363e-06, + "loss": 0.6702, + "step": 507 + }, + { + "epoch": 0.03734744890457286, + "grad_norm": 1.0221502780914307, + "learning_rate": 4.999690914923235e-06, + "loss": 0.7044, + "step": 508 + }, + { + "epoch": 0.03742096750477871, + "grad_norm": 0.8522925972938538, + "learning_rate": 4.999689397974954e-06, + "loss": 0.6657, + "step": 509 + }, + { + "epoch": 0.03749448610498456, + "grad_norm": 0.9655179381370544, + "learning_rate": 4.999687877313521e-06, + "loss": 0.6801, + "step": 510 + }, + { + "epoch": 0.037568004705190415, + "grad_norm": 0.8858939409255981, + "learning_rate": 4.99968635293894e-06, + "loss": 0.6472, + "step": 511 + }, + { + "epoch": 0.03764152330539627, + "grad_norm": 0.9397798180580139, + "learning_rate": 4.9996848248512094e-06, + "loss": 0.6458, + "step": 512 + }, + { + "epoch": 0.03771504190560212, + "grad_norm": 0.8884574770927429, + "learning_rate": 4.999683293050336e-06, + "loss": 0.648, + "step": 513 + }, + { + "epoch": 0.03778856050580797, + "grad_norm": 0.9605230093002319, + "learning_rate": 4.999681757536319e-06, + "loss": 0.6788, + "step": 514 + }, + { + "epoch": 0.03786207910601382, + "grad_norm": 0.8670795559883118, + "learning_rate": 4.999680218309161e-06, + "loss": 0.6582, + "step": 515 + }, + { + "epoch": 0.037935597706219674, + "grad_norm": 0.9397976398468018, + "learning_rate": 4.9996786753688655e-06, + "loss": 0.6672, + "step": 516 + }, + { + "epoch": 0.038009116306425526, + "grad_norm": 0.9629880785942078, + "learning_rate": 4.999677128715434e-06, + "loss": 0.6341, + "step": 517 + }, + { + "epoch": 0.03808263490663138, + "grad_norm": 0.9028946757316589, + "learning_rate": 4.99967557834887e-06, + "loss": 0.6566, + "step": 518 + }, + { + "epoch": 0.03815615350683723, + "grad_norm": 0.8551868796348572, + "learning_rate": 4.999674024269174e-06, + "loss": 0.6687, + "step": 519 + }, + { + "epoch": 0.03822967210704308, + "grad_norm": 0.9720919728279114, + "learning_rate": 4.999672466476349e-06, + "loss": 0.6827, + "step": 520 + }, + { + "epoch": 0.038303190707248934, + "grad_norm": 0.9186602234840393, + "learning_rate": 4.9996709049703975e-06, + "loss": 0.7037, + "step": 521 + }, + { + "epoch": 0.038376709307454786, + "grad_norm": 0.9329096078872681, + "learning_rate": 4.999669339751322e-06, + "loss": 0.651, + "step": 522 + }, + { + "epoch": 0.03845022790766064, + "grad_norm": 0.9190548658370972, + "learning_rate": 4.999667770819124e-06, + "loss": 0.6506, + "step": 523 + }, + { + "epoch": 0.03852374650786649, + "grad_norm": 0.9257223606109619, + "learning_rate": 4.999666198173807e-06, + "loss": 0.684, + "step": 524 + }, + { + "epoch": 0.03859726510807234, + "grad_norm": 0.8933098316192627, + "learning_rate": 4.999664621815373e-06, + "loss": 0.6582, + "step": 525 + }, + { + "epoch": 0.03867078370827819, + "grad_norm": 0.9269927740097046, + "learning_rate": 4.9996630417438235e-06, + "loss": 0.6606, + "step": 526 + }, + { + "epoch": 0.038744302308484045, + "grad_norm": 0.9555436372756958, + "learning_rate": 4.9996614579591615e-06, + "loss": 0.6616, + "step": 527 + }, + { + "epoch": 0.0388178209086899, + "grad_norm": 0.9753631353378296, + "learning_rate": 4.99965987046139e-06, + "loss": 0.6883, + "step": 528 + }, + { + "epoch": 0.03889133950889575, + "grad_norm": 0.9344114065170288, + "learning_rate": 4.99965827925051e-06, + "loss": 0.6681, + "step": 529 + }, + { + "epoch": 0.0389648581091016, + "grad_norm": 0.9137898683547974, + "learning_rate": 4.999656684326523e-06, + "loss": 0.6991, + "step": 530 + }, + { + "epoch": 0.03903837670930745, + "grad_norm": 0.9958014488220215, + "learning_rate": 4.999655085689436e-06, + "loss": 0.7248, + "step": 531 + }, + { + "epoch": 0.039111895309513305, + "grad_norm": 0.8773913979530334, + "learning_rate": 4.9996534833392465e-06, + "loss": 0.6492, + "step": 532 + }, + { + "epoch": 0.039185413909719156, + "grad_norm": 0.9496752619743347, + "learning_rate": 4.999651877275959e-06, + "loss": 0.6541, + "step": 533 + }, + { + "epoch": 0.03925893250992501, + "grad_norm": 0.8336145281791687, + "learning_rate": 4.999650267499575e-06, + "loss": 0.6433, + "step": 534 + }, + { + "epoch": 0.03933245111013086, + "grad_norm": 0.891284704208374, + "learning_rate": 4.9996486540100975e-06, + "loss": 0.629, + "step": 535 + }, + { + "epoch": 0.03940596971033671, + "grad_norm": 0.8943108916282654, + "learning_rate": 4.999647036807529e-06, + "loss": 0.632, + "step": 536 + }, + { + "epoch": 0.039479488310542564, + "grad_norm": 0.9396488070487976, + "learning_rate": 4.999645415891872e-06, + "loss": 0.665, + "step": 537 + }, + { + "epoch": 0.03955300691074842, + "grad_norm": 0.9420066475868225, + "learning_rate": 4.999643791263128e-06, + "loss": 0.6606, + "step": 538 + }, + { + "epoch": 0.039626525510954275, + "grad_norm": 0.8789041638374329, + "learning_rate": 4.9996421629213e-06, + "loss": 0.661, + "step": 539 + }, + { + "epoch": 0.03970004411116013, + "grad_norm": 0.9336462616920471, + "learning_rate": 4.999640530866391e-06, + "loss": 0.659, + "step": 540 + }, + { + "epoch": 0.03977356271136598, + "grad_norm": 0.9203698039054871, + "learning_rate": 4.999638895098402e-06, + "loss": 0.6576, + "step": 541 + }, + { + "epoch": 0.03984708131157183, + "grad_norm": 0.8767902255058289, + "learning_rate": 4.999637255617338e-06, + "loss": 0.6311, + "step": 542 + }, + { + "epoch": 0.03992059991177768, + "grad_norm": 0.9017326831817627, + "learning_rate": 4.999635612423198e-06, + "loss": 0.6824, + "step": 543 + }, + { + "epoch": 0.039994118511983534, + "grad_norm": 0.8707701563835144, + "learning_rate": 4.999633965515987e-06, + "loss": 0.6393, + "step": 544 + }, + { + "epoch": 0.040067637112189386, + "grad_norm": 1.0128626823425293, + "learning_rate": 4.999632314895706e-06, + "loss": 0.6827, + "step": 545 + }, + { + "epoch": 0.04014115571239524, + "grad_norm": 0.8796296715736389, + "learning_rate": 4.999630660562359e-06, + "loss": 0.6361, + "step": 546 + }, + { + "epoch": 0.04021467431260109, + "grad_norm": 1.008248209953308, + "learning_rate": 4.999629002515947e-06, + "loss": 0.7063, + "step": 547 + }, + { + "epoch": 0.04028819291280694, + "grad_norm": 0.939013659954071, + "learning_rate": 4.999627340756473e-06, + "loss": 0.6601, + "step": 548 + }, + { + "epoch": 0.040361711513012793, + "grad_norm": 0.9182678461074829, + "learning_rate": 4.9996256752839394e-06, + "loss": 0.6606, + "step": 549 + }, + { + "epoch": 0.040435230113218645, + "grad_norm": 0.8877587914466858, + "learning_rate": 4.999624006098349e-06, + "loss": 0.6574, + "step": 550 + }, + { + "epoch": 0.0405087487134245, + "grad_norm": 0.8423686623573303, + "learning_rate": 4.999622333199704e-06, + "loss": 0.6437, + "step": 551 + }, + { + "epoch": 0.04058226731363035, + "grad_norm": 0.9165937304496765, + "learning_rate": 4.999620656588007e-06, + "loss": 0.6529, + "step": 552 + }, + { + "epoch": 0.0406557859138362, + "grad_norm": 0.9045810699462891, + "learning_rate": 4.999618976263259e-06, + "loss": 0.6367, + "step": 553 + }, + { + "epoch": 0.04072930451404205, + "grad_norm": 0.8798891305923462, + "learning_rate": 4.999617292225466e-06, + "loss": 0.6221, + "step": 554 + }, + { + "epoch": 0.040802823114247905, + "grad_norm": 0.9188511371612549, + "learning_rate": 4.999615604474627e-06, + "loss": 0.6456, + "step": 555 + }, + { + "epoch": 0.04087634171445376, + "grad_norm": 0.9586350917816162, + "learning_rate": 4.999613913010747e-06, + "loss": 0.6629, + "step": 556 + }, + { + "epoch": 0.04094986031465961, + "grad_norm": 0.9274671673774719, + "learning_rate": 4.999612217833826e-06, + "loss": 0.6633, + "step": 557 + }, + { + "epoch": 0.04102337891486546, + "grad_norm": 0.9011229276657104, + "learning_rate": 4.999610518943869e-06, + "loss": 0.6732, + "step": 558 + }, + { + "epoch": 0.04109689751507131, + "grad_norm": 0.9440698623657227, + "learning_rate": 4.9996088163408776e-06, + "loss": 0.6902, + "step": 559 + }, + { + "epoch": 0.041170416115277164, + "grad_norm": 0.9058323502540588, + "learning_rate": 4.999607110024853e-06, + "loss": 0.6615, + "step": 560 + }, + { + "epoch": 0.041243934715483016, + "grad_norm": 0.9217320680618286, + "learning_rate": 4.999605399995799e-06, + "loss": 0.6401, + "step": 561 + }, + { + "epoch": 0.04131745331568887, + "grad_norm": 0.8662657141685486, + "learning_rate": 4.999603686253719e-06, + "loss": 0.6318, + "step": 562 + }, + { + "epoch": 0.04139097191589472, + "grad_norm": 0.9080684185028076, + "learning_rate": 4.999601968798614e-06, + "loss": 0.6664, + "step": 563 + }, + { + "epoch": 0.04146449051610057, + "grad_norm": 0.9344659447669983, + "learning_rate": 4.999600247630488e-06, + "loss": 0.6772, + "step": 564 + }, + { + "epoch": 0.041538009116306424, + "grad_norm": 0.9127987027168274, + "learning_rate": 4.999598522749342e-06, + "loss": 0.6506, + "step": 565 + }, + { + "epoch": 0.041611527716512275, + "grad_norm": 0.9435017704963684, + "learning_rate": 4.999596794155179e-06, + "loss": 0.6273, + "step": 566 + }, + { + "epoch": 0.04168504631671813, + "grad_norm": 0.8922345042228699, + "learning_rate": 4.999595061848003e-06, + "loss": 0.6877, + "step": 567 + }, + { + "epoch": 0.04175856491692398, + "grad_norm": 0.9843034744262695, + "learning_rate": 4.999593325827814e-06, + "loss": 0.6308, + "step": 568 + }, + { + "epoch": 0.04183208351712983, + "grad_norm": 0.849409282207489, + "learning_rate": 4.999591586094616e-06, + "loss": 0.6778, + "step": 569 + }, + { + "epoch": 0.04190560211733568, + "grad_norm": 0.9373292326927185, + "learning_rate": 4.999589842648413e-06, + "loss": 0.659, + "step": 570 + }, + { + "epoch": 0.041979120717541535, + "grad_norm": 0.9580425024032593, + "learning_rate": 4.999588095489205e-06, + "loss": 0.674, + "step": 571 + }, + { + "epoch": 0.04205263931774739, + "grad_norm": 0.8717241883277893, + "learning_rate": 4.9995863446169966e-06, + "loss": 0.6473, + "step": 572 + }, + { + "epoch": 0.042126157917953246, + "grad_norm": 0.9126169085502625, + "learning_rate": 4.999584590031789e-06, + "loss": 0.6266, + "step": 573 + }, + { + "epoch": 0.0421996765181591, + "grad_norm": 0.8891578912734985, + "learning_rate": 4.999582831733585e-06, + "loss": 0.608, + "step": 574 + }, + { + "epoch": 0.04227319511836495, + "grad_norm": 0.8806067705154419, + "learning_rate": 4.999581069722388e-06, + "loss": 0.6566, + "step": 575 + }, + { + "epoch": 0.0423467137185708, + "grad_norm": 0.9088340401649475, + "learning_rate": 4.999579303998201e-06, + "loss": 0.682, + "step": 576 + }, + { + "epoch": 0.04242023231877665, + "grad_norm": 0.9500383734703064, + "learning_rate": 4.999577534561024e-06, + "loss": 0.6395, + "step": 577 + }, + { + "epoch": 0.042493750918982505, + "grad_norm": 0.8838303089141846, + "learning_rate": 4.9995757614108635e-06, + "loss": 0.6822, + "step": 578 + }, + { + "epoch": 0.04256726951918836, + "grad_norm": 0.8895675539970398, + "learning_rate": 4.999573984547719e-06, + "loss": 0.6228, + "step": 579 + }, + { + "epoch": 0.04264078811939421, + "grad_norm": 0.9103283286094666, + "learning_rate": 4.999572203971594e-06, + "loss": 0.6912, + "step": 580 + }, + { + "epoch": 0.04271430671960006, + "grad_norm": 0.8796473145484924, + "learning_rate": 4.999570419682492e-06, + "loss": 0.6417, + "step": 581 + }, + { + "epoch": 0.04278782531980591, + "grad_norm": 0.9177064299583435, + "learning_rate": 4.9995686316804146e-06, + "loss": 0.6393, + "step": 582 + }, + { + "epoch": 0.042861343920011764, + "grad_norm": 0.9348646998405457, + "learning_rate": 4.999566839965364e-06, + "loss": 0.6818, + "step": 583 + }, + { + "epoch": 0.042934862520217616, + "grad_norm": 0.8784725666046143, + "learning_rate": 4.999565044537345e-06, + "loss": 0.65, + "step": 584 + }, + { + "epoch": 0.04300838112042347, + "grad_norm": 0.935387134552002, + "learning_rate": 4.999563245396359e-06, + "loss": 0.6753, + "step": 585 + }, + { + "epoch": 0.04308189972062932, + "grad_norm": 0.9324643611907959, + "learning_rate": 4.999561442542408e-06, + "loss": 0.6836, + "step": 586 + }, + { + "epoch": 0.04315541832083517, + "grad_norm": 0.925586998462677, + "learning_rate": 4.999559635975496e-06, + "loss": 0.6752, + "step": 587 + }, + { + "epoch": 0.043228936921041024, + "grad_norm": 0.9136750102043152, + "learning_rate": 4.999557825695624e-06, + "loss": 0.6746, + "step": 588 + }, + { + "epoch": 0.043302455521246876, + "grad_norm": 0.9462132453918457, + "learning_rate": 4.999556011702797e-06, + "loss": 0.6716, + "step": 589 + }, + { + "epoch": 0.04337597412145273, + "grad_norm": 0.9141427278518677, + "learning_rate": 4.999554193997016e-06, + "loss": 0.6377, + "step": 590 + }, + { + "epoch": 0.04344949272165858, + "grad_norm": 0.8793272376060486, + "learning_rate": 4.999552372578284e-06, + "loss": 0.6294, + "step": 591 + }, + { + "epoch": 0.04352301132186443, + "grad_norm": 0.9201319217681885, + "learning_rate": 4.999550547446603e-06, + "loss": 0.6744, + "step": 592 + }, + { + "epoch": 0.04359652992207028, + "grad_norm": 0.8413757681846619, + "learning_rate": 4.999548718601978e-06, + "loss": 0.6333, + "step": 593 + }, + { + "epoch": 0.043670048522276135, + "grad_norm": 0.9166660904884338, + "learning_rate": 4.999546886044409e-06, + "loss": 0.679, + "step": 594 + }, + { + "epoch": 0.04374356712248199, + "grad_norm": 0.9515949487686157, + "learning_rate": 4.9995450497739005e-06, + "loss": 0.6926, + "step": 595 + }, + { + "epoch": 0.04381708572268784, + "grad_norm": 0.9126117825508118, + "learning_rate": 4.999543209790455e-06, + "loss": 0.6691, + "step": 596 + }, + { + "epoch": 0.04389060432289369, + "grad_norm": 0.893225371837616, + "learning_rate": 4.999541366094075e-06, + "loss": 0.5949, + "step": 597 + }, + { + "epoch": 0.04396412292309954, + "grad_norm": 0.8902578353881836, + "learning_rate": 4.999539518684763e-06, + "loss": 0.6652, + "step": 598 + }, + { + "epoch": 0.044037641523305394, + "grad_norm": 0.9035239219665527, + "learning_rate": 4.999537667562521e-06, + "loss": 0.6707, + "step": 599 + }, + { + "epoch": 0.044111160123511246, + "grad_norm": 0.8614745736122131, + "learning_rate": 4.999535812727354e-06, + "loss": 0.6282, + "step": 600 + }, + { + "epoch": 0.0441846787237171, + "grad_norm": 0.9114537835121155, + "learning_rate": 4.999533954179263e-06, + "loss": 0.6945, + "step": 601 + }, + { + "epoch": 0.04425819732392295, + "grad_norm": 0.8684289455413818, + "learning_rate": 4.99953209191825e-06, + "loss": 0.6509, + "step": 602 + }, + { + "epoch": 0.0443317159241288, + "grad_norm": 0.9340181946754456, + "learning_rate": 4.99953022594432e-06, + "loss": 0.676, + "step": 603 + }, + { + "epoch": 0.044405234524334654, + "grad_norm": 0.8870452046394348, + "learning_rate": 4.999528356257474e-06, + "loss": 0.6628, + "step": 604 + }, + { + "epoch": 0.044478753124540506, + "grad_norm": 0.89988112449646, + "learning_rate": 4.999526482857717e-06, + "loss": 0.6314, + "step": 605 + }, + { + "epoch": 0.04455227172474636, + "grad_norm": 0.9426406621932983, + "learning_rate": 4.99952460574505e-06, + "loss": 0.6127, + "step": 606 + }, + { + "epoch": 0.04462579032495221, + "grad_norm": 0.8722384572029114, + "learning_rate": 4.9995227249194754e-06, + "loss": 0.6434, + "step": 607 + }, + { + "epoch": 0.04469930892515807, + "grad_norm": 0.8896253108978271, + "learning_rate": 4.999520840380997e-06, + "loss": 0.6351, + "step": 608 + }, + { + "epoch": 0.04477282752536392, + "grad_norm": 0.948898196220398, + "learning_rate": 4.9995189521296175e-06, + "loss": 0.6656, + "step": 609 + }, + { + "epoch": 0.04484634612556977, + "grad_norm": 0.8855255842208862, + "learning_rate": 4.999517060165339e-06, + "loss": 0.6552, + "step": 610 + }, + { + "epoch": 0.044919864725775624, + "grad_norm": 0.9095856547355652, + "learning_rate": 4.999515164488164e-06, + "loss": 0.6519, + "step": 611 + }, + { + "epoch": 0.044993383325981476, + "grad_norm": 0.8779225945472717, + "learning_rate": 4.999513265098097e-06, + "loss": 0.6438, + "step": 612 + }, + { + "epoch": 0.04506690192618733, + "grad_norm": 0.8958042860031128, + "learning_rate": 4.999511361995139e-06, + "loss": 0.6533, + "step": 613 + }, + { + "epoch": 0.04514042052639318, + "grad_norm": 0.8687169551849365, + "learning_rate": 4.999509455179296e-06, + "loss": 0.6376, + "step": 614 + }, + { + "epoch": 0.04521393912659903, + "grad_norm": 0.9126976132392883, + "learning_rate": 4.999507544650567e-06, + "loss": 0.6627, + "step": 615 + }, + { + "epoch": 0.04528745772680488, + "grad_norm": 0.8966965675354004, + "learning_rate": 4.9995056304089564e-06, + "loss": 0.6166, + "step": 616 + }, + { + "epoch": 0.045360976327010735, + "grad_norm": 0.8688904643058777, + "learning_rate": 4.999503712454466e-06, + "loss": 0.6414, + "step": 617 + }, + { + "epoch": 0.04543449492721659, + "grad_norm": 0.940758466720581, + "learning_rate": 4.999501790787102e-06, + "loss": 0.6746, + "step": 618 + }, + { + "epoch": 0.04550801352742244, + "grad_norm": 0.9047825932502747, + "learning_rate": 4.999499865406864e-06, + "loss": 0.6323, + "step": 619 + }, + { + "epoch": 0.04558153212762829, + "grad_norm": 0.9411000609397888, + "learning_rate": 4.999497936313755e-06, + "loss": 0.6928, + "step": 620 + }, + { + "epoch": 0.04565505072783414, + "grad_norm": 0.9102792143821716, + "learning_rate": 4.99949600350778e-06, + "loss": 0.6626, + "step": 621 + }, + { + "epoch": 0.045728569328039995, + "grad_norm": 0.9048720598220825, + "learning_rate": 4.999494066988939e-06, + "loss": 0.6345, + "step": 622 + }, + { + "epoch": 0.045802087928245847, + "grad_norm": 0.9443221688270569, + "learning_rate": 4.999492126757238e-06, + "loss": 0.6652, + "step": 623 + }, + { + "epoch": 0.0458756065284517, + "grad_norm": 0.8984074592590332, + "learning_rate": 4.999490182812678e-06, + "loss": 0.6233, + "step": 624 + }, + { + "epoch": 0.04594912512865755, + "grad_norm": 0.8702187538146973, + "learning_rate": 4.999488235155261e-06, + "loss": 0.6221, + "step": 625 + }, + { + "epoch": 0.0460226437288634, + "grad_norm": 0.9430752396583557, + "learning_rate": 4.9994862837849925e-06, + "loss": 0.661, + "step": 626 + }, + { + "epoch": 0.046096162329069254, + "grad_norm": 0.9090223908424377, + "learning_rate": 4.999484328701874e-06, + "loss": 0.6659, + "step": 627 + }, + { + "epoch": 0.046169680929275106, + "grad_norm": 0.8619539141654968, + "learning_rate": 4.999482369905907e-06, + "loss": 0.6393, + "step": 628 + }, + { + "epoch": 0.04624319952948096, + "grad_norm": 0.9567862749099731, + "learning_rate": 4.9994804073970974e-06, + "loss": 0.6492, + "step": 629 + }, + { + "epoch": 0.04631671812968681, + "grad_norm": 0.8717013001441956, + "learning_rate": 4.999478441175445e-06, + "loss": 0.6872, + "step": 630 + }, + { + "epoch": 0.04639023672989266, + "grad_norm": 0.9066158533096313, + "learning_rate": 4.999476471240955e-06, + "loss": 0.6584, + "step": 631 + }, + { + "epoch": 0.04646375533009851, + "grad_norm": 0.9198997616767883, + "learning_rate": 4.99947449759363e-06, + "loss": 0.6506, + "step": 632 + }, + { + "epoch": 0.046537273930304365, + "grad_norm": 0.9087890982627869, + "learning_rate": 4.9994725202334715e-06, + "loss": 0.6722, + "step": 633 + }, + { + "epoch": 0.04661079253051022, + "grad_norm": 0.8870009183883667, + "learning_rate": 4.999470539160485e-06, + "loss": 0.6701, + "step": 634 + }, + { + "epoch": 0.04668431113071607, + "grad_norm": 0.865755558013916, + "learning_rate": 4.99946855437467e-06, + "loss": 0.5852, + "step": 635 + }, + { + "epoch": 0.04675782973092192, + "grad_norm": 0.8527227640151978, + "learning_rate": 4.999466565876032e-06, + "loss": 0.6565, + "step": 636 + }, + { + "epoch": 0.04683134833112777, + "grad_norm": 0.9668926000595093, + "learning_rate": 4.999464573664574e-06, + "loss": 0.6647, + "step": 637 + }, + { + "epoch": 0.046904866931333625, + "grad_norm": 0.9324785470962524, + "learning_rate": 4.999462577740298e-06, + "loss": 0.6581, + "step": 638 + }, + { + "epoch": 0.04697838553153948, + "grad_norm": 0.9127007126808167, + "learning_rate": 4.999460578103207e-06, + "loss": 0.6784, + "step": 639 + }, + { + "epoch": 0.04705190413174533, + "grad_norm": 0.8700149655342102, + "learning_rate": 4.9994585747533045e-06, + "loss": 0.6702, + "step": 640 + }, + { + "epoch": 0.04712542273195118, + "grad_norm": 0.916636049747467, + "learning_rate": 4.9994565676905926e-06, + "loss": 0.6178, + "step": 641 + }, + { + "epoch": 0.04719894133215704, + "grad_norm": 0.9016149044036865, + "learning_rate": 4.999454556915075e-06, + "loss": 0.6435, + "step": 642 + }, + { + "epoch": 0.04727245993236289, + "grad_norm": 0.8774076700210571, + "learning_rate": 4.999452542426754e-06, + "loss": 0.6624, + "step": 643 + }, + { + "epoch": 0.04734597853256874, + "grad_norm": 0.8920407891273499, + "learning_rate": 4.999450524225634e-06, + "loss": 0.6599, + "step": 644 + }, + { + "epoch": 0.047419497132774595, + "grad_norm": 0.8753563761711121, + "learning_rate": 4.9994485023117165e-06, + "loss": 0.6515, + "step": 645 + }, + { + "epoch": 0.04749301573298045, + "grad_norm": 1.0270837545394897, + "learning_rate": 4.999446476685006e-06, + "loss": 0.6251, + "step": 646 + }, + { + "epoch": 0.0475665343331863, + "grad_norm": 0.8947177529335022, + "learning_rate": 4.999444447345504e-06, + "loss": 0.6599, + "step": 647 + }, + { + "epoch": 0.04764005293339215, + "grad_norm": 0.8872786164283752, + "learning_rate": 4.999442414293214e-06, + "loss": 0.6575, + "step": 648 + }, + { + "epoch": 0.047713571533598, + "grad_norm": 0.8841577172279358, + "learning_rate": 4.999440377528139e-06, + "loss": 0.6691, + "step": 649 + }, + { + "epoch": 0.047787090133803854, + "grad_norm": 0.9158629775047302, + "learning_rate": 4.999438337050283e-06, + "loss": 0.6856, + "step": 650 + }, + { + "epoch": 0.047860608734009706, + "grad_norm": 0.9039363265037537, + "learning_rate": 4.999436292859648e-06, + "loss": 0.6571, + "step": 651 + }, + { + "epoch": 0.04793412733421556, + "grad_norm": 0.8892017602920532, + "learning_rate": 4.999434244956236e-06, + "loss": 0.644, + "step": 652 + }, + { + "epoch": 0.04800764593442141, + "grad_norm": 0.9041864275932312, + "learning_rate": 4.9994321933400525e-06, + "loss": 0.7007, + "step": 653 + }, + { + "epoch": 0.04808116453462726, + "grad_norm": 0.8946360945701599, + "learning_rate": 4.999430138011099e-06, + "loss": 0.6638, + "step": 654 + }, + { + "epoch": 0.048154683134833114, + "grad_norm": 0.8766074776649475, + "learning_rate": 4.99942807896938e-06, + "loss": 0.6909, + "step": 655 + }, + { + "epoch": 0.048228201735038966, + "grad_norm": 0.9720597863197327, + "learning_rate": 4.999426016214896e-06, + "loss": 0.6611, + "step": 656 + }, + { + "epoch": 0.04830172033524482, + "grad_norm": 0.9245123267173767, + "learning_rate": 4.999423949747652e-06, + "loss": 0.6335, + "step": 657 + }, + { + "epoch": 0.04837523893545067, + "grad_norm": 0.883325457572937, + "learning_rate": 4.999421879567651e-06, + "loss": 0.6656, + "step": 658 + }, + { + "epoch": 0.04844875753565652, + "grad_norm": 0.8924735188484192, + "learning_rate": 4.999419805674894e-06, + "loss": 0.598, + "step": 659 + }, + { + "epoch": 0.04852227613586237, + "grad_norm": 0.8510939478874207, + "learning_rate": 4.999417728069388e-06, + "loss": 0.6363, + "step": 660 + }, + { + "epoch": 0.048595794736068225, + "grad_norm": 0.919967532157898, + "learning_rate": 4.999415646751132e-06, + "loss": 0.6697, + "step": 661 + }, + { + "epoch": 0.04866931333627408, + "grad_norm": 0.8838420510292053, + "learning_rate": 4.9994135617201325e-06, + "loss": 0.6336, + "step": 662 + }, + { + "epoch": 0.04874283193647993, + "grad_norm": 0.8596723675727844, + "learning_rate": 4.99941147297639e-06, + "loss": 0.6297, + "step": 663 + }, + { + "epoch": 0.04881635053668578, + "grad_norm": 0.9451053142547607, + "learning_rate": 4.9994093805199085e-06, + "loss": 0.6503, + "step": 664 + }, + { + "epoch": 0.04888986913689163, + "grad_norm": 0.9615591764450073, + "learning_rate": 4.999407284350692e-06, + "loss": 0.6508, + "step": 665 + }, + { + "epoch": 0.048963387737097484, + "grad_norm": 0.9489431381225586, + "learning_rate": 4.999405184468742e-06, + "loss": 0.6994, + "step": 666 + }, + { + "epoch": 0.049036906337303336, + "grad_norm": 0.9373032450675964, + "learning_rate": 4.9994030808740636e-06, + "loss": 0.639, + "step": 667 + }, + { + "epoch": 0.04911042493750919, + "grad_norm": 0.9617019295692444, + "learning_rate": 4.999400973566657e-06, + "loss": 0.6437, + "step": 668 + }, + { + "epoch": 0.04918394353771504, + "grad_norm": 0.9279304146766663, + "learning_rate": 4.999398862546529e-06, + "loss": 0.6734, + "step": 669 + }, + { + "epoch": 0.04925746213792089, + "grad_norm": 0.9505481123924255, + "learning_rate": 4.99939674781368e-06, + "loss": 0.7015, + "step": 670 + }, + { + "epoch": 0.049330980738126744, + "grad_norm": 0.9387148022651672, + "learning_rate": 4.9993946293681136e-06, + "loss": 0.6684, + "step": 671 + }, + { + "epoch": 0.049404499338332596, + "grad_norm": 0.9150479435920715, + "learning_rate": 4.999392507209835e-06, + "loss": 0.6292, + "step": 672 + }, + { + "epoch": 0.04947801793853845, + "grad_norm": 0.8986387848854065, + "learning_rate": 4.999390381338844e-06, + "loss": 0.6498, + "step": 673 + }, + { + "epoch": 0.0495515365387443, + "grad_norm": 0.8924996852874756, + "learning_rate": 4.999388251755146e-06, + "loss": 0.6623, + "step": 674 + }, + { + "epoch": 0.04962505513895015, + "grad_norm": 0.889807403087616, + "learning_rate": 4.9993861184587435e-06, + "loss": 0.6592, + "step": 675 + }, + { + "epoch": 0.049698573739156, + "grad_norm": 0.9232891201972961, + "learning_rate": 4.99938398144964e-06, + "loss": 0.6626, + "step": 676 + }, + { + "epoch": 0.04977209233936186, + "grad_norm": 0.9072375893592834, + "learning_rate": 4.999381840727839e-06, + "loss": 0.6811, + "step": 677 + }, + { + "epoch": 0.049845610939567714, + "grad_norm": 0.8523333668708801, + "learning_rate": 4.999379696293342e-06, + "loss": 0.6087, + "step": 678 + }, + { + "epoch": 0.049919129539773566, + "grad_norm": 0.8854493498802185, + "learning_rate": 4.999377548146153e-06, + "loss": 0.6406, + "step": 679 + }, + { + "epoch": 0.04999264813997942, + "grad_norm": 0.8818999528884888, + "learning_rate": 4.999375396286277e-06, + "loss": 0.6378, + "step": 680 + }, + { + "epoch": 0.05006616674018527, + "grad_norm": 1.0217292308807373, + "learning_rate": 4.9993732407137145e-06, + "loss": 0.647, + "step": 681 + }, + { + "epoch": 0.05013968534039112, + "grad_norm": 0.8898207545280457, + "learning_rate": 4.99937108142847e-06, + "loss": 0.6502, + "step": 682 + }, + { + "epoch": 0.05021320394059697, + "grad_norm": 0.926699161529541, + "learning_rate": 4.999368918430547e-06, + "loss": 0.6512, + "step": 683 + }, + { + "epoch": 0.050286722540802825, + "grad_norm": 0.8848741054534912, + "learning_rate": 4.9993667517199485e-06, + "loss": 0.6479, + "step": 684 + }, + { + "epoch": 0.05036024114100868, + "grad_norm": 0.9710608720779419, + "learning_rate": 4.999364581296678e-06, + "loss": 0.6312, + "step": 685 + }, + { + "epoch": 0.05043375974121453, + "grad_norm": 0.855275571346283, + "learning_rate": 4.999362407160737e-06, + "loss": 0.6654, + "step": 686 + }, + { + "epoch": 0.05050727834142038, + "grad_norm": 0.8812558650970459, + "learning_rate": 4.999360229312131e-06, + "loss": 0.6512, + "step": 687 + }, + { + "epoch": 0.05058079694162623, + "grad_norm": 0.8663848042488098, + "learning_rate": 4.999358047750862e-06, + "loss": 0.6005, + "step": 688 + }, + { + "epoch": 0.050654315541832085, + "grad_norm": 0.94410240650177, + "learning_rate": 4.999355862476933e-06, + "loss": 0.6773, + "step": 689 + }, + { + "epoch": 0.050727834142037936, + "grad_norm": 0.8495420217514038, + "learning_rate": 4.999353673490348e-06, + "loss": 0.6185, + "step": 690 + }, + { + "epoch": 0.05080135274224379, + "grad_norm": 0.8700076341629028, + "learning_rate": 4.99935148079111e-06, + "loss": 0.6085, + "step": 691 + }, + { + "epoch": 0.05087487134244964, + "grad_norm": 0.903495192527771, + "learning_rate": 4.999349284379223e-06, + "loss": 0.6455, + "step": 692 + }, + { + "epoch": 0.05094838994265549, + "grad_norm": 0.899311363697052, + "learning_rate": 4.999347084254689e-06, + "loss": 0.6454, + "step": 693 + }, + { + "epoch": 0.051021908542861344, + "grad_norm": 0.855495810508728, + "learning_rate": 4.999344880417512e-06, + "loss": 0.6357, + "step": 694 + }, + { + "epoch": 0.051095427143067196, + "grad_norm": 0.8353908061981201, + "learning_rate": 4.999342672867694e-06, + "loss": 0.644, + "step": 695 + }, + { + "epoch": 0.05116894574327305, + "grad_norm": 0.8615337610244751, + "learning_rate": 4.99934046160524e-06, + "loss": 0.6732, + "step": 696 + }, + { + "epoch": 0.0512424643434789, + "grad_norm": 0.8858133554458618, + "learning_rate": 4.9993382466301525e-06, + "loss": 0.6222, + "step": 697 + }, + { + "epoch": 0.05131598294368475, + "grad_norm": 0.8865490555763245, + "learning_rate": 4.999336027942435e-06, + "loss": 0.6021, + "step": 698 + }, + { + "epoch": 0.0513895015438906, + "grad_norm": 0.9450401067733765, + "learning_rate": 4.999333805542091e-06, + "loss": 0.6627, + "step": 699 + }, + { + "epoch": 0.051463020144096455, + "grad_norm": 0.8982500433921814, + "learning_rate": 4.999331579429123e-06, + "loss": 0.659, + "step": 700 + }, + { + "epoch": 0.05153653874430231, + "grad_norm": 0.9010064601898193, + "learning_rate": 4.999329349603535e-06, + "loss": 0.6726, + "step": 701 + }, + { + "epoch": 0.05161005734450816, + "grad_norm": 0.9033017158508301, + "learning_rate": 4.99932711606533e-06, + "loss": 0.6895, + "step": 702 + }, + { + "epoch": 0.05168357594471401, + "grad_norm": 0.900846004486084, + "learning_rate": 4.9993248788145115e-06, + "loss": 0.6518, + "step": 703 + }, + { + "epoch": 0.05175709454491986, + "grad_norm": 0.9579852223396301, + "learning_rate": 4.999322637851083e-06, + "loss": 0.6642, + "step": 704 + }, + { + "epoch": 0.051830613145125715, + "grad_norm": 0.9291789531707764, + "learning_rate": 4.9993203931750465e-06, + "loss": 0.6526, + "step": 705 + }, + { + "epoch": 0.051904131745331566, + "grad_norm": 0.9443159103393555, + "learning_rate": 4.999318144786408e-06, + "loss": 0.6745, + "step": 706 + }, + { + "epoch": 0.05197765034553742, + "grad_norm": 0.8161056637763977, + "learning_rate": 4.9993158926851685e-06, + "loss": 0.5951, + "step": 707 + }, + { + "epoch": 0.05205116894574327, + "grad_norm": 0.8496602773666382, + "learning_rate": 4.999313636871332e-06, + "loss": 0.6256, + "step": 708 + }, + { + "epoch": 0.05212468754594912, + "grad_norm": 0.9153755903244019, + "learning_rate": 4.999311377344902e-06, + "loss": 0.6561, + "step": 709 + }, + { + "epoch": 0.052198206146154974, + "grad_norm": 0.9562864303588867, + "learning_rate": 4.999309114105882e-06, + "loss": 0.639, + "step": 710 + }, + { + "epoch": 0.052271724746360826, + "grad_norm": 0.9084814786911011, + "learning_rate": 4.999306847154275e-06, + "loss": 0.6493, + "step": 711 + }, + { + "epoch": 0.052345243346566685, + "grad_norm": 0.9189197421073914, + "learning_rate": 4.999304576490084e-06, + "loss": 0.6729, + "step": 712 + }, + { + "epoch": 0.05241876194677254, + "grad_norm": 0.8793302178382874, + "learning_rate": 4.999302302113313e-06, + "loss": 0.623, + "step": 713 + }, + { + "epoch": 0.05249228054697839, + "grad_norm": 0.9386196136474609, + "learning_rate": 4.999300024023967e-06, + "loss": 0.6541, + "step": 714 + }, + { + "epoch": 0.05256579914718424, + "grad_norm": 0.9852139949798584, + "learning_rate": 4.9992977422220465e-06, + "loss": 0.6727, + "step": 715 + }, + { + "epoch": 0.05263931774739009, + "grad_norm": 0.9315295219421387, + "learning_rate": 4.999295456707556e-06, + "loss": 0.6467, + "step": 716 + }, + { + "epoch": 0.052712836347595944, + "grad_norm": 1.0223405361175537, + "learning_rate": 4.999293167480498e-06, + "loss": 0.6574, + "step": 717 + }, + { + "epoch": 0.052786354947801796, + "grad_norm": 0.8905497193336487, + "learning_rate": 4.999290874540878e-06, + "loss": 0.6566, + "step": 718 + }, + { + "epoch": 0.05285987354800765, + "grad_norm": 0.86713045835495, + "learning_rate": 4.999288577888699e-06, + "loss": 0.673, + "step": 719 + }, + { + "epoch": 0.0529333921482135, + "grad_norm": 0.9714829921722412, + "learning_rate": 4.9992862775239625e-06, + "loss": 0.6266, + "step": 720 + }, + { + "epoch": 0.05300691074841935, + "grad_norm": 0.896156907081604, + "learning_rate": 4.9992839734466734e-06, + "loss": 0.639, + "step": 721 + }, + { + "epoch": 0.053080429348625204, + "grad_norm": 0.900148332118988, + "learning_rate": 4.999281665656835e-06, + "loss": 0.6407, + "step": 722 + }, + { + "epoch": 0.053153947948831055, + "grad_norm": 0.9035518169403076, + "learning_rate": 4.99927935415445e-06, + "loss": 0.6747, + "step": 723 + }, + { + "epoch": 0.05322746654903691, + "grad_norm": 0.9567741751670837, + "learning_rate": 4.999277038939523e-06, + "loss": 0.6824, + "step": 724 + }, + { + "epoch": 0.05330098514924276, + "grad_norm": 0.9545497298240662, + "learning_rate": 4.999274720012056e-06, + "loss": 0.6495, + "step": 725 + }, + { + "epoch": 0.05337450374944861, + "grad_norm": 0.8298144936561584, + "learning_rate": 4.9992723973720536e-06, + "loss": 0.6152, + "step": 726 + }, + { + "epoch": 0.05344802234965446, + "grad_norm": 0.9723663926124573, + "learning_rate": 4.999270071019519e-06, + "loss": 0.694, + "step": 727 + }, + { + "epoch": 0.053521540949860315, + "grad_norm": 0.9213699698448181, + "learning_rate": 4.999267740954455e-06, + "loss": 0.6815, + "step": 728 + }, + { + "epoch": 0.05359505955006617, + "grad_norm": 0.880365788936615, + "learning_rate": 4.999265407176867e-06, + "loss": 0.6299, + "step": 729 + }, + { + "epoch": 0.05366857815027202, + "grad_norm": 0.8773685693740845, + "learning_rate": 4.999263069686756e-06, + "loss": 0.6071, + "step": 730 + }, + { + "epoch": 0.05374209675047787, + "grad_norm": 0.8954081535339355, + "learning_rate": 4.999260728484127e-06, + "loss": 0.6516, + "step": 731 + }, + { + "epoch": 0.05381561535068372, + "grad_norm": 0.9660015106201172, + "learning_rate": 4.9992583835689834e-06, + "loss": 0.6331, + "step": 732 + }, + { + "epoch": 0.053889133950889574, + "grad_norm": 0.8796841502189636, + "learning_rate": 4.999256034941327e-06, + "loss": 0.6569, + "step": 733 + }, + { + "epoch": 0.053962652551095426, + "grad_norm": 0.9216030240058899, + "learning_rate": 4.999253682601164e-06, + "loss": 0.6339, + "step": 734 + }, + { + "epoch": 0.05403617115130128, + "grad_norm": 0.881103515625, + "learning_rate": 4.999251326548497e-06, + "loss": 0.6468, + "step": 735 + }, + { + "epoch": 0.05410968975150713, + "grad_norm": 0.8973515033721924, + "learning_rate": 4.999248966783327e-06, + "loss": 0.6528, + "step": 736 + }, + { + "epoch": 0.05418320835171298, + "grad_norm": 0.843757688999176, + "learning_rate": 4.99924660330566e-06, + "loss": 0.6639, + "step": 737 + }, + { + "epoch": 0.054256726951918834, + "grad_norm": 0.9375478625297546, + "learning_rate": 4.9992442361155e-06, + "loss": 0.6667, + "step": 738 + }, + { + "epoch": 0.054330245552124685, + "grad_norm": 0.8993657231330872, + "learning_rate": 4.999241865212849e-06, + "loss": 0.6217, + "step": 739 + }, + { + "epoch": 0.05440376415233054, + "grad_norm": 0.9325907230377197, + "learning_rate": 4.999239490597712e-06, + "loss": 0.618, + "step": 740 + }, + { + "epoch": 0.05447728275253639, + "grad_norm": 0.8314366340637207, + "learning_rate": 4.99923711227009e-06, + "loss": 0.6074, + "step": 741 + }, + { + "epoch": 0.05455080135274224, + "grad_norm": 0.8956190943717957, + "learning_rate": 4.99923473022999e-06, + "loss": 0.5903, + "step": 742 + }, + { + "epoch": 0.05462431995294809, + "grad_norm": 0.9316298365592957, + "learning_rate": 4.9992323444774114e-06, + "loss": 0.6804, + "step": 743 + }, + { + "epoch": 0.054697838553153945, + "grad_norm": 0.9517955780029297, + "learning_rate": 4.9992299550123614e-06, + "loss": 0.6651, + "step": 744 + }, + { + "epoch": 0.0547713571533598, + "grad_norm": 0.9174290299415588, + "learning_rate": 4.9992275618348414e-06, + "loss": 0.662, + "step": 745 + }, + { + "epoch": 0.05484487575356565, + "grad_norm": 0.9665938019752502, + "learning_rate": 4.999225164944857e-06, + "loss": 0.6758, + "step": 746 + }, + { + "epoch": 0.05491839435377151, + "grad_norm": 0.8609781265258789, + "learning_rate": 4.99922276434241e-06, + "loss": 0.6482, + "step": 747 + }, + { + "epoch": 0.05499191295397736, + "grad_norm": 0.865852415561676, + "learning_rate": 4.999220360027504e-06, + "loss": 0.6294, + "step": 748 + }, + { + "epoch": 0.05506543155418321, + "grad_norm": 0.8991050124168396, + "learning_rate": 4.9992179520001425e-06, + "loss": 0.627, + "step": 749 + }, + { + "epoch": 0.05513895015438906, + "grad_norm": 0.9244593977928162, + "learning_rate": 4.99921554026033e-06, + "loss": 0.6589, + "step": 750 + }, + { + "epoch": 0.055212468754594915, + "grad_norm": 0.8613766431808472, + "learning_rate": 4.99921312480807e-06, + "loss": 0.6664, + "step": 751 + }, + { + "epoch": 0.05528598735480077, + "grad_norm": 0.9217789769172668, + "learning_rate": 4.999210705643366e-06, + "loss": 0.6205, + "step": 752 + }, + { + "epoch": 0.05535950595500662, + "grad_norm": 0.9317554831504822, + "learning_rate": 4.9992082827662215e-06, + "loss": 0.647, + "step": 753 + }, + { + "epoch": 0.05543302455521247, + "grad_norm": 0.8963078260421753, + "learning_rate": 4.999205856176639e-06, + "loss": 0.6106, + "step": 754 + }, + { + "epoch": 0.05550654315541832, + "grad_norm": 0.9516403675079346, + "learning_rate": 4.999203425874624e-06, + "loss": 0.661, + "step": 755 + }, + { + "epoch": 0.055580061755624174, + "grad_norm": 0.8998643755912781, + "learning_rate": 4.999200991860178e-06, + "loss": 0.6542, + "step": 756 + }, + { + "epoch": 0.055653580355830026, + "grad_norm": 0.8793849945068359, + "learning_rate": 4.999198554133307e-06, + "loss": 0.6664, + "step": 757 + }, + { + "epoch": 0.05572709895603588, + "grad_norm": 0.8418884873390198, + "learning_rate": 4.999196112694013e-06, + "loss": 0.605, + "step": 758 + }, + { + "epoch": 0.05580061755624173, + "grad_norm": 0.8728195428848267, + "learning_rate": 4.999193667542299e-06, + "loss": 0.6311, + "step": 759 + }, + { + "epoch": 0.05587413615644758, + "grad_norm": 0.96137535572052, + "learning_rate": 4.999191218678171e-06, + "loss": 0.6759, + "step": 760 + }, + { + "epoch": 0.055947654756653434, + "grad_norm": 0.8965213894844055, + "learning_rate": 4.999188766101631e-06, + "loss": 0.625, + "step": 761 + }, + { + "epoch": 0.056021173356859286, + "grad_norm": 0.972930371761322, + "learning_rate": 4.999186309812682e-06, + "loss": 0.6632, + "step": 762 + }, + { + "epoch": 0.05609469195706514, + "grad_norm": 0.9554989337921143, + "learning_rate": 4.99918384981133e-06, + "loss": 0.6563, + "step": 763 + }, + { + "epoch": 0.05616821055727099, + "grad_norm": 0.9036605358123779, + "learning_rate": 4.999181386097577e-06, + "loss": 0.6587, + "step": 764 + }, + { + "epoch": 0.05624172915747684, + "grad_norm": 0.8494423031806946, + "learning_rate": 4.9991789186714255e-06, + "loss": 0.6005, + "step": 765 + }, + { + "epoch": 0.05631524775768269, + "grad_norm": 0.9248480200767517, + "learning_rate": 4.999176447532882e-06, + "loss": 0.6903, + "step": 766 + }, + { + "epoch": 0.056388766357888545, + "grad_norm": 0.9696122407913208, + "learning_rate": 4.999173972681948e-06, + "loss": 0.6668, + "step": 767 + }, + { + "epoch": 0.0564622849580944, + "grad_norm": 0.941091775894165, + "learning_rate": 4.999171494118628e-06, + "loss": 0.6476, + "step": 768 + }, + { + "epoch": 0.05653580355830025, + "grad_norm": 0.9235638380050659, + "learning_rate": 4.999169011842926e-06, + "loss": 0.6627, + "step": 769 + }, + { + "epoch": 0.0566093221585061, + "grad_norm": 0.8816823363304138, + "learning_rate": 4.999166525854846e-06, + "loss": 0.5879, + "step": 770 + }, + { + "epoch": 0.05668284075871195, + "grad_norm": 0.9036292433738708, + "learning_rate": 4.9991640361543895e-06, + "loss": 0.6267, + "step": 771 + }, + { + "epoch": 0.056756359358917804, + "grad_norm": 0.9639711976051331, + "learning_rate": 4.999161542741562e-06, + "loss": 0.6617, + "step": 772 + }, + { + "epoch": 0.056829877959123656, + "grad_norm": 0.9046840071678162, + "learning_rate": 4.999159045616367e-06, + "loss": 0.6552, + "step": 773 + }, + { + "epoch": 0.05690339655932951, + "grad_norm": 0.8738160729408264, + "learning_rate": 4.999156544778809e-06, + "loss": 0.6278, + "step": 774 + }, + { + "epoch": 0.05697691515953536, + "grad_norm": 0.8577995896339417, + "learning_rate": 4.99915404022889e-06, + "loss": 0.6272, + "step": 775 + }, + { + "epoch": 0.05705043375974121, + "grad_norm": 0.9035370945930481, + "learning_rate": 4.999151531966614e-06, + "loss": 0.6383, + "step": 776 + }, + { + "epoch": 0.057123952359947064, + "grad_norm": 0.8986926674842834, + "learning_rate": 4.999149019991986e-06, + "loss": 0.6762, + "step": 777 + }, + { + "epoch": 0.057197470960152916, + "grad_norm": 0.89336097240448, + "learning_rate": 4.999146504305009e-06, + "loss": 0.6452, + "step": 778 + }, + { + "epoch": 0.05727098956035877, + "grad_norm": 0.9116345047950745, + "learning_rate": 4.999143984905687e-06, + "loss": 0.6256, + "step": 779 + }, + { + "epoch": 0.05734450816056462, + "grad_norm": 0.8901962041854858, + "learning_rate": 4.999141461794023e-06, + "loss": 0.6232, + "step": 780 + }, + { + "epoch": 0.05741802676077048, + "grad_norm": 0.8841277956962585, + "learning_rate": 4.9991389349700206e-06, + "loss": 0.6021, + "step": 781 + }, + { + "epoch": 0.05749154536097633, + "grad_norm": 0.9411440491676331, + "learning_rate": 4.999136404433685e-06, + "loss": 0.6486, + "step": 782 + }, + { + "epoch": 0.05756506396118218, + "grad_norm": 0.8941417932510376, + "learning_rate": 4.999133870185019e-06, + "loss": 0.6493, + "step": 783 + }, + { + "epoch": 0.057638582561388034, + "grad_norm": 0.8891673684120178, + "learning_rate": 4.999131332224026e-06, + "loss": 0.5986, + "step": 784 + }, + { + "epoch": 0.057712101161593886, + "grad_norm": 0.9186288118362427, + "learning_rate": 4.99912879055071e-06, + "loss": 0.6384, + "step": 785 + }, + { + "epoch": 0.05778561976179974, + "grad_norm": 0.9045973420143127, + "learning_rate": 4.999126245165076e-06, + "loss": 0.7017, + "step": 786 + }, + { + "epoch": 0.05785913836200559, + "grad_norm": 0.8994572758674622, + "learning_rate": 4.999123696067126e-06, + "loss": 0.636, + "step": 787 + }, + { + "epoch": 0.05793265696221144, + "grad_norm": 0.8797492980957031, + "learning_rate": 4.9991211432568656e-06, + "loss": 0.6804, + "step": 788 + }, + { + "epoch": 0.05800617556241729, + "grad_norm": 0.9095622301101685, + "learning_rate": 4.999118586734297e-06, + "loss": 0.6619, + "step": 789 + }, + { + "epoch": 0.058079694162623145, + "grad_norm": 0.889020562171936, + "learning_rate": 4.999116026499425e-06, + "loss": 0.6511, + "step": 790 + }, + { + "epoch": 0.058153212762829, + "grad_norm": 0.9027122259140015, + "learning_rate": 4.999113462552252e-06, + "loss": 0.6344, + "step": 791 + }, + { + "epoch": 0.05822673136303485, + "grad_norm": 0.8416079878807068, + "learning_rate": 4.999110894892784e-06, + "loss": 0.5972, + "step": 792 + }, + { + "epoch": 0.0583002499632407, + "grad_norm": 0.9498671889305115, + "learning_rate": 4.999108323521023e-06, + "loss": 0.6225, + "step": 793 + }, + { + "epoch": 0.05837376856344655, + "grad_norm": 0.8939564228057861, + "learning_rate": 4.9991057484369735e-06, + "loss": 0.6183, + "step": 794 + }, + { + "epoch": 0.058447287163652405, + "grad_norm": 0.8449214100837708, + "learning_rate": 4.999103169640639e-06, + "loss": 0.6299, + "step": 795 + }, + { + "epoch": 0.05852080576385826, + "grad_norm": 0.9068626761436462, + "learning_rate": 4.9991005871320235e-06, + "loss": 0.6224, + "step": 796 + }, + { + "epoch": 0.05859432436406411, + "grad_norm": 0.9138272404670715, + "learning_rate": 4.999098000911131e-06, + "loss": 0.6474, + "step": 797 + }, + { + "epoch": 0.05866784296426996, + "grad_norm": 0.9171993136405945, + "learning_rate": 4.999095410977966e-06, + "loss": 0.6549, + "step": 798 + }, + { + "epoch": 0.05874136156447581, + "grad_norm": 0.9348229169845581, + "learning_rate": 4.9990928173325305e-06, + "loss": 0.662, + "step": 799 + }, + { + "epoch": 0.058814880164681664, + "grad_norm": 0.9308472871780396, + "learning_rate": 4.99909021997483e-06, + "loss": 0.6571, + "step": 800 + }, + { + "epoch": 0.058888398764887516, + "grad_norm": 0.9044580459594727, + "learning_rate": 4.999087618904867e-06, + "loss": 0.6346, + "step": 801 + }, + { + "epoch": 0.05896191736509337, + "grad_norm": 0.8926357626914978, + "learning_rate": 4.999085014122648e-06, + "loss": 0.5724, + "step": 802 + }, + { + "epoch": 0.05903543596529922, + "grad_norm": 0.9556214809417725, + "learning_rate": 4.999082405628174e-06, + "loss": 0.6503, + "step": 803 + }, + { + "epoch": 0.05910895456550507, + "grad_norm": 0.941215455532074, + "learning_rate": 4.9990797934214506e-06, + "loss": 0.6543, + "step": 804 + }, + { + "epoch": 0.059182473165710923, + "grad_norm": 0.9276965856552124, + "learning_rate": 4.99907717750248e-06, + "loss": 0.6441, + "step": 805 + }, + { + "epoch": 0.059255991765916775, + "grad_norm": 0.9304004907608032, + "learning_rate": 4.9990745578712675e-06, + "loss": 0.6212, + "step": 806 + }, + { + "epoch": 0.05932951036612263, + "grad_norm": 0.9084956049919128, + "learning_rate": 4.999071934527817e-06, + "loss": 0.6761, + "step": 807 + }, + { + "epoch": 0.05940302896632848, + "grad_norm": 0.8983669281005859, + "learning_rate": 4.999069307472132e-06, + "loss": 0.6377, + "step": 808 + }, + { + "epoch": 0.05947654756653433, + "grad_norm": 0.9402640461921692, + "learning_rate": 4.9990666767042155e-06, + "loss": 0.6786, + "step": 809 + }, + { + "epoch": 0.05955006616674018, + "grad_norm": 0.9275414347648621, + "learning_rate": 4.999064042224073e-06, + "loss": 0.6403, + "step": 810 + }, + { + "epoch": 0.059623584766946035, + "grad_norm": 0.9382085800170898, + "learning_rate": 4.999061404031707e-06, + "loss": 0.6115, + "step": 811 + }, + { + "epoch": 0.05969710336715189, + "grad_norm": 0.9155436158180237, + "learning_rate": 4.999058762127123e-06, + "loss": 0.636, + "step": 812 + }, + { + "epoch": 0.05977062196735774, + "grad_norm": 0.8671582937240601, + "learning_rate": 4.9990561165103236e-06, + "loss": 0.6321, + "step": 813 + }, + { + "epoch": 0.05984414056756359, + "grad_norm": 0.9301660656929016, + "learning_rate": 4.999053467181313e-06, + "loss": 0.6517, + "step": 814 + }, + { + "epoch": 0.05991765916776944, + "grad_norm": 0.9091248512268066, + "learning_rate": 4.9990508141400965e-06, + "loss": 0.6604, + "step": 815 + }, + { + "epoch": 0.0599911777679753, + "grad_norm": 0.9071407914161682, + "learning_rate": 4.999048157386676e-06, + "loss": 0.6505, + "step": 816 + }, + { + "epoch": 0.06006469636818115, + "grad_norm": 0.9143269062042236, + "learning_rate": 4.999045496921056e-06, + "loss": 0.6565, + "step": 817 + }, + { + "epoch": 0.060138214968387005, + "grad_norm": 0.8441069722175598, + "learning_rate": 4.999042832743241e-06, + "loss": 0.619, + "step": 818 + }, + { + "epoch": 0.06021173356859286, + "grad_norm": 0.9065079092979431, + "learning_rate": 4.999040164853235e-06, + "loss": 0.6195, + "step": 819 + }, + { + "epoch": 0.06028525216879871, + "grad_norm": 0.8917563557624817, + "learning_rate": 4.999037493251041e-06, + "loss": 0.6326, + "step": 820 + }, + { + "epoch": 0.06035877076900456, + "grad_norm": 0.9454360008239746, + "learning_rate": 4.999034817936665e-06, + "loss": 0.6453, + "step": 821 + }, + { + "epoch": 0.06043228936921041, + "grad_norm": 0.8825609683990479, + "learning_rate": 4.999032138910108e-06, + "loss": 0.6187, + "step": 822 + }, + { + "epoch": 0.060505807969416264, + "grad_norm": 0.8456750512123108, + "learning_rate": 4.999029456171377e-06, + "loss": 0.5955, + "step": 823 + }, + { + "epoch": 0.060579326569622116, + "grad_norm": 0.9336667656898499, + "learning_rate": 4.9990267697204734e-06, + "loss": 0.657, + "step": 824 + }, + { + "epoch": 0.06065284516982797, + "grad_norm": 0.9444589018821716, + "learning_rate": 4.999024079557403e-06, + "loss": 0.631, + "step": 825 + }, + { + "epoch": 0.06072636377003382, + "grad_norm": 1.0250695943832397, + "learning_rate": 4.999021385682169e-06, + "loss": 0.6443, + "step": 826 + }, + { + "epoch": 0.06079988237023967, + "grad_norm": 0.9707430005073547, + "learning_rate": 4.999018688094775e-06, + "loss": 0.6432, + "step": 827 + }, + { + "epoch": 0.060873400970445524, + "grad_norm": 0.9615734219551086, + "learning_rate": 4.999015986795227e-06, + "loss": 0.655, + "step": 828 + }, + { + "epoch": 0.060946919570651376, + "grad_norm": 0.9391093850135803, + "learning_rate": 4.999013281783527e-06, + "loss": 0.6841, + "step": 829 + }, + { + "epoch": 0.06102043817085723, + "grad_norm": 0.9376475811004639, + "learning_rate": 4.999010573059679e-06, + "loss": 0.6423, + "step": 830 + }, + { + "epoch": 0.06109395677106308, + "grad_norm": 0.9662964344024658, + "learning_rate": 4.999007860623688e-06, + "loss": 0.6622, + "step": 831 + }, + { + "epoch": 0.06116747537126893, + "grad_norm": 0.8953654170036316, + "learning_rate": 4.999005144475558e-06, + "loss": 0.6603, + "step": 832 + }, + { + "epoch": 0.06124099397147478, + "grad_norm": 0.9158509373664856, + "learning_rate": 4.999002424615293e-06, + "loss": 0.6285, + "step": 833 + }, + { + "epoch": 0.061314512571680635, + "grad_norm": 0.9303370714187622, + "learning_rate": 4.998999701042895e-06, + "loss": 0.6095, + "step": 834 + }, + { + "epoch": 0.06138803117188649, + "grad_norm": 0.867514967918396, + "learning_rate": 4.998996973758371e-06, + "loss": 0.6317, + "step": 835 + }, + { + "epoch": 0.06146154977209234, + "grad_norm": 0.9549927711486816, + "learning_rate": 4.998994242761724e-06, + "loss": 0.6645, + "step": 836 + }, + { + "epoch": 0.06153506837229819, + "grad_norm": 0.920991837978363, + "learning_rate": 4.998991508052957e-06, + "loss": 0.6242, + "step": 837 + }, + { + "epoch": 0.06160858697250404, + "grad_norm": 0.8477164506912231, + "learning_rate": 4.998988769632076e-06, + "loss": 0.5968, + "step": 838 + }, + { + "epoch": 0.061682105572709894, + "grad_norm": 0.8835163712501526, + "learning_rate": 4.998986027499084e-06, + "loss": 0.6286, + "step": 839 + }, + { + "epoch": 0.061755624172915746, + "grad_norm": 0.8756706714630127, + "learning_rate": 4.9989832816539845e-06, + "loss": 0.6334, + "step": 840 + }, + { + "epoch": 0.0618291427731216, + "grad_norm": 0.8752192258834839, + "learning_rate": 4.998980532096782e-06, + "loss": 0.5981, + "step": 841 + }, + { + "epoch": 0.06190266137332745, + "grad_norm": 0.9051156640052795, + "learning_rate": 4.998977778827482e-06, + "loss": 0.6408, + "step": 842 + }, + { + "epoch": 0.0619761799735333, + "grad_norm": 0.9490341544151306, + "learning_rate": 4.998975021846087e-06, + "loss": 0.6049, + "step": 843 + }, + { + "epoch": 0.062049698573739154, + "grad_norm": 0.9012110233306885, + "learning_rate": 4.998972261152601e-06, + "loss": 0.6357, + "step": 844 + }, + { + "epoch": 0.062123217173945006, + "grad_norm": 0.9597491025924683, + "learning_rate": 4.998969496747028e-06, + "loss": 0.6825, + "step": 845 + }, + { + "epoch": 0.06219673577415086, + "grad_norm": 0.9137027859687805, + "learning_rate": 4.998966728629373e-06, + "loss": 0.6339, + "step": 846 + }, + { + "epoch": 0.06227025437435671, + "grad_norm": 0.9366801381111145, + "learning_rate": 4.99896395679964e-06, + "loss": 0.6702, + "step": 847 + }, + { + "epoch": 0.06234377297456256, + "grad_norm": 0.9430127739906311, + "learning_rate": 4.998961181257832e-06, + "loss": 0.653, + "step": 848 + }, + { + "epoch": 0.06241729157476841, + "grad_norm": 0.891970694065094, + "learning_rate": 4.998958402003955e-06, + "loss": 0.6235, + "step": 849 + }, + { + "epoch": 0.062490810174974265, + "grad_norm": 0.8471444249153137, + "learning_rate": 4.998955619038013e-06, + "loss": 0.6114, + "step": 850 + }, + { + "epoch": 0.06256432877518012, + "grad_norm": 0.9218086004257202, + "learning_rate": 4.9989528323600075e-06, + "loss": 0.6751, + "step": 851 + }, + { + "epoch": 0.06263784737538597, + "grad_norm": 0.9227179884910583, + "learning_rate": 4.998950041969945e-06, + "loss": 0.683, + "step": 852 + }, + { + "epoch": 0.06271136597559182, + "grad_norm": 0.9223443269729614, + "learning_rate": 4.99894724786783e-06, + "loss": 0.6148, + "step": 853 + }, + { + "epoch": 0.06278488457579767, + "grad_norm": 0.8694784045219421, + "learning_rate": 4.998944450053664e-06, + "loss": 0.6034, + "step": 854 + }, + { + "epoch": 0.06285840317600352, + "grad_norm": 0.9790363907814026, + "learning_rate": 4.998941648527454e-06, + "loss": 0.6678, + "step": 855 + }, + { + "epoch": 0.06293192177620938, + "grad_norm": 0.8715975284576416, + "learning_rate": 4.9989388432892026e-06, + "loss": 0.6563, + "step": 856 + }, + { + "epoch": 0.06300544037641523, + "grad_norm": 0.93951016664505, + "learning_rate": 4.9989360343389146e-06, + "loss": 0.6737, + "step": 857 + }, + { + "epoch": 0.06307895897662108, + "grad_norm": 0.9340924620628357, + "learning_rate": 4.998933221676594e-06, + "loss": 0.6595, + "step": 858 + }, + { + "epoch": 0.06315247757682693, + "grad_norm": 0.9205107092857361, + "learning_rate": 4.998930405302245e-06, + "loss": 0.6272, + "step": 859 + }, + { + "epoch": 0.06322599617703278, + "grad_norm": 0.8972273468971252, + "learning_rate": 4.9989275852158716e-06, + "loss": 0.631, + "step": 860 + }, + { + "epoch": 0.06329951477723864, + "grad_norm": 0.8867536783218384, + "learning_rate": 4.998924761417479e-06, + "loss": 0.6575, + "step": 861 + }, + { + "epoch": 0.06337303337744449, + "grad_norm": 0.8571094274520874, + "learning_rate": 4.9989219339070685e-06, + "loss": 0.6191, + "step": 862 + }, + { + "epoch": 0.06344655197765034, + "grad_norm": 0.8869963884353638, + "learning_rate": 4.998919102684647e-06, + "loss": 0.6426, + "step": 863 + }, + { + "epoch": 0.06352007057785619, + "grad_norm": 0.9589933753013611, + "learning_rate": 4.998916267750219e-06, + "loss": 0.663, + "step": 864 + }, + { + "epoch": 0.06359358917806204, + "grad_norm": 0.9322200417518616, + "learning_rate": 4.998913429103787e-06, + "loss": 0.663, + "step": 865 + }, + { + "epoch": 0.0636671077782679, + "grad_norm": 0.9475408792495728, + "learning_rate": 4.9989105867453566e-06, + "loss": 0.6443, + "step": 866 + }, + { + "epoch": 0.06374062637847375, + "grad_norm": 0.9284839034080505, + "learning_rate": 4.9989077406749305e-06, + "loss": 0.609, + "step": 867 + }, + { + "epoch": 0.06381414497867961, + "grad_norm": 0.8761789202690125, + "learning_rate": 4.998904890892514e-06, + "loss": 0.6408, + "step": 868 + }, + { + "epoch": 0.06388766357888546, + "grad_norm": 0.8866778016090393, + "learning_rate": 4.998902037398112e-06, + "loss": 0.6351, + "step": 869 + }, + { + "epoch": 0.06396118217909132, + "grad_norm": 0.9108879566192627, + "learning_rate": 4.9988991801917264e-06, + "loss": 0.6842, + "step": 870 + }, + { + "epoch": 0.06403470077929717, + "grad_norm": 0.8667548894882202, + "learning_rate": 4.998896319273363e-06, + "loss": 0.616, + "step": 871 + }, + { + "epoch": 0.06410821937950302, + "grad_norm": 0.9253000617027283, + "learning_rate": 4.998893454643027e-06, + "loss": 0.6392, + "step": 872 + }, + { + "epoch": 0.06418173797970887, + "grad_norm": 0.8760155439376831, + "learning_rate": 4.9988905863007205e-06, + "loss": 0.6437, + "step": 873 + }, + { + "epoch": 0.06425525657991472, + "grad_norm": 0.9356342554092407, + "learning_rate": 4.99888771424645e-06, + "loss": 0.6745, + "step": 874 + }, + { + "epoch": 0.06432877518012058, + "grad_norm": 0.9507670998573303, + "learning_rate": 4.9988848384802185e-06, + "loss": 0.6223, + "step": 875 + }, + { + "epoch": 0.06440229378032643, + "grad_norm": 0.898931622505188, + "learning_rate": 4.9988819590020295e-06, + "loss": 0.6112, + "step": 876 + }, + { + "epoch": 0.06447581238053228, + "grad_norm": 0.9290241599082947, + "learning_rate": 4.99887907581189e-06, + "loss": 0.6558, + "step": 877 + }, + { + "epoch": 0.06454933098073813, + "grad_norm": 0.8917547464370728, + "learning_rate": 4.9988761889098e-06, + "loss": 0.6532, + "step": 878 + }, + { + "epoch": 0.06462284958094398, + "grad_norm": 0.864997386932373, + "learning_rate": 4.998873298295768e-06, + "loss": 0.5706, + "step": 879 + }, + { + "epoch": 0.06469636818114984, + "grad_norm": 0.879806399345398, + "learning_rate": 4.998870403969796e-06, + "loss": 0.6507, + "step": 880 + }, + { + "epoch": 0.06476988678135569, + "grad_norm": 0.9007628560066223, + "learning_rate": 4.9988675059318894e-06, + "loss": 0.6489, + "step": 881 + }, + { + "epoch": 0.06484340538156154, + "grad_norm": 0.9199798107147217, + "learning_rate": 4.998864604182051e-06, + "loss": 0.6337, + "step": 882 + }, + { + "epoch": 0.06491692398176739, + "grad_norm": 0.9719408750534058, + "learning_rate": 4.998861698720287e-06, + "loss": 0.6632, + "step": 883 + }, + { + "epoch": 0.06499044258197324, + "grad_norm": 0.914836049079895, + "learning_rate": 4.9988587895466e-06, + "loss": 0.6396, + "step": 884 + }, + { + "epoch": 0.0650639611821791, + "grad_norm": 0.9523332715034485, + "learning_rate": 4.998855876660995e-06, + "loss": 0.6573, + "step": 885 + }, + { + "epoch": 0.06513747978238495, + "grad_norm": 0.8779497146606445, + "learning_rate": 4.998852960063477e-06, + "loss": 0.6441, + "step": 886 + }, + { + "epoch": 0.0652109983825908, + "grad_norm": 0.8999835252761841, + "learning_rate": 4.998850039754049e-06, + "loss": 0.6138, + "step": 887 + }, + { + "epoch": 0.06528451698279665, + "grad_norm": 0.9114091992378235, + "learning_rate": 4.998847115732717e-06, + "loss": 0.6599, + "step": 888 + }, + { + "epoch": 0.0653580355830025, + "grad_norm": 0.9102962017059326, + "learning_rate": 4.998844187999484e-06, + "loss": 0.6353, + "step": 889 + }, + { + "epoch": 0.06543155418320835, + "grad_norm": 0.8758114576339722, + "learning_rate": 4.998841256554355e-06, + "loss": 0.6157, + "step": 890 + }, + { + "epoch": 0.0655050727834142, + "grad_norm": 0.9177024960517883, + "learning_rate": 4.998838321397334e-06, + "loss": 0.665, + "step": 891 + }, + { + "epoch": 0.06557859138362006, + "grad_norm": 0.923774778842926, + "learning_rate": 4.998835382528426e-06, + "loss": 0.6549, + "step": 892 + }, + { + "epoch": 0.06565210998382591, + "grad_norm": 0.9518763422966003, + "learning_rate": 4.998832439947634e-06, + "loss": 0.7104, + "step": 893 + }, + { + "epoch": 0.06572562858403176, + "grad_norm": 0.8948664665222168, + "learning_rate": 4.9988294936549645e-06, + "loss": 0.6415, + "step": 894 + }, + { + "epoch": 0.06579914718423761, + "grad_norm": 0.8420028686523438, + "learning_rate": 4.998826543650419e-06, + "loss": 0.6109, + "step": 895 + }, + { + "epoch": 0.06587266578444347, + "grad_norm": 0.8832506537437439, + "learning_rate": 4.998823589934004e-06, + "loss": 0.6713, + "step": 896 + }, + { + "epoch": 0.06594618438464932, + "grad_norm": 0.9431427717208862, + "learning_rate": 4.998820632505724e-06, + "loss": 0.6351, + "step": 897 + }, + { + "epoch": 0.06601970298485517, + "grad_norm": 0.8857357501983643, + "learning_rate": 4.9988176713655825e-06, + "loss": 0.6595, + "step": 898 + }, + { + "epoch": 0.06609322158506102, + "grad_norm": 0.8864589333534241, + "learning_rate": 4.998814706513584e-06, + "loss": 0.6621, + "step": 899 + }, + { + "epoch": 0.06616674018526687, + "grad_norm": 0.8687387704849243, + "learning_rate": 4.998811737949734e-06, + "loss": 0.6179, + "step": 900 + }, + { + "epoch": 0.06624025878547272, + "grad_norm": 0.9524187445640564, + "learning_rate": 4.998808765674035e-06, + "loss": 0.5986, + "step": 901 + }, + { + "epoch": 0.06631377738567858, + "grad_norm": 0.8880021572113037, + "learning_rate": 4.998805789686492e-06, + "loss": 0.6385, + "step": 902 + }, + { + "epoch": 0.06638729598588443, + "grad_norm": 0.8404618501663208, + "learning_rate": 4.998802809987111e-06, + "loss": 0.6051, + "step": 903 + }, + { + "epoch": 0.06646081458609028, + "grad_norm": 0.9363351464271545, + "learning_rate": 4.998799826575894e-06, + "loss": 0.6195, + "step": 904 + }, + { + "epoch": 0.06653433318629613, + "grad_norm": 0.9064217805862427, + "learning_rate": 4.998796839452847e-06, + "loss": 0.6049, + "step": 905 + }, + { + "epoch": 0.06660785178650198, + "grad_norm": 0.9746383428573608, + "learning_rate": 4.998793848617976e-06, + "loss": 0.6737, + "step": 906 + }, + { + "epoch": 0.06668137038670784, + "grad_norm": 0.9914853572845459, + "learning_rate": 4.998790854071282e-06, + "loss": 0.6588, + "step": 907 + }, + { + "epoch": 0.06675488898691369, + "grad_norm": 0.8785464763641357, + "learning_rate": 4.9987878558127705e-06, + "loss": 0.6606, + "step": 908 + }, + { + "epoch": 0.06682840758711954, + "grad_norm": 0.9428932070732117, + "learning_rate": 4.9987848538424474e-06, + "loss": 0.6514, + "step": 909 + }, + { + "epoch": 0.06690192618732539, + "grad_norm": 0.9520069360733032, + "learning_rate": 4.998781848160316e-06, + "loss": 0.6304, + "step": 910 + }, + { + "epoch": 0.06697544478753124, + "grad_norm": 0.8562750816345215, + "learning_rate": 4.99877883876638e-06, + "loss": 0.6389, + "step": 911 + }, + { + "epoch": 0.0670489633877371, + "grad_norm": 0.9428862929344177, + "learning_rate": 4.998775825660647e-06, + "loss": 0.6346, + "step": 912 + }, + { + "epoch": 0.06712248198794295, + "grad_norm": 0.9826167821884155, + "learning_rate": 4.998772808843118e-06, + "loss": 0.6056, + "step": 913 + }, + { + "epoch": 0.0671960005881488, + "grad_norm": 0.8836949467658997, + "learning_rate": 4.998769788313799e-06, + "loss": 0.6271, + "step": 914 + }, + { + "epoch": 0.06726951918835465, + "grad_norm": 0.9062725305557251, + "learning_rate": 4.998766764072694e-06, + "loss": 0.6503, + "step": 915 + }, + { + "epoch": 0.0673430377885605, + "grad_norm": 0.9007443785667419, + "learning_rate": 4.998763736119808e-06, + "loss": 0.6546, + "step": 916 + }, + { + "epoch": 0.06741655638876635, + "grad_norm": 0.8576843738555908, + "learning_rate": 4.998760704455145e-06, + "loss": 0.6251, + "step": 917 + }, + { + "epoch": 0.0674900749889722, + "grad_norm": 0.8914588093757629, + "learning_rate": 4.99875766907871e-06, + "loss": 0.6416, + "step": 918 + }, + { + "epoch": 0.06756359358917806, + "grad_norm": 0.897590160369873, + "learning_rate": 4.998754629990508e-06, + "loss": 0.6497, + "step": 919 + }, + { + "epoch": 0.06763711218938391, + "grad_norm": 0.9441062211990356, + "learning_rate": 4.998751587190542e-06, + "loss": 0.6514, + "step": 920 + }, + { + "epoch": 0.06771063078958976, + "grad_norm": 0.9162338972091675, + "learning_rate": 4.998748540678817e-06, + "loss": 0.6697, + "step": 921 + }, + { + "epoch": 0.06778414938979561, + "grad_norm": 0.9930146336555481, + "learning_rate": 4.998745490455339e-06, + "loss": 0.6059, + "step": 922 + }, + { + "epoch": 0.06785766799000147, + "grad_norm": 0.9240813255310059, + "learning_rate": 4.998742436520111e-06, + "loss": 0.6032, + "step": 923 + }, + { + "epoch": 0.06793118659020732, + "grad_norm": 0.9070894718170166, + "learning_rate": 4.998739378873137e-06, + "loss": 0.6167, + "step": 924 + }, + { + "epoch": 0.06800470519041317, + "grad_norm": 0.9052631258964539, + "learning_rate": 4.998736317514425e-06, + "loss": 0.6674, + "step": 925 + }, + { + "epoch": 0.06807822379061902, + "grad_norm": 0.9301578402519226, + "learning_rate": 4.998733252443975e-06, + "loss": 0.6491, + "step": 926 + }, + { + "epoch": 0.06815174239082487, + "grad_norm": 0.9131954312324524, + "learning_rate": 4.998730183661794e-06, + "loss": 0.6322, + "step": 927 + }, + { + "epoch": 0.06822526099103073, + "grad_norm": 0.8440525531768799, + "learning_rate": 4.998727111167885e-06, + "loss": 0.606, + "step": 928 + }, + { + "epoch": 0.06829877959123658, + "grad_norm": 0.8775787353515625, + "learning_rate": 4.9987240349622555e-06, + "loss": 0.6431, + "step": 929 + }, + { + "epoch": 0.06837229819144243, + "grad_norm": 0.9163735508918762, + "learning_rate": 4.998720955044907e-06, + "loss": 0.6397, + "step": 930 + }, + { + "epoch": 0.06844581679164828, + "grad_norm": 0.9280537962913513, + "learning_rate": 4.998717871415846e-06, + "loss": 0.6541, + "step": 931 + }, + { + "epoch": 0.06851933539185413, + "grad_norm": 0.9305991530418396, + "learning_rate": 4.998714784075076e-06, + "loss": 0.6499, + "step": 932 + }, + { + "epoch": 0.06859285399205999, + "grad_norm": 0.8866575360298157, + "learning_rate": 4.998711693022603e-06, + "loss": 0.6319, + "step": 933 + }, + { + "epoch": 0.06866637259226584, + "grad_norm": 0.8861228227615356, + "learning_rate": 4.998708598258429e-06, + "loss": 0.6394, + "step": 934 + }, + { + "epoch": 0.06873989119247169, + "grad_norm": 0.8889540433883667, + "learning_rate": 4.998705499782562e-06, + "loss": 0.659, + "step": 935 + }, + { + "epoch": 0.06881340979267754, + "grad_norm": 0.9915173649787903, + "learning_rate": 4.998702397595004e-06, + "loss": 0.6244, + "step": 936 + }, + { + "epoch": 0.0688869283928834, + "grad_norm": 0.9014344215393066, + "learning_rate": 4.99869929169576e-06, + "loss": 0.5803, + "step": 937 + }, + { + "epoch": 0.06896044699308926, + "grad_norm": 0.9240987300872803, + "learning_rate": 4.9986961820848354e-06, + "loss": 0.6721, + "step": 938 + }, + { + "epoch": 0.06903396559329511, + "grad_norm": 0.891690194606781, + "learning_rate": 4.998693068762235e-06, + "loss": 0.6099, + "step": 939 + }, + { + "epoch": 0.06910748419350096, + "grad_norm": 0.9651312232017517, + "learning_rate": 4.998689951727962e-06, + "loss": 0.6709, + "step": 940 + }, + { + "epoch": 0.06918100279370681, + "grad_norm": 0.861619770526886, + "learning_rate": 4.998686830982023e-06, + "loss": 0.6201, + "step": 941 + }, + { + "epoch": 0.06925452139391267, + "grad_norm": 0.8892514109611511, + "learning_rate": 4.99868370652442e-06, + "loss": 0.6724, + "step": 942 + }, + { + "epoch": 0.06932803999411852, + "grad_norm": 0.9783864617347717, + "learning_rate": 4.99868057835516e-06, + "loss": 0.7073, + "step": 943 + }, + { + "epoch": 0.06940155859432437, + "grad_norm": 0.8546106815338135, + "learning_rate": 4.998677446474247e-06, + "loss": 0.6385, + "step": 944 + }, + { + "epoch": 0.06947507719453022, + "grad_norm": 0.9091883301734924, + "learning_rate": 4.9986743108816845e-06, + "loss": 0.6398, + "step": 945 + }, + { + "epoch": 0.06954859579473607, + "grad_norm": 0.8452423214912415, + "learning_rate": 4.99867117157748e-06, + "loss": 0.6342, + "step": 946 + }, + { + "epoch": 0.06962211439494193, + "grad_norm": 0.9273501038551331, + "learning_rate": 4.998668028561634e-06, + "loss": 0.6603, + "step": 947 + }, + { + "epoch": 0.06969563299514778, + "grad_norm": 0.8695652484893799, + "learning_rate": 4.998664881834155e-06, + "loss": 0.6312, + "step": 948 + }, + { + "epoch": 0.06976915159535363, + "grad_norm": 0.9294798374176025, + "learning_rate": 4.9986617313950455e-06, + "loss": 0.6827, + "step": 949 + }, + { + "epoch": 0.06984267019555948, + "grad_norm": 0.870663046836853, + "learning_rate": 4.9986585772443115e-06, + "loss": 0.6562, + "step": 950 + }, + { + "epoch": 0.06991618879576533, + "grad_norm": 0.9125139117240906, + "learning_rate": 4.998655419381956e-06, + "loss": 0.6504, + "step": 951 + }, + { + "epoch": 0.06998970739597118, + "grad_norm": 0.9475307464599609, + "learning_rate": 4.998652257807986e-06, + "loss": 0.6307, + "step": 952 + }, + { + "epoch": 0.07006322599617704, + "grad_norm": 0.881325364112854, + "learning_rate": 4.998649092522404e-06, + "loss": 0.6046, + "step": 953 + }, + { + "epoch": 0.07013674459638289, + "grad_norm": 0.8955162763595581, + "learning_rate": 4.998645923525215e-06, + "loss": 0.6423, + "step": 954 + }, + { + "epoch": 0.07021026319658874, + "grad_norm": 0.8998056054115295, + "learning_rate": 4.9986427508164256e-06, + "loss": 0.6317, + "step": 955 + }, + { + "epoch": 0.07028378179679459, + "grad_norm": 0.8734452128410339, + "learning_rate": 4.998639574396038e-06, + "loss": 0.6316, + "step": 956 + }, + { + "epoch": 0.07035730039700044, + "grad_norm": 0.8676026463508606, + "learning_rate": 4.9986363942640595e-06, + "loss": 0.5947, + "step": 957 + }, + { + "epoch": 0.0704308189972063, + "grad_norm": 0.9133521318435669, + "learning_rate": 4.9986332104204924e-06, + "loss": 0.6638, + "step": 958 + }, + { + "epoch": 0.07050433759741215, + "grad_norm": 0.9522754549980164, + "learning_rate": 4.998630022865343e-06, + "loss": 0.6985, + "step": 959 + }, + { + "epoch": 0.070577856197618, + "grad_norm": 0.8942468166351318, + "learning_rate": 4.9986268315986155e-06, + "loss": 0.607, + "step": 960 + }, + { + "epoch": 0.07065137479782385, + "grad_norm": 0.8814367055892944, + "learning_rate": 4.998623636620314e-06, + "loss": 0.6055, + "step": 961 + }, + { + "epoch": 0.0707248933980297, + "grad_norm": 0.8795385360717773, + "learning_rate": 4.998620437930446e-06, + "loss": 0.6145, + "step": 962 + }, + { + "epoch": 0.07079841199823556, + "grad_norm": 0.8785633444786072, + "learning_rate": 4.998617235529012e-06, + "loss": 0.5871, + "step": 963 + }, + { + "epoch": 0.07087193059844141, + "grad_norm": 0.9258562326431274, + "learning_rate": 4.998614029416019e-06, + "loss": 0.6443, + "step": 964 + }, + { + "epoch": 0.07094544919864726, + "grad_norm": 0.9000057578086853, + "learning_rate": 4.998610819591473e-06, + "loss": 0.6574, + "step": 965 + }, + { + "epoch": 0.07101896779885311, + "grad_norm": 0.9130043983459473, + "learning_rate": 4.998607606055377e-06, + "loss": 0.6655, + "step": 966 + }, + { + "epoch": 0.07109248639905896, + "grad_norm": 0.9762011766433716, + "learning_rate": 4.998604388807735e-06, + "loss": 0.6796, + "step": 967 + }, + { + "epoch": 0.07116600499926481, + "grad_norm": 0.8742241263389587, + "learning_rate": 4.998601167848554e-06, + "loss": 0.6436, + "step": 968 + }, + { + "epoch": 0.07123952359947067, + "grad_norm": 0.9009432196617126, + "learning_rate": 4.998597943177838e-06, + "loss": 0.6739, + "step": 969 + }, + { + "epoch": 0.07131304219967652, + "grad_norm": 0.9845740795135498, + "learning_rate": 4.998594714795591e-06, + "loss": 0.6757, + "step": 970 + }, + { + "epoch": 0.07138656079988237, + "grad_norm": 0.8914994597434998, + "learning_rate": 4.9985914827018186e-06, + "loss": 0.6267, + "step": 971 + }, + { + "epoch": 0.07146007940008822, + "grad_norm": 0.8413193821907043, + "learning_rate": 4.998588246896525e-06, + "loss": 0.6065, + "step": 972 + }, + { + "epoch": 0.07153359800029407, + "grad_norm": 0.94328773021698, + "learning_rate": 4.998585007379716e-06, + "loss": 0.6507, + "step": 973 + }, + { + "epoch": 0.07160711660049993, + "grad_norm": 0.8523434400558472, + "learning_rate": 4.9985817641513955e-06, + "loss": 0.6503, + "step": 974 + }, + { + "epoch": 0.07168063520070578, + "grad_norm": 0.9490289688110352, + "learning_rate": 4.998578517211569e-06, + "loss": 0.646, + "step": 975 + }, + { + "epoch": 0.07175415380091163, + "grad_norm": 0.9607353806495667, + "learning_rate": 4.998575266560241e-06, + "loss": 0.6712, + "step": 976 + }, + { + "epoch": 0.07182767240111748, + "grad_norm": 0.8849191665649414, + "learning_rate": 4.998572012197416e-06, + "loss": 0.645, + "step": 977 + }, + { + "epoch": 0.07190119100132333, + "grad_norm": 0.8993845582008362, + "learning_rate": 4.998568754123099e-06, + "loss": 0.6135, + "step": 978 + }, + { + "epoch": 0.07197470960152919, + "grad_norm": 0.9222908616065979, + "learning_rate": 4.998565492337294e-06, + "loss": 0.6241, + "step": 979 + }, + { + "epoch": 0.07204822820173504, + "grad_norm": 0.9821109175682068, + "learning_rate": 4.998562226840008e-06, + "loss": 0.6407, + "step": 980 + }, + { + "epoch": 0.07212174680194089, + "grad_norm": 0.8872527480125427, + "learning_rate": 4.9985589576312445e-06, + "loss": 0.6573, + "step": 981 + }, + { + "epoch": 0.07219526540214674, + "grad_norm": 0.9550442099571228, + "learning_rate": 4.998555684711008e-06, + "loss": 0.6528, + "step": 982 + }, + { + "epoch": 0.07226878400235259, + "grad_norm": 0.8713672757148743, + "learning_rate": 4.998552408079304e-06, + "loss": 0.6588, + "step": 983 + }, + { + "epoch": 0.07234230260255844, + "grad_norm": 0.9131240844726562, + "learning_rate": 4.998549127736137e-06, + "loss": 0.6659, + "step": 984 + }, + { + "epoch": 0.0724158212027643, + "grad_norm": 0.899346649646759, + "learning_rate": 4.9985458436815134e-06, + "loss": 0.6115, + "step": 985 + }, + { + "epoch": 0.07248933980297015, + "grad_norm": 0.9480598568916321, + "learning_rate": 4.998542555915435e-06, + "loss": 0.6525, + "step": 986 + }, + { + "epoch": 0.072562858403176, + "grad_norm": 0.8869585990905762, + "learning_rate": 4.998539264437909e-06, + "loss": 0.6012, + "step": 987 + }, + { + "epoch": 0.07263637700338185, + "grad_norm": 0.9152124524116516, + "learning_rate": 4.998535969248941e-06, + "loss": 0.6461, + "step": 988 + }, + { + "epoch": 0.0727098956035877, + "grad_norm": 0.8461659550666809, + "learning_rate": 4.9985326703485335e-06, + "loss": 0.6043, + "step": 989 + }, + { + "epoch": 0.07278341420379356, + "grad_norm": 0.9258691072463989, + "learning_rate": 4.9985293677366925e-06, + "loss": 0.6431, + "step": 990 + }, + { + "epoch": 0.07285693280399941, + "grad_norm": 0.8762116432189941, + "learning_rate": 4.998526061413423e-06, + "loss": 0.6465, + "step": 991 + }, + { + "epoch": 0.07293045140420526, + "grad_norm": 0.8473223447799683, + "learning_rate": 4.99852275137873e-06, + "loss": 0.6263, + "step": 992 + }, + { + "epoch": 0.07300397000441111, + "grad_norm": 0.9510603547096252, + "learning_rate": 4.998519437632618e-06, + "loss": 0.6284, + "step": 993 + }, + { + "epoch": 0.07307748860461696, + "grad_norm": 0.9182654023170471, + "learning_rate": 4.998516120175092e-06, + "loss": 0.6281, + "step": 994 + }, + { + "epoch": 0.07315100720482282, + "grad_norm": 0.9486371278762817, + "learning_rate": 4.998512799006158e-06, + "loss": 0.6638, + "step": 995 + }, + { + "epoch": 0.07322452580502867, + "grad_norm": 1.0156487226486206, + "learning_rate": 4.99850947412582e-06, + "loss": 0.6558, + "step": 996 + }, + { + "epoch": 0.07329804440523452, + "grad_norm": 0.8978772759437561, + "learning_rate": 4.9985061455340825e-06, + "loss": 0.6246, + "step": 997 + }, + { + "epoch": 0.07337156300544037, + "grad_norm": 0.8896544575691223, + "learning_rate": 4.998502813230951e-06, + "loss": 0.6175, + "step": 998 + }, + { + "epoch": 0.07344508160564622, + "grad_norm": 0.8315522074699402, + "learning_rate": 4.998499477216431e-06, + "loss": 0.6009, + "step": 999 + }, + { + "epoch": 0.07351860020585207, + "grad_norm": 0.9177680611610413, + "learning_rate": 4.998496137490526e-06, + "loss": 0.6523, + "step": 1000 + }, + { + "epoch": 0.07359211880605793, + "grad_norm": 0.8559266924858093, + "learning_rate": 4.9984927940532415e-06, + "loss": 0.6013, + "step": 1001 + }, + { + "epoch": 0.07366563740626378, + "grad_norm": 0.8774431347846985, + "learning_rate": 4.9984894469045835e-06, + "loss": 0.6391, + "step": 1002 + }, + { + "epoch": 0.07373915600646963, + "grad_norm": 0.9093317985534668, + "learning_rate": 4.998486096044556e-06, + "loss": 0.6431, + "step": 1003 + }, + { + "epoch": 0.07381267460667548, + "grad_norm": 0.8766744136810303, + "learning_rate": 4.998482741473164e-06, + "loss": 0.609, + "step": 1004 + }, + { + "epoch": 0.07388619320688133, + "grad_norm": 0.8410215377807617, + "learning_rate": 4.998479383190413e-06, + "loss": 0.6067, + "step": 1005 + }, + { + "epoch": 0.07395971180708719, + "grad_norm": 0.9748973846435547, + "learning_rate": 4.998476021196308e-06, + "loss": 0.6828, + "step": 1006 + }, + { + "epoch": 0.07403323040729305, + "grad_norm": 0.9235811233520508, + "learning_rate": 4.998472655490853e-06, + "loss": 0.6625, + "step": 1007 + }, + { + "epoch": 0.0741067490074989, + "grad_norm": 0.8862221837043762, + "learning_rate": 4.998469286074054e-06, + "loss": 0.5997, + "step": 1008 + }, + { + "epoch": 0.07418026760770476, + "grad_norm": 0.8916918635368347, + "learning_rate": 4.998465912945915e-06, + "loss": 0.6492, + "step": 1009 + }, + { + "epoch": 0.07425378620791061, + "grad_norm": 0.931361734867096, + "learning_rate": 4.998462536106443e-06, + "loss": 0.6446, + "step": 1010 + }, + { + "epoch": 0.07432730480811646, + "grad_norm": 0.9114062786102295, + "learning_rate": 4.998459155555642e-06, + "loss": 0.6505, + "step": 1011 + }, + { + "epoch": 0.07440082340832231, + "grad_norm": 0.8990252614021301, + "learning_rate": 4.998455771293516e-06, + "loss": 0.6348, + "step": 1012 + }, + { + "epoch": 0.07447434200852816, + "grad_norm": 0.9208066463470459, + "learning_rate": 4.99845238332007e-06, + "loss": 0.6522, + "step": 1013 + }, + { + "epoch": 0.07454786060873402, + "grad_norm": 0.8649980425834656, + "learning_rate": 4.99844899163531e-06, + "loss": 0.5834, + "step": 1014 + }, + { + "epoch": 0.07462137920893987, + "grad_norm": 0.9001917839050293, + "learning_rate": 4.998445596239242e-06, + "loss": 0.6549, + "step": 1015 + }, + { + "epoch": 0.07469489780914572, + "grad_norm": 0.8829908967018127, + "learning_rate": 4.9984421971318686e-06, + "loss": 0.6507, + "step": 1016 + }, + { + "epoch": 0.07476841640935157, + "grad_norm": 0.9360724091529846, + "learning_rate": 4.9984387943131966e-06, + "loss": 0.621, + "step": 1017 + }, + { + "epoch": 0.07484193500955742, + "grad_norm": 0.9254799485206604, + "learning_rate": 4.998435387783231e-06, + "loss": 0.6536, + "step": 1018 + }, + { + "epoch": 0.07491545360976327, + "grad_norm": 0.9447954893112183, + "learning_rate": 4.998431977541976e-06, + "loss": 0.6659, + "step": 1019 + }, + { + "epoch": 0.07498897220996913, + "grad_norm": 0.9403309226036072, + "learning_rate": 4.998428563589437e-06, + "loss": 0.6468, + "step": 1020 + }, + { + "epoch": 0.07506249081017498, + "grad_norm": 0.9279907941818237, + "learning_rate": 4.998425145925619e-06, + "loss": 0.635, + "step": 1021 + }, + { + "epoch": 0.07513600941038083, + "grad_norm": 0.9059908986091614, + "learning_rate": 4.998421724550528e-06, + "loss": 0.6352, + "step": 1022 + }, + { + "epoch": 0.07520952801058668, + "grad_norm": 0.8917195200920105, + "learning_rate": 4.998418299464168e-06, + "loss": 0.6384, + "step": 1023 + }, + { + "epoch": 0.07528304661079253, + "grad_norm": 0.8846142292022705, + "learning_rate": 4.998414870666545e-06, + "loss": 0.6312, + "step": 1024 + }, + { + "epoch": 0.07535656521099839, + "grad_norm": 0.9354112148284912, + "learning_rate": 4.998411438157662e-06, + "loss": 0.6409, + "step": 1025 + }, + { + "epoch": 0.07543008381120424, + "grad_norm": 0.8977459669113159, + "learning_rate": 4.9984080019375265e-06, + "loss": 0.6061, + "step": 1026 + }, + { + "epoch": 0.07550360241141009, + "grad_norm": 0.9382780194282532, + "learning_rate": 4.998404562006142e-06, + "loss": 0.6131, + "step": 1027 + }, + { + "epoch": 0.07557712101161594, + "grad_norm": 0.9551622271537781, + "learning_rate": 4.998401118363516e-06, + "loss": 0.6715, + "step": 1028 + }, + { + "epoch": 0.0756506396118218, + "grad_norm": 0.8780169486999512, + "learning_rate": 4.998397671009652e-06, + "loss": 0.6198, + "step": 1029 + }, + { + "epoch": 0.07572415821202765, + "grad_norm": 0.9172724485397339, + "learning_rate": 4.998394219944553e-06, + "loss": 0.684, + "step": 1030 + }, + { + "epoch": 0.0757976768122335, + "grad_norm": 0.8886054754257202, + "learning_rate": 4.998390765168227e-06, + "loss": 0.6022, + "step": 1031 + }, + { + "epoch": 0.07587119541243935, + "grad_norm": 0.8729427456855774, + "learning_rate": 4.99838730668068e-06, + "loss": 0.6341, + "step": 1032 + }, + { + "epoch": 0.0759447140126452, + "grad_norm": 0.9126943349838257, + "learning_rate": 4.998383844481913e-06, + "loss": 0.6166, + "step": 1033 + }, + { + "epoch": 0.07601823261285105, + "grad_norm": 0.8805565237998962, + "learning_rate": 4.998380378571935e-06, + "loss": 0.637, + "step": 1034 + }, + { + "epoch": 0.0760917512130569, + "grad_norm": 0.8970052003860474, + "learning_rate": 4.9983769089507495e-06, + "loss": 0.6206, + "step": 1035 + }, + { + "epoch": 0.07616526981326276, + "grad_norm": 0.9545966386795044, + "learning_rate": 4.998373435618362e-06, + "loss": 0.6177, + "step": 1036 + }, + { + "epoch": 0.07623878841346861, + "grad_norm": 0.8681239485740662, + "learning_rate": 4.998369958574777e-06, + "loss": 0.6099, + "step": 1037 + }, + { + "epoch": 0.07631230701367446, + "grad_norm": 0.8837587833404541, + "learning_rate": 4.9983664778200004e-06, + "loss": 0.6233, + "step": 1038 + }, + { + "epoch": 0.07638582561388031, + "grad_norm": 0.8905543684959412, + "learning_rate": 4.998362993354038e-06, + "loss": 0.6275, + "step": 1039 + }, + { + "epoch": 0.07645934421408616, + "grad_norm": 0.9034938216209412, + "learning_rate": 4.998359505176894e-06, + "loss": 0.6186, + "step": 1040 + }, + { + "epoch": 0.07653286281429202, + "grad_norm": 0.9631270170211792, + "learning_rate": 4.998356013288573e-06, + "loss": 0.6002, + "step": 1041 + }, + { + "epoch": 0.07660638141449787, + "grad_norm": 0.8569901585578918, + "learning_rate": 4.9983525176890815e-06, + "loss": 0.6429, + "step": 1042 + }, + { + "epoch": 0.07667990001470372, + "grad_norm": 0.8670945763587952, + "learning_rate": 4.998349018378424e-06, + "loss": 0.6263, + "step": 1043 + }, + { + "epoch": 0.07675341861490957, + "grad_norm": 0.8884134888648987, + "learning_rate": 4.998345515356606e-06, + "loss": 0.6238, + "step": 1044 + }, + { + "epoch": 0.07682693721511542, + "grad_norm": 0.8888139724731445, + "learning_rate": 4.998342008623632e-06, + "loss": 0.6239, + "step": 1045 + }, + { + "epoch": 0.07690045581532128, + "grad_norm": 0.9405614733695984, + "learning_rate": 4.998338498179508e-06, + "loss": 0.6867, + "step": 1046 + }, + { + "epoch": 0.07697397441552713, + "grad_norm": 0.8802968263626099, + "learning_rate": 4.998334984024239e-06, + "loss": 0.5859, + "step": 1047 + }, + { + "epoch": 0.07704749301573298, + "grad_norm": 0.8811073303222656, + "learning_rate": 4.998331466157831e-06, + "loss": 0.6335, + "step": 1048 + }, + { + "epoch": 0.07712101161593883, + "grad_norm": 0.906154453754425, + "learning_rate": 4.998327944580287e-06, + "loss": 0.6219, + "step": 1049 + }, + { + "epoch": 0.07719453021614468, + "grad_norm": 0.8763747215270996, + "learning_rate": 4.998324419291615e-06, + "loss": 0.6366, + "step": 1050 + }, + { + "epoch": 0.07726804881635053, + "grad_norm": 0.9406538009643555, + "learning_rate": 4.9983208902918175e-06, + "loss": 0.622, + "step": 1051 + }, + { + "epoch": 0.07734156741655639, + "grad_norm": 0.9612917304039001, + "learning_rate": 4.998317357580902e-06, + "loss": 0.6334, + "step": 1052 + }, + { + "epoch": 0.07741508601676224, + "grad_norm": 0.912607729434967, + "learning_rate": 4.998313821158873e-06, + "loss": 0.6187, + "step": 1053 + }, + { + "epoch": 0.07748860461696809, + "grad_norm": 0.8707718253135681, + "learning_rate": 4.998310281025735e-06, + "loss": 0.669, + "step": 1054 + }, + { + "epoch": 0.07756212321717394, + "grad_norm": 0.9025745391845703, + "learning_rate": 4.998306737181494e-06, + "loss": 0.6762, + "step": 1055 + }, + { + "epoch": 0.0776356418173798, + "grad_norm": 0.8686363101005554, + "learning_rate": 4.998303189626156e-06, + "loss": 0.6154, + "step": 1056 + }, + { + "epoch": 0.07770916041758565, + "grad_norm": 0.9050138592720032, + "learning_rate": 4.9982996383597245e-06, + "loss": 0.6612, + "step": 1057 + }, + { + "epoch": 0.0777826790177915, + "grad_norm": 0.9071610569953918, + "learning_rate": 4.998296083382206e-06, + "loss": 0.6231, + "step": 1058 + }, + { + "epoch": 0.07785619761799735, + "grad_norm": 0.9507054686546326, + "learning_rate": 4.998292524693605e-06, + "loss": 0.6483, + "step": 1059 + }, + { + "epoch": 0.0779297162182032, + "grad_norm": 0.8718072772026062, + "learning_rate": 4.998288962293928e-06, + "loss": 0.5907, + "step": 1060 + }, + { + "epoch": 0.07800323481840905, + "grad_norm": 0.9814473986625671, + "learning_rate": 4.998285396183179e-06, + "loss": 0.6154, + "step": 1061 + }, + { + "epoch": 0.0780767534186149, + "grad_norm": 0.873520016670227, + "learning_rate": 4.998281826361365e-06, + "loss": 0.6183, + "step": 1062 + }, + { + "epoch": 0.07815027201882076, + "grad_norm": 0.909367024898529, + "learning_rate": 4.998278252828488e-06, + "loss": 0.6544, + "step": 1063 + }, + { + "epoch": 0.07822379061902661, + "grad_norm": 0.9275009036064148, + "learning_rate": 4.998274675584557e-06, + "loss": 0.6561, + "step": 1064 + }, + { + "epoch": 0.07829730921923246, + "grad_norm": 0.9527419805526733, + "learning_rate": 4.9982710946295755e-06, + "loss": 0.6527, + "step": 1065 + }, + { + "epoch": 0.07837082781943831, + "grad_norm": 0.8973004817962646, + "learning_rate": 4.998267509963549e-06, + "loss": 0.6099, + "step": 1066 + }, + { + "epoch": 0.07844434641964416, + "grad_norm": 0.896919846534729, + "learning_rate": 4.998263921586483e-06, + "loss": 0.6537, + "step": 1067 + }, + { + "epoch": 0.07851786501985002, + "grad_norm": 0.8512209057807922, + "learning_rate": 4.998260329498383e-06, + "loss": 0.6524, + "step": 1068 + }, + { + "epoch": 0.07859138362005587, + "grad_norm": 0.8656381368637085, + "learning_rate": 4.998256733699254e-06, + "loss": 0.6569, + "step": 1069 + }, + { + "epoch": 0.07866490222026172, + "grad_norm": 0.9248393774032593, + "learning_rate": 4.998253134189101e-06, + "loss": 0.654, + "step": 1070 + }, + { + "epoch": 0.07873842082046757, + "grad_norm": 0.8894462585449219, + "learning_rate": 4.9982495309679305e-06, + "loss": 0.5922, + "step": 1071 + }, + { + "epoch": 0.07881193942067342, + "grad_norm": 0.9371131658554077, + "learning_rate": 4.9982459240357465e-06, + "loss": 0.6656, + "step": 1072 + }, + { + "epoch": 0.07888545802087928, + "grad_norm": 0.9389594197273254, + "learning_rate": 4.998242313392554e-06, + "loss": 0.6789, + "step": 1073 + }, + { + "epoch": 0.07895897662108513, + "grad_norm": 0.8964859843254089, + "learning_rate": 4.998238699038361e-06, + "loss": 0.6185, + "step": 1074 + }, + { + "epoch": 0.07903249522129098, + "grad_norm": 0.9193222522735596, + "learning_rate": 4.998235080973171e-06, + "loss": 0.6604, + "step": 1075 + }, + { + "epoch": 0.07910601382149685, + "grad_norm": 0.8872045874595642, + "learning_rate": 4.998231459196989e-06, + "loss": 0.6061, + "step": 1076 + }, + { + "epoch": 0.0791795324217027, + "grad_norm": 0.9555277228355408, + "learning_rate": 4.998227833709821e-06, + "loss": 0.6455, + "step": 1077 + }, + { + "epoch": 0.07925305102190855, + "grad_norm": 0.9229790568351746, + "learning_rate": 4.998224204511672e-06, + "loss": 0.6441, + "step": 1078 + }, + { + "epoch": 0.0793265696221144, + "grad_norm": 0.9242422580718994, + "learning_rate": 4.998220571602549e-06, + "loss": 0.6436, + "step": 1079 + }, + { + "epoch": 0.07940008822232025, + "grad_norm": 0.8753752112388611, + "learning_rate": 4.998216934982455e-06, + "loss": 0.6365, + "step": 1080 + }, + { + "epoch": 0.0794736068225261, + "grad_norm": 0.9254816174507141, + "learning_rate": 4.998213294651397e-06, + "loss": 0.6373, + "step": 1081 + }, + { + "epoch": 0.07954712542273196, + "grad_norm": 0.8686538338661194, + "learning_rate": 4.99820965060938e-06, + "loss": 0.6316, + "step": 1082 + }, + { + "epoch": 0.07962064402293781, + "grad_norm": 0.8886088132858276, + "learning_rate": 4.99820600285641e-06, + "loss": 0.6804, + "step": 1083 + }, + { + "epoch": 0.07969416262314366, + "grad_norm": 0.9254351854324341, + "learning_rate": 4.99820235139249e-06, + "loss": 0.6407, + "step": 1084 + }, + { + "epoch": 0.07976768122334951, + "grad_norm": 0.8896394371986389, + "learning_rate": 4.998198696217628e-06, + "loss": 0.6126, + "step": 1085 + }, + { + "epoch": 0.07984119982355536, + "grad_norm": 0.924364447593689, + "learning_rate": 4.99819503733183e-06, + "loss": 0.6386, + "step": 1086 + }, + { + "epoch": 0.07991471842376122, + "grad_norm": 0.9077097177505493, + "learning_rate": 4.9981913747350985e-06, + "loss": 0.6664, + "step": 1087 + }, + { + "epoch": 0.07998823702396707, + "grad_norm": 0.9532744884490967, + "learning_rate": 4.998187708427441e-06, + "loss": 0.6527, + "step": 1088 + }, + { + "epoch": 0.08006175562417292, + "grad_norm": 0.8918446898460388, + "learning_rate": 4.998184038408863e-06, + "loss": 0.5808, + "step": 1089 + }, + { + "epoch": 0.08013527422437877, + "grad_norm": 0.8796015977859497, + "learning_rate": 4.998180364679369e-06, + "loss": 0.6108, + "step": 1090 + }, + { + "epoch": 0.08020879282458462, + "grad_norm": 0.9711124300956726, + "learning_rate": 4.998176687238965e-06, + "loss": 0.6496, + "step": 1091 + }, + { + "epoch": 0.08028231142479048, + "grad_norm": 0.8901387453079224, + "learning_rate": 4.998173006087656e-06, + "loss": 0.636, + "step": 1092 + }, + { + "epoch": 0.08035583002499633, + "grad_norm": 0.9386656284332275, + "learning_rate": 4.998169321225447e-06, + "loss": 0.5843, + "step": 1093 + }, + { + "epoch": 0.08042934862520218, + "grad_norm": 0.8709046244621277, + "learning_rate": 4.9981656326523455e-06, + "loss": 0.5996, + "step": 1094 + }, + { + "epoch": 0.08050286722540803, + "grad_norm": 0.9222824573516846, + "learning_rate": 4.9981619403683554e-06, + "loss": 0.6385, + "step": 1095 + }, + { + "epoch": 0.08057638582561388, + "grad_norm": 0.8505337238311768, + "learning_rate": 4.998158244373482e-06, + "loss": 0.5987, + "step": 1096 + }, + { + "epoch": 0.08064990442581974, + "grad_norm": 0.931421160697937, + "learning_rate": 4.998154544667732e-06, + "loss": 0.6323, + "step": 1097 + }, + { + "epoch": 0.08072342302602559, + "grad_norm": 0.8904578685760498, + "learning_rate": 4.99815084125111e-06, + "loss": 0.6393, + "step": 1098 + }, + { + "epoch": 0.08079694162623144, + "grad_norm": 0.9216798543930054, + "learning_rate": 4.9981471341236225e-06, + "loss": 0.706, + "step": 1099 + }, + { + "epoch": 0.08087046022643729, + "grad_norm": 0.8828610181808472, + "learning_rate": 4.998143423285272e-06, + "loss": 0.6641, + "step": 1100 + }, + { + "epoch": 0.08094397882664314, + "grad_norm": 0.8822001218795776, + "learning_rate": 4.998139708736068e-06, + "loss": 0.6723, + "step": 1101 + }, + { + "epoch": 0.081017497426849, + "grad_norm": 0.8829940557479858, + "learning_rate": 4.998135990476015e-06, + "loss": 0.6055, + "step": 1102 + }, + { + "epoch": 0.08109101602705485, + "grad_norm": 0.9075462222099304, + "learning_rate": 4.998132268505116e-06, + "loss": 0.6544, + "step": 1103 + }, + { + "epoch": 0.0811645346272607, + "grad_norm": 0.900483250617981, + "learning_rate": 4.99812854282338e-06, + "loss": 0.6508, + "step": 1104 + }, + { + "epoch": 0.08123805322746655, + "grad_norm": 0.904538631439209, + "learning_rate": 4.998124813430809e-06, + "loss": 0.6304, + "step": 1105 + }, + { + "epoch": 0.0813115718276724, + "grad_norm": 1.0322226285934448, + "learning_rate": 4.998121080327412e-06, + "loss": 0.6497, + "step": 1106 + }, + { + "epoch": 0.08138509042787825, + "grad_norm": 0.9099419116973877, + "learning_rate": 4.998117343513192e-06, + "loss": 0.6822, + "step": 1107 + }, + { + "epoch": 0.0814586090280841, + "grad_norm": 0.9157517552375793, + "learning_rate": 4.998113602988156e-06, + "loss": 0.5524, + "step": 1108 + }, + { + "epoch": 0.08153212762828996, + "grad_norm": 0.8742488026618958, + "learning_rate": 4.998109858752308e-06, + "loss": 0.6533, + "step": 1109 + }, + { + "epoch": 0.08160564622849581, + "grad_norm": 0.8398026823997498, + "learning_rate": 4.998106110805655e-06, + "loss": 0.6241, + "step": 1110 + }, + { + "epoch": 0.08167916482870166, + "grad_norm": 0.8926224708557129, + "learning_rate": 4.998102359148203e-06, + "loss": 0.5854, + "step": 1111 + }, + { + "epoch": 0.08175268342890751, + "grad_norm": 0.9539499282836914, + "learning_rate": 4.998098603779956e-06, + "loss": 0.622, + "step": 1112 + }, + { + "epoch": 0.08182620202911337, + "grad_norm": 0.8794082999229431, + "learning_rate": 4.998094844700921e-06, + "loss": 0.6463, + "step": 1113 + }, + { + "epoch": 0.08189972062931922, + "grad_norm": 0.9353698492050171, + "learning_rate": 4.998091081911102e-06, + "loss": 0.6004, + "step": 1114 + }, + { + "epoch": 0.08197323922952507, + "grad_norm": 0.9171034693717957, + "learning_rate": 4.9980873154105055e-06, + "loss": 0.6425, + "step": 1115 + }, + { + "epoch": 0.08204675782973092, + "grad_norm": 0.8570618033409119, + "learning_rate": 4.998083545199139e-06, + "loss": 0.596, + "step": 1116 + }, + { + "epoch": 0.08212027642993677, + "grad_norm": 0.9052398800849915, + "learning_rate": 4.998079771277004e-06, + "loss": 0.6175, + "step": 1117 + }, + { + "epoch": 0.08219379503014262, + "grad_norm": 0.9264988303184509, + "learning_rate": 4.998075993644109e-06, + "loss": 0.5848, + "step": 1118 + }, + { + "epoch": 0.08226731363034848, + "grad_norm": 0.9243963360786438, + "learning_rate": 4.998072212300459e-06, + "loss": 0.6542, + "step": 1119 + }, + { + "epoch": 0.08234083223055433, + "grad_norm": 0.8831713795661926, + "learning_rate": 4.998068427246059e-06, + "loss": 0.5937, + "step": 1120 + }, + { + "epoch": 0.08241435083076018, + "grad_norm": 0.936769425868988, + "learning_rate": 4.998064638480916e-06, + "loss": 0.6401, + "step": 1121 + }, + { + "epoch": 0.08248786943096603, + "grad_norm": 0.9529094696044922, + "learning_rate": 4.998060846005034e-06, + "loss": 0.6419, + "step": 1122 + }, + { + "epoch": 0.08256138803117188, + "grad_norm": 0.9394015073776245, + "learning_rate": 4.99805704981842e-06, + "loss": 0.6598, + "step": 1123 + }, + { + "epoch": 0.08263490663137774, + "grad_norm": 0.8817088603973389, + "learning_rate": 4.998053249921079e-06, + "loss": 0.6135, + "step": 1124 + }, + { + "epoch": 0.08270842523158359, + "grad_norm": 0.936487078666687, + "learning_rate": 4.998049446313016e-06, + "loss": 0.6144, + "step": 1125 + }, + { + "epoch": 0.08278194383178944, + "grad_norm": 0.9186888337135315, + "learning_rate": 4.998045638994238e-06, + "loss": 0.6487, + "step": 1126 + }, + { + "epoch": 0.08285546243199529, + "grad_norm": 0.9369435906410217, + "learning_rate": 4.99804182796475e-06, + "loss": 0.596, + "step": 1127 + }, + { + "epoch": 0.08292898103220114, + "grad_norm": 0.9145678877830505, + "learning_rate": 4.998038013224558e-06, + "loss": 0.6711, + "step": 1128 + }, + { + "epoch": 0.083002499632407, + "grad_norm": 1.0210590362548828, + "learning_rate": 4.998034194773666e-06, + "loss": 0.633, + "step": 1129 + }, + { + "epoch": 0.08307601823261285, + "grad_norm": 0.8955479264259338, + "learning_rate": 4.998030372612081e-06, + "loss": 0.6237, + "step": 1130 + }, + { + "epoch": 0.0831495368328187, + "grad_norm": 0.9169224500656128, + "learning_rate": 4.99802654673981e-06, + "loss": 0.6648, + "step": 1131 + }, + { + "epoch": 0.08322305543302455, + "grad_norm": 0.9132497906684875, + "learning_rate": 4.998022717156857e-06, + "loss": 0.6538, + "step": 1132 + }, + { + "epoch": 0.0832965740332304, + "grad_norm": 0.920063853263855, + "learning_rate": 4.9980188838632285e-06, + "loss": 0.6562, + "step": 1133 + }, + { + "epoch": 0.08337009263343625, + "grad_norm": 0.9218965172767639, + "learning_rate": 4.998015046858929e-06, + "loss": 0.6237, + "step": 1134 + }, + { + "epoch": 0.0834436112336421, + "grad_norm": 0.8970731496810913, + "learning_rate": 4.998011206143965e-06, + "loss": 0.6862, + "step": 1135 + }, + { + "epoch": 0.08351712983384796, + "grad_norm": 0.895173192024231, + "learning_rate": 4.998007361718342e-06, + "loss": 0.63, + "step": 1136 + }, + { + "epoch": 0.08359064843405381, + "grad_norm": 0.8793734312057495, + "learning_rate": 4.9980035135820656e-06, + "loss": 0.6563, + "step": 1137 + }, + { + "epoch": 0.08366416703425966, + "grad_norm": 0.8972702622413635, + "learning_rate": 4.997999661735142e-06, + "loss": 0.6375, + "step": 1138 + }, + { + "epoch": 0.08373768563446551, + "grad_norm": 0.9960283637046814, + "learning_rate": 4.997995806177577e-06, + "loss": 0.68, + "step": 1139 + }, + { + "epoch": 0.08381120423467137, + "grad_norm": 0.8839015364646912, + "learning_rate": 4.997991946909376e-06, + "loss": 0.644, + "step": 1140 + }, + { + "epoch": 0.08388472283487722, + "grad_norm": 0.9401217699050903, + "learning_rate": 4.997988083930545e-06, + "loss": 0.6242, + "step": 1141 + }, + { + "epoch": 0.08395824143508307, + "grad_norm": 0.8871413469314575, + "learning_rate": 4.9979842172410885e-06, + "loss": 0.6083, + "step": 1142 + }, + { + "epoch": 0.08403176003528892, + "grad_norm": 0.9186726212501526, + "learning_rate": 4.997980346841014e-06, + "loss": 0.6707, + "step": 1143 + }, + { + "epoch": 0.08410527863549477, + "grad_norm": 1.0079505443572998, + "learning_rate": 4.997976472730326e-06, + "loss": 0.6521, + "step": 1144 + }, + { + "epoch": 0.08417879723570063, + "grad_norm": 0.8737124800682068, + "learning_rate": 4.997972594909031e-06, + "loss": 0.6354, + "step": 1145 + }, + { + "epoch": 0.08425231583590649, + "grad_norm": 0.9080460071563721, + "learning_rate": 4.997968713377135e-06, + "loss": 0.6484, + "step": 1146 + }, + { + "epoch": 0.08432583443611234, + "grad_norm": 0.8463165163993835, + "learning_rate": 4.997964828134643e-06, + "loss": 0.6083, + "step": 1147 + }, + { + "epoch": 0.0843993530363182, + "grad_norm": 0.9013625979423523, + "learning_rate": 4.997960939181561e-06, + "loss": 0.6526, + "step": 1148 + }, + { + "epoch": 0.08447287163652405, + "grad_norm": 0.9189068078994751, + "learning_rate": 4.997957046517894e-06, + "loss": 0.5885, + "step": 1149 + }, + { + "epoch": 0.0845463902367299, + "grad_norm": 0.8855214715003967, + "learning_rate": 4.99795315014365e-06, + "loss": 0.6093, + "step": 1150 + }, + { + "epoch": 0.08461990883693575, + "grad_norm": 0.9257481098175049, + "learning_rate": 4.997949250058832e-06, + "loss": 0.6084, + "step": 1151 + }, + { + "epoch": 0.0846934274371416, + "grad_norm": 0.8789997100830078, + "learning_rate": 4.9979453462634476e-06, + "loss": 0.6256, + "step": 1152 + }, + { + "epoch": 0.08476694603734745, + "grad_norm": 0.8586603999137878, + "learning_rate": 4.997941438757502e-06, + "loss": 0.6238, + "step": 1153 + }, + { + "epoch": 0.0848404646375533, + "grad_norm": 0.8668395280838013, + "learning_rate": 4.997937527541003e-06, + "loss": 0.6053, + "step": 1154 + }, + { + "epoch": 0.08491398323775916, + "grad_norm": 0.9150515794754028, + "learning_rate": 4.997933612613952e-06, + "loss": 0.65, + "step": 1155 + }, + { + "epoch": 0.08498750183796501, + "grad_norm": 0.87159264087677, + "learning_rate": 4.997929693976359e-06, + "loss": 0.6103, + "step": 1156 + }, + { + "epoch": 0.08506102043817086, + "grad_norm": 0.9619450569152832, + "learning_rate": 4.997925771628228e-06, + "loss": 0.6298, + "step": 1157 + }, + { + "epoch": 0.08513453903837671, + "grad_norm": 0.9042837619781494, + "learning_rate": 4.997921845569564e-06, + "loss": 0.651, + "step": 1158 + }, + { + "epoch": 0.08520805763858257, + "grad_norm": 0.9397809505462646, + "learning_rate": 4.997917915800376e-06, + "loss": 0.6874, + "step": 1159 + }, + { + "epoch": 0.08528157623878842, + "grad_norm": 0.87514328956604, + "learning_rate": 4.997913982320666e-06, + "loss": 0.5611, + "step": 1160 + }, + { + "epoch": 0.08535509483899427, + "grad_norm": 0.9015989899635315, + "learning_rate": 4.997910045130442e-06, + "loss": 0.6259, + "step": 1161 + }, + { + "epoch": 0.08542861343920012, + "grad_norm": 0.9160394668579102, + "learning_rate": 4.997906104229709e-06, + "loss": 0.6358, + "step": 1162 + }, + { + "epoch": 0.08550213203940597, + "grad_norm": 0.9184367060661316, + "learning_rate": 4.997902159618474e-06, + "loss": 0.6545, + "step": 1163 + }, + { + "epoch": 0.08557565063961182, + "grad_norm": 0.850663423538208, + "learning_rate": 4.9978982112967415e-06, + "loss": 0.6244, + "step": 1164 + }, + { + "epoch": 0.08564916923981768, + "grad_norm": 0.923816442489624, + "learning_rate": 4.997894259264518e-06, + "loss": 0.6964, + "step": 1165 + }, + { + "epoch": 0.08572268784002353, + "grad_norm": 0.8472855091094971, + "learning_rate": 4.99789030352181e-06, + "loss": 0.6282, + "step": 1166 + }, + { + "epoch": 0.08579620644022938, + "grad_norm": 0.8849336504936218, + "learning_rate": 4.997886344068622e-06, + "loss": 0.6625, + "step": 1167 + }, + { + "epoch": 0.08586972504043523, + "grad_norm": 0.9250897169113159, + "learning_rate": 4.997882380904961e-06, + "loss": 0.6307, + "step": 1168 + }, + { + "epoch": 0.08594324364064108, + "grad_norm": 0.8551267981529236, + "learning_rate": 4.997878414030832e-06, + "loss": 0.5978, + "step": 1169 + }, + { + "epoch": 0.08601676224084694, + "grad_norm": 0.9130911231040955, + "learning_rate": 4.997874443446242e-06, + "loss": 0.6354, + "step": 1170 + }, + { + "epoch": 0.08609028084105279, + "grad_norm": 0.9902708530426025, + "learning_rate": 4.997870469151197e-06, + "loss": 0.6033, + "step": 1171 + }, + { + "epoch": 0.08616379944125864, + "grad_norm": 0.9740489721298218, + "learning_rate": 4.997866491145701e-06, + "loss": 0.6886, + "step": 1172 + }, + { + "epoch": 0.08623731804146449, + "grad_norm": 0.9062485098838806, + "learning_rate": 4.997862509429762e-06, + "loss": 0.6098, + "step": 1173 + }, + { + "epoch": 0.08631083664167034, + "grad_norm": 0.9017149209976196, + "learning_rate": 4.997858524003384e-06, + "loss": 0.6191, + "step": 1174 + }, + { + "epoch": 0.0863843552418762, + "grad_norm": 0.8763580322265625, + "learning_rate": 4.9978545348665745e-06, + "loss": 0.633, + "step": 1175 + }, + { + "epoch": 0.08645787384208205, + "grad_norm": 0.9143757224082947, + "learning_rate": 4.997850542019338e-06, + "loss": 0.6365, + "step": 1176 + }, + { + "epoch": 0.0865313924422879, + "grad_norm": 0.8938527703285217, + "learning_rate": 4.9978465454616825e-06, + "loss": 0.6339, + "step": 1177 + }, + { + "epoch": 0.08660491104249375, + "grad_norm": 0.9113151431083679, + "learning_rate": 4.9978425451936126e-06, + "loss": 0.6786, + "step": 1178 + }, + { + "epoch": 0.0866784296426996, + "grad_norm": 0.9132856726646423, + "learning_rate": 4.997838541215134e-06, + "loss": 0.6355, + "step": 1179 + }, + { + "epoch": 0.08675194824290546, + "grad_norm": 0.9318829774856567, + "learning_rate": 4.997834533526253e-06, + "loss": 0.6074, + "step": 1180 + }, + { + "epoch": 0.0868254668431113, + "grad_norm": 0.9002376198768616, + "learning_rate": 4.9978305221269755e-06, + "loss": 0.623, + "step": 1181 + }, + { + "epoch": 0.08689898544331716, + "grad_norm": 0.9293199777603149, + "learning_rate": 4.997826507017308e-06, + "loss": 0.6123, + "step": 1182 + }, + { + "epoch": 0.08697250404352301, + "grad_norm": 1.0128778219223022, + "learning_rate": 4.997822488197256e-06, + "loss": 0.6451, + "step": 1183 + }, + { + "epoch": 0.08704602264372886, + "grad_norm": 0.9101513624191284, + "learning_rate": 4.997818465666825e-06, + "loss": 0.6557, + "step": 1184 + }, + { + "epoch": 0.08711954124393471, + "grad_norm": 0.9585062861442566, + "learning_rate": 4.997814439426022e-06, + "loss": 0.6403, + "step": 1185 + }, + { + "epoch": 0.08719305984414057, + "grad_norm": 0.9385941028594971, + "learning_rate": 4.9978104094748515e-06, + "loss": 0.6521, + "step": 1186 + }, + { + "epoch": 0.08726657844434642, + "grad_norm": 0.875613272190094, + "learning_rate": 4.997806375813321e-06, + "loss": 0.6283, + "step": 1187 + }, + { + "epoch": 0.08734009704455227, + "grad_norm": 0.8831232190132141, + "learning_rate": 4.997802338441437e-06, + "loss": 0.6076, + "step": 1188 + }, + { + "epoch": 0.08741361564475812, + "grad_norm": 0.9218342304229736, + "learning_rate": 4.997798297359202e-06, + "loss": 0.6499, + "step": 1189 + }, + { + "epoch": 0.08748713424496397, + "grad_norm": 0.9265999794006348, + "learning_rate": 4.997794252566626e-06, + "loss": 0.6175, + "step": 1190 + }, + { + "epoch": 0.08756065284516983, + "grad_norm": 0.9486510753631592, + "learning_rate": 4.997790204063713e-06, + "loss": 0.6457, + "step": 1191 + }, + { + "epoch": 0.08763417144537568, + "grad_norm": 0.8806183934211731, + "learning_rate": 4.99778615185047e-06, + "loss": 0.617, + "step": 1192 + }, + { + "epoch": 0.08770769004558153, + "grad_norm": 0.909102201461792, + "learning_rate": 4.997782095926902e-06, + "loss": 0.6514, + "step": 1193 + }, + { + "epoch": 0.08778120864578738, + "grad_norm": 0.8812152147293091, + "learning_rate": 4.997778036293015e-06, + "loss": 0.6516, + "step": 1194 + }, + { + "epoch": 0.08785472724599323, + "grad_norm": 0.8800252676010132, + "learning_rate": 4.9977739729488165e-06, + "loss": 0.6111, + "step": 1195 + }, + { + "epoch": 0.08792824584619909, + "grad_norm": 0.8901277184486389, + "learning_rate": 4.997769905894311e-06, + "loss": 0.5806, + "step": 1196 + }, + { + "epoch": 0.08800176444640494, + "grad_norm": 0.9125829339027405, + "learning_rate": 4.997765835129505e-06, + "loss": 0.6033, + "step": 1197 + }, + { + "epoch": 0.08807528304661079, + "grad_norm": 0.9133819937705994, + "learning_rate": 4.997761760654405e-06, + "loss": 0.6449, + "step": 1198 + }, + { + "epoch": 0.08814880164681664, + "grad_norm": 0.9081048965454102, + "learning_rate": 4.997757682469016e-06, + "loss": 0.6473, + "step": 1199 + }, + { + "epoch": 0.08822232024702249, + "grad_norm": 0.9016053080558777, + "learning_rate": 4.997753600573345e-06, + "loss": 0.6197, + "step": 1200 + }, + { + "epoch": 0.08829583884722834, + "grad_norm": 0.8619528412818909, + "learning_rate": 4.997749514967398e-06, + "loss": 0.6076, + "step": 1201 + }, + { + "epoch": 0.0883693574474342, + "grad_norm": 0.909257709980011, + "learning_rate": 4.997745425651181e-06, + "loss": 0.6412, + "step": 1202 + }, + { + "epoch": 0.08844287604764005, + "grad_norm": 0.9062349796295166, + "learning_rate": 4.9977413326246995e-06, + "loss": 0.6471, + "step": 1203 + }, + { + "epoch": 0.0885163946478459, + "grad_norm": 0.8486305475234985, + "learning_rate": 4.997737235887961e-06, + "loss": 0.6306, + "step": 1204 + }, + { + "epoch": 0.08858991324805175, + "grad_norm": 0.8949933648109436, + "learning_rate": 4.997733135440969e-06, + "loss": 0.5922, + "step": 1205 + }, + { + "epoch": 0.0886634318482576, + "grad_norm": 0.8754445910453796, + "learning_rate": 4.997729031283733e-06, + "loss": 0.6124, + "step": 1206 + }, + { + "epoch": 0.08873695044846346, + "grad_norm": 0.8662939667701721, + "learning_rate": 4.997724923416255e-06, + "loss": 0.5954, + "step": 1207 + }, + { + "epoch": 0.08881046904866931, + "grad_norm": 0.8568161129951477, + "learning_rate": 4.9977208118385455e-06, + "loss": 0.6305, + "step": 1208 + }, + { + "epoch": 0.08888398764887516, + "grad_norm": 0.8673065304756165, + "learning_rate": 4.997716696550607e-06, + "loss": 0.6397, + "step": 1209 + }, + { + "epoch": 0.08895750624908101, + "grad_norm": 0.9388177394866943, + "learning_rate": 4.997712577552448e-06, + "loss": 0.6724, + "step": 1210 + }, + { + "epoch": 0.08903102484928686, + "grad_norm": 0.9019496440887451, + "learning_rate": 4.997708454844074e-06, + "loss": 0.6456, + "step": 1211 + }, + { + "epoch": 0.08910454344949272, + "grad_norm": 0.9599400162696838, + "learning_rate": 4.997704328425491e-06, + "loss": 0.5983, + "step": 1212 + }, + { + "epoch": 0.08917806204969857, + "grad_norm": 0.9547899961471558, + "learning_rate": 4.997700198296704e-06, + "loss": 0.6323, + "step": 1213 + }, + { + "epoch": 0.08925158064990442, + "grad_norm": 0.8866705894470215, + "learning_rate": 4.997696064457721e-06, + "loss": 0.6213, + "step": 1214 + }, + { + "epoch": 0.08932509925011028, + "grad_norm": 0.9543051719665527, + "learning_rate": 4.997691926908546e-06, + "loss": 0.6436, + "step": 1215 + }, + { + "epoch": 0.08939861785031614, + "grad_norm": 0.8823151588439941, + "learning_rate": 4.997687785649188e-06, + "loss": 0.6152, + "step": 1216 + }, + { + "epoch": 0.08947213645052199, + "grad_norm": 0.90006422996521, + "learning_rate": 4.997683640679651e-06, + "loss": 0.6531, + "step": 1217 + }, + { + "epoch": 0.08954565505072784, + "grad_norm": 0.924672544002533, + "learning_rate": 4.997679491999942e-06, + "loss": 0.6272, + "step": 1218 + }, + { + "epoch": 0.08961917365093369, + "grad_norm": 0.8825312256813049, + "learning_rate": 4.997675339610066e-06, + "loss": 0.5888, + "step": 1219 + }, + { + "epoch": 0.08969269225113954, + "grad_norm": 0.9861111640930176, + "learning_rate": 4.99767118351003e-06, + "loss": 0.6989, + "step": 1220 + }, + { + "epoch": 0.0897662108513454, + "grad_norm": 0.9322039484977722, + "learning_rate": 4.9976670236998414e-06, + "loss": 0.6165, + "step": 1221 + }, + { + "epoch": 0.08983972945155125, + "grad_norm": 0.9430164694786072, + "learning_rate": 4.997662860179504e-06, + "loss": 0.6373, + "step": 1222 + }, + { + "epoch": 0.0899132480517571, + "grad_norm": 0.8880107998847961, + "learning_rate": 4.997658692949026e-06, + "loss": 0.6245, + "step": 1223 + }, + { + "epoch": 0.08998676665196295, + "grad_norm": 0.9071699976921082, + "learning_rate": 4.997654522008412e-06, + "loss": 0.5746, + "step": 1224 + }, + { + "epoch": 0.0900602852521688, + "grad_norm": 0.9234199523925781, + "learning_rate": 4.99765034735767e-06, + "loss": 0.6516, + "step": 1225 + }, + { + "epoch": 0.09013380385237466, + "grad_norm": 0.9044167995452881, + "learning_rate": 4.9976461689968045e-06, + "loss": 0.638, + "step": 1226 + }, + { + "epoch": 0.09020732245258051, + "grad_norm": 0.821570098400116, + "learning_rate": 4.997641986925823e-06, + "loss": 0.5951, + "step": 1227 + }, + { + "epoch": 0.09028084105278636, + "grad_norm": 0.8973380327224731, + "learning_rate": 4.9976378011447304e-06, + "loss": 0.6633, + "step": 1228 + }, + { + "epoch": 0.09035435965299221, + "grad_norm": 0.9353355765342712, + "learning_rate": 4.997633611653534e-06, + "loss": 0.6148, + "step": 1229 + }, + { + "epoch": 0.09042787825319806, + "grad_norm": 0.8975375890731812, + "learning_rate": 4.9976294184522385e-06, + "loss": 0.6279, + "step": 1230 + }, + { + "epoch": 0.09050139685340391, + "grad_norm": 0.9435052275657654, + "learning_rate": 4.997625221540853e-06, + "loss": 0.6475, + "step": 1231 + }, + { + "epoch": 0.09057491545360977, + "grad_norm": 0.876145601272583, + "learning_rate": 4.997621020919381e-06, + "loss": 0.6389, + "step": 1232 + }, + { + "epoch": 0.09064843405381562, + "grad_norm": 0.9499614834785461, + "learning_rate": 4.99761681658783e-06, + "loss": 0.6115, + "step": 1233 + }, + { + "epoch": 0.09072195265402147, + "grad_norm": 0.9411835074424744, + "learning_rate": 4.997612608546206e-06, + "loss": 0.6482, + "step": 1234 + }, + { + "epoch": 0.09079547125422732, + "grad_norm": 0.9421559572219849, + "learning_rate": 4.997608396794516e-06, + "loss": 0.6327, + "step": 1235 + }, + { + "epoch": 0.09086898985443317, + "grad_norm": 0.8936654925346375, + "learning_rate": 4.9976041813327646e-06, + "loss": 0.6344, + "step": 1236 + }, + { + "epoch": 0.09094250845463903, + "grad_norm": 0.8863602876663208, + "learning_rate": 4.997599962160958e-06, + "loss": 0.6531, + "step": 1237 + }, + { + "epoch": 0.09101602705484488, + "grad_norm": 0.8569052219390869, + "learning_rate": 4.997595739279105e-06, + "loss": 0.6242, + "step": 1238 + }, + { + "epoch": 0.09108954565505073, + "grad_norm": 0.8402303457260132, + "learning_rate": 4.99759151268721e-06, + "loss": 0.5872, + "step": 1239 + }, + { + "epoch": 0.09116306425525658, + "grad_norm": 0.8806663751602173, + "learning_rate": 4.9975872823852785e-06, + "loss": 0.6196, + "step": 1240 + }, + { + "epoch": 0.09123658285546243, + "grad_norm": 0.9181160926818848, + "learning_rate": 4.997583048373319e-06, + "loss": 0.6337, + "step": 1241 + }, + { + "epoch": 0.09131010145566829, + "grad_norm": 0.9151357412338257, + "learning_rate": 4.997578810651336e-06, + "loss": 0.629, + "step": 1242 + }, + { + "epoch": 0.09138362005587414, + "grad_norm": 0.9126406908035278, + "learning_rate": 4.997574569219338e-06, + "loss": 0.6299, + "step": 1243 + }, + { + "epoch": 0.09145713865607999, + "grad_norm": 0.9350519776344299, + "learning_rate": 4.997570324077328e-06, + "loss": 0.6564, + "step": 1244 + }, + { + "epoch": 0.09153065725628584, + "grad_norm": 0.9262341856956482, + "learning_rate": 4.997566075225315e-06, + "loss": 0.6592, + "step": 1245 + }, + { + "epoch": 0.09160417585649169, + "grad_norm": 0.8690038919448853, + "learning_rate": 4.997561822663304e-06, + "loss": 0.6518, + "step": 1246 + }, + { + "epoch": 0.09167769445669754, + "grad_norm": 0.8549033403396606, + "learning_rate": 4.997557566391301e-06, + "loss": 0.6123, + "step": 1247 + }, + { + "epoch": 0.0917512130569034, + "grad_norm": 0.9294427037239075, + "learning_rate": 4.997553306409314e-06, + "loss": 0.6738, + "step": 1248 + }, + { + "epoch": 0.09182473165710925, + "grad_norm": 0.9662851095199585, + "learning_rate": 4.997549042717348e-06, + "loss": 0.6308, + "step": 1249 + }, + { + "epoch": 0.0918982502573151, + "grad_norm": 0.9071508646011353, + "learning_rate": 4.997544775315409e-06, + "loss": 0.6203, + "step": 1250 + }, + { + "epoch": 0.09197176885752095, + "grad_norm": 0.8780937790870667, + "learning_rate": 4.997540504203505e-06, + "loss": 0.618, + "step": 1251 + }, + { + "epoch": 0.0920452874577268, + "grad_norm": 0.852019190788269, + "learning_rate": 4.997536229381641e-06, + "loss": 0.6251, + "step": 1252 + }, + { + "epoch": 0.09211880605793266, + "grad_norm": 0.9117379784584045, + "learning_rate": 4.997531950849823e-06, + "loss": 0.6333, + "step": 1253 + }, + { + "epoch": 0.09219232465813851, + "grad_norm": 0.8933721780776978, + "learning_rate": 4.9975276686080595e-06, + "loss": 0.5999, + "step": 1254 + }, + { + "epoch": 0.09226584325834436, + "grad_norm": 0.899491012096405, + "learning_rate": 4.997523382656354e-06, + "loss": 0.6312, + "step": 1255 + }, + { + "epoch": 0.09233936185855021, + "grad_norm": 0.8897246718406677, + "learning_rate": 4.997519092994715e-06, + "loss": 0.6103, + "step": 1256 + }, + { + "epoch": 0.09241288045875606, + "grad_norm": 0.9475231766700745, + "learning_rate": 4.997514799623149e-06, + "loss": 0.6493, + "step": 1257 + }, + { + "epoch": 0.09248639905896192, + "grad_norm": 0.9129990339279175, + "learning_rate": 4.997510502541659e-06, + "loss": 0.6303, + "step": 1258 + }, + { + "epoch": 0.09255991765916777, + "grad_norm": 0.9106349945068359, + "learning_rate": 4.997506201750256e-06, + "loss": 0.6811, + "step": 1259 + }, + { + "epoch": 0.09263343625937362, + "grad_norm": 0.9311082363128662, + "learning_rate": 4.997501897248944e-06, + "loss": 0.6068, + "step": 1260 + }, + { + "epoch": 0.09270695485957947, + "grad_norm": 0.9512309432029724, + "learning_rate": 4.997497589037729e-06, + "loss": 0.6152, + "step": 1261 + }, + { + "epoch": 0.09278047345978532, + "grad_norm": 0.9248741269111633, + "learning_rate": 4.997493277116619e-06, + "loss": 0.6265, + "step": 1262 + }, + { + "epoch": 0.09285399205999118, + "grad_norm": 0.905279278755188, + "learning_rate": 4.9974889614856185e-06, + "loss": 0.5939, + "step": 1263 + }, + { + "epoch": 0.09292751066019703, + "grad_norm": 0.9188653230667114, + "learning_rate": 4.997484642144735e-06, + "loss": 0.6282, + "step": 1264 + }, + { + "epoch": 0.09300102926040288, + "grad_norm": 0.9226070642471313, + "learning_rate": 4.997480319093976e-06, + "loss": 0.6015, + "step": 1265 + }, + { + "epoch": 0.09307454786060873, + "grad_norm": 0.9034364819526672, + "learning_rate": 4.997475992333346e-06, + "loss": 0.6342, + "step": 1266 + }, + { + "epoch": 0.09314806646081458, + "grad_norm": 0.8857253789901733, + "learning_rate": 4.997471661862852e-06, + "loss": 0.6166, + "step": 1267 + }, + { + "epoch": 0.09322158506102043, + "grad_norm": 0.8909137845039368, + "learning_rate": 4.997467327682501e-06, + "loss": 0.6554, + "step": 1268 + }, + { + "epoch": 0.09329510366122629, + "grad_norm": 0.9227771162986755, + "learning_rate": 4.997462989792299e-06, + "loss": 0.6214, + "step": 1269 + }, + { + "epoch": 0.09336862226143214, + "grad_norm": 0.9018422961235046, + "learning_rate": 4.997458648192252e-06, + "loss": 0.6695, + "step": 1270 + }, + { + "epoch": 0.09344214086163799, + "grad_norm": 0.9017221331596375, + "learning_rate": 4.997454302882367e-06, + "loss": 0.6121, + "step": 1271 + }, + { + "epoch": 0.09351565946184384, + "grad_norm": 0.8545765280723572, + "learning_rate": 4.997449953862651e-06, + "loss": 0.5826, + "step": 1272 + }, + { + "epoch": 0.0935891780620497, + "grad_norm": 0.8912649750709534, + "learning_rate": 4.99744560113311e-06, + "loss": 0.6312, + "step": 1273 + }, + { + "epoch": 0.09366269666225555, + "grad_norm": 0.8742656707763672, + "learning_rate": 4.9974412446937495e-06, + "loss": 0.6213, + "step": 1274 + }, + { + "epoch": 0.0937362152624614, + "grad_norm": 0.9401431679725647, + "learning_rate": 4.997436884544576e-06, + "loss": 0.6244, + "step": 1275 + }, + { + "epoch": 0.09380973386266725, + "grad_norm": 0.8523190021514893, + "learning_rate": 4.997432520685599e-06, + "loss": 0.6316, + "step": 1276 + }, + { + "epoch": 0.0938832524628731, + "grad_norm": 0.8962435126304626, + "learning_rate": 4.997428153116821e-06, + "loss": 0.5697, + "step": 1277 + }, + { + "epoch": 0.09395677106307895, + "grad_norm": 0.9663000106811523, + "learning_rate": 4.997423781838251e-06, + "loss": 0.6468, + "step": 1278 + }, + { + "epoch": 0.0940302896632848, + "grad_norm": 0.8730047941207886, + "learning_rate": 4.997419406849895e-06, + "loss": 0.6288, + "step": 1279 + }, + { + "epoch": 0.09410380826349066, + "grad_norm": 0.881116509437561, + "learning_rate": 4.997415028151758e-06, + "loss": 0.5948, + "step": 1280 + }, + { + "epoch": 0.09417732686369651, + "grad_norm": 0.9392960667610168, + "learning_rate": 4.997410645743848e-06, + "loss": 0.6263, + "step": 1281 + }, + { + "epoch": 0.09425084546390236, + "grad_norm": 0.9497473835945129, + "learning_rate": 4.997406259626172e-06, + "loss": 0.6065, + "step": 1282 + }, + { + "epoch": 0.09432436406410821, + "grad_norm": 0.9031859636306763, + "learning_rate": 4.997401869798736e-06, + "loss": 0.6728, + "step": 1283 + }, + { + "epoch": 0.09439788266431408, + "grad_norm": 0.8799036145210266, + "learning_rate": 4.9973974762615455e-06, + "loss": 0.6166, + "step": 1284 + }, + { + "epoch": 0.09447140126451993, + "grad_norm": 0.8212847709655762, + "learning_rate": 4.9973930790146086e-06, + "loss": 0.6348, + "step": 1285 + }, + { + "epoch": 0.09454491986472578, + "grad_norm": 0.8582103252410889, + "learning_rate": 4.997388678057931e-06, + "loss": 0.6207, + "step": 1286 + }, + { + "epoch": 0.09461843846493163, + "grad_norm": 0.9493943452835083, + "learning_rate": 4.997384273391518e-06, + "loss": 0.6335, + "step": 1287 + }, + { + "epoch": 0.09469195706513749, + "grad_norm": 0.8859123587608337, + "learning_rate": 4.997379865015378e-06, + "loss": 0.6122, + "step": 1288 + }, + { + "epoch": 0.09476547566534334, + "grad_norm": 0.9158732295036316, + "learning_rate": 4.997375452929517e-06, + "loss": 0.5591, + "step": 1289 + }, + { + "epoch": 0.09483899426554919, + "grad_norm": 0.8927797675132751, + "learning_rate": 4.997371037133943e-06, + "loss": 0.6341, + "step": 1290 + }, + { + "epoch": 0.09491251286575504, + "grad_norm": 0.9322537779808044, + "learning_rate": 4.997366617628659e-06, + "loss": 0.6254, + "step": 1291 + }, + { + "epoch": 0.0949860314659609, + "grad_norm": 0.8776544332504272, + "learning_rate": 4.9973621944136745e-06, + "loss": 0.6177, + "step": 1292 + }, + { + "epoch": 0.09505955006616675, + "grad_norm": 0.8595021367073059, + "learning_rate": 4.997357767488995e-06, + "loss": 0.5878, + "step": 1293 + }, + { + "epoch": 0.0951330686663726, + "grad_norm": 0.8718990087509155, + "learning_rate": 4.997353336854627e-06, + "loss": 0.6213, + "step": 1294 + }, + { + "epoch": 0.09520658726657845, + "grad_norm": 0.9269999861717224, + "learning_rate": 4.997348902510578e-06, + "loss": 0.6403, + "step": 1295 + }, + { + "epoch": 0.0952801058667843, + "grad_norm": 0.8546423316001892, + "learning_rate": 4.997344464456854e-06, + "loss": 0.6164, + "step": 1296 + }, + { + "epoch": 0.09535362446699015, + "grad_norm": 0.8853698968887329, + "learning_rate": 4.997340022693461e-06, + "loss": 0.6619, + "step": 1297 + }, + { + "epoch": 0.095427143067196, + "grad_norm": 0.9006273150444031, + "learning_rate": 4.997335577220407e-06, + "loss": 0.6026, + "step": 1298 + }, + { + "epoch": 0.09550066166740186, + "grad_norm": 0.8324064612388611, + "learning_rate": 4.997331128037697e-06, + "loss": 0.6192, + "step": 1299 + }, + { + "epoch": 0.09557418026760771, + "grad_norm": 0.9204953908920288, + "learning_rate": 4.997326675145338e-06, + "loss": 0.6437, + "step": 1300 + }, + { + "epoch": 0.09564769886781356, + "grad_norm": 0.8699774146080017, + "learning_rate": 4.997322218543338e-06, + "loss": 0.582, + "step": 1301 + }, + { + "epoch": 0.09572121746801941, + "grad_norm": 0.9018247127532959, + "learning_rate": 4.9973177582317015e-06, + "loss": 0.5645, + "step": 1302 + }, + { + "epoch": 0.09579473606822526, + "grad_norm": 0.8852306604385376, + "learning_rate": 4.9973132942104374e-06, + "loss": 0.6155, + "step": 1303 + }, + { + "epoch": 0.09586825466843112, + "grad_norm": 0.966788649559021, + "learning_rate": 4.99730882647955e-06, + "loss": 0.6035, + "step": 1304 + }, + { + "epoch": 0.09594177326863697, + "grad_norm": 0.8863289952278137, + "learning_rate": 4.997304355039048e-06, + "loss": 0.6116, + "step": 1305 + }, + { + "epoch": 0.09601529186884282, + "grad_norm": 0.9077394604682922, + "learning_rate": 4.997299879888937e-06, + "loss": 0.6322, + "step": 1306 + }, + { + "epoch": 0.09608881046904867, + "grad_norm": 0.9378896951675415, + "learning_rate": 4.997295401029223e-06, + "loss": 0.6447, + "step": 1307 + }, + { + "epoch": 0.09616232906925452, + "grad_norm": 0.9222307205200195, + "learning_rate": 4.997290918459914e-06, + "loss": 0.6291, + "step": 1308 + }, + { + "epoch": 0.09623584766946038, + "grad_norm": 0.9320966005325317, + "learning_rate": 4.997286432181016e-06, + "loss": 0.6078, + "step": 1309 + }, + { + "epoch": 0.09630936626966623, + "grad_norm": 0.9126346707344055, + "learning_rate": 4.997281942192536e-06, + "loss": 0.601, + "step": 1310 + }, + { + "epoch": 0.09638288486987208, + "grad_norm": 0.9355618953704834, + "learning_rate": 4.997277448494481e-06, + "loss": 0.63, + "step": 1311 + }, + { + "epoch": 0.09645640347007793, + "grad_norm": 0.8697651624679565, + "learning_rate": 4.997272951086856e-06, + "loss": 0.576, + "step": 1312 + }, + { + "epoch": 0.09652992207028378, + "grad_norm": 0.8685473203659058, + "learning_rate": 4.997268449969669e-06, + "loss": 0.6113, + "step": 1313 + }, + { + "epoch": 0.09660344067048963, + "grad_norm": 0.8836897611618042, + "learning_rate": 4.997263945142926e-06, + "loss": 0.6131, + "step": 1314 + }, + { + "epoch": 0.09667695927069549, + "grad_norm": 0.8842237591743469, + "learning_rate": 4.997259436606635e-06, + "loss": 0.6401, + "step": 1315 + }, + { + "epoch": 0.09675047787090134, + "grad_norm": 0.9020752310752869, + "learning_rate": 4.997254924360801e-06, + "loss": 0.6515, + "step": 1316 + }, + { + "epoch": 0.09682399647110719, + "grad_norm": 0.9172556400299072, + "learning_rate": 4.997250408405433e-06, + "loss": 0.6143, + "step": 1317 + }, + { + "epoch": 0.09689751507131304, + "grad_norm": 0.8936550617218018, + "learning_rate": 4.997245888740535e-06, + "loss": 0.6294, + "step": 1318 + }, + { + "epoch": 0.0969710336715189, + "grad_norm": 0.9413780570030212, + "learning_rate": 4.997241365366116e-06, + "loss": 0.6058, + "step": 1319 + }, + { + "epoch": 0.09704455227172475, + "grad_norm": 0.9682443737983704, + "learning_rate": 4.997236838282182e-06, + "loss": 0.622, + "step": 1320 + }, + { + "epoch": 0.0971180708719306, + "grad_norm": 0.9367340803146362, + "learning_rate": 4.997232307488738e-06, + "loss": 0.5936, + "step": 1321 + }, + { + "epoch": 0.09719158947213645, + "grad_norm": 0.8736156225204468, + "learning_rate": 4.997227772985792e-06, + "loss": 0.6351, + "step": 1322 + }, + { + "epoch": 0.0972651080723423, + "grad_norm": 0.927788257598877, + "learning_rate": 4.997223234773352e-06, + "loss": 0.66, + "step": 1323 + }, + { + "epoch": 0.09733862667254815, + "grad_norm": 0.873458743095398, + "learning_rate": 4.997218692851423e-06, + "loss": 0.6209, + "step": 1324 + }, + { + "epoch": 0.097412145272754, + "grad_norm": 0.8703386783599854, + "learning_rate": 4.9972141472200125e-06, + "loss": 0.5986, + "step": 1325 + }, + { + "epoch": 0.09748566387295986, + "grad_norm": 0.8884498476982117, + "learning_rate": 4.997209597879127e-06, + "loss": 0.6482, + "step": 1326 + }, + { + "epoch": 0.09755918247316571, + "grad_norm": 0.9118110537528992, + "learning_rate": 4.997205044828774e-06, + "loss": 0.598, + "step": 1327 + }, + { + "epoch": 0.09763270107337156, + "grad_norm": 0.8482718467712402, + "learning_rate": 4.997200488068959e-06, + "loss": 0.596, + "step": 1328 + }, + { + "epoch": 0.09770621967357741, + "grad_norm": 0.9670121073722839, + "learning_rate": 4.9971959275996895e-06, + "loss": 0.6374, + "step": 1329 + }, + { + "epoch": 0.09777973827378326, + "grad_norm": 0.8815810680389404, + "learning_rate": 4.997191363420972e-06, + "loss": 0.5921, + "step": 1330 + }, + { + "epoch": 0.09785325687398912, + "grad_norm": 0.8391305208206177, + "learning_rate": 4.997186795532815e-06, + "loss": 0.5538, + "step": 1331 + }, + { + "epoch": 0.09792677547419497, + "grad_norm": 0.895730197429657, + "learning_rate": 4.9971822239352216e-06, + "loss": 0.5597, + "step": 1332 + }, + { + "epoch": 0.09800029407440082, + "grad_norm": 0.8969757556915283, + "learning_rate": 4.9971776486282016e-06, + "loss": 0.6123, + "step": 1333 + }, + { + "epoch": 0.09807381267460667, + "grad_norm": 0.9537861943244934, + "learning_rate": 4.997173069611762e-06, + "loss": 0.6774, + "step": 1334 + }, + { + "epoch": 0.09814733127481252, + "grad_norm": 0.9586853384971619, + "learning_rate": 4.997168486885907e-06, + "loss": 0.6133, + "step": 1335 + }, + { + "epoch": 0.09822084987501838, + "grad_norm": 0.9030576944351196, + "learning_rate": 4.997163900450645e-06, + "loss": 0.634, + "step": 1336 + }, + { + "epoch": 0.09829436847522423, + "grad_norm": 0.9122081398963928, + "learning_rate": 4.997159310305984e-06, + "loss": 0.675, + "step": 1337 + }, + { + "epoch": 0.09836788707543008, + "grad_norm": 0.9322298765182495, + "learning_rate": 4.997154716451928e-06, + "loss": 0.6216, + "step": 1338 + }, + { + "epoch": 0.09844140567563593, + "grad_norm": 0.8688555955886841, + "learning_rate": 4.9971501188884875e-06, + "loss": 0.6119, + "step": 1339 + }, + { + "epoch": 0.09851492427584178, + "grad_norm": 0.8808645606040955, + "learning_rate": 4.997145517615666e-06, + "loss": 0.6059, + "step": 1340 + }, + { + "epoch": 0.09858844287604764, + "grad_norm": 0.892190158367157, + "learning_rate": 4.9971409126334715e-06, + "loss": 0.643, + "step": 1341 + }, + { + "epoch": 0.09866196147625349, + "grad_norm": 0.883420467376709, + "learning_rate": 4.997136303941911e-06, + "loss": 0.6039, + "step": 1342 + }, + { + "epoch": 0.09873548007645934, + "grad_norm": 0.9269583821296692, + "learning_rate": 4.997131691540991e-06, + "loss": 0.6114, + "step": 1343 + }, + { + "epoch": 0.09880899867666519, + "grad_norm": 0.8222026228904724, + "learning_rate": 4.997127075430719e-06, + "loss": 0.5544, + "step": 1344 + }, + { + "epoch": 0.09888251727687104, + "grad_norm": 0.9070024490356445, + "learning_rate": 4.997122455611102e-06, + "loss": 0.6406, + "step": 1345 + }, + { + "epoch": 0.0989560358770769, + "grad_norm": 0.8951523900032043, + "learning_rate": 4.9971178320821455e-06, + "loss": 0.6216, + "step": 1346 + }, + { + "epoch": 0.09902955447728275, + "grad_norm": 1.0359801054000854, + "learning_rate": 4.9971132048438574e-06, + "loss": 0.5938, + "step": 1347 + }, + { + "epoch": 0.0991030730774886, + "grad_norm": 0.8998252749443054, + "learning_rate": 4.997108573896244e-06, + "loss": 0.6073, + "step": 1348 + }, + { + "epoch": 0.09917659167769445, + "grad_norm": 0.8432213664054871, + "learning_rate": 4.997103939239313e-06, + "loss": 0.5994, + "step": 1349 + }, + { + "epoch": 0.0992501102779003, + "grad_norm": 0.9023082256317139, + "learning_rate": 4.99709930087307e-06, + "loss": 0.6366, + "step": 1350 + }, + { + "epoch": 0.09932362887810615, + "grad_norm": 0.8838227987289429, + "learning_rate": 4.997094658797525e-06, + "loss": 0.6125, + "step": 1351 + }, + { + "epoch": 0.099397147478312, + "grad_norm": 0.9405934810638428, + "learning_rate": 4.997090013012681e-06, + "loss": 0.6636, + "step": 1352 + }, + { + "epoch": 0.09947066607851786, + "grad_norm": 0.9359622597694397, + "learning_rate": 4.997085363518546e-06, + "loss": 0.6778, + "step": 1353 + }, + { + "epoch": 0.09954418467872372, + "grad_norm": 0.927362859249115, + "learning_rate": 4.997080710315129e-06, + "loss": 0.6007, + "step": 1354 + }, + { + "epoch": 0.09961770327892958, + "grad_norm": 0.9375314116477966, + "learning_rate": 4.997076053402434e-06, + "loss": 0.6566, + "step": 1355 + }, + { + "epoch": 0.09969122187913543, + "grad_norm": 0.885159969329834, + "learning_rate": 4.997071392780469e-06, + "loss": 0.595, + "step": 1356 + }, + { + "epoch": 0.09976474047934128, + "grad_norm": 0.933778703212738, + "learning_rate": 4.997066728449242e-06, + "loss": 0.6614, + "step": 1357 + }, + { + "epoch": 0.09983825907954713, + "grad_norm": 0.8726223111152649, + "learning_rate": 4.9970620604087595e-06, + "loss": 0.6176, + "step": 1358 + }, + { + "epoch": 0.09991177767975298, + "grad_norm": 0.8980335593223572, + "learning_rate": 4.997057388659027e-06, + "loss": 0.6183, + "step": 1359 + }, + { + "epoch": 0.09998529627995884, + "grad_norm": 0.8865818381309509, + "learning_rate": 4.997052713200054e-06, + "loss": 0.6338, + "step": 1360 + }, + { + "epoch": 0.10005881488016469, + "grad_norm": 0.9313775300979614, + "learning_rate": 4.9970480340318435e-06, + "loss": 0.6112, + "step": 1361 + }, + { + "epoch": 0.10013233348037054, + "grad_norm": 0.9235588312149048, + "learning_rate": 4.997043351154407e-06, + "loss": 0.6152, + "step": 1362 + }, + { + "epoch": 0.10020585208057639, + "grad_norm": 0.9101454019546509, + "learning_rate": 4.997038664567749e-06, + "loss": 0.6153, + "step": 1363 + }, + { + "epoch": 0.10027937068078224, + "grad_norm": 0.9225130081176758, + "learning_rate": 4.997033974271877e-06, + "loss": 0.5729, + "step": 1364 + }, + { + "epoch": 0.1003528892809881, + "grad_norm": 0.8954328894615173, + "learning_rate": 4.997029280266796e-06, + "loss": 0.6274, + "step": 1365 + }, + { + "epoch": 0.10042640788119395, + "grad_norm": 0.85503751039505, + "learning_rate": 4.997024582552516e-06, + "loss": 0.6058, + "step": 1366 + }, + { + "epoch": 0.1004999264813998, + "grad_norm": 0.8610653877258301, + "learning_rate": 4.997019881129042e-06, + "loss": 0.6102, + "step": 1367 + }, + { + "epoch": 0.10057344508160565, + "grad_norm": 0.8606139421463013, + "learning_rate": 4.997015175996384e-06, + "loss": 0.5913, + "step": 1368 + }, + { + "epoch": 0.1006469636818115, + "grad_norm": 0.879711925983429, + "learning_rate": 4.997010467154544e-06, + "loss": 0.6453, + "step": 1369 + }, + { + "epoch": 0.10072048228201735, + "grad_norm": 0.9129104018211365, + "learning_rate": 4.997005754603533e-06, + "loss": 0.6048, + "step": 1370 + }, + { + "epoch": 0.1007940008822232, + "grad_norm": 0.9379005432128906, + "learning_rate": 4.997001038343356e-06, + "loss": 0.6579, + "step": 1371 + }, + { + "epoch": 0.10086751948242906, + "grad_norm": 0.9601891040802002, + "learning_rate": 4.996996318374022e-06, + "loss": 0.6542, + "step": 1372 + }, + { + "epoch": 0.10094103808263491, + "grad_norm": 0.8383168578147888, + "learning_rate": 4.9969915946955356e-06, + "loss": 0.614, + "step": 1373 + }, + { + "epoch": 0.10101455668284076, + "grad_norm": 0.8815730810165405, + "learning_rate": 4.996986867307905e-06, + "loss": 0.6077, + "step": 1374 + }, + { + "epoch": 0.10108807528304661, + "grad_norm": 0.9085824489593506, + "learning_rate": 4.996982136211137e-06, + "loss": 0.6539, + "step": 1375 + }, + { + "epoch": 0.10116159388325247, + "grad_norm": 0.8811940550804138, + "learning_rate": 4.9969774014052395e-06, + "loss": 0.6322, + "step": 1376 + }, + { + "epoch": 0.10123511248345832, + "grad_norm": 0.8661423921585083, + "learning_rate": 4.996972662890218e-06, + "loss": 0.6002, + "step": 1377 + }, + { + "epoch": 0.10130863108366417, + "grad_norm": 0.9173404574394226, + "learning_rate": 4.996967920666081e-06, + "loss": 0.6252, + "step": 1378 + }, + { + "epoch": 0.10138214968387002, + "grad_norm": 0.9662761688232422, + "learning_rate": 4.9969631747328345e-06, + "loss": 0.635, + "step": 1379 + }, + { + "epoch": 0.10145566828407587, + "grad_norm": 0.9285694360733032, + "learning_rate": 4.996958425090486e-06, + "loss": 0.6566, + "step": 1380 + }, + { + "epoch": 0.10152918688428172, + "grad_norm": 0.8767092823982239, + "learning_rate": 4.996953671739042e-06, + "loss": 0.5465, + "step": 1381 + }, + { + "epoch": 0.10160270548448758, + "grad_norm": 0.9105330109596252, + "learning_rate": 4.996948914678511e-06, + "loss": 0.6194, + "step": 1382 + }, + { + "epoch": 0.10167622408469343, + "grad_norm": 0.9721804261207581, + "learning_rate": 4.9969441539088985e-06, + "loss": 0.6641, + "step": 1383 + }, + { + "epoch": 0.10174974268489928, + "grad_norm": 0.8862181901931763, + "learning_rate": 4.996939389430212e-06, + "loss": 0.6586, + "step": 1384 + }, + { + "epoch": 0.10182326128510513, + "grad_norm": 0.913955569267273, + "learning_rate": 4.9969346212424595e-06, + "loss": 0.6176, + "step": 1385 + }, + { + "epoch": 0.10189677988531098, + "grad_norm": 0.89970463514328, + "learning_rate": 4.996929849345647e-06, + "loss": 0.6303, + "step": 1386 + }, + { + "epoch": 0.10197029848551684, + "grad_norm": 0.8939787745475769, + "learning_rate": 4.996925073739783e-06, + "loss": 0.6129, + "step": 1387 + }, + { + "epoch": 0.10204381708572269, + "grad_norm": 0.9203036427497864, + "learning_rate": 4.996920294424871e-06, + "loss": 0.63, + "step": 1388 + }, + { + "epoch": 0.10211733568592854, + "grad_norm": 0.9050187468528748, + "learning_rate": 4.996915511400922e-06, + "loss": 0.6636, + "step": 1389 + }, + { + "epoch": 0.10219085428613439, + "grad_norm": 0.8690789937973022, + "learning_rate": 4.996910724667942e-06, + "loss": 0.6143, + "step": 1390 + }, + { + "epoch": 0.10226437288634024, + "grad_norm": 0.9079875946044922, + "learning_rate": 4.996905934225938e-06, + "loss": 0.6365, + "step": 1391 + }, + { + "epoch": 0.1023378914865461, + "grad_norm": 0.9446528553962708, + "learning_rate": 4.9969011400749165e-06, + "loss": 0.6482, + "step": 1392 + }, + { + "epoch": 0.10241141008675195, + "grad_norm": 0.9385480880737305, + "learning_rate": 4.9968963422148855e-06, + "loss": 0.6468, + "step": 1393 + }, + { + "epoch": 0.1024849286869578, + "grad_norm": 0.9558590650558472, + "learning_rate": 4.9968915406458505e-06, + "loss": 0.6731, + "step": 1394 + }, + { + "epoch": 0.10255844728716365, + "grad_norm": 0.9905747175216675, + "learning_rate": 4.996886735367821e-06, + "loss": 0.6378, + "step": 1395 + }, + { + "epoch": 0.1026319658873695, + "grad_norm": 0.8774735331535339, + "learning_rate": 4.996881926380803e-06, + "loss": 0.6343, + "step": 1396 + }, + { + "epoch": 0.10270548448757535, + "grad_norm": 0.8653581142425537, + "learning_rate": 4.996877113684803e-06, + "loss": 0.6086, + "step": 1397 + }, + { + "epoch": 0.1027790030877812, + "grad_norm": 0.8818200826644897, + "learning_rate": 4.996872297279829e-06, + "loss": 0.571, + "step": 1398 + }, + { + "epoch": 0.10285252168798706, + "grad_norm": 0.9042085409164429, + "learning_rate": 4.996867477165889e-06, + "loss": 0.6332, + "step": 1399 + }, + { + "epoch": 0.10292604028819291, + "grad_norm": 0.9402602314949036, + "learning_rate": 4.996862653342988e-06, + "loss": 0.6268, + "step": 1400 + }, + { + "epoch": 0.10299955888839876, + "grad_norm": 0.8467676043510437, + "learning_rate": 4.996857825811134e-06, + "loss": 0.6232, + "step": 1401 + }, + { + "epoch": 0.10307307748860461, + "grad_norm": 0.8916479349136353, + "learning_rate": 4.996852994570335e-06, + "loss": 0.6532, + "step": 1402 + }, + { + "epoch": 0.10314659608881047, + "grad_norm": 0.9210587739944458, + "learning_rate": 4.996848159620597e-06, + "loss": 0.6554, + "step": 1403 + }, + { + "epoch": 0.10322011468901632, + "grad_norm": 0.9224237203598022, + "learning_rate": 4.9968433209619285e-06, + "loss": 0.5897, + "step": 1404 + }, + { + "epoch": 0.10329363328922217, + "grad_norm": 0.8731396794319153, + "learning_rate": 4.996838478594336e-06, + "loss": 0.607, + "step": 1405 + }, + { + "epoch": 0.10336715188942802, + "grad_norm": 0.8806251287460327, + "learning_rate": 4.996833632517825e-06, + "loss": 0.6056, + "step": 1406 + }, + { + "epoch": 0.10344067048963387, + "grad_norm": 0.9178991913795471, + "learning_rate": 4.996828782732407e-06, + "loss": 0.6323, + "step": 1407 + }, + { + "epoch": 0.10351418908983973, + "grad_norm": 0.9033799767494202, + "learning_rate": 4.996823929238085e-06, + "loss": 0.6641, + "step": 1408 + }, + { + "epoch": 0.10358770769004558, + "grad_norm": 0.9654202461242676, + "learning_rate": 4.996819072034867e-06, + "loss": 0.6517, + "step": 1409 + }, + { + "epoch": 0.10366122629025143, + "grad_norm": 0.9026849865913391, + "learning_rate": 4.996814211122762e-06, + "loss": 0.6115, + "step": 1410 + }, + { + "epoch": 0.10373474489045728, + "grad_norm": 0.8568432331085205, + "learning_rate": 4.996809346501777e-06, + "loss": 0.5955, + "step": 1411 + }, + { + "epoch": 0.10380826349066313, + "grad_norm": 0.9103450775146484, + "learning_rate": 4.996804478171918e-06, + "loss": 0.651, + "step": 1412 + }, + { + "epoch": 0.10388178209086898, + "grad_norm": 0.8709381818771362, + "learning_rate": 4.996799606133192e-06, + "loss": 0.5959, + "step": 1413 + }, + { + "epoch": 0.10395530069107484, + "grad_norm": 0.9063971042633057, + "learning_rate": 4.996794730385607e-06, + "loss": 0.6238, + "step": 1414 + }, + { + "epoch": 0.10402881929128069, + "grad_norm": 0.8260523676872253, + "learning_rate": 4.996789850929171e-06, + "loss": 0.5893, + "step": 1415 + }, + { + "epoch": 0.10410233789148654, + "grad_norm": 0.9034861326217651, + "learning_rate": 4.996784967763889e-06, + "loss": 0.5906, + "step": 1416 + }, + { + "epoch": 0.10417585649169239, + "grad_norm": 0.8920764923095703, + "learning_rate": 4.996780080889771e-06, + "loss": 0.6307, + "step": 1417 + }, + { + "epoch": 0.10424937509189824, + "grad_norm": 0.8661985397338867, + "learning_rate": 4.996775190306823e-06, + "loss": 0.5918, + "step": 1418 + }, + { + "epoch": 0.1043228936921041, + "grad_norm": 0.8854774236679077, + "learning_rate": 4.996770296015051e-06, + "loss": 0.6355, + "step": 1419 + }, + { + "epoch": 0.10439641229230995, + "grad_norm": 0.8873342871665955, + "learning_rate": 4.996765398014463e-06, + "loss": 0.6515, + "step": 1420 + }, + { + "epoch": 0.1044699308925158, + "grad_norm": 0.8798553943634033, + "learning_rate": 4.996760496305068e-06, + "loss": 0.6146, + "step": 1421 + }, + { + "epoch": 0.10454344949272165, + "grad_norm": 0.8904065489768982, + "learning_rate": 4.996755590886872e-06, + "loss": 0.6066, + "step": 1422 + }, + { + "epoch": 0.10461696809292752, + "grad_norm": 0.8327847719192505, + "learning_rate": 4.996750681759882e-06, + "loss": 0.5582, + "step": 1423 + }, + { + "epoch": 0.10469048669313337, + "grad_norm": 0.9156943559646606, + "learning_rate": 4.996745768924105e-06, + "loss": 0.6277, + "step": 1424 + }, + { + "epoch": 0.10476400529333922, + "grad_norm": 0.8534517884254456, + "learning_rate": 4.99674085237955e-06, + "loss": 0.6223, + "step": 1425 + }, + { + "epoch": 0.10483752389354507, + "grad_norm": 0.8547676801681519, + "learning_rate": 4.996735932126223e-06, + "loss": 0.585, + "step": 1426 + }, + { + "epoch": 0.10491104249375093, + "grad_norm": 0.8899720311164856, + "learning_rate": 4.996731008164131e-06, + "loss": 0.6336, + "step": 1427 + }, + { + "epoch": 0.10498456109395678, + "grad_norm": 0.8394469022750854, + "learning_rate": 4.996726080493281e-06, + "loss": 0.6005, + "step": 1428 + }, + { + "epoch": 0.10505807969416263, + "grad_norm": 0.8239346146583557, + "learning_rate": 4.996721149113682e-06, + "loss": 0.6067, + "step": 1429 + }, + { + "epoch": 0.10513159829436848, + "grad_norm": 0.895564615726471, + "learning_rate": 4.996716214025341e-06, + "loss": 0.6241, + "step": 1430 + }, + { + "epoch": 0.10520511689457433, + "grad_norm": 0.9283389449119568, + "learning_rate": 4.996711275228265e-06, + "loss": 0.6556, + "step": 1431 + }, + { + "epoch": 0.10527863549478018, + "grad_norm": 0.8899241089820862, + "learning_rate": 4.9967063327224594e-06, + "loss": 0.6516, + "step": 1432 + }, + { + "epoch": 0.10535215409498604, + "grad_norm": 0.8655397891998291, + "learning_rate": 4.996701386507935e-06, + "loss": 0.6389, + "step": 1433 + }, + { + "epoch": 0.10542567269519189, + "grad_norm": 0.9780510067939758, + "learning_rate": 4.996696436584697e-06, + "loss": 0.5816, + "step": 1434 + }, + { + "epoch": 0.10549919129539774, + "grad_norm": 0.9130342602729797, + "learning_rate": 4.996691482952752e-06, + "loss": 0.6271, + "step": 1435 + }, + { + "epoch": 0.10557270989560359, + "grad_norm": 0.8626898527145386, + "learning_rate": 4.996686525612109e-06, + "loss": 0.6095, + "step": 1436 + }, + { + "epoch": 0.10564622849580944, + "grad_norm": 0.9286977052688599, + "learning_rate": 4.996681564562775e-06, + "loss": 0.6312, + "step": 1437 + }, + { + "epoch": 0.1057197470960153, + "grad_norm": 0.8507925271987915, + "learning_rate": 4.996676599804759e-06, + "loss": 0.618, + "step": 1438 + }, + { + "epoch": 0.10579326569622115, + "grad_norm": 0.8794651031494141, + "learning_rate": 4.9966716313380645e-06, + "loss": 0.5673, + "step": 1439 + }, + { + "epoch": 0.105866784296427, + "grad_norm": 0.948802649974823, + "learning_rate": 4.996666659162701e-06, + "loss": 0.5925, + "step": 1440 + }, + { + "epoch": 0.10594030289663285, + "grad_norm": 0.8866406083106995, + "learning_rate": 4.996661683278677e-06, + "loss": 0.6249, + "step": 1441 + }, + { + "epoch": 0.1060138214968387, + "grad_norm": 0.8727967739105225, + "learning_rate": 4.996656703685998e-06, + "loss": 0.6013, + "step": 1442 + }, + { + "epoch": 0.10608734009704456, + "grad_norm": 0.924845278263092, + "learning_rate": 4.9966517203846725e-06, + "loss": 0.6161, + "step": 1443 + }, + { + "epoch": 0.10616085869725041, + "grad_norm": 0.8994647264480591, + "learning_rate": 4.996646733374708e-06, + "loss": 0.6436, + "step": 1444 + }, + { + "epoch": 0.10623437729745626, + "grad_norm": 0.9311122894287109, + "learning_rate": 4.996641742656111e-06, + "loss": 0.66, + "step": 1445 + }, + { + "epoch": 0.10630789589766211, + "grad_norm": 0.915627658367157, + "learning_rate": 4.996636748228889e-06, + "loss": 0.6029, + "step": 1446 + }, + { + "epoch": 0.10638141449786796, + "grad_norm": 0.9727275371551514, + "learning_rate": 4.996631750093051e-06, + "loss": 0.6633, + "step": 1447 + }, + { + "epoch": 0.10645493309807381, + "grad_norm": 0.8895959854125977, + "learning_rate": 4.996626748248602e-06, + "loss": 0.6167, + "step": 1448 + }, + { + "epoch": 0.10652845169827967, + "grad_norm": 0.8804746270179749, + "learning_rate": 4.996621742695551e-06, + "loss": 0.615, + "step": 1449 + }, + { + "epoch": 0.10660197029848552, + "grad_norm": 0.901432454586029, + "learning_rate": 4.9966167334339055e-06, + "loss": 0.5834, + "step": 1450 + }, + { + "epoch": 0.10667548889869137, + "grad_norm": 0.8981085419654846, + "learning_rate": 4.9966117204636725e-06, + "loss": 0.6459, + "step": 1451 + }, + { + "epoch": 0.10674900749889722, + "grad_norm": 0.873637318611145, + "learning_rate": 4.996606703784859e-06, + "loss": 0.6388, + "step": 1452 + }, + { + "epoch": 0.10682252609910307, + "grad_norm": 0.9202940464019775, + "learning_rate": 4.996601683397473e-06, + "loss": 0.6186, + "step": 1453 + }, + { + "epoch": 0.10689604469930893, + "grad_norm": 0.9089062213897705, + "learning_rate": 4.996596659301522e-06, + "loss": 0.6294, + "step": 1454 + }, + { + "epoch": 0.10696956329951478, + "grad_norm": 0.9528651833534241, + "learning_rate": 4.996591631497014e-06, + "loss": 0.6425, + "step": 1455 + }, + { + "epoch": 0.10704308189972063, + "grad_norm": 0.8903738260269165, + "learning_rate": 4.996586599983955e-06, + "loss": 0.6076, + "step": 1456 + }, + { + "epoch": 0.10711660049992648, + "grad_norm": 0.8819149732589722, + "learning_rate": 4.9965815647623536e-06, + "loss": 0.5811, + "step": 1457 + }, + { + "epoch": 0.10719011910013233, + "grad_norm": 0.8826157450675964, + "learning_rate": 4.9965765258322175e-06, + "loss": 0.6162, + "step": 1458 + }, + { + "epoch": 0.10726363770033819, + "grad_norm": 0.9485787153244019, + "learning_rate": 4.996571483193553e-06, + "loss": 0.5995, + "step": 1459 + }, + { + "epoch": 0.10733715630054404, + "grad_norm": 0.9387826919555664, + "learning_rate": 4.996566436846368e-06, + "loss": 0.6181, + "step": 1460 + }, + { + "epoch": 0.10741067490074989, + "grad_norm": 0.8855082988739014, + "learning_rate": 4.9965613867906714e-06, + "loss": 0.6248, + "step": 1461 + }, + { + "epoch": 0.10748419350095574, + "grad_norm": 0.9200281500816345, + "learning_rate": 4.996556333026469e-06, + "loss": 0.5912, + "step": 1462 + }, + { + "epoch": 0.10755771210116159, + "grad_norm": 0.8683533668518066, + "learning_rate": 4.996551275553769e-06, + "loss": 0.5917, + "step": 1463 + }, + { + "epoch": 0.10763123070136744, + "grad_norm": 0.8555864095687866, + "learning_rate": 4.996546214372578e-06, + "loss": 0.6184, + "step": 1464 + }, + { + "epoch": 0.1077047493015733, + "grad_norm": 0.8497228622436523, + "learning_rate": 4.996541149482906e-06, + "loss": 0.591, + "step": 1465 + }, + { + "epoch": 0.10777826790177915, + "grad_norm": 0.9019395112991333, + "learning_rate": 4.996536080884757e-06, + "loss": 0.6094, + "step": 1466 + }, + { + "epoch": 0.107851786501985, + "grad_norm": 0.8873715400695801, + "learning_rate": 4.996531008578142e-06, + "loss": 0.6194, + "step": 1467 + }, + { + "epoch": 0.10792530510219085, + "grad_norm": 0.9219510555267334, + "learning_rate": 4.996525932563065e-06, + "loss": 0.6022, + "step": 1468 + }, + { + "epoch": 0.1079988237023967, + "grad_norm": 0.9184242486953735, + "learning_rate": 4.996520852839537e-06, + "loss": 0.6734, + "step": 1469 + }, + { + "epoch": 0.10807234230260256, + "grad_norm": 0.8671081066131592, + "learning_rate": 4.996515769407564e-06, + "loss": 0.6325, + "step": 1470 + }, + { + "epoch": 0.10814586090280841, + "grad_norm": 0.9371756911277771, + "learning_rate": 4.9965106822671524e-06, + "loss": 0.5924, + "step": 1471 + }, + { + "epoch": 0.10821937950301426, + "grad_norm": 0.8734831213951111, + "learning_rate": 4.996505591418312e-06, + "loss": 0.6271, + "step": 1472 + }, + { + "epoch": 0.10829289810322011, + "grad_norm": 0.8965625166893005, + "learning_rate": 4.996500496861048e-06, + "loss": 0.6292, + "step": 1473 + }, + { + "epoch": 0.10836641670342596, + "grad_norm": 0.8794538378715515, + "learning_rate": 4.99649539859537e-06, + "loss": 0.5927, + "step": 1474 + }, + { + "epoch": 0.10843993530363182, + "grad_norm": 0.9504835605621338, + "learning_rate": 4.996490296621285e-06, + "loss": 0.6252, + "step": 1475 + }, + { + "epoch": 0.10851345390383767, + "grad_norm": 0.8950563073158264, + "learning_rate": 4.9964851909388e-06, + "loss": 0.5973, + "step": 1476 + }, + { + "epoch": 0.10858697250404352, + "grad_norm": 0.8925493955612183, + "learning_rate": 4.996480081547923e-06, + "loss": 0.6551, + "step": 1477 + }, + { + "epoch": 0.10866049110424937, + "grad_norm": 0.9013493657112122, + "learning_rate": 4.996474968448662e-06, + "loss": 0.6182, + "step": 1478 + }, + { + "epoch": 0.10873400970445522, + "grad_norm": 0.882617175579071, + "learning_rate": 4.996469851641024e-06, + "loss": 0.5728, + "step": 1479 + }, + { + "epoch": 0.10880752830466107, + "grad_norm": 0.9242693185806274, + "learning_rate": 4.996464731125016e-06, + "loss": 0.6592, + "step": 1480 + }, + { + "epoch": 0.10888104690486693, + "grad_norm": 0.8768022060394287, + "learning_rate": 4.996459606900647e-06, + "loss": 0.6291, + "step": 1481 + }, + { + "epoch": 0.10895456550507278, + "grad_norm": 0.9397027492523193, + "learning_rate": 4.996454478967924e-06, + "loss": 0.6409, + "step": 1482 + }, + { + "epoch": 0.10902808410527863, + "grad_norm": 0.8947820067405701, + "learning_rate": 4.996449347326854e-06, + "loss": 0.6254, + "step": 1483 + }, + { + "epoch": 0.10910160270548448, + "grad_norm": 0.9460829496383667, + "learning_rate": 4.996444211977446e-06, + "loss": 0.6531, + "step": 1484 + }, + { + "epoch": 0.10917512130569033, + "grad_norm": 0.9487284421920776, + "learning_rate": 4.996439072919706e-06, + "loss": 0.6583, + "step": 1485 + }, + { + "epoch": 0.10924863990589619, + "grad_norm": 0.9412375092506409, + "learning_rate": 4.996433930153643e-06, + "loss": 0.6472, + "step": 1486 + }, + { + "epoch": 0.10932215850610204, + "grad_norm": 0.9278518557548523, + "learning_rate": 4.996428783679264e-06, + "loss": 0.6388, + "step": 1487 + }, + { + "epoch": 0.10939567710630789, + "grad_norm": 0.9579070806503296, + "learning_rate": 4.996423633496576e-06, + "loss": 0.6162, + "step": 1488 + }, + { + "epoch": 0.10946919570651374, + "grad_norm": 0.9094953536987305, + "learning_rate": 4.996418479605588e-06, + "loss": 0.6148, + "step": 1489 + }, + { + "epoch": 0.1095427143067196, + "grad_norm": 0.8479555249214172, + "learning_rate": 4.996413322006308e-06, + "loss": 0.5956, + "step": 1490 + }, + { + "epoch": 0.10961623290692545, + "grad_norm": 0.9837150573730469, + "learning_rate": 4.996408160698741e-06, + "loss": 0.6385, + "step": 1491 + }, + { + "epoch": 0.1096897515071313, + "grad_norm": 0.8889987468719482, + "learning_rate": 4.9964029956828975e-06, + "loss": 0.6203, + "step": 1492 + }, + { + "epoch": 0.10976327010733716, + "grad_norm": 0.9085855484008789, + "learning_rate": 4.996397826958784e-06, + "loss": 0.6626, + "step": 1493 + }, + { + "epoch": 0.10983678870754301, + "grad_norm": 0.928468644618988, + "learning_rate": 4.996392654526408e-06, + "loss": 0.6688, + "step": 1494 + }, + { + "epoch": 0.10991030730774887, + "grad_norm": 0.8755361437797546, + "learning_rate": 4.996387478385777e-06, + "loss": 0.5951, + "step": 1495 + }, + { + "epoch": 0.10998382590795472, + "grad_norm": 0.9008297324180603, + "learning_rate": 4.9963822985368995e-06, + "loss": 0.6109, + "step": 1496 + }, + { + "epoch": 0.11005734450816057, + "grad_norm": 0.9225376844406128, + "learning_rate": 4.996377114979783e-06, + "loss": 0.6029, + "step": 1497 + }, + { + "epoch": 0.11013086310836642, + "grad_norm": 0.8878459334373474, + "learning_rate": 4.9963719277144355e-06, + "loss": 0.6118, + "step": 1498 + }, + { + "epoch": 0.11020438170857227, + "grad_norm": 0.9368178248405457, + "learning_rate": 4.996366736740863e-06, + "loss": 0.6451, + "step": 1499 + }, + { + "epoch": 0.11027790030877813, + "grad_norm": 0.9207358956336975, + "learning_rate": 4.996361542059075e-06, + "loss": 0.614, + "step": 1500 + }, + { + "epoch": 0.11035141890898398, + "grad_norm": 0.9131987690925598, + "learning_rate": 4.996356343669078e-06, + "loss": 0.6487, + "step": 1501 + }, + { + "epoch": 0.11042493750918983, + "grad_norm": 0.9336088299751282, + "learning_rate": 4.9963511415708806e-06, + "loss": 0.6313, + "step": 1502 + }, + { + "epoch": 0.11049845610939568, + "grad_norm": 0.9218401908874512, + "learning_rate": 4.996345935764491e-06, + "loss": 0.6224, + "step": 1503 + }, + { + "epoch": 0.11057197470960153, + "grad_norm": 0.9249551892280579, + "learning_rate": 4.996340726249915e-06, + "loss": 0.6443, + "step": 1504 + }, + { + "epoch": 0.11064549330980739, + "grad_norm": 0.8429004549980164, + "learning_rate": 4.996335513027163e-06, + "loss": 0.5667, + "step": 1505 + }, + { + "epoch": 0.11071901191001324, + "grad_norm": 0.9407071471214294, + "learning_rate": 4.9963302960962404e-06, + "loss": 0.6472, + "step": 1506 + }, + { + "epoch": 0.11079253051021909, + "grad_norm": 0.9019366502761841, + "learning_rate": 4.996325075457156e-06, + "loss": 0.6014, + "step": 1507 + }, + { + "epoch": 0.11086604911042494, + "grad_norm": 0.924136221408844, + "learning_rate": 4.996319851109918e-06, + "loss": 0.6194, + "step": 1508 + }, + { + "epoch": 0.1109395677106308, + "grad_norm": 0.9310719966888428, + "learning_rate": 4.996314623054532e-06, + "loss": 0.6071, + "step": 1509 + }, + { + "epoch": 0.11101308631083665, + "grad_norm": 0.8787524700164795, + "learning_rate": 4.996309391291009e-06, + "loss": 0.584, + "step": 1510 + }, + { + "epoch": 0.1110866049110425, + "grad_norm": 0.9141201376914978, + "learning_rate": 4.996304155819354e-06, + "loss": 0.6647, + "step": 1511 + }, + { + "epoch": 0.11116012351124835, + "grad_norm": 0.9165619015693665, + "learning_rate": 4.996298916639577e-06, + "loss": 0.6005, + "step": 1512 + }, + { + "epoch": 0.1112336421114542, + "grad_norm": 0.9252340793609619, + "learning_rate": 4.9962936737516835e-06, + "loss": 0.6634, + "step": 1513 + }, + { + "epoch": 0.11130716071166005, + "grad_norm": 0.8620991110801697, + "learning_rate": 4.9962884271556825e-06, + "loss": 0.6215, + "step": 1514 + }, + { + "epoch": 0.1113806793118659, + "grad_norm": 0.8973037004470825, + "learning_rate": 4.996283176851582e-06, + "loss": 0.6074, + "step": 1515 + }, + { + "epoch": 0.11145419791207176, + "grad_norm": 0.9142740368843079, + "learning_rate": 4.99627792283939e-06, + "loss": 0.6115, + "step": 1516 + }, + { + "epoch": 0.11152771651227761, + "grad_norm": 0.8560245037078857, + "learning_rate": 4.996272665119113e-06, + "loss": 0.6268, + "step": 1517 + }, + { + "epoch": 0.11160123511248346, + "grad_norm": 0.9435736536979675, + "learning_rate": 4.996267403690761e-06, + "loss": 0.6106, + "step": 1518 + }, + { + "epoch": 0.11167475371268931, + "grad_norm": 0.8681843280792236, + "learning_rate": 4.9962621385543394e-06, + "loss": 0.6246, + "step": 1519 + }, + { + "epoch": 0.11174827231289516, + "grad_norm": 0.8890358209609985, + "learning_rate": 4.996256869709857e-06, + "loss": 0.6192, + "step": 1520 + }, + { + "epoch": 0.11182179091310102, + "grad_norm": 0.9469218850135803, + "learning_rate": 4.996251597157322e-06, + "loss": 0.6689, + "step": 1521 + }, + { + "epoch": 0.11189530951330687, + "grad_norm": 0.9265885353088379, + "learning_rate": 4.996246320896742e-06, + "loss": 0.6107, + "step": 1522 + }, + { + "epoch": 0.11196882811351272, + "grad_norm": 1.0302071571350098, + "learning_rate": 4.996241040928125e-06, + "loss": 0.6999, + "step": 1523 + }, + { + "epoch": 0.11204234671371857, + "grad_norm": 0.8446939587593079, + "learning_rate": 4.996235757251478e-06, + "loss": 0.6075, + "step": 1524 + }, + { + "epoch": 0.11211586531392442, + "grad_norm": 0.8774357438087463, + "learning_rate": 4.9962304698668095e-06, + "loss": 0.6337, + "step": 1525 + }, + { + "epoch": 0.11218938391413028, + "grad_norm": 0.9133644104003906, + "learning_rate": 4.996225178774128e-06, + "loss": 0.6185, + "step": 1526 + }, + { + "epoch": 0.11226290251433613, + "grad_norm": 0.9140355587005615, + "learning_rate": 4.99621988397344e-06, + "loss": 0.5944, + "step": 1527 + }, + { + "epoch": 0.11233642111454198, + "grad_norm": 0.8477957844734192, + "learning_rate": 4.996214585464755e-06, + "loss": 0.6093, + "step": 1528 + }, + { + "epoch": 0.11240993971474783, + "grad_norm": 0.8715763092041016, + "learning_rate": 4.9962092832480795e-06, + "loss": 0.6815, + "step": 1529 + }, + { + "epoch": 0.11248345831495368, + "grad_norm": 0.9087228178977966, + "learning_rate": 4.996203977323421e-06, + "loss": 0.6026, + "step": 1530 + }, + { + "epoch": 0.11255697691515953, + "grad_norm": 0.861834704875946, + "learning_rate": 4.9961986676907885e-06, + "loss": 0.6206, + "step": 1531 + }, + { + "epoch": 0.11263049551536539, + "grad_norm": 0.888852059841156, + "learning_rate": 4.99619335435019e-06, + "loss": 0.6179, + "step": 1532 + }, + { + "epoch": 0.11270401411557124, + "grad_norm": 0.9009319543838501, + "learning_rate": 4.996188037301632e-06, + "loss": 0.5941, + "step": 1533 + }, + { + "epoch": 0.11277753271577709, + "grad_norm": 0.8653982281684875, + "learning_rate": 4.996182716545125e-06, + "loss": 0.6236, + "step": 1534 + }, + { + "epoch": 0.11285105131598294, + "grad_norm": 0.9298326373100281, + "learning_rate": 4.996177392080675e-06, + "loss": 0.6251, + "step": 1535 + }, + { + "epoch": 0.1129245699161888, + "grad_norm": 0.903154730796814, + "learning_rate": 4.996172063908289e-06, + "loss": 0.6305, + "step": 1536 + }, + { + "epoch": 0.11299808851639465, + "grad_norm": 0.9308039546012878, + "learning_rate": 4.996166732027976e-06, + "loss": 0.6257, + "step": 1537 + }, + { + "epoch": 0.1130716071166005, + "grad_norm": 0.9005637168884277, + "learning_rate": 4.996161396439745e-06, + "loss": 0.5961, + "step": 1538 + }, + { + "epoch": 0.11314512571680635, + "grad_norm": 0.8644019961357117, + "learning_rate": 4.996156057143603e-06, + "loss": 0.6173, + "step": 1539 + }, + { + "epoch": 0.1132186443170122, + "grad_norm": 0.857646644115448, + "learning_rate": 4.996150714139557e-06, + "loss": 0.62, + "step": 1540 + }, + { + "epoch": 0.11329216291721805, + "grad_norm": 0.908957839012146, + "learning_rate": 4.996145367427616e-06, + "loss": 0.584, + "step": 1541 + }, + { + "epoch": 0.1133656815174239, + "grad_norm": 0.860297441482544, + "learning_rate": 4.996140017007789e-06, + "loss": 0.627, + "step": 1542 + }, + { + "epoch": 0.11343920011762976, + "grad_norm": 0.8307655453681946, + "learning_rate": 4.996134662880081e-06, + "loss": 0.6182, + "step": 1543 + }, + { + "epoch": 0.11351271871783561, + "grad_norm": 0.9350309371948242, + "learning_rate": 4.9961293050445025e-06, + "loss": 0.6491, + "step": 1544 + }, + { + "epoch": 0.11358623731804146, + "grad_norm": 0.9301838874816895, + "learning_rate": 4.99612394350106e-06, + "loss": 0.6193, + "step": 1545 + }, + { + "epoch": 0.11365975591824731, + "grad_norm": 0.8212436437606812, + "learning_rate": 4.996118578249763e-06, + "loss": 0.6041, + "step": 1546 + }, + { + "epoch": 0.11373327451845316, + "grad_norm": 0.8508825302124023, + "learning_rate": 4.996113209290618e-06, + "loss": 0.5844, + "step": 1547 + }, + { + "epoch": 0.11380679311865902, + "grad_norm": 0.8751680254936218, + "learning_rate": 4.9961078366236335e-06, + "loss": 0.6106, + "step": 1548 + }, + { + "epoch": 0.11388031171886487, + "grad_norm": 0.8692864179611206, + "learning_rate": 4.996102460248817e-06, + "loss": 0.5683, + "step": 1549 + }, + { + "epoch": 0.11395383031907072, + "grad_norm": 0.8827788233757019, + "learning_rate": 4.996097080166178e-06, + "loss": 0.6117, + "step": 1550 + }, + { + "epoch": 0.11402734891927657, + "grad_norm": 0.9100142121315002, + "learning_rate": 4.996091696375723e-06, + "loss": 0.592, + "step": 1551 + }, + { + "epoch": 0.11410086751948242, + "grad_norm": 0.8946461081504822, + "learning_rate": 4.99608630887746e-06, + "loss": 0.6124, + "step": 1552 + }, + { + "epoch": 0.11417438611968828, + "grad_norm": 0.8875262141227722, + "learning_rate": 4.996080917671399e-06, + "loss": 0.6035, + "step": 1553 + }, + { + "epoch": 0.11424790471989413, + "grad_norm": 0.9280027747154236, + "learning_rate": 4.996075522757545e-06, + "loss": 0.6657, + "step": 1554 + }, + { + "epoch": 0.11432142332009998, + "grad_norm": 0.9085277915000916, + "learning_rate": 4.996070124135908e-06, + "loss": 0.6539, + "step": 1555 + }, + { + "epoch": 0.11439494192030583, + "grad_norm": 0.8845333456993103, + "learning_rate": 4.996064721806495e-06, + "loss": 0.6382, + "step": 1556 + }, + { + "epoch": 0.11446846052051168, + "grad_norm": 0.9253994822502136, + "learning_rate": 4.9960593157693145e-06, + "loss": 0.6646, + "step": 1557 + }, + { + "epoch": 0.11454197912071754, + "grad_norm": 0.937165379524231, + "learning_rate": 4.996053906024375e-06, + "loss": 0.6401, + "step": 1558 + }, + { + "epoch": 0.11461549772092339, + "grad_norm": 0.9050517082214355, + "learning_rate": 4.996048492571685e-06, + "loss": 0.6701, + "step": 1559 + }, + { + "epoch": 0.11468901632112924, + "grad_norm": 0.9698276519775391, + "learning_rate": 4.99604307541125e-06, + "loss": 0.6127, + "step": 1560 + }, + { + "epoch": 0.11476253492133509, + "grad_norm": 0.9651087522506714, + "learning_rate": 4.996037654543081e-06, + "loss": 0.5942, + "step": 1561 + }, + { + "epoch": 0.11483605352154096, + "grad_norm": 0.8594624400138855, + "learning_rate": 4.996032229967184e-06, + "loss": 0.6059, + "step": 1562 + }, + { + "epoch": 0.11490957212174681, + "grad_norm": 0.9661678671836853, + "learning_rate": 4.996026801683568e-06, + "loss": 0.6316, + "step": 1563 + }, + { + "epoch": 0.11498309072195266, + "grad_norm": 0.9930208921432495, + "learning_rate": 4.996021369692241e-06, + "loss": 0.639, + "step": 1564 + }, + { + "epoch": 0.11505660932215851, + "grad_norm": 0.8540977239608765, + "learning_rate": 4.996015933993211e-06, + "loss": 0.6021, + "step": 1565 + }, + { + "epoch": 0.11513012792236436, + "grad_norm": 0.8151262998580933, + "learning_rate": 4.996010494586485e-06, + "loss": 0.5585, + "step": 1566 + }, + { + "epoch": 0.11520364652257022, + "grad_norm": 0.9236770272254944, + "learning_rate": 4.996005051472073e-06, + "loss": 0.6614, + "step": 1567 + }, + { + "epoch": 0.11527716512277607, + "grad_norm": 0.9877501726150513, + "learning_rate": 4.9959996046499825e-06, + "loss": 0.6203, + "step": 1568 + }, + { + "epoch": 0.11535068372298192, + "grad_norm": 0.8669698238372803, + "learning_rate": 4.995994154120221e-06, + "loss": 0.5949, + "step": 1569 + }, + { + "epoch": 0.11542420232318777, + "grad_norm": 0.8716546297073364, + "learning_rate": 4.995988699882796e-06, + "loss": 0.611, + "step": 1570 + }, + { + "epoch": 0.11549772092339362, + "grad_norm": 0.9406499266624451, + "learning_rate": 4.995983241937718e-06, + "loss": 0.6578, + "step": 1571 + }, + { + "epoch": 0.11557123952359948, + "grad_norm": 0.8915784358978271, + "learning_rate": 4.995977780284992e-06, + "loss": 0.623, + "step": 1572 + }, + { + "epoch": 0.11564475812380533, + "grad_norm": 0.920809805393219, + "learning_rate": 4.995972314924629e-06, + "loss": 0.5932, + "step": 1573 + }, + { + "epoch": 0.11571827672401118, + "grad_norm": 0.8856778144836426, + "learning_rate": 4.995966845856635e-06, + "loss": 0.6078, + "step": 1574 + }, + { + "epoch": 0.11579179532421703, + "grad_norm": 0.899374783039093, + "learning_rate": 4.995961373081019e-06, + "loss": 0.6173, + "step": 1575 + }, + { + "epoch": 0.11586531392442288, + "grad_norm": 0.9338160157203674, + "learning_rate": 4.9959558965977895e-06, + "loss": 0.6192, + "step": 1576 + }, + { + "epoch": 0.11593883252462873, + "grad_norm": 0.9024181365966797, + "learning_rate": 4.995950416406954e-06, + "loss": 0.5664, + "step": 1577 + }, + { + "epoch": 0.11601235112483459, + "grad_norm": 0.961466908454895, + "learning_rate": 4.995944932508521e-06, + "loss": 0.629, + "step": 1578 + }, + { + "epoch": 0.11608586972504044, + "grad_norm": 0.9213968515396118, + "learning_rate": 4.995939444902498e-06, + "loss": 0.6077, + "step": 1579 + }, + { + "epoch": 0.11615938832524629, + "grad_norm": 0.8926026821136475, + "learning_rate": 4.9959339535888934e-06, + "loss": 0.6025, + "step": 1580 + }, + { + "epoch": 0.11623290692545214, + "grad_norm": 0.9402706623077393, + "learning_rate": 4.995928458567716e-06, + "loss": 0.6225, + "step": 1581 + }, + { + "epoch": 0.116306425525658, + "grad_norm": 0.840830385684967, + "learning_rate": 4.995922959838973e-06, + "loss": 0.5927, + "step": 1582 + }, + { + "epoch": 0.11637994412586385, + "grad_norm": 0.9188015460968018, + "learning_rate": 4.995917457402674e-06, + "loss": 0.6123, + "step": 1583 + }, + { + "epoch": 0.1164534627260697, + "grad_norm": 0.8944933414459229, + "learning_rate": 4.995911951258826e-06, + "loss": 0.622, + "step": 1584 + }, + { + "epoch": 0.11652698132627555, + "grad_norm": 0.8577650189399719, + "learning_rate": 4.995906441407438e-06, + "loss": 0.6188, + "step": 1585 + }, + { + "epoch": 0.1166004999264814, + "grad_norm": 0.9210219383239746, + "learning_rate": 4.9959009278485165e-06, + "loss": 0.6234, + "step": 1586 + }, + { + "epoch": 0.11667401852668725, + "grad_norm": 0.9881633520126343, + "learning_rate": 4.995895410582071e-06, + "loss": 0.6051, + "step": 1587 + }, + { + "epoch": 0.1167475371268931, + "grad_norm": 0.9272133708000183, + "learning_rate": 4.995889889608111e-06, + "loss": 0.6321, + "step": 1588 + }, + { + "epoch": 0.11682105572709896, + "grad_norm": 0.9557957053184509, + "learning_rate": 4.995884364926642e-06, + "loss": 0.6587, + "step": 1589 + }, + { + "epoch": 0.11689457432730481, + "grad_norm": 0.8226425647735596, + "learning_rate": 4.995878836537673e-06, + "loss": 0.5954, + "step": 1590 + }, + { + "epoch": 0.11696809292751066, + "grad_norm": 0.8964298367500305, + "learning_rate": 4.995873304441213e-06, + "loss": 0.6265, + "step": 1591 + }, + { + "epoch": 0.11704161152771651, + "grad_norm": 0.8692817687988281, + "learning_rate": 4.995867768637271e-06, + "loss": 0.6289, + "step": 1592 + }, + { + "epoch": 0.11711513012792236, + "grad_norm": 0.8756454586982727, + "learning_rate": 4.995862229125853e-06, + "loss": 0.5747, + "step": 1593 + }, + { + "epoch": 0.11718864872812822, + "grad_norm": 0.959038257598877, + "learning_rate": 4.995856685906969e-06, + "loss": 0.6639, + "step": 1594 + }, + { + "epoch": 0.11726216732833407, + "grad_norm": 0.8814332485198975, + "learning_rate": 4.9958511389806264e-06, + "loss": 0.6111, + "step": 1595 + }, + { + "epoch": 0.11733568592853992, + "grad_norm": 0.9195178151130676, + "learning_rate": 4.995845588346832e-06, + "loss": 0.6243, + "step": 1596 + }, + { + "epoch": 0.11740920452874577, + "grad_norm": 0.9286447763442993, + "learning_rate": 4.995840034005598e-06, + "loss": 0.6101, + "step": 1597 + }, + { + "epoch": 0.11748272312895162, + "grad_norm": 0.867506742477417, + "learning_rate": 4.995834475956929e-06, + "loss": 0.5929, + "step": 1598 + }, + { + "epoch": 0.11755624172915748, + "grad_norm": 0.9563419222831726, + "learning_rate": 4.995828914200835e-06, + "loss": 0.6316, + "step": 1599 + }, + { + "epoch": 0.11762976032936333, + "grad_norm": 0.9094517827033997, + "learning_rate": 4.9958233487373245e-06, + "loss": 0.6599, + "step": 1600 + }, + { + "epoch": 0.11770327892956918, + "grad_norm": 0.8922371864318848, + "learning_rate": 4.995817779566404e-06, + "loss": 0.6156, + "step": 1601 + }, + { + "epoch": 0.11777679752977503, + "grad_norm": 0.8802765011787415, + "learning_rate": 4.995812206688082e-06, + "loss": 0.6092, + "step": 1602 + }, + { + "epoch": 0.11785031612998088, + "grad_norm": 1.0082600116729736, + "learning_rate": 4.995806630102369e-06, + "loss": 0.6863, + "step": 1603 + }, + { + "epoch": 0.11792383473018674, + "grad_norm": 0.9544060826301575, + "learning_rate": 4.995801049809273e-06, + "loss": 0.6522, + "step": 1604 + }, + { + "epoch": 0.11799735333039259, + "grad_norm": 0.8967961668968201, + "learning_rate": 4.995795465808799e-06, + "loss": 0.6086, + "step": 1605 + }, + { + "epoch": 0.11807087193059844, + "grad_norm": 0.9213505387306213, + "learning_rate": 4.995789878100958e-06, + "loss": 0.5975, + "step": 1606 + }, + { + "epoch": 0.11814439053080429, + "grad_norm": 0.914439857006073, + "learning_rate": 4.995784286685758e-06, + "loss": 0.5761, + "step": 1607 + }, + { + "epoch": 0.11821790913101014, + "grad_norm": 0.9483448266983032, + "learning_rate": 4.995778691563208e-06, + "loss": 0.6151, + "step": 1608 + }, + { + "epoch": 0.118291427731216, + "grad_norm": 0.8747820854187012, + "learning_rate": 4.9957730927333144e-06, + "loss": 0.5882, + "step": 1609 + }, + { + "epoch": 0.11836494633142185, + "grad_norm": 0.9421710968017578, + "learning_rate": 4.995767490196087e-06, + "loss": 0.6139, + "step": 1610 + }, + { + "epoch": 0.1184384649316277, + "grad_norm": 0.9263650178909302, + "learning_rate": 4.9957618839515335e-06, + "loss": 0.6253, + "step": 1611 + }, + { + "epoch": 0.11851198353183355, + "grad_norm": 0.8491151928901672, + "learning_rate": 4.995756273999662e-06, + "loss": 0.5882, + "step": 1612 + }, + { + "epoch": 0.1185855021320394, + "grad_norm": 0.8554331660270691, + "learning_rate": 4.9957506603404825e-06, + "loss": 0.5996, + "step": 1613 + }, + { + "epoch": 0.11865902073224525, + "grad_norm": 0.8686283230781555, + "learning_rate": 4.995745042974001e-06, + "loss": 0.6099, + "step": 1614 + }, + { + "epoch": 0.1187325393324511, + "grad_norm": 0.9835015535354614, + "learning_rate": 4.995739421900227e-06, + "loss": 0.6589, + "step": 1615 + }, + { + "epoch": 0.11880605793265696, + "grad_norm": 0.9296848177909851, + "learning_rate": 4.995733797119169e-06, + "loss": 0.6611, + "step": 1616 + }, + { + "epoch": 0.11887957653286281, + "grad_norm": 0.9174243211746216, + "learning_rate": 4.9957281686308345e-06, + "loss": 0.5987, + "step": 1617 + }, + { + "epoch": 0.11895309513306866, + "grad_norm": 0.905307948589325, + "learning_rate": 4.995722536435233e-06, + "loss": 0.5937, + "step": 1618 + }, + { + "epoch": 0.11902661373327451, + "grad_norm": 0.8613993525505066, + "learning_rate": 4.995716900532372e-06, + "loss": 0.6005, + "step": 1619 + }, + { + "epoch": 0.11910013233348037, + "grad_norm": 0.8783199787139893, + "learning_rate": 4.9957112609222605e-06, + "loss": 0.6296, + "step": 1620 + }, + { + "epoch": 0.11917365093368622, + "grad_norm": 0.892136812210083, + "learning_rate": 4.995705617604907e-06, + "loss": 0.5925, + "step": 1621 + }, + { + "epoch": 0.11924716953389207, + "grad_norm": 0.9136375188827515, + "learning_rate": 4.995699970580319e-06, + "loss": 0.6357, + "step": 1622 + }, + { + "epoch": 0.11932068813409792, + "grad_norm": 0.9311370253562927, + "learning_rate": 4.995694319848506e-06, + "loss": 0.6106, + "step": 1623 + }, + { + "epoch": 0.11939420673430377, + "grad_norm": 0.885345995426178, + "learning_rate": 4.9956886654094745e-06, + "loss": 0.6171, + "step": 1624 + }, + { + "epoch": 0.11946772533450963, + "grad_norm": 0.9060914516448975, + "learning_rate": 4.995683007263234e-06, + "loss": 0.6348, + "step": 1625 + }, + { + "epoch": 0.11954124393471548, + "grad_norm": 0.8736132979393005, + "learning_rate": 4.995677345409794e-06, + "loss": 0.5917, + "step": 1626 + }, + { + "epoch": 0.11961476253492133, + "grad_norm": 0.8769115209579468, + "learning_rate": 4.995671679849162e-06, + "loss": 0.6391, + "step": 1627 + }, + { + "epoch": 0.11968828113512718, + "grad_norm": 0.8304693698883057, + "learning_rate": 4.995666010581346e-06, + "loss": 0.6045, + "step": 1628 + }, + { + "epoch": 0.11976179973533303, + "grad_norm": 0.9031703472137451, + "learning_rate": 4.995660337606355e-06, + "loss": 0.6491, + "step": 1629 + }, + { + "epoch": 0.11983531833553888, + "grad_norm": 0.9289074540138245, + "learning_rate": 4.995654660924197e-06, + "loss": 0.6061, + "step": 1630 + }, + { + "epoch": 0.11990883693574475, + "grad_norm": 0.8678799867630005, + "learning_rate": 4.995648980534881e-06, + "loss": 0.6323, + "step": 1631 + }, + { + "epoch": 0.1199823555359506, + "grad_norm": 0.9232732057571411, + "learning_rate": 4.995643296438415e-06, + "loss": 0.6288, + "step": 1632 + }, + { + "epoch": 0.12005587413615645, + "grad_norm": 0.9040062427520752, + "learning_rate": 4.995637608634808e-06, + "loss": 0.6156, + "step": 1633 + }, + { + "epoch": 0.1201293927363623, + "grad_norm": 0.9114024639129639, + "learning_rate": 4.995631917124067e-06, + "loss": 0.5895, + "step": 1634 + }, + { + "epoch": 0.12020291133656816, + "grad_norm": 0.8669640421867371, + "learning_rate": 4.995626221906202e-06, + "loss": 0.634, + "step": 1635 + }, + { + "epoch": 0.12027642993677401, + "grad_norm": 0.9509706497192383, + "learning_rate": 4.9956205229812205e-06, + "loss": 0.6537, + "step": 1636 + }, + { + "epoch": 0.12034994853697986, + "grad_norm": 0.965101957321167, + "learning_rate": 4.995614820349132e-06, + "loss": 0.6154, + "step": 1637 + }, + { + "epoch": 0.12042346713718571, + "grad_norm": 0.8591254353523254, + "learning_rate": 4.995609114009944e-06, + "loss": 0.5984, + "step": 1638 + }, + { + "epoch": 0.12049698573739157, + "grad_norm": 0.8795799016952515, + "learning_rate": 4.995603403963665e-06, + "loss": 0.5784, + "step": 1639 + }, + { + "epoch": 0.12057050433759742, + "grad_norm": 0.9401871562004089, + "learning_rate": 4.995597690210305e-06, + "loss": 0.6093, + "step": 1640 + }, + { + "epoch": 0.12064402293780327, + "grad_norm": 1.032249093055725, + "learning_rate": 4.99559197274987e-06, + "loss": 0.6409, + "step": 1641 + }, + { + "epoch": 0.12071754153800912, + "grad_norm": 0.9179297685623169, + "learning_rate": 4.99558625158237e-06, + "loss": 0.6043, + "step": 1642 + }, + { + "epoch": 0.12079106013821497, + "grad_norm": 0.8952412605285645, + "learning_rate": 4.995580526707814e-06, + "loss": 0.6314, + "step": 1643 + }, + { + "epoch": 0.12086457873842082, + "grad_norm": 0.8529797792434692, + "learning_rate": 4.995574798126209e-06, + "loss": 0.5951, + "step": 1644 + }, + { + "epoch": 0.12093809733862668, + "grad_norm": 0.9022047519683838, + "learning_rate": 4.995569065837565e-06, + "loss": 0.6293, + "step": 1645 + }, + { + "epoch": 0.12101161593883253, + "grad_norm": 0.9263952970504761, + "learning_rate": 4.9955633298418884e-06, + "loss": 0.6392, + "step": 1646 + }, + { + "epoch": 0.12108513453903838, + "grad_norm": 0.9094380736351013, + "learning_rate": 4.995557590139191e-06, + "loss": 0.6192, + "step": 1647 + }, + { + "epoch": 0.12115865313924423, + "grad_norm": 0.9023709893226624, + "learning_rate": 4.995551846729478e-06, + "loss": 0.6169, + "step": 1648 + }, + { + "epoch": 0.12123217173945008, + "grad_norm": 0.8930521607398987, + "learning_rate": 4.99554609961276e-06, + "loss": 0.623, + "step": 1649 + }, + { + "epoch": 0.12130569033965594, + "grad_norm": 0.8808190822601318, + "learning_rate": 4.995540348789044e-06, + "loss": 0.6132, + "step": 1650 + }, + { + "epoch": 0.12137920893986179, + "grad_norm": 0.9123886227607727, + "learning_rate": 4.9955345942583414e-06, + "loss": 0.5736, + "step": 1651 + }, + { + "epoch": 0.12145272754006764, + "grad_norm": 0.8735642433166504, + "learning_rate": 4.995528836020657e-06, + "loss": 0.614, + "step": 1652 + }, + { + "epoch": 0.12152624614027349, + "grad_norm": 0.9039204716682434, + "learning_rate": 4.995523074076002e-06, + "loss": 0.6307, + "step": 1653 + }, + { + "epoch": 0.12159976474047934, + "grad_norm": 0.9060366153717041, + "learning_rate": 4.995517308424384e-06, + "loss": 0.6125, + "step": 1654 + }, + { + "epoch": 0.1216732833406852, + "grad_norm": 0.8675918579101562, + "learning_rate": 4.995511539065811e-06, + "loss": 0.5881, + "step": 1655 + }, + { + "epoch": 0.12174680194089105, + "grad_norm": 0.9836347103118896, + "learning_rate": 4.995505766000294e-06, + "loss": 0.6896, + "step": 1656 + }, + { + "epoch": 0.1218203205410969, + "grad_norm": 0.8618261814117432, + "learning_rate": 4.995499989227839e-06, + "loss": 0.5795, + "step": 1657 + }, + { + "epoch": 0.12189383914130275, + "grad_norm": 0.8553207516670227, + "learning_rate": 4.9954942087484544e-06, + "loss": 0.6272, + "step": 1658 + }, + { + "epoch": 0.1219673577415086, + "grad_norm": 0.8662916421890259, + "learning_rate": 4.9954884245621506e-06, + "loss": 0.5865, + "step": 1659 + }, + { + "epoch": 0.12204087634171445, + "grad_norm": 0.8503262996673584, + "learning_rate": 4.995482636668936e-06, + "loss": 0.621, + "step": 1660 + }, + { + "epoch": 0.1221143949419203, + "grad_norm": 0.8535900115966797, + "learning_rate": 4.995476845068818e-06, + "loss": 0.5819, + "step": 1661 + }, + { + "epoch": 0.12218791354212616, + "grad_norm": 0.8581811785697937, + "learning_rate": 4.9954710497618055e-06, + "loss": 0.6007, + "step": 1662 + }, + { + "epoch": 0.12226143214233201, + "grad_norm": 0.8794524073600769, + "learning_rate": 4.995465250747908e-06, + "loss": 0.5992, + "step": 1663 + }, + { + "epoch": 0.12233495074253786, + "grad_norm": 0.8805656433105469, + "learning_rate": 4.995459448027133e-06, + "loss": 0.6171, + "step": 1664 + }, + { + "epoch": 0.12240846934274371, + "grad_norm": 0.8533534407615662, + "learning_rate": 4.99545364159949e-06, + "loss": 0.608, + "step": 1665 + }, + { + "epoch": 0.12248198794294957, + "grad_norm": 0.868085503578186, + "learning_rate": 4.995447831464986e-06, + "loss": 0.5892, + "step": 1666 + }, + { + "epoch": 0.12255550654315542, + "grad_norm": 0.9503712058067322, + "learning_rate": 4.995442017623633e-06, + "loss": 0.6124, + "step": 1667 + }, + { + "epoch": 0.12262902514336127, + "grad_norm": 0.9091495871543884, + "learning_rate": 4.995436200075436e-06, + "loss": 0.638, + "step": 1668 + }, + { + "epoch": 0.12270254374356712, + "grad_norm": 0.8602770566940308, + "learning_rate": 4.995430378820406e-06, + "loss": 0.6337, + "step": 1669 + }, + { + "epoch": 0.12277606234377297, + "grad_norm": 0.9069023132324219, + "learning_rate": 4.995424553858551e-06, + "loss": 0.6157, + "step": 1670 + }, + { + "epoch": 0.12284958094397883, + "grad_norm": 0.923987865447998, + "learning_rate": 4.995418725189879e-06, + "loss": 0.5976, + "step": 1671 + }, + { + "epoch": 0.12292309954418468, + "grad_norm": 0.9200736880302429, + "learning_rate": 4.995412892814399e-06, + "loss": 0.6228, + "step": 1672 + }, + { + "epoch": 0.12299661814439053, + "grad_norm": 0.9429432153701782, + "learning_rate": 4.99540705673212e-06, + "loss": 0.6347, + "step": 1673 + }, + { + "epoch": 0.12307013674459638, + "grad_norm": 0.8937427401542664, + "learning_rate": 4.99540121694305e-06, + "loss": 0.6236, + "step": 1674 + }, + { + "epoch": 0.12314365534480223, + "grad_norm": 0.869981050491333, + "learning_rate": 4.995395373447199e-06, + "loss": 0.5788, + "step": 1675 + }, + { + "epoch": 0.12321717394500808, + "grad_norm": 0.936450183391571, + "learning_rate": 4.995389526244574e-06, + "loss": 0.6435, + "step": 1676 + }, + { + "epoch": 0.12329069254521394, + "grad_norm": 0.8939347267150879, + "learning_rate": 4.995383675335185e-06, + "loss": 0.6005, + "step": 1677 + }, + { + "epoch": 0.12336421114541979, + "grad_norm": 0.8378753066062927, + "learning_rate": 4.9953778207190396e-06, + "loss": 0.5491, + "step": 1678 + }, + { + "epoch": 0.12343772974562564, + "grad_norm": 0.85422283411026, + "learning_rate": 4.995371962396147e-06, + "loss": 0.6133, + "step": 1679 + }, + { + "epoch": 0.12351124834583149, + "grad_norm": 0.9838407635688782, + "learning_rate": 4.995366100366518e-06, + "loss": 0.65, + "step": 1680 + }, + { + "epoch": 0.12358476694603734, + "grad_norm": 0.8823294043540955, + "learning_rate": 4.995360234630157e-06, + "loss": 0.6117, + "step": 1681 + }, + { + "epoch": 0.1236582855462432, + "grad_norm": 0.894336462020874, + "learning_rate": 4.9953543651870766e-06, + "loss": 0.6016, + "step": 1682 + }, + { + "epoch": 0.12373180414644905, + "grad_norm": 0.8669780492782593, + "learning_rate": 4.995348492037283e-06, + "loss": 0.5958, + "step": 1683 + }, + { + "epoch": 0.1238053227466549, + "grad_norm": 0.9200948476791382, + "learning_rate": 4.995342615180786e-06, + "loss": 0.6177, + "step": 1684 + }, + { + "epoch": 0.12387884134686075, + "grad_norm": 0.9780153036117554, + "learning_rate": 4.995336734617594e-06, + "loss": 0.5977, + "step": 1685 + }, + { + "epoch": 0.1239523599470666, + "grad_norm": 0.9540718793869019, + "learning_rate": 4.995330850347716e-06, + "loss": 0.6315, + "step": 1686 + }, + { + "epoch": 0.12402587854727246, + "grad_norm": 0.9483146071434021, + "learning_rate": 4.9953249623711605e-06, + "loss": 0.6084, + "step": 1687 + }, + { + "epoch": 0.12409939714747831, + "grad_norm": 0.9223366379737854, + "learning_rate": 4.9953190706879375e-06, + "loss": 0.6827, + "step": 1688 + }, + { + "epoch": 0.12417291574768416, + "grad_norm": 0.8802179098129272, + "learning_rate": 4.995313175298053e-06, + "loss": 0.6116, + "step": 1689 + }, + { + "epoch": 0.12424643434789001, + "grad_norm": 0.9345000982284546, + "learning_rate": 4.9953072762015184e-06, + "loss": 0.6262, + "step": 1690 + }, + { + "epoch": 0.12431995294809586, + "grad_norm": 0.9492130279541016, + "learning_rate": 4.995301373398341e-06, + "loss": 0.6318, + "step": 1691 + }, + { + "epoch": 0.12439347154830172, + "grad_norm": 0.8862586617469788, + "learning_rate": 4.995295466888531e-06, + "loss": 0.6371, + "step": 1692 + }, + { + "epoch": 0.12446699014850757, + "grad_norm": 0.8960027694702148, + "learning_rate": 4.995289556672095e-06, + "loss": 0.5926, + "step": 1693 + }, + { + "epoch": 0.12454050874871342, + "grad_norm": 0.8780937790870667, + "learning_rate": 4.995283642749044e-06, + "loss": 0.6111, + "step": 1694 + }, + { + "epoch": 0.12461402734891927, + "grad_norm": 0.8431730270385742, + "learning_rate": 4.995277725119386e-06, + "loss": 0.5342, + "step": 1695 + }, + { + "epoch": 0.12468754594912512, + "grad_norm": 0.8128412961959839, + "learning_rate": 4.995271803783128e-06, + "loss": 0.5764, + "step": 1696 + }, + { + "epoch": 0.12476106454933097, + "grad_norm": 0.9160401821136475, + "learning_rate": 4.995265878740282e-06, + "loss": 0.6261, + "step": 1697 + }, + { + "epoch": 0.12483458314953683, + "grad_norm": 0.9530867338180542, + "learning_rate": 4.995259949990855e-06, + "loss": 0.6381, + "step": 1698 + }, + { + "epoch": 0.12490810174974268, + "grad_norm": 0.9495123624801636, + "learning_rate": 4.995254017534855e-06, + "loss": 0.6374, + "step": 1699 + }, + { + "epoch": 0.12498162034994853, + "grad_norm": 0.8964534401893616, + "learning_rate": 4.9952480813722924e-06, + "loss": 0.6157, + "step": 1700 + }, + { + "epoch": 0.12505513895015438, + "grad_norm": 0.9301723837852478, + "learning_rate": 4.995242141503175e-06, + "loss": 0.6344, + "step": 1701 + }, + { + "epoch": 0.12512865755036023, + "grad_norm": 0.9161364436149597, + "learning_rate": 4.995236197927513e-06, + "loss": 0.6337, + "step": 1702 + }, + { + "epoch": 0.12520217615056609, + "grad_norm": 0.9161568284034729, + "learning_rate": 4.995230250645313e-06, + "loss": 0.6781, + "step": 1703 + }, + { + "epoch": 0.12527569475077194, + "grad_norm": 0.8699995279312134, + "learning_rate": 4.995224299656585e-06, + "loss": 0.6241, + "step": 1704 + }, + { + "epoch": 0.1253492133509778, + "grad_norm": 0.90499347448349, + "learning_rate": 4.99521834496134e-06, + "loss": 0.6204, + "step": 1705 + }, + { + "epoch": 0.12542273195118364, + "grad_norm": 0.8843741416931152, + "learning_rate": 4.995212386559583e-06, + "loss": 0.6321, + "step": 1706 + }, + { + "epoch": 0.1254962505513895, + "grad_norm": 0.8619635105133057, + "learning_rate": 4.995206424451325e-06, + "loss": 0.5993, + "step": 1707 + }, + { + "epoch": 0.12556976915159535, + "grad_norm": 0.9064298272132874, + "learning_rate": 4.995200458636575e-06, + "loss": 0.5991, + "step": 1708 + }, + { + "epoch": 0.1256432877518012, + "grad_norm": 0.9381205439567566, + "learning_rate": 4.99519448911534e-06, + "loss": 0.5763, + "step": 1709 + }, + { + "epoch": 0.12571680635200705, + "grad_norm": 0.9147866368293762, + "learning_rate": 4.995188515887632e-06, + "loss": 0.6255, + "step": 1710 + }, + { + "epoch": 0.1257903249522129, + "grad_norm": 0.898268461227417, + "learning_rate": 4.995182538953457e-06, + "loss": 0.6464, + "step": 1711 + }, + { + "epoch": 0.12586384355241875, + "grad_norm": 0.8774908185005188, + "learning_rate": 4.995176558312825e-06, + "loss": 0.6266, + "step": 1712 + }, + { + "epoch": 0.1259373621526246, + "grad_norm": 0.9124217629432678, + "learning_rate": 4.995170573965745e-06, + "loss": 0.6176, + "step": 1713 + }, + { + "epoch": 0.12601088075283046, + "grad_norm": 0.918968677520752, + "learning_rate": 4.995164585912226e-06, + "loss": 0.6015, + "step": 1714 + }, + { + "epoch": 0.1260843993530363, + "grad_norm": 0.9119697213172913, + "learning_rate": 4.995158594152277e-06, + "loss": 0.618, + "step": 1715 + }, + { + "epoch": 0.12615791795324216, + "grad_norm": 0.9326184391975403, + "learning_rate": 4.9951525986859055e-06, + "loss": 0.6101, + "step": 1716 + }, + { + "epoch": 0.126231436553448, + "grad_norm": 0.9930976629257202, + "learning_rate": 4.995146599513122e-06, + "loss": 0.6466, + "step": 1717 + }, + { + "epoch": 0.12630495515365386, + "grad_norm": 0.8457563519477844, + "learning_rate": 4.995140596633935e-06, + "loss": 0.5607, + "step": 1718 + }, + { + "epoch": 0.12637847375385972, + "grad_norm": 0.8692227602005005, + "learning_rate": 4.995134590048353e-06, + "loss": 0.6367, + "step": 1719 + }, + { + "epoch": 0.12645199235406557, + "grad_norm": 0.9711206555366516, + "learning_rate": 4.9951285797563854e-06, + "loss": 0.6504, + "step": 1720 + }, + { + "epoch": 0.12652551095427142, + "grad_norm": 0.942479133605957, + "learning_rate": 4.9951225657580405e-06, + "loss": 0.667, + "step": 1721 + }, + { + "epoch": 0.12659902955447727, + "grad_norm": 0.9371203184127808, + "learning_rate": 4.9951165480533285e-06, + "loss": 0.6109, + "step": 1722 + }, + { + "epoch": 0.12667254815468312, + "grad_norm": 0.8755165934562683, + "learning_rate": 4.995110526642256e-06, + "loss": 0.614, + "step": 1723 + }, + { + "epoch": 0.12674606675488898, + "grad_norm": 0.930038332939148, + "learning_rate": 4.995104501524834e-06, + "loss": 0.6534, + "step": 1724 + }, + { + "epoch": 0.12681958535509483, + "grad_norm": 0.9155031442642212, + "learning_rate": 4.9950984727010724e-06, + "loss": 0.6276, + "step": 1725 + }, + { + "epoch": 0.12689310395530068, + "grad_norm": 0.8624013066291809, + "learning_rate": 4.995092440170977e-06, + "loss": 0.5902, + "step": 1726 + }, + { + "epoch": 0.12696662255550653, + "grad_norm": 0.8840454816818237, + "learning_rate": 4.995086403934559e-06, + "loss": 0.5616, + "step": 1727 + }, + { + "epoch": 0.12704014115571238, + "grad_norm": 0.875093936920166, + "learning_rate": 4.995080363991826e-06, + "loss": 0.6365, + "step": 1728 + }, + { + "epoch": 0.12711365975591823, + "grad_norm": 0.9549031257629395, + "learning_rate": 4.995074320342789e-06, + "loss": 0.5942, + "step": 1729 + }, + { + "epoch": 0.1271871783561241, + "grad_norm": 0.9058423042297363, + "learning_rate": 4.995068272987456e-06, + "loss": 0.5931, + "step": 1730 + }, + { + "epoch": 0.12726069695632994, + "grad_norm": 0.8890552520751953, + "learning_rate": 4.995062221925834e-06, + "loss": 0.6039, + "step": 1731 + }, + { + "epoch": 0.1273342155565358, + "grad_norm": 0.9653670191764832, + "learning_rate": 4.995056167157934e-06, + "loss": 0.6815, + "step": 1732 + }, + { + "epoch": 0.12740773415674164, + "grad_norm": 0.8427765369415283, + "learning_rate": 4.995050108683766e-06, + "loss": 0.59, + "step": 1733 + }, + { + "epoch": 0.1274812527569475, + "grad_norm": 0.9294586777687073, + "learning_rate": 4.995044046503337e-06, + "loss": 0.6241, + "step": 1734 + }, + { + "epoch": 0.12755477135715337, + "grad_norm": 0.9003506898880005, + "learning_rate": 4.995037980616657e-06, + "loss": 0.6132, + "step": 1735 + }, + { + "epoch": 0.12762828995735923, + "grad_norm": 0.8758178949356079, + "learning_rate": 4.995031911023734e-06, + "loss": 0.5864, + "step": 1736 + }, + { + "epoch": 0.12770180855756508, + "grad_norm": 0.9144997596740723, + "learning_rate": 4.995025837724578e-06, + "loss": 0.6081, + "step": 1737 + }, + { + "epoch": 0.12777532715777093, + "grad_norm": 0.8938120603561401, + "learning_rate": 4.995019760719197e-06, + "loss": 0.6365, + "step": 1738 + }, + { + "epoch": 0.12784884575797678, + "grad_norm": 0.9167048335075378, + "learning_rate": 4.995013680007602e-06, + "loss": 0.6263, + "step": 1739 + }, + { + "epoch": 0.12792236435818263, + "grad_norm": 0.9067861437797546, + "learning_rate": 4.995007595589801e-06, + "loss": 0.6311, + "step": 1740 + }, + { + "epoch": 0.12799588295838848, + "grad_norm": 0.8805794715881348, + "learning_rate": 4.995001507465802e-06, + "loss": 0.6188, + "step": 1741 + }, + { + "epoch": 0.12806940155859434, + "grad_norm": 0.9122366905212402, + "learning_rate": 4.994995415635615e-06, + "loss": 0.6162, + "step": 1742 + }, + { + "epoch": 0.1281429201588002, + "grad_norm": 0.9494568705558777, + "learning_rate": 4.994989320099249e-06, + "loss": 0.6162, + "step": 1743 + }, + { + "epoch": 0.12821643875900604, + "grad_norm": 0.9526582360267639, + "learning_rate": 4.994983220856713e-06, + "loss": 0.6079, + "step": 1744 + }, + { + "epoch": 0.1282899573592119, + "grad_norm": 0.915439784526825, + "learning_rate": 4.994977117908016e-06, + "loss": 0.6154, + "step": 1745 + }, + { + "epoch": 0.12836347595941774, + "grad_norm": 0.9365389943122864, + "learning_rate": 4.9949710112531676e-06, + "loss": 0.6573, + "step": 1746 + }, + { + "epoch": 0.1284369945596236, + "grad_norm": 0.892112672328949, + "learning_rate": 4.994964900892176e-06, + "loss": 0.6298, + "step": 1747 + }, + { + "epoch": 0.12851051315982945, + "grad_norm": 0.9268995523452759, + "learning_rate": 4.9949587868250506e-06, + "loss": 0.642, + "step": 1748 + }, + { + "epoch": 0.1285840317600353, + "grad_norm": 0.9250532984733582, + "learning_rate": 4.9949526690518e-06, + "loss": 0.6105, + "step": 1749 + }, + { + "epoch": 0.12865755036024115, + "grad_norm": 0.9500529766082764, + "learning_rate": 4.994946547572435e-06, + "loss": 0.5676, + "step": 1750 + }, + { + "epoch": 0.128731068960447, + "grad_norm": 0.8614692687988281, + "learning_rate": 4.9949404223869625e-06, + "loss": 0.6046, + "step": 1751 + }, + { + "epoch": 0.12880458756065286, + "grad_norm": 0.8959610462188721, + "learning_rate": 4.994934293495393e-06, + "loss": 0.6636, + "step": 1752 + }, + { + "epoch": 0.1288781061608587, + "grad_norm": 0.8993657231330872, + "learning_rate": 4.994928160897735e-06, + "loss": 0.6049, + "step": 1753 + }, + { + "epoch": 0.12895162476106456, + "grad_norm": 0.8959274291992188, + "learning_rate": 4.994922024593999e-06, + "loss": 0.5965, + "step": 1754 + }, + { + "epoch": 0.1290251433612704, + "grad_norm": 0.8911101818084717, + "learning_rate": 4.994915884584192e-06, + "loss": 0.5713, + "step": 1755 + }, + { + "epoch": 0.12909866196147626, + "grad_norm": 0.8832529187202454, + "learning_rate": 4.9949097408683235e-06, + "loss": 0.6018, + "step": 1756 + }, + { + "epoch": 0.12917218056168212, + "grad_norm": 1.0005053281784058, + "learning_rate": 4.994903593446404e-06, + "loss": 0.5893, + "step": 1757 + }, + { + "epoch": 0.12924569916188797, + "grad_norm": 0.8684425950050354, + "learning_rate": 4.994897442318441e-06, + "loss": 0.5497, + "step": 1758 + }, + { + "epoch": 0.12931921776209382, + "grad_norm": 0.9257813096046448, + "learning_rate": 4.994891287484446e-06, + "loss": 0.5879, + "step": 1759 + }, + { + "epoch": 0.12939273636229967, + "grad_norm": 0.8738377094268799, + "learning_rate": 4.994885128944425e-06, + "loss": 0.605, + "step": 1760 + }, + { + "epoch": 0.12946625496250552, + "grad_norm": 0.9179147481918335, + "learning_rate": 4.99487896669839e-06, + "loss": 0.6078, + "step": 1761 + }, + { + "epoch": 0.12953977356271137, + "grad_norm": 0.9246540069580078, + "learning_rate": 4.9948728007463485e-06, + "loss": 0.5785, + "step": 1762 + }, + { + "epoch": 0.12961329216291723, + "grad_norm": 0.8973028063774109, + "learning_rate": 4.99486663108831e-06, + "loss": 0.589, + "step": 1763 + }, + { + "epoch": 0.12968681076312308, + "grad_norm": 0.9156644344329834, + "learning_rate": 4.994860457724284e-06, + "loss": 0.622, + "step": 1764 + }, + { + "epoch": 0.12976032936332893, + "grad_norm": 0.8975784778594971, + "learning_rate": 4.994854280654279e-06, + "loss": 0.5955, + "step": 1765 + }, + { + "epoch": 0.12983384796353478, + "grad_norm": 0.8702983856201172, + "learning_rate": 4.994848099878305e-06, + "loss": 0.6401, + "step": 1766 + }, + { + "epoch": 0.12990736656374063, + "grad_norm": 0.9424370527267456, + "learning_rate": 4.9948419153963705e-06, + "loss": 0.6501, + "step": 1767 + }, + { + "epoch": 0.12998088516394649, + "grad_norm": 0.8990488648414612, + "learning_rate": 4.994835727208485e-06, + "loss": 0.5975, + "step": 1768 + }, + { + "epoch": 0.13005440376415234, + "grad_norm": 0.8932004570960999, + "learning_rate": 4.994829535314657e-06, + "loss": 0.628, + "step": 1769 + }, + { + "epoch": 0.1301279223643582, + "grad_norm": 0.9056817293167114, + "learning_rate": 4.994823339714897e-06, + "loss": 0.6314, + "step": 1770 + }, + { + "epoch": 0.13020144096456404, + "grad_norm": 0.8943083882331848, + "learning_rate": 4.9948171404092126e-06, + "loss": 0.6103, + "step": 1771 + }, + { + "epoch": 0.1302749595647699, + "grad_norm": 0.9443556070327759, + "learning_rate": 4.994810937397615e-06, + "loss": 0.6483, + "step": 1772 + }, + { + "epoch": 0.13034847816497575, + "grad_norm": 0.8825258016586304, + "learning_rate": 4.994804730680112e-06, + "loss": 0.6035, + "step": 1773 + }, + { + "epoch": 0.1304219967651816, + "grad_norm": 0.9009276032447815, + "learning_rate": 4.994798520256714e-06, + "loss": 0.6326, + "step": 1774 + }, + { + "epoch": 0.13049551536538745, + "grad_norm": 0.9135634303092957, + "learning_rate": 4.994792306127428e-06, + "loss": 0.5926, + "step": 1775 + }, + { + "epoch": 0.1305690339655933, + "grad_norm": 0.9400137066841125, + "learning_rate": 4.994786088292266e-06, + "loss": 0.6314, + "step": 1776 + }, + { + "epoch": 0.13064255256579915, + "grad_norm": 0.8873360753059387, + "learning_rate": 4.994779866751235e-06, + "loss": 0.6558, + "step": 1777 + }, + { + "epoch": 0.130716071166005, + "grad_norm": 0.8792514204978943, + "learning_rate": 4.994773641504346e-06, + "loss": 0.6094, + "step": 1778 + }, + { + "epoch": 0.13078958976621086, + "grad_norm": 0.8758922815322876, + "learning_rate": 4.994767412551606e-06, + "loss": 0.5892, + "step": 1779 + }, + { + "epoch": 0.1308631083664167, + "grad_norm": 0.8802415728569031, + "learning_rate": 4.994761179893026e-06, + "loss": 0.5863, + "step": 1780 + }, + { + "epoch": 0.13093662696662256, + "grad_norm": 0.9028784036636353, + "learning_rate": 4.994754943528616e-06, + "loss": 0.6166, + "step": 1781 + }, + { + "epoch": 0.1310101455668284, + "grad_norm": 0.8628852367401123, + "learning_rate": 4.994748703458383e-06, + "loss": 0.5908, + "step": 1782 + }, + { + "epoch": 0.13108366416703426, + "grad_norm": 0.9385373592376709, + "learning_rate": 4.994742459682338e-06, + "loss": 0.6236, + "step": 1783 + }, + { + "epoch": 0.13115718276724012, + "grad_norm": 0.9004799127578735, + "learning_rate": 4.99473621220049e-06, + "loss": 0.6341, + "step": 1784 + }, + { + "epoch": 0.13123070136744597, + "grad_norm": 0.890778124332428, + "learning_rate": 4.9947299610128465e-06, + "loss": 0.5922, + "step": 1785 + }, + { + "epoch": 0.13130421996765182, + "grad_norm": 0.86668860912323, + "learning_rate": 4.99472370611942e-06, + "loss": 0.6465, + "step": 1786 + }, + { + "epoch": 0.13137773856785767, + "grad_norm": 0.8700196743011475, + "learning_rate": 4.994717447520216e-06, + "loss": 0.6029, + "step": 1787 + }, + { + "epoch": 0.13145125716806352, + "grad_norm": 0.8656007647514343, + "learning_rate": 4.994711185215247e-06, + "loss": 0.6127, + "step": 1788 + }, + { + "epoch": 0.13152477576826938, + "grad_norm": 0.8815177083015442, + "learning_rate": 4.9947049192045216e-06, + "loss": 0.5893, + "step": 1789 + }, + { + "epoch": 0.13159829436847523, + "grad_norm": 0.890410840511322, + "learning_rate": 4.994698649488049e-06, + "loss": 0.6132, + "step": 1790 + }, + { + "epoch": 0.13167181296868108, + "grad_norm": 0.961200475692749, + "learning_rate": 4.9946923760658374e-06, + "loss": 0.6309, + "step": 1791 + }, + { + "epoch": 0.13174533156888693, + "grad_norm": 0.9505396485328674, + "learning_rate": 4.994686098937897e-06, + "loss": 0.6317, + "step": 1792 + }, + { + "epoch": 0.13181885016909278, + "grad_norm": 0.9100663661956787, + "learning_rate": 4.994679818104237e-06, + "loss": 0.5996, + "step": 1793 + }, + { + "epoch": 0.13189236876929863, + "grad_norm": 0.9411263465881348, + "learning_rate": 4.994673533564866e-06, + "loss": 0.6126, + "step": 1794 + }, + { + "epoch": 0.1319658873695045, + "grad_norm": 0.8840299248695374, + "learning_rate": 4.994667245319795e-06, + "loss": 0.6243, + "step": 1795 + }, + { + "epoch": 0.13203940596971034, + "grad_norm": 0.8477934002876282, + "learning_rate": 4.994660953369032e-06, + "loss": 0.5769, + "step": 1796 + }, + { + "epoch": 0.1321129245699162, + "grad_norm": 0.8901490569114685, + "learning_rate": 4.994654657712587e-06, + "loss": 0.5947, + "step": 1797 + }, + { + "epoch": 0.13218644317012204, + "grad_norm": 0.8636474609375, + "learning_rate": 4.994648358350469e-06, + "loss": 0.6083, + "step": 1798 + }, + { + "epoch": 0.1322599617703279, + "grad_norm": 0.8869476914405823, + "learning_rate": 4.994642055282687e-06, + "loss": 0.6147, + "step": 1799 + }, + { + "epoch": 0.13233348037053375, + "grad_norm": 0.923099160194397, + "learning_rate": 4.9946357485092515e-06, + "loss": 0.6111, + "step": 1800 + }, + { + "epoch": 0.1324069989707396, + "grad_norm": 0.887391209602356, + "learning_rate": 4.994629438030171e-06, + "loss": 0.5987, + "step": 1801 + }, + { + "epoch": 0.13248051757094545, + "grad_norm": 0.9134442210197449, + "learning_rate": 4.994623123845455e-06, + "loss": 0.6295, + "step": 1802 + }, + { + "epoch": 0.1325540361711513, + "grad_norm": 0.9535447955131531, + "learning_rate": 4.994616805955113e-06, + "loss": 0.635, + "step": 1803 + }, + { + "epoch": 0.13262755477135715, + "grad_norm": 0.914865255355835, + "learning_rate": 4.994610484359155e-06, + "loss": 0.6324, + "step": 1804 + }, + { + "epoch": 0.132701073371563, + "grad_norm": 0.9371379017829895, + "learning_rate": 4.994604159057588e-06, + "loss": 0.6283, + "step": 1805 + }, + { + "epoch": 0.13277459197176886, + "grad_norm": 0.8878989219665527, + "learning_rate": 4.994597830050424e-06, + "loss": 0.6144, + "step": 1806 + }, + { + "epoch": 0.1328481105719747, + "grad_norm": 0.8824242949485779, + "learning_rate": 4.994591497337672e-06, + "loss": 0.6557, + "step": 1807 + }, + { + "epoch": 0.13292162917218056, + "grad_norm": 0.9818916320800781, + "learning_rate": 4.994585160919341e-06, + "loss": 0.6433, + "step": 1808 + }, + { + "epoch": 0.1329951477723864, + "grad_norm": 0.8823691010475159, + "learning_rate": 4.9945788207954396e-06, + "loss": 0.5742, + "step": 1809 + }, + { + "epoch": 0.13306866637259226, + "grad_norm": 0.8998549580574036, + "learning_rate": 4.9945724769659785e-06, + "loss": 0.6025, + "step": 1810 + }, + { + "epoch": 0.13314218497279812, + "grad_norm": 0.9938244223594666, + "learning_rate": 4.994566129430967e-06, + "loss": 0.6572, + "step": 1811 + }, + { + "epoch": 0.13321570357300397, + "grad_norm": 0.9152001738548279, + "learning_rate": 4.994559778190413e-06, + "loss": 0.6241, + "step": 1812 + }, + { + "epoch": 0.13328922217320982, + "grad_norm": 0.8998814225196838, + "learning_rate": 4.994553423244328e-06, + "loss": 0.6518, + "step": 1813 + }, + { + "epoch": 0.13336274077341567, + "grad_norm": 0.9124985933303833, + "learning_rate": 4.99454706459272e-06, + "loss": 0.6207, + "step": 1814 + }, + { + "epoch": 0.13343625937362152, + "grad_norm": 0.8833310008049011, + "learning_rate": 4.994540702235599e-06, + "loss": 0.6096, + "step": 1815 + }, + { + "epoch": 0.13350977797382738, + "grad_norm": 0.907128095626831, + "learning_rate": 4.994534336172974e-06, + "loss": 0.6067, + "step": 1816 + }, + { + "epoch": 0.13358329657403323, + "grad_norm": 0.9054633975028992, + "learning_rate": 4.994527966404855e-06, + "loss": 0.5891, + "step": 1817 + }, + { + "epoch": 0.13365681517423908, + "grad_norm": 0.9311162233352661, + "learning_rate": 4.9945215929312515e-06, + "loss": 0.6125, + "step": 1818 + }, + { + "epoch": 0.13373033377444493, + "grad_norm": 0.9355047941207886, + "learning_rate": 4.994515215752173e-06, + "loss": 0.5966, + "step": 1819 + }, + { + "epoch": 0.13380385237465078, + "grad_norm": 0.8695403933525085, + "learning_rate": 4.994508834867628e-06, + "loss": 0.6027, + "step": 1820 + }, + { + "epoch": 0.13387737097485664, + "grad_norm": 0.9194187521934509, + "learning_rate": 4.994502450277628e-06, + "loss": 0.6053, + "step": 1821 + }, + { + "epoch": 0.1339508895750625, + "grad_norm": 0.9171292185783386, + "learning_rate": 4.994496061982181e-06, + "loss": 0.5924, + "step": 1822 + }, + { + "epoch": 0.13402440817526834, + "grad_norm": 0.8752750158309937, + "learning_rate": 4.994489669981295e-06, + "loss": 0.6073, + "step": 1823 + }, + { + "epoch": 0.1340979267754742, + "grad_norm": 0.9116701483726501, + "learning_rate": 4.994483274274983e-06, + "loss": 0.6263, + "step": 1824 + }, + { + "epoch": 0.13417144537568004, + "grad_norm": 0.9056298136711121, + "learning_rate": 4.994476874863252e-06, + "loss": 0.6258, + "step": 1825 + }, + { + "epoch": 0.1342449639758859, + "grad_norm": 0.9290796518325806, + "learning_rate": 4.994470471746113e-06, + "loss": 0.6266, + "step": 1826 + }, + { + "epoch": 0.13431848257609175, + "grad_norm": 0.8338856101036072, + "learning_rate": 4.994464064923573e-06, + "loss": 0.592, + "step": 1827 + }, + { + "epoch": 0.1343920011762976, + "grad_norm": 0.8890321254730225, + "learning_rate": 4.994457654395645e-06, + "loss": 0.583, + "step": 1828 + }, + { + "epoch": 0.13446551977650345, + "grad_norm": 0.8820494413375854, + "learning_rate": 4.994451240162335e-06, + "loss": 0.6131, + "step": 1829 + }, + { + "epoch": 0.1345390383767093, + "grad_norm": 0.9167306423187256, + "learning_rate": 4.994444822223655e-06, + "loss": 0.6375, + "step": 1830 + }, + { + "epoch": 0.13461255697691515, + "grad_norm": 0.8909950256347656, + "learning_rate": 4.994438400579614e-06, + "loss": 0.6028, + "step": 1831 + }, + { + "epoch": 0.134686075577121, + "grad_norm": 0.9988374710083008, + "learning_rate": 4.9944319752302215e-06, + "loss": 0.6394, + "step": 1832 + }, + { + "epoch": 0.13475959417732686, + "grad_norm": 0.8921940922737122, + "learning_rate": 4.994425546175487e-06, + "loss": 0.6312, + "step": 1833 + }, + { + "epoch": 0.1348331127775327, + "grad_norm": 0.9203033447265625, + "learning_rate": 4.99441911341542e-06, + "loss": 0.6354, + "step": 1834 + }, + { + "epoch": 0.13490663137773856, + "grad_norm": 0.88136887550354, + "learning_rate": 4.994412676950029e-06, + "loss": 0.6544, + "step": 1835 + }, + { + "epoch": 0.1349801499779444, + "grad_norm": 0.8684060573577881, + "learning_rate": 4.9944062367793255e-06, + "loss": 0.6361, + "step": 1836 + }, + { + "epoch": 0.13505366857815027, + "grad_norm": 0.9016065001487732, + "learning_rate": 4.994399792903318e-06, + "loss": 0.6116, + "step": 1837 + }, + { + "epoch": 0.13512718717835612, + "grad_norm": 0.9060812592506409, + "learning_rate": 4.994393345322016e-06, + "loss": 0.5597, + "step": 1838 + }, + { + "epoch": 0.13520070577856197, + "grad_norm": 0.9442190527915955, + "learning_rate": 4.99438689403543e-06, + "loss": 0.6332, + "step": 1839 + }, + { + "epoch": 0.13527422437876782, + "grad_norm": 0.8760855793952942, + "learning_rate": 4.994380439043568e-06, + "loss": 0.5879, + "step": 1840 + }, + { + "epoch": 0.13534774297897367, + "grad_norm": 0.8795561194419861, + "learning_rate": 4.994373980346441e-06, + "loss": 0.5922, + "step": 1841 + }, + { + "epoch": 0.13542126157917952, + "grad_norm": 0.9152747988700867, + "learning_rate": 4.994367517944057e-06, + "loss": 0.6085, + "step": 1842 + }, + { + "epoch": 0.13549478017938538, + "grad_norm": 0.9391136765480042, + "learning_rate": 4.9943610518364275e-06, + "loss": 0.6296, + "step": 1843 + }, + { + "epoch": 0.13556829877959123, + "grad_norm": 0.8577148914337158, + "learning_rate": 4.994354582023561e-06, + "loss": 0.583, + "step": 1844 + }, + { + "epoch": 0.13564181737979708, + "grad_norm": 0.9253576993942261, + "learning_rate": 4.994348108505467e-06, + "loss": 0.6063, + "step": 1845 + }, + { + "epoch": 0.13571533598000293, + "grad_norm": 0.8630625009536743, + "learning_rate": 4.9943416312821565e-06, + "loss": 0.61, + "step": 1846 + }, + { + "epoch": 0.13578885458020878, + "grad_norm": 0.9434619545936584, + "learning_rate": 4.9943351503536375e-06, + "loss": 0.6448, + "step": 1847 + }, + { + "epoch": 0.13586237318041464, + "grad_norm": 0.9784210920333862, + "learning_rate": 4.9943286657199194e-06, + "loss": 0.6218, + "step": 1848 + }, + { + "epoch": 0.1359358917806205, + "grad_norm": 0.871012270450592, + "learning_rate": 4.994322177381014e-06, + "loss": 0.6125, + "step": 1849 + }, + { + "epoch": 0.13600941038082634, + "grad_norm": 0.9342452883720398, + "learning_rate": 4.994315685336929e-06, + "loss": 0.6215, + "step": 1850 + }, + { + "epoch": 0.1360829289810322, + "grad_norm": 0.8910565376281738, + "learning_rate": 4.994309189587675e-06, + "loss": 0.6036, + "step": 1851 + }, + { + "epoch": 0.13615644758123804, + "grad_norm": 0.9127495884895325, + "learning_rate": 4.994302690133261e-06, + "loss": 0.6525, + "step": 1852 + }, + { + "epoch": 0.1362299661814439, + "grad_norm": 0.8537747263908386, + "learning_rate": 4.9942961869736975e-06, + "loss": 0.5815, + "step": 1853 + }, + { + "epoch": 0.13630348478164975, + "grad_norm": 0.8783905506134033, + "learning_rate": 4.994289680108992e-06, + "loss": 0.5922, + "step": 1854 + }, + { + "epoch": 0.1363770033818556, + "grad_norm": 0.851421058177948, + "learning_rate": 4.994283169539157e-06, + "loss": 0.5612, + "step": 1855 + }, + { + "epoch": 0.13645052198206145, + "grad_norm": 0.8594253063201904, + "learning_rate": 4.994276655264201e-06, + "loss": 0.5792, + "step": 1856 + }, + { + "epoch": 0.1365240405822673, + "grad_norm": 0.9255067706108093, + "learning_rate": 4.994270137284133e-06, + "loss": 0.6162, + "step": 1857 + }, + { + "epoch": 0.13659755918247315, + "grad_norm": 0.8705329298973083, + "learning_rate": 4.994263615598964e-06, + "loss": 0.5712, + "step": 1858 + }, + { + "epoch": 0.136671077782679, + "grad_norm": 0.906637966632843, + "learning_rate": 4.994257090208702e-06, + "loss": 0.6142, + "step": 1859 + }, + { + "epoch": 0.13674459638288486, + "grad_norm": 0.9067385196685791, + "learning_rate": 4.994250561113359e-06, + "loss": 0.6119, + "step": 1860 + }, + { + "epoch": 0.1368181149830907, + "grad_norm": 0.8778902888298035, + "learning_rate": 4.994244028312943e-06, + "loss": 0.6264, + "step": 1861 + }, + { + "epoch": 0.13689163358329656, + "grad_norm": 0.8865839838981628, + "learning_rate": 4.994237491807463e-06, + "loss": 0.652, + "step": 1862 + }, + { + "epoch": 0.13696515218350241, + "grad_norm": 0.8757295608520508, + "learning_rate": 4.994230951596931e-06, + "loss": 0.5924, + "step": 1863 + }, + { + "epoch": 0.13703867078370827, + "grad_norm": 0.8621471524238586, + "learning_rate": 4.994224407681355e-06, + "loss": 0.5766, + "step": 1864 + }, + { + "epoch": 0.13711218938391412, + "grad_norm": 0.8681663274765015, + "learning_rate": 4.994217860060745e-06, + "loss": 0.609, + "step": 1865 + }, + { + "epoch": 0.13718570798411997, + "grad_norm": 0.8301835060119629, + "learning_rate": 4.994211308735111e-06, + "loss": 0.5873, + "step": 1866 + }, + { + "epoch": 0.13725922658432582, + "grad_norm": 0.9381612539291382, + "learning_rate": 4.994204753704463e-06, + "loss": 0.6192, + "step": 1867 + }, + { + "epoch": 0.13733274518453167, + "grad_norm": 0.9550758600234985, + "learning_rate": 4.9941981949688104e-06, + "loss": 0.5728, + "step": 1868 + }, + { + "epoch": 0.13740626378473753, + "grad_norm": 0.8731115460395813, + "learning_rate": 4.994191632528163e-06, + "loss": 0.6023, + "step": 1869 + }, + { + "epoch": 0.13747978238494338, + "grad_norm": 0.8721991181373596, + "learning_rate": 4.99418506638253e-06, + "loss": 0.5946, + "step": 1870 + }, + { + "epoch": 0.13755330098514923, + "grad_norm": 0.8587796092033386, + "learning_rate": 4.994178496531922e-06, + "loss": 0.6255, + "step": 1871 + }, + { + "epoch": 0.13762681958535508, + "grad_norm": 0.8944863677024841, + "learning_rate": 4.994171922976349e-06, + "loss": 0.6347, + "step": 1872 + }, + { + "epoch": 0.13770033818556093, + "grad_norm": 0.9064776301383972, + "learning_rate": 4.994165345715819e-06, + "loss": 0.5817, + "step": 1873 + }, + { + "epoch": 0.1377738567857668, + "grad_norm": 0.8829681277275085, + "learning_rate": 4.994158764750344e-06, + "loss": 0.5869, + "step": 1874 + }, + { + "epoch": 0.13784737538597266, + "grad_norm": 0.8605572581291199, + "learning_rate": 4.994152180079932e-06, + "loss": 0.6357, + "step": 1875 + }, + { + "epoch": 0.13792089398617852, + "grad_norm": 0.9462392926216125, + "learning_rate": 4.994145591704593e-06, + "loss": 0.6136, + "step": 1876 + }, + { + "epoch": 0.13799441258638437, + "grad_norm": 0.8767430186271667, + "learning_rate": 4.9941389996243385e-06, + "loss": 0.5814, + "step": 1877 + }, + { + "epoch": 0.13806793118659022, + "grad_norm": 0.8746331334114075, + "learning_rate": 4.994132403839177e-06, + "loss": 0.5911, + "step": 1878 + }, + { + "epoch": 0.13814144978679607, + "grad_norm": 0.8845453262329102, + "learning_rate": 4.994125804349118e-06, + "loss": 0.5828, + "step": 1879 + }, + { + "epoch": 0.13821496838700192, + "grad_norm": 0.9269023537635803, + "learning_rate": 4.994119201154172e-06, + "loss": 0.6506, + "step": 1880 + }, + { + "epoch": 0.13828848698720778, + "grad_norm": 0.8931878209114075, + "learning_rate": 4.994112594254349e-06, + "loss": 0.6105, + "step": 1881 + }, + { + "epoch": 0.13836200558741363, + "grad_norm": 0.8935298323631287, + "learning_rate": 4.994105983649657e-06, + "loss": 0.6142, + "step": 1882 + }, + { + "epoch": 0.13843552418761948, + "grad_norm": 0.8714648485183716, + "learning_rate": 4.994099369340107e-06, + "loss": 0.5925, + "step": 1883 + }, + { + "epoch": 0.13850904278782533, + "grad_norm": 0.9478265643119812, + "learning_rate": 4.99409275132571e-06, + "loss": 0.6164, + "step": 1884 + }, + { + "epoch": 0.13858256138803118, + "grad_norm": 0.8700025677680969, + "learning_rate": 4.994086129606475e-06, + "loss": 0.5856, + "step": 1885 + }, + { + "epoch": 0.13865607998823704, + "grad_norm": 0.8605608344078064, + "learning_rate": 4.994079504182411e-06, + "loss": 0.5989, + "step": 1886 + }, + { + "epoch": 0.1387295985884429, + "grad_norm": 0.855316698551178, + "learning_rate": 4.994072875053528e-06, + "loss": 0.6078, + "step": 1887 + }, + { + "epoch": 0.13880311718864874, + "grad_norm": 0.8653114438056946, + "learning_rate": 4.9940662422198376e-06, + "loss": 0.6245, + "step": 1888 + }, + { + "epoch": 0.1388766357888546, + "grad_norm": 0.8961775302886963, + "learning_rate": 4.9940596056813474e-06, + "loss": 0.5704, + "step": 1889 + }, + { + "epoch": 0.13895015438906044, + "grad_norm": 0.9326402544975281, + "learning_rate": 4.994052965438069e-06, + "loss": 0.5924, + "step": 1890 + }, + { + "epoch": 0.1390236729892663, + "grad_norm": 0.8850392699241638, + "learning_rate": 4.99404632149001e-06, + "loss": 0.6092, + "step": 1891 + }, + { + "epoch": 0.13909719158947215, + "grad_norm": 0.905234694480896, + "learning_rate": 4.994039673837183e-06, + "loss": 0.5799, + "step": 1892 + }, + { + "epoch": 0.139170710189678, + "grad_norm": 0.91145259141922, + "learning_rate": 4.9940330224795965e-06, + "loss": 0.6121, + "step": 1893 + }, + { + "epoch": 0.13924422878988385, + "grad_norm": 0.9142112135887146, + "learning_rate": 4.994026367417261e-06, + "loss": 0.5437, + "step": 1894 + }, + { + "epoch": 0.1393177473900897, + "grad_norm": 0.9689028859138489, + "learning_rate": 4.994019708650185e-06, + "loss": 0.6235, + "step": 1895 + }, + { + "epoch": 0.13939126599029555, + "grad_norm": 0.9668759107589722, + "learning_rate": 4.99401304617838e-06, + "loss": 0.6536, + "step": 1896 + }, + { + "epoch": 0.1394647845905014, + "grad_norm": 0.8808347582817078, + "learning_rate": 4.994006380001855e-06, + "loss": 0.5862, + "step": 1897 + }, + { + "epoch": 0.13953830319070726, + "grad_norm": 0.9245070219039917, + "learning_rate": 4.993999710120619e-06, + "loss": 0.627, + "step": 1898 + }, + { + "epoch": 0.1396118217909131, + "grad_norm": 0.9375527501106262, + "learning_rate": 4.993993036534684e-06, + "loss": 0.6637, + "step": 1899 + }, + { + "epoch": 0.13968534039111896, + "grad_norm": 0.9326124787330627, + "learning_rate": 4.993986359244058e-06, + "loss": 0.6194, + "step": 1900 + }, + { + "epoch": 0.1397588589913248, + "grad_norm": 0.8499554395675659, + "learning_rate": 4.993979678248753e-06, + "loss": 0.5895, + "step": 1901 + }, + { + "epoch": 0.13983237759153067, + "grad_norm": 0.904549241065979, + "learning_rate": 4.993972993548778e-06, + "loss": 0.588, + "step": 1902 + }, + { + "epoch": 0.13990589619173652, + "grad_norm": 0.9257004857063293, + "learning_rate": 4.993966305144142e-06, + "loss": 0.6133, + "step": 1903 + }, + { + "epoch": 0.13997941479194237, + "grad_norm": 0.878681480884552, + "learning_rate": 4.993959613034855e-06, + "loss": 0.5705, + "step": 1904 + }, + { + "epoch": 0.14005293339214822, + "grad_norm": 0.9429702162742615, + "learning_rate": 4.993952917220929e-06, + "loss": 0.5826, + "step": 1905 + }, + { + "epoch": 0.14012645199235407, + "grad_norm": 0.9814742803573608, + "learning_rate": 4.9939462177023716e-06, + "loss": 0.589, + "step": 1906 + }, + { + "epoch": 0.14019997059255992, + "grad_norm": 0.8545621633529663, + "learning_rate": 4.993939514479193e-06, + "loss": 0.61, + "step": 1907 + }, + { + "epoch": 0.14027348919276578, + "grad_norm": 0.8721655607223511, + "learning_rate": 4.993932807551406e-06, + "loss": 0.6142, + "step": 1908 + }, + { + "epoch": 0.14034700779297163, + "grad_norm": 0.8901684284210205, + "learning_rate": 4.993926096919016e-06, + "loss": 0.5999, + "step": 1909 + }, + { + "epoch": 0.14042052639317748, + "grad_norm": 0.8806648254394531, + "learning_rate": 4.993919382582036e-06, + "loss": 0.5671, + "step": 1910 + }, + { + "epoch": 0.14049404499338333, + "grad_norm": 0.8331906199455261, + "learning_rate": 4.993912664540476e-06, + "loss": 0.6077, + "step": 1911 + }, + { + "epoch": 0.14056756359358918, + "grad_norm": 0.8843333125114441, + "learning_rate": 4.993905942794345e-06, + "loss": 0.5978, + "step": 1912 + }, + { + "epoch": 0.14064108219379504, + "grad_norm": 0.8822590708732605, + "learning_rate": 4.993899217343654e-06, + "loss": 0.5893, + "step": 1913 + }, + { + "epoch": 0.1407146007940009, + "grad_norm": 0.8990853428840637, + "learning_rate": 4.993892488188411e-06, + "loss": 0.5768, + "step": 1914 + }, + { + "epoch": 0.14078811939420674, + "grad_norm": 0.8738876581192017, + "learning_rate": 4.993885755328628e-06, + "loss": 0.5483, + "step": 1915 + }, + { + "epoch": 0.1408616379944126, + "grad_norm": 0.8884310722351074, + "learning_rate": 4.993879018764314e-06, + "loss": 0.6162, + "step": 1916 + }, + { + "epoch": 0.14093515659461844, + "grad_norm": 0.8749534487724304, + "learning_rate": 4.99387227849548e-06, + "loss": 0.5874, + "step": 1917 + }, + { + "epoch": 0.1410086751948243, + "grad_norm": 0.8792432546615601, + "learning_rate": 4.993865534522134e-06, + "loss": 0.5595, + "step": 1918 + }, + { + "epoch": 0.14108219379503015, + "grad_norm": 0.8871124386787415, + "learning_rate": 4.993858786844288e-06, + "loss": 0.5655, + "step": 1919 + }, + { + "epoch": 0.141155712395236, + "grad_norm": 0.9621310234069824, + "learning_rate": 4.993852035461951e-06, + "loss": 0.6157, + "step": 1920 + }, + { + "epoch": 0.14122923099544185, + "grad_norm": 0.9494878649711609, + "learning_rate": 4.993845280375133e-06, + "loss": 0.6108, + "step": 1921 + }, + { + "epoch": 0.1413027495956477, + "grad_norm": 0.8679466843605042, + "learning_rate": 4.993838521583846e-06, + "loss": 0.6226, + "step": 1922 + }, + { + "epoch": 0.14137626819585355, + "grad_norm": 0.9312536716461182, + "learning_rate": 4.9938317590880966e-06, + "loss": 0.6464, + "step": 1923 + }, + { + "epoch": 0.1414497867960594, + "grad_norm": 0.8939416408538818, + "learning_rate": 4.993824992887898e-06, + "loss": 0.6033, + "step": 1924 + }, + { + "epoch": 0.14152330539626526, + "grad_norm": 0.9010453820228577, + "learning_rate": 4.9938182229832585e-06, + "loss": 0.6127, + "step": 1925 + }, + { + "epoch": 0.1415968239964711, + "grad_norm": 0.9062104225158691, + "learning_rate": 4.9938114493741885e-06, + "loss": 0.5885, + "step": 1926 + }, + { + "epoch": 0.14167034259667696, + "grad_norm": 0.8978317975997925, + "learning_rate": 4.993804672060698e-06, + "loss": 0.6372, + "step": 1927 + }, + { + "epoch": 0.14174386119688281, + "grad_norm": 0.8746522665023804, + "learning_rate": 4.993797891042798e-06, + "loss": 0.6312, + "step": 1928 + }, + { + "epoch": 0.14181737979708867, + "grad_norm": 0.8768738508224487, + "learning_rate": 4.993791106320497e-06, + "loss": 0.587, + "step": 1929 + }, + { + "epoch": 0.14189089839729452, + "grad_norm": 0.8828332424163818, + "learning_rate": 4.993784317893806e-06, + "loss": 0.57, + "step": 1930 + }, + { + "epoch": 0.14196441699750037, + "grad_norm": 0.8950077295303345, + "learning_rate": 4.9937775257627354e-06, + "loss": 0.6541, + "step": 1931 + }, + { + "epoch": 0.14203793559770622, + "grad_norm": 0.9518096446990967, + "learning_rate": 4.993770729927294e-06, + "loss": 0.6331, + "step": 1932 + }, + { + "epoch": 0.14211145419791207, + "grad_norm": 0.8729810118675232, + "learning_rate": 4.993763930387494e-06, + "loss": 0.6165, + "step": 1933 + }, + { + "epoch": 0.14218497279811793, + "grad_norm": 0.9291284084320068, + "learning_rate": 4.993757127143343e-06, + "loss": 0.5928, + "step": 1934 + }, + { + "epoch": 0.14225849139832378, + "grad_norm": 0.8924115896224976, + "learning_rate": 4.993750320194853e-06, + "loss": 0.6156, + "step": 1935 + }, + { + "epoch": 0.14233200999852963, + "grad_norm": 0.9754784107208252, + "learning_rate": 4.993743509542033e-06, + "loss": 0.6363, + "step": 1936 + }, + { + "epoch": 0.14240552859873548, + "grad_norm": 0.8575875163078308, + "learning_rate": 4.993736695184894e-06, + "loss": 0.5762, + "step": 1937 + }, + { + "epoch": 0.14247904719894133, + "grad_norm": 0.860281229019165, + "learning_rate": 4.993729877123446e-06, + "loss": 0.588, + "step": 1938 + }, + { + "epoch": 0.14255256579914719, + "grad_norm": 0.8908127546310425, + "learning_rate": 4.993723055357697e-06, + "loss": 0.5918, + "step": 1939 + }, + { + "epoch": 0.14262608439935304, + "grad_norm": 0.9038593173027039, + "learning_rate": 4.9937162298876605e-06, + "loss": 0.591, + "step": 1940 + }, + { + "epoch": 0.1426996029995589, + "grad_norm": 0.8887065649032593, + "learning_rate": 4.993709400713345e-06, + "loss": 0.5816, + "step": 1941 + }, + { + "epoch": 0.14277312159976474, + "grad_norm": 0.953069806098938, + "learning_rate": 4.993702567834761e-06, + "loss": 0.6614, + "step": 1942 + }, + { + "epoch": 0.1428466401999706, + "grad_norm": 0.8680023550987244, + "learning_rate": 4.993695731251918e-06, + "loss": 0.6139, + "step": 1943 + }, + { + "epoch": 0.14292015880017644, + "grad_norm": 0.8614834547042847, + "learning_rate": 4.993688890964826e-06, + "loss": 0.5732, + "step": 1944 + }, + { + "epoch": 0.1429936774003823, + "grad_norm": 0.8813884258270264, + "learning_rate": 4.993682046973497e-06, + "loss": 0.6463, + "step": 1945 + }, + { + "epoch": 0.14306719600058815, + "grad_norm": 0.8664059042930603, + "learning_rate": 4.993675199277938e-06, + "loss": 0.6245, + "step": 1946 + }, + { + "epoch": 0.143140714600794, + "grad_norm": 0.8320324420928955, + "learning_rate": 4.993668347878164e-06, + "loss": 0.5156, + "step": 1947 + }, + { + "epoch": 0.14321423320099985, + "grad_norm": 0.9018843173980713, + "learning_rate": 4.9936614927741794e-06, + "loss": 0.6066, + "step": 1948 + }, + { + "epoch": 0.1432877518012057, + "grad_norm": 0.9349761009216309, + "learning_rate": 4.993654633965999e-06, + "loss": 0.5968, + "step": 1949 + }, + { + "epoch": 0.14336127040141156, + "grad_norm": 0.890771210193634, + "learning_rate": 4.99364777145363e-06, + "loss": 0.5942, + "step": 1950 + }, + { + "epoch": 0.1434347890016174, + "grad_norm": 0.8847559094429016, + "learning_rate": 4.993640905237084e-06, + "loss": 0.6161, + "step": 1951 + }, + { + "epoch": 0.14350830760182326, + "grad_norm": 0.8344963192939758, + "learning_rate": 4.993634035316371e-06, + "loss": 0.5608, + "step": 1952 + }, + { + "epoch": 0.1435818262020291, + "grad_norm": 0.8413221836090088, + "learning_rate": 4.993627161691501e-06, + "loss": 0.6074, + "step": 1953 + }, + { + "epoch": 0.14365534480223496, + "grad_norm": 0.9319347739219666, + "learning_rate": 4.993620284362485e-06, + "loss": 0.6362, + "step": 1954 + }, + { + "epoch": 0.14372886340244082, + "grad_norm": 0.9383192658424377, + "learning_rate": 4.993613403329332e-06, + "loss": 0.6304, + "step": 1955 + }, + { + "epoch": 0.14380238200264667, + "grad_norm": 0.8909980058670044, + "learning_rate": 4.993606518592053e-06, + "loss": 0.5536, + "step": 1956 + }, + { + "epoch": 0.14387590060285252, + "grad_norm": 0.9530027508735657, + "learning_rate": 4.993599630150658e-06, + "loss": 0.6029, + "step": 1957 + }, + { + "epoch": 0.14394941920305837, + "grad_norm": 0.9471220374107361, + "learning_rate": 4.993592738005159e-06, + "loss": 0.6298, + "step": 1958 + }, + { + "epoch": 0.14402293780326422, + "grad_norm": 0.9612910747528076, + "learning_rate": 4.993585842155562e-06, + "loss": 0.5933, + "step": 1959 + }, + { + "epoch": 0.14409645640347007, + "grad_norm": 0.8628808259963989, + "learning_rate": 4.993578942601881e-06, + "loss": 0.5797, + "step": 1960 + }, + { + "epoch": 0.14416997500367593, + "grad_norm": 0.9250409007072449, + "learning_rate": 4.993572039344125e-06, + "loss": 0.6407, + "step": 1961 + }, + { + "epoch": 0.14424349360388178, + "grad_norm": 0.9235482215881348, + "learning_rate": 4.993565132382304e-06, + "loss": 0.5808, + "step": 1962 + }, + { + "epoch": 0.14431701220408763, + "grad_norm": 0.9629660844802856, + "learning_rate": 4.993558221716429e-06, + "loss": 0.656, + "step": 1963 + }, + { + "epoch": 0.14439053080429348, + "grad_norm": 0.9031004905700684, + "learning_rate": 4.993551307346509e-06, + "loss": 0.6195, + "step": 1964 + }, + { + "epoch": 0.14446404940449933, + "grad_norm": 0.878551185131073, + "learning_rate": 4.993544389272555e-06, + "loss": 0.5908, + "step": 1965 + }, + { + "epoch": 0.14453756800470519, + "grad_norm": 0.9374029040336609, + "learning_rate": 4.993537467494579e-06, + "loss": 0.6407, + "step": 1966 + }, + { + "epoch": 0.14461108660491104, + "grad_norm": 0.9224410057067871, + "learning_rate": 4.993530542012588e-06, + "loss": 0.662, + "step": 1967 + }, + { + "epoch": 0.1446846052051169, + "grad_norm": 0.8922368288040161, + "learning_rate": 4.993523612826595e-06, + "loss": 0.6229, + "step": 1968 + }, + { + "epoch": 0.14475812380532274, + "grad_norm": 0.8821583390235901, + "learning_rate": 4.993516679936609e-06, + "loss": 0.6008, + "step": 1969 + }, + { + "epoch": 0.1448316424055286, + "grad_norm": 0.8507670164108276, + "learning_rate": 4.993509743342639e-06, + "loss": 0.5876, + "step": 1970 + }, + { + "epoch": 0.14490516100573445, + "grad_norm": 0.9142316579818726, + "learning_rate": 4.993502803044699e-06, + "loss": 0.583, + "step": 1971 + }, + { + "epoch": 0.1449786796059403, + "grad_norm": 1.0007017850875854, + "learning_rate": 4.993495859042796e-06, + "loss": 0.6284, + "step": 1972 + }, + { + "epoch": 0.14505219820614615, + "grad_norm": 0.8320895433425903, + "learning_rate": 4.993488911336941e-06, + "loss": 0.5542, + "step": 1973 + }, + { + "epoch": 0.145125716806352, + "grad_norm": 0.8584206104278564, + "learning_rate": 4.993481959927145e-06, + "loss": 0.618, + "step": 1974 + }, + { + "epoch": 0.14519923540655785, + "grad_norm": 0.8634738922119141, + "learning_rate": 4.993475004813419e-06, + "loss": 0.6361, + "step": 1975 + }, + { + "epoch": 0.1452727540067637, + "grad_norm": 0.8374816179275513, + "learning_rate": 4.993468045995772e-06, + "loss": 0.5827, + "step": 1976 + }, + { + "epoch": 0.14534627260696956, + "grad_norm": 0.8908120393753052, + "learning_rate": 4.993461083474215e-06, + "loss": 0.597, + "step": 1977 + }, + { + "epoch": 0.1454197912071754, + "grad_norm": 0.9134806394577026, + "learning_rate": 4.9934541172487575e-06, + "loss": 0.6508, + "step": 1978 + }, + { + "epoch": 0.14549330980738126, + "grad_norm": 0.9322769045829773, + "learning_rate": 4.9934471473194106e-06, + "loss": 0.6419, + "step": 1979 + }, + { + "epoch": 0.1455668284075871, + "grad_norm": 0.9442958235740662, + "learning_rate": 4.993440173686185e-06, + "loss": 0.6738, + "step": 1980 + }, + { + "epoch": 0.14564034700779296, + "grad_norm": 0.8557815551757812, + "learning_rate": 4.99343319634909e-06, + "loss": 0.6108, + "step": 1981 + }, + { + "epoch": 0.14571386560799882, + "grad_norm": 0.8601601123809814, + "learning_rate": 4.9934262153081364e-06, + "loss": 0.5502, + "step": 1982 + }, + { + "epoch": 0.14578738420820467, + "grad_norm": 0.9286725521087646, + "learning_rate": 4.993419230563335e-06, + "loss": 0.5891, + "step": 1983 + }, + { + "epoch": 0.14586090280841052, + "grad_norm": 0.8827030658721924, + "learning_rate": 4.9934122421146955e-06, + "loss": 0.6188, + "step": 1984 + }, + { + "epoch": 0.14593442140861637, + "grad_norm": 0.8618441224098206, + "learning_rate": 4.99340524996223e-06, + "loss": 0.5962, + "step": 1985 + }, + { + "epoch": 0.14600794000882222, + "grad_norm": 0.9042010307312012, + "learning_rate": 4.993398254105945e-06, + "loss": 0.6428, + "step": 1986 + }, + { + "epoch": 0.14608145860902808, + "grad_norm": 0.9133428931236267, + "learning_rate": 4.993391254545855e-06, + "loss": 0.6145, + "step": 1987 + }, + { + "epoch": 0.14615497720923393, + "grad_norm": 0.8925638794898987, + "learning_rate": 4.99338425128197e-06, + "loss": 0.5795, + "step": 1988 + }, + { + "epoch": 0.14622849580943978, + "grad_norm": 0.8527039885520935, + "learning_rate": 4.993377244314297e-06, + "loss": 0.578, + "step": 1989 + }, + { + "epoch": 0.14630201440964563, + "grad_norm": 0.9356743693351746, + "learning_rate": 4.99337023364285e-06, + "loss": 0.6359, + "step": 1990 + }, + { + "epoch": 0.14637553300985148, + "grad_norm": 0.9220222234725952, + "learning_rate": 4.993363219267637e-06, + "loss": 0.6146, + "step": 1991 + }, + { + "epoch": 0.14644905161005733, + "grad_norm": 0.8928828835487366, + "learning_rate": 4.993356201188671e-06, + "loss": 0.6259, + "step": 1992 + }, + { + "epoch": 0.1465225702102632, + "grad_norm": 0.8752000331878662, + "learning_rate": 4.99334917940596e-06, + "loss": 0.597, + "step": 1993 + }, + { + "epoch": 0.14659608881046904, + "grad_norm": 0.8265818953514099, + "learning_rate": 4.9933421539195144e-06, + "loss": 0.5964, + "step": 1994 + }, + { + "epoch": 0.1466696074106749, + "grad_norm": 0.8600594401359558, + "learning_rate": 4.993335124729347e-06, + "loss": 0.6021, + "step": 1995 + }, + { + "epoch": 0.14674312601088074, + "grad_norm": 0.89439857006073, + "learning_rate": 4.993328091835466e-06, + "loss": 0.6194, + "step": 1996 + }, + { + "epoch": 0.1468166446110866, + "grad_norm": 0.9430837631225586, + "learning_rate": 4.993321055237883e-06, + "loss": 0.6579, + "step": 1997 + }, + { + "epoch": 0.14689016321129245, + "grad_norm": 0.8811208605766296, + "learning_rate": 4.993314014936609e-06, + "loss": 0.5995, + "step": 1998 + }, + { + "epoch": 0.1469636818114983, + "grad_norm": 0.9167332053184509, + "learning_rate": 4.993306970931652e-06, + "loss": 0.6101, + "step": 1999 + }, + { + "epoch": 0.14703720041170415, + "grad_norm": 0.8356183171272278, + "learning_rate": 4.993299923223025e-06, + "loss": 0.5743, + "step": 2000 + }, + { + "epoch": 0.14711071901191, + "grad_norm": 0.8486096262931824, + "learning_rate": 4.993292871810737e-06, + "loss": 0.5888, + "step": 2001 + }, + { + "epoch": 0.14718423761211585, + "grad_norm": 0.8856045007705688, + "learning_rate": 4.9932858166947994e-06, + "loss": 0.635, + "step": 2002 + }, + { + "epoch": 0.1472577562123217, + "grad_norm": 0.928325891494751, + "learning_rate": 4.993278757875221e-06, + "loss": 0.6768, + "step": 2003 + }, + { + "epoch": 0.14733127481252756, + "grad_norm": 0.9372091293334961, + "learning_rate": 4.9932716953520155e-06, + "loss": 0.6635, + "step": 2004 + }, + { + "epoch": 0.1474047934127334, + "grad_norm": 0.926615834236145, + "learning_rate": 4.99326462912519e-06, + "loss": 0.5885, + "step": 2005 + }, + { + "epoch": 0.14747831201293926, + "grad_norm": 0.9090005159378052, + "learning_rate": 4.9932575591947574e-06, + "loss": 0.6077, + "step": 2006 + }, + { + "epoch": 0.1475518306131451, + "grad_norm": 0.8946206569671631, + "learning_rate": 4.9932504855607265e-06, + "loss": 0.6024, + "step": 2007 + }, + { + "epoch": 0.14762534921335096, + "grad_norm": 0.8989397883415222, + "learning_rate": 4.993243408223108e-06, + "loss": 0.596, + "step": 2008 + }, + { + "epoch": 0.14769886781355682, + "grad_norm": 0.8409633040428162, + "learning_rate": 4.993236327181914e-06, + "loss": 0.5734, + "step": 2009 + }, + { + "epoch": 0.14777238641376267, + "grad_norm": 0.89621502161026, + "learning_rate": 4.9932292424371544e-06, + "loss": 0.6072, + "step": 2010 + }, + { + "epoch": 0.14784590501396852, + "grad_norm": 0.9188008904457092, + "learning_rate": 4.993222153988838e-06, + "loss": 0.6342, + "step": 2011 + }, + { + "epoch": 0.14791942361417437, + "grad_norm": 0.846118688583374, + "learning_rate": 4.993215061836977e-06, + "loss": 0.6091, + "step": 2012 + }, + { + "epoch": 0.14799294221438025, + "grad_norm": 0.872322678565979, + "learning_rate": 4.9932079659815815e-06, + "loss": 0.6042, + "step": 2013 + }, + { + "epoch": 0.1480664608145861, + "grad_norm": 0.8632361888885498, + "learning_rate": 4.993200866422663e-06, + "loss": 0.5991, + "step": 2014 + }, + { + "epoch": 0.14813997941479196, + "grad_norm": 0.8693488836288452, + "learning_rate": 4.99319376316023e-06, + "loss": 0.5872, + "step": 2015 + }, + { + "epoch": 0.1482134980149978, + "grad_norm": 0.87595534324646, + "learning_rate": 4.993186656194295e-06, + "loss": 0.5789, + "step": 2016 + }, + { + "epoch": 0.14828701661520366, + "grad_norm": 0.9076812267303467, + "learning_rate": 4.993179545524866e-06, + "loss": 0.6287, + "step": 2017 + }, + { + "epoch": 0.1483605352154095, + "grad_norm": 0.8796176910400391, + "learning_rate": 4.993172431151957e-06, + "loss": 0.6213, + "step": 2018 + }, + { + "epoch": 0.14843405381561536, + "grad_norm": 0.8926225900650024, + "learning_rate": 4.993165313075577e-06, + "loss": 0.625, + "step": 2019 + }, + { + "epoch": 0.14850757241582122, + "grad_norm": 0.8614782691001892, + "learning_rate": 4.993158191295736e-06, + "loss": 0.6285, + "step": 2020 + }, + { + "epoch": 0.14858109101602707, + "grad_norm": 0.895997941493988, + "learning_rate": 4.993151065812445e-06, + "loss": 0.5966, + "step": 2021 + }, + { + "epoch": 0.14865460961623292, + "grad_norm": 0.9744428396224976, + "learning_rate": 4.993143936625715e-06, + "loss": 0.6022, + "step": 2022 + }, + { + "epoch": 0.14872812821643877, + "grad_norm": 0.8732846975326538, + "learning_rate": 4.993136803735555e-06, + "loss": 0.6203, + "step": 2023 + }, + { + "epoch": 0.14880164681664462, + "grad_norm": 0.8611468076705933, + "learning_rate": 4.993129667141979e-06, + "loss": 0.5948, + "step": 2024 + }, + { + "epoch": 0.14887516541685047, + "grad_norm": 0.8625207543373108, + "learning_rate": 4.993122526844993e-06, + "loss": 0.6068, + "step": 2025 + }, + { + "epoch": 0.14894868401705633, + "grad_norm": 0.9068389534950256, + "learning_rate": 4.993115382844611e-06, + "loss": 0.6328, + "step": 2026 + }, + { + "epoch": 0.14902220261726218, + "grad_norm": 0.8804337978363037, + "learning_rate": 4.993108235140842e-06, + "loss": 0.6006, + "step": 2027 + }, + { + "epoch": 0.14909572121746803, + "grad_norm": 0.8437624573707581, + "learning_rate": 4.9931010837336986e-06, + "loss": 0.6124, + "step": 2028 + }, + { + "epoch": 0.14916923981767388, + "grad_norm": 0.8493860960006714, + "learning_rate": 4.99309392862319e-06, + "loss": 0.5935, + "step": 2029 + }, + { + "epoch": 0.14924275841787973, + "grad_norm": 0.927146315574646, + "learning_rate": 4.993086769809326e-06, + "loss": 0.5984, + "step": 2030 + }, + { + "epoch": 0.14931627701808559, + "grad_norm": 0.9044570326805115, + "learning_rate": 4.993079607292117e-06, + "loss": 0.5476, + "step": 2031 + }, + { + "epoch": 0.14938979561829144, + "grad_norm": 0.8480664491653442, + "learning_rate": 4.993072441071577e-06, + "loss": 0.6015, + "step": 2032 + }, + { + "epoch": 0.1494633142184973, + "grad_norm": 0.8963684439659119, + "learning_rate": 4.993065271147714e-06, + "loss": 0.5941, + "step": 2033 + }, + { + "epoch": 0.14953683281870314, + "grad_norm": 0.9184762239456177, + "learning_rate": 4.993058097520538e-06, + "loss": 0.5758, + "step": 2034 + }, + { + "epoch": 0.149610351418909, + "grad_norm": 0.9291899800300598, + "learning_rate": 4.993050920190061e-06, + "loss": 0.6487, + "step": 2035 + }, + { + "epoch": 0.14968387001911485, + "grad_norm": 0.8696863651275635, + "learning_rate": 4.9930437391562935e-06, + "loss": 0.5997, + "step": 2036 + }, + { + "epoch": 0.1497573886193207, + "grad_norm": 0.9055871367454529, + "learning_rate": 4.993036554419246e-06, + "loss": 0.6509, + "step": 2037 + }, + { + "epoch": 0.14983090721952655, + "grad_norm": 0.916183590888977, + "learning_rate": 4.9930293659789295e-06, + "loss": 0.6303, + "step": 2038 + }, + { + "epoch": 0.1499044258197324, + "grad_norm": 0.8911793828010559, + "learning_rate": 4.993022173835354e-06, + "loss": 0.6313, + "step": 2039 + }, + { + "epoch": 0.14997794441993825, + "grad_norm": 0.9066827893257141, + "learning_rate": 4.993014977988531e-06, + "loss": 0.5978, + "step": 2040 + }, + { + "epoch": 0.1500514630201441, + "grad_norm": 0.8701024651527405, + "learning_rate": 4.993007778438471e-06, + "loss": 0.586, + "step": 2041 + }, + { + "epoch": 0.15012498162034996, + "grad_norm": 0.8983065485954285, + "learning_rate": 4.993000575185184e-06, + "loss": 0.6082, + "step": 2042 + }, + { + "epoch": 0.1501985002205558, + "grad_norm": 0.8586896657943726, + "learning_rate": 4.992993368228681e-06, + "loss": 0.6322, + "step": 2043 + }, + { + "epoch": 0.15027201882076166, + "grad_norm": 0.8237684369087219, + "learning_rate": 4.992986157568973e-06, + "loss": 0.5849, + "step": 2044 + }, + { + "epoch": 0.1503455374209675, + "grad_norm": 0.885899007320404, + "learning_rate": 4.992978943206071e-06, + "loss": 0.6203, + "step": 2045 + }, + { + "epoch": 0.15041905602117336, + "grad_norm": 0.9218222498893738, + "learning_rate": 4.992971725139985e-06, + "loss": 0.6185, + "step": 2046 + }, + { + "epoch": 0.15049257462137922, + "grad_norm": 0.9616694450378418, + "learning_rate": 4.992964503370726e-06, + "loss": 0.6274, + "step": 2047 + }, + { + "epoch": 0.15056609322158507, + "grad_norm": 0.8953124284744263, + "learning_rate": 4.992957277898305e-06, + "loss": 0.5738, + "step": 2048 + }, + { + "epoch": 0.15063961182179092, + "grad_norm": 0.8975686430931091, + "learning_rate": 4.992950048722732e-06, + "loss": 0.6368, + "step": 2049 + }, + { + "epoch": 0.15071313042199677, + "grad_norm": 0.8826925754547119, + "learning_rate": 4.9929428158440184e-06, + "loss": 0.6115, + "step": 2050 + }, + { + "epoch": 0.15078664902220262, + "grad_norm": 0.8765580654144287, + "learning_rate": 4.992935579262175e-06, + "loss": 0.5872, + "step": 2051 + }, + { + "epoch": 0.15086016762240848, + "grad_norm": 0.905973494052887, + "learning_rate": 4.992928338977212e-06, + "loss": 0.6243, + "step": 2052 + }, + { + "epoch": 0.15093368622261433, + "grad_norm": 0.9011672139167786, + "learning_rate": 4.99292109498914e-06, + "loss": 0.6411, + "step": 2053 + }, + { + "epoch": 0.15100720482282018, + "grad_norm": 0.8848375678062439, + "learning_rate": 4.992913847297971e-06, + "loss": 0.6118, + "step": 2054 + }, + { + "epoch": 0.15108072342302603, + "grad_norm": 0.8705130219459534, + "learning_rate": 4.992906595903715e-06, + "loss": 0.61, + "step": 2055 + }, + { + "epoch": 0.15115424202323188, + "grad_norm": 0.9036268591880798, + "learning_rate": 4.992899340806383e-06, + "loss": 0.5957, + "step": 2056 + }, + { + "epoch": 0.15122776062343773, + "grad_norm": 0.86445152759552, + "learning_rate": 4.992892082005984e-06, + "loss": 0.6222, + "step": 2057 + }, + { + "epoch": 0.1513012792236436, + "grad_norm": 0.8816419839859009, + "learning_rate": 4.992884819502531e-06, + "loss": 0.6104, + "step": 2058 + }, + { + "epoch": 0.15137479782384944, + "grad_norm": 0.8453980684280396, + "learning_rate": 4.992877553296035e-06, + "loss": 0.6066, + "step": 2059 + }, + { + "epoch": 0.1514483164240553, + "grad_norm": 0.8719300031661987, + "learning_rate": 4.992870283386505e-06, + "loss": 0.5918, + "step": 2060 + }, + { + "epoch": 0.15152183502426114, + "grad_norm": 0.8753029704093933, + "learning_rate": 4.9928630097739535e-06, + "loss": 0.5924, + "step": 2061 + }, + { + "epoch": 0.151595353624467, + "grad_norm": 0.8714781999588013, + "learning_rate": 4.9928557324583905e-06, + "loss": 0.5957, + "step": 2062 + }, + { + "epoch": 0.15166887222467285, + "grad_norm": 0.8990970849990845, + "learning_rate": 4.9928484514398265e-06, + "loss": 0.6255, + "step": 2063 + }, + { + "epoch": 0.1517423908248787, + "grad_norm": 0.9032968282699585, + "learning_rate": 4.9928411667182716e-06, + "loss": 0.6192, + "step": 2064 + }, + { + "epoch": 0.15181590942508455, + "grad_norm": 0.8877323269844055, + "learning_rate": 4.992833878293739e-06, + "loss": 0.6029, + "step": 2065 + }, + { + "epoch": 0.1518894280252904, + "grad_norm": 0.9103398323059082, + "learning_rate": 4.992826586166238e-06, + "loss": 0.6335, + "step": 2066 + }, + { + "epoch": 0.15196294662549625, + "grad_norm": 0.9473859071731567, + "learning_rate": 4.99281929033578e-06, + "loss": 0.5869, + "step": 2067 + }, + { + "epoch": 0.1520364652257021, + "grad_norm": 0.8974996209144592, + "learning_rate": 4.992811990802374e-06, + "loss": 0.6138, + "step": 2068 + }, + { + "epoch": 0.15210998382590796, + "grad_norm": 0.8633518218994141, + "learning_rate": 4.992804687566033e-06, + "loss": 0.5937, + "step": 2069 + }, + { + "epoch": 0.1521835024261138, + "grad_norm": 0.8852039575576782, + "learning_rate": 4.992797380626767e-06, + "loss": 0.6026, + "step": 2070 + }, + { + "epoch": 0.15225702102631966, + "grad_norm": 0.937819242477417, + "learning_rate": 4.992790069984588e-06, + "loss": 0.6376, + "step": 2071 + }, + { + "epoch": 0.1523305396265255, + "grad_norm": 0.9038795828819275, + "learning_rate": 4.992782755639505e-06, + "loss": 0.604, + "step": 2072 + }, + { + "epoch": 0.15240405822673136, + "grad_norm": 0.9866390824317932, + "learning_rate": 4.992775437591529e-06, + "loss": 0.6687, + "step": 2073 + }, + { + "epoch": 0.15247757682693722, + "grad_norm": 0.8821561336517334, + "learning_rate": 4.992768115840673e-06, + "loss": 0.5769, + "step": 2074 + }, + { + "epoch": 0.15255109542714307, + "grad_norm": 0.899945080280304, + "learning_rate": 4.992760790386946e-06, + "loss": 0.606, + "step": 2075 + }, + { + "epoch": 0.15262461402734892, + "grad_norm": 0.9488226771354675, + "learning_rate": 4.9927534612303594e-06, + "loss": 0.6158, + "step": 2076 + }, + { + "epoch": 0.15269813262755477, + "grad_norm": 0.902712345123291, + "learning_rate": 4.9927461283709235e-06, + "loss": 0.6111, + "step": 2077 + }, + { + "epoch": 0.15277165122776062, + "grad_norm": 0.9532789587974548, + "learning_rate": 4.99273879180865e-06, + "loss": 0.632, + "step": 2078 + }, + { + "epoch": 0.15284516982796648, + "grad_norm": 0.8683720827102661, + "learning_rate": 4.99273145154355e-06, + "loss": 0.5992, + "step": 2079 + }, + { + "epoch": 0.15291868842817233, + "grad_norm": 0.8598487377166748, + "learning_rate": 4.9927241075756325e-06, + "loss": 0.5587, + "step": 2080 + }, + { + "epoch": 0.15299220702837818, + "grad_norm": 0.9090564846992493, + "learning_rate": 4.9927167599049115e-06, + "loss": 0.5867, + "step": 2081 + }, + { + "epoch": 0.15306572562858403, + "grad_norm": 0.8759300708770752, + "learning_rate": 4.992709408531395e-06, + "loss": 0.5779, + "step": 2082 + }, + { + "epoch": 0.15313924422878988, + "grad_norm": 0.9203583002090454, + "learning_rate": 4.992702053455097e-06, + "loss": 0.5988, + "step": 2083 + }, + { + "epoch": 0.15321276282899574, + "grad_norm": 0.8936710357666016, + "learning_rate": 4.992694694676025e-06, + "loss": 0.6068, + "step": 2084 + }, + { + "epoch": 0.1532862814292016, + "grad_norm": 0.891085147857666, + "learning_rate": 4.9926873321941915e-06, + "loss": 0.5919, + "step": 2085 + }, + { + "epoch": 0.15335980002940744, + "grad_norm": 0.8829676508903503, + "learning_rate": 4.992679966009608e-06, + "loss": 0.6414, + "step": 2086 + }, + { + "epoch": 0.1534333186296133, + "grad_norm": 0.87547767162323, + "learning_rate": 4.992672596122284e-06, + "loss": 0.6111, + "step": 2087 + }, + { + "epoch": 0.15350683722981914, + "grad_norm": 0.9011093378067017, + "learning_rate": 4.992665222532232e-06, + "loss": 0.6163, + "step": 2088 + }, + { + "epoch": 0.153580355830025, + "grad_norm": 0.893439769744873, + "learning_rate": 4.992657845239464e-06, + "loss": 0.6156, + "step": 2089 + }, + { + "epoch": 0.15365387443023085, + "grad_norm": 0.9118849039077759, + "learning_rate": 4.9926504642439865e-06, + "loss": 0.5612, + "step": 2090 + }, + { + "epoch": 0.1537273930304367, + "grad_norm": 0.9065951704978943, + "learning_rate": 4.992643079545815e-06, + "loss": 0.6042, + "step": 2091 + }, + { + "epoch": 0.15380091163064255, + "grad_norm": 0.9151957631111145, + "learning_rate": 4.992635691144958e-06, + "loss": 0.5964, + "step": 2092 + }, + { + "epoch": 0.1538744302308484, + "grad_norm": 0.8649517297744751, + "learning_rate": 4.992628299041427e-06, + "loss": 0.6079, + "step": 2093 + }, + { + "epoch": 0.15394794883105425, + "grad_norm": 0.8977082371711731, + "learning_rate": 4.992620903235233e-06, + "loss": 0.5838, + "step": 2094 + }, + { + "epoch": 0.1540214674312601, + "grad_norm": 1.0021982192993164, + "learning_rate": 4.992613503726388e-06, + "loss": 0.6218, + "step": 2095 + }, + { + "epoch": 0.15409498603146596, + "grad_norm": 0.8889201879501343, + "learning_rate": 4.992606100514902e-06, + "loss": 0.6352, + "step": 2096 + }, + { + "epoch": 0.1541685046316718, + "grad_norm": 0.946492612361908, + "learning_rate": 4.992598693600786e-06, + "loss": 0.6162, + "step": 2097 + }, + { + "epoch": 0.15424202323187766, + "grad_norm": 0.8687295317649841, + "learning_rate": 4.9925912829840505e-06, + "loss": 0.5807, + "step": 2098 + }, + { + "epoch": 0.1543155418320835, + "grad_norm": 0.8813924789428711, + "learning_rate": 4.992583868664708e-06, + "loss": 0.5888, + "step": 2099 + }, + { + "epoch": 0.15438906043228937, + "grad_norm": 0.9143240451812744, + "learning_rate": 4.992576450642769e-06, + "loss": 0.6266, + "step": 2100 + }, + { + "epoch": 0.15446257903249522, + "grad_norm": 0.9310875535011292, + "learning_rate": 4.992569028918242e-06, + "loss": 0.6377, + "step": 2101 + }, + { + "epoch": 0.15453609763270107, + "grad_norm": 0.9306317567825317, + "learning_rate": 4.992561603491142e-06, + "loss": 0.6213, + "step": 2102 + }, + { + "epoch": 0.15460961623290692, + "grad_norm": 0.9048009514808655, + "learning_rate": 4.992554174361478e-06, + "loss": 0.5941, + "step": 2103 + }, + { + "epoch": 0.15468313483311277, + "grad_norm": 0.8666405081748962, + "learning_rate": 4.992546741529262e-06, + "loss": 0.5815, + "step": 2104 + }, + { + "epoch": 0.15475665343331863, + "grad_norm": 0.8715243339538574, + "learning_rate": 4.9925393049945026e-06, + "loss": 0.6047, + "step": 2105 + }, + { + "epoch": 0.15483017203352448, + "grad_norm": 0.9062009453773499, + "learning_rate": 4.992531864757213e-06, + "loss": 0.6017, + "step": 2106 + }, + { + "epoch": 0.15490369063373033, + "grad_norm": 0.8997846245765686, + "learning_rate": 4.992524420817405e-06, + "loss": 0.6056, + "step": 2107 + }, + { + "epoch": 0.15497720923393618, + "grad_norm": 0.851060152053833, + "learning_rate": 4.992516973175087e-06, + "loss": 0.5672, + "step": 2108 + }, + { + "epoch": 0.15505072783414203, + "grad_norm": 0.8791379332542419, + "learning_rate": 4.992509521830272e-06, + "loss": 0.5982, + "step": 2109 + }, + { + "epoch": 0.15512424643434788, + "grad_norm": 0.8559949994087219, + "learning_rate": 4.9925020667829716e-06, + "loss": 0.6066, + "step": 2110 + }, + { + "epoch": 0.15519776503455374, + "grad_norm": 0.8260853886604309, + "learning_rate": 4.992494608033195e-06, + "loss": 0.5625, + "step": 2111 + }, + { + "epoch": 0.1552712836347596, + "grad_norm": 0.9557468295097351, + "learning_rate": 4.992487145580954e-06, + "loss": 0.6601, + "step": 2112 + }, + { + "epoch": 0.15534480223496544, + "grad_norm": 0.9149782657623291, + "learning_rate": 4.99247967942626e-06, + "loss": 0.6473, + "step": 2113 + }, + { + "epoch": 0.1554183208351713, + "grad_norm": 0.8962692022323608, + "learning_rate": 4.9924722095691235e-06, + "loss": 0.6054, + "step": 2114 + }, + { + "epoch": 0.15549183943537714, + "grad_norm": 0.9709644317626953, + "learning_rate": 4.992464736009557e-06, + "loss": 0.6576, + "step": 2115 + }, + { + "epoch": 0.155565358035583, + "grad_norm": 0.8802908658981323, + "learning_rate": 4.992457258747569e-06, + "loss": 0.5865, + "step": 2116 + }, + { + "epoch": 0.15563887663578885, + "grad_norm": 0.9116073250770569, + "learning_rate": 4.992449777783174e-06, + "loss": 0.5919, + "step": 2117 + }, + { + "epoch": 0.1557123952359947, + "grad_norm": 0.8835378885269165, + "learning_rate": 4.9924422931163805e-06, + "loss": 0.595, + "step": 2118 + }, + { + "epoch": 0.15578591383620055, + "grad_norm": 0.9069526791572571, + "learning_rate": 4.9924348047472e-06, + "loss": 0.6279, + "step": 2119 + }, + { + "epoch": 0.1558594324364064, + "grad_norm": 0.8964654803276062, + "learning_rate": 4.992427312675645e-06, + "loss": 0.5965, + "step": 2120 + }, + { + "epoch": 0.15593295103661226, + "grad_norm": 0.9560715556144714, + "learning_rate": 4.992419816901726e-06, + "loss": 0.653, + "step": 2121 + }, + { + "epoch": 0.1560064696368181, + "grad_norm": 0.8872304558753967, + "learning_rate": 4.992412317425453e-06, + "loss": 0.6101, + "step": 2122 + }, + { + "epoch": 0.15607998823702396, + "grad_norm": 0.9217097759246826, + "learning_rate": 4.992404814246839e-06, + "loss": 0.5839, + "step": 2123 + }, + { + "epoch": 0.1561535068372298, + "grad_norm": 0.885416567325592, + "learning_rate": 4.992397307365892e-06, + "loss": 0.6051, + "step": 2124 + }, + { + "epoch": 0.15622702543743566, + "grad_norm": 0.886443018913269, + "learning_rate": 4.992389796782628e-06, + "loss": 0.5629, + "step": 2125 + }, + { + "epoch": 0.15630054403764151, + "grad_norm": 0.8838292956352234, + "learning_rate": 4.992382282497053e-06, + "loss": 0.6384, + "step": 2126 + }, + { + "epoch": 0.15637406263784737, + "grad_norm": 0.9326781034469604, + "learning_rate": 4.992374764509181e-06, + "loss": 0.6141, + "step": 2127 + }, + { + "epoch": 0.15644758123805322, + "grad_norm": 0.8595094680786133, + "learning_rate": 4.992367242819024e-06, + "loss": 0.6089, + "step": 2128 + }, + { + "epoch": 0.15652109983825907, + "grad_norm": 0.8732204437255859, + "learning_rate": 4.992359717426591e-06, + "loss": 0.6095, + "step": 2129 + }, + { + "epoch": 0.15659461843846492, + "grad_norm": 0.8788696527481079, + "learning_rate": 4.992352188331895e-06, + "loss": 0.6085, + "step": 2130 + }, + { + "epoch": 0.15666813703867077, + "grad_norm": 0.8475849628448486, + "learning_rate": 4.992344655534946e-06, + "loss": 0.5611, + "step": 2131 + }, + { + "epoch": 0.15674165563887663, + "grad_norm": 0.8632932901382446, + "learning_rate": 4.992337119035755e-06, + "loss": 0.602, + "step": 2132 + }, + { + "epoch": 0.15681517423908248, + "grad_norm": 0.8435457348823547, + "learning_rate": 4.992329578834334e-06, + "loss": 0.5931, + "step": 2133 + }, + { + "epoch": 0.15688869283928833, + "grad_norm": 0.8896248936653137, + "learning_rate": 4.992322034930693e-06, + "loss": 0.6095, + "step": 2134 + }, + { + "epoch": 0.15696221143949418, + "grad_norm": 0.9129958152770996, + "learning_rate": 4.992314487324845e-06, + "loss": 0.5821, + "step": 2135 + }, + { + "epoch": 0.15703573003970003, + "grad_norm": 0.9115581512451172, + "learning_rate": 4.992306936016801e-06, + "loss": 0.5704, + "step": 2136 + }, + { + "epoch": 0.15710924863990589, + "grad_norm": 0.9540382027626038, + "learning_rate": 4.99229938100657e-06, + "loss": 0.653, + "step": 2137 + }, + { + "epoch": 0.15718276724011174, + "grad_norm": 0.9226659536361694, + "learning_rate": 4.992291822294167e-06, + "loss": 0.6026, + "step": 2138 + }, + { + "epoch": 0.1572562858403176, + "grad_norm": 0.8980425596237183, + "learning_rate": 4.992284259879598e-06, + "loss": 0.6078, + "step": 2139 + }, + { + "epoch": 0.15732980444052344, + "grad_norm": 0.8682123422622681, + "learning_rate": 4.992276693762879e-06, + "loss": 0.6186, + "step": 2140 + }, + { + "epoch": 0.1574033230407293, + "grad_norm": 0.915819525718689, + "learning_rate": 4.9922691239440194e-06, + "loss": 0.6636, + "step": 2141 + }, + { + "epoch": 0.15747684164093514, + "grad_norm": 0.8518280982971191, + "learning_rate": 4.9922615504230295e-06, + "loss": 0.5877, + "step": 2142 + }, + { + "epoch": 0.157550360241141, + "grad_norm": 0.9027178287506104, + "learning_rate": 4.992253973199923e-06, + "loss": 0.5834, + "step": 2143 + }, + { + "epoch": 0.15762387884134685, + "grad_norm": 0.9593867659568787, + "learning_rate": 4.992246392274709e-06, + "loss": 0.6517, + "step": 2144 + }, + { + "epoch": 0.1576973974415527, + "grad_norm": 0.9162589311599731, + "learning_rate": 4.992238807647399e-06, + "loss": 0.6381, + "step": 2145 + }, + { + "epoch": 0.15777091604175855, + "grad_norm": 0.9040341377258301, + "learning_rate": 4.992231219318006e-06, + "loss": 0.5982, + "step": 2146 + }, + { + "epoch": 0.1578444346419644, + "grad_norm": 0.8802903294563293, + "learning_rate": 4.992223627286539e-06, + "loss": 0.5772, + "step": 2147 + }, + { + "epoch": 0.15791795324217026, + "grad_norm": 0.8488205075263977, + "learning_rate": 4.99221603155301e-06, + "loss": 0.6047, + "step": 2148 + }, + { + "epoch": 0.1579914718423761, + "grad_norm": 0.9504686594009399, + "learning_rate": 4.9922084321174304e-06, + "loss": 0.5902, + "step": 2149 + }, + { + "epoch": 0.15806499044258196, + "grad_norm": 0.9275770783424377, + "learning_rate": 4.992200828979812e-06, + "loss": 0.6107, + "step": 2150 + }, + { + "epoch": 0.1581385090427878, + "grad_norm": 0.9668870568275452, + "learning_rate": 4.992193222140167e-06, + "loss": 0.6172, + "step": 2151 + }, + { + "epoch": 0.1582120276429937, + "grad_norm": 0.8695838451385498, + "learning_rate": 4.992185611598503e-06, + "loss": 0.5916, + "step": 2152 + }, + { + "epoch": 0.15828554624319954, + "grad_norm": 0.8686313629150391, + "learning_rate": 4.992177997354836e-06, + "loss": 0.5985, + "step": 2153 + }, + { + "epoch": 0.1583590648434054, + "grad_norm": 0.8764513731002808, + "learning_rate": 4.992170379409173e-06, + "loss": 0.6138, + "step": 2154 + }, + { + "epoch": 0.15843258344361125, + "grad_norm": 0.8803037405014038, + "learning_rate": 4.99216275776153e-06, + "loss": 0.6366, + "step": 2155 + }, + { + "epoch": 0.1585061020438171, + "grad_norm": 0.8942816257476807, + "learning_rate": 4.9921551324119135e-06, + "loss": 0.5995, + "step": 2156 + }, + { + "epoch": 0.15857962064402295, + "grad_norm": 0.9032237529754639, + "learning_rate": 4.992147503360338e-06, + "loss": 0.5914, + "step": 2157 + }, + { + "epoch": 0.1586531392442288, + "grad_norm": 0.9366900324821472, + "learning_rate": 4.992139870606814e-06, + "loss": 0.6056, + "step": 2158 + }, + { + "epoch": 0.15872665784443465, + "grad_norm": 0.945042610168457, + "learning_rate": 4.992132234151352e-06, + "loss": 0.6055, + "step": 2159 + }, + { + "epoch": 0.1588001764446405, + "grad_norm": 0.8929865956306458, + "learning_rate": 4.992124593993964e-06, + "loss": 0.5874, + "step": 2160 + }, + { + "epoch": 0.15887369504484636, + "grad_norm": 0.8645323514938354, + "learning_rate": 4.992116950134662e-06, + "loss": 0.6142, + "step": 2161 + }, + { + "epoch": 0.1589472136450522, + "grad_norm": 0.897826075553894, + "learning_rate": 4.992109302573457e-06, + "loss": 0.5284, + "step": 2162 + }, + { + "epoch": 0.15902073224525806, + "grad_norm": 0.9003996253013611, + "learning_rate": 4.992101651310359e-06, + "loss": 0.5858, + "step": 2163 + }, + { + "epoch": 0.1590942508454639, + "grad_norm": 0.8808921575546265, + "learning_rate": 4.992093996345381e-06, + "loss": 0.5709, + "step": 2164 + }, + { + "epoch": 0.15916776944566977, + "grad_norm": 0.8703809380531311, + "learning_rate": 4.9920863376785334e-06, + "loss": 0.6057, + "step": 2165 + }, + { + "epoch": 0.15924128804587562, + "grad_norm": 0.8610981702804565, + "learning_rate": 4.992078675309828e-06, + "loss": 0.5867, + "step": 2166 + }, + { + "epoch": 0.15931480664608147, + "grad_norm": 0.8793745636940002, + "learning_rate": 4.992071009239277e-06, + "loss": 0.6262, + "step": 2167 + }, + { + "epoch": 0.15938832524628732, + "grad_norm": 0.84833163022995, + "learning_rate": 4.992063339466891e-06, + "loss": 0.5756, + "step": 2168 + }, + { + "epoch": 0.15946184384649317, + "grad_norm": 0.9062881469726562, + "learning_rate": 4.992055665992681e-06, + "loss": 0.6397, + "step": 2169 + }, + { + "epoch": 0.15953536244669902, + "grad_norm": 0.8884952068328857, + "learning_rate": 4.992047988816658e-06, + "loss": 0.6495, + "step": 2170 + }, + { + "epoch": 0.15960888104690488, + "grad_norm": 0.9217897653579712, + "learning_rate": 4.992040307938835e-06, + "loss": 0.5705, + "step": 2171 + }, + { + "epoch": 0.15968239964711073, + "grad_norm": 0.89406818151474, + "learning_rate": 4.9920326233592225e-06, + "loss": 0.5761, + "step": 2172 + }, + { + "epoch": 0.15975591824731658, + "grad_norm": 0.9504969716072083, + "learning_rate": 4.992024935077832e-06, + "loss": 0.6007, + "step": 2173 + }, + { + "epoch": 0.15982943684752243, + "grad_norm": 0.8602853417396545, + "learning_rate": 4.992017243094675e-06, + "loss": 0.612, + "step": 2174 + }, + { + "epoch": 0.15990295544772828, + "grad_norm": 0.8496035933494568, + "learning_rate": 4.992009547409763e-06, + "loss": 0.5619, + "step": 2175 + }, + { + "epoch": 0.15997647404793414, + "grad_norm": 0.8938122987747192, + "learning_rate": 4.992001848023107e-06, + "loss": 0.5445, + "step": 2176 + }, + { + "epoch": 0.16004999264814, + "grad_norm": 0.9030353426933289, + "learning_rate": 4.991994144934718e-06, + "loss": 0.6354, + "step": 2177 + }, + { + "epoch": 0.16012351124834584, + "grad_norm": 0.9074141383171082, + "learning_rate": 4.99198643814461e-06, + "loss": 0.624, + "step": 2178 + }, + { + "epoch": 0.1601970298485517, + "grad_norm": 0.862515926361084, + "learning_rate": 4.991978727652791e-06, + "loss": 0.6112, + "step": 2179 + }, + { + "epoch": 0.16027054844875754, + "grad_norm": 0.8761016726493835, + "learning_rate": 4.991971013459274e-06, + "loss": 0.6212, + "step": 2180 + }, + { + "epoch": 0.1603440670489634, + "grad_norm": 0.8666130304336548, + "learning_rate": 4.991963295564072e-06, + "loss": 0.5815, + "step": 2181 + }, + { + "epoch": 0.16041758564916925, + "grad_norm": 0.901148796081543, + "learning_rate": 4.991955573967193e-06, + "loss": 0.5868, + "step": 2182 + }, + { + "epoch": 0.1604911042493751, + "grad_norm": 0.8814185857772827, + "learning_rate": 4.991947848668652e-06, + "loss": 0.5546, + "step": 2183 + }, + { + "epoch": 0.16056462284958095, + "grad_norm": 0.8601882457733154, + "learning_rate": 4.991940119668458e-06, + "loss": 0.5664, + "step": 2184 + }, + { + "epoch": 0.1606381414497868, + "grad_norm": 0.8635388016700745, + "learning_rate": 4.991932386966624e-06, + "loss": 0.5922, + "step": 2185 + }, + { + "epoch": 0.16071166004999266, + "grad_norm": 0.8945576548576355, + "learning_rate": 4.991924650563161e-06, + "loss": 0.602, + "step": 2186 + }, + { + "epoch": 0.1607851786501985, + "grad_norm": 0.9085294008255005, + "learning_rate": 4.99191691045808e-06, + "loss": 0.6212, + "step": 2187 + }, + { + "epoch": 0.16085869725040436, + "grad_norm": 0.9319827556610107, + "learning_rate": 4.991909166651393e-06, + "loss": 0.5997, + "step": 2188 + }, + { + "epoch": 0.1609322158506102, + "grad_norm": 0.85947585105896, + "learning_rate": 4.991901419143111e-06, + "loss": 0.61, + "step": 2189 + }, + { + "epoch": 0.16100573445081606, + "grad_norm": 0.9292179346084595, + "learning_rate": 4.991893667933246e-06, + "loss": 0.6462, + "step": 2190 + }, + { + "epoch": 0.16107925305102191, + "grad_norm": 0.9178624153137207, + "learning_rate": 4.99188591302181e-06, + "loss": 0.6045, + "step": 2191 + }, + { + "epoch": 0.16115277165122777, + "grad_norm": 0.8769585490226746, + "learning_rate": 4.9918781544088134e-06, + "loss": 0.5911, + "step": 2192 + }, + { + "epoch": 0.16122629025143362, + "grad_norm": 0.8826670050621033, + "learning_rate": 4.9918703920942694e-06, + "loss": 0.5708, + "step": 2193 + }, + { + "epoch": 0.16129980885163947, + "grad_norm": 0.8751488924026489, + "learning_rate": 4.991862626078186e-06, + "loss": 0.5968, + "step": 2194 + }, + { + "epoch": 0.16137332745184532, + "grad_norm": 0.9092724919319153, + "learning_rate": 4.9918548563605795e-06, + "loss": 0.6242, + "step": 2195 + }, + { + "epoch": 0.16144684605205117, + "grad_norm": 0.895979106426239, + "learning_rate": 4.991847082941458e-06, + "loss": 0.6157, + "step": 2196 + }, + { + "epoch": 0.16152036465225703, + "grad_norm": 0.8981353044509888, + "learning_rate": 4.991839305820835e-06, + "loss": 0.6097, + "step": 2197 + }, + { + "epoch": 0.16159388325246288, + "grad_norm": 0.8638691902160645, + "learning_rate": 4.991831524998719e-06, + "loss": 0.5971, + "step": 2198 + }, + { + "epoch": 0.16166740185266873, + "grad_norm": 0.8463515639305115, + "learning_rate": 4.991823740475126e-06, + "loss": 0.5927, + "step": 2199 + }, + { + "epoch": 0.16174092045287458, + "grad_norm": 0.8596117496490479, + "learning_rate": 4.991815952250064e-06, + "loss": 0.5907, + "step": 2200 + }, + { + "epoch": 0.16181443905308043, + "grad_norm": 0.8398154377937317, + "learning_rate": 4.991808160323547e-06, + "loss": 0.5992, + "step": 2201 + }, + { + "epoch": 0.16188795765328629, + "grad_norm": 0.8347529172897339, + "learning_rate": 4.9918003646955846e-06, + "loss": 0.5613, + "step": 2202 + }, + { + "epoch": 0.16196147625349214, + "grad_norm": 0.8345642685890198, + "learning_rate": 4.9917925653661895e-06, + "loss": 0.5785, + "step": 2203 + }, + { + "epoch": 0.162034994853698, + "grad_norm": 0.8295274376869202, + "learning_rate": 4.991784762335372e-06, + "loss": 0.5741, + "step": 2204 + }, + { + "epoch": 0.16210851345390384, + "grad_norm": 0.8626688718795776, + "learning_rate": 4.991776955603146e-06, + "loss": 0.5963, + "step": 2205 + }, + { + "epoch": 0.1621820320541097, + "grad_norm": 0.9029784202575684, + "learning_rate": 4.9917691451695205e-06, + "loss": 0.6107, + "step": 2206 + }, + { + "epoch": 0.16225555065431554, + "grad_norm": 0.9052979350090027, + "learning_rate": 4.99176133103451e-06, + "loss": 0.5731, + "step": 2207 + }, + { + "epoch": 0.1623290692545214, + "grad_norm": 0.9271512627601624, + "learning_rate": 4.991753513198123e-06, + "loss": 0.6197, + "step": 2208 + }, + { + "epoch": 0.16240258785472725, + "grad_norm": 0.901123046875, + "learning_rate": 4.991745691660374e-06, + "loss": 0.6128, + "step": 2209 + }, + { + "epoch": 0.1624761064549331, + "grad_norm": 0.8609054684638977, + "learning_rate": 4.991737866421271e-06, + "loss": 0.6028, + "step": 2210 + }, + { + "epoch": 0.16254962505513895, + "grad_norm": 0.9029075503349304, + "learning_rate": 4.99173003748083e-06, + "loss": 0.6093, + "step": 2211 + }, + { + "epoch": 0.1626231436553448, + "grad_norm": 0.915040910243988, + "learning_rate": 4.9917222048390595e-06, + "loss": 0.5525, + "step": 2212 + }, + { + "epoch": 0.16269666225555066, + "grad_norm": 0.8462899327278137, + "learning_rate": 4.991714368495972e-06, + "loss": 0.5922, + "step": 2213 + }, + { + "epoch": 0.1627701808557565, + "grad_norm": 0.9041837453842163, + "learning_rate": 4.9917065284515795e-06, + "loss": 0.6058, + "step": 2214 + }, + { + "epoch": 0.16284369945596236, + "grad_norm": 0.8949518799781799, + "learning_rate": 4.991698684705894e-06, + "loss": 0.5777, + "step": 2215 + }, + { + "epoch": 0.1629172180561682, + "grad_norm": 0.870936393737793, + "learning_rate": 4.991690837258926e-06, + "loss": 0.5486, + "step": 2216 + }, + { + "epoch": 0.16299073665637406, + "grad_norm": 0.8956127166748047, + "learning_rate": 4.991682986110688e-06, + "loss": 0.6066, + "step": 2217 + }, + { + "epoch": 0.16306425525657992, + "grad_norm": 0.890688419342041, + "learning_rate": 4.991675131261191e-06, + "loss": 0.5878, + "step": 2218 + }, + { + "epoch": 0.16313777385678577, + "grad_norm": 0.9362198114395142, + "learning_rate": 4.991667272710447e-06, + "loss": 0.607, + "step": 2219 + }, + { + "epoch": 0.16321129245699162, + "grad_norm": 0.8924433588981628, + "learning_rate": 4.991659410458468e-06, + "loss": 0.6264, + "step": 2220 + }, + { + "epoch": 0.16328481105719747, + "grad_norm": 0.8815121650695801, + "learning_rate": 4.991651544505265e-06, + "loss": 0.5842, + "step": 2221 + }, + { + "epoch": 0.16335832965740332, + "grad_norm": 0.8885782361030579, + "learning_rate": 4.99164367485085e-06, + "loss": 0.6061, + "step": 2222 + }, + { + "epoch": 0.16343184825760917, + "grad_norm": 0.852882981300354, + "learning_rate": 4.991635801495235e-06, + "loss": 0.6028, + "step": 2223 + }, + { + "epoch": 0.16350536685781503, + "grad_norm": 0.8632150888442993, + "learning_rate": 4.991627924438432e-06, + "loss": 0.533, + "step": 2224 + }, + { + "epoch": 0.16357888545802088, + "grad_norm": 0.9239793419837952, + "learning_rate": 4.991620043680452e-06, + "loss": 0.6171, + "step": 2225 + }, + { + "epoch": 0.16365240405822673, + "grad_norm": 0.9605894088745117, + "learning_rate": 4.991612159221306e-06, + "loss": 0.5815, + "step": 2226 + }, + { + "epoch": 0.16372592265843258, + "grad_norm": 0.9276779294013977, + "learning_rate": 4.991604271061007e-06, + "loss": 0.6109, + "step": 2227 + }, + { + "epoch": 0.16379944125863843, + "grad_norm": 0.9108554124832153, + "learning_rate": 4.991596379199566e-06, + "loss": 0.5788, + "step": 2228 + }, + { + "epoch": 0.16387295985884429, + "grad_norm": 0.8998804092407227, + "learning_rate": 4.991588483636996e-06, + "loss": 0.6282, + "step": 2229 + }, + { + "epoch": 0.16394647845905014, + "grad_norm": 0.8980699777603149, + "learning_rate": 4.9915805843733066e-06, + "loss": 0.5855, + "step": 2230 + }, + { + "epoch": 0.164019997059256, + "grad_norm": 0.9618656635284424, + "learning_rate": 4.991572681408512e-06, + "loss": 0.6241, + "step": 2231 + }, + { + "epoch": 0.16409351565946184, + "grad_norm": 0.9165166616439819, + "learning_rate": 4.991564774742622e-06, + "loss": 0.5929, + "step": 2232 + }, + { + "epoch": 0.1641670342596677, + "grad_norm": 0.8919005990028381, + "learning_rate": 4.991556864375648e-06, + "loss": 0.6283, + "step": 2233 + }, + { + "epoch": 0.16424055285987355, + "grad_norm": 0.8648372292518616, + "learning_rate": 4.9915489503076046e-06, + "loss": 0.5815, + "step": 2234 + }, + { + "epoch": 0.1643140714600794, + "grad_norm": 0.9035197496414185, + "learning_rate": 4.9915410325385e-06, + "loss": 0.5421, + "step": 2235 + }, + { + "epoch": 0.16438759006028525, + "grad_norm": 0.8933001756668091, + "learning_rate": 4.991533111068348e-06, + "loss": 0.5732, + "step": 2236 + }, + { + "epoch": 0.1644611086604911, + "grad_norm": 0.909848153591156, + "learning_rate": 4.99152518589716e-06, + "loss": 0.607, + "step": 2237 + }, + { + "epoch": 0.16453462726069695, + "grad_norm": 0.9195646047592163, + "learning_rate": 4.991517257024948e-06, + "loss": 0.5821, + "step": 2238 + }, + { + "epoch": 0.1646081458609028, + "grad_norm": 0.9185721278190613, + "learning_rate": 4.9915093244517234e-06, + "loss": 0.6081, + "step": 2239 + }, + { + "epoch": 0.16468166446110866, + "grad_norm": 0.9235435128211975, + "learning_rate": 4.991501388177499e-06, + "loss": 0.6113, + "step": 2240 + }, + { + "epoch": 0.1647551830613145, + "grad_norm": 0.9582498669624329, + "learning_rate": 4.9914934482022845e-06, + "loss": 0.6299, + "step": 2241 + }, + { + "epoch": 0.16482870166152036, + "grad_norm": 0.8670428991317749, + "learning_rate": 4.991485504526093e-06, + "loss": 0.5957, + "step": 2242 + }, + { + "epoch": 0.1649022202617262, + "grad_norm": 0.8391399383544922, + "learning_rate": 4.991477557148937e-06, + "loss": 0.5533, + "step": 2243 + }, + { + "epoch": 0.16497573886193206, + "grad_norm": 0.9243310689926147, + "learning_rate": 4.991469606070828e-06, + "loss": 0.642, + "step": 2244 + }, + { + "epoch": 0.16504925746213792, + "grad_norm": 0.9164239764213562, + "learning_rate": 4.991461651291776e-06, + "loss": 0.6293, + "step": 2245 + }, + { + "epoch": 0.16512277606234377, + "grad_norm": 0.8856542110443115, + "learning_rate": 4.991453692811794e-06, + "loss": 0.6393, + "step": 2246 + }, + { + "epoch": 0.16519629466254962, + "grad_norm": 0.9649579524993896, + "learning_rate": 4.991445730630895e-06, + "loss": 0.624, + "step": 2247 + }, + { + "epoch": 0.16526981326275547, + "grad_norm": 0.9064390659332275, + "learning_rate": 4.991437764749089e-06, + "loss": 0.6125, + "step": 2248 + }, + { + "epoch": 0.16534333186296132, + "grad_norm": 0.8584736585617065, + "learning_rate": 4.991429795166389e-06, + "loss": 0.6092, + "step": 2249 + }, + { + "epoch": 0.16541685046316718, + "grad_norm": 0.8772110342979431, + "learning_rate": 4.991421821882806e-06, + "loss": 0.6181, + "step": 2250 + }, + { + "epoch": 0.16549036906337303, + "grad_norm": 0.8536840677261353, + "learning_rate": 4.991413844898354e-06, + "loss": 0.5785, + "step": 2251 + }, + { + "epoch": 0.16556388766357888, + "grad_norm": 0.8544003367424011, + "learning_rate": 4.991405864213042e-06, + "loss": 0.6423, + "step": 2252 + }, + { + "epoch": 0.16563740626378473, + "grad_norm": 0.9164078235626221, + "learning_rate": 4.991397879826883e-06, + "loss": 0.6843, + "step": 2253 + }, + { + "epoch": 0.16571092486399058, + "grad_norm": 0.9306586980819702, + "learning_rate": 4.9913898917398876e-06, + "loss": 0.6313, + "step": 2254 + }, + { + "epoch": 0.16578444346419643, + "grad_norm": 0.910316526889801, + "learning_rate": 4.99138189995207e-06, + "loss": 0.6055, + "step": 2255 + }, + { + "epoch": 0.1658579620644023, + "grad_norm": 0.919989287853241, + "learning_rate": 4.991373904463441e-06, + "loss": 0.6224, + "step": 2256 + }, + { + "epoch": 0.16593148066460814, + "grad_norm": 0.8750834465026855, + "learning_rate": 4.991365905274013e-06, + "loss": 0.6055, + "step": 2257 + }, + { + "epoch": 0.166004999264814, + "grad_norm": 0.9094887971878052, + "learning_rate": 4.9913579023837976e-06, + "loss": 0.6266, + "step": 2258 + }, + { + "epoch": 0.16607851786501984, + "grad_norm": 0.9042008519172668, + "learning_rate": 4.991349895792805e-06, + "loss": 0.6075, + "step": 2259 + }, + { + "epoch": 0.1661520364652257, + "grad_norm": 0.8865842223167419, + "learning_rate": 4.99134188550105e-06, + "loss": 0.6144, + "step": 2260 + }, + { + "epoch": 0.16622555506543155, + "grad_norm": 0.8555862307548523, + "learning_rate": 4.991333871508542e-06, + "loss": 0.581, + "step": 2261 + }, + { + "epoch": 0.1662990736656374, + "grad_norm": 0.9585682153701782, + "learning_rate": 4.991325853815295e-06, + "loss": 0.6242, + "step": 2262 + }, + { + "epoch": 0.16637259226584325, + "grad_norm": 0.8887312412261963, + "learning_rate": 4.9913178324213195e-06, + "loss": 0.6013, + "step": 2263 + }, + { + "epoch": 0.1664461108660491, + "grad_norm": 0.8329148888587952, + "learning_rate": 4.991309807326627e-06, + "loss": 0.5878, + "step": 2264 + }, + { + "epoch": 0.16651962946625495, + "grad_norm": 0.8741734027862549, + "learning_rate": 4.991301778531231e-06, + "loss": 0.5922, + "step": 2265 + }, + { + "epoch": 0.1665931480664608, + "grad_norm": 0.8491045832633972, + "learning_rate": 4.991293746035143e-06, + "loss": 0.5824, + "step": 2266 + }, + { + "epoch": 0.16666666666666666, + "grad_norm": 0.8924875259399414, + "learning_rate": 4.991285709838373e-06, + "loss": 0.6161, + "step": 2267 + }, + { + "epoch": 0.1667401852668725, + "grad_norm": 0.8917773365974426, + "learning_rate": 4.991277669940936e-06, + "loss": 0.6192, + "step": 2268 + }, + { + "epoch": 0.16681370386707836, + "grad_norm": 0.8713855743408203, + "learning_rate": 4.991269626342841e-06, + "loss": 0.5753, + "step": 2269 + }, + { + "epoch": 0.1668872224672842, + "grad_norm": 0.867523729801178, + "learning_rate": 4.991261579044103e-06, + "loss": 0.5888, + "step": 2270 + }, + { + "epoch": 0.16696074106749006, + "grad_norm": 0.9028171896934509, + "learning_rate": 4.991253528044732e-06, + "loss": 0.6225, + "step": 2271 + }, + { + "epoch": 0.16703425966769592, + "grad_norm": 0.9799610376358032, + "learning_rate": 4.9912454733447406e-06, + "loss": 0.6537, + "step": 2272 + }, + { + "epoch": 0.16710777826790177, + "grad_norm": 0.9360060095787048, + "learning_rate": 4.991237414944139e-06, + "loss": 0.6032, + "step": 2273 + }, + { + "epoch": 0.16718129686810762, + "grad_norm": 0.8620225191116333, + "learning_rate": 4.991229352842941e-06, + "loss": 0.592, + "step": 2274 + }, + { + "epoch": 0.16725481546831347, + "grad_norm": 0.9308688044548035, + "learning_rate": 4.9912212870411594e-06, + "loss": 0.5842, + "step": 2275 + }, + { + "epoch": 0.16732833406851932, + "grad_norm": 0.9335238337516785, + "learning_rate": 4.991213217538804e-06, + "loss": 0.5815, + "step": 2276 + }, + { + "epoch": 0.16740185266872518, + "grad_norm": 0.8444926142692566, + "learning_rate": 4.991205144335888e-06, + "loss": 0.5558, + "step": 2277 + }, + { + "epoch": 0.16747537126893103, + "grad_norm": 0.9145177006721497, + "learning_rate": 4.991197067432423e-06, + "loss": 0.6296, + "step": 2278 + }, + { + "epoch": 0.16754888986913688, + "grad_norm": 0.8588817715644836, + "learning_rate": 4.991188986828421e-06, + "loss": 0.5937, + "step": 2279 + }, + { + "epoch": 0.16762240846934273, + "grad_norm": 0.8942281603813171, + "learning_rate": 4.991180902523896e-06, + "loss": 0.6274, + "step": 2280 + }, + { + "epoch": 0.16769592706954858, + "grad_norm": 0.8884355425834656, + "learning_rate": 4.9911728145188556e-06, + "loss": 0.6178, + "step": 2281 + }, + { + "epoch": 0.16776944566975444, + "grad_norm": 0.9536502957344055, + "learning_rate": 4.991164722813316e-06, + "loss": 0.6298, + "step": 2282 + }, + { + "epoch": 0.1678429642699603, + "grad_norm": 0.8869833946228027, + "learning_rate": 4.991156627407287e-06, + "loss": 0.5577, + "step": 2283 + }, + { + "epoch": 0.16791648287016614, + "grad_norm": 0.8808479309082031, + "learning_rate": 4.991148528300781e-06, + "loss": 0.5905, + "step": 2284 + }, + { + "epoch": 0.167990001470372, + "grad_norm": 0.9065474271774292, + "learning_rate": 4.991140425493811e-06, + "loss": 0.5606, + "step": 2285 + }, + { + "epoch": 0.16806352007057784, + "grad_norm": 0.9109593629837036, + "learning_rate": 4.991132318986388e-06, + "loss": 0.6059, + "step": 2286 + }, + { + "epoch": 0.1681370386707837, + "grad_norm": 0.8722795844078064, + "learning_rate": 4.991124208778524e-06, + "loss": 0.5755, + "step": 2287 + }, + { + "epoch": 0.16821055727098955, + "grad_norm": 0.8677640557289124, + "learning_rate": 4.991116094870232e-06, + "loss": 0.6, + "step": 2288 + }, + { + "epoch": 0.1682840758711954, + "grad_norm": 0.9422605037689209, + "learning_rate": 4.991107977261523e-06, + "loss": 0.6075, + "step": 2289 + }, + { + "epoch": 0.16835759447140125, + "grad_norm": 0.9477503895759583, + "learning_rate": 4.99109985595241e-06, + "loss": 0.6197, + "step": 2290 + }, + { + "epoch": 0.16843111307160713, + "grad_norm": 0.9062376618385315, + "learning_rate": 4.991091730942904e-06, + "loss": 0.5825, + "step": 2291 + }, + { + "epoch": 0.16850463167181298, + "grad_norm": 0.8838793039321899, + "learning_rate": 4.991083602233018e-06, + "loss": 0.5923, + "step": 2292 + }, + { + "epoch": 0.16857815027201883, + "grad_norm": 0.9427785873413086, + "learning_rate": 4.991075469822764e-06, + "loss": 0.5849, + "step": 2293 + }, + { + "epoch": 0.16865166887222469, + "grad_norm": 0.9163717031478882, + "learning_rate": 4.991067333712153e-06, + "loss": 0.6381, + "step": 2294 + }, + { + "epoch": 0.16872518747243054, + "grad_norm": 0.8744823336601257, + "learning_rate": 4.991059193901199e-06, + "loss": 0.6134, + "step": 2295 + }, + { + "epoch": 0.1687987060726364, + "grad_norm": 0.885051429271698, + "learning_rate": 4.991051050389912e-06, + "loss": 0.5941, + "step": 2296 + }, + { + "epoch": 0.16887222467284224, + "grad_norm": 0.8794012665748596, + "learning_rate": 4.991042903178305e-06, + "loss": 0.5867, + "step": 2297 + }, + { + "epoch": 0.1689457432730481, + "grad_norm": 0.8609407544136047, + "learning_rate": 4.99103475226639e-06, + "loss": 0.609, + "step": 2298 + }, + { + "epoch": 0.16901926187325395, + "grad_norm": 0.9235706329345703, + "learning_rate": 4.9910265976541805e-06, + "loss": 0.6329, + "step": 2299 + }, + { + "epoch": 0.1690927804734598, + "grad_norm": 0.9405500292778015, + "learning_rate": 4.991018439341687e-06, + "loss": 0.6168, + "step": 2300 + }, + { + "epoch": 0.16916629907366565, + "grad_norm": 0.8633197546005249, + "learning_rate": 4.991010277328922e-06, + "loss": 0.6073, + "step": 2301 + }, + { + "epoch": 0.1692398176738715, + "grad_norm": 0.9027257561683655, + "learning_rate": 4.991002111615897e-06, + "loss": 0.6263, + "step": 2302 + }, + { + "epoch": 0.16931333627407735, + "grad_norm": 0.8724936842918396, + "learning_rate": 4.990993942202624e-06, + "loss": 0.6119, + "step": 2303 + }, + { + "epoch": 0.1693868548742832, + "grad_norm": 0.8756887316703796, + "learning_rate": 4.990985769089117e-06, + "loss": 0.6026, + "step": 2304 + }, + { + "epoch": 0.16946037347448906, + "grad_norm": 0.902955949306488, + "learning_rate": 4.9909775922753875e-06, + "loss": 0.6447, + "step": 2305 + }, + { + "epoch": 0.1695338920746949, + "grad_norm": 0.9157037734985352, + "learning_rate": 4.990969411761447e-06, + "loss": 0.5804, + "step": 2306 + }, + { + "epoch": 0.16960741067490076, + "grad_norm": 0.8806798458099365, + "learning_rate": 4.990961227547307e-06, + "loss": 0.5865, + "step": 2307 + }, + { + "epoch": 0.1696809292751066, + "grad_norm": 0.8479150533676147, + "learning_rate": 4.9909530396329815e-06, + "loss": 0.5438, + "step": 2308 + }, + { + "epoch": 0.16975444787531246, + "grad_norm": 0.8911952376365662, + "learning_rate": 4.990944848018481e-06, + "loss": 0.5717, + "step": 2309 + }, + { + "epoch": 0.16982796647551832, + "grad_norm": 0.8797869682312012, + "learning_rate": 4.990936652703819e-06, + "loss": 0.6065, + "step": 2310 + }, + { + "epoch": 0.16990148507572417, + "grad_norm": 0.8836401104927063, + "learning_rate": 4.990928453689007e-06, + "loss": 0.5916, + "step": 2311 + }, + { + "epoch": 0.16997500367593002, + "grad_norm": 0.9788417220115662, + "learning_rate": 4.990920250974056e-06, + "loss": 0.6204, + "step": 2312 + }, + { + "epoch": 0.17004852227613587, + "grad_norm": 0.9641525149345398, + "learning_rate": 4.990912044558981e-06, + "loss": 0.6214, + "step": 2313 + }, + { + "epoch": 0.17012204087634172, + "grad_norm": 0.8982216119766235, + "learning_rate": 4.990903834443791e-06, + "loss": 0.6187, + "step": 2314 + }, + { + "epoch": 0.17019555947654758, + "grad_norm": 1.001181721687317, + "learning_rate": 4.990895620628501e-06, + "loss": 0.6418, + "step": 2315 + }, + { + "epoch": 0.17026907807675343, + "grad_norm": 0.8888273239135742, + "learning_rate": 4.9908874031131205e-06, + "loss": 0.5944, + "step": 2316 + }, + { + "epoch": 0.17034259667695928, + "grad_norm": 0.8951061964035034, + "learning_rate": 4.990879181897664e-06, + "loss": 0.5908, + "step": 2317 + }, + { + "epoch": 0.17041611527716513, + "grad_norm": 0.9113389253616333, + "learning_rate": 4.990870956982143e-06, + "loss": 0.5978, + "step": 2318 + }, + { + "epoch": 0.17048963387737098, + "grad_norm": 0.9222834706306458, + "learning_rate": 4.99086272836657e-06, + "loss": 0.626, + "step": 2319 + }, + { + "epoch": 0.17056315247757683, + "grad_norm": 0.8581855893135071, + "learning_rate": 4.990854496050957e-06, + "loss": 0.5754, + "step": 2320 + }, + { + "epoch": 0.1706366710777827, + "grad_norm": 0.9181677103042603, + "learning_rate": 4.990846260035315e-06, + "loss": 0.5834, + "step": 2321 + }, + { + "epoch": 0.17071018967798854, + "grad_norm": 0.8312368988990784, + "learning_rate": 4.9908380203196575e-06, + "loss": 0.5811, + "step": 2322 + }, + { + "epoch": 0.1707837082781944, + "grad_norm": 0.8315255641937256, + "learning_rate": 4.990829776903996e-06, + "loss": 0.5741, + "step": 2323 + }, + { + "epoch": 0.17085722687840024, + "grad_norm": 0.8417409658432007, + "learning_rate": 4.990821529788345e-06, + "loss": 0.6179, + "step": 2324 + }, + { + "epoch": 0.1709307454786061, + "grad_norm": 0.9843947887420654, + "learning_rate": 4.990813278972713e-06, + "loss": 0.6536, + "step": 2325 + }, + { + "epoch": 0.17100426407881195, + "grad_norm": 0.8444965481758118, + "learning_rate": 4.990805024457115e-06, + "loss": 0.5342, + "step": 2326 + }, + { + "epoch": 0.1710777826790178, + "grad_norm": 0.8587597608566284, + "learning_rate": 4.990796766241563e-06, + "loss": 0.5954, + "step": 2327 + }, + { + "epoch": 0.17115130127922365, + "grad_norm": 0.8861211538314819, + "learning_rate": 4.990788504326068e-06, + "loss": 0.5972, + "step": 2328 + }, + { + "epoch": 0.1712248198794295, + "grad_norm": 0.9289387464523315, + "learning_rate": 4.990780238710644e-06, + "loss": 0.6074, + "step": 2329 + }, + { + "epoch": 0.17129833847963535, + "grad_norm": 0.8297473192214966, + "learning_rate": 4.990771969395302e-06, + "loss": 0.5675, + "step": 2330 + }, + { + "epoch": 0.1713718570798412, + "grad_norm": 0.8651319742202759, + "learning_rate": 4.9907636963800545e-06, + "loss": 0.6107, + "step": 2331 + }, + { + "epoch": 0.17144537568004706, + "grad_norm": 0.9178484082221985, + "learning_rate": 4.990755419664914e-06, + "loss": 0.5643, + "step": 2332 + }, + { + "epoch": 0.1715188942802529, + "grad_norm": 0.863943874835968, + "learning_rate": 4.990747139249893e-06, + "loss": 0.553, + "step": 2333 + }, + { + "epoch": 0.17159241288045876, + "grad_norm": 0.8720464706420898, + "learning_rate": 4.990738855135003e-06, + "loss": 0.6099, + "step": 2334 + }, + { + "epoch": 0.1716659314806646, + "grad_norm": 0.9047247171401978, + "learning_rate": 4.990730567320256e-06, + "loss": 0.59, + "step": 2335 + }, + { + "epoch": 0.17173945008087046, + "grad_norm": 0.8433340191841125, + "learning_rate": 4.990722275805667e-06, + "loss": 0.5928, + "step": 2336 + }, + { + "epoch": 0.17181296868107632, + "grad_norm": 0.9151166081428528, + "learning_rate": 4.990713980591246e-06, + "loss": 0.6154, + "step": 2337 + }, + { + "epoch": 0.17188648728128217, + "grad_norm": 0.8989944458007812, + "learning_rate": 4.990705681677005e-06, + "loss": 0.6134, + "step": 2338 + }, + { + "epoch": 0.17196000588148802, + "grad_norm": 0.9166705012321472, + "learning_rate": 4.990697379062958e-06, + "loss": 0.6147, + "step": 2339 + }, + { + "epoch": 0.17203352448169387, + "grad_norm": 0.8533045649528503, + "learning_rate": 4.990689072749116e-06, + "loss": 0.5652, + "step": 2340 + }, + { + "epoch": 0.17210704308189972, + "grad_norm": 0.8709073662757874, + "learning_rate": 4.990680762735491e-06, + "loss": 0.6086, + "step": 2341 + }, + { + "epoch": 0.17218056168210558, + "grad_norm": 0.8843314051628113, + "learning_rate": 4.990672449022097e-06, + "loss": 0.6077, + "step": 2342 + }, + { + "epoch": 0.17225408028231143, + "grad_norm": 0.8736380338668823, + "learning_rate": 4.9906641316089455e-06, + "loss": 0.6114, + "step": 2343 + }, + { + "epoch": 0.17232759888251728, + "grad_norm": 0.9013457894325256, + "learning_rate": 4.990655810496048e-06, + "loss": 0.6278, + "step": 2344 + }, + { + "epoch": 0.17240111748272313, + "grad_norm": 0.8989180326461792, + "learning_rate": 4.990647485683419e-06, + "loss": 0.5994, + "step": 2345 + }, + { + "epoch": 0.17247463608292898, + "grad_norm": 0.9051030278205872, + "learning_rate": 4.990639157171068e-06, + "loss": 0.5643, + "step": 2346 + }, + { + "epoch": 0.17254815468313484, + "grad_norm": 0.8627777099609375, + "learning_rate": 4.99063082495901e-06, + "loss": 0.5836, + "step": 2347 + }, + { + "epoch": 0.1726216732833407, + "grad_norm": 0.8552145957946777, + "learning_rate": 4.990622489047256e-06, + "loss": 0.5871, + "step": 2348 + }, + { + "epoch": 0.17269519188354654, + "grad_norm": 0.8735430836677551, + "learning_rate": 4.9906141494358185e-06, + "loss": 0.5956, + "step": 2349 + }, + { + "epoch": 0.1727687104837524, + "grad_norm": 0.8995591998100281, + "learning_rate": 4.990605806124711e-06, + "loss": 0.6162, + "step": 2350 + }, + { + "epoch": 0.17284222908395824, + "grad_norm": 0.8873665928840637, + "learning_rate": 4.990597459113944e-06, + "loss": 0.6137, + "step": 2351 + }, + { + "epoch": 0.1729157476841641, + "grad_norm": 0.8684831261634827, + "learning_rate": 4.990589108403531e-06, + "loss": 0.5323, + "step": 2352 + }, + { + "epoch": 0.17298926628436995, + "grad_norm": 0.8728950023651123, + "learning_rate": 4.990580753993483e-06, + "loss": 0.5312, + "step": 2353 + }, + { + "epoch": 0.1730627848845758, + "grad_norm": 0.909091055393219, + "learning_rate": 4.990572395883816e-06, + "loss": 0.6024, + "step": 2354 + }, + { + "epoch": 0.17313630348478165, + "grad_norm": 0.8439345955848694, + "learning_rate": 4.990564034074539e-06, + "loss": 0.6004, + "step": 2355 + }, + { + "epoch": 0.1732098220849875, + "grad_norm": 0.8895620107650757, + "learning_rate": 4.990555668565665e-06, + "loss": 0.5662, + "step": 2356 + }, + { + "epoch": 0.17328334068519335, + "grad_norm": 0.8635866641998291, + "learning_rate": 4.990547299357208e-06, + "loss": 0.5588, + "step": 2357 + }, + { + "epoch": 0.1733568592853992, + "grad_norm": 0.8646054267883301, + "learning_rate": 4.990538926449179e-06, + "loss": 0.6562, + "step": 2358 + }, + { + "epoch": 0.17343037788560506, + "grad_norm": 0.8441058397293091, + "learning_rate": 4.99053054984159e-06, + "loss": 0.5503, + "step": 2359 + }, + { + "epoch": 0.1735038964858109, + "grad_norm": 0.8535297513008118, + "learning_rate": 4.990522169534455e-06, + "loss": 0.6204, + "step": 2360 + }, + { + "epoch": 0.17357741508601676, + "grad_norm": 0.8544523119926453, + "learning_rate": 4.9905137855277865e-06, + "loss": 0.6176, + "step": 2361 + }, + { + "epoch": 0.1736509336862226, + "grad_norm": 0.9110089540481567, + "learning_rate": 4.990505397821595e-06, + "loss": 0.5919, + "step": 2362 + }, + { + "epoch": 0.17372445228642847, + "grad_norm": 0.8960421085357666, + "learning_rate": 4.990497006415894e-06, + "loss": 0.6127, + "step": 2363 + }, + { + "epoch": 0.17379797088663432, + "grad_norm": 0.8825910687446594, + "learning_rate": 4.990488611310696e-06, + "loss": 0.6044, + "step": 2364 + }, + { + "epoch": 0.17387148948684017, + "grad_norm": 0.8703500032424927, + "learning_rate": 4.990480212506015e-06, + "loss": 0.5892, + "step": 2365 + }, + { + "epoch": 0.17394500808704602, + "grad_norm": 0.8530479073524475, + "learning_rate": 4.990471810001861e-06, + "loss": 0.5961, + "step": 2366 + }, + { + "epoch": 0.17401852668725187, + "grad_norm": 0.8359675407409668, + "learning_rate": 4.990463403798247e-06, + "loss": 0.5725, + "step": 2367 + }, + { + "epoch": 0.17409204528745773, + "grad_norm": 0.8331050276756287, + "learning_rate": 4.990454993895186e-06, + "loss": 0.61, + "step": 2368 + }, + { + "epoch": 0.17416556388766358, + "grad_norm": 0.8958504796028137, + "learning_rate": 4.990446580292692e-06, + "loss": 0.6519, + "step": 2369 + }, + { + "epoch": 0.17423908248786943, + "grad_norm": 0.8885365724563599, + "learning_rate": 4.990438162990774e-06, + "loss": 0.6263, + "step": 2370 + }, + { + "epoch": 0.17431260108807528, + "grad_norm": 0.9301501512527466, + "learning_rate": 4.990429741989448e-06, + "loss": 0.6166, + "step": 2371 + }, + { + "epoch": 0.17438611968828113, + "grad_norm": 0.8892646431922913, + "learning_rate": 4.9904213172887246e-06, + "loss": 0.6086, + "step": 2372 + }, + { + "epoch": 0.17445963828848698, + "grad_norm": 0.8869601488113403, + "learning_rate": 4.990412888888616e-06, + "loss": 0.6016, + "step": 2373 + }, + { + "epoch": 0.17453315688869284, + "grad_norm": 0.8561221957206726, + "learning_rate": 4.990404456789136e-06, + "loss": 0.5474, + "step": 2374 + }, + { + "epoch": 0.1746066754888987, + "grad_norm": 0.8671098947525024, + "learning_rate": 4.990396020990297e-06, + "loss": 0.6168, + "step": 2375 + }, + { + "epoch": 0.17468019408910454, + "grad_norm": 0.947002112865448, + "learning_rate": 4.99038758149211e-06, + "loss": 0.6455, + "step": 2376 + }, + { + "epoch": 0.1747537126893104, + "grad_norm": 0.9017940163612366, + "learning_rate": 4.99037913829459e-06, + "loss": 0.6294, + "step": 2377 + }, + { + "epoch": 0.17482723128951624, + "grad_norm": 0.8745951652526855, + "learning_rate": 4.9903706913977465e-06, + "loss": 0.5846, + "step": 2378 + }, + { + "epoch": 0.1749007498897221, + "grad_norm": 0.8639684319496155, + "learning_rate": 4.990362240801595e-06, + "loss": 0.5798, + "step": 2379 + }, + { + "epoch": 0.17497426848992795, + "grad_norm": 0.883975088596344, + "learning_rate": 4.990353786506147e-06, + "loss": 0.6044, + "step": 2380 + }, + { + "epoch": 0.1750477870901338, + "grad_norm": 0.8134744763374329, + "learning_rate": 4.9903453285114135e-06, + "loss": 0.5666, + "step": 2381 + }, + { + "epoch": 0.17512130569033965, + "grad_norm": 0.8659883141517639, + "learning_rate": 4.990336866817409e-06, + "loss": 0.6053, + "step": 2382 + }, + { + "epoch": 0.1751948242905455, + "grad_norm": 0.9066112041473389, + "learning_rate": 4.990328401424145e-06, + "loss": 0.6205, + "step": 2383 + }, + { + "epoch": 0.17526834289075136, + "grad_norm": 0.8764538764953613, + "learning_rate": 4.990319932331636e-06, + "loss": 0.6043, + "step": 2384 + }, + { + "epoch": 0.1753418614909572, + "grad_norm": 0.8322216272354126, + "learning_rate": 4.990311459539892e-06, + "loss": 0.5799, + "step": 2385 + }, + { + "epoch": 0.17541538009116306, + "grad_norm": 0.9515473246574402, + "learning_rate": 4.990302983048926e-06, + "loss": 0.6325, + "step": 2386 + }, + { + "epoch": 0.1754888986913689, + "grad_norm": 0.9023774862289429, + "learning_rate": 4.9902945028587526e-06, + "loss": 0.6184, + "step": 2387 + }, + { + "epoch": 0.17556241729157476, + "grad_norm": 0.9442160129547119, + "learning_rate": 4.990286018969383e-06, + "loss": 0.639, + "step": 2388 + }, + { + "epoch": 0.17563593589178061, + "grad_norm": 0.8919162750244141, + "learning_rate": 4.990277531380829e-06, + "loss": 0.6318, + "step": 2389 + }, + { + "epoch": 0.17570945449198647, + "grad_norm": 0.8402987122535706, + "learning_rate": 4.990269040093104e-06, + "loss": 0.6125, + "step": 2390 + }, + { + "epoch": 0.17578297309219232, + "grad_norm": 0.9203922152519226, + "learning_rate": 4.990260545106221e-06, + "loss": 0.63, + "step": 2391 + }, + { + "epoch": 0.17585649169239817, + "grad_norm": 0.8549087047576904, + "learning_rate": 4.990252046420193e-06, + "loss": 0.5795, + "step": 2392 + }, + { + "epoch": 0.17593001029260402, + "grad_norm": 0.9271152019500732, + "learning_rate": 4.990243544035031e-06, + "loss": 0.6191, + "step": 2393 + }, + { + "epoch": 0.17600352889280987, + "grad_norm": 0.9064239859580994, + "learning_rate": 4.9902350379507505e-06, + "loss": 0.6263, + "step": 2394 + }, + { + "epoch": 0.17607704749301573, + "grad_norm": 0.8648806214332581, + "learning_rate": 4.99022652816736e-06, + "loss": 0.5802, + "step": 2395 + }, + { + "epoch": 0.17615056609322158, + "grad_norm": 0.9319424033164978, + "learning_rate": 4.990218014684876e-06, + "loss": 0.6112, + "step": 2396 + }, + { + "epoch": 0.17622408469342743, + "grad_norm": 0.8879496455192566, + "learning_rate": 4.990209497503308e-06, + "loss": 0.6277, + "step": 2397 + }, + { + "epoch": 0.17629760329363328, + "grad_norm": 0.8807005882263184, + "learning_rate": 4.990200976622671e-06, + "loss": 0.6042, + "step": 2398 + }, + { + "epoch": 0.17637112189383913, + "grad_norm": 0.9048507213592529, + "learning_rate": 4.990192452042977e-06, + "loss": 0.6217, + "step": 2399 + }, + { + "epoch": 0.17644464049404499, + "grad_norm": 0.8553173542022705, + "learning_rate": 4.990183923764238e-06, + "loss": 0.6193, + "step": 2400 + }, + { + "epoch": 0.17651815909425084, + "grad_norm": 0.8444970846176147, + "learning_rate": 4.990175391786467e-06, + "loss": 0.5332, + "step": 2401 + }, + { + "epoch": 0.1765916776944567, + "grad_norm": 0.8631430864334106, + "learning_rate": 4.990166856109677e-06, + "loss": 0.607, + "step": 2402 + }, + { + "epoch": 0.17666519629466254, + "grad_norm": 0.9289126992225647, + "learning_rate": 4.9901583167338804e-06, + "loss": 0.5808, + "step": 2403 + }, + { + "epoch": 0.1767387148948684, + "grad_norm": 0.8223212957382202, + "learning_rate": 4.99014977365909e-06, + "loss": 0.5677, + "step": 2404 + }, + { + "epoch": 0.17681223349507424, + "grad_norm": 0.9148321747779846, + "learning_rate": 4.990141226885319e-06, + "loss": 0.6171, + "step": 2405 + }, + { + "epoch": 0.1768857520952801, + "grad_norm": 0.9254779815673828, + "learning_rate": 4.990132676412579e-06, + "loss": 0.5758, + "step": 2406 + }, + { + "epoch": 0.17695927069548595, + "grad_norm": 0.894390881061554, + "learning_rate": 4.990124122240883e-06, + "loss": 0.6058, + "step": 2407 + }, + { + "epoch": 0.1770327892956918, + "grad_norm": 0.8451379537582397, + "learning_rate": 4.990115564370245e-06, + "loss": 0.5965, + "step": 2408 + }, + { + "epoch": 0.17710630789589765, + "grad_norm": 0.9141919612884521, + "learning_rate": 4.990107002800676e-06, + "loss": 0.5951, + "step": 2409 + }, + { + "epoch": 0.1771798264961035, + "grad_norm": 0.9111067056655884, + "learning_rate": 4.990098437532189e-06, + "loss": 0.581, + "step": 2410 + }, + { + "epoch": 0.17725334509630936, + "grad_norm": 0.9003580212593079, + "learning_rate": 4.990089868564798e-06, + "loss": 0.6102, + "step": 2411 + }, + { + "epoch": 0.1773268636965152, + "grad_norm": 0.8515177965164185, + "learning_rate": 4.990081295898514e-06, + "loss": 0.599, + "step": 2412 + }, + { + "epoch": 0.17740038229672106, + "grad_norm": 0.8457432389259338, + "learning_rate": 4.990072719533352e-06, + "loss": 0.5894, + "step": 2413 + }, + { + "epoch": 0.1774739008969269, + "grad_norm": 0.9099202752113342, + "learning_rate": 4.990064139469322e-06, + "loss": 0.5956, + "step": 2414 + }, + { + "epoch": 0.17754741949713276, + "grad_norm": 0.9333937168121338, + "learning_rate": 4.990055555706438e-06, + "loss": 0.5938, + "step": 2415 + }, + { + "epoch": 0.17762093809733862, + "grad_norm": 0.8746470212936401, + "learning_rate": 4.9900469682447135e-06, + "loss": 0.6362, + "step": 2416 + }, + { + "epoch": 0.17769445669754447, + "grad_norm": 0.9189608097076416, + "learning_rate": 4.990038377084161e-06, + "loss": 0.6346, + "step": 2417 + }, + { + "epoch": 0.17776797529775032, + "grad_norm": 0.9496613144874573, + "learning_rate": 4.990029782224791e-06, + "loss": 0.6218, + "step": 2418 + }, + { + "epoch": 0.17784149389795617, + "grad_norm": 0.8789765238761902, + "learning_rate": 4.99002118366662e-06, + "loss": 0.6026, + "step": 2419 + }, + { + "epoch": 0.17791501249816202, + "grad_norm": 0.8483562469482422, + "learning_rate": 4.990012581409658e-06, + "loss": 0.5978, + "step": 2420 + }, + { + "epoch": 0.17798853109836787, + "grad_norm": 0.8861926794052124, + "learning_rate": 4.990003975453918e-06, + "loss": 0.5803, + "step": 2421 + }, + { + "epoch": 0.17806204969857373, + "grad_norm": 0.8607658743858337, + "learning_rate": 4.989995365799415e-06, + "loss": 0.5877, + "step": 2422 + }, + { + "epoch": 0.17813556829877958, + "grad_norm": 0.9125183820724487, + "learning_rate": 4.98998675244616e-06, + "loss": 0.6204, + "step": 2423 + }, + { + "epoch": 0.17820908689898543, + "grad_norm": 0.9738055467605591, + "learning_rate": 4.989978135394166e-06, + "loss": 0.6664, + "step": 2424 + }, + { + "epoch": 0.17828260549919128, + "grad_norm": 0.9459792971611023, + "learning_rate": 4.989969514643445e-06, + "loss": 0.5943, + "step": 2425 + }, + { + "epoch": 0.17835612409939713, + "grad_norm": 0.8890348672866821, + "learning_rate": 4.98996089019401e-06, + "loss": 0.5774, + "step": 2426 + }, + { + "epoch": 0.17842964269960299, + "grad_norm": 0.8732793927192688, + "learning_rate": 4.9899522620458765e-06, + "loss": 0.6422, + "step": 2427 + }, + { + "epoch": 0.17850316129980884, + "grad_norm": 0.8728112578392029, + "learning_rate": 4.989943630199054e-06, + "loss": 0.6004, + "step": 2428 + }, + { + "epoch": 0.17857667990001472, + "grad_norm": 0.9631345868110657, + "learning_rate": 4.989934994653557e-06, + "loss": 0.5547, + "step": 2429 + }, + { + "epoch": 0.17865019850022057, + "grad_norm": 0.8651347756385803, + "learning_rate": 4.9899263554093976e-06, + "loss": 0.5925, + "step": 2430 + }, + { + "epoch": 0.17872371710042642, + "grad_norm": 0.9396576285362244, + "learning_rate": 4.989917712466589e-06, + "loss": 0.6807, + "step": 2431 + }, + { + "epoch": 0.17879723570063227, + "grad_norm": 0.9034039378166199, + "learning_rate": 4.9899090658251435e-06, + "loss": 0.6495, + "step": 2432 + }, + { + "epoch": 0.17887075430083813, + "grad_norm": 0.8611977100372314, + "learning_rate": 4.989900415485075e-06, + "loss": 0.5626, + "step": 2433 + }, + { + "epoch": 0.17894427290104398, + "grad_norm": 0.8688853979110718, + "learning_rate": 4.989891761446396e-06, + "loss": 0.6002, + "step": 2434 + }, + { + "epoch": 0.17901779150124983, + "grad_norm": 0.9067410230636597, + "learning_rate": 4.989883103709119e-06, + "loss": 0.5948, + "step": 2435 + }, + { + "epoch": 0.17909131010145568, + "grad_norm": 0.9336304664611816, + "learning_rate": 4.989874442273257e-06, + "loss": 0.5636, + "step": 2436 + }, + { + "epoch": 0.17916482870166153, + "grad_norm": 0.8189940452575684, + "learning_rate": 4.989865777138823e-06, + "loss": 0.5911, + "step": 2437 + }, + { + "epoch": 0.17923834730186738, + "grad_norm": 0.9534480571746826, + "learning_rate": 4.989857108305829e-06, + "loss": 0.6233, + "step": 2438 + }, + { + "epoch": 0.17931186590207324, + "grad_norm": 0.865146279335022, + "learning_rate": 4.989848435774289e-06, + "loss": 0.5953, + "step": 2439 + }, + { + "epoch": 0.1793853845022791, + "grad_norm": 0.9044166207313538, + "learning_rate": 4.989839759544215e-06, + "loss": 0.596, + "step": 2440 + }, + { + "epoch": 0.17945890310248494, + "grad_norm": 0.8941745758056641, + "learning_rate": 4.989831079615622e-06, + "loss": 0.5693, + "step": 2441 + }, + { + "epoch": 0.1795324217026908, + "grad_norm": 0.948297917842865, + "learning_rate": 4.98982239598852e-06, + "loss": 0.5807, + "step": 2442 + }, + { + "epoch": 0.17960594030289664, + "grad_norm": 0.8424651622772217, + "learning_rate": 4.989813708662923e-06, + "loss": 0.6137, + "step": 2443 + }, + { + "epoch": 0.1796794589031025, + "grad_norm": 0.8820436596870422, + "learning_rate": 4.989805017638845e-06, + "loss": 0.5827, + "step": 2444 + }, + { + "epoch": 0.17975297750330835, + "grad_norm": 0.9259464740753174, + "learning_rate": 4.9897963229162974e-06, + "loss": 0.6371, + "step": 2445 + }, + { + "epoch": 0.1798264961035142, + "grad_norm": 0.8450630903244019, + "learning_rate": 4.989787624495293e-06, + "loss": 0.5352, + "step": 2446 + }, + { + "epoch": 0.17990001470372005, + "grad_norm": 0.897432267665863, + "learning_rate": 4.989778922375847e-06, + "loss": 0.6037, + "step": 2447 + }, + { + "epoch": 0.1799735333039259, + "grad_norm": 0.8953327536582947, + "learning_rate": 4.989770216557969e-06, + "loss": 0.5845, + "step": 2448 + }, + { + "epoch": 0.18004705190413176, + "grad_norm": 0.8439005613327026, + "learning_rate": 4.989761507041675e-06, + "loss": 0.5635, + "step": 2449 + }, + { + "epoch": 0.1801205705043376, + "grad_norm": 0.8458166718482971, + "learning_rate": 4.9897527938269754e-06, + "loss": 0.5747, + "step": 2450 + }, + { + "epoch": 0.18019408910454346, + "grad_norm": 0.8428983688354492, + "learning_rate": 4.989744076913886e-06, + "loss": 0.5693, + "step": 2451 + }, + { + "epoch": 0.1802676077047493, + "grad_norm": 0.882759153842926, + "learning_rate": 4.989735356302417e-06, + "loss": 0.5988, + "step": 2452 + }, + { + "epoch": 0.18034112630495516, + "grad_norm": 0.8962409496307373, + "learning_rate": 4.9897266319925824e-06, + "loss": 0.5928, + "step": 2453 + }, + { + "epoch": 0.18041464490516101, + "grad_norm": 0.897134006023407, + "learning_rate": 4.9897179039843955e-06, + "loss": 0.6071, + "step": 2454 + }, + { + "epoch": 0.18048816350536687, + "grad_norm": 0.9285306930541992, + "learning_rate": 4.989709172277869e-06, + "loss": 0.6074, + "step": 2455 + }, + { + "epoch": 0.18056168210557272, + "grad_norm": 0.8713551759719849, + "learning_rate": 4.989700436873016e-06, + "loss": 0.5915, + "step": 2456 + }, + { + "epoch": 0.18063520070577857, + "grad_norm": 0.8930342793464661, + "learning_rate": 4.989691697769849e-06, + "loss": 0.6085, + "step": 2457 + }, + { + "epoch": 0.18070871930598442, + "grad_norm": 0.966964602470398, + "learning_rate": 4.989682954968382e-06, + "loss": 0.6135, + "step": 2458 + }, + { + "epoch": 0.18078223790619027, + "grad_norm": 0.8786676526069641, + "learning_rate": 4.989674208468626e-06, + "loss": 0.6394, + "step": 2459 + }, + { + "epoch": 0.18085575650639613, + "grad_norm": 0.9559305310249329, + "learning_rate": 4.989665458270597e-06, + "loss": 0.6376, + "step": 2460 + }, + { + "epoch": 0.18092927510660198, + "grad_norm": 0.8584518432617188, + "learning_rate": 4.989656704374305e-06, + "loss": 0.5756, + "step": 2461 + }, + { + "epoch": 0.18100279370680783, + "grad_norm": 0.9630537629127502, + "learning_rate": 4.989647946779765e-06, + "loss": 0.5837, + "step": 2462 + }, + { + "epoch": 0.18107631230701368, + "grad_norm": 0.9187317490577698, + "learning_rate": 4.989639185486989e-06, + "loss": 0.6401, + "step": 2463 + }, + { + "epoch": 0.18114983090721953, + "grad_norm": 0.8610250949859619, + "learning_rate": 4.98963042049599e-06, + "loss": 0.5944, + "step": 2464 + }, + { + "epoch": 0.18122334950742539, + "grad_norm": 0.8966956734657288, + "learning_rate": 4.989621651806782e-06, + "loss": 0.6253, + "step": 2465 + }, + { + "epoch": 0.18129686810763124, + "grad_norm": 0.9416041374206543, + "learning_rate": 4.989612879419377e-06, + "loss": 0.6222, + "step": 2466 + }, + { + "epoch": 0.1813703867078371, + "grad_norm": 0.9207152128219604, + "learning_rate": 4.989604103333788e-06, + "loss": 0.5996, + "step": 2467 + }, + { + "epoch": 0.18144390530804294, + "grad_norm": 0.9200131297111511, + "learning_rate": 4.989595323550029e-06, + "loss": 0.5795, + "step": 2468 + }, + { + "epoch": 0.1815174239082488, + "grad_norm": 0.8814932107925415, + "learning_rate": 4.989586540068113e-06, + "loss": 0.5979, + "step": 2469 + }, + { + "epoch": 0.18159094250845464, + "grad_norm": 0.9509708285331726, + "learning_rate": 4.989577752888052e-06, + "loss": 0.6345, + "step": 2470 + }, + { + "epoch": 0.1816644611086605, + "grad_norm": 0.9463447332382202, + "learning_rate": 4.989568962009859e-06, + "loss": 0.6133, + "step": 2471 + }, + { + "epoch": 0.18173797970886635, + "grad_norm": 0.9491605162620544, + "learning_rate": 4.989560167433548e-06, + "loss": 0.6324, + "step": 2472 + }, + { + "epoch": 0.1818114983090722, + "grad_norm": 0.9412112236022949, + "learning_rate": 4.989551369159132e-06, + "loss": 0.6195, + "step": 2473 + }, + { + "epoch": 0.18188501690927805, + "grad_norm": 0.9131445288658142, + "learning_rate": 4.9895425671866236e-06, + "loss": 0.5468, + "step": 2474 + }, + { + "epoch": 0.1819585355094839, + "grad_norm": 0.9130856394767761, + "learning_rate": 4.9895337615160356e-06, + "loss": 0.5775, + "step": 2475 + }, + { + "epoch": 0.18203205410968976, + "grad_norm": 0.8583151698112488, + "learning_rate": 4.989524952147382e-06, + "loss": 0.6044, + "step": 2476 + }, + { + "epoch": 0.1821055727098956, + "grad_norm": 0.9059491157531738, + "learning_rate": 4.989516139080675e-06, + "loss": 0.6223, + "step": 2477 + }, + { + "epoch": 0.18217909131010146, + "grad_norm": 0.8627238273620605, + "learning_rate": 4.989507322315928e-06, + "loss": 0.5897, + "step": 2478 + }, + { + "epoch": 0.1822526099103073, + "grad_norm": 0.891129732131958, + "learning_rate": 4.989498501853155e-06, + "loss": 0.6286, + "step": 2479 + }, + { + "epoch": 0.18232612851051316, + "grad_norm": 0.8541082143783569, + "learning_rate": 4.989489677692368e-06, + "loss": 0.5721, + "step": 2480 + }, + { + "epoch": 0.18239964711071902, + "grad_norm": 0.8452786803245544, + "learning_rate": 4.98948084983358e-06, + "loss": 0.5727, + "step": 2481 + }, + { + "epoch": 0.18247316571092487, + "grad_norm": 0.9045311212539673, + "learning_rate": 4.9894720182768055e-06, + "loss": 0.5476, + "step": 2482 + }, + { + "epoch": 0.18254668431113072, + "grad_norm": 0.872664749622345, + "learning_rate": 4.989463183022055e-06, + "loss": 0.5623, + "step": 2483 + }, + { + "epoch": 0.18262020291133657, + "grad_norm": 0.9001846313476562, + "learning_rate": 4.989454344069345e-06, + "loss": 0.5934, + "step": 2484 + }, + { + "epoch": 0.18269372151154242, + "grad_norm": 0.8612085580825806, + "learning_rate": 4.989445501418686e-06, + "loss": 0.5922, + "step": 2485 + }, + { + "epoch": 0.18276724011174827, + "grad_norm": 0.9001266956329346, + "learning_rate": 4.9894366550700925e-06, + "loss": 0.6094, + "step": 2486 + }, + { + "epoch": 0.18284075871195413, + "grad_norm": 0.8582026362419128, + "learning_rate": 4.989427805023576e-06, + "loss": 0.6013, + "step": 2487 + }, + { + "epoch": 0.18291427731215998, + "grad_norm": 0.8638447523117065, + "learning_rate": 4.989418951279152e-06, + "loss": 0.615, + "step": 2488 + }, + { + "epoch": 0.18298779591236583, + "grad_norm": 0.8775068521499634, + "learning_rate": 4.989410093836831e-06, + "loss": 0.6196, + "step": 2489 + }, + { + "epoch": 0.18306131451257168, + "grad_norm": 0.8501842021942139, + "learning_rate": 4.98940123269663e-06, + "loss": 0.5708, + "step": 2490 + }, + { + "epoch": 0.18313483311277753, + "grad_norm": 0.8541299104690552, + "learning_rate": 4.9893923678585585e-06, + "loss": 0.5901, + "step": 2491 + }, + { + "epoch": 0.18320835171298339, + "grad_norm": 0.870822012424469, + "learning_rate": 4.989383499322631e-06, + "loss": 0.5867, + "step": 2492 + }, + { + "epoch": 0.18328187031318924, + "grad_norm": 0.9227132201194763, + "learning_rate": 4.98937462708886e-06, + "loss": 0.6089, + "step": 2493 + }, + { + "epoch": 0.1833553889133951, + "grad_norm": 0.9173308610916138, + "learning_rate": 4.98936575115726e-06, + "loss": 0.6253, + "step": 2494 + }, + { + "epoch": 0.18342890751360094, + "grad_norm": 0.8774897456169128, + "learning_rate": 4.989356871527844e-06, + "loss": 0.5495, + "step": 2495 + }, + { + "epoch": 0.1835024261138068, + "grad_norm": 0.9012879729270935, + "learning_rate": 4.989347988200624e-06, + "loss": 0.6047, + "step": 2496 + }, + { + "epoch": 0.18357594471401265, + "grad_norm": 0.8931599259376526, + "learning_rate": 4.989339101175613e-06, + "loss": 0.5673, + "step": 2497 + }, + { + "epoch": 0.1836494633142185, + "grad_norm": 0.853146493434906, + "learning_rate": 4.989330210452826e-06, + "loss": 0.5585, + "step": 2498 + }, + { + "epoch": 0.18372298191442435, + "grad_norm": 0.8902667760848999, + "learning_rate": 4.989321316032276e-06, + "loss": 0.5737, + "step": 2499 + }, + { + "epoch": 0.1837965005146302, + "grad_norm": 0.8451610207557678, + "learning_rate": 4.989312417913974e-06, + "loss": 0.6111, + "step": 2500 + }, + { + "epoch": 0.18387001911483605, + "grad_norm": 0.9037519097328186, + "learning_rate": 4.989303516097936e-06, + "loss": 0.6004, + "step": 2501 + }, + { + "epoch": 0.1839435377150419, + "grad_norm": 0.8669981956481934, + "learning_rate": 4.989294610584174e-06, + "loss": 0.6131, + "step": 2502 + }, + { + "epoch": 0.18401705631524776, + "grad_norm": 0.8625298738479614, + "learning_rate": 4.9892857013727e-06, + "loss": 0.6236, + "step": 2503 + }, + { + "epoch": 0.1840905749154536, + "grad_norm": 0.8598683476448059, + "learning_rate": 4.9892767884635285e-06, + "loss": 0.5724, + "step": 2504 + }, + { + "epoch": 0.18416409351565946, + "grad_norm": 0.8011195659637451, + "learning_rate": 4.989267871856674e-06, + "loss": 0.5696, + "step": 2505 + }, + { + "epoch": 0.1842376121158653, + "grad_norm": 0.8987377285957336, + "learning_rate": 4.989258951552147e-06, + "loss": 0.5897, + "step": 2506 + }, + { + "epoch": 0.18431113071607116, + "grad_norm": 0.8879187703132629, + "learning_rate": 4.989250027549962e-06, + "loss": 0.6418, + "step": 2507 + }, + { + "epoch": 0.18438464931627702, + "grad_norm": 0.9099470376968384, + "learning_rate": 4.989241099850134e-06, + "loss": 0.6022, + "step": 2508 + }, + { + "epoch": 0.18445816791648287, + "grad_norm": 0.9012265205383301, + "learning_rate": 4.989232168452673e-06, + "loss": 0.587, + "step": 2509 + }, + { + "epoch": 0.18453168651668872, + "grad_norm": 0.8395746350288391, + "learning_rate": 4.989223233357595e-06, + "loss": 0.5733, + "step": 2510 + }, + { + "epoch": 0.18460520511689457, + "grad_norm": 0.9631102681159973, + "learning_rate": 4.989214294564912e-06, + "loss": 0.613, + "step": 2511 + }, + { + "epoch": 0.18467872371710042, + "grad_norm": 0.8583738803863525, + "learning_rate": 4.989205352074637e-06, + "loss": 0.5731, + "step": 2512 + }, + { + "epoch": 0.18475224231730628, + "grad_norm": 0.8822098970413208, + "learning_rate": 4.989196405886783e-06, + "loss": 0.5796, + "step": 2513 + }, + { + "epoch": 0.18482576091751213, + "grad_norm": 0.881816029548645, + "learning_rate": 4.989187456001366e-06, + "loss": 0.5824, + "step": 2514 + }, + { + "epoch": 0.18489927951771798, + "grad_norm": 0.8460358381271362, + "learning_rate": 4.989178502418397e-06, + "loss": 0.5935, + "step": 2515 + }, + { + "epoch": 0.18497279811792383, + "grad_norm": 0.9002302289009094, + "learning_rate": 4.989169545137889e-06, + "loss": 0.557, + "step": 2516 + }, + { + "epoch": 0.18504631671812968, + "grad_norm": 0.9116310477256775, + "learning_rate": 4.989160584159855e-06, + "loss": 0.569, + "step": 2517 + }, + { + "epoch": 0.18511983531833553, + "grad_norm": 0.9254744052886963, + "learning_rate": 4.98915161948431e-06, + "loss": 0.6146, + "step": 2518 + }, + { + "epoch": 0.1851933539185414, + "grad_norm": 0.9000033736228943, + "learning_rate": 4.989142651111267e-06, + "loss": 0.6067, + "step": 2519 + }, + { + "epoch": 0.18526687251874724, + "grad_norm": 0.880547046661377, + "learning_rate": 4.98913367904074e-06, + "loss": 0.5912, + "step": 2520 + }, + { + "epoch": 0.1853403911189531, + "grad_norm": 0.8751773238182068, + "learning_rate": 4.989124703272739e-06, + "loss": 0.5911, + "step": 2521 + }, + { + "epoch": 0.18541390971915894, + "grad_norm": 0.8342462778091431, + "learning_rate": 4.989115723807281e-06, + "loss": 0.6035, + "step": 2522 + }, + { + "epoch": 0.1854874283193648, + "grad_norm": 0.8467423319816589, + "learning_rate": 4.9891067406443775e-06, + "loss": 0.5852, + "step": 2523 + }, + { + "epoch": 0.18556094691957065, + "grad_norm": 0.8829067349433899, + "learning_rate": 4.989097753784042e-06, + "loss": 0.6025, + "step": 2524 + }, + { + "epoch": 0.1856344655197765, + "grad_norm": 0.9242157340049744, + "learning_rate": 4.989088763226288e-06, + "loss": 0.6057, + "step": 2525 + }, + { + "epoch": 0.18570798411998235, + "grad_norm": 0.9409055113792419, + "learning_rate": 4.989079768971129e-06, + "loss": 0.6208, + "step": 2526 + }, + { + "epoch": 0.1857815027201882, + "grad_norm": 0.9059059023857117, + "learning_rate": 4.989070771018578e-06, + "loss": 0.6028, + "step": 2527 + }, + { + "epoch": 0.18585502132039405, + "grad_norm": 0.867737889289856, + "learning_rate": 4.989061769368649e-06, + "loss": 0.5648, + "step": 2528 + }, + { + "epoch": 0.1859285399205999, + "grad_norm": 0.8708767294883728, + "learning_rate": 4.989052764021356e-06, + "loss": 0.6213, + "step": 2529 + }, + { + "epoch": 0.18600205852080576, + "grad_norm": 0.8523142337799072, + "learning_rate": 4.98904375497671e-06, + "loss": 0.5846, + "step": 2530 + }, + { + "epoch": 0.1860755771210116, + "grad_norm": 0.8632274270057678, + "learning_rate": 4.989034742234726e-06, + "loss": 0.5941, + "step": 2531 + }, + { + "epoch": 0.18614909572121746, + "grad_norm": 0.9281796813011169, + "learning_rate": 4.989025725795418e-06, + "loss": 0.5541, + "step": 2532 + }, + { + "epoch": 0.1862226143214233, + "grad_norm": 0.8385058641433716, + "learning_rate": 4.9890167056587984e-06, + "loss": 0.5751, + "step": 2533 + }, + { + "epoch": 0.18629613292162917, + "grad_norm": 0.9312770366668701, + "learning_rate": 4.989007681824881e-06, + "loss": 0.5931, + "step": 2534 + }, + { + "epoch": 0.18636965152183502, + "grad_norm": 0.9087133407592773, + "learning_rate": 4.988998654293678e-06, + "loss": 0.5519, + "step": 2535 + }, + { + "epoch": 0.18644317012204087, + "grad_norm": 0.8752126693725586, + "learning_rate": 4.988989623065205e-06, + "loss": 0.6213, + "step": 2536 + }, + { + "epoch": 0.18651668872224672, + "grad_norm": 0.863163411617279, + "learning_rate": 4.988980588139473e-06, + "loss": 0.5941, + "step": 2537 + }, + { + "epoch": 0.18659020732245257, + "grad_norm": 0.8948654532432556, + "learning_rate": 4.988971549516497e-06, + "loss": 0.6151, + "step": 2538 + }, + { + "epoch": 0.18666372592265842, + "grad_norm": 0.9072282910346985, + "learning_rate": 4.98896250719629e-06, + "loss": 0.6405, + "step": 2539 + }, + { + "epoch": 0.18673724452286428, + "grad_norm": 0.8459285497665405, + "learning_rate": 4.988953461178866e-06, + "loss": 0.5733, + "step": 2540 + }, + { + "epoch": 0.18681076312307013, + "grad_norm": 0.9459330439567566, + "learning_rate": 4.988944411464237e-06, + "loss": 0.5813, + "step": 2541 + }, + { + "epoch": 0.18688428172327598, + "grad_norm": 0.9136485457420349, + "learning_rate": 4.988935358052418e-06, + "loss": 0.606, + "step": 2542 + }, + { + "epoch": 0.18695780032348183, + "grad_norm": 0.9442867040634155, + "learning_rate": 4.988926300943422e-06, + "loss": 0.626, + "step": 2543 + }, + { + "epoch": 0.18703131892368768, + "grad_norm": 0.8703967928886414, + "learning_rate": 4.988917240137262e-06, + "loss": 0.5881, + "step": 2544 + }, + { + "epoch": 0.18710483752389354, + "grad_norm": 0.8783220648765564, + "learning_rate": 4.988908175633952e-06, + "loss": 0.5767, + "step": 2545 + }, + { + "epoch": 0.1871783561240994, + "grad_norm": 0.874894380569458, + "learning_rate": 4.988899107433505e-06, + "loss": 0.6358, + "step": 2546 + }, + { + "epoch": 0.18725187472430524, + "grad_norm": 0.8401141166687012, + "learning_rate": 4.988890035535935e-06, + "loss": 0.6026, + "step": 2547 + }, + { + "epoch": 0.1873253933245111, + "grad_norm": 0.8773707151412964, + "learning_rate": 4.988880959941255e-06, + "loss": 0.6279, + "step": 2548 + }, + { + "epoch": 0.18739891192471694, + "grad_norm": 0.9141743779182434, + "learning_rate": 4.988871880649477e-06, + "loss": 0.5949, + "step": 2549 + }, + { + "epoch": 0.1874724305249228, + "grad_norm": 0.8952704668045044, + "learning_rate": 4.988862797660618e-06, + "loss": 0.6381, + "step": 2550 + }, + { + "epoch": 0.18754594912512865, + "grad_norm": 0.9135822653770447, + "learning_rate": 4.988853710974689e-06, + "loss": 0.5764, + "step": 2551 + }, + { + "epoch": 0.1876194677253345, + "grad_norm": 0.8858804106712341, + "learning_rate": 4.988844620591704e-06, + "loss": 0.6221, + "step": 2552 + }, + { + "epoch": 0.18769298632554035, + "grad_norm": 0.8510022759437561, + "learning_rate": 4.988835526511677e-06, + "loss": 0.6024, + "step": 2553 + }, + { + "epoch": 0.1877665049257462, + "grad_norm": 0.8742685914039612, + "learning_rate": 4.988826428734621e-06, + "loss": 0.5538, + "step": 2554 + }, + { + "epoch": 0.18784002352595205, + "grad_norm": 0.846047580242157, + "learning_rate": 4.98881732726055e-06, + "loss": 0.5558, + "step": 2555 + }, + { + "epoch": 0.1879135421261579, + "grad_norm": 0.8523429036140442, + "learning_rate": 4.988808222089476e-06, + "loss": 0.5948, + "step": 2556 + }, + { + "epoch": 0.18798706072636376, + "grad_norm": 0.9059728384017944, + "learning_rate": 4.9887991132214145e-06, + "loss": 0.6232, + "step": 2557 + }, + { + "epoch": 0.1880605793265696, + "grad_norm": 0.8722187280654907, + "learning_rate": 4.988790000656378e-06, + "loss": 0.5791, + "step": 2558 + }, + { + "epoch": 0.18813409792677546, + "grad_norm": 0.8306583166122437, + "learning_rate": 4.988780884394381e-06, + "loss": 0.5832, + "step": 2559 + }, + { + "epoch": 0.18820761652698131, + "grad_norm": 0.8355990052223206, + "learning_rate": 4.988771764435435e-06, + "loss": 0.5391, + "step": 2560 + }, + { + "epoch": 0.18828113512718717, + "grad_norm": 0.8922846913337708, + "learning_rate": 4.988762640779555e-06, + "loss": 0.6044, + "step": 2561 + }, + { + "epoch": 0.18835465372739302, + "grad_norm": 0.8420063853263855, + "learning_rate": 4.988753513426755e-06, + "loss": 0.5765, + "step": 2562 + }, + { + "epoch": 0.18842817232759887, + "grad_norm": 0.8467522859573364, + "learning_rate": 4.988744382377048e-06, + "loss": 0.5774, + "step": 2563 + }, + { + "epoch": 0.18850169092780472, + "grad_norm": 0.9658263325691223, + "learning_rate": 4.988735247630447e-06, + "loss": 0.6377, + "step": 2564 + }, + { + "epoch": 0.18857520952801057, + "grad_norm": 0.86518794298172, + "learning_rate": 4.988726109186966e-06, + "loss": 0.5949, + "step": 2565 + }, + { + "epoch": 0.18864872812821643, + "grad_norm": 0.8346620798110962, + "learning_rate": 4.9887169670466194e-06, + "loss": 0.5836, + "step": 2566 + }, + { + "epoch": 0.18872224672842228, + "grad_norm": 0.9003053903579712, + "learning_rate": 4.98870782120942e-06, + "loss": 0.5873, + "step": 2567 + }, + { + "epoch": 0.18879576532862816, + "grad_norm": 0.891181230545044, + "learning_rate": 4.98869867167538e-06, + "loss": 0.6018, + "step": 2568 + }, + { + "epoch": 0.188869283928834, + "grad_norm": 0.9380562901496887, + "learning_rate": 4.988689518444516e-06, + "loss": 0.5992, + "step": 2569 + }, + { + "epoch": 0.18894280252903986, + "grad_norm": 0.8686289191246033, + "learning_rate": 4.98868036151684e-06, + "loss": 0.6016, + "step": 2570 + }, + { + "epoch": 0.1890163211292457, + "grad_norm": 0.8648205995559692, + "learning_rate": 4.988671200892364e-06, + "loss": 0.6033, + "step": 2571 + }, + { + "epoch": 0.18908983972945156, + "grad_norm": 0.9180740714073181, + "learning_rate": 4.9886620365711045e-06, + "loss": 0.6026, + "step": 2572 + }, + { + "epoch": 0.18916335832965742, + "grad_norm": 0.8255365490913391, + "learning_rate": 4.9886528685530735e-06, + "loss": 0.5764, + "step": 2573 + }, + { + "epoch": 0.18923687692986327, + "grad_norm": 0.9202641248703003, + "learning_rate": 4.988643696838285e-06, + "loss": 0.6195, + "step": 2574 + }, + { + "epoch": 0.18931039553006912, + "grad_norm": 0.9118548631668091, + "learning_rate": 4.988634521426752e-06, + "loss": 0.5974, + "step": 2575 + }, + { + "epoch": 0.18938391413027497, + "grad_norm": 0.8541158437728882, + "learning_rate": 4.988625342318489e-06, + "loss": 0.6063, + "step": 2576 + }, + { + "epoch": 0.18945743273048082, + "grad_norm": 0.8646546006202698, + "learning_rate": 4.988616159513509e-06, + "loss": 0.6138, + "step": 2577 + }, + { + "epoch": 0.18953095133068668, + "grad_norm": 0.872343122959137, + "learning_rate": 4.988606973011828e-06, + "loss": 0.5784, + "step": 2578 + }, + { + "epoch": 0.18960446993089253, + "grad_norm": 0.8706567287445068, + "learning_rate": 4.9885977828134555e-06, + "loss": 0.5801, + "step": 2579 + }, + { + "epoch": 0.18967798853109838, + "grad_norm": 0.839758038520813, + "learning_rate": 4.988588588918408e-06, + "loss": 0.5517, + "step": 2580 + }, + { + "epoch": 0.18975150713130423, + "grad_norm": 0.8662402033805847, + "learning_rate": 4.9885793913266975e-06, + "loss": 0.5829, + "step": 2581 + }, + { + "epoch": 0.18982502573151008, + "grad_norm": 0.8654571771621704, + "learning_rate": 4.98857019003834e-06, + "loss": 0.593, + "step": 2582 + }, + { + "epoch": 0.18989854433171593, + "grad_norm": 0.9083172678947449, + "learning_rate": 4.988560985053347e-06, + "loss": 0.5873, + "step": 2583 + }, + { + "epoch": 0.1899720629319218, + "grad_norm": 0.8783074617385864, + "learning_rate": 4.988551776371733e-06, + "loss": 0.5738, + "step": 2584 + }, + { + "epoch": 0.19004558153212764, + "grad_norm": 0.8527397513389587, + "learning_rate": 4.988542563993511e-06, + "loss": 0.5628, + "step": 2585 + }, + { + "epoch": 0.1901191001323335, + "grad_norm": 0.8894096612930298, + "learning_rate": 4.988533347918697e-06, + "loss": 0.6141, + "step": 2586 + }, + { + "epoch": 0.19019261873253934, + "grad_norm": 0.8999924063682556, + "learning_rate": 4.988524128147302e-06, + "loss": 0.6258, + "step": 2587 + }, + { + "epoch": 0.1902661373327452, + "grad_norm": 0.8599125742912292, + "learning_rate": 4.98851490467934e-06, + "loss": 0.6061, + "step": 2588 + }, + { + "epoch": 0.19033965593295105, + "grad_norm": 0.8749895691871643, + "learning_rate": 4.9885056775148265e-06, + "loss": 0.5841, + "step": 2589 + }, + { + "epoch": 0.1904131745331569, + "grad_norm": 0.8931896090507507, + "learning_rate": 4.988496446653774e-06, + "loss": 0.6581, + "step": 2590 + }, + { + "epoch": 0.19048669313336275, + "grad_norm": 0.8533226251602173, + "learning_rate": 4.988487212096195e-06, + "loss": 0.5655, + "step": 2591 + }, + { + "epoch": 0.1905602117335686, + "grad_norm": 0.9001132845878601, + "learning_rate": 4.988477973842105e-06, + "loss": 0.6462, + "step": 2592 + }, + { + "epoch": 0.19063373033377445, + "grad_norm": 0.8442355394363403, + "learning_rate": 4.988468731891518e-06, + "loss": 0.6012, + "step": 2593 + }, + { + "epoch": 0.1907072489339803, + "grad_norm": 0.8708751201629639, + "learning_rate": 4.9884594862444465e-06, + "loss": 0.5707, + "step": 2594 + }, + { + "epoch": 0.19078076753418616, + "grad_norm": 0.9158522486686707, + "learning_rate": 4.988450236900904e-06, + "loss": 0.6291, + "step": 2595 + }, + { + "epoch": 0.190854286134392, + "grad_norm": 1.0337013006210327, + "learning_rate": 4.988440983860907e-06, + "loss": 0.6591, + "step": 2596 + }, + { + "epoch": 0.19092780473459786, + "grad_norm": 0.8590973615646362, + "learning_rate": 4.9884317271244655e-06, + "loss": 0.5874, + "step": 2597 + }, + { + "epoch": 0.1910013233348037, + "grad_norm": 0.9012556076049805, + "learning_rate": 4.988422466691595e-06, + "loss": 0.6257, + "step": 2598 + }, + { + "epoch": 0.19107484193500957, + "grad_norm": 0.9264505505561829, + "learning_rate": 4.98841320256231e-06, + "loss": 0.6282, + "step": 2599 + }, + { + "epoch": 0.19114836053521542, + "grad_norm": 0.9238608479499817, + "learning_rate": 4.9884039347366235e-06, + "loss": 0.618, + "step": 2600 + }, + { + "epoch": 0.19122187913542127, + "grad_norm": 0.9103981852531433, + "learning_rate": 4.988394663214548e-06, + "loss": 0.5988, + "step": 2601 + }, + { + "epoch": 0.19129539773562712, + "grad_norm": 0.9141647219657898, + "learning_rate": 4.988385387996099e-06, + "loss": 0.5643, + "step": 2602 + }, + { + "epoch": 0.19136891633583297, + "grad_norm": 0.8671426177024841, + "learning_rate": 4.98837610908129e-06, + "loss": 0.5919, + "step": 2603 + }, + { + "epoch": 0.19144243493603882, + "grad_norm": 1.0325409173965454, + "learning_rate": 4.988366826470135e-06, + "loss": 0.6427, + "step": 2604 + }, + { + "epoch": 0.19151595353624468, + "grad_norm": 0.8769060373306274, + "learning_rate": 4.988357540162646e-06, + "loss": 0.574, + "step": 2605 + }, + { + "epoch": 0.19158947213645053, + "grad_norm": 0.9534289836883545, + "learning_rate": 4.988348250158839e-06, + "loss": 0.5726, + "step": 2606 + }, + { + "epoch": 0.19166299073665638, + "grad_norm": 0.8943125009536743, + "learning_rate": 4.988338956458727e-06, + "loss": 0.5535, + "step": 2607 + }, + { + "epoch": 0.19173650933686223, + "grad_norm": 0.857146143913269, + "learning_rate": 4.988329659062323e-06, + "loss": 0.5731, + "step": 2608 + }, + { + "epoch": 0.19181002793706808, + "grad_norm": 0.8532823920249939, + "learning_rate": 4.988320357969643e-06, + "loss": 0.6132, + "step": 2609 + }, + { + "epoch": 0.19188354653727394, + "grad_norm": 0.9207832217216492, + "learning_rate": 4.988311053180698e-06, + "loss": 0.6196, + "step": 2610 + }, + { + "epoch": 0.1919570651374798, + "grad_norm": 0.8725190162658691, + "learning_rate": 4.988301744695504e-06, + "loss": 0.6183, + "step": 2611 + }, + { + "epoch": 0.19203058373768564, + "grad_norm": 0.8911886215209961, + "learning_rate": 4.9882924325140734e-06, + "loss": 0.5852, + "step": 2612 + }, + { + "epoch": 0.1921041023378915, + "grad_norm": 0.9855916500091553, + "learning_rate": 4.988283116636421e-06, + "loss": 0.638, + "step": 2613 + }, + { + "epoch": 0.19217762093809734, + "grad_norm": 0.8922145962715149, + "learning_rate": 4.988273797062561e-06, + "loss": 0.6032, + "step": 2614 + }, + { + "epoch": 0.1922511395383032, + "grad_norm": 0.8532742261886597, + "learning_rate": 4.9882644737925056e-06, + "loss": 0.598, + "step": 2615 + }, + { + "epoch": 0.19232465813850905, + "grad_norm": 0.9086215496063232, + "learning_rate": 4.9882551468262695e-06, + "loss": 0.5741, + "step": 2616 + }, + { + "epoch": 0.1923981767387149, + "grad_norm": 0.8509706258773804, + "learning_rate": 4.988245816163867e-06, + "loss": 0.5703, + "step": 2617 + }, + { + "epoch": 0.19247169533892075, + "grad_norm": 0.8713697791099548, + "learning_rate": 4.988236481805311e-06, + "loss": 0.5527, + "step": 2618 + }, + { + "epoch": 0.1925452139391266, + "grad_norm": 0.8427676558494568, + "learning_rate": 4.988227143750617e-06, + "loss": 0.5702, + "step": 2619 + }, + { + "epoch": 0.19261873253933245, + "grad_norm": 0.8882177472114563, + "learning_rate": 4.9882178019997975e-06, + "loss": 0.6111, + "step": 2620 + }, + { + "epoch": 0.1926922511395383, + "grad_norm": 0.8494210839271545, + "learning_rate": 4.988208456552866e-06, + "loss": 0.6287, + "step": 2621 + }, + { + "epoch": 0.19276576973974416, + "grad_norm": 0.8781957030296326, + "learning_rate": 4.988199107409838e-06, + "loss": 0.5681, + "step": 2622 + }, + { + "epoch": 0.19283928833995, + "grad_norm": 0.8840944766998291, + "learning_rate": 4.9881897545707256e-06, + "loss": 0.5869, + "step": 2623 + }, + { + "epoch": 0.19291280694015586, + "grad_norm": 0.8604236245155334, + "learning_rate": 4.988180398035543e-06, + "loss": 0.6069, + "step": 2624 + }, + { + "epoch": 0.19298632554036171, + "grad_norm": 0.8765935301780701, + "learning_rate": 4.988171037804307e-06, + "loss": 0.618, + "step": 2625 + }, + { + "epoch": 0.19305984414056757, + "grad_norm": 0.9026581645011902, + "learning_rate": 4.988161673877027e-06, + "loss": 0.6679, + "step": 2626 + }, + { + "epoch": 0.19313336274077342, + "grad_norm": 0.8511289954185486, + "learning_rate": 4.988152306253721e-06, + "loss": 0.5424, + "step": 2627 + }, + { + "epoch": 0.19320688134097927, + "grad_norm": 0.9067777395248413, + "learning_rate": 4.988142934934399e-06, + "loss": 0.5913, + "step": 2628 + }, + { + "epoch": 0.19328039994118512, + "grad_norm": 0.8907544612884521, + "learning_rate": 4.988133559919078e-06, + "loss": 0.5874, + "step": 2629 + }, + { + "epoch": 0.19335391854139097, + "grad_norm": 0.8770014643669128, + "learning_rate": 4.9881241812077705e-06, + "loss": 0.5668, + "step": 2630 + }, + { + "epoch": 0.19342743714159683, + "grad_norm": 0.8942950963973999, + "learning_rate": 4.988114798800491e-06, + "loss": 0.5885, + "step": 2631 + }, + { + "epoch": 0.19350095574180268, + "grad_norm": 0.8935433626174927, + "learning_rate": 4.988105412697254e-06, + "loss": 0.6067, + "step": 2632 + }, + { + "epoch": 0.19357447434200853, + "grad_norm": 0.8895753026008606, + "learning_rate": 4.988096022898072e-06, + "loss": 0.6039, + "step": 2633 + }, + { + "epoch": 0.19364799294221438, + "grad_norm": 0.83304363489151, + "learning_rate": 4.988086629402959e-06, + "loss": 0.5867, + "step": 2634 + }, + { + "epoch": 0.19372151154242023, + "grad_norm": 0.8779156804084778, + "learning_rate": 4.9880772322119295e-06, + "loss": 0.6409, + "step": 2635 + }, + { + "epoch": 0.19379503014262608, + "grad_norm": 0.9228371977806091, + "learning_rate": 4.9880678313249985e-06, + "loss": 0.5983, + "step": 2636 + }, + { + "epoch": 0.19386854874283194, + "grad_norm": 0.8460370898246765, + "learning_rate": 4.988058426742178e-06, + "loss": 0.5737, + "step": 2637 + }, + { + "epoch": 0.1939420673430378, + "grad_norm": 0.8395795822143555, + "learning_rate": 4.988049018463484e-06, + "loss": 0.583, + "step": 2638 + }, + { + "epoch": 0.19401558594324364, + "grad_norm": 0.9002264142036438, + "learning_rate": 4.988039606488929e-06, + "loss": 0.5817, + "step": 2639 + }, + { + "epoch": 0.1940891045434495, + "grad_norm": 0.8253484964370728, + "learning_rate": 4.988030190818527e-06, + "loss": 0.5157, + "step": 2640 + }, + { + "epoch": 0.19416262314365534, + "grad_norm": 0.8659694194793701, + "learning_rate": 4.988020771452292e-06, + "loss": 0.5868, + "step": 2641 + }, + { + "epoch": 0.1942361417438612, + "grad_norm": 0.8803070783615112, + "learning_rate": 4.9880113483902395e-06, + "loss": 0.5931, + "step": 2642 + }, + { + "epoch": 0.19430966034406705, + "grad_norm": 0.8176282048225403, + "learning_rate": 4.988001921632382e-06, + "loss": 0.5369, + "step": 2643 + }, + { + "epoch": 0.1943831789442729, + "grad_norm": 0.9162812232971191, + "learning_rate": 4.987992491178734e-06, + "loss": 0.6029, + "step": 2644 + }, + { + "epoch": 0.19445669754447875, + "grad_norm": 0.8315905332565308, + "learning_rate": 4.987983057029308e-06, + "loss": 0.5844, + "step": 2645 + }, + { + "epoch": 0.1945302161446846, + "grad_norm": 0.856306791305542, + "learning_rate": 4.987973619184122e-06, + "loss": 0.6189, + "step": 2646 + }, + { + "epoch": 0.19460373474489046, + "grad_norm": 0.8714321255683899, + "learning_rate": 4.9879641776431855e-06, + "loss": 0.5862, + "step": 2647 + }, + { + "epoch": 0.1946772533450963, + "grad_norm": 0.8214387893676758, + "learning_rate": 4.987954732406515e-06, + "loss": 0.5523, + "step": 2648 + }, + { + "epoch": 0.19475077194530216, + "grad_norm": 0.8283721804618835, + "learning_rate": 4.9879452834741235e-06, + "loss": 0.5234, + "step": 2649 + }, + { + "epoch": 0.194824290545508, + "grad_norm": 0.8722845911979675, + "learning_rate": 4.9879358308460265e-06, + "loss": 0.6394, + "step": 2650 + }, + { + "epoch": 0.19489780914571386, + "grad_norm": 0.8758174180984497, + "learning_rate": 4.987926374522237e-06, + "loss": 0.5859, + "step": 2651 + }, + { + "epoch": 0.19497132774591971, + "grad_norm": 0.9071541428565979, + "learning_rate": 4.9879169145027686e-06, + "loss": 0.6302, + "step": 2652 + }, + { + "epoch": 0.19504484634612557, + "grad_norm": 0.9066377282142639, + "learning_rate": 4.9879074507876355e-06, + "loss": 0.5723, + "step": 2653 + }, + { + "epoch": 0.19511836494633142, + "grad_norm": 0.9006228446960449, + "learning_rate": 4.987897983376853e-06, + "loss": 0.5805, + "step": 2654 + }, + { + "epoch": 0.19519188354653727, + "grad_norm": 0.9103559851646423, + "learning_rate": 4.987888512270434e-06, + "loss": 0.5854, + "step": 2655 + }, + { + "epoch": 0.19526540214674312, + "grad_norm": 0.9021365642547607, + "learning_rate": 4.9878790374683925e-06, + "loss": 0.5621, + "step": 2656 + }, + { + "epoch": 0.19533892074694897, + "grad_norm": 0.8608340620994568, + "learning_rate": 4.987869558970743e-06, + "loss": 0.6076, + "step": 2657 + }, + { + "epoch": 0.19541243934715483, + "grad_norm": 0.8844380974769592, + "learning_rate": 4.987860076777499e-06, + "loss": 0.6084, + "step": 2658 + }, + { + "epoch": 0.19548595794736068, + "grad_norm": 0.9069132208824158, + "learning_rate": 4.987850590888676e-06, + "loss": 0.566, + "step": 2659 + }, + { + "epoch": 0.19555947654756653, + "grad_norm": 0.879303514957428, + "learning_rate": 4.987841101304287e-06, + "loss": 0.5777, + "step": 2660 + }, + { + "epoch": 0.19563299514777238, + "grad_norm": 0.9085049629211426, + "learning_rate": 4.9878316080243465e-06, + "loss": 0.6158, + "step": 2661 + }, + { + "epoch": 0.19570651374797823, + "grad_norm": 0.909247100353241, + "learning_rate": 4.987822111048867e-06, + "loss": 0.6056, + "step": 2662 + }, + { + "epoch": 0.19578003234818409, + "grad_norm": 0.8882083296775818, + "learning_rate": 4.987812610377865e-06, + "loss": 0.5636, + "step": 2663 + }, + { + "epoch": 0.19585355094838994, + "grad_norm": 0.8745611906051636, + "learning_rate": 4.9878031060113544e-06, + "loss": 0.5411, + "step": 2664 + }, + { + "epoch": 0.1959270695485958, + "grad_norm": 0.8409385085105896, + "learning_rate": 4.987793597949347e-06, + "loss": 0.5917, + "step": 2665 + }, + { + "epoch": 0.19600058814880164, + "grad_norm": 0.8693474531173706, + "learning_rate": 4.987784086191859e-06, + "loss": 0.6224, + "step": 2666 + }, + { + "epoch": 0.1960741067490075, + "grad_norm": 0.8576704859733582, + "learning_rate": 4.987774570738905e-06, + "loss": 0.568, + "step": 2667 + }, + { + "epoch": 0.19614762534921334, + "grad_norm": 0.8883634805679321, + "learning_rate": 4.987765051590497e-06, + "loss": 0.6432, + "step": 2668 + }, + { + "epoch": 0.1962211439494192, + "grad_norm": 0.9038800001144409, + "learning_rate": 4.98775552874665e-06, + "loss": 0.63, + "step": 2669 + }, + { + "epoch": 0.19629466254962505, + "grad_norm": 0.931391179561615, + "learning_rate": 4.9877460022073786e-06, + "loss": 0.6006, + "step": 2670 + }, + { + "epoch": 0.1963681811498309, + "grad_norm": 0.8632233142852783, + "learning_rate": 4.9877364719726974e-06, + "loss": 0.5639, + "step": 2671 + }, + { + "epoch": 0.19644169975003675, + "grad_norm": 0.8567810654640198, + "learning_rate": 4.98772693804262e-06, + "loss": 0.5861, + "step": 2672 + }, + { + "epoch": 0.1965152183502426, + "grad_norm": 0.9154455661773682, + "learning_rate": 4.987717400417159e-06, + "loss": 0.6196, + "step": 2673 + }, + { + "epoch": 0.19658873695044846, + "grad_norm": 0.8693529367446899, + "learning_rate": 4.987707859096331e-06, + "loss": 0.594, + "step": 2674 + }, + { + "epoch": 0.1966622555506543, + "grad_norm": 0.884856641292572, + "learning_rate": 4.98769831408015e-06, + "loss": 0.5891, + "step": 2675 + }, + { + "epoch": 0.19673577415086016, + "grad_norm": 0.9323624968528748, + "learning_rate": 4.987688765368628e-06, + "loss": 0.6183, + "step": 2676 + }, + { + "epoch": 0.196809292751066, + "grad_norm": 0.912138819694519, + "learning_rate": 4.9876792129617814e-06, + "loss": 0.5778, + "step": 2677 + }, + { + "epoch": 0.19688281135127186, + "grad_norm": 0.869387686252594, + "learning_rate": 4.987669656859623e-06, + "loss": 0.6047, + "step": 2678 + }, + { + "epoch": 0.19695632995147772, + "grad_norm": 0.8681856989860535, + "learning_rate": 4.987660097062167e-06, + "loss": 0.5998, + "step": 2679 + }, + { + "epoch": 0.19702984855168357, + "grad_norm": 0.8470371961593628, + "learning_rate": 4.9876505335694295e-06, + "loss": 0.6106, + "step": 2680 + }, + { + "epoch": 0.19710336715188942, + "grad_norm": 0.8327966928482056, + "learning_rate": 4.987640966381423e-06, + "loss": 0.5768, + "step": 2681 + }, + { + "epoch": 0.19717688575209527, + "grad_norm": 0.9049786925315857, + "learning_rate": 4.987631395498161e-06, + "loss": 0.624, + "step": 2682 + }, + { + "epoch": 0.19725040435230112, + "grad_norm": 0.8897273540496826, + "learning_rate": 4.98762182091966e-06, + "loss": 0.5546, + "step": 2683 + }, + { + "epoch": 0.19732392295250697, + "grad_norm": 0.8656709790229797, + "learning_rate": 4.9876122426459326e-06, + "loss": 0.5433, + "step": 2684 + }, + { + "epoch": 0.19739744155271283, + "grad_norm": 0.8899847865104675, + "learning_rate": 4.987602660676993e-06, + "loss": 0.5799, + "step": 2685 + }, + { + "epoch": 0.19747096015291868, + "grad_norm": 0.8772549033164978, + "learning_rate": 4.987593075012857e-06, + "loss": 0.5728, + "step": 2686 + }, + { + "epoch": 0.19754447875312453, + "grad_norm": 0.8847360610961914, + "learning_rate": 4.987583485653537e-06, + "loss": 0.6075, + "step": 2687 + }, + { + "epoch": 0.19761799735333038, + "grad_norm": 0.8706080913543701, + "learning_rate": 4.987573892599048e-06, + "loss": 0.5532, + "step": 2688 + }, + { + "epoch": 0.19769151595353623, + "grad_norm": 0.8978468775749207, + "learning_rate": 4.987564295849404e-06, + "loss": 0.646, + "step": 2689 + }, + { + "epoch": 0.1977650345537421, + "grad_norm": 0.8574922680854797, + "learning_rate": 4.987554695404619e-06, + "loss": 0.5712, + "step": 2690 + }, + { + "epoch": 0.19783855315394794, + "grad_norm": 0.8781751394271851, + "learning_rate": 4.987545091264709e-06, + "loss": 0.5659, + "step": 2691 + }, + { + "epoch": 0.1979120717541538, + "grad_norm": 0.8593193292617798, + "learning_rate": 4.9875354834296864e-06, + "loss": 0.5963, + "step": 2692 + }, + { + "epoch": 0.19798559035435964, + "grad_norm": 0.8615499138832092, + "learning_rate": 4.987525871899566e-06, + "loss": 0.592, + "step": 2693 + }, + { + "epoch": 0.1980591089545655, + "grad_norm": 0.9067341089248657, + "learning_rate": 4.987516256674362e-06, + "loss": 0.6109, + "step": 2694 + }, + { + "epoch": 0.19813262755477135, + "grad_norm": 0.8639235496520996, + "learning_rate": 4.987506637754089e-06, + "loss": 0.5506, + "step": 2695 + }, + { + "epoch": 0.1982061461549772, + "grad_norm": 0.8792172074317932, + "learning_rate": 4.987497015138762e-06, + "loss": 0.5818, + "step": 2696 + }, + { + "epoch": 0.19827966475518305, + "grad_norm": 0.9385961890220642, + "learning_rate": 4.987487388828393e-06, + "loss": 0.5928, + "step": 2697 + }, + { + "epoch": 0.1983531833553889, + "grad_norm": 0.865451991558075, + "learning_rate": 4.987477758822998e-06, + "loss": 0.5265, + "step": 2698 + }, + { + "epoch": 0.19842670195559475, + "grad_norm": 0.843689501285553, + "learning_rate": 4.987468125122591e-06, + "loss": 0.5986, + "step": 2699 + }, + { + "epoch": 0.1985002205558006, + "grad_norm": 0.9168558120727539, + "learning_rate": 4.987458487727186e-06, + "loss": 0.6505, + "step": 2700 + }, + { + "epoch": 0.19857373915600646, + "grad_norm": 0.932747483253479, + "learning_rate": 4.987448846636798e-06, + "loss": 0.5651, + "step": 2701 + }, + { + "epoch": 0.1986472577562123, + "grad_norm": 0.8644194006919861, + "learning_rate": 4.987439201851441e-06, + "loss": 0.5795, + "step": 2702 + }, + { + "epoch": 0.19872077635641816, + "grad_norm": 0.882186233997345, + "learning_rate": 4.987429553371129e-06, + "loss": 0.5714, + "step": 2703 + }, + { + "epoch": 0.198794294956624, + "grad_norm": 0.8518140912055969, + "learning_rate": 4.987419901195877e-06, + "loss": 0.579, + "step": 2704 + }, + { + "epoch": 0.19886781355682986, + "grad_norm": 0.8845393657684326, + "learning_rate": 4.987410245325699e-06, + "loss": 0.6243, + "step": 2705 + }, + { + "epoch": 0.19894133215703572, + "grad_norm": 0.8433366417884827, + "learning_rate": 4.987400585760609e-06, + "loss": 0.5695, + "step": 2706 + }, + { + "epoch": 0.1990148507572416, + "grad_norm": 0.858320951461792, + "learning_rate": 4.9873909225006225e-06, + "loss": 0.5677, + "step": 2707 + }, + { + "epoch": 0.19908836935744745, + "grad_norm": 0.873680830001831, + "learning_rate": 4.987381255545752e-06, + "loss": 0.5856, + "step": 2708 + }, + { + "epoch": 0.1991618879576533, + "grad_norm": 0.8592490553855896, + "learning_rate": 4.987371584896013e-06, + "loss": 0.5989, + "step": 2709 + }, + { + "epoch": 0.19923540655785915, + "grad_norm": 0.9376903176307678, + "learning_rate": 4.98736191055142e-06, + "loss": 0.6582, + "step": 2710 + }, + { + "epoch": 0.199308925158065, + "grad_norm": 0.8596504330635071, + "learning_rate": 4.987352232511987e-06, + "loss": 0.583, + "step": 2711 + }, + { + "epoch": 0.19938244375827086, + "grad_norm": 0.891355574131012, + "learning_rate": 4.987342550777728e-06, + "loss": 0.6232, + "step": 2712 + }, + { + "epoch": 0.1994559623584767, + "grad_norm": 0.8858441114425659, + "learning_rate": 4.987332865348658e-06, + "loss": 0.5984, + "step": 2713 + }, + { + "epoch": 0.19952948095868256, + "grad_norm": 0.8299402594566345, + "learning_rate": 4.987323176224791e-06, + "loss": 0.5675, + "step": 2714 + }, + { + "epoch": 0.1996029995588884, + "grad_norm": 0.907468318939209, + "learning_rate": 4.9873134834061425e-06, + "loss": 0.6079, + "step": 2715 + }, + { + "epoch": 0.19967651815909426, + "grad_norm": 0.9128381609916687, + "learning_rate": 4.987303786892726e-06, + "loss": 0.58, + "step": 2716 + }, + { + "epoch": 0.19975003675930011, + "grad_norm": 0.8515064716339111, + "learning_rate": 4.9872940866845555e-06, + "loss": 0.6167, + "step": 2717 + }, + { + "epoch": 0.19982355535950597, + "grad_norm": 0.8740354776382446, + "learning_rate": 4.987284382781645e-06, + "loss": 0.5739, + "step": 2718 + }, + { + "epoch": 0.19989707395971182, + "grad_norm": 0.9444685578346252, + "learning_rate": 4.987274675184011e-06, + "loss": 0.5825, + "step": 2719 + }, + { + "epoch": 0.19997059255991767, + "grad_norm": 0.9138668179512024, + "learning_rate": 4.987264963891666e-06, + "loss": 0.6411, + "step": 2720 + }, + { + "epoch": 0.20004411116012352, + "grad_norm": 0.9336073398590088, + "learning_rate": 4.987255248904625e-06, + "loss": 0.5894, + "step": 2721 + }, + { + "epoch": 0.20011762976032937, + "grad_norm": 0.8767333626747131, + "learning_rate": 4.9872455302229036e-06, + "loss": 0.6135, + "step": 2722 + }, + { + "epoch": 0.20019114836053523, + "grad_norm": 0.870988667011261, + "learning_rate": 4.987235807846514e-06, + "loss": 0.608, + "step": 2723 + }, + { + "epoch": 0.20026466696074108, + "grad_norm": 0.8933121562004089, + "learning_rate": 4.9872260817754726e-06, + "loss": 0.5931, + "step": 2724 + }, + { + "epoch": 0.20033818556094693, + "grad_norm": 0.8621559739112854, + "learning_rate": 4.987216352009793e-06, + "loss": 0.605, + "step": 2725 + }, + { + "epoch": 0.20041170416115278, + "grad_norm": 0.947706401348114, + "learning_rate": 4.987206618549489e-06, + "loss": 0.6207, + "step": 2726 + }, + { + "epoch": 0.20048522276135863, + "grad_norm": 0.8728081583976746, + "learning_rate": 4.987196881394576e-06, + "loss": 0.6054, + "step": 2727 + }, + { + "epoch": 0.20055874136156449, + "grad_norm": 0.9121400117874146, + "learning_rate": 4.987187140545069e-06, + "loss": 0.6209, + "step": 2728 + }, + { + "epoch": 0.20063225996177034, + "grad_norm": 0.9378294944763184, + "learning_rate": 4.987177396000981e-06, + "loss": 0.5931, + "step": 2729 + }, + { + "epoch": 0.2007057785619762, + "grad_norm": 0.885489284992218, + "learning_rate": 4.9871676477623275e-06, + "loss": 0.5978, + "step": 2730 + }, + { + "epoch": 0.20077929716218204, + "grad_norm": 0.8750812411308289, + "learning_rate": 4.987157895829122e-06, + "loss": 0.588, + "step": 2731 + }, + { + "epoch": 0.2008528157623879, + "grad_norm": 0.9221212267875671, + "learning_rate": 4.98714814020138e-06, + "loss": 0.6606, + "step": 2732 + }, + { + "epoch": 0.20092633436259374, + "grad_norm": 0.8449889421463013, + "learning_rate": 4.987138380879116e-06, + "loss": 0.5624, + "step": 2733 + }, + { + "epoch": 0.2009998529627996, + "grad_norm": 0.8910398483276367, + "learning_rate": 4.987128617862343e-06, + "loss": 0.6127, + "step": 2734 + }, + { + "epoch": 0.20107337156300545, + "grad_norm": 0.9344319105148315, + "learning_rate": 4.9871188511510784e-06, + "loss": 0.6188, + "step": 2735 + }, + { + "epoch": 0.2011468901632113, + "grad_norm": 0.8575812578201294, + "learning_rate": 4.987109080745334e-06, + "loss": 0.5741, + "step": 2736 + }, + { + "epoch": 0.20122040876341715, + "grad_norm": 0.8537940979003906, + "learning_rate": 4.987099306645125e-06, + "loss": 0.5633, + "step": 2737 + }, + { + "epoch": 0.201293927363623, + "grad_norm": 0.8193941712379456, + "learning_rate": 4.987089528850466e-06, + "loss": 0.5531, + "step": 2738 + }, + { + "epoch": 0.20136744596382886, + "grad_norm": 0.8654205203056335, + "learning_rate": 4.987079747361372e-06, + "loss": 0.5587, + "step": 2739 + }, + { + "epoch": 0.2014409645640347, + "grad_norm": 0.9541195034980774, + "learning_rate": 4.987069962177858e-06, + "loss": 0.6634, + "step": 2740 + }, + { + "epoch": 0.20151448316424056, + "grad_norm": 0.872682511806488, + "learning_rate": 4.987060173299937e-06, + "loss": 0.6254, + "step": 2741 + }, + { + "epoch": 0.2015880017644464, + "grad_norm": 0.9524520635604858, + "learning_rate": 4.987050380727623e-06, + "loss": 0.6642, + "step": 2742 + }, + { + "epoch": 0.20166152036465226, + "grad_norm": 0.9123826026916504, + "learning_rate": 4.987040584460935e-06, + "loss": 0.5919, + "step": 2743 + }, + { + "epoch": 0.20173503896485812, + "grad_norm": 0.8872438669204712, + "learning_rate": 4.987030784499882e-06, + "loss": 0.6067, + "step": 2744 + }, + { + "epoch": 0.20180855756506397, + "grad_norm": 0.9309021830558777, + "learning_rate": 4.987020980844481e-06, + "loss": 0.6233, + "step": 2745 + }, + { + "epoch": 0.20188207616526982, + "grad_norm": 0.8939865827560425, + "learning_rate": 4.987011173494747e-06, + "loss": 0.5759, + "step": 2746 + }, + { + "epoch": 0.20195559476547567, + "grad_norm": 0.9249514937400818, + "learning_rate": 4.987001362450695e-06, + "loss": 0.5744, + "step": 2747 + }, + { + "epoch": 0.20202911336568152, + "grad_norm": 0.90708988904953, + "learning_rate": 4.9869915477123376e-06, + "loss": 0.5568, + "step": 2748 + }, + { + "epoch": 0.20210263196588737, + "grad_norm": 0.8678144216537476, + "learning_rate": 4.986981729279691e-06, + "loss": 0.5835, + "step": 2749 + }, + { + "epoch": 0.20217615056609323, + "grad_norm": 0.8649929761886597, + "learning_rate": 4.986971907152769e-06, + "loss": 0.6194, + "step": 2750 + }, + { + "epoch": 0.20224966916629908, + "grad_norm": 0.8372429013252258, + "learning_rate": 4.986962081331585e-06, + "loss": 0.5565, + "step": 2751 + }, + { + "epoch": 0.20232318776650493, + "grad_norm": 0.8259061574935913, + "learning_rate": 4.986952251816157e-06, + "loss": 0.5557, + "step": 2752 + }, + { + "epoch": 0.20239670636671078, + "grad_norm": 0.886566698551178, + "learning_rate": 4.986942418606498e-06, + "loss": 0.581, + "step": 2753 + }, + { + "epoch": 0.20247022496691663, + "grad_norm": 0.8774964213371277, + "learning_rate": 4.986932581702621e-06, + "loss": 0.593, + "step": 2754 + }, + { + "epoch": 0.2025437435671225, + "grad_norm": 0.8357530236244202, + "learning_rate": 4.9869227411045415e-06, + "loss": 0.5884, + "step": 2755 + }, + { + "epoch": 0.20261726216732834, + "grad_norm": 0.9106654524803162, + "learning_rate": 4.986912896812275e-06, + "loss": 0.6028, + "step": 2756 + }, + { + "epoch": 0.2026907807675342, + "grad_norm": 0.877719521522522, + "learning_rate": 4.986903048825835e-06, + "loss": 0.6025, + "step": 2757 + }, + { + "epoch": 0.20276429936774004, + "grad_norm": 0.8383312821388245, + "learning_rate": 4.986893197145238e-06, + "loss": 0.5805, + "step": 2758 + }, + { + "epoch": 0.2028378179679459, + "grad_norm": 0.8619343042373657, + "learning_rate": 4.986883341770496e-06, + "loss": 0.5898, + "step": 2759 + }, + { + "epoch": 0.20291133656815175, + "grad_norm": 0.8478254675865173, + "learning_rate": 4.986873482701625e-06, + "loss": 0.5534, + "step": 2760 + }, + { + "epoch": 0.2029848551683576, + "grad_norm": 0.929556131362915, + "learning_rate": 4.986863619938641e-06, + "loss": 0.5474, + "step": 2761 + }, + { + "epoch": 0.20305837376856345, + "grad_norm": 0.8788395524024963, + "learning_rate": 4.986853753481557e-06, + "loss": 0.5896, + "step": 2762 + }, + { + "epoch": 0.2031318923687693, + "grad_norm": 0.892558753490448, + "learning_rate": 4.986843883330386e-06, + "loss": 0.5756, + "step": 2763 + }, + { + "epoch": 0.20320541096897515, + "grad_norm": 0.9216018915176392, + "learning_rate": 4.986834009485146e-06, + "loss": 0.5434, + "step": 2764 + }, + { + "epoch": 0.203278929569181, + "grad_norm": 0.9068880677223206, + "learning_rate": 4.98682413194585e-06, + "loss": 0.6411, + "step": 2765 + }, + { + "epoch": 0.20335244816938686, + "grad_norm": 0.8579868674278259, + "learning_rate": 4.986814250712514e-06, + "loss": 0.5639, + "step": 2766 + }, + { + "epoch": 0.2034259667695927, + "grad_norm": 0.8743771314620972, + "learning_rate": 4.98680436578515e-06, + "loss": 0.5983, + "step": 2767 + }, + { + "epoch": 0.20349948536979856, + "grad_norm": 0.8344875574111938, + "learning_rate": 4.986794477163775e-06, + "loss": 0.556, + "step": 2768 + }, + { + "epoch": 0.2035730039700044, + "grad_norm": 0.8958021998405457, + "learning_rate": 4.986784584848403e-06, + "loss": 0.6077, + "step": 2769 + }, + { + "epoch": 0.20364652257021026, + "grad_norm": 0.8758130073547363, + "learning_rate": 4.986774688839049e-06, + "loss": 0.6001, + "step": 2770 + }, + { + "epoch": 0.20372004117041612, + "grad_norm": 0.9349328875541687, + "learning_rate": 4.986764789135727e-06, + "loss": 0.6634, + "step": 2771 + }, + { + "epoch": 0.20379355977062197, + "grad_norm": 0.8493031859397888, + "learning_rate": 4.986754885738452e-06, + "loss": 0.5691, + "step": 2772 + }, + { + "epoch": 0.20386707837082782, + "grad_norm": 0.8402639627456665, + "learning_rate": 4.9867449786472386e-06, + "loss": 0.5903, + "step": 2773 + }, + { + "epoch": 0.20394059697103367, + "grad_norm": 0.8650320172309875, + "learning_rate": 4.986735067862102e-06, + "loss": 0.569, + "step": 2774 + }, + { + "epoch": 0.20401411557123952, + "grad_norm": 0.8602520823478699, + "learning_rate": 4.986725153383057e-06, + "loss": 0.6039, + "step": 2775 + }, + { + "epoch": 0.20408763417144538, + "grad_norm": 0.8179925680160522, + "learning_rate": 4.986715235210118e-06, + "loss": 0.5526, + "step": 2776 + }, + { + "epoch": 0.20416115277165123, + "grad_norm": 0.9249500036239624, + "learning_rate": 4.986705313343298e-06, + "loss": 0.6273, + "step": 2777 + }, + { + "epoch": 0.20423467137185708, + "grad_norm": 0.8904820084571838, + "learning_rate": 4.986695387782616e-06, + "loss": 0.5887, + "step": 2778 + }, + { + "epoch": 0.20430818997206293, + "grad_norm": 0.9079967737197876, + "learning_rate": 4.986685458528082e-06, + "loss": 0.5971, + "step": 2779 + }, + { + "epoch": 0.20438170857226878, + "grad_norm": 0.8922668695449829, + "learning_rate": 4.986675525579714e-06, + "loss": 0.573, + "step": 2780 + }, + { + "epoch": 0.20445522717247464, + "grad_norm": 0.8857004046440125, + "learning_rate": 4.986665588937526e-06, + "loss": 0.615, + "step": 2781 + }, + { + "epoch": 0.2045287457726805, + "grad_norm": 0.8572609424591064, + "learning_rate": 4.9866556486015316e-06, + "loss": 0.5853, + "step": 2782 + }, + { + "epoch": 0.20460226437288634, + "grad_norm": 0.995408296585083, + "learning_rate": 4.986645704571747e-06, + "loss": 0.5782, + "step": 2783 + }, + { + "epoch": 0.2046757829730922, + "grad_norm": 0.8580557703971863, + "learning_rate": 4.986635756848185e-06, + "loss": 0.5822, + "step": 2784 + }, + { + "epoch": 0.20474930157329804, + "grad_norm": 0.8552242517471313, + "learning_rate": 4.986625805430864e-06, + "loss": 0.5977, + "step": 2785 + }, + { + "epoch": 0.2048228201735039, + "grad_norm": 0.9850109219551086, + "learning_rate": 4.986615850319795e-06, + "loss": 0.6589, + "step": 2786 + }, + { + "epoch": 0.20489633877370975, + "grad_norm": 0.9469531178474426, + "learning_rate": 4.986605891514994e-06, + "loss": 0.6177, + "step": 2787 + }, + { + "epoch": 0.2049698573739156, + "grad_norm": 0.8828681707382202, + "learning_rate": 4.986595929016477e-06, + "loss": 0.6001, + "step": 2788 + }, + { + "epoch": 0.20504337597412145, + "grad_norm": 0.8650670051574707, + "learning_rate": 4.9865859628242574e-06, + "loss": 0.5797, + "step": 2789 + }, + { + "epoch": 0.2051168945743273, + "grad_norm": 0.8274554014205933, + "learning_rate": 4.9865759929383516e-06, + "loss": 0.539, + "step": 2790 + }, + { + "epoch": 0.20519041317453315, + "grad_norm": 0.8731688261032104, + "learning_rate": 4.986566019358771e-06, + "loss": 0.6107, + "step": 2791 + }, + { + "epoch": 0.205263931774739, + "grad_norm": 0.8585526347160339, + "learning_rate": 4.986556042085535e-06, + "loss": 0.5514, + "step": 2792 + }, + { + "epoch": 0.20533745037494486, + "grad_norm": 0.9549726843833923, + "learning_rate": 4.9865460611186555e-06, + "loss": 0.6051, + "step": 2793 + }, + { + "epoch": 0.2054109689751507, + "grad_norm": 0.8467205166816711, + "learning_rate": 4.986536076458149e-06, + "loss": 0.5383, + "step": 2794 + }, + { + "epoch": 0.20548448757535656, + "grad_norm": 0.8528454303741455, + "learning_rate": 4.986526088104027e-06, + "loss": 0.5353, + "step": 2795 + }, + { + "epoch": 0.2055580061755624, + "grad_norm": 0.9330796003341675, + "learning_rate": 4.986516096056308e-06, + "loss": 0.6231, + "step": 2796 + }, + { + "epoch": 0.20563152477576827, + "grad_norm": 0.874799907207489, + "learning_rate": 4.986506100315006e-06, + "loss": 0.5748, + "step": 2797 + }, + { + "epoch": 0.20570504337597412, + "grad_norm": 0.8881467580795288, + "learning_rate": 4.9864961008801344e-06, + "loss": 0.6061, + "step": 2798 + }, + { + "epoch": 0.20577856197617997, + "grad_norm": 0.8640484809875488, + "learning_rate": 4.986486097751709e-06, + "loss": 0.5864, + "step": 2799 + }, + { + "epoch": 0.20585208057638582, + "grad_norm": 0.924656867980957, + "learning_rate": 4.986476090929745e-06, + "loss": 0.6142, + "step": 2800 + }, + { + "epoch": 0.20592559917659167, + "grad_norm": 0.9350962042808533, + "learning_rate": 4.986466080414257e-06, + "loss": 0.6452, + "step": 2801 + }, + { + "epoch": 0.20599911777679752, + "grad_norm": 0.9063174724578857, + "learning_rate": 4.98645606620526e-06, + "loss": 0.6009, + "step": 2802 + }, + { + "epoch": 0.20607263637700338, + "grad_norm": 0.9024665355682373, + "learning_rate": 4.986446048302768e-06, + "loss": 0.5844, + "step": 2803 + }, + { + "epoch": 0.20614615497720923, + "grad_norm": 0.9099068641662598, + "learning_rate": 4.986436026706797e-06, + "loss": 0.5938, + "step": 2804 + }, + { + "epoch": 0.20621967357741508, + "grad_norm": 0.8770126104354858, + "learning_rate": 4.986426001417362e-06, + "loss": 0.5955, + "step": 2805 + }, + { + "epoch": 0.20629319217762093, + "grad_norm": 0.8991008996963501, + "learning_rate": 4.986415972434476e-06, + "loss": 0.5516, + "step": 2806 + }, + { + "epoch": 0.20636671077782678, + "grad_norm": 0.9449423551559448, + "learning_rate": 4.986405939758155e-06, + "loss": 0.6349, + "step": 2807 + }, + { + "epoch": 0.20644022937803264, + "grad_norm": 0.911669909954071, + "learning_rate": 4.986395903388416e-06, + "loss": 0.5752, + "step": 2808 + }, + { + "epoch": 0.2065137479782385, + "grad_norm": 0.9026505947113037, + "learning_rate": 4.98638586332527e-06, + "loss": 0.6431, + "step": 2809 + }, + { + "epoch": 0.20658726657844434, + "grad_norm": 0.9294904470443726, + "learning_rate": 4.986375819568736e-06, + "loss": 0.5887, + "step": 2810 + }, + { + "epoch": 0.2066607851786502, + "grad_norm": 0.9420085549354553, + "learning_rate": 4.986365772118826e-06, + "loss": 0.6163, + "step": 2811 + }, + { + "epoch": 0.20673430377885604, + "grad_norm": 0.841249942779541, + "learning_rate": 4.9863557209755555e-06, + "loss": 0.6056, + "step": 2812 + }, + { + "epoch": 0.2068078223790619, + "grad_norm": 0.84046471118927, + "learning_rate": 4.98634566613894e-06, + "loss": 0.5769, + "step": 2813 + }, + { + "epoch": 0.20688134097926775, + "grad_norm": 0.9256582260131836, + "learning_rate": 4.986335607608994e-06, + "loss": 0.5955, + "step": 2814 + }, + { + "epoch": 0.2069548595794736, + "grad_norm": 0.903153121471405, + "learning_rate": 4.986325545385733e-06, + "loss": 0.5865, + "step": 2815 + }, + { + "epoch": 0.20702837817967945, + "grad_norm": 0.9248403310775757, + "learning_rate": 4.986315479469171e-06, + "loss": 0.6128, + "step": 2816 + }, + { + "epoch": 0.2071018967798853, + "grad_norm": 0.9162865281105042, + "learning_rate": 4.986305409859323e-06, + "loss": 0.6276, + "step": 2817 + }, + { + "epoch": 0.20717541538009115, + "grad_norm": 0.8784268498420715, + "learning_rate": 4.986295336556206e-06, + "loss": 0.556, + "step": 2818 + }, + { + "epoch": 0.207248933980297, + "grad_norm": 0.9463109374046326, + "learning_rate": 4.986285259559832e-06, + "loss": 0.6079, + "step": 2819 + }, + { + "epoch": 0.20732245258050286, + "grad_norm": 0.882976770401001, + "learning_rate": 4.986275178870218e-06, + "loss": 0.6, + "step": 2820 + }, + { + "epoch": 0.2073959711807087, + "grad_norm": 0.8883724808692932, + "learning_rate": 4.986265094487378e-06, + "loss": 0.5549, + "step": 2821 + }, + { + "epoch": 0.20746948978091456, + "grad_norm": 0.8834844827651978, + "learning_rate": 4.986255006411328e-06, + "loss": 0.6448, + "step": 2822 + }, + { + "epoch": 0.20754300838112041, + "grad_norm": 0.9368519186973572, + "learning_rate": 4.986244914642082e-06, + "loss": 0.5392, + "step": 2823 + }, + { + "epoch": 0.20761652698132627, + "grad_norm": 0.9185148477554321, + "learning_rate": 4.986234819179656e-06, + "loss": 0.6114, + "step": 2824 + }, + { + "epoch": 0.20769004558153212, + "grad_norm": 0.905223548412323, + "learning_rate": 4.986224720024063e-06, + "loss": 0.5726, + "step": 2825 + }, + { + "epoch": 0.20776356418173797, + "grad_norm": 0.9522912502288818, + "learning_rate": 4.98621461717532e-06, + "loss": 0.6279, + "step": 2826 + }, + { + "epoch": 0.20783708278194382, + "grad_norm": 0.8576167225837708, + "learning_rate": 4.986204510633441e-06, + "loss": 0.5729, + "step": 2827 + }, + { + "epoch": 0.20791060138214967, + "grad_norm": 0.8875895738601685, + "learning_rate": 4.9861944003984416e-06, + "loss": 0.5855, + "step": 2828 + }, + { + "epoch": 0.20798411998235553, + "grad_norm": 0.8959012031555176, + "learning_rate": 4.986184286470337e-06, + "loss": 0.6159, + "step": 2829 + }, + { + "epoch": 0.20805763858256138, + "grad_norm": 0.8928465247154236, + "learning_rate": 4.986174168849141e-06, + "loss": 0.5744, + "step": 2830 + }, + { + "epoch": 0.20813115718276723, + "grad_norm": 0.8763473629951477, + "learning_rate": 4.98616404753487e-06, + "loss": 0.5877, + "step": 2831 + }, + { + "epoch": 0.20820467578297308, + "grad_norm": 0.8279255032539368, + "learning_rate": 4.986153922527539e-06, + "loss": 0.5901, + "step": 2832 + }, + { + "epoch": 0.20827819438317893, + "grad_norm": 0.8836056590080261, + "learning_rate": 4.986143793827162e-06, + "loss": 0.5637, + "step": 2833 + }, + { + "epoch": 0.20835171298338478, + "grad_norm": 0.8606396317481995, + "learning_rate": 4.986133661433753e-06, + "loss": 0.6519, + "step": 2834 + }, + { + "epoch": 0.20842523158359064, + "grad_norm": 0.908710777759552, + "learning_rate": 4.98612352534733e-06, + "loss": 0.624, + "step": 2835 + }, + { + "epoch": 0.2084987501837965, + "grad_norm": 0.9013873934745789, + "learning_rate": 4.986113385567907e-06, + "loss": 0.6036, + "step": 2836 + }, + { + "epoch": 0.20857226878400234, + "grad_norm": 0.8622982501983643, + "learning_rate": 4.986103242095498e-06, + "loss": 0.5907, + "step": 2837 + }, + { + "epoch": 0.2086457873842082, + "grad_norm": 0.9005083441734314, + "learning_rate": 4.9860930949301186e-06, + "loss": 0.5738, + "step": 2838 + }, + { + "epoch": 0.20871930598441404, + "grad_norm": 0.9190096259117126, + "learning_rate": 4.986082944071785e-06, + "loss": 0.6064, + "step": 2839 + }, + { + "epoch": 0.2087928245846199, + "grad_norm": 0.8897873759269714, + "learning_rate": 4.986072789520511e-06, + "loss": 0.6078, + "step": 2840 + }, + { + "epoch": 0.20886634318482575, + "grad_norm": 0.890263557434082, + "learning_rate": 4.9860626312763115e-06, + "loss": 0.5331, + "step": 2841 + }, + { + "epoch": 0.2089398617850316, + "grad_norm": 0.9033092856407166, + "learning_rate": 4.986052469339202e-06, + "loss": 0.6095, + "step": 2842 + }, + { + "epoch": 0.20901338038523745, + "grad_norm": 0.8668504357337952, + "learning_rate": 4.986042303709198e-06, + "loss": 0.6031, + "step": 2843 + }, + { + "epoch": 0.2090868989854433, + "grad_norm": 0.9087004065513611, + "learning_rate": 4.986032134386315e-06, + "loss": 0.5492, + "step": 2844 + }, + { + "epoch": 0.20916041758564916, + "grad_norm": 0.9059748649597168, + "learning_rate": 4.9860219613705665e-06, + "loss": 0.5607, + "step": 2845 + }, + { + "epoch": 0.20923393618585504, + "grad_norm": 0.8855966329574585, + "learning_rate": 4.986011784661968e-06, + "loss": 0.6008, + "step": 2846 + }, + { + "epoch": 0.2093074547860609, + "grad_norm": 0.8521701693534851, + "learning_rate": 4.986001604260536e-06, + "loss": 0.5757, + "step": 2847 + }, + { + "epoch": 0.20938097338626674, + "grad_norm": 0.9208184480667114, + "learning_rate": 4.985991420166285e-06, + "loss": 0.6042, + "step": 2848 + }, + { + "epoch": 0.2094544919864726, + "grad_norm": 1.0072356462478638, + "learning_rate": 4.985981232379229e-06, + "loss": 0.6707, + "step": 2849 + }, + { + "epoch": 0.20952801058667844, + "grad_norm": 0.8587074875831604, + "learning_rate": 4.985971040899385e-06, + "loss": 0.5989, + "step": 2850 + }, + { + "epoch": 0.2096015291868843, + "grad_norm": 0.8770651817321777, + "learning_rate": 4.9859608457267665e-06, + "loss": 0.6167, + "step": 2851 + }, + { + "epoch": 0.20967504778709015, + "grad_norm": 0.8971473574638367, + "learning_rate": 4.985950646861389e-06, + "loss": 0.5864, + "step": 2852 + }, + { + "epoch": 0.209748566387296, + "grad_norm": 0.8278238773345947, + "learning_rate": 4.985940444303269e-06, + "loss": 0.5668, + "step": 2853 + }, + { + "epoch": 0.20982208498750185, + "grad_norm": 0.888195812702179, + "learning_rate": 4.98593023805242e-06, + "loss": 0.6024, + "step": 2854 + }, + { + "epoch": 0.2098956035877077, + "grad_norm": 0.8530681729316711, + "learning_rate": 4.9859200281088575e-06, + "loss": 0.582, + "step": 2855 + }, + { + "epoch": 0.20996912218791355, + "grad_norm": 0.8518027663230896, + "learning_rate": 4.985909814472597e-06, + "loss": 0.5874, + "step": 2856 + }, + { + "epoch": 0.2100426407881194, + "grad_norm": 0.8852684497833252, + "learning_rate": 4.985899597143654e-06, + "loss": 0.5887, + "step": 2857 + }, + { + "epoch": 0.21011615938832526, + "grad_norm": 0.8757057785987854, + "learning_rate": 4.985889376122043e-06, + "loss": 0.5431, + "step": 2858 + }, + { + "epoch": 0.2101896779885311, + "grad_norm": 0.8446922898292542, + "learning_rate": 4.985879151407779e-06, + "loss": 0.594, + "step": 2859 + }, + { + "epoch": 0.21026319658873696, + "grad_norm": 0.8274088501930237, + "learning_rate": 4.985868923000878e-06, + "loss": 0.5612, + "step": 2860 + }, + { + "epoch": 0.2103367151889428, + "grad_norm": 0.9202337265014648, + "learning_rate": 4.985858690901355e-06, + "loss": 0.6005, + "step": 2861 + }, + { + "epoch": 0.21041023378914867, + "grad_norm": 0.8418406248092651, + "learning_rate": 4.985848455109225e-06, + "loss": 0.5697, + "step": 2862 + }, + { + "epoch": 0.21048375238935452, + "grad_norm": 0.8705913424491882, + "learning_rate": 4.985838215624503e-06, + "loss": 0.5913, + "step": 2863 + }, + { + "epoch": 0.21055727098956037, + "grad_norm": 0.9138587117195129, + "learning_rate": 4.985827972447205e-06, + "loss": 0.5664, + "step": 2864 + }, + { + "epoch": 0.21063078958976622, + "grad_norm": 0.9720252752304077, + "learning_rate": 4.985817725577346e-06, + "loss": 0.6349, + "step": 2865 + }, + { + "epoch": 0.21070430818997207, + "grad_norm": 0.9230591654777527, + "learning_rate": 4.98580747501494e-06, + "loss": 0.5877, + "step": 2866 + }, + { + "epoch": 0.21077782679017792, + "grad_norm": 0.8318852186203003, + "learning_rate": 4.985797220760002e-06, + "loss": 0.5401, + "step": 2867 + }, + { + "epoch": 0.21085134539038378, + "grad_norm": 0.8792896866798401, + "learning_rate": 4.985786962812551e-06, + "loss": 0.5783, + "step": 2868 + }, + { + "epoch": 0.21092486399058963, + "grad_norm": 0.8901796340942383, + "learning_rate": 4.985776701172598e-06, + "loss": 0.5579, + "step": 2869 + }, + { + "epoch": 0.21099838259079548, + "grad_norm": 0.8525848388671875, + "learning_rate": 4.98576643584016e-06, + "loss": 0.5906, + "step": 2870 + }, + { + "epoch": 0.21107190119100133, + "grad_norm": 0.860578179359436, + "learning_rate": 4.9857561668152525e-06, + "loss": 0.6053, + "step": 2871 + }, + { + "epoch": 0.21114541979120718, + "grad_norm": 0.9114035964012146, + "learning_rate": 4.98574589409789e-06, + "loss": 0.6066, + "step": 2872 + }, + { + "epoch": 0.21121893839141304, + "grad_norm": 0.9365326762199402, + "learning_rate": 4.9857356176880875e-06, + "loss": 0.5959, + "step": 2873 + }, + { + "epoch": 0.2112924569916189, + "grad_norm": 0.8666334748268127, + "learning_rate": 4.985725337585862e-06, + "loss": 0.5877, + "step": 2874 + }, + { + "epoch": 0.21136597559182474, + "grad_norm": 0.8569995164871216, + "learning_rate": 4.985715053791227e-06, + "loss": 0.5625, + "step": 2875 + }, + { + "epoch": 0.2114394941920306, + "grad_norm": 0.8868404626846313, + "learning_rate": 4.985704766304198e-06, + "loss": 0.5675, + "step": 2876 + }, + { + "epoch": 0.21151301279223644, + "grad_norm": 0.9001592397689819, + "learning_rate": 4.985694475124792e-06, + "loss": 0.5689, + "step": 2877 + }, + { + "epoch": 0.2115865313924423, + "grad_norm": 0.896503210067749, + "learning_rate": 4.985684180253022e-06, + "loss": 0.5767, + "step": 2878 + }, + { + "epoch": 0.21166004999264815, + "grad_norm": 0.8459535241127014, + "learning_rate": 4.985673881688904e-06, + "loss": 0.6023, + "step": 2879 + }, + { + "epoch": 0.211733568592854, + "grad_norm": 0.8601410388946533, + "learning_rate": 4.985663579432454e-06, + "loss": 0.5711, + "step": 2880 + }, + { + "epoch": 0.21180708719305985, + "grad_norm": 0.8937724828720093, + "learning_rate": 4.985653273483687e-06, + "loss": 0.5512, + "step": 2881 + }, + { + "epoch": 0.2118806057932657, + "grad_norm": 0.899260938167572, + "learning_rate": 4.985642963842618e-06, + "loss": 0.5754, + "step": 2882 + }, + { + "epoch": 0.21195412439347155, + "grad_norm": 0.8443267941474915, + "learning_rate": 4.985632650509262e-06, + "loss": 0.5613, + "step": 2883 + }, + { + "epoch": 0.2120276429936774, + "grad_norm": 0.8691442608833313, + "learning_rate": 4.985622333483635e-06, + "loss": 0.5888, + "step": 2884 + }, + { + "epoch": 0.21210116159388326, + "grad_norm": 0.8363087773323059, + "learning_rate": 4.985612012765753e-06, + "loss": 0.5529, + "step": 2885 + }, + { + "epoch": 0.2121746801940891, + "grad_norm": 0.9395051598548889, + "learning_rate": 4.98560168835563e-06, + "loss": 0.6131, + "step": 2886 + }, + { + "epoch": 0.21224819879429496, + "grad_norm": 0.8929702639579773, + "learning_rate": 4.985591360253282e-06, + "loss": 0.5886, + "step": 2887 + }, + { + "epoch": 0.21232171739450081, + "grad_norm": 0.8603038787841797, + "learning_rate": 4.985581028458723e-06, + "loss": 0.5915, + "step": 2888 + }, + { + "epoch": 0.21239523599470667, + "grad_norm": 0.9155166745185852, + "learning_rate": 4.9855706929719705e-06, + "loss": 0.5636, + "step": 2889 + }, + { + "epoch": 0.21246875459491252, + "grad_norm": 0.8761892914772034, + "learning_rate": 4.985560353793039e-06, + "loss": 0.5706, + "step": 2890 + }, + { + "epoch": 0.21254227319511837, + "grad_norm": 0.8519219756126404, + "learning_rate": 4.985550010921943e-06, + "loss": 0.5423, + "step": 2891 + }, + { + "epoch": 0.21261579179532422, + "grad_norm": 0.8692018389701843, + "learning_rate": 4.985539664358699e-06, + "loss": 0.6275, + "step": 2892 + }, + { + "epoch": 0.21268931039553007, + "grad_norm": 0.866221010684967, + "learning_rate": 4.985529314103321e-06, + "loss": 0.5712, + "step": 2893 + }, + { + "epoch": 0.21276282899573593, + "grad_norm": 0.8562460541725159, + "learning_rate": 4.985518960155827e-06, + "loss": 0.5608, + "step": 2894 + }, + { + "epoch": 0.21283634759594178, + "grad_norm": 0.8483374714851379, + "learning_rate": 4.985508602516229e-06, + "loss": 0.5724, + "step": 2895 + }, + { + "epoch": 0.21290986619614763, + "grad_norm": 0.8496392965316772, + "learning_rate": 4.9854982411845454e-06, + "loss": 0.5376, + "step": 2896 + }, + { + "epoch": 0.21298338479635348, + "grad_norm": 0.8512835502624512, + "learning_rate": 4.985487876160789e-06, + "loss": 0.5845, + "step": 2897 + }, + { + "epoch": 0.21305690339655933, + "grad_norm": 0.8397398591041565, + "learning_rate": 4.9854775074449775e-06, + "loss": 0.5558, + "step": 2898 + }, + { + "epoch": 0.21313042199676518, + "grad_norm": 0.8653168082237244, + "learning_rate": 4.985467135037125e-06, + "loss": 0.5983, + "step": 2899 + }, + { + "epoch": 0.21320394059697104, + "grad_norm": 0.900173008441925, + "learning_rate": 4.985456758937248e-06, + "loss": 0.6168, + "step": 2900 + }, + { + "epoch": 0.2132774591971769, + "grad_norm": 0.8035306930541992, + "learning_rate": 4.98544637914536e-06, + "loss": 0.5574, + "step": 2901 + }, + { + "epoch": 0.21335097779738274, + "grad_norm": 0.902619481086731, + "learning_rate": 4.985435995661478e-06, + "loss": 0.604, + "step": 2902 + }, + { + "epoch": 0.2134244963975886, + "grad_norm": 0.8480420112609863, + "learning_rate": 4.985425608485617e-06, + "loss": 0.5682, + "step": 2903 + }, + { + "epoch": 0.21349801499779444, + "grad_norm": 0.8713414072990417, + "learning_rate": 4.985415217617791e-06, + "loss": 0.5168, + "step": 2904 + }, + { + "epoch": 0.2135715335980003, + "grad_norm": 0.8871290683746338, + "learning_rate": 4.985404823058018e-06, + "loss": 0.5689, + "step": 2905 + }, + { + "epoch": 0.21364505219820615, + "grad_norm": 0.8933058381080627, + "learning_rate": 4.9853944248063125e-06, + "loss": 0.6471, + "step": 2906 + }, + { + "epoch": 0.213718570798412, + "grad_norm": 0.8723229765892029, + "learning_rate": 4.985384022862689e-06, + "loss": 0.6029, + "step": 2907 + }, + { + "epoch": 0.21379208939861785, + "grad_norm": 0.8772785663604736, + "learning_rate": 4.985373617227164e-06, + "loss": 0.625, + "step": 2908 + }, + { + "epoch": 0.2138656079988237, + "grad_norm": 0.9383623600006104, + "learning_rate": 4.985363207899753e-06, + "loss": 0.59, + "step": 2909 + }, + { + "epoch": 0.21393912659902956, + "grad_norm": 0.8710925579071045, + "learning_rate": 4.985352794880471e-06, + "loss": 0.5617, + "step": 2910 + }, + { + "epoch": 0.2140126451992354, + "grad_norm": 0.8986659049987793, + "learning_rate": 4.985342378169333e-06, + "loss": 0.5661, + "step": 2911 + }, + { + "epoch": 0.21408616379944126, + "grad_norm": 0.8554993271827698, + "learning_rate": 4.985331957766354e-06, + "loss": 0.5949, + "step": 2912 + }, + { + "epoch": 0.2141596823996471, + "grad_norm": 0.8781396150588989, + "learning_rate": 4.985321533671552e-06, + "loss": 0.6234, + "step": 2913 + }, + { + "epoch": 0.21423320099985296, + "grad_norm": 0.8900280594825745, + "learning_rate": 4.9853111058849405e-06, + "loss": 0.5565, + "step": 2914 + }, + { + "epoch": 0.21430671960005881, + "grad_norm": 0.8666138052940369, + "learning_rate": 4.985300674406534e-06, + "loss": 0.5899, + "step": 2915 + }, + { + "epoch": 0.21438023820026467, + "grad_norm": 0.8559967279434204, + "learning_rate": 4.985290239236352e-06, + "loss": 0.5857, + "step": 2916 + }, + { + "epoch": 0.21445375680047052, + "grad_norm": 0.9088654518127441, + "learning_rate": 4.985279800374406e-06, + "loss": 0.5992, + "step": 2917 + }, + { + "epoch": 0.21452727540067637, + "grad_norm": 0.9604570865631104, + "learning_rate": 4.985269357820713e-06, + "loss": 0.6177, + "step": 2918 + }, + { + "epoch": 0.21460079400088222, + "grad_norm": 0.910036027431488, + "learning_rate": 4.98525891157529e-06, + "loss": 0.6172, + "step": 2919 + }, + { + "epoch": 0.21467431260108807, + "grad_norm": 0.9265397787094116, + "learning_rate": 4.985248461638148e-06, + "loss": 0.5875, + "step": 2920 + }, + { + "epoch": 0.21474783120129393, + "grad_norm": 0.8972342610359192, + "learning_rate": 4.985238008009308e-06, + "loss": 0.6396, + "step": 2921 + }, + { + "epoch": 0.21482134980149978, + "grad_norm": 0.8669350147247314, + "learning_rate": 4.985227550688781e-06, + "loss": 0.5982, + "step": 2922 + }, + { + "epoch": 0.21489486840170563, + "grad_norm": 0.837538480758667, + "learning_rate": 4.985217089676587e-06, + "loss": 0.6228, + "step": 2923 + }, + { + "epoch": 0.21496838700191148, + "grad_norm": 0.8343914151191711, + "learning_rate": 4.985206624972737e-06, + "loss": 0.5686, + "step": 2924 + }, + { + "epoch": 0.21504190560211733, + "grad_norm": 0.8957287073135376, + "learning_rate": 4.98519615657725e-06, + "loss": 0.5924, + "step": 2925 + }, + { + "epoch": 0.21511542420232319, + "grad_norm": 0.8561069369316101, + "learning_rate": 4.985185684490139e-06, + "loss": 0.5689, + "step": 2926 + }, + { + "epoch": 0.21518894280252904, + "grad_norm": 0.8929122090339661, + "learning_rate": 4.985175208711422e-06, + "loss": 0.5972, + "step": 2927 + }, + { + "epoch": 0.2152624614027349, + "grad_norm": 0.9286881685256958, + "learning_rate": 4.9851647292411134e-06, + "loss": 0.586, + "step": 2928 + }, + { + "epoch": 0.21533598000294074, + "grad_norm": 0.8858124017715454, + "learning_rate": 4.985154246079228e-06, + "loss": 0.6152, + "step": 2929 + }, + { + "epoch": 0.2154094986031466, + "grad_norm": 0.8733359575271606, + "learning_rate": 4.985143759225781e-06, + "loss": 0.557, + "step": 2930 + }, + { + "epoch": 0.21548301720335244, + "grad_norm": 0.8936023712158203, + "learning_rate": 4.985133268680791e-06, + "loss": 0.5744, + "step": 2931 + }, + { + "epoch": 0.2155565358035583, + "grad_norm": 0.8938051462173462, + "learning_rate": 4.98512277444427e-06, + "loss": 0.5855, + "step": 2932 + }, + { + "epoch": 0.21563005440376415, + "grad_norm": 0.84048992395401, + "learning_rate": 4.985112276516236e-06, + "loss": 0.5783, + "step": 2933 + }, + { + "epoch": 0.21570357300397, + "grad_norm": 0.8208466172218323, + "learning_rate": 4.985101774896704e-06, + "loss": 0.5506, + "step": 2934 + }, + { + "epoch": 0.21577709160417585, + "grad_norm": 0.8730533123016357, + "learning_rate": 4.985091269585689e-06, + "loss": 0.589, + "step": 2935 + }, + { + "epoch": 0.2158506102043817, + "grad_norm": 0.8789061903953552, + "learning_rate": 4.985080760583207e-06, + "loss": 0.599, + "step": 2936 + }, + { + "epoch": 0.21592412880458756, + "grad_norm": 0.9555930495262146, + "learning_rate": 4.985070247889274e-06, + "loss": 0.6196, + "step": 2937 + }, + { + "epoch": 0.2159976474047934, + "grad_norm": 0.8651476502418518, + "learning_rate": 4.9850597315039055e-06, + "loss": 0.5952, + "step": 2938 + }, + { + "epoch": 0.21607116600499926, + "grad_norm": 0.8519105911254883, + "learning_rate": 4.985049211427115e-06, + "loss": 0.5795, + "step": 2939 + }, + { + "epoch": 0.2161446846052051, + "grad_norm": 0.9683173894882202, + "learning_rate": 4.985038687658922e-06, + "loss": 0.5924, + "step": 2940 + }, + { + "epoch": 0.21621820320541096, + "grad_norm": 0.8475030064582825, + "learning_rate": 4.985028160199339e-06, + "loss": 0.5599, + "step": 2941 + }, + { + "epoch": 0.21629172180561682, + "grad_norm": 0.8907042145729065, + "learning_rate": 4.985017629048383e-06, + "loss": 0.5945, + "step": 2942 + }, + { + "epoch": 0.21636524040582267, + "grad_norm": 0.8703698515892029, + "learning_rate": 4.98500709420607e-06, + "loss": 0.6059, + "step": 2943 + }, + { + "epoch": 0.21643875900602852, + "grad_norm": 0.8650113940238953, + "learning_rate": 4.984996555672415e-06, + "loss": 0.6098, + "step": 2944 + }, + { + "epoch": 0.21651227760623437, + "grad_norm": 0.8239542245864868, + "learning_rate": 4.984986013447433e-06, + "loss": 0.6047, + "step": 2945 + }, + { + "epoch": 0.21658579620644022, + "grad_norm": 0.9036781787872314, + "learning_rate": 4.98497546753114e-06, + "loss": 0.5994, + "step": 2946 + }, + { + "epoch": 0.21665931480664608, + "grad_norm": 0.8873794078826904, + "learning_rate": 4.9849649179235524e-06, + "loss": 0.6257, + "step": 2947 + }, + { + "epoch": 0.21673283340685193, + "grad_norm": 0.8660964965820312, + "learning_rate": 4.984954364624686e-06, + "loss": 0.6096, + "step": 2948 + }, + { + "epoch": 0.21680635200705778, + "grad_norm": 0.8687571287155151, + "learning_rate": 4.984943807634556e-06, + "loss": 0.5445, + "step": 2949 + }, + { + "epoch": 0.21687987060726363, + "grad_norm": 0.875360369682312, + "learning_rate": 4.984933246953177e-06, + "loss": 0.6131, + "step": 2950 + }, + { + "epoch": 0.21695338920746948, + "grad_norm": 0.8639423847198486, + "learning_rate": 4.984922682580566e-06, + "loss": 0.5865, + "step": 2951 + }, + { + "epoch": 0.21702690780767533, + "grad_norm": 0.9248652458190918, + "learning_rate": 4.984912114516739e-06, + "loss": 0.6278, + "step": 2952 + }, + { + "epoch": 0.2171004264078812, + "grad_norm": 0.8177772760391235, + "learning_rate": 4.9849015427617115e-06, + "loss": 0.5966, + "step": 2953 + }, + { + "epoch": 0.21717394500808704, + "grad_norm": 0.8916250467300415, + "learning_rate": 4.984890967315498e-06, + "loss": 0.6336, + "step": 2954 + }, + { + "epoch": 0.2172474636082929, + "grad_norm": 0.8697659969329834, + "learning_rate": 4.984880388178116e-06, + "loss": 0.5724, + "step": 2955 + }, + { + "epoch": 0.21732098220849874, + "grad_norm": 0.9031931161880493, + "learning_rate": 4.984869805349579e-06, + "loss": 0.5888, + "step": 2956 + }, + { + "epoch": 0.2173945008087046, + "grad_norm": 0.8631497621536255, + "learning_rate": 4.984859218829905e-06, + "loss": 0.5669, + "step": 2957 + }, + { + "epoch": 0.21746801940891045, + "grad_norm": 0.8698444962501526, + "learning_rate": 4.984848628619108e-06, + "loss": 0.5651, + "step": 2958 + }, + { + "epoch": 0.2175415380091163, + "grad_norm": 0.8596742749214172, + "learning_rate": 4.984838034717205e-06, + "loss": 0.5753, + "step": 2959 + }, + { + "epoch": 0.21761505660932215, + "grad_norm": 0.9348883628845215, + "learning_rate": 4.98482743712421e-06, + "loss": 0.5825, + "step": 2960 + }, + { + "epoch": 0.217688575209528, + "grad_norm": 0.8961125612258911, + "learning_rate": 4.984816835840142e-06, + "loss": 0.5871, + "step": 2961 + }, + { + "epoch": 0.21776209380973385, + "grad_norm": 0.8780435919761658, + "learning_rate": 4.984806230865012e-06, + "loss": 0.5946, + "step": 2962 + }, + { + "epoch": 0.2178356124099397, + "grad_norm": 0.8583767414093018, + "learning_rate": 4.984795622198841e-06, + "loss": 0.5753, + "step": 2963 + }, + { + "epoch": 0.21790913101014556, + "grad_norm": 0.8968968391418457, + "learning_rate": 4.984785009841641e-06, + "loss": 0.61, + "step": 2964 + }, + { + "epoch": 0.2179826496103514, + "grad_norm": 0.8366369605064392, + "learning_rate": 4.984774393793428e-06, + "loss": 0.5253, + "step": 2965 + }, + { + "epoch": 0.21805616821055726, + "grad_norm": 0.9554526209831238, + "learning_rate": 4.98476377405422e-06, + "loss": 0.5999, + "step": 2966 + }, + { + "epoch": 0.2181296868107631, + "grad_norm": 0.8518994450569153, + "learning_rate": 4.984753150624031e-06, + "loss": 0.6158, + "step": 2967 + }, + { + "epoch": 0.21820320541096896, + "grad_norm": 0.8830186128616333, + "learning_rate": 4.984742523502878e-06, + "loss": 0.5721, + "step": 2968 + }, + { + "epoch": 0.21827672401117482, + "grad_norm": 0.8948132395744324, + "learning_rate": 4.984731892690775e-06, + "loss": 0.5714, + "step": 2969 + }, + { + "epoch": 0.21835024261138067, + "grad_norm": 0.8687844276428223, + "learning_rate": 4.9847212581877395e-06, + "loss": 0.5576, + "step": 2970 + }, + { + "epoch": 0.21842376121158652, + "grad_norm": 0.8874993324279785, + "learning_rate": 4.984710619993786e-06, + "loss": 0.6127, + "step": 2971 + }, + { + "epoch": 0.21849727981179237, + "grad_norm": 0.8880925178527832, + "learning_rate": 4.9846999781089314e-06, + "loss": 0.5971, + "step": 2972 + }, + { + "epoch": 0.21857079841199822, + "grad_norm": 0.906452476978302, + "learning_rate": 4.984689332533192e-06, + "loss": 0.6149, + "step": 2973 + }, + { + "epoch": 0.21864431701220408, + "grad_norm": 0.9023789167404175, + "learning_rate": 4.9846786832665805e-06, + "loss": 0.6122, + "step": 2974 + }, + { + "epoch": 0.21871783561240993, + "grad_norm": 0.9308779835700989, + "learning_rate": 4.984668030309116e-06, + "loss": 0.5755, + "step": 2975 + }, + { + "epoch": 0.21879135421261578, + "grad_norm": 0.8626759052276611, + "learning_rate": 4.984657373660813e-06, + "loss": 0.5825, + "step": 2976 + }, + { + "epoch": 0.21886487281282163, + "grad_norm": 0.8697079420089722, + "learning_rate": 4.984646713321688e-06, + "loss": 0.5645, + "step": 2977 + }, + { + "epoch": 0.21893839141302748, + "grad_norm": 0.8256148099899292, + "learning_rate": 4.984636049291756e-06, + "loss": 0.5384, + "step": 2978 + }, + { + "epoch": 0.21901191001323334, + "grad_norm": 0.855104386806488, + "learning_rate": 4.984625381571033e-06, + "loss": 0.5327, + "step": 2979 + }, + { + "epoch": 0.2190854286134392, + "grad_norm": 0.8925179839134216, + "learning_rate": 4.984614710159535e-06, + "loss": 0.5561, + "step": 2980 + }, + { + "epoch": 0.21915894721364504, + "grad_norm": 0.8366983532905579, + "learning_rate": 4.984604035057279e-06, + "loss": 0.5683, + "step": 2981 + }, + { + "epoch": 0.2192324658138509, + "grad_norm": 0.879680335521698, + "learning_rate": 4.984593356264279e-06, + "loss": 0.5701, + "step": 2982 + }, + { + "epoch": 0.21930598441405674, + "grad_norm": 0.8690125942230225, + "learning_rate": 4.984582673780552e-06, + "loss": 0.5838, + "step": 2983 + }, + { + "epoch": 0.2193795030142626, + "grad_norm": 0.8705770373344421, + "learning_rate": 4.984571987606113e-06, + "loss": 0.6116, + "step": 2984 + }, + { + "epoch": 0.21945302161446847, + "grad_norm": 0.8804391026496887, + "learning_rate": 4.984561297740977e-06, + "loss": 0.5835, + "step": 2985 + }, + { + "epoch": 0.21952654021467433, + "grad_norm": 0.8609461784362793, + "learning_rate": 4.984550604185164e-06, + "loss": 0.5835, + "step": 2986 + }, + { + "epoch": 0.21960005881488018, + "grad_norm": 0.8758531808853149, + "learning_rate": 4.984539906938685e-06, + "loss": 0.5916, + "step": 2987 + }, + { + "epoch": 0.21967357741508603, + "grad_norm": 0.9071616530418396, + "learning_rate": 4.984529206001559e-06, + "loss": 0.5747, + "step": 2988 + }, + { + "epoch": 0.21974709601529188, + "grad_norm": 0.852536678314209, + "learning_rate": 4.9845185013738e-06, + "loss": 0.5625, + "step": 2989 + }, + { + "epoch": 0.21982061461549773, + "grad_norm": 0.8992629051208496, + "learning_rate": 4.9845077930554265e-06, + "loss": 0.5936, + "step": 2990 + }, + { + "epoch": 0.21989413321570359, + "grad_norm": 0.8672676682472229, + "learning_rate": 4.9844970810464515e-06, + "loss": 0.5705, + "step": 2991 + }, + { + "epoch": 0.21996765181590944, + "grad_norm": 0.916617214679718, + "learning_rate": 4.984486365346892e-06, + "loss": 0.5932, + "step": 2992 + }, + { + "epoch": 0.2200411704161153, + "grad_norm": 0.8412333130836487, + "learning_rate": 4.984475645956765e-06, + "loss": 0.5844, + "step": 2993 + }, + { + "epoch": 0.22011468901632114, + "grad_norm": 0.8307098746299744, + "learning_rate": 4.984464922876085e-06, + "loss": 0.5772, + "step": 2994 + }, + { + "epoch": 0.220188207616527, + "grad_norm": 0.8871253728866577, + "learning_rate": 4.984454196104868e-06, + "loss": 0.5501, + "step": 2995 + }, + { + "epoch": 0.22026172621673284, + "grad_norm": 0.8932816982269287, + "learning_rate": 4.984443465643131e-06, + "loss": 0.5577, + "step": 2996 + }, + { + "epoch": 0.2203352448169387, + "grad_norm": 0.8382696509361267, + "learning_rate": 4.984432731490889e-06, + "loss": 0.5809, + "step": 2997 + }, + { + "epoch": 0.22040876341714455, + "grad_norm": 0.9149091839790344, + "learning_rate": 4.984421993648158e-06, + "loss": 0.5969, + "step": 2998 + }, + { + "epoch": 0.2204822820173504, + "grad_norm": 0.879693865776062, + "learning_rate": 4.984411252114955e-06, + "loss": 0.631, + "step": 2999 + }, + { + "epoch": 0.22055580061755625, + "grad_norm": 0.8794325590133667, + "learning_rate": 4.984400506891294e-06, + "loss": 0.5901, + "step": 3000 + }, + { + "epoch": 0.2206293192177621, + "grad_norm": 0.9307263493537903, + "learning_rate": 4.984389757977192e-06, + "loss": 0.5968, + "step": 3001 + }, + { + "epoch": 0.22070283781796796, + "grad_norm": 0.8654848337173462, + "learning_rate": 4.984379005372666e-06, + "loss": 0.5736, + "step": 3002 + }, + { + "epoch": 0.2207763564181738, + "grad_norm": 0.9218815565109253, + "learning_rate": 4.984368249077731e-06, + "loss": 0.6057, + "step": 3003 + }, + { + "epoch": 0.22084987501837966, + "grad_norm": 0.8660706281661987, + "learning_rate": 4.984357489092402e-06, + "loss": 0.587, + "step": 3004 + }, + { + "epoch": 0.2209233936185855, + "grad_norm": 0.9214508533477783, + "learning_rate": 4.984346725416697e-06, + "loss": 0.6127, + "step": 3005 + }, + { + "epoch": 0.22099691221879136, + "grad_norm": 0.8651875853538513, + "learning_rate": 4.984335958050631e-06, + "loss": 0.5875, + "step": 3006 + }, + { + "epoch": 0.22107043081899722, + "grad_norm": 0.8485608696937561, + "learning_rate": 4.98432518699422e-06, + "loss": 0.5929, + "step": 3007 + }, + { + "epoch": 0.22114394941920307, + "grad_norm": 0.8503898978233337, + "learning_rate": 4.984314412247479e-06, + "loss": 0.5903, + "step": 3008 + }, + { + "epoch": 0.22121746801940892, + "grad_norm": 0.8662557601928711, + "learning_rate": 4.984303633810426e-06, + "loss": 0.5849, + "step": 3009 + }, + { + "epoch": 0.22129098661961477, + "grad_norm": 0.8688508868217468, + "learning_rate": 4.984292851683075e-06, + "loss": 0.5699, + "step": 3010 + }, + { + "epoch": 0.22136450521982062, + "grad_norm": 0.8872182965278625, + "learning_rate": 4.9842820658654436e-06, + "loss": 0.6257, + "step": 3011 + }, + { + "epoch": 0.22143802382002647, + "grad_norm": 0.9437422752380371, + "learning_rate": 4.984271276357547e-06, + "loss": 0.6025, + "step": 3012 + }, + { + "epoch": 0.22151154242023233, + "grad_norm": 0.8622238039970398, + "learning_rate": 4.984260483159402e-06, + "loss": 0.5785, + "step": 3013 + }, + { + "epoch": 0.22158506102043818, + "grad_norm": 0.8749601244926453, + "learning_rate": 4.984249686271023e-06, + "loss": 0.5585, + "step": 3014 + }, + { + "epoch": 0.22165857962064403, + "grad_norm": 0.8562569618225098, + "learning_rate": 4.984238885692428e-06, + "loss": 0.5507, + "step": 3015 + }, + { + "epoch": 0.22173209822084988, + "grad_norm": 0.8351296186447144, + "learning_rate": 4.984228081423632e-06, + "loss": 0.5658, + "step": 3016 + }, + { + "epoch": 0.22180561682105573, + "grad_norm": 0.9334781169891357, + "learning_rate": 4.984217273464652e-06, + "loss": 0.5983, + "step": 3017 + }, + { + "epoch": 0.2218791354212616, + "grad_norm": 0.8182063698768616, + "learning_rate": 4.984206461815501e-06, + "loss": 0.5994, + "step": 3018 + }, + { + "epoch": 0.22195265402146744, + "grad_norm": 0.8556950092315674, + "learning_rate": 4.9841956464761995e-06, + "loss": 0.5832, + "step": 3019 + }, + { + "epoch": 0.2220261726216733, + "grad_norm": 0.8753175139427185, + "learning_rate": 4.9841848274467605e-06, + "loss": 0.6215, + "step": 3020 + }, + { + "epoch": 0.22209969122187914, + "grad_norm": 0.869107186794281, + "learning_rate": 4.984174004727201e-06, + "loss": 0.602, + "step": 3021 + }, + { + "epoch": 0.222173209822085, + "grad_norm": 0.8416204452514648, + "learning_rate": 4.984163178317538e-06, + "loss": 0.5862, + "step": 3022 + }, + { + "epoch": 0.22224672842229085, + "grad_norm": 0.8859381675720215, + "learning_rate": 4.984152348217786e-06, + "loss": 0.5783, + "step": 3023 + }, + { + "epoch": 0.2223202470224967, + "grad_norm": 0.8862499594688416, + "learning_rate": 4.984141514427962e-06, + "loss": 0.5812, + "step": 3024 + }, + { + "epoch": 0.22239376562270255, + "grad_norm": 0.8783819079399109, + "learning_rate": 4.984130676948082e-06, + "loss": 0.5843, + "step": 3025 + }, + { + "epoch": 0.2224672842229084, + "grad_norm": 0.847111701965332, + "learning_rate": 4.98411983577816e-06, + "loss": 0.577, + "step": 3026 + }, + { + "epoch": 0.22254080282311425, + "grad_norm": 0.9023949503898621, + "learning_rate": 4.984108990918215e-06, + "loss": 0.6126, + "step": 3027 + }, + { + "epoch": 0.2226143214233201, + "grad_norm": 0.8639888763427734, + "learning_rate": 4.984098142368263e-06, + "loss": 0.5498, + "step": 3028 + }, + { + "epoch": 0.22268784002352596, + "grad_norm": 0.8488218784332275, + "learning_rate": 4.984087290128318e-06, + "loss": 0.5988, + "step": 3029 + }, + { + "epoch": 0.2227613586237318, + "grad_norm": 0.8633898496627808, + "learning_rate": 4.984076434198398e-06, + "loss": 0.5899, + "step": 3030 + }, + { + "epoch": 0.22283487722393766, + "grad_norm": 0.9286959767341614, + "learning_rate": 4.984065574578519e-06, + "loss": 0.6444, + "step": 3031 + }, + { + "epoch": 0.2229083958241435, + "grad_norm": 0.8328529000282288, + "learning_rate": 4.984054711268697e-06, + "loss": 0.5703, + "step": 3032 + }, + { + "epoch": 0.22298191442434936, + "grad_norm": 0.8432232737541199, + "learning_rate": 4.984043844268946e-06, + "loss": 0.5608, + "step": 3033 + }, + { + "epoch": 0.22305543302455522, + "grad_norm": 0.8905603289604187, + "learning_rate": 4.984032973579285e-06, + "loss": 0.596, + "step": 3034 + }, + { + "epoch": 0.22312895162476107, + "grad_norm": 0.9429107904434204, + "learning_rate": 4.984022099199729e-06, + "loss": 0.634, + "step": 3035 + }, + { + "epoch": 0.22320247022496692, + "grad_norm": 0.8546863794326782, + "learning_rate": 4.984011221130294e-06, + "loss": 0.6007, + "step": 3036 + }, + { + "epoch": 0.22327598882517277, + "grad_norm": 0.92024827003479, + "learning_rate": 4.984000339370996e-06, + "loss": 0.5741, + "step": 3037 + }, + { + "epoch": 0.22334950742537862, + "grad_norm": 0.8269678950309753, + "learning_rate": 4.983989453921851e-06, + "loss": 0.5827, + "step": 3038 + }, + { + "epoch": 0.22342302602558448, + "grad_norm": 0.8577226400375366, + "learning_rate": 4.983978564782878e-06, + "loss": 0.5803, + "step": 3039 + }, + { + "epoch": 0.22349654462579033, + "grad_norm": 0.9054408669471741, + "learning_rate": 4.983967671954089e-06, + "loss": 0.5807, + "step": 3040 + }, + { + "epoch": 0.22357006322599618, + "grad_norm": 0.9452145099639893, + "learning_rate": 4.9839567754355025e-06, + "loss": 0.5567, + "step": 3041 + }, + { + "epoch": 0.22364358182620203, + "grad_norm": 0.8720293641090393, + "learning_rate": 4.983945875227134e-06, + "loss": 0.6091, + "step": 3042 + }, + { + "epoch": 0.22371710042640788, + "grad_norm": 0.9145992398262024, + "learning_rate": 4.983934971329e-06, + "loss": 0.6031, + "step": 3043 + }, + { + "epoch": 0.22379061902661374, + "grad_norm": 0.8789807558059692, + "learning_rate": 4.983924063741117e-06, + "loss": 0.5413, + "step": 3044 + }, + { + "epoch": 0.2238641376268196, + "grad_norm": 0.8610498309135437, + "learning_rate": 4.9839131524635e-06, + "loss": 0.5557, + "step": 3045 + }, + { + "epoch": 0.22393765622702544, + "grad_norm": 0.8656947612762451, + "learning_rate": 4.983902237496167e-06, + "loss": 0.6362, + "step": 3046 + }, + { + "epoch": 0.2240111748272313, + "grad_norm": 0.8552951812744141, + "learning_rate": 4.9838913188391326e-06, + "loss": 0.5414, + "step": 3047 + }, + { + "epoch": 0.22408469342743714, + "grad_norm": 0.8669518232345581, + "learning_rate": 4.983880396492414e-06, + "loss": 0.5894, + "step": 3048 + }, + { + "epoch": 0.224158212027643, + "grad_norm": 0.8658132553100586, + "learning_rate": 4.983869470456027e-06, + "loss": 0.5781, + "step": 3049 + }, + { + "epoch": 0.22423173062784885, + "grad_norm": 0.9107221364974976, + "learning_rate": 4.983858540729988e-06, + "loss": 0.6175, + "step": 3050 + }, + { + "epoch": 0.2243052492280547, + "grad_norm": 0.8969014286994934, + "learning_rate": 4.983847607314313e-06, + "loss": 0.6196, + "step": 3051 + }, + { + "epoch": 0.22437876782826055, + "grad_norm": 0.8620468378067017, + "learning_rate": 4.983836670209018e-06, + "loss": 0.574, + "step": 3052 + }, + { + "epoch": 0.2244522864284664, + "grad_norm": 0.9051669836044312, + "learning_rate": 4.9838257294141204e-06, + "loss": 0.5564, + "step": 3053 + }, + { + "epoch": 0.22452580502867225, + "grad_norm": 0.8581008911132812, + "learning_rate": 4.983814784929635e-06, + "loss": 0.5822, + "step": 3054 + }, + { + "epoch": 0.2245993236288781, + "grad_norm": 1.020583987236023, + "learning_rate": 4.983803836755579e-06, + "loss": 0.6114, + "step": 3055 + }, + { + "epoch": 0.22467284222908396, + "grad_norm": 0.869963526725769, + "learning_rate": 4.9837928848919685e-06, + "loss": 0.5991, + "step": 3056 + }, + { + "epoch": 0.2247463608292898, + "grad_norm": 0.8752977848052979, + "learning_rate": 4.98378192933882e-06, + "loss": 0.611, + "step": 3057 + }, + { + "epoch": 0.22481987942949566, + "grad_norm": 0.9056130647659302, + "learning_rate": 4.983770970096149e-06, + "loss": 0.6038, + "step": 3058 + }, + { + "epoch": 0.2248933980297015, + "grad_norm": 0.8745107650756836, + "learning_rate": 4.983760007163973e-06, + "loss": 0.5438, + "step": 3059 + }, + { + "epoch": 0.22496691662990737, + "grad_norm": 0.901940643787384, + "learning_rate": 4.983749040542306e-06, + "loss": 0.5889, + "step": 3060 + }, + { + "epoch": 0.22504043523011322, + "grad_norm": 0.9137243032455444, + "learning_rate": 4.983738070231167e-06, + "loss": 0.5993, + "step": 3061 + }, + { + "epoch": 0.22511395383031907, + "grad_norm": 0.8431603312492371, + "learning_rate": 4.98372709623057e-06, + "loss": 0.5007, + "step": 3062 + }, + { + "epoch": 0.22518747243052492, + "grad_norm": 0.8562221527099609, + "learning_rate": 4.983716118540534e-06, + "loss": 0.5618, + "step": 3063 + }, + { + "epoch": 0.22526099103073077, + "grad_norm": 0.8735974431037903, + "learning_rate": 4.983705137161072e-06, + "loss": 0.5677, + "step": 3064 + }, + { + "epoch": 0.22533450963093662, + "grad_norm": 0.8790309429168701, + "learning_rate": 4.983694152092203e-06, + "loss": 0.591, + "step": 3065 + }, + { + "epoch": 0.22540802823114248, + "grad_norm": 0.8673346638679504, + "learning_rate": 4.983683163333943e-06, + "loss": 0.5918, + "step": 3066 + }, + { + "epoch": 0.22548154683134833, + "grad_norm": 0.8610057234764099, + "learning_rate": 4.983672170886306e-06, + "loss": 0.5281, + "step": 3067 + }, + { + "epoch": 0.22555506543155418, + "grad_norm": 0.9392654895782471, + "learning_rate": 4.983661174749311e-06, + "loss": 0.6029, + "step": 3068 + }, + { + "epoch": 0.22562858403176003, + "grad_norm": 0.8945070505142212, + "learning_rate": 4.983650174922974e-06, + "loss": 0.5851, + "step": 3069 + }, + { + "epoch": 0.22570210263196588, + "grad_norm": 0.8566968441009521, + "learning_rate": 4.983639171407309e-06, + "loss": 0.5812, + "step": 3070 + }, + { + "epoch": 0.22577562123217174, + "grad_norm": 0.834647536277771, + "learning_rate": 4.983628164202335e-06, + "loss": 0.5621, + "step": 3071 + }, + { + "epoch": 0.2258491398323776, + "grad_norm": 0.9134584069252014, + "learning_rate": 4.983617153308068e-06, + "loss": 0.6219, + "step": 3072 + }, + { + "epoch": 0.22592265843258344, + "grad_norm": 0.8537484407424927, + "learning_rate": 4.983606138724522e-06, + "loss": 0.5708, + "step": 3073 + }, + { + "epoch": 0.2259961770327893, + "grad_norm": 0.7997972965240479, + "learning_rate": 4.983595120451717e-06, + "loss": 0.5771, + "step": 3074 + }, + { + "epoch": 0.22606969563299514, + "grad_norm": 0.8794361352920532, + "learning_rate": 4.983584098489667e-06, + "loss": 0.5756, + "step": 3075 + }, + { + "epoch": 0.226143214233201, + "grad_norm": 0.819843590259552, + "learning_rate": 4.983573072838388e-06, + "loss": 0.5754, + "step": 3076 + }, + { + "epoch": 0.22621673283340685, + "grad_norm": 0.9459713101387024, + "learning_rate": 4.9835620434978975e-06, + "loss": 0.6016, + "step": 3077 + }, + { + "epoch": 0.2262902514336127, + "grad_norm": 0.8939942717552185, + "learning_rate": 4.983551010468212e-06, + "loss": 0.5852, + "step": 3078 + }, + { + "epoch": 0.22636377003381855, + "grad_norm": 0.8754364848136902, + "learning_rate": 4.983539973749346e-06, + "loss": 0.5848, + "step": 3079 + }, + { + "epoch": 0.2264372886340244, + "grad_norm": 0.8873910307884216, + "learning_rate": 4.983528933341319e-06, + "loss": 0.6033, + "step": 3080 + }, + { + "epoch": 0.22651080723423025, + "grad_norm": 0.868751049041748, + "learning_rate": 4.983517889244145e-06, + "loss": 0.5721, + "step": 3081 + }, + { + "epoch": 0.2265843258344361, + "grad_norm": 0.896730899810791, + "learning_rate": 4.983506841457841e-06, + "loss": 0.6309, + "step": 3082 + }, + { + "epoch": 0.22665784443464196, + "grad_norm": 0.8989806175231934, + "learning_rate": 4.983495789982424e-06, + "loss": 0.5456, + "step": 3083 + }, + { + "epoch": 0.2267313630348478, + "grad_norm": 0.8396703600883484, + "learning_rate": 4.983484734817911e-06, + "loss": 0.5593, + "step": 3084 + }, + { + "epoch": 0.22680488163505366, + "grad_norm": 0.9348222613334656, + "learning_rate": 4.983473675964316e-06, + "loss": 0.5764, + "step": 3085 + }, + { + "epoch": 0.22687840023525951, + "grad_norm": 0.8713961243629456, + "learning_rate": 4.983462613421657e-06, + "loss": 0.5567, + "step": 3086 + }, + { + "epoch": 0.22695191883546537, + "grad_norm": 0.8549190759658813, + "learning_rate": 4.98345154718995e-06, + "loss": 0.5697, + "step": 3087 + }, + { + "epoch": 0.22702543743567122, + "grad_norm": 0.8881616592407227, + "learning_rate": 4.983440477269212e-06, + "loss": 0.6061, + "step": 3088 + }, + { + "epoch": 0.22709895603587707, + "grad_norm": 0.8568992614746094, + "learning_rate": 4.98342940365946e-06, + "loss": 0.6189, + "step": 3089 + }, + { + "epoch": 0.22717247463608292, + "grad_norm": 0.8172438740730286, + "learning_rate": 4.983418326360709e-06, + "loss": 0.5644, + "step": 3090 + }, + { + "epoch": 0.22724599323628877, + "grad_norm": 0.8806970715522766, + "learning_rate": 4.983407245372977e-06, + "loss": 0.5857, + "step": 3091 + }, + { + "epoch": 0.22731951183649463, + "grad_norm": 0.8355310559272766, + "learning_rate": 4.983396160696278e-06, + "loss": 0.5315, + "step": 3092 + }, + { + "epoch": 0.22739303043670048, + "grad_norm": 0.8940824866294861, + "learning_rate": 4.98338507233063e-06, + "loss": 0.6046, + "step": 3093 + }, + { + "epoch": 0.22746654903690633, + "grad_norm": 0.9132765531539917, + "learning_rate": 4.98337398027605e-06, + "loss": 0.5884, + "step": 3094 + }, + { + "epoch": 0.22754006763711218, + "grad_norm": 0.8822723627090454, + "learning_rate": 4.983362884532554e-06, + "loss": 0.6211, + "step": 3095 + }, + { + "epoch": 0.22761358623731803, + "grad_norm": 0.8626614809036255, + "learning_rate": 4.983351785100159e-06, + "loss": 0.5853, + "step": 3096 + }, + { + "epoch": 0.22768710483752388, + "grad_norm": 0.8383496999740601, + "learning_rate": 4.98334068197888e-06, + "loss": 0.5255, + "step": 3097 + }, + { + "epoch": 0.22776062343772974, + "grad_norm": 0.9242650866508484, + "learning_rate": 4.983329575168735e-06, + "loss": 0.5961, + "step": 3098 + }, + { + "epoch": 0.2278341420379356, + "grad_norm": 0.8608441948890686, + "learning_rate": 4.983318464669739e-06, + "loss": 0.5616, + "step": 3099 + }, + { + "epoch": 0.22790766063814144, + "grad_norm": 0.8871802091598511, + "learning_rate": 4.98330735048191e-06, + "loss": 0.5826, + "step": 3100 + }, + { + "epoch": 0.2279811792383473, + "grad_norm": 0.8730566501617432, + "learning_rate": 4.9832962326052644e-06, + "loss": 0.5772, + "step": 3101 + }, + { + "epoch": 0.22805469783855314, + "grad_norm": 0.8792027235031128, + "learning_rate": 4.983285111039817e-06, + "loss": 0.5477, + "step": 3102 + }, + { + "epoch": 0.228128216438759, + "grad_norm": 0.8566625714302063, + "learning_rate": 4.983273985785587e-06, + "loss": 0.5951, + "step": 3103 + }, + { + "epoch": 0.22820173503896485, + "grad_norm": 0.9146084189414978, + "learning_rate": 4.983262856842589e-06, + "loss": 0.607, + "step": 3104 + }, + { + "epoch": 0.2282752536391707, + "grad_norm": 0.8454351425170898, + "learning_rate": 4.983251724210839e-06, + "loss": 0.5458, + "step": 3105 + }, + { + "epoch": 0.22834877223937655, + "grad_norm": 0.919786274433136, + "learning_rate": 4.983240587890355e-06, + "loss": 0.5854, + "step": 3106 + }, + { + "epoch": 0.2284222908395824, + "grad_norm": 0.9193314909934998, + "learning_rate": 4.983229447881154e-06, + "loss": 0.5544, + "step": 3107 + }, + { + "epoch": 0.22849580943978826, + "grad_norm": 0.8687009811401367, + "learning_rate": 4.983218304183251e-06, + "loss": 0.6394, + "step": 3108 + }, + { + "epoch": 0.2285693280399941, + "grad_norm": 0.8410642147064209, + "learning_rate": 4.983207156796663e-06, + "loss": 0.5757, + "step": 3109 + }, + { + "epoch": 0.22864284664019996, + "grad_norm": 0.8501585125923157, + "learning_rate": 4.983196005721407e-06, + "loss": 0.5939, + "step": 3110 + }, + { + "epoch": 0.2287163652404058, + "grad_norm": 0.8582203388214111, + "learning_rate": 4.9831848509574995e-06, + "loss": 0.5828, + "step": 3111 + }, + { + "epoch": 0.22878988384061166, + "grad_norm": 0.9193985462188721, + "learning_rate": 4.983173692504957e-06, + "loss": 0.5909, + "step": 3112 + }, + { + "epoch": 0.22886340244081751, + "grad_norm": 0.8704157471656799, + "learning_rate": 4.983162530363796e-06, + "loss": 0.5914, + "step": 3113 + }, + { + "epoch": 0.22893692104102337, + "grad_norm": 0.9787272214889526, + "learning_rate": 4.983151364534032e-06, + "loss": 0.6372, + "step": 3114 + }, + { + "epoch": 0.22901043964122922, + "grad_norm": 0.8758975267410278, + "learning_rate": 4.983140195015684e-06, + "loss": 0.6153, + "step": 3115 + }, + { + "epoch": 0.22908395824143507, + "grad_norm": 0.9417439699172974, + "learning_rate": 4.983129021808766e-06, + "loss": 0.6065, + "step": 3116 + }, + { + "epoch": 0.22915747684164092, + "grad_norm": 0.8790421485900879, + "learning_rate": 4.983117844913295e-06, + "loss": 0.6036, + "step": 3117 + }, + { + "epoch": 0.22923099544184677, + "grad_norm": 0.85923832654953, + "learning_rate": 4.98310666432929e-06, + "loss": 0.577, + "step": 3118 + }, + { + "epoch": 0.22930451404205263, + "grad_norm": 0.9057769775390625, + "learning_rate": 4.983095480056767e-06, + "loss": 0.6052, + "step": 3119 + }, + { + "epoch": 0.22937803264225848, + "grad_norm": 0.8158347010612488, + "learning_rate": 4.98308429209574e-06, + "loss": 0.5657, + "step": 3120 + }, + { + "epoch": 0.22945155124246433, + "grad_norm": 0.8745317459106445, + "learning_rate": 4.983073100446227e-06, + "loss": 0.603, + "step": 3121 + }, + { + "epoch": 0.22952506984267018, + "grad_norm": 0.8179181218147278, + "learning_rate": 4.983061905108246e-06, + "loss": 0.551, + "step": 3122 + }, + { + "epoch": 0.22959858844287606, + "grad_norm": 0.9103482961654663, + "learning_rate": 4.983050706081812e-06, + "loss": 0.6612, + "step": 3123 + }, + { + "epoch": 0.2296721070430819, + "grad_norm": 0.8550155162811279, + "learning_rate": 4.983039503366942e-06, + "loss": 0.591, + "step": 3124 + }, + { + "epoch": 0.22974562564328777, + "grad_norm": 0.8731188178062439, + "learning_rate": 4.983028296963653e-06, + "loss": 0.604, + "step": 3125 + }, + { + "epoch": 0.22981914424349362, + "grad_norm": 0.8307414650917053, + "learning_rate": 4.983017086871961e-06, + "loss": 0.6075, + "step": 3126 + }, + { + "epoch": 0.22989266284369947, + "grad_norm": 0.8209845423698425, + "learning_rate": 4.983005873091884e-06, + "loss": 0.6099, + "step": 3127 + }, + { + "epoch": 0.22996618144390532, + "grad_norm": 0.8183659911155701, + "learning_rate": 4.982994655623437e-06, + "loss": 0.5974, + "step": 3128 + }, + { + "epoch": 0.23003970004411117, + "grad_norm": 0.8776965737342834, + "learning_rate": 4.9829834344666375e-06, + "loss": 0.556, + "step": 3129 + }, + { + "epoch": 0.23011321864431702, + "grad_norm": 0.8414424657821655, + "learning_rate": 4.982972209621502e-06, + "loss": 0.5551, + "step": 3130 + }, + { + "epoch": 0.23018673724452288, + "grad_norm": 0.8456410765647888, + "learning_rate": 4.982960981088048e-06, + "loss": 0.5738, + "step": 3131 + }, + { + "epoch": 0.23026025584472873, + "grad_norm": 0.7951115369796753, + "learning_rate": 4.98294974886629e-06, + "loss": 0.5344, + "step": 3132 + }, + { + "epoch": 0.23033377444493458, + "grad_norm": 0.9124892950057983, + "learning_rate": 4.982938512956247e-06, + "loss": 0.604, + "step": 3133 + }, + { + "epoch": 0.23040729304514043, + "grad_norm": 0.8666853904724121, + "learning_rate": 4.9829272733579344e-06, + "loss": 0.5605, + "step": 3134 + }, + { + "epoch": 0.23048081164534628, + "grad_norm": 0.8663138151168823, + "learning_rate": 4.98291603007137e-06, + "loss": 0.6135, + "step": 3135 + }, + { + "epoch": 0.23055433024555214, + "grad_norm": 0.8503029942512512, + "learning_rate": 4.98290478309657e-06, + "loss": 0.5796, + "step": 3136 + }, + { + "epoch": 0.230627848845758, + "grad_norm": 0.9025608897209167, + "learning_rate": 4.9828935324335504e-06, + "loss": 0.6144, + "step": 3137 + }, + { + "epoch": 0.23070136744596384, + "grad_norm": 0.914727509021759, + "learning_rate": 4.982882278082328e-06, + "loss": 0.6166, + "step": 3138 + }, + { + "epoch": 0.2307748860461697, + "grad_norm": 0.8911975622177124, + "learning_rate": 4.9828710200429205e-06, + "loss": 0.6047, + "step": 3139 + }, + { + "epoch": 0.23084840464637554, + "grad_norm": 0.8304436206817627, + "learning_rate": 4.982859758315344e-06, + "loss": 0.5607, + "step": 3140 + }, + { + "epoch": 0.2309219232465814, + "grad_norm": 0.8527318835258484, + "learning_rate": 4.982848492899614e-06, + "loss": 0.5566, + "step": 3141 + }, + { + "epoch": 0.23099544184678725, + "grad_norm": 0.859195351600647, + "learning_rate": 4.98283722379575e-06, + "loss": 0.6236, + "step": 3142 + }, + { + "epoch": 0.2310689604469931, + "grad_norm": 0.8886541724205017, + "learning_rate": 4.982825951003767e-06, + "loss": 0.6053, + "step": 3143 + }, + { + "epoch": 0.23114247904719895, + "grad_norm": 0.8366897106170654, + "learning_rate": 4.982814674523683e-06, + "loss": 0.5591, + "step": 3144 + }, + { + "epoch": 0.2312159976474048, + "grad_norm": 0.8411690592765808, + "learning_rate": 4.982803394355512e-06, + "loss": 0.5991, + "step": 3145 + }, + { + "epoch": 0.23128951624761065, + "grad_norm": 0.9082803130149841, + "learning_rate": 4.982792110499273e-06, + "loss": 0.61, + "step": 3146 + }, + { + "epoch": 0.2313630348478165, + "grad_norm": 0.9015140533447266, + "learning_rate": 4.982780822954982e-06, + "loss": 0.5912, + "step": 3147 + }, + { + "epoch": 0.23143655344802236, + "grad_norm": 0.900816798210144, + "learning_rate": 4.982769531722657e-06, + "loss": 0.6218, + "step": 3148 + }, + { + "epoch": 0.2315100720482282, + "grad_norm": 0.9256121516227722, + "learning_rate": 4.9827582368023135e-06, + "loss": 0.5843, + "step": 3149 + }, + { + "epoch": 0.23158359064843406, + "grad_norm": 0.8820819854736328, + "learning_rate": 4.982746938193968e-06, + "loss": 0.6147, + "step": 3150 + }, + { + "epoch": 0.23165710924863991, + "grad_norm": 0.8736037611961365, + "learning_rate": 4.9827356358976385e-06, + "loss": 0.6258, + "step": 3151 + }, + { + "epoch": 0.23173062784884577, + "grad_norm": 0.9564252495765686, + "learning_rate": 4.982724329913341e-06, + "loss": 0.6173, + "step": 3152 + }, + { + "epoch": 0.23180414644905162, + "grad_norm": 0.8746057748794556, + "learning_rate": 4.982713020241093e-06, + "loss": 0.6124, + "step": 3153 + }, + { + "epoch": 0.23187766504925747, + "grad_norm": 0.8581736087799072, + "learning_rate": 4.98270170688091e-06, + "loss": 0.5917, + "step": 3154 + }, + { + "epoch": 0.23195118364946332, + "grad_norm": 0.879137396812439, + "learning_rate": 4.982690389832809e-06, + "loss": 0.5955, + "step": 3155 + }, + { + "epoch": 0.23202470224966917, + "grad_norm": 0.8843123316764832, + "learning_rate": 4.982679069096808e-06, + "loss": 0.5981, + "step": 3156 + }, + { + "epoch": 0.23209822084987503, + "grad_norm": 0.8334278464317322, + "learning_rate": 4.982667744672924e-06, + "loss": 0.5949, + "step": 3157 + }, + { + "epoch": 0.23217173945008088, + "grad_norm": 0.9043882489204407, + "learning_rate": 4.982656416561172e-06, + "loss": 0.5834, + "step": 3158 + }, + { + "epoch": 0.23224525805028673, + "grad_norm": 0.9105309247970581, + "learning_rate": 4.982645084761571e-06, + "loss": 0.5999, + "step": 3159 + }, + { + "epoch": 0.23231877665049258, + "grad_norm": 0.839272141456604, + "learning_rate": 4.982633749274135e-06, + "loss": 0.5886, + "step": 3160 + }, + { + "epoch": 0.23239229525069843, + "grad_norm": 0.8858162760734558, + "learning_rate": 4.982622410098884e-06, + "loss": 0.5784, + "step": 3161 + }, + { + "epoch": 0.23246581385090428, + "grad_norm": 0.857999861240387, + "learning_rate": 4.982611067235832e-06, + "loss": 0.5872, + "step": 3162 + }, + { + "epoch": 0.23253933245111014, + "grad_norm": 0.910207211971283, + "learning_rate": 4.982599720684998e-06, + "loss": 0.5569, + "step": 3163 + }, + { + "epoch": 0.232612851051316, + "grad_norm": 0.8193223476409912, + "learning_rate": 4.9825883704463985e-06, + "loss": 0.5738, + "step": 3164 + }, + { + "epoch": 0.23268636965152184, + "grad_norm": 0.8485036492347717, + "learning_rate": 4.9825770165200494e-06, + "loss": 0.5553, + "step": 3165 + }, + { + "epoch": 0.2327598882517277, + "grad_norm": 0.8655261993408203, + "learning_rate": 4.982565658905968e-06, + "loss": 0.5679, + "step": 3166 + }, + { + "epoch": 0.23283340685193354, + "grad_norm": 0.8580840826034546, + "learning_rate": 4.982554297604171e-06, + "loss": 0.5779, + "step": 3167 + }, + { + "epoch": 0.2329069254521394, + "grad_norm": 0.842097282409668, + "learning_rate": 4.982542932614675e-06, + "loss": 0.6259, + "step": 3168 + }, + { + "epoch": 0.23298044405234525, + "grad_norm": 0.8629012107849121, + "learning_rate": 4.982531563937499e-06, + "loss": 0.5888, + "step": 3169 + }, + { + "epoch": 0.2330539626525511, + "grad_norm": 0.7987809777259827, + "learning_rate": 4.982520191572657e-06, + "loss": 0.546, + "step": 3170 + }, + { + "epoch": 0.23312748125275695, + "grad_norm": 0.8988904356956482, + "learning_rate": 4.9825088155201685e-06, + "loss": 0.5976, + "step": 3171 + }, + { + "epoch": 0.2332009998529628, + "grad_norm": 0.898259162902832, + "learning_rate": 4.982497435780048e-06, + "loss": 0.594, + "step": 3172 + }, + { + "epoch": 0.23327451845316866, + "grad_norm": 0.8589652180671692, + "learning_rate": 4.982486052352315e-06, + "loss": 0.615, + "step": 3173 + }, + { + "epoch": 0.2333480370533745, + "grad_norm": 0.8667933344841003, + "learning_rate": 4.982474665236983e-06, + "loss": 0.6124, + "step": 3174 + }, + { + "epoch": 0.23342155565358036, + "grad_norm": 0.8589207530021667, + "learning_rate": 4.982463274434072e-06, + "loss": 0.5982, + "step": 3175 + }, + { + "epoch": 0.2334950742537862, + "grad_norm": 0.8989638090133667, + "learning_rate": 4.982451879943597e-06, + "loss": 0.6056, + "step": 3176 + }, + { + "epoch": 0.23356859285399206, + "grad_norm": 0.8403399586677551, + "learning_rate": 4.982440481765576e-06, + "loss": 0.5654, + "step": 3177 + }, + { + "epoch": 0.23364211145419791, + "grad_norm": 0.8658835291862488, + "learning_rate": 4.982429079900025e-06, + "loss": 0.588, + "step": 3178 + }, + { + "epoch": 0.23371563005440377, + "grad_norm": 0.8638744950294495, + "learning_rate": 4.982417674346962e-06, + "loss": 0.5962, + "step": 3179 + }, + { + "epoch": 0.23378914865460962, + "grad_norm": 0.8536926507949829, + "learning_rate": 4.982406265106404e-06, + "loss": 0.6214, + "step": 3180 + }, + { + "epoch": 0.23386266725481547, + "grad_norm": 0.940937876701355, + "learning_rate": 4.982394852178367e-06, + "loss": 0.6002, + "step": 3181 + }, + { + "epoch": 0.23393618585502132, + "grad_norm": 0.8644130825996399, + "learning_rate": 4.982383435562869e-06, + "loss": 0.5229, + "step": 3182 + }, + { + "epoch": 0.23400970445522717, + "grad_norm": 0.8206847906112671, + "learning_rate": 4.982372015259925e-06, + "loss": 0.5672, + "step": 3183 + }, + { + "epoch": 0.23408322305543303, + "grad_norm": 0.9013881683349609, + "learning_rate": 4.982360591269554e-06, + "loss": 0.6068, + "step": 3184 + }, + { + "epoch": 0.23415674165563888, + "grad_norm": 0.8296981453895569, + "learning_rate": 4.982349163591772e-06, + "loss": 0.5613, + "step": 3185 + }, + { + "epoch": 0.23423026025584473, + "grad_norm": 0.8556239008903503, + "learning_rate": 4.982337732226597e-06, + "loss": 0.5843, + "step": 3186 + }, + { + "epoch": 0.23430377885605058, + "grad_norm": 0.8849702477455139, + "learning_rate": 4.982326297174045e-06, + "loss": 0.5063, + "step": 3187 + }, + { + "epoch": 0.23437729745625643, + "grad_norm": 0.8628386855125427, + "learning_rate": 4.982314858434133e-06, + "loss": 0.5845, + "step": 3188 + }, + { + "epoch": 0.23445081605646229, + "grad_norm": 0.8533003926277161, + "learning_rate": 4.982303416006878e-06, + "loss": 0.6154, + "step": 3189 + }, + { + "epoch": 0.23452433465666814, + "grad_norm": 0.881259560585022, + "learning_rate": 4.982291969892298e-06, + "loss": 0.5724, + "step": 3190 + }, + { + "epoch": 0.234597853256874, + "grad_norm": 0.9068023562431335, + "learning_rate": 4.982280520090408e-06, + "loss": 0.6148, + "step": 3191 + }, + { + "epoch": 0.23467137185707984, + "grad_norm": 0.9047123789787292, + "learning_rate": 4.9822690666012265e-06, + "loss": 0.6125, + "step": 3192 + }, + { + "epoch": 0.2347448904572857, + "grad_norm": 0.9074934720993042, + "learning_rate": 4.982257609424771e-06, + "loss": 0.5631, + "step": 3193 + }, + { + "epoch": 0.23481840905749155, + "grad_norm": 0.910194993019104, + "learning_rate": 4.982246148561057e-06, + "loss": 0.5701, + "step": 3194 + }, + { + "epoch": 0.2348919276576974, + "grad_norm": 0.906133770942688, + "learning_rate": 4.982234684010103e-06, + "loss": 0.6292, + "step": 3195 + }, + { + "epoch": 0.23496544625790325, + "grad_norm": 0.8744733929634094, + "learning_rate": 4.982223215771924e-06, + "loss": 0.6554, + "step": 3196 + }, + { + "epoch": 0.2350389648581091, + "grad_norm": 0.924163818359375, + "learning_rate": 4.98221174384654e-06, + "loss": 0.5643, + "step": 3197 + }, + { + "epoch": 0.23511248345831495, + "grad_norm": 0.8358569741249084, + "learning_rate": 4.982200268233966e-06, + "loss": 0.5841, + "step": 3198 + }, + { + "epoch": 0.2351860020585208, + "grad_norm": 0.87060546875, + "learning_rate": 4.982188788934219e-06, + "loss": 0.5776, + "step": 3199 + }, + { + "epoch": 0.23525952065872666, + "grad_norm": 0.8423094153404236, + "learning_rate": 4.982177305947316e-06, + "loss": 0.558, + "step": 3200 + }, + { + "epoch": 0.2353330392589325, + "grad_norm": 0.8698108196258545, + "learning_rate": 4.982165819273275e-06, + "loss": 0.584, + "step": 3201 + }, + { + "epoch": 0.23540655785913836, + "grad_norm": 0.8570834994316101, + "learning_rate": 4.982154328912113e-06, + "loss": 0.5932, + "step": 3202 + }, + { + "epoch": 0.2354800764593442, + "grad_norm": 0.862817645072937, + "learning_rate": 4.982142834863846e-06, + "loss": 0.5612, + "step": 3203 + }, + { + "epoch": 0.23555359505955006, + "grad_norm": 0.8056060671806335, + "learning_rate": 4.982131337128492e-06, + "loss": 0.5871, + "step": 3204 + }, + { + "epoch": 0.23562711365975592, + "grad_norm": 0.8953344225883484, + "learning_rate": 4.982119835706068e-06, + "loss": 0.597, + "step": 3205 + }, + { + "epoch": 0.23570063225996177, + "grad_norm": 0.8539981245994568, + "learning_rate": 4.98210833059659e-06, + "loss": 0.5528, + "step": 3206 + }, + { + "epoch": 0.23577415086016762, + "grad_norm": 0.935154914855957, + "learning_rate": 4.982096821800077e-06, + "loss": 0.5842, + "step": 3207 + }, + { + "epoch": 0.23584766946037347, + "grad_norm": 0.8500944375991821, + "learning_rate": 4.982085309316546e-06, + "loss": 0.561, + "step": 3208 + }, + { + "epoch": 0.23592118806057932, + "grad_norm": 0.8774210214614868, + "learning_rate": 4.982073793146011e-06, + "loss": 0.6112, + "step": 3209 + }, + { + "epoch": 0.23599470666078518, + "grad_norm": 0.8886145353317261, + "learning_rate": 4.982062273288493e-06, + "loss": 0.6212, + "step": 3210 + }, + { + "epoch": 0.23606822526099103, + "grad_norm": 0.8990269303321838, + "learning_rate": 4.982050749744006e-06, + "loss": 0.5637, + "step": 3211 + }, + { + "epoch": 0.23614174386119688, + "grad_norm": 0.8720525503158569, + "learning_rate": 4.982039222512569e-06, + "loss": 0.5231, + "step": 3212 + }, + { + "epoch": 0.23621526246140273, + "grad_norm": 0.8979282975196838, + "learning_rate": 4.982027691594199e-06, + "loss": 0.5655, + "step": 3213 + }, + { + "epoch": 0.23628878106160858, + "grad_norm": 0.8770400285720825, + "learning_rate": 4.982016156988912e-06, + "loss": 0.6133, + "step": 3214 + }, + { + "epoch": 0.23636229966181443, + "grad_norm": 0.846315324306488, + "learning_rate": 4.982004618696727e-06, + "loss": 0.5506, + "step": 3215 + }, + { + "epoch": 0.2364358182620203, + "grad_norm": 0.9099037647247314, + "learning_rate": 4.98199307671766e-06, + "loss": 0.6091, + "step": 3216 + }, + { + "epoch": 0.23650933686222614, + "grad_norm": 0.8689888715744019, + "learning_rate": 4.981981531051727e-06, + "loss": 0.5642, + "step": 3217 + }, + { + "epoch": 0.236582855462432, + "grad_norm": 0.8805835843086243, + "learning_rate": 4.981969981698948e-06, + "loss": 0.5818, + "step": 3218 + }, + { + "epoch": 0.23665637406263784, + "grad_norm": 0.8299066424369812, + "learning_rate": 4.981958428659338e-06, + "loss": 0.5801, + "step": 3219 + }, + { + "epoch": 0.2367298926628437, + "grad_norm": 0.8456267714500427, + "learning_rate": 4.981946871932914e-06, + "loss": 0.5794, + "step": 3220 + }, + { + "epoch": 0.23680341126304955, + "grad_norm": 0.8783370852470398, + "learning_rate": 4.981935311519694e-06, + "loss": 0.6145, + "step": 3221 + }, + { + "epoch": 0.2368769298632554, + "grad_norm": 0.8588176369667053, + "learning_rate": 4.981923747419695e-06, + "loss": 0.5873, + "step": 3222 + }, + { + "epoch": 0.23695044846346125, + "grad_norm": 0.8813894391059875, + "learning_rate": 4.981912179632935e-06, + "loss": 0.5929, + "step": 3223 + }, + { + "epoch": 0.2370239670636671, + "grad_norm": 0.8274548649787903, + "learning_rate": 4.981900608159429e-06, + "loss": 0.5569, + "step": 3224 + }, + { + "epoch": 0.23709748566387295, + "grad_norm": 0.9263317584991455, + "learning_rate": 4.981889032999196e-06, + "loss": 0.6191, + "step": 3225 + }, + { + "epoch": 0.2371710042640788, + "grad_norm": 0.8685421943664551, + "learning_rate": 4.981877454152254e-06, + "loss": 0.5665, + "step": 3226 + }, + { + "epoch": 0.23724452286428466, + "grad_norm": 0.8399171233177185, + "learning_rate": 4.981865871618617e-06, + "loss": 0.5873, + "step": 3227 + }, + { + "epoch": 0.2373180414644905, + "grad_norm": 0.877690851688385, + "learning_rate": 4.981854285398305e-06, + "loss": 0.5978, + "step": 3228 + }, + { + "epoch": 0.23739156006469636, + "grad_norm": 0.8715471029281616, + "learning_rate": 4.981842695491335e-06, + "loss": 0.6255, + "step": 3229 + }, + { + "epoch": 0.2374650786649022, + "grad_norm": 0.8768184185028076, + "learning_rate": 4.981831101897723e-06, + "loss": 0.562, + "step": 3230 + }, + { + "epoch": 0.23753859726510806, + "grad_norm": 0.935276985168457, + "learning_rate": 4.981819504617487e-06, + "loss": 0.5856, + "step": 3231 + }, + { + "epoch": 0.23761211586531392, + "grad_norm": 0.9383913278579712, + "learning_rate": 4.981807903650644e-06, + "loss": 0.6393, + "step": 3232 + }, + { + "epoch": 0.23768563446551977, + "grad_norm": 0.8736304640769958, + "learning_rate": 4.98179629899721e-06, + "loss": 0.572, + "step": 3233 + }, + { + "epoch": 0.23775915306572562, + "grad_norm": 0.8162198066711426, + "learning_rate": 4.981784690657206e-06, + "loss": 0.5834, + "step": 3234 + }, + { + "epoch": 0.23783267166593147, + "grad_norm": 0.8645538687705994, + "learning_rate": 4.981773078630645e-06, + "loss": 0.5711, + "step": 3235 + }, + { + "epoch": 0.23790619026613732, + "grad_norm": 0.8245202302932739, + "learning_rate": 4.9817614629175455e-06, + "loss": 0.5303, + "step": 3236 + }, + { + "epoch": 0.23797970886634318, + "grad_norm": 0.8622675538063049, + "learning_rate": 4.9817498435179265e-06, + "loss": 0.5738, + "step": 3237 + }, + { + "epoch": 0.23805322746654903, + "grad_norm": 0.8438717722892761, + "learning_rate": 4.981738220431804e-06, + "loss": 0.5189, + "step": 3238 + }, + { + "epoch": 0.23812674606675488, + "grad_norm": 0.91068434715271, + "learning_rate": 4.981726593659194e-06, + "loss": 0.6082, + "step": 3239 + }, + { + "epoch": 0.23820026466696073, + "grad_norm": 0.8904057145118713, + "learning_rate": 4.981714963200117e-06, + "loss": 0.611, + "step": 3240 + }, + { + "epoch": 0.23827378326716658, + "grad_norm": 0.8538376092910767, + "learning_rate": 4.981703329054587e-06, + "loss": 0.5954, + "step": 3241 + }, + { + "epoch": 0.23834730186737244, + "grad_norm": 0.8370858430862427, + "learning_rate": 4.981691691222623e-06, + "loss": 0.5368, + "step": 3242 + }, + { + "epoch": 0.2384208204675783, + "grad_norm": 0.8185572624206543, + "learning_rate": 4.981680049704242e-06, + "loss": 0.5058, + "step": 3243 + }, + { + "epoch": 0.23849433906778414, + "grad_norm": 0.8563992977142334, + "learning_rate": 4.98166840449946e-06, + "loss": 0.5409, + "step": 3244 + }, + { + "epoch": 0.23856785766799, + "grad_norm": 0.9309245347976685, + "learning_rate": 4.981656755608296e-06, + "loss": 0.5564, + "step": 3245 + }, + { + "epoch": 0.23864137626819584, + "grad_norm": 0.8896757960319519, + "learning_rate": 4.981645103030768e-06, + "loss": 0.5777, + "step": 3246 + }, + { + "epoch": 0.2387148948684017, + "grad_norm": 0.8968111872673035, + "learning_rate": 4.981633446766891e-06, + "loss": 0.61, + "step": 3247 + }, + { + "epoch": 0.23878841346860755, + "grad_norm": 0.894108772277832, + "learning_rate": 4.981621786816684e-06, + "loss": 0.5976, + "step": 3248 + }, + { + "epoch": 0.2388619320688134, + "grad_norm": 0.8583199977874756, + "learning_rate": 4.9816101231801625e-06, + "loss": 0.6074, + "step": 3249 + }, + { + "epoch": 0.23893545066901925, + "grad_norm": 0.8819045424461365, + "learning_rate": 4.981598455857346e-06, + "loss": 0.5939, + "step": 3250 + }, + { + "epoch": 0.2390089692692251, + "grad_norm": 0.8733527064323425, + "learning_rate": 4.981586784848251e-06, + "loss": 0.5758, + "step": 3251 + }, + { + "epoch": 0.23908248786943095, + "grad_norm": 0.8859284520149231, + "learning_rate": 4.981575110152895e-06, + "loss": 0.5981, + "step": 3252 + }, + { + "epoch": 0.2391560064696368, + "grad_norm": 0.8664432168006897, + "learning_rate": 4.981563431771294e-06, + "loss": 0.5828, + "step": 3253 + }, + { + "epoch": 0.23922952506984266, + "grad_norm": 0.8764304518699646, + "learning_rate": 4.981551749703467e-06, + "loss": 0.6302, + "step": 3254 + }, + { + "epoch": 0.2393030436700485, + "grad_norm": 0.8504794239997864, + "learning_rate": 4.9815400639494314e-06, + "loss": 0.5915, + "step": 3255 + }, + { + "epoch": 0.23937656227025436, + "grad_norm": 0.8692317008972168, + "learning_rate": 4.981528374509203e-06, + "loss": 0.5618, + "step": 3256 + }, + { + "epoch": 0.2394500808704602, + "grad_norm": 0.8427916765213013, + "learning_rate": 4.9815166813828e-06, + "loss": 0.5878, + "step": 3257 + }, + { + "epoch": 0.23952359947066607, + "grad_norm": 0.9558819532394409, + "learning_rate": 4.98150498457024e-06, + "loss": 0.6453, + "step": 3258 + }, + { + "epoch": 0.23959711807087192, + "grad_norm": 0.8836528062820435, + "learning_rate": 4.981493284071541e-06, + "loss": 0.6101, + "step": 3259 + }, + { + "epoch": 0.23967063667107777, + "grad_norm": 0.8751712441444397, + "learning_rate": 4.981481579886719e-06, + "loss": 0.575, + "step": 3260 + }, + { + "epoch": 0.23974415527128362, + "grad_norm": 0.8874496817588806, + "learning_rate": 4.981469872015792e-06, + "loss": 0.592, + "step": 3261 + }, + { + "epoch": 0.2398176738714895, + "grad_norm": 0.8735393285751343, + "learning_rate": 4.981458160458777e-06, + "loss": 0.6007, + "step": 3262 + }, + { + "epoch": 0.23989119247169535, + "grad_norm": 0.8782009482383728, + "learning_rate": 4.981446445215693e-06, + "loss": 0.5555, + "step": 3263 + }, + { + "epoch": 0.2399647110719012, + "grad_norm": 0.869060754776001, + "learning_rate": 4.981434726286555e-06, + "loss": 0.5865, + "step": 3264 + }, + { + "epoch": 0.24003822967210706, + "grad_norm": 0.8608112931251526, + "learning_rate": 4.981423003671382e-06, + "loss": 0.5714, + "step": 3265 + }, + { + "epoch": 0.2401117482723129, + "grad_norm": 0.8935235738754272, + "learning_rate": 4.98141127737019e-06, + "loss": 0.6453, + "step": 3266 + }, + { + "epoch": 0.24018526687251876, + "grad_norm": 0.8183345794677734, + "learning_rate": 4.981399547382999e-06, + "loss": 0.5868, + "step": 3267 + }, + { + "epoch": 0.2402587854727246, + "grad_norm": 0.8903567790985107, + "learning_rate": 4.981387813709825e-06, + "loss": 0.6223, + "step": 3268 + }, + { + "epoch": 0.24033230407293046, + "grad_norm": 0.8435699343681335, + "learning_rate": 4.981376076350684e-06, + "loss": 0.546, + "step": 3269 + }, + { + "epoch": 0.24040582267313632, + "grad_norm": 0.9057692289352417, + "learning_rate": 4.981364335305595e-06, + "loss": 0.5906, + "step": 3270 + }, + { + "epoch": 0.24047934127334217, + "grad_norm": 0.8918054699897766, + "learning_rate": 4.981352590574576e-06, + "loss": 0.586, + "step": 3271 + }, + { + "epoch": 0.24055285987354802, + "grad_norm": 0.861311674118042, + "learning_rate": 4.981340842157643e-06, + "loss": 0.6136, + "step": 3272 + }, + { + "epoch": 0.24062637847375387, + "grad_norm": 0.8648567199707031, + "learning_rate": 4.9813290900548135e-06, + "loss": 0.5929, + "step": 3273 + }, + { + "epoch": 0.24069989707395972, + "grad_norm": 0.9014003872871399, + "learning_rate": 4.981317334266107e-06, + "loss": 0.5913, + "step": 3274 + }, + { + "epoch": 0.24077341567416558, + "grad_norm": 0.906273365020752, + "learning_rate": 4.981305574791538e-06, + "loss": 0.5817, + "step": 3275 + }, + { + "epoch": 0.24084693427437143, + "grad_norm": 0.8824869394302368, + "learning_rate": 4.981293811631126e-06, + "loss": 0.5582, + "step": 3276 + }, + { + "epoch": 0.24092045287457728, + "grad_norm": 0.8320083618164062, + "learning_rate": 4.981282044784888e-06, + "loss": 0.5542, + "step": 3277 + }, + { + "epoch": 0.24099397147478313, + "grad_norm": 0.8297879695892334, + "learning_rate": 4.9812702742528415e-06, + "loss": 0.5808, + "step": 3278 + }, + { + "epoch": 0.24106749007498898, + "grad_norm": 0.8459263443946838, + "learning_rate": 4.9812585000350035e-06, + "loss": 0.5579, + "step": 3279 + }, + { + "epoch": 0.24114100867519483, + "grad_norm": 0.8378893733024597, + "learning_rate": 4.981246722131392e-06, + "loss": 0.5842, + "step": 3280 + }, + { + "epoch": 0.2412145272754007, + "grad_norm": 0.8667270541191101, + "learning_rate": 4.981234940542025e-06, + "loss": 0.5708, + "step": 3281 + }, + { + "epoch": 0.24128804587560654, + "grad_norm": 0.8471582531929016, + "learning_rate": 4.981223155266918e-06, + "loss": 0.5803, + "step": 3282 + }, + { + "epoch": 0.2413615644758124, + "grad_norm": 0.8993300199508667, + "learning_rate": 4.981211366306091e-06, + "loss": 0.5783, + "step": 3283 + }, + { + "epoch": 0.24143508307601824, + "grad_norm": 0.905836284160614, + "learning_rate": 4.98119957365956e-06, + "loss": 0.5503, + "step": 3284 + }, + { + "epoch": 0.2415086016762241, + "grad_norm": 0.8572825789451599, + "learning_rate": 4.981187777327343e-06, + "loss": 0.6269, + "step": 3285 + }, + { + "epoch": 0.24158212027642995, + "grad_norm": 0.8103967905044556, + "learning_rate": 4.981175977309457e-06, + "loss": 0.5453, + "step": 3286 + }, + { + "epoch": 0.2416556388766358, + "grad_norm": 0.8739509582519531, + "learning_rate": 4.981164173605921e-06, + "loss": 0.5992, + "step": 3287 + }, + { + "epoch": 0.24172915747684165, + "grad_norm": 0.8718313574790955, + "learning_rate": 4.981152366216751e-06, + "loss": 0.585, + "step": 3288 + }, + { + "epoch": 0.2418026760770475, + "grad_norm": 0.8465788960456848, + "learning_rate": 4.981140555141965e-06, + "loss": 0.5655, + "step": 3289 + }, + { + "epoch": 0.24187619467725335, + "grad_norm": 0.8347590565681458, + "learning_rate": 4.98112874038158e-06, + "loss": 0.5582, + "step": 3290 + }, + { + "epoch": 0.2419497132774592, + "grad_norm": 0.8204392194747925, + "learning_rate": 4.981116921935614e-06, + "loss": 0.5662, + "step": 3291 + }, + { + "epoch": 0.24202323187766506, + "grad_norm": 0.9044927954673767, + "learning_rate": 4.9811050998040854e-06, + "loss": 0.6268, + "step": 3292 + }, + { + "epoch": 0.2420967504778709, + "grad_norm": 0.8293443918228149, + "learning_rate": 4.981093273987011e-06, + "loss": 0.5767, + "step": 3293 + }, + { + "epoch": 0.24217026907807676, + "grad_norm": 0.8704966306686401, + "learning_rate": 4.981081444484408e-06, + "loss": 0.5898, + "step": 3294 + }, + { + "epoch": 0.2422437876782826, + "grad_norm": 0.862064003944397, + "learning_rate": 4.981069611296295e-06, + "loss": 0.5997, + "step": 3295 + }, + { + "epoch": 0.24231730627848846, + "grad_norm": 0.8400482535362244, + "learning_rate": 4.981057774422688e-06, + "loss": 0.5763, + "step": 3296 + }, + { + "epoch": 0.24239082487869432, + "grad_norm": 0.8945494294166565, + "learning_rate": 4.981045933863607e-06, + "loss": 0.6107, + "step": 3297 + }, + { + "epoch": 0.24246434347890017, + "grad_norm": 0.9220641255378723, + "learning_rate": 4.9810340896190664e-06, + "loss": 0.5486, + "step": 3298 + }, + { + "epoch": 0.24253786207910602, + "grad_norm": 0.898455023765564, + "learning_rate": 4.981022241689087e-06, + "loss": 0.62, + "step": 3299 + }, + { + "epoch": 0.24261138067931187, + "grad_norm": 0.8761276602745056, + "learning_rate": 4.981010390073684e-06, + "loss": 0.5647, + "step": 3300 + }, + { + "epoch": 0.24268489927951772, + "grad_norm": 0.8770159482955933, + "learning_rate": 4.980998534772876e-06, + "loss": 0.6331, + "step": 3301 + }, + { + "epoch": 0.24275841787972358, + "grad_norm": 0.8434653878211975, + "learning_rate": 4.980986675786681e-06, + "loss": 0.5634, + "step": 3302 + }, + { + "epoch": 0.24283193647992943, + "grad_norm": 0.8105149865150452, + "learning_rate": 4.9809748131151154e-06, + "loss": 0.5585, + "step": 3303 + }, + { + "epoch": 0.24290545508013528, + "grad_norm": 0.836749792098999, + "learning_rate": 4.980962946758197e-06, + "loss": 0.5814, + "step": 3304 + }, + { + "epoch": 0.24297897368034113, + "grad_norm": 0.925025999546051, + "learning_rate": 4.980951076715946e-06, + "loss": 0.5666, + "step": 3305 + }, + { + "epoch": 0.24305249228054698, + "grad_norm": 0.8842073678970337, + "learning_rate": 4.980939202988377e-06, + "loss": 0.5782, + "step": 3306 + }, + { + "epoch": 0.24312601088075284, + "grad_norm": 0.9285688996315002, + "learning_rate": 4.980927325575509e-06, + "loss": 0.6338, + "step": 3307 + }, + { + "epoch": 0.2431995294809587, + "grad_norm": 0.8293660879135132, + "learning_rate": 4.980915444477359e-06, + "loss": 0.5724, + "step": 3308 + }, + { + "epoch": 0.24327304808116454, + "grad_norm": 0.8793867230415344, + "learning_rate": 4.980903559693944e-06, + "loss": 0.5755, + "step": 3309 + }, + { + "epoch": 0.2433465666813704, + "grad_norm": 0.9006592631340027, + "learning_rate": 4.9808916712252845e-06, + "loss": 0.5711, + "step": 3310 + }, + { + "epoch": 0.24342008528157624, + "grad_norm": 0.9039967060089111, + "learning_rate": 4.980879779071395e-06, + "loss": 0.613, + "step": 3311 + }, + { + "epoch": 0.2434936038817821, + "grad_norm": 0.9362679123878479, + "learning_rate": 4.980867883232295e-06, + "loss": 0.5718, + "step": 3312 + }, + { + "epoch": 0.24356712248198795, + "grad_norm": 0.8637611269950867, + "learning_rate": 4.980855983708002e-06, + "loss": 0.5461, + "step": 3313 + }, + { + "epoch": 0.2436406410821938, + "grad_norm": 0.8836668133735657, + "learning_rate": 4.980844080498533e-06, + "loss": 0.5761, + "step": 3314 + }, + { + "epoch": 0.24371415968239965, + "grad_norm": 0.8861873149871826, + "learning_rate": 4.980832173603905e-06, + "loss": 0.6052, + "step": 3315 + }, + { + "epoch": 0.2437876782826055, + "grad_norm": 0.8770596385002136, + "learning_rate": 4.980820263024137e-06, + "loss": 0.5451, + "step": 3316 + }, + { + "epoch": 0.24386119688281135, + "grad_norm": 0.834925651550293, + "learning_rate": 4.9808083487592465e-06, + "loss": 0.5732, + "step": 3317 + }, + { + "epoch": 0.2439347154830172, + "grad_norm": 0.8935666680335999, + "learning_rate": 4.980796430809252e-06, + "loss": 0.5972, + "step": 3318 + }, + { + "epoch": 0.24400823408322306, + "grad_norm": 0.8927639722824097, + "learning_rate": 4.980784509174169e-06, + "loss": 0.5712, + "step": 3319 + }, + { + "epoch": 0.2440817526834289, + "grad_norm": 0.9020939469337463, + "learning_rate": 4.980772583854017e-06, + "loss": 0.6237, + "step": 3320 + }, + { + "epoch": 0.24415527128363476, + "grad_norm": 0.8721089363098145, + "learning_rate": 4.980760654848813e-06, + "loss": 0.5399, + "step": 3321 + }, + { + "epoch": 0.2442287898838406, + "grad_norm": 0.9052689075469971, + "learning_rate": 4.980748722158575e-06, + "loss": 0.5803, + "step": 3322 + }, + { + "epoch": 0.24430230848404647, + "grad_norm": 0.9097012281417847, + "learning_rate": 4.98073678578332e-06, + "loss": 0.6096, + "step": 3323 + }, + { + "epoch": 0.24437582708425232, + "grad_norm": 0.833879292011261, + "learning_rate": 4.980724845723067e-06, + "loss": 0.5842, + "step": 3324 + }, + { + "epoch": 0.24444934568445817, + "grad_norm": 0.9208071827888489, + "learning_rate": 4.980712901977833e-06, + "loss": 0.5917, + "step": 3325 + }, + { + "epoch": 0.24452286428466402, + "grad_norm": 0.8791316747665405, + "learning_rate": 4.980700954547637e-06, + "loss": 0.6225, + "step": 3326 + }, + { + "epoch": 0.24459638288486987, + "grad_norm": 0.8813761472702026, + "learning_rate": 4.980689003432494e-06, + "loss": 0.5995, + "step": 3327 + }, + { + "epoch": 0.24466990148507572, + "grad_norm": 0.9080275893211365, + "learning_rate": 4.980677048632424e-06, + "loss": 0.5923, + "step": 3328 + }, + { + "epoch": 0.24474342008528158, + "grad_norm": 0.8666114807128906, + "learning_rate": 4.980665090147443e-06, + "loss": 0.5541, + "step": 3329 + }, + { + "epoch": 0.24481693868548743, + "grad_norm": 0.8403679132461548, + "learning_rate": 4.980653127977571e-06, + "loss": 0.5808, + "step": 3330 + }, + { + "epoch": 0.24489045728569328, + "grad_norm": 0.8459550142288208, + "learning_rate": 4.980641162122824e-06, + "loss": 0.5827, + "step": 3331 + }, + { + "epoch": 0.24496397588589913, + "grad_norm": 0.9134854674339294, + "learning_rate": 4.9806291925832205e-06, + "loss": 0.6007, + "step": 3332 + }, + { + "epoch": 0.24503749448610498, + "grad_norm": 0.8471869230270386, + "learning_rate": 4.980617219358779e-06, + "loss": 0.5607, + "step": 3333 + }, + { + "epoch": 0.24511101308631084, + "grad_norm": 0.9009538888931274, + "learning_rate": 4.980605242449516e-06, + "loss": 0.6235, + "step": 3334 + }, + { + "epoch": 0.2451845316865167, + "grad_norm": 0.8641784191131592, + "learning_rate": 4.9805932618554485e-06, + "loss": 0.5554, + "step": 3335 + }, + { + "epoch": 0.24525805028672254, + "grad_norm": 0.8379017114639282, + "learning_rate": 4.980581277576597e-06, + "loss": 0.5665, + "step": 3336 + }, + { + "epoch": 0.2453315688869284, + "grad_norm": 0.8440181016921997, + "learning_rate": 4.9805692896129775e-06, + "loss": 0.5456, + "step": 3337 + }, + { + "epoch": 0.24540508748713424, + "grad_norm": 0.838815450668335, + "learning_rate": 4.9805572979646085e-06, + "loss": 0.5949, + "step": 3338 + }, + { + "epoch": 0.2454786060873401, + "grad_norm": 0.8597015738487244, + "learning_rate": 4.9805453026315065e-06, + "loss": 0.5621, + "step": 3339 + }, + { + "epoch": 0.24555212468754595, + "grad_norm": 0.9197859764099121, + "learning_rate": 4.980533303613691e-06, + "loss": 0.6408, + "step": 3340 + }, + { + "epoch": 0.2456256432877518, + "grad_norm": 0.812284529209137, + "learning_rate": 4.9805213009111785e-06, + "loss": 0.5414, + "step": 3341 + }, + { + "epoch": 0.24569916188795765, + "grad_norm": 0.9153264164924622, + "learning_rate": 4.980509294523988e-06, + "loss": 0.5857, + "step": 3342 + }, + { + "epoch": 0.2457726804881635, + "grad_norm": 0.8735047578811646, + "learning_rate": 4.980497284452137e-06, + "loss": 0.568, + "step": 3343 + }, + { + "epoch": 0.24584619908836935, + "grad_norm": 0.931809663772583, + "learning_rate": 4.980485270695643e-06, + "loss": 0.6209, + "step": 3344 + }, + { + "epoch": 0.2459197176885752, + "grad_norm": 0.8980610966682434, + "learning_rate": 4.9804732532545235e-06, + "loss": 0.624, + "step": 3345 + }, + { + "epoch": 0.24599323628878106, + "grad_norm": 0.861431360244751, + "learning_rate": 4.980461232128797e-06, + "loss": 0.5884, + "step": 3346 + }, + { + "epoch": 0.2460667548889869, + "grad_norm": 0.8725383877754211, + "learning_rate": 4.980449207318481e-06, + "loss": 0.6035, + "step": 3347 + }, + { + "epoch": 0.24614027348919276, + "grad_norm": 0.8811129927635193, + "learning_rate": 4.980437178823594e-06, + "loss": 0.5653, + "step": 3348 + }, + { + "epoch": 0.24621379208939861, + "grad_norm": 0.8876240253448486, + "learning_rate": 4.9804251466441535e-06, + "loss": 0.6005, + "step": 3349 + }, + { + "epoch": 0.24628731068960447, + "grad_norm": 0.8940693736076355, + "learning_rate": 4.980413110780177e-06, + "loss": 0.5961, + "step": 3350 + }, + { + "epoch": 0.24636082928981032, + "grad_norm": 0.8388009071350098, + "learning_rate": 4.980401071231682e-06, + "loss": 0.5699, + "step": 3351 + }, + { + "epoch": 0.24643434789001617, + "grad_norm": 0.9332628846168518, + "learning_rate": 4.980389027998687e-06, + "loss": 0.5939, + "step": 3352 + }, + { + "epoch": 0.24650786649022202, + "grad_norm": 0.9029348492622375, + "learning_rate": 4.980376981081211e-06, + "loss": 0.6037, + "step": 3353 + }, + { + "epoch": 0.24658138509042787, + "grad_norm": 0.8641435503959656, + "learning_rate": 4.98036493047927e-06, + "loss": 0.5735, + "step": 3354 + }, + { + "epoch": 0.24665490369063373, + "grad_norm": 0.8772703409194946, + "learning_rate": 4.980352876192883e-06, + "loss": 0.5842, + "step": 3355 + }, + { + "epoch": 0.24672842229083958, + "grad_norm": 0.8131714463233948, + "learning_rate": 4.980340818222068e-06, + "loss": 0.5449, + "step": 3356 + }, + { + "epoch": 0.24680194089104543, + "grad_norm": 0.8783583045005798, + "learning_rate": 4.980328756566842e-06, + "loss": 0.5185, + "step": 3357 + }, + { + "epoch": 0.24687545949125128, + "grad_norm": 0.8544705510139465, + "learning_rate": 4.9803166912272234e-06, + "loss": 0.5305, + "step": 3358 + }, + { + "epoch": 0.24694897809145713, + "grad_norm": 0.8544558882713318, + "learning_rate": 4.980304622203231e-06, + "loss": 0.5598, + "step": 3359 + }, + { + "epoch": 0.24702249669166298, + "grad_norm": 0.9191678166389465, + "learning_rate": 4.980292549494881e-06, + "loss": 0.5735, + "step": 3360 + }, + { + "epoch": 0.24709601529186884, + "grad_norm": 0.8893195986747742, + "learning_rate": 4.980280473102192e-06, + "loss": 0.5843, + "step": 3361 + }, + { + "epoch": 0.2471695338920747, + "grad_norm": 0.8692947030067444, + "learning_rate": 4.9802683930251836e-06, + "loss": 0.5677, + "step": 3362 + }, + { + "epoch": 0.24724305249228054, + "grad_norm": 0.8638460040092468, + "learning_rate": 4.980256309263871e-06, + "loss": 0.6087, + "step": 3363 + }, + { + "epoch": 0.2473165710924864, + "grad_norm": 0.9586246013641357, + "learning_rate": 4.980244221818274e-06, + "loss": 0.5924, + "step": 3364 + }, + { + "epoch": 0.24739008969269224, + "grad_norm": 0.8943278193473816, + "learning_rate": 4.9802321306884096e-06, + "loss": 0.6087, + "step": 3365 + }, + { + "epoch": 0.2474636082928981, + "grad_norm": 0.9142093658447266, + "learning_rate": 4.9802200358742965e-06, + "loss": 0.5803, + "step": 3366 + }, + { + "epoch": 0.24753712689310395, + "grad_norm": 0.8704412579536438, + "learning_rate": 4.980207937375953e-06, + "loss": 0.5814, + "step": 3367 + }, + { + "epoch": 0.2476106454933098, + "grad_norm": 0.932197093963623, + "learning_rate": 4.980195835193396e-06, + "loss": 0.6291, + "step": 3368 + }, + { + "epoch": 0.24768416409351565, + "grad_norm": 0.9262928366661072, + "learning_rate": 4.980183729326643e-06, + "loss": 0.6256, + "step": 3369 + }, + { + "epoch": 0.2477576826937215, + "grad_norm": 0.8930256962776184, + "learning_rate": 4.9801716197757135e-06, + "loss": 0.6058, + "step": 3370 + }, + { + "epoch": 0.24783120129392736, + "grad_norm": 0.8557350039482117, + "learning_rate": 4.980159506540625e-06, + "loss": 0.5637, + "step": 3371 + }, + { + "epoch": 0.2479047198941332, + "grad_norm": 0.8596311211585999, + "learning_rate": 4.980147389621395e-06, + "loss": 0.574, + "step": 3372 + }, + { + "epoch": 0.24797823849433906, + "grad_norm": 0.8794519901275635, + "learning_rate": 4.9801352690180426e-06, + "loss": 0.533, + "step": 3373 + }, + { + "epoch": 0.2480517570945449, + "grad_norm": 0.8836476802825928, + "learning_rate": 4.980123144730585e-06, + "loss": 0.5979, + "step": 3374 + }, + { + "epoch": 0.24812527569475076, + "grad_norm": 0.8789488077163696, + "learning_rate": 4.980111016759039e-06, + "loss": 0.5974, + "step": 3375 + }, + { + "epoch": 0.24819879429495662, + "grad_norm": 0.8269225358963013, + "learning_rate": 4.980098885103425e-06, + "loss": 0.5688, + "step": 3376 + }, + { + "epoch": 0.24827231289516247, + "grad_norm": 0.8498914837837219, + "learning_rate": 4.98008674976376e-06, + "loss": 0.5649, + "step": 3377 + }, + { + "epoch": 0.24834583149536832, + "grad_norm": 0.9326357245445251, + "learning_rate": 4.980074610740062e-06, + "loss": 0.5931, + "step": 3378 + }, + { + "epoch": 0.24841935009557417, + "grad_norm": 0.896079957485199, + "learning_rate": 4.980062468032348e-06, + "loss": 0.5921, + "step": 3379 + }, + { + "epoch": 0.24849286869578002, + "grad_norm": 0.91025310754776, + "learning_rate": 4.980050321640638e-06, + "loss": 0.5711, + "step": 3380 + }, + { + "epoch": 0.24856638729598587, + "grad_norm": 0.8840847611427307, + "learning_rate": 4.980038171564948e-06, + "loss": 0.5413, + "step": 3381 + }, + { + "epoch": 0.24863990589619173, + "grad_norm": 0.9038803577423096, + "learning_rate": 4.980026017805298e-06, + "loss": 0.5926, + "step": 3382 + }, + { + "epoch": 0.24871342449639758, + "grad_norm": 0.8458215594291687, + "learning_rate": 4.9800138603617056e-06, + "loss": 0.577, + "step": 3383 + }, + { + "epoch": 0.24878694309660343, + "grad_norm": 0.8978497385978699, + "learning_rate": 4.980001699234187e-06, + "loss": 0.6092, + "step": 3384 + }, + { + "epoch": 0.24886046169680928, + "grad_norm": 0.8725066184997559, + "learning_rate": 4.979989534422763e-06, + "loss": 0.5862, + "step": 3385 + }, + { + "epoch": 0.24893398029701513, + "grad_norm": 0.8709545135498047, + "learning_rate": 4.97997736592745e-06, + "loss": 0.5675, + "step": 3386 + }, + { + "epoch": 0.24900749889722099, + "grad_norm": 0.8633967041969299, + "learning_rate": 4.9799651937482655e-06, + "loss": 0.6345, + "step": 3387 + }, + { + "epoch": 0.24908101749742684, + "grad_norm": 0.8876833915710449, + "learning_rate": 4.97995301788523e-06, + "loss": 0.5458, + "step": 3388 + }, + { + "epoch": 0.2491545360976327, + "grad_norm": 0.941777765750885, + "learning_rate": 4.979940838338359e-06, + "loss": 0.6293, + "step": 3389 + }, + { + "epoch": 0.24922805469783854, + "grad_norm": 0.8910330533981323, + "learning_rate": 4.979928655107672e-06, + "loss": 0.5651, + "step": 3390 + }, + { + "epoch": 0.2493015732980444, + "grad_norm": 0.87666255235672, + "learning_rate": 4.979916468193187e-06, + "loss": 0.5753, + "step": 3391 + }, + { + "epoch": 0.24937509189825025, + "grad_norm": 0.9122459292411804, + "learning_rate": 4.979904277594922e-06, + "loss": 0.5831, + "step": 3392 + }, + { + "epoch": 0.2494486104984561, + "grad_norm": 0.882826030254364, + "learning_rate": 4.979892083312894e-06, + "loss": 0.5345, + "step": 3393 + }, + { + "epoch": 0.24952212909866195, + "grad_norm": 0.8275552988052368, + "learning_rate": 4.979879885347122e-06, + "loss": 0.5516, + "step": 3394 + }, + { + "epoch": 0.2495956476988678, + "grad_norm": 0.8704695105552673, + "learning_rate": 4.979867683697626e-06, + "loss": 0.5741, + "step": 3395 + }, + { + "epoch": 0.24966916629907365, + "grad_norm": 0.8607016205787659, + "learning_rate": 4.979855478364421e-06, + "loss": 0.5847, + "step": 3396 + }, + { + "epoch": 0.2497426848992795, + "grad_norm": 0.8535463809967041, + "learning_rate": 4.979843269347526e-06, + "loss": 0.5884, + "step": 3397 + }, + { + "epoch": 0.24981620349948536, + "grad_norm": 0.8764389753341675, + "learning_rate": 4.979831056646961e-06, + "loss": 0.5798, + "step": 3398 + }, + { + "epoch": 0.2498897220996912, + "grad_norm": 0.8837199211120605, + "learning_rate": 4.979818840262742e-06, + "loss": 0.6052, + "step": 3399 + }, + { + "epoch": 0.24996324069989706, + "grad_norm": 0.9353827238082886, + "learning_rate": 4.979806620194888e-06, + "loss": 0.6216, + "step": 3400 + }, + { + "epoch": 0.25003675930010294, + "grad_norm": 0.9345768094062805, + "learning_rate": 4.979794396443417e-06, + "loss": 0.608, + "step": 3401 + }, + { + "epoch": 0.25011027790030876, + "grad_norm": 0.861730694770813, + "learning_rate": 4.979782169008347e-06, + "loss": 0.5763, + "step": 3402 + }, + { + "epoch": 0.25018379650051464, + "grad_norm": 0.8656452894210815, + "learning_rate": 4.979769937889697e-06, + "loss": 0.5834, + "step": 3403 + }, + { + "epoch": 0.25025731510072047, + "grad_norm": 0.8844925165176392, + "learning_rate": 4.979757703087485e-06, + "loss": 0.5549, + "step": 3404 + }, + { + "epoch": 0.25033083370092635, + "grad_norm": 0.8925885558128357, + "learning_rate": 4.979745464601728e-06, + "loss": 0.5973, + "step": 3405 + }, + { + "epoch": 0.25040435230113217, + "grad_norm": 0.862116813659668, + "learning_rate": 4.979733222432444e-06, + "loss": 0.5974, + "step": 3406 + }, + { + "epoch": 0.25047787090133805, + "grad_norm": 0.8703225255012512, + "learning_rate": 4.979720976579652e-06, + "loss": 0.5911, + "step": 3407 + }, + { + "epoch": 0.2505513895015439, + "grad_norm": 0.94173663854599, + "learning_rate": 4.979708727043372e-06, + "loss": 0.5763, + "step": 3408 + }, + { + "epoch": 0.25062490810174975, + "grad_norm": 0.8723125457763672, + "learning_rate": 4.9796964738236194e-06, + "loss": 0.5753, + "step": 3409 + }, + { + "epoch": 0.2506984267019556, + "grad_norm": 0.8367246985435486, + "learning_rate": 4.979684216920414e-06, + "loss": 0.5803, + "step": 3410 + }, + { + "epoch": 0.25077194530216146, + "grad_norm": 0.8757695555686951, + "learning_rate": 4.979671956333774e-06, + "loss": 0.5601, + "step": 3411 + }, + { + "epoch": 0.2508454639023673, + "grad_norm": 0.8182063698768616, + "learning_rate": 4.979659692063716e-06, + "loss": 0.5743, + "step": 3412 + }, + { + "epoch": 0.25091898250257316, + "grad_norm": 0.8682364225387573, + "learning_rate": 4.97964742411026e-06, + "loss": 0.5488, + "step": 3413 + }, + { + "epoch": 0.250992501102779, + "grad_norm": 0.8660481572151184, + "learning_rate": 4.979635152473423e-06, + "loss": 0.6129, + "step": 3414 + }, + { + "epoch": 0.25106601970298487, + "grad_norm": 0.8805404305458069, + "learning_rate": 4.979622877153224e-06, + "loss": 0.6131, + "step": 3415 + }, + { + "epoch": 0.2511395383031907, + "grad_norm": 0.8906995058059692, + "learning_rate": 4.97961059814968e-06, + "loss": 0.5489, + "step": 3416 + }, + { + "epoch": 0.25121305690339657, + "grad_norm": 0.8780801296234131, + "learning_rate": 4.979598315462812e-06, + "loss": 0.5925, + "step": 3417 + }, + { + "epoch": 0.2512865755036024, + "grad_norm": 0.8717365264892578, + "learning_rate": 4.9795860290926355e-06, + "loss": 0.5812, + "step": 3418 + }, + { + "epoch": 0.2513600941038083, + "grad_norm": 0.8720627427101135, + "learning_rate": 4.97957373903917e-06, + "loss": 0.5675, + "step": 3419 + }, + { + "epoch": 0.2514336127040141, + "grad_norm": 0.8646062612533569, + "learning_rate": 4.9795614453024335e-06, + "loss": 0.6016, + "step": 3420 + }, + { + "epoch": 0.25150713130422, + "grad_norm": 0.8595025539398193, + "learning_rate": 4.979549147882444e-06, + "loss": 0.566, + "step": 3421 + }, + { + "epoch": 0.2515806499044258, + "grad_norm": 0.8342881798744202, + "learning_rate": 4.97953684677922e-06, + "loss": 0.5879, + "step": 3422 + }, + { + "epoch": 0.2516541685046317, + "grad_norm": 0.8597196340560913, + "learning_rate": 4.97952454199278e-06, + "loss": 0.5933, + "step": 3423 + }, + { + "epoch": 0.2517276871048375, + "grad_norm": 0.8250268697738647, + "learning_rate": 4.979512233523141e-06, + "loss": 0.5859, + "step": 3424 + }, + { + "epoch": 0.2518012057050434, + "grad_norm": 0.9325810670852661, + "learning_rate": 4.979499921370324e-06, + "loss": 0.6098, + "step": 3425 + }, + { + "epoch": 0.2518747243052492, + "grad_norm": 0.8292895555496216, + "learning_rate": 4.979487605534346e-06, + "loss": 0.5382, + "step": 3426 + }, + { + "epoch": 0.2519482429054551, + "grad_norm": 0.8761629462242126, + "learning_rate": 4.979475286015223e-06, + "loss": 0.5942, + "step": 3427 + }, + { + "epoch": 0.2520217615056609, + "grad_norm": 0.8467264175415039, + "learning_rate": 4.979462962812977e-06, + "loss": 0.5928, + "step": 3428 + }, + { + "epoch": 0.2520952801058668, + "grad_norm": 0.9136559367179871, + "learning_rate": 4.979450635927623e-06, + "loss": 0.6052, + "step": 3429 + }, + { + "epoch": 0.2521687987060726, + "grad_norm": 0.8636101484298706, + "learning_rate": 4.979438305359181e-06, + "loss": 0.5615, + "step": 3430 + }, + { + "epoch": 0.2522423173062785, + "grad_norm": 0.8675488829612732, + "learning_rate": 4.97942597110767e-06, + "loss": 0.5896, + "step": 3431 + }, + { + "epoch": 0.2523158359064843, + "grad_norm": 0.8950270414352417, + "learning_rate": 4.979413633173108e-06, + "loss": 0.5407, + "step": 3432 + }, + { + "epoch": 0.2523893545066902, + "grad_norm": 0.8552728891372681, + "learning_rate": 4.979401291555512e-06, + "loss": 0.5556, + "step": 3433 + }, + { + "epoch": 0.252462873106896, + "grad_norm": 0.9124823212623596, + "learning_rate": 4.979388946254901e-06, + "loss": 0.596, + "step": 3434 + }, + { + "epoch": 0.2525363917071019, + "grad_norm": 0.8009727597236633, + "learning_rate": 4.9793765972712935e-06, + "loss": 0.5879, + "step": 3435 + }, + { + "epoch": 0.25260991030730773, + "grad_norm": 0.8734394311904907, + "learning_rate": 4.979364244604708e-06, + "loss": 0.5755, + "step": 3436 + }, + { + "epoch": 0.2526834289075136, + "grad_norm": 0.8415405750274658, + "learning_rate": 4.979351888255163e-06, + "loss": 0.5538, + "step": 3437 + }, + { + "epoch": 0.25275694750771943, + "grad_norm": 0.8625704050064087, + "learning_rate": 4.979339528222676e-06, + "loss": 0.6065, + "step": 3438 + }, + { + "epoch": 0.2528304661079253, + "grad_norm": 0.8463070392608643, + "learning_rate": 4.979327164507266e-06, + "loss": 0.5252, + "step": 3439 + }, + { + "epoch": 0.25290398470813114, + "grad_norm": 0.8648967146873474, + "learning_rate": 4.9793147971089514e-06, + "loss": 0.5322, + "step": 3440 + }, + { + "epoch": 0.252977503308337, + "grad_norm": 0.8424087166786194, + "learning_rate": 4.97930242602775e-06, + "loss": 0.5812, + "step": 3441 + }, + { + "epoch": 0.25305102190854284, + "grad_norm": 0.887228786945343, + "learning_rate": 4.979290051263681e-06, + "loss": 0.599, + "step": 3442 + }, + { + "epoch": 0.2531245405087487, + "grad_norm": 0.820136547088623, + "learning_rate": 4.979277672816763e-06, + "loss": 0.5564, + "step": 3443 + }, + { + "epoch": 0.25319805910895454, + "grad_norm": 0.9365143179893494, + "learning_rate": 4.979265290687013e-06, + "loss": 0.5735, + "step": 3444 + }, + { + "epoch": 0.2532715777091604, + "grad_norm": 0.8362377285957336, + "learning_rate": 4.979252904874451e-06, + "loss": 0.5279, + "step": 3445 + }, + { + "epoch": 0.25334509630936625, + "grad_norm": 0.8892658948898315, + "learning_rate": 4.979240515379093e-06, + "loss": 0.5954, + "step": 3446 + }, + { + "epoch": 0.2534186149095721, + "grad_norm": 0.8481915593147278, + "learning_rate": 4.97922812220096e-06, + "loss": 0.5761, + "step": 3447 + }, + { + "epoch": 0.25349213350977795, + "grad_norm": 0.8933392763137817, + "learning_rate": 4.97921572534007e-06, + "loss": 0.5925, + "step": 3448 + }, + { + "epoch": 0.25356565210998383, + "grad_norm": 0.8315197825431824, + "learning_rate": 4.97920332479644e-06, + "loss": 0.5504, + "step": 3449 + }, + { + "epoch": 0.25363917071018965, + "grad_norm": 0.8738065958023071, + "learning_rate": 4.9791909205700885e-06, + "loss": 0.5657, + "step": 3450 + }, + { + "epoch": 0.25371268931039553, + "grad_norm": 0.8436481356620789, + "learning_rate": 4.979178512661036e-06, + "loss": 0.6125, + "step": 3451 + }, + { + "epoch": 0.25378620791060136, + "grad_norm": 0.8440051078796387, + "learning_rate": 4.979166101069299e-06, + "loss": 0.5513, + "step": 3452 + }, + { + "epoch": 0.25385972651080724, + "grad_norm": 0.8793778419494629, + "learning_rate": 4.9791536857948965e-06, + "loss": 0.6044, + "step": 3453 + }, + { + "epoch": 0.25393324511101306, + "grad_norm": 0.9046981930732727, + "learning_rate": 4.979141266837847e-06, + "loss": 0.5679, + "step": 3454 + }, + { + "epoch": 0.25400676371121894, + "grad_norm": 0.8654525279998779, + "learning_rate": 4.979128844198169e-06, + "loss": 0.5838, + "step": 3455 + }, + { + "epoch": 0.25408028231142477, + "grad_norm": 0.8263704180717468, + "learning_rate": 4.97911641787588e-06, + "loss": 0.5532, + "step": 3456 + }, + { + "epoch": 0.25415380091163065, + "grad_norm": 0.8809503316879272, + "learning_rate": 4.979103987871001e-06, + "loss": 0.5606, + "step": 3457 + }, + { + "epoch": 0.25422731951183647, + "grad_norm": 0.8773579001426697, + "learning_rate": 4.979091554183547e-06, + "loss": 0.5826, + "step": 3458 + }, + { + "epoch": 0.25430083811204235, + "grad_norm": 0.9026330709457397, + "learning_rate": 4.97907911681354e-06, + "loss": 0.577, + "step": 3459 + }, + { + "epoch": 0.2543743567122482, + "grad_norm": 0.8166041970252991, + "learning_rate": 4.979066675760995e-06, + "loss": 0.5432, + "step": 3460 + }, + { + "epoch": 0.25444787531245405, + "grad_norm": 0.9045501351356506, + "learning_rate": 4.979054231025934e-06, + "loss": 0.577, + "step": 3461 + }, + { + "epoch": 0.2545213939126599, + "grad_norm": 0.9043230414390564, + "learning_rate": 4.979041782608372e-06, + "loss": 0.5298, + "step": 3462 + }, + { + "epoch": 0.25459491251286576, + "grad_norm": 0.8946472406387329, + "learning_rate": 4.97902933050833e-06, + "loss": 0.5569, + "step": 3463 + }, + { + "epoch": 0.2546684311130716, + "grad_norm": 0.8859156370162964, + "learning_rate": 4.979016874725825e-06, + "loss": 0.604, + "step": 3464 + }, + { + "epoch": 0.25474194971327746, + "grad_norm": 0.8962371945381165, + "learning_rate": 4.979004415260877e-06, + "loss": 0.5317, + "step": 3465 + }, + { + "epoch": 0.2548154683134833, + "grad_norm": 0.8661246299743652, + "learning_rate": 4.978991952113504e-06, + "loss": 0.5754, + "step": 3466 + }, + { + "epoch": 0.25488898691368916, + "grad_norm": 0.8225045204162598, + "learning_rate": 4.978979485283724e-06, + "loss": 0.6015, + "step": 3467 + }, + { + "epoch": 0.254962505513895, + "grad_norm": 0.8494601845741272, + "learning_rate": 4.978967014771555e-06, + "loss": 0.5825, + "step": 3468 + }, + { + "epoch": 0.25503602411410087, + "grad_norm": 0.88455730676651, + "learning_rate": 4.9789545405770166e-06, + "loss": 0.5622, + "step": 3469 + }, + { + "epoch": 0.25510954271430675, + "grad_norm": 0.8690223693847656, + "learning_rate": 4.978942062700126e-06, + "loss": 0.6073, + "step": 3470 + }, + { + "epoch": 0.25518306131451257, + "grad_norm": 0.8638222217559814, + "learning_rate": 4.978929581140904e-06, + "loss": 0.5503, + "step": 3471 + }, + { + "epoch": 0.25525657991471845, + "grad_norm": 0.8603418469429016, + "learning_rate": 4.978917095899368e-06, + "loss": 0.6034, + "step": 3472 + }, + { + "epoch": 0.2553300985149243, + "grad_norm": 0.9051628708839417, + "learning_rate": 4.978904606975536e-06, + "loss": 0.6159, + "step": 3473 + }, + { + "epoch": 0.25540361711513015, + "grad_norm": 0.844060480594635, + "learning_rate": 4.9788921143694265e-06, + "loss": 0.5496, + "step": 3474 + }, + { + "epoch": 0.255477135715336, + "grad_norm": 0.9023271799087524, + "learning_rate": 4.978879618081059e-06, + "loss": 0.5377, + "step": 3475 + }, + { + "epoch": 0.25555065431554186, + "grad_norm": 0.8572578430175781, + "learning_rate": 4.978867118110452e-06, + "loss": 0.5823, + "step": 3476 + }, + { + "epoch": 0.2556241729157477, + "grad_norm": 0.8602006435394287, + "learning_rate": 4.978854614457622e-06, + "loss": 0.5817, + "step": 3477 + }, + { + "epoch": 0.25569769151595356, + "grad_norm": 0.9035036563873291, + "learning_rate": 4.978842107122591e-06, + "loss": 0.5707, + "step": 3478 + }, + { + "epoch": 0.2557712101161594, + "grad_norm": 0.824791669845581, + "learning_rate": 4.978829596105374e-06, + "loss": 0.5496, + "step": 3479 + }, + { + "epoch": 0.25584472871636527, + "grad_norm": 0.9011222720146179, + "learning_rate": 4.978817081405993e-06, + "loss": 0.5965, + "step": 3480 + }, + { + "epoch": 0.2559182473165711, + "grad_norm": 0.8698598742485046, + "learning_rate": 4.978804563024465e-06, + "loss": 0.6211, + "step": 3481 + }, + { + "epoch": 0.25599176591677697, + "grad_norm": 0.8380092978477478, + "learning_rate": 4.9787920409608085e-06, + "loss": 0.5478, + "step": 3482 + }, + { + "epoch": 0.2560652845169828, + "grad_norm": 0.9408102631568909, + "learning_rate": 4.9787795152150405e-06, + "loss": 0.6209, + "step": 3483 + }, + { + "epoch": 0.2561388031171887, + "grad_norm": 0.895717978477478, + "learning_rate": 4.978766985787183e-06, + "loss": 0.5626, + "step": 3484 + }, + { + "epoch": 0.2562123217173945, + "grad_norm": 0.8929533958435059, + "learning_rate": 4.978754452677252e-06, + "loss": 0.5726, + "step": 3485 + }, + { + "epoch": 0.2562858403176004, + "grad_norm": 0.8323006629943848, + "learning_rate": 4.978741915885267e-06, + "loss": 0.5601, + "step": 3486 + }, + { + "epoch": 0.2563593589178062, + "grad_norm": 0.9777092337608337, + "learning_rate": 4.978729375411247e-06, + "loss": 0.5811, + "step": 3487 + }, + { + "epoch": 0.2564328775180121, + "grad_norm": 0.8761686682701111, + "learning_rate": 4.97871683125521e-06, + "loss": 0.5605, + "step": 3488 + }, + { + "epoch": 0.2565063961182179, + "grad_norm": 0.9167448282241821, + "learning_rate": 4.9787042834171754e-06, + "loss": 0.5748, + "step": 3489 + }, + { + "epoch": 0.2565799147184238, + "grad_norm": 0.8656714558601379, + "learning_rate": 4.97869173189716e-06, + "loss": 0.5605, + "step": 3490 + }, + { + "epoch": 0.2566534333186296, + "grad_norm": 0.8506410717964172, + "learning_rate": 4.9786791766951855e-06, + "loss": 0.5778, + "step": 3491 + }, + { + "epoch": 0.2567269519188355, + "grad_norm": 0.8605562448501587, + "learning_rate": 4.978666617811268e-06, + "loss": 0.6216, + "step": 3492 + }, + { + "epoch": 0.2568004705190413, + "grad_norm": 0.8073444366455078, + "learning_rate": 4.978654055245426e-06, + "loss": 0.5289, + "step": 3493 + }, + { + "epoch": 0.2568739891192472, + "grad_norm": 0.8908278346061707, + "learning_rate": 4.978641488997681e-06, + "loss": 0.5844, + "step": 3494 + }, + { + "epoch": 0.256947507719453, + "grad_norm": 0.8627954125404358, + "learning_rate": 4.978628919068049e-06, + "loss": 0.5976, + "step": 3495 + }, + { + "epoch": 0.2570210263196589, + "grad_norm": 0.8427750468254089, + "learning_rate": 4.9786163454565495e-06, + "loss": 0.5877, + "step": 3496 + }, + { + "epoch": 0.2570945449198647, + "grad_norm": 0.9220764636993408, + "learning_rate": 4.9786037681632006e-06, + "loss": 0.5859, + "step": 3497 + }, + { + "epoch": 0.2571680635200706, + "grad_norm": 0.8397420644760132, + "learning_rate": 4.978591187188022e-06, + "loss": 0.6228, + "step": 3498 + }, + { + "epoch": 0.2572415821202764, + "grad_norm": 0.8981111645698547, + "learning_rate": 4.978578602531031e-06, + "loss": 0.5573, + "step": 3499 + }, + { + "epoch": 0.2573151007204823, + "grad_norm": 0.8234740495681763, + "learning_rate": 4.978566014192248e-06, + "loss": 0.586, + "step": 3500 + }, + { + "epoch": 0.25738861932068813, + "grad_norm": 0.8495211601257324, + "learning_rate": 4.978553422171692e-06, + "loss": 0.5408, + "step": 3501 + }, + { + "epoch": 0.257462137920894, + "grad_norm": 0.9106199145317078, + "learning_rate": 4.9785408264693794e-06, + "loss": 0.5929, + "step": 3502 + }, + { + "epoch": 0.25753565652109983, + "grad_norm": 0.866326630115509, + "learning_rate": 4.97852822708533e-06, + "loss": 0.5886, + "step": 3503 + }, + { + "epoch": 0.2576091751213057, + "grad_norm": 0.8854931592941284, + "learning_rate": 4.978515624019563e-06, + "loss": 0.5932, + "step": 3504 + }, + { + "epoch": 0.25768269372151154, + "grad_norm": 0.8727004528045654, + "learning_rate": 4.978503017272098e-06, + "loss": 0.5996, + "step": 3505 + }, + { + "epoch": 0.2577562123217174, + "grad_norm": 0.8820353150367737, + "learning_rate": 4.97849040684295e-06, + "loss": 0.6001, + "step": 3506 + }, + { + "epoch": 0.25782973092192324, + "grad_norm": 0.8656907677650452, + "learning_rate": 4.978477792732141e-06, + "loss": 0.5849, + "step": 3507 + }, + { + "epoch": 0.2579032495221291, + "grad_norm": 0.822066068649292, + "learning_rate": 4.97846517493969e-06, + "loss": 0.5763, + "step": 3508 + }, + { + "epoch": 0.25797676812233494, + "grad_norm": 0.9405322074890137, + "learning_rate": 4.978452553465614e-06, + "loss": 0.5868, + "step": 3509 + }, + { + "epoch": 0.2580502867225408, + "grad_norm": 0.8599412441253662, + "learning_rate": 4.978439928309932e-06, + "loss": 0.6033, + "step": 3510 + }, + { + "epoch": 0.25812380532274665, + "grad_norm": 0.8868339657783508, + "learning_rate": 4.978427299472664e-06, + "loss": 0.5581, + "step": 3511 + }, + { + "epoch": 0.2581973239229525, + "grad_norm": 0.8488537669181824, + "learning_rate": 4.978414666953828e-06, + "loss": 0.5798, + "step": 3512 + }, + { + "epoch": 0.25827084252315835, + "grad_norm": 0.8519881963729858, + "learning_rate": 4.978402030753442e-06, + "loss": 0.5993, + "step": 3513 + }, + { + "epoch": 0.25834436112336423, + "grad_norm": 0.8545396327972412, + "learning_rate": 4.978389390871526e-06, + "loss": 0.5921, + "step": 3514 + }, + { + "epoch": 0.25841787972357005, + "grad_norm": 0.8839825987815857, + "learning_rate": 4.978376747308098e-06, + "loss": 0.5892, + "step": 3515 + }, + { + "epoch": 0.25849139832377593, + "grad_norm": 0.9215220808982849, + "learning_rate": 4.978364100063178e-06, + "loss": 0.6248, + "step": 3516 + }, + { + "epoch": 0.25856491692398176, + "grad_norm": 0.9043372273445129, + "learning_rate": 4.978351449136782e-06, + "loss": 0.6025, + "step": 3517 + }, + { + "epoch": 0.25863843552418764, + "grad_norm": 0.8421404361724854, + "learning_rate": 4.978338794528932e-06, + "loss": 0.5249, + "step": 3518 + }, + { + "epoch": 0.25871195412439346, + "grad_norm": 0.8227682709693909, + "learning_rate": 4.978326136239645e-06, + "loss": 0.5056, + "step": 3519 + }, + { + "epoch": 0.25878547272459934, + "grad_norm": 0.847987174987793, + "learning_rate": 4.978313474268941e-06, + "loss": 0.5485, + "step": 3520 + }, + { + "epoch": 0.25885899132480517, + "grad_norm": 0.8709067106246948, + "learning_rate": 4.978300808616837e-06, + "loss": 0.5952, + "step": 3521 + }, + { + "epoch": 0.25893250992501105, + "grad_norm": 0.8742778301239014, + "learning_rate": 4.978288139283353e-06, + "loss": 0.5627, + "step": 3522 + }, + { + "epoch": 0.25900602852521687, + "grad_norm": 0.8666485548019409, + "learning_rate": 4.978275466268509e-06, + "loss": 0.5661, + "step": 3523 + }, + { + "epoch": 0.25907954712542275, + "grad_norm": 0.816457211971283, + "learning_rate": 4.9782627895723205e-06, + "loss": 0.5732, + "step": 3524 + }, + { + "epoch": 0.2591530657256286, + "grad_norm": 0.861789882183075, + "learning_rate": 4.9782501091948095e-06, + "loss": 0.5932, + "step": 3525 + }, + { + "epoch": 0.25922658432583445, + "grad_norm": 0.880933403968811, + "learning_rate": 4.9782374251359934e-06, + "loss": 0.6027, + "step": 3526 + }, + { + "epoch": 0.2593001029260403, + "grad_norm": 0.8801786303520203, + "learning_rate": 4.978224737395892e-06, + "loss": 0.5729, + "step": 3527 + }, + { + "epoch": 0.25937362152624616, + "grad_norm": 0.8500756025314331, + "learning_rate": 4.978212045974522e-06, + "loss": 0.5947, + "step": 3528 + }, + { + "epoch": 0.259447140126452, + "grad_norm": 0.8448624014854431, + "learning_rate": 4.978199350871905e-06, + "loss": 0.5911, + "step": 3529 + }, + { + "epoch": 0.25952065872665786, + "grad_norm": 0.8792920112609863, + "learning_rate": 4.978186652088057e-06, + "loss": 0.5342, + "step": 3530 + }, + { + "epoch": 0.2595941773268637, + "grad_norm": 0.8310382962226868, + "learning_rate": 4.978173949623e-06, + "loss": 0.537, + "step": 3531 + }, + { + "epoch": 0.25966769592706956, + "grad_norm": 0.8619163632392883, + "learning_rate": 4.978161243476751e-06, + "loss": 0.5553, + "step": 3532 + }, + { + "epoch": 0.2597412145272754, + "grad_norm": 0.8418304920196533, + "learning_rate": 4.978148533649329e-06, + "loss": 0.5664, + "step": 3533 + }, + { + "epoch": 0.25981473312748127, + "grad_norm": 0.8355199098587036, + "learning_rate": 4.978135820140753e-06, + "loss": 0.59, + "step": 3534 + }, + { + "epoch": 0.2598882517276871, + "grad_norm": 0.8813066482543945, + "learning_rate": 4.978123102951042e-06, + "loss": 0.6016, + "step": 3535 + }, + { + "epoch": 0.25996177032789297, + "grad_norm": 0.8942103981971741, + "learning_rate": 4.9781103820802144e-06, + "loss": 0.6154, + "step": 3536 + }, + { + "epoch": 0.2600352889280988, + "grad_norm": 0.8650285005569458, + "learning_rate": 4.97809765752829e-06, + "loss": 0.625, + "step": 3537 + }, + { + "epoch": 0.2601088075283047, + "grad_norm": 0.8509597778320312, + "learning_rate": 4.978084929295287e-06, + "loss": 0.581, + "step": 3538 + }, + { + "epoch": 0.2601823261285105, + "grad_norm": 0.908936083316803, + "learning_rate": 4.9780721973812255e-06, + "loss": 0.6147, + "step": 3539 + }, + { + "epoch": 0.2602558447287164, + "grad_norm": 0.8596780300140381, + "learning_rate": 4.978059461786122e-06, + "loss": 0.5899, + "step": 3540 + }, + { + "epoch": 0.2603293633289222, + "grad_norm": 0.8957427740097046, + "learning_rate": 4.978046722509998e-06, + "loss": 0.581, + "step": 3541 + }, + { + "epoch": 0.2604028819291281, + "grad_norm": 0.8655859231948853, + "learning_rate": 4.97803397955287e-06, + "loss": 0.5934, + "step": 3542 + }, + { + "epoch": 0.2604764005293339, + "grad_norm": 0.8430014252662659, + "learning_rate": 4.9780212329147595e-06, + "loss": 0.5873, + "step": 3543 + }, + { + "epoch": 0.2605499191295398, + "grad_norm": 0.9081000685691833, + "learning_rate": 4.978008482595684e-06, + "loss": 0.5738, + "step": 3544 + }, + { + "epoch": 0.2606234377297456, + "grad_norm": 0.8665691018104553, + "learning_rate": 4.977995728595662e-06, + "loss": 0.587, + "step": 3545 + }, + { + "epoch": 0.2606969563299515, + "grad_norm": 0.8182713389396667, + "learning_rate": 4.977982970914713e-06, + "loss": 0.5455, + "step": 3546 + }, + { + "epoch": 0.2607704749301573, + "grad_norm": 0.8158529996871948, + "learning_rate": 4.977970209552856e-06, + "loss": 0.5561, + "step": 3547 + }, + { + "epoch": 0.2608439935303632, + "grad_norm": 0.8657669425010681, + "learning_rate": 4.977957444510111e-06, + "loss": 0.5585, + "step": 3548 + }, + { + "epoch": 0.260917512130569, + "grad_norm": 0.8522875308990479, + "learning_rate": 4.977944675786494e-06, + "loss": 0.5642, + "step": 3549 + }, + { + "epoch": 0.2609910307307749, + "grad_norm": 0.9184039235115051, + "learning_rate": 4.9779319033820276e-06, + "loss": 0.5697, + "step": 3550 + }, + { + "epoch": 0.2610645493309807, + "grad_norm": 0.8983437418937683, + "learning_rate": 4.977919127296729e-06, + "loss": 0.5875, + "step": 3551 + }, + { + "epoch": 0.2611380679311866, + "grad_norm": 0.8681696057319641, + "learning_rate": 4.977906347530616e-06, + "loss": 0.5974, + "step": 3552 + }, + { + "epoch": 0.2612115865313924, + "grad_norm": 1.000626564025879, + "learning_rate": 4.97789356408371e-06, + "loss": 0.6031, + "step": 3553 + }, + { + "epoch": 0.2612851051315983, + "grad_norm": 0.8830830454826355, + "learning_rate": 4.977880776956027e-06, + "loss": 0.5756, + "step": 3554 + }, + { + "epoch": 0.26135862373180413, + "grad_norm": 0.8533809185028076, + "learning_rate": 4.9778679861475895e-06, + "loss": 0.5952, + "step": 3555 + }, + { + "epoch": 0.26143214233201, + "grad_norm": 0.8615033030509949, + "learning_rate": 4.977855191658415e-06, + "loss": 0.5658, + "step": 3556 + }, + { + "epoch": 0.26150566093221583, + "grad_norm": 0.9538399577140808, + "learning_rate": 4.977842393488521e-06, + "loss": 0.6365, + "step": 3557 + }, + { + "epoch": 0.2615791795324217, + "grad_norm": 0.8794892430305481, + "learning_rate": 4.977829591637929e-06, + "loss": 0.5443, + "step": 3558 + }, + { + "epoch": 0.26165269813262754, + "grad_norm": 0.9041966199874878, + "learning_rate": 4.977816786106656e-06, + "loss": 0.5945, + "step": 3559 + }, + { + "epoch": 0.2617262167328334, + "grad_norm": 0.8444801568984985, + "learning_rate": 4.977803976894723e-06, + "loss": 0.5875, + "step": 3560 + }, + { + "epoch": 0.26179973533303924, + "grad_norm": 0.9191594123840332, + "learning_rate": 4.977791164002146e-06, + "loss": 0.5961, + "step": 3561 + }, + { + "epoch": 0.2618732539332451, + "grad_norm": 0.8568938374519348, + "learning_rate": 4.977778347428947e-06, + "loss": 0.5831, + "step": 3562 + }, + { + "epoch": 0.26194677253345094, + "grad_norm": 0.8525648713111877, + "learning_rate": 4.977765527175144e-06, + "loss": 0.5646, + "step": 3563 + }, + { + "epoch": 0.2620202911336568, + "grad_norm": 0.928629994392395, + "learning_rate": 4.977752703240756e-06, + "loss": 0.6378, + "step": 3564 + }, + { + "epoch": 0.26209380973386265, + "grad_norm": 0.8518588542938232, + "learning_rate": 4.977739875625802e-06, + "loss": 0.6056, + "step": 3565 + }, + { + "epoch": 0.26216732833406853, + "grad_norm": 0.8629477620124817, + "learning_rate": 4.9777270443303006e-06, + "loss": 0.5758, + "step": 3566 + }, + { + "epoch": 0.26224084693427435, + "grad_norm": 0.9022663831710815, + "learning_rate": 4.9777142093542715e-06, + "loss": 0.5761, + "step": 3567 + }, + { + "epoch": 0.26231436553448023, + "grad_norm": 0.920772135257721, + "learning_rate": 4.9777013706977335e-06, + "loss": 0.5897, + "step": 3568 + }, + { + "epoch": 0.26238788413468606, + "grad_norm": 0.9068032503128052, + "learning_rate": 4.977688528360707e-06, + "loss": 0.6277, + "step": 3569 + }, + { + "epoch": 0.26246140273489194, + "grad_norm": 0.9177910089492798, + "learning_rate": 4.977675682343209e-06, + "loss": 0.5863, + "step": 3570 + }, + { + "epoch": 0.26253492133509776, + "grad_norm": 0.9221982955932617, + "learning_rate": 4.977662832645259e-06, + "loss": 0.6013, + "step": 3571 + }, + { + "epoch": 0.26260843993530364, + "grad_norm": 0.8300094604492188, + "learning_rate": 4.977649979266877e-06, + "loss": 0.54, + "step": 3572 + }, + { + "epoch": 0.26268195853550946, + "grad_norm": 0.8592062592506409, + "learning_rate": 4.9776371222080815e-06, + "loss": 0.5658, + "step": 3573 + }, + { + "epoch": 0.26275547713571534, + "grad_norm": 0.8691192865371704, + "learning_rate": 4.977624261468892e-06, + "loss": 0.5861, + "step": 3574 + }, + { + "epoch": 0.26282899573592117, + "grad_norm": 0.831625759601593, + "learning_rate": 4.977611397049327e-06, + "loss": 0.5717, + "step": 3575 + }, + { + "epoch": 0.26290251433612705, + "grad_norm": 0.8610044121742249, + "learning_rate": 4.977598528949406e-06, + "loss": 0.5603, + "step": 3576 + }, + { + "epoch": 0.26297603293633287, + "grad_norm": 0.8358936309814453, + "learning_rate": 4.977585657169149e-06, + "loss": 0.5572, + "step": 3577 + }, + { + "epoch": 0.26304955153653875, + "grad_norm": 0.8473699688911438, + "learning_rate": 4.9775727817085725e-06, + "loss": 0.5643, + "step": 3578 + }, + { + "epoch": 0.2631230701367446, + "grad_norm": 0.8753644824028015, + "learning_rate": 4.977559902567698e-06, + "loss": 0.6011, + "step": 3579 + }, + { + "epoch": 0.26319658873695045, + "grad_norm": 0.9040158987045288, + "learning_rate": 4.977547019746544e-06, + "loss": 0.5676, + "step": 3580 + }, + { + "epoch": 0.2632701073371563, + "grad_norm": 0.8809459805488586, + "learning_rate": 4.97753413324513e-06, + "loss": 0.5792, + "step": 3581 + }, + { + "epoch": 0.26334362593736216, + "grad_norm": 0.8669165968894958, + "learning_rate": 4.977521243063474e-06, + "loss": 0.5552, + "step": 3582 + }, + { + "epoch": 0.263417144537568, + "grad_norm": 0.8665956258773804, + "learning_rate": 4.977508349201596e-06, + "loss": 0.5702, + "step": 3583 + }, + { + "epoch": 0.26349066313777386, + "grad_norm": 0.8469021916389465, + "learning_rate": 4.977495451659515e-06, + "loss": 0.5947, + "step": 3584 + }, + { + "epoch": 0.2635641817379797, + "grad_norm": 0.8356596827507019, + "learning_rate": 4.977482550437251e-06, + "loss": 0.6082, + "step": 3585 + }, + { + "epoch": 0.26363770033818557, + "grad_norm": 0.8532134890556335, + "learning_rate": 4.977469645534822e-06, + "loss": 0.5771, + "step": 3586 + }, + { + "epoch": 0.2637112189383914, + "grad_norm": 0.8819760680198669, + "learning_rate": 4.977456736952247e-06, + "loss": 0.5653, + "step": 3587 + }, + { + "epoch": 0.26378473753859727, + "grad_norm": 0.853797197341919, + "learning_rate": 4.977443824689546e-06, + "loss": 0.589, + "step": 3588 + }, + { + "epoch": 0.2638582561388031, + "grad_norm": 0.8465482592582703, + "learning_rate": 4.977430908746737e-06, + "loss": 0.5745, + "step": 3589 + }, + { + "epoch": 0.263931774739009, + "grad_norm": 0.8777023553848267, + "learning_rate": 4.977417989123842e-06, + "loss": 0.5986, + "step": 3590 + }, + { + "epoch": 0.2640052933392148, + "grad_norm": 0.8341900706291199, + "learning_rate": 4.977405065820876e-06, + "loss": 0.5321, + "step": 3591 + }, + { + "epoch": 0.2640788119394207, + "grad_norm": 0.889387845993042, + "learning_rate": 4.977392138837862e-06, + "loss": 0.5769, + "step": 3592 + }, + { + "epoch": 0.2641523305396265, + "grad_norm": 0.8371066451072693, + "learning_rate": 4.977379208174817e-06, + "loss": 0.5781, + "step": 3593 + }, + { + "epoch": 0.2642258491398324, + "grad_norm": 0.853949785232544, + "learning_rate": 4.977366273831761e-06, + "loss": 0.5745, + "step": 3594 + }, + { + "epoch": 0.2642993677400382, + "grad_norm": 0.8625797033309937, + "learning_rate": 4.977353335808713e-06, + "loss": 0.595, + "step": 3595 + }, + { + "epoch": 0.2643728863402441, + "grad_norm": 0.8714184165000916, + "learning_rate": 4.977340394105692e-06, + "loss": 0.6129, + "step": 3596 + }, + { + "epoch": 0.2644464049404499, + "grad_norm": 0.9022769331932068, + "learning_rate": 4.977327448722718e-06, + "loss": 0.6002, + "step": 3597 + }, + { + "epoch": 0.2645199235406558, + "grad_norm": 0.8554514050483704, + "learning_rate": 4.97731449965981e-06, + "loss": 0.5477, + "step": 3598 + }, + { + "epoch": 0.2645934421408616, + "grad_norm": 0.8427987694740295, + "learning_rate": 4.977301546916986e-06, + "loss": 0.5765, + "step": 3599 + }, + { + "epoch": 0.2646669607410675, + "grad_norm": 0.8388752937316895, + "learning_rate": 4.977288590494267e-06, + "loss": 0.5647, + "step": 3600 + }, + { + "epoch": 0.2647404793412733, + "grad_norm": 0.9013117551803589, + "learning_rate": 4.977275630391671e-06, + "loss": 0.5725, + "step": 3601 + }, + { + "epoch": 0.2648139979414792, + "grad_norm": 0.8895541429519653, + "learning_rate": 4.977262666609218e-06, + "loss": 0.6024, + "step": 3602 + }, + { + "epoch": 0.264887516541685, + "grad_norm": 0.8640336990356445, + "learning_rate": 4.977249699146927e-06, + "loss": 0.5943, + "step": 3603 + }, + { + "epoch": 0.2649610351418909, + "grad_norm": 0.8511720299720764, + "learning_rate": 4.977236728004816e-06, + "loss": 0.5639, + "step": 3604 + }, + { + "epoch": 0.2650345537420967, + "grad_norm": 0.9227741956710815, + "learning_rate": 4.977223753182907e-06, + "loss": 0.6284, + "step": 3605 + }, + { + "epoch": 0.2651080723423026, + "grad_norm": 0.8544073700904846, + "learning_rate": 4.977210774681217e-06, + "loss": 0.5995, + "step": 3606 + }, + { + "epoch": 0.2651815909425084, + "grad_norm": 0.8802953958511353, + "learning_rate": 4.977197792499766e-06, + "loss": 0.5817, + "step": 3607 + }, + { + "epoch": 0.2652551095427143, + "grad_norm": 0.8622373342514038, + "learning_rate": 4.9771848066385744e-06, + "loss": 0.5524, + "step": 3608 + }, + { + "epoch": 0.2653286281429202, + "grad_norm": 0.9198972582817078, + "learning_rate": 4.97717181709766e-06, + "loss": 0.5697, + "step": 3609 + }, + { + "epoch": 0.265402146743126, + "grad_norm": 0.8823155760765076, + "learning_rate": 4.977158823877042e-06, + "loss": 0.5926, + "step": 3610 + }, + { + "epoch": 0.2654756653433319, + "grad_norm": 0.8814069032669067, + "learning_rate": 4.97714582697674e-06, + "loss": 0.617, + "step": 3611 + }, + { + "epoch": 0.2655491839435377, + "grad_norm": 0.8452867269515991, + "learning_rate": 4.9771328263967736e-06, + "loss": 0.5585, + "step": 3612 + }, + { + "epoch": 0.2656227025437436, + "grad_norm": 0.8422202467918396, + "learning_rate": 4.977119822137163e-06, + "loss": 0.5744, + "step": 3613 + }, + { + "epoch": 0.2656962211439494, + "grad_norm": 0.8919637799263, + "learning_rate": 4.977106814197926e-06, + "loss": 0.5485, + "step": 3614 + }, + { + "epoch": 0.2657697397441553, + "grad_norm": 0.9005799889564514, + "learning_rate": 4.977093802579081e-06, + "loss": 0.6272, + "step": 3615 + }, + { + "epoch": 0.2658432583443611, + "grad_norm": 0.8887921571731567, + "learning_rate": 4.977080787280652e-06, + "loss": 0.5665, + "step": 3616 + }, + { + "epoch": 0.265916776944567, + "grad_norm": 0.883894681930542, + "learning_rate": 4.977067768302653e-06, + "loss": 0.5903, + "step": 3617 + }, + { + "epoch": 0.2659902955447728, + "grad_norm": 0.8642902374267578, + "learning_rate": 4.977054745645105e-06, + "loss": 0.589, + "step": 3618 + }, + { + "epoch": 0.2660638141449787, + "grad_norm": 0.8860084414482117, + "learning_rate": 4.977041719308029e-06, + "loss": 0.5819, + "step": 3619 + }, + { + "epoch": 0.26613733274518453, + "grad_norm": 0.9169331192970276, + "learning_rate": 4.977028689291442e-06, + "loss": 0.5997, + "step": 3620 + }, + { + "epoch": 0.2662108513453904, + "grad_norm": 0.8862228393554688, + "learning_rate": 4.977015655595366e-06, + "loss": 0.5698, + "step": 3621 + }, + { + "epoch": 0.26628436994559623, + "grad_norm": 0.9054365158081055, + "learning_rate": 4.977002618219818e-06, + "loss": 0.5909, + "step": 3622 + }, + { + "epoch": 0.2663578885458021, + "grad_norm": 0.939762532711029, + "learning_rate": 4.976989577164818e-06, + "loss": 0.5826, + "step": 3623 + }, + { + "epoch": 0.26643140714600794, + "grad_norm": 0.8370988368988037, + "learning_rate": 4.976976532430387e-06, + "loss": 0.5628, + "step": 3624 + }, + { + "epoch": 0.2665049257462138, + "grad_norm": 0.8434675335884094, + "learning_rate": 4.976963484016542e-06, + "loss": 0.58, + "step": 3625 + }, + { + "epoch": 0.26657844434641964, + "grad_norm": 0.9140786528587341, + "learning_rate": 4.9769504319233026e-06, + "loss": 0.6028, + "step": 3626 + }, + { + "epoch": 0.2666519629466255, + "grad_norm": 0.9049773812294006, + "learning_rate": 4.97693737615069e-06, + "loss": 0.6102, + "step": 3627 + }, + { + "epoch": 0.26672548154683134, + "grad_norm": 0.9159494042396545, + "learning_rate": 4.976924316698723e-06, + "loss": 0.5932, + "step": 3628 + }, + { + "epoch": 0.2667990001470372, + "grad_norm": 0.8527408242225647, + "learning_rate": 4.976911253567419e-06, + "loss": 0.5621, + "step": 3629 + }, + { + "epoch": 0.26687251874724305, + "grad_norm": 0.903144121170044, + "learning_rate": 4.9768981867568e-06, + "loss": 0.5877, + "step": 3630 + }, + { + "epoch": 0.26694603734744893, + "grad_norm": 0.8696234822273254, + "learning_rate": 4.976885116266884e-06, + "loss": 0.5762, + "step": 3631 + }, + { + "epoch": 0.26701955594765475, + "grad_norm": 0.8639746904373169, + "learning_rate": 4.9768720420976914e-06, + "loss": 0.5647, + "step": 3632 + }, + { + "epoch": 0.26709307454786063, + "grad_norm": 0.8763720393180847, + "learning_rate": 4.976858964249241e-06, + "loss": 0.6011, + "step": 3633 + }, + { + "epoch": 0.26716659314806646, + "grad_norm": 0.8575021624565125, + "learning_rate": 4.976845882721551e-06, + "loss": 0.5542, + "step": 3634 + }, + { + "epoch": 0.26724011174827234, + "grad_norm": 0.8602535724639893, + "learning_rate": 4.976832797514643e-06, + "loss": 0.5543, + "step": 3635 + }, + { + "epoch": 0.26731363034847816, + "grad_norm": 0.8577951192855835, + "learning_rate": 4.976819708628535e-06, + "loss": 0.5712, + "step": 3636 + }, + { + "epoch": 0.26738714894868404, + "grad_norm": 0.8224485516548157, + "learning_rate": 4.976806616063248e-06, + "loss": 0.5641, + "step": 3637 + }, + { + "epoch": 0.26746066754888986, + "grad_norm": 0.880348801612854, + "learning_rate": 4.976793519818799e-06, + "loss": 0.5822, + "step": 3638 + }, + { + "epoch": 0.26753418614909574, + "grad_norm": 0.8830097913742065, + "learning_rate": 4.97678041989521e-06, + "loss": 0.5827, + "step": 3639 + }, + { + "epoch": 0.26760770474930157, + "grad_norm": 0.882949948310852, + "learning_rate": 4.976767316292499e-06, + "loss": 0.6375, + "step": 3640 + }, + { + "epoch": 0.26768122334950745, + "grad_norm": 0.8377553224563599, + "learning_rate": 4.976754209010686e-06, + "loss": 0.5329, + "step": 3641 + }, + { + "epoch": 0.26775474194971327, + "grad_norm": 0.867732584476471, + "learning_rate": 4.97674109804979e-06, + "loss": 0.5697, + "step": 3642 + }, + { + "epoch": 0.26782826054991915, + "grad_norm": 0.9140981435775757, + "learning_rate": 4.976727983409831e-06, + "loss": 0.5852, + "step": 3643 + }, + { + "epoch": 0.267901779150125, + "grad_norm": 0.9034959077835083, + "learning_rate": 4.976714865090827e-06, + "loss": 0.5671, + "step": 3644 + }, + { + "epoch": 0.26797529775033085, + "grad_norm": 0.8674265146255493, + "learning_rate": 4.9767017430928e-06, + "loss": 0.5631, + "step": 3645 + }, + { + "epoch": 0.2680488163505367, + "grad_norm": 0.8796173930168152, + "learning_rate": 4.976688617415768e-06, + "loss": 0.6031, + "step": 3646 + }, + { + "epoch": 0.26812233495074256, + "grad_norm": 0.8494717478752136, + "learning_rate": 4.97667548805975e-06, + "loss": 0.5666, + "step": 3647 + }, + { + "epoch": 0.2681958535509484, + "grad_norm": 0.8571521043777466, + "learning_rate": 4.9766623550247665e-06, + "loss": 0.5931, + "step": 3648 + }, + { + "epoch": 0.26826937215115426, + "grad_norm": 0.8682136535644531, + "learning_rate": 4.976649218310837e-06, + "loss": 0.5645, + "step": 3649 + }, + { + "epoch": 0.2683428907513601, + "grad_norm": 0.8535782694816589, + "learning_rate": 4.97663607791798e-06, + "loss": 0.5895, + "step": 3650 + }, + { + "epoch": 0.26841640935156597, + "grad_norm": 0.862360954284668, + "learning_rate": 4.976622933846217e-06, + "loss": 0.6222, + "step": 3651 + }, + { + "epoch": 0.2684899279517718, + "grad_norm": 0.9362276792526245, + "learning_rate": 4.976609786095565e-06, + "loss": 0.6237, + "step": 3652 + }, + { + "epoch": 0.26856344655197767, + "grad_norm": 0.8983790874481201, + "learning_rate": 4.976596634666045e-06, + "loss": 0.5945, + "step": 3653 + }, + { + "epoch": 0.2686369651521835, + "grad_norm": 0.9105384945869446, + "learning_rate": 4.976583479557677e-06, + "loss": 0.5857, + "step": 3654 + }, + { + "epoch": 0.2687104837523894, + "grad_norm": 0.9097986221313477, + "learning_rate": 4.97657032077048e-06, + "loss": 0.6017, + "step": 3655 + }, + { + "epoch": 0.2687840023525952, + "grad_norm": 0.857448160648346, + "learning_rate": 4.9765571583044725e-06, + "loss": 0.5732, + "step": 3656 + }, + { + "epoch": 0.2688575209528011, + "grad_norm": 0.8916842341423035, + "learning_rate": 4.976543992159675e-06, + "loss": 0.5409, + "step": 3657 + }, + { + "epoch": 0.2689310395530069, + "grad_norm": 0.9101468324661255, + "learning_rate": 4.976530822336108e-06, + "loss": 0.5813, + "step": 3658 + }, + { + "epoch": 0.2690045581532128, + "grad_norm": 0.8988044261932373, + "learning_rate": 4.976517648833789e-06, + "loss": 0.6479, + "step": 3659 + }, + { + "epoch": 0.2690780767534186, + "grad_norm": 0.8577653169631958, + "learning_rate": 4.976504471652739e-06, + "loss": 0.596, + "step": 3660 + }, + { + "epoch": 0.2691515953536245, + "grad_norm": 0.8920888900756836, + "learning_rate": 4.976491290792977e-06, + "loss": 0.5992, + "step": 3661 + }, + { + "epoch": 0.2692251139538303, + "grad_norm": 0.8466759324073792, + "learning_rate": 4.976478106254523e-06, + "loss": 0.5754, + "step": 3662 + }, + { + "epoch": 0.2692986325540362, + "grad_norm": 0.8624151349067688, + "learning_rate": 4.976464918037396e-06, + "loss": 0.5803, + "step": 3663 + }, + { + "epoch": 0.269372151154242, + "grad_norm": 0.8882655501365662, + "learning_rate": 4.976451726141616e-06, + "loss": 0.604, + "step": 3664 + }, + { + "epoch": 0.2694456697544479, + "grad_norm": 0.8850154280662537, + "learning_rate": 4.976438530567204e-06, + "loss": 0.5784, + "step": 3665 + }, + { + "epoch": 0.2695191883546537, + "grad_norm": 0.8384115099906921, + "learning_rate": 4.976425331314177e-06, + "loss": 0.5675, + "step": 3666 + }, + { + "epoch": 0.2695927069548596, + "grad_norm": 0.9003543853759766, + "learning_rate": 4.976412128382556e-06, + "loss": 0.637, + "step": 3667 + }, + { + "epoch": 0.2696662255550654, + "grad_norm": 0.8551012873649597, + "learning_rate": 4.9763989217723595e-06, + "loss": 0.5575, + "step": 3668 + }, + { + "epoch": 0.2697397441552713, + "grad_norm": 0.9070199131965637, + "learning_rate": 4.976385711483609e-06, + "loss": 0.5682, + "step": 3669 + }, + { + "epoch": 0.2698132627554771, + "grad_norm": 0.8942102193832397, + "learning_rate": 4.976372497516323e-06, + "loss": 0.5953, + "step": 3670 + }, + { + "epoch": 0.269886781355683, + "grad_norm": 0.8909363746643066, + "learning_rate": 4.976359279870521e-06, + "loss": 0.603, + "step": 3671 + }, + { + "epoch": 0.2699602999558888, + "grad_norm": 0.9039493799209595, + "learning_rate": 4.976346058546223e-06, + "loss": 0.5989, + "step": 3672 + }, + { + "epoch": 0.2700338185560947, + "grad_norm": 0.8826491236686707, + "learning_rate": 4.976332833543449e-06, + "loss": 0.5496, + "step": 3673 + }, + { + "epoch": 0.27010733715630053, + "grad_norm": 0.9328016042709351, + "learning_rate": 4.976319604862217e-06, + "loss": 0.5556, + "step": 3674 + }, + { + "epoch": 0.2701808557565064, + "grad_norm": 0.8569868206977844, + "learning_rate": 4.97630637250255e-06, + "loss": 0.5568, + "step": 3675 + }, + { + "epoch": 0.27025437435671223, + "grad_norm": 0.8628056645393372, + "learning_rate": 4.9762931364644635e-06, + "loss": 0.5937, + "step": 3676 + }, + { + "epoch": 0.2703278929569181, + "grad_norm": 0.8721897602081299, + "learning_rate": 4.97627989674798e-06, + "loss": 0.5789, + "step": 3677 + }, + { + "epoch": 0.27040141155712394, + "grad_norm": 0.8643059730529785, + "learning_rate": 4.976266653353118e-06, + "loss": 0.5652, + "step": 3678 + }, + { + "epoch": 0.2704749301573298, + "grad_norm": 0.869795560836792, + "learning_rate": 4.976253406279898e-06, + "loss": 0.5944, + "step": 3679 + }, + { + "epoch": 0.27054844875753564, + "grad_norm": 0.8442280888557434, + "learning_rate": 4.976240155528338e-06, + "loss": 0.5671, + "step": 3680 + }, + { + "epoch": 0.2706219673577415, + "grad_norm": 0.8488362431526184, + "learning_rate": 4.976226901098459e-06, + "loss": 0.5651, + "step": 3681 + }, + { + "epoch": 0.27069548595794735, + "grad_norm": 0.8503871560096741, + "learning_rate": 4.9762136429902815e-06, + "loss": 0.5604, + "step": 3682 + }, + { + "epoch": 0.2707690045581532, + "grad_norm": 0.8195472955703735, + "learning_rate": 4.976200381203824e-06, + "loss": 0.5572, + "step": 3683 + }, + { + "epoch": 0.27084252315835905, + "grad_norm": 0.8691091537475586, + "learning_rate": 4.976187115739106e-06, + "loss": 0.5909, + "step": 3684 + }, + { + "epoch": 0.27091604175856493, + "grad_norm": 0.9241306185722351, + "learning_rate": 4.976173846596148e-06, + "loss": 0.5904, + "step": 3685 + }, + { + "epoch": 0.27098956035877075, + "grad_norm": 0.8671323657035828, + "learning_rate": 4.976160573774969e-06, + "loss": 0.5756, + "step": 3686 + }, + { + "epoch": 0.27106307895897663, + "grad_norm": 0.9147495031356812, + "learning_rate": 4.976147297275589e-06, + "loss": 0.5822, + "step": 3687 + }, + { + "epoch": 0.27113659755918246, + "grad_norm": 0.8898688554763794, + "learning_rate": 4.9761340170980285e-06, + "loss": 0.5981, + "step": 3688 + }, + { + "epoch": 0.27121011615938834, + "grad_norm": 0.833175539970398, + "learning_rate": 4.976120733242306e-06, + "loss": 0.5693, + "step": 3689 + }, + { + "epoch": 0.27128363475959416, + "grad_norm": 0.8331900835037231, + "learning_rate": 4.976107445708441e-06, + "loss": 0.5403, + "step": 3690 + }, + { + "epoch": 0.27135715335980004, + "grad_norm": 0.8509117364883423, + "learning_rate": 4.976094154496456e-06, + "loss": 0.5445, + "step": 3691 + }, + { + "epoch": 0.27143067196000586, + "grad_norm": 0.8416308760643005, + "learning_rate": 4.976080859606367e-06, + "loss": 0.5627, + "step": 3692 + }, + { + "epoch": 0.27150419056021174, + "grad_norm": 0.8652122020721436, + "learning_rate": 4.976067561038196e-06, + "loss": 0.5302, + "step": 3693 + }, + { + "epoch": 0.27157770916041757, + "grad_norm": 0.8496164083480835, + "learning_rate": 4.976054258791962e-06, + "loss": 0.5569, + "step": 3694 + }, + { + "epoch": 0.27165122776062345, + "grad_norm": 0.8388689160346985, + "learning_rate": 4.976040952867685e-06, + "loss": 0.6083, + "step": 3695 + }, + { + "epoch": 0.2717247463608293, + "grad_norm": 0.9218834638595581, + "learning_rate": 4.976027643265385e-06, + "loss": 0.5999, + "step": 3696 + }, + { + "epoch": 0.27179826496103515, + "grad_norm": 0.8827906250953674, + "learning_rate": 4.976014329985082e-06, + "loss": 0.5739, + "step": 3697 + }, + { + "epoch": 0.271871783561241, + "grad_norm": 0.8603799939155579, + "learning_rate": 4.976001013026795e-06, + "loss": 0.5725, + "step": 3698 + }, + { + "epoch": 0.27194530216144686, + "grad_norm": 0.848567545413971, + "learning_rate": 4.9759876923905425e-06, + "loss": 0.5508, + "step": 3699 + }, + { + "epoch": 0.2720188207616527, + "grad_norm": 0.8984389305114746, + "learning_rate": 4.975974368076347e-06, + "loss": 0.571, + "step": 3700 + }, + { + "epoch": 0.27209233936185856, + "grad_norm": 0.8564833402633667, + "learning_rate": 4.975961040084228e-06, + "loss": 0.567, + "step": 3701 + }, + { + "epoch": 0.2721658579620644, + "grad_norm": 0.8799907565116882, + "learning_rate": 4.975947708414204e-06, + "loss": 0.5971, + "step": 3702 + }, + { + "epoch": 0.27223937656227026, + "grad_norm": 0.8410612344741821, + "learning_rate": 4.975934373066295e-06, + "loss": 0.5907, + "step": 3703 + }, + { + "epoch": 0.2723128951624761, + "grad_norm": 0.9115939140319824, + "learning_rate": 4.97592103404052e-06, + "loss": 0.5656, + "step": 3704 + }, + { + "epoch": 0.27238641376268197, + "grad_norm": 0.873970091342926, + "learning_rate": 4.9759076913369016e-06, + "loss": 0.582, + "step": 3705 + }, + { + "epoch": 0.2724599323628878, + "grad_norm": 0.8554779887199402, + "learning_rate": 4.975894344955457e-06, + "loss": 0.5801, + "step": 3706 + }, + { + "epoch": 0.27253345096309367, + "grad_norm": 0.8723180890083313, + "learning_rate": 4.975880994896207e-06, + "loss": 0.5357, + "step": 3707 + }, + { + "epoch": 0.2726069695632995, + "grad_norm": 0.9181703329086304, + "learning_rate": 4.975867641159172e-06, + "loss": 0.6341, + "step": 3708 + }, + { + "epoch": 0.2726804881635054, + "grad_norm": 0.8193990588188171, + "learning_rate": 4.97585428374437e-06, + "loss": 0.4989, + "step": 3709 + }, + { + "epoch": 0.2727540067637112, + "grad_norm": 0.9137048721313477, + "learning_rate": 4.975840922651823e-06, + "loss": 0.6282, + "step": 3710 + }, + { + "epoch": 0.2728275253639171, + "grad_norm": 0.8294498920440674, + "learning_rate": 4.97582755788155e-06, + "loss": 0.5422, + "step": 3711 + }, + { + "epoch": 0.2729010439641229, + "grad_norm": 0.8771476149559021, + "learning_rate": 4.975814189433571e-06, + "loss": 0.5809, + "step": 3712 + }, + { + "epoch": 0.2729745625643288, + "grad_norm": 0.8387982845306396, + "learning_rate": 4.975800817307904e-06, + "loss": 0.5612, + "step": 3713 + }, + { + "epoch": 0.2730480811645346, + "grad_norm": 0.8556387424468994, + "learning_rate": 4.975787441504572e-06, + "loss": 0.5722, + "step": 3714 + }, + { + "epoch": 0.2731215997647405, + "grad_norm": 0.8990598320960999, + "learning_rate": 4.975774062023592e-06, + "loss": 0.5996, + "step": 3715 + }, + { + "epoch": 0.2731951183649463, + "grad_norm": 0.8597322106361389, + "learning_rate": 4.975760678864987e-06, + "loss": 0.576, + "step": 3716 + }, + { + "epoch": 0.2732686369651522, + "grad_norm": 0.8814663290977478, + "learning_rate": 4.975747292028774e-06, + "loss": 0.5968, + "step": 3717 + }, + { + "epoch": 0.273342155565358, + "grad_norm": 0.9161898493766785, + "learning_rate": 4.9757339015149745e-06, + "loss": 0.6509, + "step": 3718 + }, + { + "epoch": 0.2734156741655639, + "grad_norm": 0.8343521952629089, + "learning_rate": 4.975720507323607e-06, + "loss": 0.5611, + "step": 3719 + }, + { + "epoch": 0.2734891927657697, + "grad_norm": 0.8441069722175598, + "learning_rate": 4.975707109454693e-06, + "loss": 0.5903, + "step": 3720 + }, + { + "epoch": 0.2735627113659756, + "grad_norm": 0.8755204081535339, + "learning_rate": 4.97569370790825e-06, + "loss": 0.6026, + "step": 3721 + }, + { + "epoch": 0.2736362299661814, + "grad_norm": 0.9236829876899719, + "learning_rate": 4.975680302684301e-06, + "loss": 0.6025, + "step": 3722 + }, + { + "epoch": 0.2737097485663873, + "grad_norm": 0.8225849866867065, + "learning_rate": 4.975666893782865e-06, + "loss": 0.5556, + "step": 3723 + }, + { + "epoch": 0.2737832671665931, + "grad_norm": 0.8810742497444153, + "learning_rate": 4.97565348120396e-06, + "loss": 0.5757, + "step": 3724 + }, + { + "epoch": 0.273856785766799, + "grad_norm": 0.8618945479393005, + "learning_rate": 4.975640064947609e-06, + "loss": 0.5759, + "step": 3725 + }, + { + "epoch": 0.27393030436700483, + "grad_norm": 0.9750493168830872, + "learning_rate": 4.975626645013829e-06, + "loss": 0.6474, + "step": 3726 + }, + { + "epoch": 0.2740038229672107, + "grad_norm": 0.9004732370376587, + "learning_rate": 4.975613221402641e-06, + "loss": 0.5967, + "step": 3727 + }, + { + "epoch": 0.27407734156741653, + "grad_norm": 0.939383327960968, + "learning_rate": 4.975599794114066e-06, + "loss": 0.6452, + "step": 3728 + }, + { + "epoch": 0.2741508601676224, + "grad_norm": 0.8936992287635803, + "learning_rate": 4.975586363148122e-06, + "loss": 0.587, + "step": 3729 + }, + { + "epoch": 0.27422437876782824, + "grad_norm": 0.835616409778595, + "learning_rate": 4.975572928504831e-06, + "loss": 0.5662, + "step": 3730 + }, + { + "epoch": 0.2742978973680341, + "grad_norm": 0.8514701128005981, + "learning_rate": 4.9755594901842115e-06, + "loss": 0.5613, + "step": 3731 + }, + { + "epoch": 0.27437141596823994, + "grad_norm": 0.8510655760765076, + "learning_rate": 4.975546048186285e-06, + "loss": 0.5964, + "step": 3732 + }, + { + "epoch": 0.2744449345684458, + "grad_norm": 0.8282982110977173, + "learning_rate": 4.975532602511069e-06, + "loss": 0.5572, + "step": 3733 + }, + { + "epoch": 0.27451845316865164, + "grad_norm": 0.8719733357429504, + "learning_rate": 4.975519153158585e-06, + "loss": 0.5327, + "step": 3734 + }, + { + "epoch": 0.2745919717688575, + "grad_norm": 0.8439358472824097, + "learning_rate": 4.975505700128854e-06, + "loss": 0.5998, + "step": 3735 + }, + { + "epoch": 0.27466549036906335, + "grad_norm": 0.8466256260871887, + "learning_rate": 4.9754922434218935e-06, + "loss": 0.5572, + "step": 3736 + }, + { + "epoch": 0.2747390089692692, + "grad_norm": 0.9268506765365601, + "learning_rate": 4.975478783037726e-06, + "loss": 0.6302, + "step": 3737 + }, + { + "epoch": 0.27481252756947505, + "grad_norm": 0.8551582098007202, + "learning_rate": 4.975465318976369e-06, + "loss": 0.5449, + "step": 3738 + }, + { + "epoch": 0.27488604616968093, + "grad_norm": 0.8374884128570557, + "learning_rate": 4.975451851237844e-06, + "loss": 0.5829, + "step": 3739 + }, + { + "epoch": 0.27495956476988676, + "grad_norm": 0.8586747050285339, + "learning_rate": 4.975438379822172e-06, + "loss": 0.5529, + "step": 3740 + }, + { + "epoch": 0.27503308337009263, + "grad_norm": 0.8866751790046692, + "learning_rate": 4.975424904729371e-06, + "loss": 0.6124, + "step": 3741 + }, + { + "epoch": 0.27510660197029846, + "grad_norm": 0.9185434579849243, + "learning_rate": 4.975411425959462e-06, + "loss": 0.6012, + "step": 3742 + }, + { + "epoch": 0.27518012057050434, + "grad_norm": 0.8222064971923828, + "learning_rate": 4.975397943512466e-06, + "loss": 0.5685, + "step": 3743 + }, + { + "epoch": 0.27525363917071016, + "grad_norm": 0.8393653631210327, + "learning_rate": 4.9753844573884e-06, + "loss": 0.5633, + "step": 3744 + }, + { + "epoch": 0.27532715777091604, + "grad_norm": 0.8216907978057861, + "learning_rate": 4.975370967587287e-06, + "loss": 0.5823, + "step": 3745 + }, + { + "epoch": 0.27540067637112187, + "grad_norm": 0.8402168154716492, + "learning_rate": 4.975357474109146e-06, + "loss": 0.5464, + "step": 3746 + }, + { + "epoch": 0.27547419497132775, + "grad_norm": 0.9273286461830139, + "learning_rate": 4.975343976953996e-06, + "loss": 0.6226, + "step": 3747 + }, + { + "epoch": 0.2755477135715336, + "grad_norm": 0.880702793598175, + "learning_rate": 4.975330476121859e-06, + "loss": 0.6058, + "step": 3748 + }, + { + "epoch": 0.27562123217173945, + "grad_norm": 0.8506665825843811, + "learning_rate": 4.975316971612753e-06, + "loss": 0.6147, + "step": 3749 + }, + { + "epoch": 0.27569475077194533, + "grad_norm": 0.8613796234130859, + "learning_rate": 4.975303463426701e-06, + "loss": 0.5823, + "step": 3750 + }, + { + "epoch": 0.27576826937215115, + "grad_norm": 0.8851396441459656, + "learning_rate": 4.97528995156372e-06, + "loss": 0.5782, + "step": 3751 + }, + { + "epoch": 0.27584178797235703, + "grad_norm": 0.795465350151062, + "learning_rate": 4.975276436023832e-06, + "loss": 0.5583, + "step": 3752 + }, + { + "epoch": 0.27591530657256286, + "grad_norm": 0.9295992851257324, + "learning_rate": 4.9752629168070566e-06, + "loss": 0.5489, + "step": 3753 + }, + { + "epoch": 0.27598882517276874, + "grad_norm": 0.8972066044807434, + "learning_rate": 4.9752493939134125e-06, + "loss": 0.6005, + "step": 3754 + }, + { + "epoch": 0.27606234377297456, + "grad_norm": 0.8917617797851562, + "learning_rate": 4.975235867342922e-06, + "loss": 0.57, + "step": 3755 + }, + { + "epoch": 0.27613586237318044, + "grad_norm": 0.8416844010353088, + "learning_rate": 4.975222337095604e-06, + "loss": 0.6044, + "step": 3756 + }, + { + "epoch": 0.27620938097338626, + "grad_norm": 0.819865882396698, + "learning_rate": 4.9752088031714785e-06, + "loss": 0.564, + "step": 3757 + }, + { + "epoch": 0.27628289957359214, + "grad_norm": 0.8506949543952942, + "learning_rate": 4.975195265570566e-06, + "loss": 0.5746, + "step": 3758 + }, + { + "epoch": 0.27635641817379797, + "grad_norm": 0.8459702134132385, + "learning_rate": 4.975181724292887e-06, + "loss": 0.5362, + "step": 3759 + }, + { + "epoch": 0.27642993677400385, + "grad_norm": 0.8453572988510132, + "learning_rate": 4.97516817933846e-06, + "loss": 0.5748, + "step": 3760 + }, + { + "epoch": 0.2765034553742097, + "grad_norm": 0.8548834323883057, + "learning_rate": 4.975154630707307e-06, + "loss": 0.5829, + "step": 3761 + }, + { + "epoch": 0.27657697397441555, + "grad_norm": 0.8519638776779175, + "learning_rate": 4.9751410783994465e-06, + "loss": 0.6117, + "step": 3762 + }, + { + "epoch": 0.2766504925746214, + "grad_norm": 0.8305591344833374, + "learning_rate": 4.9751275224149e-06, + "loss": 0.5578, + "step": 3763 + }, + { + "epoch": 0.27672401117482726, + "grad_norm": 0.8656392097473145, + "learning_rate": 4.975113962753687e-06, + "loss": 0.5924, + "step": 3764 + }, + { + "epoch": 0.2767975297750331, + "grad_norm": 0.8378586173057556, + "learning_rate": 4.975100399415827e-06, + "loss": 0.5855, + "step": 3765 + }, + { + "epoch": 0.27687104837523896, + "grad_norm": 0.8663030862808228, + "learning_rate": 4.9750868324013425e-06, + "loss": 0.5738, + "step": 3766 + }, + { + "epoch": 0.2769445669754448, + "grad_norm": 0.898114800453186, + "learning_rate": 4.975073261710252e-06, + "loss": 0.5503, + "step": 3767 + }, + { + "epoch": 0.27701808557565066, + "grad_norm": 0.8672802448272705, + "learning_rate": 4.975059687342574e-06, + "loss": 0.6115, + "step": 3768 + }, + { + "epoch": 0.2770916041758565, + "grad_norm": 0.8344489932060242, + "learning_rate": 4.975046109298332e-06, + "loss": 0.5715, + "step": 3769 + }, + { + "epoch": 0.27716512277606237, + "grad_norm": 0.8726556301116943, + "learning_rate": 4.975032527577543e-06, + "loss": 0.5674, + "step": 3770 + }, + { + "epoch": 0.2772386413762682, + "grad_norm": 0.8747580647468567, + "learning_rate": 4.97501894218023e-06, + "loss": 0.594, + "step": 3771 + }, + { + "epoch": 0.27731215997647407, + "grad_norm": 0.8650317788124084, + "learning_rate": 4.975005353106411e-06, + "loss": 0.6106, + "step": 3772 + }, + { + "epoch": 0.2773856785766799, + "grad_norm": 0.897064208984375, + "learning_rate": 4.974991760356108e-06, + "loss": 0.599, + "step": 3773 + }, + { + "epoch": 0.2774591971768858, + "grad_norm": 0.9693883657455444, + "learning_rate": 4.9749781639293395e-06, + "loss": 0.613, + "step": 3774 + }, + { + "epoch": 0.2775327157770916, + "grad_norm": 0.8721312284469604, + "learning_rate": 4.974964563826127e-06, + "loss": 0.5911, + "step": 3775 + }, + { + "epoch": 0.2776062343772975, + "grad_norm": 0.8799925446510315, + "learning_rate": 4.974950960046489e-06, + "loss": 0.5745, + "step": 3776 + }, + { + "epoch": 0.2776797529775033, + "grad_norm": 0.8252274394035339, + "learning_rate": 4.9749373525904484e-06, + "loss": 0.5167, + "step": 3777 + }, + { + "epoch": 0.2777532715777092, + "grad_norm": 0.8450571298599243, + "learning_rate": 4.974923741458023e-06, + "loss": 0.6008, + "step": 3778 + }, + { + "epoch": 0.277826790177915, + "grad_norm": 0.9154884815216064, + "learning_rate": 4.974910126649235e-06, + "loss": 0.5996, + "step": 3779 + }, + { + "epoch": 0.2779003087781209, + "grad_norm": 0.8530998826026917, + "learning_rate": 4.974896508164102e-06, + "loss": 0.5508, + "step": 3780 + }, + { + "epoch": 0.2779738273783267, + "grad_norm": 0.8566741943359375, + "learning_rate": 4.9748828860026465e-06, + "loss": 0.5656, + "step": 3781 + }, + { + "epoch": 0.2780473459785326, + "grad_norm": 0.881523847579956, + "learning_rate": 4.974869260164889e-06, + "loss": 0.598, + "step": 3782 + }, + { + "epoch": 0.2781208645787384, + "grad_norm": 0.9177414178848267, + "learning_rate": 4.974855630650846e-06, + "loss": 0.5384, + "step": 3783 + }, + { + "epoch": 0.2781943831789443, + "grad_norm": 0.8458245992660522, + "learning_rate": 4.974841997460544e-06, + "loss": 0.6056, + "step": 3784 + }, + { + "epoch": 0.2782679017791501, + "grad_norm": 0.8929088711738586, + "learning_rate": 4.974828360593998e-06, + "loss": 0.6128, + "step": 3785 + }, + { + "epoch": 0.278341420379356, + "grad_norm": 0.9245168566703796, + "learning_rate": 4.974814720051229e-06, + "loss": 0.6121, + "step": 3786 + }, + { + "epoch": 0.2784149389795618, + "grad_norm": 0.88201904296875, + "learning_rate": 4.974801075832259e-06, + "loss": 0.5943, + "step": 3787 + }, + { + "epoch": 0.2784884575797677, + "grad_norm": 0.855500340461731, + "learning_rate": 4.974787427937108e-06, + "loss": 0.5909, + "step": 3788 + }, + { + "epoch": 0.2785619761799735, + "grad_norm": 0.8653497099876404, + "learning_rate": 4.974773776365795e-06, + "loss": 0.5354, + "step": 3789 + }, + { + "epoch": 0.2786354947801794, + "grad_norm": 0.8951855897903442, + "learning_rate": 4.974760121118341e-06, + "loss": 0.5834, + "step": 3790 + }, + { + "epoch": 0.27870901338038523, + "grad_norm": 0.8325261473655701, + "learning_rate": 4.974746462194768e-06, + "loss": 0.5282, + "step": 3791 + }, + { + "epoch": 0.2787825319805911, + "grad_norm": 0.8567858338356018, + "learning_rate": 4.974732799595093e-06, + "loss": 0.5871, + "step": 3792 + }, + { + "epoch": 0.27885605058079693, + "grad_norm": 0.8867344856262207, + "learning_rate": 4.974719133319339e-06, + "loss": 0.5989, + "step": 3793 + }, + { + "epoch": 0.2789295691810028, + "grad_norm": 0.835946798324585, + "learning_rate": 4.974705463367524e-06, + "loss": 0.5869, + "step": 3794 + }, + { + "epoch": 0.27900308778120864, + "grad_norm": 0.8568546175956726, + "learning_rate": 4.97469178973967e-06, + "loss": 0.5806, + "step": 3795 + }, + { + "epoch": 0.2790766063814145, + "grad_norm": 0.9134561419487, + "learning_rate": 4.974678112435798e-06, + "loss": 0.5937, + "step": 3796 + }, + { + "epoch": 0.27915012498162034, + "grad_norm": 0.8799413442611694, + "learning_rate": 4.974664431455926e-06, + "loss": 0.6188, + "step": 3797 + }, + { + "epoch": 0.2792236435818262, + "grad_norm": 0.8299461603164673, + "learning_rate": 4.974650746800076e-06, + "loss": 0.5763, + "step": 3798 + }, + { + "epoch": 0.27929716218203204, + "grad_norm": 0.8784864544868469, + "learning_rate": 4.974637058468267e-06, + "loss": 0.5966, + "step": 3799 + }, + { + "epoch": 0.2793706807822379, + "grad_norm": 0.8688875436782837, + "learning_rate": 4.974623366460522e-06, + "loss": 0.5847, + "step": 3800 + }, + { + "epoch": 0.27944419938244375, + "grad_norm": 0.8727064728736877, + "learning_rate": 4.9746096707768574e-06, + "loss": 0.632, + "step": 3801 + }, + { + "epoch": 0.2795177179826496, + "grad_norm": 0.8814237713813782, + "learning_rate": 4.974595971417296e-06, + "loss": 0.5534, + "step": 3802 + }, + { + "epoch": 0.27959123658285545, + "grad_norm": 0.8464584946632385, + "learning_rate": 4.974582268381859e-06, + "loss": 0.5984, + "step": 3803 + }, + { + "epoch": 0.27966475518306133, + "grad_norm": 0.8405470848083496, + "learning_rate": 4.974568561670565e-06, + "loss": 0.5646, + "step": 3804 + }, + { + "epoch": 0.27973827378326716, + "grad_norm": 0.8969526886940002, + "learning_rate": 4.974554851283434e-06, + "loss": 0.5915, + "step": 3805 + }, + { + "epoch": 0.27981179238347303, + "grad_norm": 0.8374898433685303, + "learning_rate": 4.974541137220489e-06, + "loss": 0.6148, + "step": 3806 + }, + { + "epoch": 0.27988531098367886, + "grad_norm": 0.8735193610191345, + "learning_rate": 4.9745274194817475e-06, + "loss": 0.5989, + "step": 3807 + }, + { + "epoch": 0.27995882958388474, + "grad_norm": 0.8672826290130615, + "learning_rate": 4.974513698067231e-06, + "loss": 0.5622, + "step": 3808 + }, + { + "epoch": 0.28003234818409056, + "grad_norm": 0.8536990880966187, + "learning_rate": 4.97449997297696e-06, + "loss": 0.5852, + "step": 3809 + }, + { + "epoch": 0.28010586678429644, + "grad_norm": 0.8549034595489502, + "learning_rate": 4.9744862442109545e-06, + "loss": 0.5371, + "step": 3810 + }, + { + "epoch": 0.28017938538450227, + "grad_norm": 0.8337482810020447, + "learning_rate": 4.974472511769236e-06, + "loss": 0.5844, + "step": 3811 + }, + { + "epoch": 0.28025290398470815, + "grad_norm": 0.8482750654220581, + "learning_rate": 4.974458775651824e-06, + "loss": 0.5669, + "step": 3812 + }, + { + "epoch": 0.28032642258491397, + "grad_norm": 0.8674596548080444, + "learning_rate": 4.974445035858738e-06, + "loss": 0.5454, + "step": 3813 + }, + { + "epoch": 0.28039994118511985, + "grad_norm": 0.8958350419998169, + "learning_rate": 4.97443129239e-06, + "loss": 0.6041, + "step": 3814 + }, + { + "epoch": 0.2804734597853257, + "grad_norm": 0.8956770300865173, + "learning_rate": 4.97441754524563e-06, + "loss": 0.5452, + "step": 3815 + }, + { + "epoch": 0.28054697838553155, + "grad_norm": 0.8477604985237122, + "learning_rate": 4.974403794425648e-06, + "loss": 0.5792, + "step": 3816 + }, + { + "epoch": 0.2806204969857374, + "grad_norm": 0.8548541069030762, + "learning_rate": 4.9743900399300745e-06, + "loss": 0.5424, + "step": 3817 + }, + { + "epoch": 0.28069401558594326, + "grad_norm": 0.8661667108535767, + "learning_rate": 4.9743762817589305e-06, + "loss": 0.5576, + "step": 3818 + }, + { + "epoch": 0.2807675341861491, + "grad_norm": 0.9348957538604736, + "learning_rate": 4.974362519912236e-06, + "loss": 0.6096, + "step": 3819 + }, + { + "epoch": 0.28084105278635496, + "grad_norm": 0.9599189758300781, + "learning_rate": 4.974348754390011e-06, + "loss": 0.6282, + "step": 3820 + }, + { + "epoch": 0.2809145713865608, + "grad_norm": 0.8632001280784607, + "learning_rate": 4.974334985192277e-06, + "loss": 0.6144, + "step": 3821 + }, + { + "epoch": 0.28098808998676666, + "grad_norm": 0.8990876078605652, + "learning_rate": 4.974321212319053e-06, + "loss": 0.59, + "step": 3822 + }, + { + "epoch": 0.2810616085869725, + "grad_norm": 0.8375502228736877, + "learning_rate": 4.974307435770361e-06, + "loss": 0.5917, + "step": 3823 + }, + { + "epoch": 0.28113512718717837, + "grad_norm": 0.885019838809967, + "learning_rate": 4.9742936555462206e-06, + "loss": 0.5305, + "step": 3824 + }, + { + "epoch": 0.2812086457873842, + "grad_norm": 0.9388319849967957, + "learning_rate": 4.974279871646653e-06, + "loss": 0.6268, + "step": 3825 + }, + { + "epoch": 0.2812821643875901, + "grad_norm": 0.8342400789260864, + "learning_rate": 4.974266084071678e-06, + "loss": 0.583, + "step": 3826 + }, + { + "epoch": 0.2813556829877959, + "grad_norm": 0.8718454837799072, + "learning_rate": 4.9742522928213155e-06, + "loss": 0.6229, + "step": 3827 + }, + { + "epoch": 0.2814292015880018, + "grad_norm": 0.7911084890365601, + "learning_rate": 4.974238497895587e-06, + "loss": 0.5093, + "step": 3828 + }, + { + "epoch": 0.2815027201882076, + "grad_norm": 0.8370251059532166, + "learning_rate": 4.974224699294513e-06, + "loss": 0.5597, + "step": 3829 + }, + { + "epoch": 0.2815762387884135, + "grad_norm": 0.8607432842254639, + "learning_rate": 4.974210897018114e-06, + "loss": 0.5758, + "step": 3830 + }, + { + "epoch": 0.2816497573886193, + "grad_norm": 0.8273751735687256, + "learning_rate": 4.974197091066409e-06, + "loss": 0.5613, + "step": 3831 + }, + { + "epoch": 0.2817232759888252, + "grad_norm": 0.9010818004608154, + "learning_rate": 4.974183281439421e-06, + "loss": 0.6024, + "step": 3832 + }, + { + "epoch": 0.281796794589031, + "grad_norm": 0.8820961117744446, + "learning_rate": 4.974169468137168e-06, + "loss": 0.5824, + "step": 3833 + }, + { + "epoch": 0.2818703131892369, + "grad_norm": 0.9142652750015259, + "learning_rate": 4.974155651159672e-06, + "loss": 0.6405, + "step": 3834 + }, + { + "epoch": 0.2819438317894427, + "grad_norm": 0.8559768795967102, + "learning_rate": 4.974141830506954e-06, + "loss": 0.5537, + "step": 3835 + }, + { + "epoch": 0.2820173503896486, + "grad_norm": 0.8630015850067139, + "learning_rate": 4.9741280061790345e-06, + "loss": 0.5715, + "step": 3836 + }, + { + "epoch": 0.2820908689898544, + "grad_norm": 0.8906663656234741, + "learning_rate": 4.974114178175932e-06, + "loss": 0.5748, + "step": 3837 + }, + { + "epoch": 0.2821643875900603, + "grad_norm": 0.8278405666351318, + "learning_rate": 4.974100346497668e-06, + "loss": 0.5502, + "step": 3838 + }, + { + "epoch": 0.2822379061902661, + "grad_norm": 0.9046358466148376, + "learning_rate": 4.974086511144264e-06, + "loss": 0.6215, + "step": 3839 + }, + { + "epoch": 0.282311424790472, + "grad_norm": 0.8648897409439087, + "learning_rate": 4.974072672115741e-06, + "loss": 0.5921, + "step": 3840 + }, + { + "epoch": 0.2823849433906778, + "grad_norm": 0.8295883536338806, + "learning_rate": 4.974058829412117e-06, + "loss": 0.5768, + "step": 3841 + }, + { + "epoch": 0.2824584619908837, + "grad_norm": 0.8732019662857056, + "learning_rate": 4.974044983033415e-06, + "loss": 0.5564, + "step": 3842 + }, + { + "epoch": 0.2825319805910895, + "grad_norm": 0.9128786325454712, + "learning_rate": 4.974031132979654e-06, + "loss": 0.5799, + "step": 3843 + }, + { + "epoch": 0.2826054991912954, + "grad_norm": 0.9370744228363037, + "learning_rate": 4.974017279250855e-06, + "loss": 0.5882, + "step": 3844 + }, + { + "epoch": 0.28267901779150123, + "grad_norm": 0.8772763013839722, + "learning_rate": 4.9740034218470395e-06, + "loss": 0.6041, + "step": 3845 + }, + { + "epoch": 0.2827525363917071, + "grad_norm": 0.8854936957359314, + "learning_rate": 4.973989560768228e-06, + "loss": 0.5621, + "step": 3846 + }, + { + "epoch": 0.28282605499191293, + "grad_norm": 0.8604376912117004, + "learning_rate": 4.9739756960144394e-06, + "loss": 0.5881, + "step": 3847 + }, + { + "epoch": 0.2828995735921188, + "grad_norm": 0.8522787690162659, + "learning_rate": 4.973961827585695e-06, + "loss": 0.5591, + "step": 3848 + }, + { + "epoch": 0.28297309219232464, + "grad_norm": 0.8715974688529968, + "learning_rate": 4.973947955482017e-06, + "loss": 0.5538, + "step": 3849 + }, + { + "epoch": 0.2830466107925305, + "grad_norm": 0.8567101955413818, + "learning_rate": 4.973934079703424e-06, + "loss": 0.5492, + "step": 3850 + }, + { + "epoch": 0.28312012939273634, + "grad_norm": 0.8273793458938599, + "learning_rate": 4.973920200249938e-06, + "loss": 0.572, + "step": 3851 + }, + { + "epoch": 0.2831936479929422, + "grad_norm": 0.8802024126052856, + "learning_rate": 4.973906317121578e-06, + "loss": 0.5794, + "step": 3852 + }, + { + "epoch": 0.28326716659314805, + "grad_norm": 0.9032513499259949, + "learning_rate": 4.9738924303183664e-06, + "loss": 0.637, + "step": 3853 + }, + { + "epoch": 0.2833406851933539, + "grad_norm": 0.8316348791122437, + "learning_rate": 4.973878539840323e-06, + "loss": 0.5833, + "step": 3854 + }, + { + "epoch": 0.28341420379355975, + "grad_norm": 0.8620200157165527, + "learning_rate": 4.973864645687468e-06, + "loss": 0.561, + "step": 3855 + }, + { + "epoch": 0.28348772239376563, + "grad_norm": 0.9037960171699524, + "learning_rate": 4.973850747859823e-06, + "loss": 0.5874, + "step": 3856 + }, + { + "epoch": 0.28356124099397145, + "grad_norm": 0.944177508354187, + "learning_rate": 4.973836846357408e-06, + "loss": 0.6164, + "step": 3857 + }, + { + "epoch": 0.28363475959417733, + "grad_norm": 0.8803848624229431, + "learning_rate": 4.973822941180244e-06, + "loss": 0.5751, + "step": 3858 + }, + { + "epoch": 0.28370827819438316, + "grad_norm": 0.8786578178405762, + "learning_rate": 4.973809032328352e-06, + "loss": 0.5997, + "step": 3859 + }, + { + "epoch": 0.28378179679458904, + "grad_norm": 0.8794791102409363, + "learning_rate": 4.973795119801751e-06, + "loss": 0.57, + "step": 3860 + }, + { + "epoch": 0.28385531539479486, + "grad_norm": 0.9361885786056519, + "learning_rate": 4.973781203600464e-06, + "loss": 0.5851, + "step": 3861 + }, + { + "epoch": 0.28392883399500074, + "grad_norm": 0.8640011548995972, + "learning_rate": 4.973767283724509e-06, + "loss": 0.5259, + "step": 3862 + }, + { + "epoch": 0.28400235259520656, + "grad_norm": 0.8183791637420654, + "learning_rate": 4.973753360173909e-06, + "loss": 0.5767, + "step": 3863 + }, + { + "epoch": 0.28407587119541244, + "grad_norm": 0.8927613496780396, + "learning_rate": 4.973739432948685e-06, + "loss": 0.5592, + "step": 3864 + }, + { + "epoch": 0.28414938979561827, + "grad_norm": 0.9361879229545593, + "learning_rate": 4.973725502048854e-06, + "loss": 0.5912, + "step": 3865 + }, + { + "epoch": 0.28422290839582415, + "grad_norm": 0.8951817154884338, + "learning_rate": 4.973711567474441e-06, + "loss": 0.573, + "step": 3866 + }, + { + "epoch": 0.28429642699602997, + "grad_norm": 0.8613684773445129, + "learning_rate": 4.973697629225465e-06, + "loss": 0.5945, + "step": 3867 + }, + { + "epoch": 0.28436994559623585, + "grad_norm": 0.820609986782074, + "learning_rate": 4.973683687301945e-06, + "loss": 0.5772, + "step": 3868 + }, + { + "epoch": 0.2844434641964417, + "grad_norm": 0.8632335662841797, + "learning_rate": 4.973669741703905e-06, + "loss": 0.5932, + "step": 3869 + }, + { + "epoch": 0.28451698279664756, + "grad_norm": 0.8822126388549805, + "learning_rate": 4.973655792431364e-06, + "loss": 0.589, + "step": 3870 + }, + { + "epoch": 0.2845905013968534, + "grad_norm": 0.8679861426353455, + "learning_rate": 4.973641839484342e-06, + "loss": 0.5688, + "step": 3871 + }, + { + "epoch": 0.28466401999705926, + "grad_norm": 0.8784146308898926, + "learning_rate": 4.973627882862862e-06, + "loss": 0.5832, + "step": 3872 + }, + { + "epoch": 0.2847375385972651, + "grad_norm": 0.8561198115348816, + "learning_rate": 4.973613922566941e-06, + "loss": 0.5491, + "step": 3873 + }, + { + "epoch": 0.28481105719747096, + "grad_norm": 0.8580086827278137, + "learning_rate": 4.973599958596604e-06, + "loss": 0.5829, + "step": 3874 + }, + { + "epoch": 0.2848845757976768, + "grad_norm": 0.9082037210464478, + "learning_rate": 4.973585990951868e-06, + "loss": 0.5966, + "step": 3875 + }, + { + "epoch": 0.28495809439788267, + "grad_norm": 0.88446444272995, + "learning_rate": 4.973572019632756e-06, + "loss": 0.5641, + "step": 3876 + }, + { + "epoch": 0.2850316129980885, + "grad_norm": 0.8376434445381165, + "learning_rate": 4.973558044639289e-06, + "loss": 0.5616, + "step": 3877 + }, + { + "epoch": 0.28510513159829437, + "grad_norm": 0.922153115272522, + "learning_rate": 4.973544065971486e-06, + "loss": 0.5669, + "step": 3878 + }, + { + "epoch": 0.2851786501985002, + "grad_norm": 0.8627899289131165, + "learning_rate": 4.973530083629369e-06, + "loss": 0.6047, + "step": 3879 + }, + { + "epoch": 0.2852521687987061, + "grad_norm": 0.8498644828796387, + "learning_rate": 4.973516097612959e-06, + "loss": 0.5862, + "step": 3880 + }, + { + "epoch": 0.2853256873989119, + "grad_norm": 0.8926449418067932, + "learning_rate": 4.973502107922276e-06, + "loss": 0.6241, + "step": 3881 + }, + { + "epoch": 0.2853992059991178, + "grad_norm": 0.8847448229789734, + "learning_rate": 4.973488114557341e-06, + "loss": 0.605, + "step": 3882 + }, + { + "epoch": 0.2854727245993236, + "grad_norm": 0.8710697889328003, + "learning_rate": 4.973474117518174e-06, + "loss": 0.5791, + "step": 3883 + }, + { + "epoch": 0.2855462431995295, + "grad_norm": 0.815375566482544, + "learning_rate": 4.973460116804798e-06, + "loss": 0.5464, + "step": 3884 + }, + { + "epoch": 0.2856197617997353, + "grad_norm": 0.8712039589881897, + "learning_rate": 4.9734461124172316e-06, + "loss": 0.5154, + "step": 3885 + }, + { + "epoch": 0.2856932803999412, + "grad_norm": 0.8862223029136658, + "learning_rate": 4.973432104355497e-06, + "loss": 0.6171, + "step": 3886 + }, + { + "epoch": 0.28576679900014706, + "grad_norm": 0.8817654252052307, + "learning_rate": 4.9734180926196144e-06, + "loss": 0.6106, + "step": 3887 + }, + { + "epoch": 0.2858403176003529, + "grad_norm": 0.8767346739768982, + "learning_rate": 4.973404077209605e-06, + "loss": 0.5313, + "step": 3888 + }, + { + "epoch": 0.28591383620055877, + "grad_norm": 0.9056532382965088, + "learning_rate": 4.973390058125488e-06, + "loss": 0.5877, + "step": 3889 + }, + { + "epoch": 0.2859873548007646, + "grad_norm": 0.8282418251037598, + "learning_rate": 4.973376035367286e-06, + "loss": 0.5528, + "step": 3890 + }, + { + "epoch": 0.2860608734009705, + "grad_norm": 0.8363142013549805, + "learning_rate": 4.97336200893502e-06, + "loss": 0.5411, + "step": 3891 + }, + { + "epoch": 0.2861343920011763, + "grad_norm": 0.8788071274757385, + "learning_rate": 4.97334797882871e-06, + "loss": 0.5757, + "step": 3892 + }, + { + "epoch": 0.2862079106013822, + "grad_norm": 0.896665096282959, + "learning_rate": 4.973333945048377e-06, + "loss": 0.6047, + "step": 3893 + }, + { + "epoch": 0.286281429201588, + "grad_norm": 0.8379198908805847, + "learning_rate": 4.973319907594042e-06, + "loss": 0.5662, + "step": 3894 + }, + { + "epoch": 0.2863549478017939, + "grad_norm": 0.8602544069290161, + "learning_rate": 4.973305866465724e-06, + "loss": 0.5821, + "step": 3895 + }, + { + "epoch": 0.2864284664019997, + "grad_norm": 0.9047395586967468, + "learning_rate": 4.973291821663447e-06, + "loss": 0.5469, + "step": 3896 + }, + { + "epoch": 0.2865019850022056, + "grad_norm": 0.8236153721809387, + "learning_rate": 4.973277773187231e-06, + "loss": 0.555, + "step": 3897 + }, + { + "epoch": 0.2865755036024114, + "grad_norm": 0.858555257320404, + "learning_rate": 4.973263721037096e-06, + "loss": 0.5466, + "step": 3898 + }, + { + "epoch": 0.2866490222026173, + "grad_norm": 0.818044126033783, + "learning_rate": 4.973249665213063e-06, + "loss": 0.5479, + "step": 3899 + }, + { + "epoch": 0.2867225408028231, + "grad_norm": 0.8915843367576599, + "learning_rate": 4.973235605715152e-06, + "loss": 0.5912, + "step": 3900 + }, + { + "epoch": 0.286796059403029, + "grad_norm": 0.8430159091949463, + "learning_rate": 4.973221542543386e-06, + "loss": 0.5702, + "step": 3901 + }, + { + "epoch": 0.2868695780032348, + "grad_norm": 0.9174548983573914, + "learning_rate": 4.973207475697785e-06, + "loss": 0.6462, + "step": 3902 + }, + { + "epoch": 0.2869430966034407, + "grad_norm": 0.8882114887237549, + "learning_rate": 4.973193405178369e-06, + "loss": 0.5942, + "step": 3903 + }, + { + "epoch": 0.2870166152036465, + "grad_norm": 0.8492469191551208, + "learning_rate": 4.973179330985161e-06, + "loss": 0.5644, + "step": 3904 + }, + { + "epoch": 0.2870901338038524, + "grad_norm": 0.8842396140098572, + "learning_rate": 4.973165253118179e-06, + "loss": 0.5838, + "step": 3905 + }, + { + "epoch": 0.2871636524040582, + "grad_norm": 0.8346566557884216, + "learning_rate": 4.973151171577446e-06, + "loss": 0.5844, + "step": 3906 + }, + { + "epoch": 0.2872371710042641, + "grad_norm": 0.9224783778190613, + "learning_rate": 4.973137086362982e-06, + "loss": 0.5553, + "step": 3907 + }, + { + "epoch": 0.2873106896044699, + "grad_norm": 0.821525514125824, + "learning_rate": 4.973122997474809e-06, + "loss": 0.5864, + "step": 3908 + }, + { + "epoch": 0.2873842082046758, + "grad_norm": 0.8589324355125427, + "learning_rate": 4.9731089049129465e-06, + "loss": 0.5839, + "step": 3909 + }, + { + "epoch": 0.28745772680488163, + "grad_norm": 0.9387845396995544, + "learning_rate": 4.973094808677417e-06, + "loss": 0.5791, + "step": 3910 + }, + { + "epoch": 0.2875312454050875, + "grad_norm": 0.8355178236961365, + "learning_rate": 4.97308070876824e-06, + "loss": 0.5785, + "step": 3911 + }, + { + "epoch": 0.28760476400529333, + "grad_norm": 0.8489434719085693, + "learning_rate": 4.973066605185437e-06, + "loss": 0.5624, + "step": 3912 + }, + { + "epoch": 0.2876782826054992, + "grad_norm": 0.8813144564628601, + "learning_rate": 4.973052497929029e-06, + "loss": 0.558, + "step": 3913 + }, + { + "epoch": 0.28775180120570504, + "grad_norm": 0.9063687324523926, + "learning_rate": 4.973038386999037e-06, + "loss": 0.5707, + "step": 3914 + }, + { + "epoch": 0.2878253198059109, + "grad_norm": 0.8516044020652771, + "learning_rate": 4.973024272395482e-06, + "loss": 0.5506, + "step": 3915 + }, + { + "epoch": 0.28789883840611674, + "grad_norm": 0.8309330940246582, + "learning_rate": 4.973010154118385e-06, + "loss": 0.5783, + "step": 3916 + }, + { + "epoch": 0.2879723570063226, + "grad_norm": 0.8491642475128174, + "learning_rate": 4.972996032167766e-06, + "loss": 0.5769, + "step": 3917 + }, + { + "epoch": 0.28804587560652845, + "grad_norm": 0.8692825436592102, + "learning_rate": 4.972981906543648e-06, + "loss": 0.5362, + "step": 3918 + }, + { + "epoch": 0.2881193942067343, + "grad_norm": 0.8395025134086609, + "learning_rate": 4.97296777724605e-06, + "loss": 0.5619, + "step": 3919 + }, + { + "epoch": 0.28819291280694015, + "grad_norm": 0.8238471746444702, + "learning_rate": 4.972953644274994e-06, + "loss": 0.5523, + "step": 3920 + }, + { + "epoch": 0.28826643140714603, + "grad_norm": 0.831596314907074, + "learning_rate": 4.9729395076305005e-06, + "loss": 0.5523, + "step": 3921 + }, + { + "epoch": 0.28833995000735185, + "grad_norm": 0.8404622673988342, + "learning_rate": 4.972925367312591e-06, + "loss": 0.5415, + "step": 3922 + }, + { + "epoch": 0.28841346860755773, + "grad_norm": 0.8982489109039307, + "learning_rate": 4.972911223321286e-06, + "loss": 0.5891, + "step": 3923 + }, + { + "epoch": 0.28848698720776356, + "grad_norm": 0.8884453773498535, + "learning_rate": 4.972897075656607e-06, + "loss": 0.5782, + "step": 3924 + }, + { + "epoch": 0.28856050580796944, + "grad_norm": 0.8560363054275513, + "learning_rate": 4.9728829243185765e-06, + "loss": 0.5898, + "step": 3925 + }, + { + "epoch": 0.28863402440817526, + "grad_norm": 0.8697994351387024, + "learning_rate": 4.972868769307211e-06, + "loss": 0.5855, + "step": 3926 + }, + { + "epoch": 0.28870754300838114, + "grad_norm": 0.8390358686447144, + "learning_rate": 4.972854610622536e-06, + "loss": 0.5225, + "step": 3927 + }, + { + "epoch": 0.28878106160858696, + "grad_norm": 0.9031485319137573, + "learning_rate": 4.972840448264571e-06, + "loss": 0.6293, + "step": 3928 + }, + { + "epoch": 0.28885458020879284, + "grad_norm": 0.8895864486694336, + "learning_rate": 4.972826282233336e-06, + "loss": 0.6037, + "step": 3929 + }, + { + "epoch": 0.28892809880899867, + "grad_norm": 0.8824878334999084, + "learning_rate": 4.9728121125288534e-06, + "loss": 0.5563, + "step": 3930 + }, + { + "epoch": 0.28900161740920455, + "grad_norm": 0.8641299605369568, + "learning_rate": 4.972797939151144e-06, + "loss": 0.5689, + "step": 3931 + }, + { + "epoch": 0.28907513600941037, + "grad_norm": 0.8667170405387878, + "learning_rate": 4.972783762100229e-06, + "loss": 0.5622, + "step": 3932 + }, + { + "epoch": 0.28914865460961625, + "grad_norm": 0.8334138989448547, + "learning_rate": 4.972769581376128e-06, + "loss": 0.5322, + "step": 3933 + }, + { + "epoch": 0.2892221732098221, + "grad_norm": 0.8354852199554443, + "learning_rate": 4.972755396978864e-06, + "loss": 0.5979, + "step": 3934 + }, + { + "epoch": 0.28929569181002796, + "grad_norm": 0.8514084219932556, + "learning_rate": 4.972741208908457e-06, + "loss": 0.5731, + "step": 3935 + }, + { + "epoch": 0.2893692104102338, + "grad_norm": 0.8812042474746704, + "learning_rate": 4.972727017164928e-06, + "loss": 0.5527, + "step": 3936 + }, + { + "epoch": 0.28944272901043966, + "grad_norm": 0.8806264996528625, + "learning_rate": 4.972712821748299e-06, + "loss": 0.5946, + "step": 3937 + }, + { + "epoch": 0.2895162476106455, + "grad_norm": 0.9001259207725525, + "learning_rate": 4.97269862265859e-06, + "loss": 0.5516, + "step": 3938 + }, + { + "epoch": 0.28958976621085136, + "grad_norm": 0.8798425197601318, + "learning_rate": 4.972684419895824e-06, + "loss": 0.5977, + "step": 3939 + }, + { + "epoch": 0.2896632848110572, + "grad_norm": 0.8355428576469421, + "learning_rate": 4.9726702134600195e-06, + "loss": 0.5317, + "step": 3940 + }, + { + "epoch": 0.28973680341126307, + "grad_norm": 0.8256295919418335, + "learning_rate": 4.972656003351198e-06, + "loss": 0.5552, + "step": 3941 + }, + { + "epoch": 0.2898103220114689, + "grad_norm": 0.889830470085144, + "learning_rate": 4.972641789569383e-06, + "loss": 0.5582, + "step": 3942 + }, + { + "epoch": 0.28988384061167477, + "grad_norm": 0.8702132701873779, + "learning_rate": 4.9726275721145935e-06, + "loss": 0.5426, + "step": 3943 + }, + { + "epoch": 0.2899573592118806, + "grad_norm": 0.9206307530403137, + "learning_rate": 4.97261335098685e-06, + "loss": 0.572, + "step": 3944 + }, + { + "epoch": 0.2900308778120865, + "grad_norm": 0.8509941101074219, + "learning_rate": 4.972599126186176e-06, + "loss": 0.5526, + "step": 3945 + }, + { + "epoch": 0.2901043964122923, + "grad_norm": 0.8778802156448364, + "learning_rate": 4.972584897712591e-06, + "loss": 0.5682, + "step": 3946 + }, + { + "epoch": 0.2901779150124982, + "grad_norm": 0.8766213655471802, + "learning_rate": 4.972570665566116e-06, + "loss": 0.5717, + "step": 3947 + }, + { + "epoch": 0.290251433612704, + "grad_norm": 0.8644251823425293, + "learning_rate": 4.972556429746775e-06, + "loss": 0.5669, + "step": 3948 + }, + { + "epoch": 0.2903249522129099, + "grad_norm": 0.8655006289482117, + "learning_rate": 4.972542190254584e-06, + "loss": 0.5807, + "step": 3949 + }, + { + "epoch": 0.2903984708131157, + "grad_norm": 0.8762001395225525, + "learning_rate": 4.972527947089568e-06, + "loss": 0.5768, + "step": 3950 + }, + { + "epoch": 0.2904719894133216, + "grad_norm": 0.8641055226325989, + "learning_rate": 4.972513700251748e-06, + "loss": 0.5801, + "step": 3951 + }, + { + "epoch": 0.2905455080135274, + "grad_norm": 0.9308143854141235, + "learning_rate": 4.972499449741143e-06, + "loss": 0.5593, + "step": 3952 + }, + { + "epoch": 0.2906190266137333, + "grad_norm": 0.9107832908630371, + "learning_rate": 4.972485195557776e-06, + "loss": 0.6143, + "step": 3953 + }, + { + "epoch": 0.2906925452139391, + "grad_norm": 0.8603370785713196, + "learning_rate": 4.972470937701668e-06, + "loss": 0.5908, + "step": 3954 + }, + { + "epoch": 0.290766063814145, + "grad_norm": 0.868354856967926, + "learning_rate": 4.972456676172839e-06, + "loss": 0.6026, + "step": 3955 + }, + { + "epoch": 0.2908395824143508, + "grad_norm": 0.9312296509742737, + "learning_rate": 4.972442410971312e-06, + "loss": 0.576, + "step": 3956 + }, + { + "epoch": 0.2909131010145567, + "grad_norm": 0.8566174507141113, + "learning_rate": 4.972428142097106e-06, + "loss": 0.5866, + "step": 3957 + }, + { + "epoch": 0.2909866196147625, + "grad_norm": 0.8766180872917175, + "learning_rate": 4.972413869550245e-06, + "loss": 0.5877, + "step": 3958 + }, + { + "epoch": 0.2910601382149684, + "grad_norm": 0.8905947208404541, + "learning_rate": 4.972399593330747e-06, + "loss": 0.6361, + "step": 3959 + }, + { + "epoch": 0.2911336568151742, + "grad_norm": 0.8476017117500305, + "learning_rate": 4.972385313438636e-06, + "loss": 0.5834, + "step": 3960 + }, + { + "epoch": 0.2912071754153801, + "grad_norm": 0.8725162744522095, + "learning_rate": 4.972371029873931e-06, + "loss": 0.6, + "step": 3961 + }, + { + "epoch": 0.29128069401558593, + "grad_norm": 0.8821106553077698, + "learning_rate": 4.9723567426366546e-06, + "loss": 0.5766, + "step": 3962 + }, + { + "epoch": 0.2913542126157918, + "grad_norm": 0.8884928822517395, + "learning_rate": 4.972342451726827e-06, + "loss": 0.6124, + "step": 3963 + }, + { + "epoch": 0.29142773121599763, + "grad_norm": 0.8423734307289124, + "learning_rate": 4.972328157144472e-06, + "loss": 0.5383, + "step": 3964 + }, + { + "epoch": 0.2915012498162035, + "grad_norm": 0.9170969724655151, + "learning_rate": 4.972313858889607e-06, + "loss": 0.6063, + "step": 3965 + }, + { + "epoch": 0.29157476841640934, + "grad_norm": 0.8861764073371887, + "learning_rate": 4.972299556962256e-06, + "loss": 0.6409, + "step": 3966 + }, + { + "epoch": 0.2916482870166152, + "grad_norm": 0.874661386013031, + "learning_rate": 4.972285251362438e-06, + "loss": 0.5313, + "step": 3967 + }, + { + "epoch": 0.29172180561682104, + "grad_norm": 0.8798588514328003, + "learning_rate": 4.972270942090177e-06, + "loss": 0.6398, + "step": 3968 + }, + { + "epoch": 0.2917953242170269, + "grad_norm": 0.8182119727134705, + "learning_rate": 4.972256629145493e-06, + "loss": 0.5733, + "step": 3969 + }, + { + "epoch": 0.29186884281723274, + "grad_norm": 0.878757894039154, + "learning_rate": 4.972242312528407e-06, + "loss": 0.5759, + "step": 3970 + }, + { + "epoch": 0.2919423614174386, + "grad_norm": 0.821625292301178, + "learning_rate": 4.972227992238941e-06, + "loss": 0.5522, + "step": 3971 + }, + { + "epoch": 0.29201588001764445, + "grad_norm": 0.8778731822967529, + "learning_rate": 4.972213668277114e-06, + "loss": 0.5659, + "step": 3972 + }, + { + "epoch": 0.2920893986178503, + "grad_norm": 0.9260889887809753, + "learning_rate": 4.972199340642951e-06, + "loss": 0.5737, + "step": 3973 + }, + { + "epoch": 0.29216291721805615, + "grad_norm": 0.8213694095611572, + "learning_rate": 4.972185009336468e-06, + "loss": 0.5739, + "step": 3974 + }, + { + "epoch": 0.29223643581826203, + "grad_norm": 0.8991402387619019, + "learning_rate": 4.972170674357692e-06, + "loss": 0.5981, + "step": 3975 + }, + { + "epoch": 0.29230995441846785, + "grad_norm": 0.8591686487197876, + "learning_rate": 4.9721563357066424e-06, + "loss": 0.5367, + "step": 3976 + }, + { + "epoch": 0.29238347301867373, + "grad_norm": 0.872077465057373, + "learning_rate": 4.9721419933833396e-06, + "loss": 0.6033, + "step": 3977 + }, + { + "epoch": 0.29245699161887956, + "grad_norm": 0.8885363936424255, + "learning_rate": 4.972127647387804e-06, + "loss": 0.629, + "step": 3978 + }, + { + "epoch": 0.29253051021908544, + "grad_norm": 0.8689450621604919, + "learning_rate": 4.972113297720059e-06, + "loss": 0.5695, + "step": 3979 + }, + { + "epoch": 0.29260402881929126, + "grad_norm": 0.9039717316627502, + "learning_rate": 4.9720989443801244e-06, + "loss": 0.5525, + "step": 3980 + }, + { + "epoch": 0.29267754741949714, + "grad_norm": 0.8777436017990112, + "learning_rate": 4.9720845873680234e-06, + "loss": 0.5447, + "step": 3981 + }, + { + "epoch": 0.29275106601970297, + "grad_norm": 0.9061790704727173, + "learning_rate": 4.972070226683775e-06, + "loss": 0.6237, + "step": 3982 + }, + { + "epoch": 0.29282458461990885, + "grad_norm": 0.8559431433677673, + "learning_rate": 4.972055862327402e-06, + "loss": 0.5979, + "step": 3983 + }, + { + "epoch": 0.29289810322011467, + "grad_norm": 0.9089655876159668, + "learning_rate": 4.9720414942989255e-06, + "loss": 0.5696, + "step": 3984 + }, + { + "epoch": 0.29297162182032055, + "grad_norm": 0.8509940505027771, + "learning_rate": 4.972027122598367e-06, + "loss": 0.6076, + "step": 3985 + }, + { + "epoch": 0.2930451404205264, + "grad_norm": 0.8527077436447144, + "learning_rate": 4.972012747225748e-06, + "loss": 0.5998, + "step": 3986 + }, + { + "epoch": 0.29311865902073225, + "grad_norm": 0.8128467798233032, + "learning_rate": 4.971998368181088e-06, + "loss": 0.5125, + "step": 3987 + }, + { + "epoch": 0.2931921776209381, + "grad_norm": 0.8874367475509644, + "learning_rate": 4.97198398546441e-06, + "loss": 0.6132, + "step": 3988 + }, + { + "epoch": 0.29326569622114396, + "grad_norm": 0.8149003982543945, + "learning_rate": 4.971969599075736e-06, + "loss": 0.5649, + "step": 3989 + }, + { + "epoch": 0.2933392148213498, + "grad_norm": 0.8323718309402466, + "learning_rate": 4.971955209015086e-06, + "loss": 0.6028, + "step": 3990 + }, + { + "epoch": 0.29341273342155566, + "grad_norm": 0.8204851746559143, + "learning_rate": 4.971940815282482e-06, + "loss": 0.5383, + "step": 3991 + }, + { + "epoch": 0.2934862520217615, + "grad_norm": 0.8235000967979431, + "learning_rate": 4.971926417877946e-06, + "loss": 0.5704, + "step": 3992 + }, + { + "epoch": 0.29355977062196736, + "grad_norm": 0.937429666519165, + "learning_rate": 4.9719120168014975e-06, + "loss": 0.6174, + "step": 3993 + }, + { + "epoch": 0.2936332892221732, + "grad_norm": 0.8444026708602905, + "learning_rate": 4.971897612053159e-06, + "loss": 0.599, + "step": 3994 + }, + { + "epoch": 0.29370680782237907, + "grad_norm": 0.80802321434021, + "learning_rate": 4.971883203632954e-06, + "loss": 0.5559, + "step": 3995 + }, + { + "epoch": 0.2937803264225849, + "grad_norm": 0.9114919900894165, + "learning_rate": 4.971868791540899e-06, + "loss": 0.6073, + "step": 3996 + }, + { + "epoch": 0.29385384502279077, + "grad_norm": 0.9541204571723938, + "learning_rate": 4.97185437577702e-06, + "loss": 0.5803, + "step": 3997 + }, + { + "epoch": 0.2939273636229966, + "grad_norm": 0.8144898414611816, + "learning_rate": 4.9718399563413366e-06, + "loss": 0.5744, + "step": 3998 + }, + { + "epoch": 0.2940008822232025, + "grad_norm": 0.8806759715080261, + "learning_rate": 4.97182553323387e-06, + "loss": 0.5695, + "step": 3999 + }, + { + "epoch": 0.2940744008234083, + "grad_norm": 0.8788102269172668, + "learning_rate": 4.971811106454641e-06, + "loss": 0.5993, + "step": 4000 + }, + { + "epoch": 0.2941479194236142, + "grad_norm": 0.8804169297218323, + "learning_rate": 4.971796676003673e-06, + "loss": 0.5939, + "step": 4001 + }, + { + "epoch": 0.29422143802382, + "grad_norm": 0.8996578454971313, + "learning_rate": 4.971782241880987e-06, + "loss": 0.5384, + "step": 4002 + }, + { + "epoch": 0.2942949566240259, + "grad_norm": 0.8548516631126404, + "learning_rate": 4.971767804086603e-06, + "loss": 0.5927, + "step": 4003 + }, + { + "epoch": 0.2943684752242317, + "grad_norm": 0.867058277130127, + "learning_rate": 4.9717533626205435e-06, + "loss": 0.5763, + "step": 4004 + }, + { + "epoch": 0.2944419938244376, + "grad_norm": 0.7882376909255981, + "learning_rate": 4.971738917482829e-06, + "loss": 0.5499, + "step": 4005 + }, + { + "epoch": 0.2945155124246434, + "grad_norm": 0.8448413610458374, + "learning_rate": 4.971724468673482e-06, + "loss": 0.5775, + "step": 4006 + }, + { + "epoch": 0.2945890310248493, + "grad_norm": 0.9032270908355713, + "learning_rate": 4.971710016192525e-06, + "loss": 0.5842, + "step": 4007 + }, + { + "epoch": 0.2946625496250551, + "grad_norm": 0.8598490953445435, + "learning_rate": 4.971695560039977e-06, + "loss": 0.6035, + "step": 4008 + }, + { + "epoch": 0.294736068225261, + "grad_norm": 0.8988653421401978, + "learning_rate": 4.97168110021586e-06, + "loss": 0.6018, + "step": 4009 + }, + { + "epoch": 0.2948095868254668, + "grad_norm": 0.8539258241653442, + "learning_rate": 4.971666636720197e-06, + "loss": 0.5711, + "step": 4010 + }, + { + "epoch": 0.2948831054256727, + "grad_norm": 0.8627692461013794, + "learning_rate": 4.971652169553008e-06, + "loss": 0.6146, + "step": 4011 + }, + { + "epoch": 0.2949566240258785, + "grad_norm": 0.8457290530204773, + "learning_rate": 4.971637698714316e-06, + "loss": 0.5637, + "step": 4012 + }, + { + "epoch": 0.2950301426260844, + "grad_norm": 0.8628051280975342, + "learning_rate": 4.97162322420414e-06, + "loss": 0.5784, + "step": 4013 + }, + { + "epoch": 0.2951036612262902, + "grad_norm": 0.864960253238678, + "learning_rate": 4.971608746022504e-06, + "loss": 0.5703, + "step": 4014 + }, + { + "epoch": 0.2951771798264961, + "grad_norm": 0.8831839561462402, + "learning_rate": 4.971594264169429e-06, + "loss": 0.6218, + "step": 4015 + }, + { + "epoch": 0.29525069842670193, + "grad_norm": 0.8474220037460327, + "learning_rate": 4.971579778644935e-06, + "loss": 0.5792, + "step": 4016 + }, + { + "epoch": 0.2953242170269078, + "grad_norm": 0.9152776598930359, + "learning_rate": 4.971565289449045e-06, + "loss": 0.541, + "step": 4017 + }, + { + "epoch": 0.29539773562711363, + "grad_norm": 0.8867896795272827, + "learning_rate": 4.971550796581781e-06, + "loss": 0.5839, + "step": 4018 + }, + { + "epoch": 0.2954712542273195, + "grad_norm": 0.8564320802688599, + "learning_rate": 4.971536300043163e-06, + "loss": 0.5803, + "step": 4019 + }, + { + "epoch": 0.29554477282752534, + "grad_norm": 0.865562379360199, + "learning_rate": 4.9715217998332125e-06, + "loss": 0.5781, + "step": 4020 + }, + { + "epoch": 0.2956182914277312, + "grad_norm": 0.8224685192108154, + "learning_rate": 4.971507295951952e-06, + "loss": 0.5649, + "step": 4021 + }, + { + "epoch": 0.29569181002793704, + "grad_norm": 0.8663924336433411, + "learning_rate": 4.971492788399403e-06, + "loss": 0.588, + "step": 4022 + }, + { + "epoch": 0.2957653286281429, + "grad_norm": 0.8616053462028503, + "learning_rate": 4.971478277175586e-06, + "loss": 0.5396, + "step": 4023 + }, + { + "epoch": 0.29583884722834874, + "grad_norm": 0.8193510174751282, + "learning_rate": 4.971463762280525e-06, + "loss": 0.563, + "step": 4024 + }, + { + "epoch": 0.2959123658285546, + "grad_norm": 0.8699986934661865, + "learning_rate": 4.971449243714239e-06, + "loss": 0.6412, + "step": 4025 + }, + { + "epoch": 0.2959858844287605, + "grad_norm": 0.881372332572937, + "learning_rate": 4.97143472147675e-06, + "loss": 0.63, + "step": 4026 + }, + { + "epoch": 0.29605940302896633, + "grad_norm": 0.8568068742752075, + "learning_rate": 4.9714201955680805e-06, + "loss": 0.5422, + "step": 4027 + }, + { + "epoch": 0.2961329216291722, + "grad_norm": 0.8432124853134155, + "learning_rate": 4.971405665988253e-06, + "loss": 0.5525, + "step": 4028 + }, + { + "epoch": 0.29620644022937803, + "grad_norm": 0.9590840935707092, + "learning_rate": 4.971391132737285e-06, + "loss": 0.6122, + "step": 4029 + }, + { + "epoch": 0.2962799588295839, + "grad_norm": 0.8849202394485474, + "learning_rate": 4.971376595815203e-06, + "loss": 0.5973, + "step": 4030 + }, + { + "epoch": 0.29635347742978974, + "grad_norm": 0.8541272878646851, + "learning_rate": 4.971362055222026e-06, + "loss": 0.56, + "step": 4031 + }, + { + "epoch": 0.2964269960299956, + "grad_norm": 0.8926579356193542, + "learning_rate": 4.971347510957775e-06, + "loss": 0.5734, + "step": 4032 + }, + { + "epoch": 0.29650051463020144, + "grad_norm": 0.9326959252357483, + "learning_rate": 4.971332963022473e-06, + "loss": 0.5821, + "step": 4033 + }, + { + "epoch": 0.2965740332304073, + "grad_norm": 0.8979946970939636, + "learning_rate": 4.9713184114161415e-06, + "loss": 0.5756, + "step": 4034 + }, + { + "epoch": 0.29664755183061314, + "grad_norm": 0.8842946887016296, + "learning_rate": 4.971303856138801e-06, + "loss": 0.5823, + "step": 4035 + }, + { + "epoch": 0.296721070430819, + "grad_norm": 0.8414936065673828, + "learning_rate": 4.9712892971904755e-06, + "loss": 0.5939, + "step": 4036 + }, + { + "epoch": 0.29679458903102485, + "grad_norm": 0.8553427457809448, + "learning_rate": 4.971274734571184e-06, + "loss": 0.593, + "step": 4037 + }, + { + "epoch": 0.2968681076312307, + "grad_norm": 0.8234105706214905, + "learning_rate": 4.9712601682809495e-06, + "loss": 0.5941, + "step": 4038 + }, + { + "epoch": 0.29694162623143655, + "grad_norm": 0.8634146451950073, + "learning_rate": 4.9712455983197934e-06, + "loss": 0.5887, + "step": 4039 + }, + { + "epoch": 0.29701514483164243, + "grad_norm": 0.9162194132804871, + "learning_rate": 4.971231024687737e-06, + "loss": 0.5955, + "step": 4040 + }, + { + "epoch": 0.29708866343184825, + "grad_norm": 0.8831806182861328, + "learning_rate": 4.971216447384802e-06, + "loss": 0.558, + "step": 4041 + }, + { + "epoch": 0.29716218203205413, + "grad_norm": 0.854078471660614, + "learning_rate": 4.9712018664110114e-06, + "loss": 0.5934, + "step": 4042 + }, + { + "epoch": 0.29723570063225996, + "grad_norm": 0.8731881380081177, + "learning_rate": 4.971187281766385e-06, + "loss": 0.5676, + "step": 4043 + }, + { + "epoch": 0.29730921923246584, + "grad_norm": 0.8885738253593445, + "learning_rate": 4.971172693450945e-06, + "loss": 0.5988, + "step": 4044 + }, + { + "epoch": 0.29738273783267166, + "grad_norm": 0.8578054904937744, + "learning_rate": 4.971158101464714e-06, + "loss": 0.5544, + "step": 4045 + }, + { + "epoch": 0.29745625643287754, + "grad_norm": 0.8986631631851196, + "learning_rate": 4.971143505807712e-06, + "loss": 0.6062, + "step": 4046 + }, + { + "epoch": 0.29752977503308337, + "grad_norm": 0.8215470910072327, + "learning_rate": 4.971128906479963e-06, + "loss": 0.5566, + "step": 4047 + }, + { + "epoch": 0.29760329363328925, + "grad_norm": 0.8417088389396667, + "learning_rate": 4.971114303481485e-06, + "loss": 0.5989, + "step": 4048 + }, + { + "epoch": 0.29767681223349507, + "grad_norm": 0.862511157989502, + "learning_rate": 4.971099696812304e-06, + "loss": 0.5965, + "step": 4049 + }, + { + "epoch": 0.29775033083370095, + "grad_norm": 0.8813555836677551, + "learning_rate": 4.9710850864724394e-06, + "loss": 0.5667, + "step": 4050 + }, + { + "epoch": 0.2978238494339068, + "grad_norm": 0.8376708626747131, + "learning_rate": 4.971070472461913e-06, + "loss": 0.5289, + "step": 4051 + }, + { + "epoch": 0.29789736803411265, + "grad_norm": 0.8710941672325134, + "learning_rate": 4.9710558547807466e-06, + "loss": 0.5981, + "step": 4052 + }, + { + "epoch": 0.2979708866343185, + "grad_norm": 0.874400794506073, + "learning_rate": 4.971041233428962e-06, + "loss": 0.6191, + "step": 4053 + }, + { + "epoch": 0.29804440523452436, + "grad_norm": 0.8520534634590149, + "learning_rate": 4.971026608406582e-06, + "loss": 0.5632, + "step": 4054 + }, + { + "epoch": 0.2981179238347302, + "grad_norm": 0.8294652104377747, + "learning_rate": 4.971011979713626e-06, + "loss": 0.5588, + "step": 4055 + }, + { + "epoch": 0.29819144243493606, + "grad_norm": 0.8175615072250366, + "learning_rate": 4.970997347350117e-06, + "loss": 0.5317, + "step": 4056 + }, + { + "epoch": 0.2982649610351419, + "grad_norm": 0.8346705436706543, + "learning_rate": 4.970982711316078e-06, + "loss": 0.558, + "step": 4057 + }, + { + "epoch": 0.29833847963534776, + "grad_norm": 0.8893750905990601, + "learning_rate": 4.970968071611528e-06, + "loss": 0.5408, + "step": 4058 + }, + { + "epoch": 0.2984119982355536, + "grad_norm": 0.8444280624389648, + "learning_rate": 4.970953428236491e-06, + "loss": 0.5768, + "step": 4059 + }, + { + "epoch": 0.29848551683575947, + "grad_norm": 0.9016358852386475, + "learning_rate": 4.970938781190988e-06, + "loss": 0.5317, + "step": 4060 + }, + { + "epoch": 0.2985590354359653, + "grad_norm": 0.8368706107139587, + "learning_rate": 4.970924130475041e-06, + "loss": 0.5552, + "step": 4061 + }, + { + "epoch": 0.29863255403617117, + "grad_norm": 0.8301984071731567, + "learning_rate": 4.970909476088671e-06, + "loss": 0.5524, + "step": 4062 + }, + { + "epoch": 0.298706072636377, + "grad_norm": 0.8278689980506897, + "learning_rate": 4.970894818031901e-06, + "loss": 0.5501, + "step": 4063 + }, + { + "epoch": 0.2987795912365829, + "grad_norm": 0.8605788946151733, + "learning_rate": 4.97088015630475e-06, + "loss": 0.5831, + "step": 4064 + }, + { + "epoch": 0.2988531098367887, + "grad_norm": 0.880050539970398, + "learning_rate": 4.970865490907244e-06, + "loss": 0.5845, + "step": 4065 + }, + { + "epoch": 0.2989266284369946, + "grad_norm": 0.8318799138069153, + "learning_rate": 4.9708508218394015e-06, + "loss": 0.5357, + "step": 4066 + }, + { + "epoch": 0.2990001470372004, + "grad_norm": 0.8354960680007935, + "learning_rate": 4.970836149101246e-06, + "loss": 0.5788, + "step": 4067 + }, + { + "epoch": 0.2990736656374063, + "grad_norm": 0.9254164695739746, + "learning_rate": 4.970821472692799e-06, + "loss": 0.6343, + "step": 4068 + }, + { + "epoch": 0.2991471842376121, + "grad_norm": 0.8300535678863525, + "learning_rate": 4.970806792614081e-06, + "loss": 0.5439, + "step": 4069 + }, + { + "epoch": 0.299220702837818, + "grad_norm": 0.8814462423324585, + "learning_rate": 4.970792108865116e-06, + "loss": 0.5231, + "step": 4070 + }, + { + "epoch": 0.2992942214380238, + "grad_norm": 0.8278272747993469, + "learning_rate": 4.970777421445924e-06, + "loss": 0.5633, + "step": 4071 + }, + { + "epoch": 0.2993677400382297, + "grad_norm": 0.8238525986671448, + "learning_rate": 4.970762730356527e-06, + "loss": 0.5388, + "step": 4072 + }, + { + "epoch": 0.2994412586384355, + "grad_norm": 0.8191718459129333, + "learning_rate": 4.970748035596948e-06, + "loss": 0.5694, + "step": 4073 + }, + { + "epoch": 0.2995147772386414, + "grad_norm": 0.8556621074676514, + "learning_rate": 4.9707333371672075e-06, + "loss": 0.5646, + "step": 4074 + }, + { + "epoch": 0.2995882958388472, + "grad_norm": 0.9083544015884399, + "learning_rate": 4.970718635067328e-06, + "loss": 0.6152, + "step": 4075 + }, + { + "epoch": 0.2996618144390531, + "grad_norm": 0.8681440949440002, + "learning_rate": 4.970703929297331e-06, + "loss": 0.5608, + "step": 4076 + }, + { + "epoch": 0.2997353330392589, + "grad_norm": 0.8395824432373047, + "learning_rate": 4.970689219857239e-06, + "loss": 0.5637, + "step": 4077 + }, + { + "epoch": 0.2998088516394648, + "grad_norm": 0.9263561367988586, + "learning_rate": 4.970674506747074e-06, + "loss": 0.5806, + "step": 4078 + }, + { + "epoch": 0.2998823702396706, + "grad_norm": 0.8685020804405212, + "learning_rate": 4.970659789966856e-06, + "loss": 0.5791, + "step": 4079 + }, + { + "epoch": 0.2999558888398765, + "grad_norm": 0.8798215985298157, + "learning_rate": 4.97064506951661e-06, + "loss": 0.5324, + "step": 4080 + }, + { + "epoch": 0.30002940744008233, + "grad_norm": 0.8150950074195862, + "learning_rate": 4.970630345396354e-06, + "loss": 0.5622, + "step": 4081 + }, + { + "epoch": 0.3001029260402882, + "grad_norm": 0.9311608672142029, + "learning_rate": 4.970615617606114e-06, + "loss": 0.6461, + "step": 4082 + }, + { + "epoch": 0.30017644464049403, + "grad_norm": 0.872236430644989, + "learning_rate": 4.9706008861459086e-06, + "loss": 0.5796, + "step": 4083 + }, + { + "epoch": 0.3002499632406999, + "grad_norm": 0.8650315999984741, + "learning_rate": 4.9705861510157605e-06, + "loss": 0.5665, + "step": 4084 + }, + { + "epoch": 0.30032348184090574, + "grad_norm": 0.8436409831047058, + "learning_rate": 4.970571412215692e-06, + "loss": 0.5927, + "step": 4085 + }, + { + "epoch": 0.3003970004411116, + "grad_norm": 0.8626211285591125, + "learning_rate": 4.970556669745726e-06, + "loss": 0.5853, + "step": 4086 + }, + { + "epoch": 0.30047051904131744, + "grad_norm": 0.9225160479545593, + "learning_rate": 4.9705419236058825e-06, + "loss": 0.6189, + "step": 4087 + }, + { + "epoch": 0.3005440376415233, + "grad_norm": 0.8712136745452881, + "learning_rate": 4.970527173796185e-06, + "loss": 0.6118, + "step": 4088 + }, + { + "epoch": 0.30061755624172914, + "grad_norm": 0.8948162198066711, + "learning_rate": 4.970512420316654e-06, + "loss": 0.6257, + "step": 4089 + }, + { + "epoch": 0.300691074841935, + "grad_norm": 0.8661245703697205, + "learning_rate": 4.970497663167312e-06, + "loss": 0.6096, + "step": 4090 + }, + { + "epoch": 0.30076459344214085, + "grad_norm": 0.8612847924232483, + "learning_rate": 4.9704829023481815e-06, + "loss": 0.5738, + "step": 4091 + }, + { + "epoch": 0.30083811204234673, + "grad_norm": 0.9011790752410889, + "learning_rate": 4.970468137859284e-06, + "loss": 0.6231, + "step": 4092 + }, + { + "epoch": 0.30091163064255255, + "grad_norm": 0.909822940826416, + "learning_rate": 4.970453369700641e-06, + "loss": 0.5793, + "step": 4093 + }, + { + "epoch": 0.30098514924275843, + "grad_norm": 0.9311142563819885, + "learning_rate": 4.970438597872274e-06, + "loss": 0.5919, + "step": 4094 + }, + { + "epoch": 0.30105866784296426, + "grad_norm": 0.939483642578125, + "learning_rate": 4.9704238223742066e-06, + "loss": 0.578, + "step": 4095 + }, + { + "epoch": 0.30113218644317014, + "grad_norm": 0.8510898947715759, + "learning_rate": 4.97040904320646e-06, + "loss": 0.5626, + "step": 4096 + }, + { + "epoch": 0.30120570504337596, + "grad_norm": 0.9044508337974548, + "learning_rate": 4.9703942603690555e-06, + "loss": 0.5776, + "step": 4097 + }, + { + "epoch": 0.30127922364358184, + "grad_norm": 0.8619199991226196, + "learning_rate": 4.9703794738620155e-06, + "loss": 0.5749, + "step": 4098 + }, + { + "epoch": 0.30135274224378766, + "grad_norm": 0.9099642634391785, + "learning_rate": 4.970364683685364e-06, + "loss": 0.5704, + "step": 4099 + }, + { + "epoch": 0.30142626084399354, + "grad_norm": 0.8358913064002991, + "learning_rate": 4.970349889839119e-06, + "loss": 0.5562, + "step": 4100 + }, + { + "epoch": 0.30149977944419937, + "grad_norm": 0.8636524677276611, + "learning_rate": 4.970335092323305e-06, + "loss": 0.5748, + "step": 4101 + }, + { + "epoch": 0.30157329804440525, + "grad_norm": 0.8709864616394043, + "learning_rate": 4.970320291137943e-06, + "loss": 0.6251, + "step": 4102 + }, + { + "epoch": 0.30164681664461107, + "grad_norm": 0.8966766595840454, + "learning_rate": 4.970305486283057e-06, + "loss": 0.5664, + "step": 4103 + }, + { + "epoch": 0.30172033524481695, + "grad_norm": 0.8130505084991455, + "learning_rate": 4.970290677758666e-06, + "loss": 0.5343, + "step": 4104 + }, + { + "epoch": 0.3017938538450228, + "grad_norm": 0.90833580493927, + "learning_rate": 4.9702758655647945e-06, + "loss": 0.6284, + "step": 4105 + }, + { + "epoch": 0.30186737244522865, + "grad_norm": 0.8380922079086304, + "learning_rate": 4.970261049701463e-06, + "loss": 0.5424, + "step": 4106 + }, + { + "epoch": 0.3019408910454345, + "grad_norm": 0.8132293224334717, + "learning_rate": 4.970246230168694e-06, + "loss": 0.58, + "step": 4107 + }, + { + "epoch": 0.30201440964564036, + "grad_norm": 0.8923648595809937, + "learning_rate": 4.9702314069665096e-06, + "loss": 0.5626, + "step": 4108 + }, + { + "epoch": 0.3020879282458462, + "grad_norm": 0.8190730214118958, + "learning_rate": 4.970216580094931e-06, + "loss": 0.5562, + "step": 4109 + }, + { + "epoch": 0.30216144684605206, + "grad_norm": 0.8576318025588989, + "learning_rate": 4.970201749553981e-06, + "loss": 0.5569, + "step": 4110 + }, + { + "epoch": 0.3022349654462579, + "grad_norm": 0.8124940991401672, + "learning_rate": 4.970186915343682e-06, + "loss": 0.547, + "step": 4111 + }, + { + "epoch": 0.30230848404646377, + "grad_norm": 0.831725537776947, + "learning_rate": 4.970172077464056e-06, + "loss": 0.5581, + "step": 4112 + }, + { + "epoch": 0.3023820026466696, + "grad_norm": 0.8602932691574097, + "learning_rate": 4.9701572359151245e-06, + "loss": 0.5924, + "step": 4113 + }, + { + "epoch": 0.30245552124687547, + "grad_norm": 0.8904943466186523, + "learning_rate": 4.970142390696909e-06, + "loss": 0.5938, + "step": 4114 + }, + { + "epoch": 0.3025290398470813, + "grad_norm": 0.8519209623336792, + "learning_rate": 4.970127541809433e-06, + "loss": 0.5535, + "step": 4115 + }, + { + "epoch": 0.3026025584472872, + "grad_norm": 0.8669244050979614, + "learning_rate": 4.970112689252718e-06, + "loss": 0.6279, + "step": 4116 + }, + { + "epoch": 0.302676077047493, + "grad_norm": 0.8641859889030457, + "learning_rate": 4.970097833026785e-06, + "loss": 0.5823, + "step": 4117 + }, + { + "epoch": 0.3027495956476989, + "grad_norm": 0.8670492172241211, + "learning_rate": 4.970082973131658e-06, + "loss": 0.6082, + "step": 4118 + }, + { + "epoch": 0.3028231142479047, + "grad_norm": 0.8304994702339172, + "learning_rate": 4.970068109567357e-06, + "loss": 0.5416, + "step": 4119 + }, + { + "epoch": 0.3028966328481106, + "grad_norm": 0.8728642463684082, + "learning_rate": 4.970053242333905e-06, + "loss": 0.5412, + "step": 4120 + }, + { + "epoch": 0.3029701514483164, + "grad_norm": 0.8810554146766663, + "learning_rate": 4.970038371431325e-06, + "loss": 0.5589, + "step": 4121 + }, + { + "epoch": 0.3030436700485223, + "grad_norm": 0.8306745290756226, + "learning_rate": 4.970023496859638e-06, + "loss": 0.5679, + "step": 4122 + }, + { + "epoch": 0.3031171886487281, + "grad_norm": 0.864225447177887, + "learning_rate": 4.9700086186188655e-06, + "loss": 0.5453, + "step": 4123 + }, + { + "epoch": 0.303190707248934, + "grad_norm": 0.8899684548377991, + "learning_rate": 4.969993736709031e-06, + "loss": 0.6098, + "step": 4124 + }, + { + "epoch": 0.3032642258491398, + "grad_norm": 0.9102900624275208, + "learning_rate": 4.969978851130156e-06, + "loss": 0.5823, + "step": 4125 + }, + { + "epoch": 0.3033377444493457, + "grad_norm": 0.8339845538139343, + "learning_rate": 4.9699639618822635e-06, + "loss": 0.5584, + "step": 4126 + }, + { + "epoch": 0.3034112630495515, + "grad_norm": 0.8568351864814758, + "learning_rate": 4.969949068965374e-06, + "loss": 0.5784, + "step": 4127 + }, + { + "epoch": 0.3034847816497574, + "grad_norm": 0.850752592086792, + "learning_rate": 4.969934172379511e-06, + "loss": 0.5949, + "step": 4128 + }, + { + "epoch": 0.3035583002499632, + "grad_norm": 0.9008724689483643, + "learning_rate": 4.969919272124695e-06, + "loss": 0.6154, + "step": 4129 + }, + { + "epoch": 0.3036318188501691, + "grad_norm": 0.8519163131713867, + "learning_rate": 4.96990436820095e-06, + "loss": 0.5517, + "step": 4130 + }, + { + "epoch": 0.3037053374503749, + "grad_norm": 0.862023651599884, + "learning_rate": 4.969889460608297e-06, + "loss": 0.616, + "step": 4131 + }, + { + "epoch": 0.3037788560505808, + "grad_norm": 0.8745807409286499, + "learning_rate": 4.969874549346759e-06, + "loss": 0.5853, + "step": 4132 + }, + { + "epoch": 0.3038523746507866, + "grad_norm": 0.8876067996025085, + "learning_rate": 4.969859634416356e-06, + "loss": 0.5727, + "step": 4133 + }, + { + "epoch": 0.3039258932509925, + "grad_norm": 0.8753001093864441, + "learning_rate": 4.969844715817114e-06, + "loss": 0.6026, + "step": 4134 + }, + { + "epoch": 0.30399941185119833, + "grad_norm": 0.8532238006591797, + "learning_rate": 4.969829793549051e-06, + "loss": 0.5891, + "step": 4135 + }, + { + "epoch": 0.3040729304514042, + "grad_norm": 0.8163655400276184, + "learning_rate": 4.969814867612192e-06, + "loss": 0.5322, + "step": 4136 + }, + { + "epoch": 0.30414644905161003, + "grad_norm": 0.8838476538658142, + "learning_rate": 4.969799938006558e-06, + "loss": 0.5754, + "step": 4137 + }, + { + "epoch": 0.3042199676518159, + "grad_norm": 0.7983153462409973, + "learning_rate": 4.9697850047321715e-06, + "loss": 0.5156, + "step": 4138 + }, + { + "epoch": 0.30429348625202174, + "grad_norm": 0.8331690430641174, + "learning_rate": 4.969770067789055e-06, + "loss": 0.5563, + "step": 4139 + }, + { + "epoch": 0.3043670048522276, + "grad_norm": 0.8630769848823547, + "learning_rate": 4.9697551271772304e-06, + "loss": 0.5343, + "step": 4140 + }, + { + "epoch": 0.30444052345243344, + "grad_norm": 0.8715433478355408, + "learning_rate": 4.969740182896719e-06, + "loss": 0.6095, + "step": 4141 + }, + { + "epoch": 0.3045140420526393, + "grad_norm": 0.8720673322677612, + "learning_rate": 4.969725234947544e-06, + "loss": 0.5786, + "step": 4142 + }, + { + "epoch": 0.30458756065284515, + "grad_norm": 0.824607789516449, + "learning_rate": 4.969710283329728e-06, + "loss": 0.5567, + "step": 4143 + }, + { + "epoch": 0.304661079253051, + "grad_norm": 0.8784499168395996, + "learning_rate": 4.969695328043293e-06, + "loss": 0.57, + "step": 4144 + }, + { + "epoch": 0.30473459785325685, + "grad_norm": 0.8164864182472229, + "learning_rate": 4.969680369088259e-06, + "loss": 0.5937, + "step": 4145 + }, + { + "epoch": 0.30480811645346273, + "grad_norm": 0.9703261256217957, + "learning_rate": 4.969665406464651e-06, + "loss": 0.6099, + "step": 4146 + }, + { + "epoch": 0.30488163505366855, + "grad_norm": 0.8781460523605347, + "learning_rate": 4.969650440172491e-06, + "loss": 0.5952, + "step": 4147 + }, + { + "epoch": 0.30495515365387443, + "grad_norm": 0.8659241199493408, + "learning_rate": 4.9696354702118e-06, + "loss": 0.5888, + "step": 4148 + }, + { + "epoch": 0.30502867225408026, + "grad_norm": 0.8548066020011902, + "learning_rate": 4.9696204965826e-06, + "loss": 0.571, + "step": 4149 + }, + { + "epoch": 0.30510219085428614, + "grad_norm": 0.8678986430168152, + "learning_rate": 4.969605519284915e-06, + "loss": 0.5704, + "step": 4150 + }, + { + "epoch": 0.30517570945449196, + "grad_norm": 0.9218003153800964, + "learning_rate": 4.969590538318766e-06, + "loss": 0.5762, + "step": 4151 + }, + { + "epoch": 0.30524922805469784, + "grad_norm": 0.8501452803611755, + "learning_rate": 4.969575553684175e-06, + "loss": 0.5685, + "step": 4152 + }, + { + "epoch": 0.30532274665490367, + "grad_norm": 0.899420976638794, + "learning_rate": 4.9695605653811655e-06, + "loss": 0.6189, + "step": 4153 + }, + { + "epoch": 0.30539626525510954, + "grad_norm": 0.8911144733428955, + "learning_rate": 4.969545573409758e-06, + "loss": 0.5924, + "step": 4154 + }, + { + "epoch": 0.30546978385531537, + "grad_norm": 0.9279232621192932, + "learning_rate": 4.9695305777699765e-06, + "loss": 0.6404, + "step": 4155 + }, + { + "epoch": 0.30554330245552125, + "grad_norm": 0.8467051982879639, + "learning_rate": 4.969515578461842e-06, + "loss": 0.5882, + "step": 4156 + }, + { + "epoch": 0.3056168210557271, + "grad_norm": 0.9458869099617004, + "learning_rate": 4.9695005754853775e-06, + "loss": 0.593, + "step": 4157 + }, + { + "epoch": 0.30569033965593295, + "grad_norm": 0.8294467926025391, + "learning_rate": 4.9694855688406054e-06, + "loss": 0.5417, + "step": 4158 + }, + { + "epoch": 0.3057638582561388, + "grad_norm": 0.8747893571853638, + "learning_rate": 4.969470558527548e-06, + "loss": 0.6064, + "step": 4159 + }, + { + "epoch": 0.30583737685634466, + "grad_norm": 0.8651025295257568, + "learning_rate": 4.969455544546227e-06, + "loss": 0.5876, + "step": 4160 + }, + { + "epoch": 0.3059108954565505, + "grad_norm": 0.9037330746650696, + "learning_rate": 4.969440526896665e-06, + "loss": 0.6466, + "step": 4161 + }, + { + "epoch": 0.30598441405675636, + "grad_norm": 0.8246637582778931, + "learning_rate": 4.969425505578884e-06, + "loss": 0.5302, + "step": 4162 + }, + { + "epoch": 0.3060579326569622, + "grad_norm": 0.8631872534751892, + "learning_rate": 4.969410480592907e-06, + "loss": 0.5546, + "step": 4163 + }, + { + "epoch": 0.30613145125716806, + "grad_norm": 0.8858150839805603, + "learning_rate": 4.969395451938756e-06, + "loss": 0.5516, + "step": 4164 + }, + { + "epoch": 0.30620496985737394, + "grad_norm": 0.8871409893035889, + "learning_rate": 4.969380419616453e-06, + "loss": 0.5499, + "step": 4165 + }, + { + "epoch": 0.30627848845757977, + "grad_norm": 0.8506675958633423, + "learning_rate": 4.969365383626021e-06, + "loss": 0.5804, + "step": 4166 + }, + { + "epoch": 0.30635200705778565, + "grad_norm": 0.8373497724533081, + "learning_rate": 4.969350343967482e-06, + "loss": 0.5658, + "step": 4167 + }, + { + "epoch": 0.30642552565799147, + "grad_norm": 0.846997082233429, + "learning_rate": 4.969335300640858e-06, + "loss": 0.5445, + "step": 4168 + }, + { + "epoch": 0.30649904425819735, + "grad_norm": 0.9178855419158936, + "learning_rate": 4.9693202536461715e-06, + "loss": 0.614, + "step": 4169 + }, + { + "epoch": 0.3065725628584032, + "grad_norm": 0.8897877931594849, + "learning_rate": 4.969305202983445e-06, + "loss": 0.6418, + "step": 4170 + }, + { + "epoch": 0.30664608145860905, + "grad_norm": 0.8682242631912231, + "learning_rate": 4.969290148652701e-06, + "loss": 0.6082, + "step": 4171 + }, + { + "epoch": 0.3067196000588149, + "grad_norm": 0.9100226163864136, + "learning_rate": 4.969275090653962e-06, + "loss": 0.5692, + "step": 4172 + }, + { + "epoch": 0.30679311865902076, + "grad_norm": 0.8395865559577942, + "learning_rate": 4.969260028987249e-06, + "loss": 0.5411, + "step": 4173 + }, + { + "epoch": 0.3068666372592266, + "grad_norm": 0.8181416392326355, + "learning_rate": 4.969244963652587e-06, + "loss": 0.5138, + "step": 4174 + }, + { + "epoch": 0.30694015585943246, + "grad_norm": 0.8720656037330627, + "learning_rate": 4.969229894649995e-06, + "loss": 0.6043, + "step": 4175 + }, + { + "epoch": 0.3070136744596383, + "grad_norm": 0.8513621091842651, + "learning_rate": 4.969214821979499e-06, + "loss": 0.5293, + "step": 4176 + }, + { + "epoch": 0.30708719305984417, + "grad_norm": 0.8411255478858948, + "learning_rate": 4.969199745641118e-06, + "loss": 0.5876, + "step": 4177 + }, + { + "epoch": 0.30716071166005, + "grad_norm": 0.829836905002594, + "learning_rate": 4.969184665634878e-06, + "loss": 0.5531, + "step": 4178 + }, + { + "epoch": 0.30723423026025587, + "grad_norm": 0.8799328804016113, + "learning_rate": 4.969169581960797e-06, + "loss": 0.5958, + "step": 4179 + }, + { + "epoch": 0.3073077488604617, + "grad_norm": 0.8788072466850281, + "learning_rate": 4.969154494618902e-06, + "loss": 0.5803, + "step": 4180 + }, + { + "epoch": 0.3073812674606676, + "grad_norm": 0.8041824102401733, + "learning_rate": 4.969139403609211e-06, + "loss": 0.5476, + "step": 4181 + }, + { + "epoch": 0.3074547860608734, + "grad_norm": 0.8668938875198364, + "learning_rate": 4.969124308931751e-06, + "loss": 0.5934, + "step": 4182 + }, + { + "epoch": 0.3075283046610793, + "grad_norm": 0.8576663732528687, + "learning_rate": 4.96910921058654e-06, + "loss": 0.5664, + "step": 4183 + }, + { + "epoch": 0.3076018232612851, + "grad_norm": 0.8906913995742798, + "learning_rate": 4.969094108573603e-06, + "loss": 0.5686, + "step": 4184 + }, + { + "epoch": 0.307675341861491, + "grad_norm": 0.8220974206924438, + "learning_rate": 4.969079002892963e-06, + "loss": 0.5538, + "step": 4185 + }, + { + "epoch": 0.3077488604616968, + "grad_norm": 0.8316819667816162, + "learning_rate": 4.96906389354464e-06, + "loss": 0.5997, + "step": 4186 + }, + { + "epoch": 0.3078223790619027, + "grad_norm": 0.8128035068511963, + "learning_rate": 4.969048780528658e-06, + "loss": 0.583, + "step": 4187 + }, + { + "epoch": 0.3078958976621085, + "grad_norm": 0.8777484893798828, + "learning_rate": 4.96903366384504e-06, + "loss": 0.5729, + "step": 4188 + }, + { + "epoch": 0.3079694162623144, + "grad_norm": 0.8515799045562744, + "learning_rate": 4.969018543493807e-06, + "loss": 0.5456, + "step": 4189 + }, + { + "epoch": 0.3080429348625202, + "grad_norm": 0.8088400363922119, + "learning_rate": 4.969003419474982e-06, + "loss": 0.4929, + "step": 4190 + }, + { + "epoch": 0.3081164534627261, + "grad_norm": 0.8644422888755798, + "learning_rate": 4.968988291788589e-06, + "loss": 0.572, + "step": 4191 + }, + { + "epoch": 0.3081899720629319, + "grad_norm": 0.9000773429870605, + "learning_rate": 4.9689731604346484e-06, + "loss": 0.5872, + "step": 4192 + }, + { + "epoch": 0.3082634906631378, + "grad_norm": 0.8297194242477417, + "learning_rate": 4.9689580254131825e-06, + "loss": 0.5639, + "step": 4193 + }, + { + "epoch": 0.3083370092633436, + "grad_norm": 0.9193030595779419, + "learning_rate": 4.968942886724215e-06, + "loss": 0.6288, + "step": 4194 + }, + { + "epoch": 0.3084105278635495, + "grad_norm": 0.8552859425544739, + "learning_rate": 4.968927744367769e-06, + "loss": 0.5578, + "step": 4195 + }, + { + "epoch": 0.3084840464637553, + "grad_norm": 0.8944327235221863, + "learning_rate": 4.968912598343865e-06, + "loss": 0.5566, + "step": 4196 + }, + { + "epoch": 0.3085575650639612, + "grad_norm": 0.8319527506828308, + "learning_rate": 4.968897448652526e-06, + "loss": 0.5811, + "step": 4197 + }, + { + "epoch": 0.308631083664167, + "grad_norm": 0.9216459393501282, + "learning_rate": 4.968882295293776e-06, + "loss": 0.6146, + "step": 4198 + }, + { + "epoch": 0.3087046022643729, + "grad_norm": 0.8879275918006897, + "learning_rate": 4.968867138267637e-06, + "loss": 0.5742, + "step": 4199 + }, + { + "epoch": 0.30877812086457873, + "grad_norm": 0.872942328453064, + "learning_rate": 4.9688519775741305e-06, + "loss": 0.5661, + "step": 4200 + }, + { + "epoch": 0.3088516394647846, + "grad_norm": 0.8495293259620667, + "learning_rate": 4.968836813213279e-06, + "loss": 0.5912, + "step": 4201 + }, + { + "epoch": 0.30892515806499043, + "grad_norm": 0.8977748155593872, + "learning_rate": 4.968821645185107e-06, + "loss": 0.5868, + "step": 4202 + }, + { + "epoch": 0.3089986766651963, + "grad_norm": 0.8489415645599365, + "learning_rate": 4.968806473489635e-06, + "loss": 0.5522, + "step": 4203 + }, + { + "epoch": 0.30907219526540214, + "grad_norm": 0.8473638296127319, + "learning_rate": 4.968791298126885e-06, + "loss": 0.5842, + "step": 4204 + }, + { + "epoch": 0.309145713865608, + "grad_norm": 0.8768849968910217, + "learning_rate": 4.968776119096882e-06, + "loss": 0.6098, + "step": 4205 + }, + { + "epoch": 0.30921923246581384, + "grad_norm": 0.8667110800743103, + "learning_rate": 4.968760936399647e-06, + "loss": 0.5957, + "step": 4206 + }, + { + "epoch": 0.3092927510660197, + "grad_norm": 0.8903204202651978, + "learning_rate": 4.968745750035202e-06, + "loss": 0.5754, + "step": 4207 + }, + { + "epoch": 0.30936626966622555, + "grad_norm": 0.9059459567070007, + "learning_rate": 4.968730560003571e-06, + "loss": 0.5835, + "step": 4208 + }, + { + "epoch": 0.3094397882664314, + "grad_norm": 0.8288982510566711, + "learning_rate": 4.968715366304777e-06, + "loss": 0.5801, + "step": 4209 + }, + { + "epoch": 0.30951330686663725, + "grad_norm": 0.8429561257362366, + "learning_rate": 4.9687001689388405e-06, + "loss": 0.5492, + "step": 4210 + }, + { + "epoch": 0.30958682546684313, + "grad_norm": 0.8254418969154358, + "learning_rate": 4.968684967905785e-06, + "loss": 0.5327, + "step": 4211 + }, + { + "epoch": 0.30966034406704895, + "grad_norm": 0.8867694139480591, + "learning_rate": 4.968669763205633e-06, + "loss": 0.5662, + "step": 4212 + }, + { + "epoch": 0.30973386266725483, + "grad_norm": 0.8460152745246887, + "learning_rate": 4.968654554838408e-06, + "loss": 0.5811, + "step": 4213 + }, + { + "epoch": 0.30980738126746066, + "grad_norm": 0.8595378994941711, + "learning_rate": 4.9686393428041315e-06, + "loss": 0.5478, + "step": 4214 + }, + { + "epoch": 0.30988089986766654, + "grad_norm": 0.8160780668258667, + "learning_rate": 4.968624127102826e-06, + "loss": 0.5362, + "step": 4215 + }, + { + "epoch": 0.30995441846787236, + "grad_norm": 0.9387231469154358, + "learning_rate": 4.968608907734514e-06, + "loss": 0.5635, + "step": 4216 + }, + { + "epoch": 0.31002793706807824, + "grad_norm": 0.8828626275062561, + "learning_rate": 4.96859368469922e-06, + "loss": 0.5791, + "step": 4217 + }, + { + "epoch": 0.31010145566828407, + "grad_norm": 0.8360896706581116, + "learning_rate": 4.968578457996966e-06, + "loss": 0.5836, + "step": 4218 + }, + { + "epoch": 0.31017497426848994, + "grad_norm": 0.8799370527267456, + "learning_rate": 4.9685632276277716e-06, + "loss": 0.5491, + "step": 4219 + }, + { + "epoch": 0.31024849286869577, + "grad_norm": 0.8168659806251526, + "learning_rate": 4.968547993591663e-06, + "loss": 0.5536, + "step": 4220 + }, + { + "epoch": 0.31032201146890165, + "grad_norm": 0.8592742085456848, + "learning_rate": 4.968532755888661e-06, + "loss": 0.5353, + "step": 4221 + }, + { + "epoch": 0.3103955300691075, + "grad_norm": 0.833655059337616, + "learning_rate": 4.968517514518789e-06, + "loss": 0.5228, + "step": 4222 + }, + { + "epoch": 0.31046904866931335, + "grad_norm": 0.8754260540008545, + "learning_rate": 4.96850226948207e-06, + "loss": 0.5737, + "step": 4223 + }, + { + "epoch": 0.3105425672695192, + "grad_norm": 0.889998197555542, + "learning_rate": 4.968487020778525e-06, + "loss": 0.5802, + "step": 4224 + }, + { + "epoch": 0.31061608586972506, + "grad_norm": 0.9433203339576721, + "learning_rate": 4.9684717684081784e-06, + "loss": 0.6469, + "step": 4225 + }, + { + "epoch": 0.3106896044699309, + "grad_norm": 0.8685833215713501, + "learning_rate": 4.968456512371052e-06, + "loss": 0.6098, + "step": 4226 + }, + { + "epoch": 0.31076312307013676, + "grad_norm": 0.9112816452980042, + "learning_rate": 4.968441252667169e-06, + "loss": 0.5981, + "step": 4227 + }, + { + "epoch": 0.3108366416703426, + "grad_norm": 0.8609490394592285, + "learning_rate": 4.968425989296551e-06, + "loss": 0.572, + "step": 4228 + }, + { + "epoch": 0.31091016027054846, + "grad_norm": 0.9431452751159668, + "learning_rate": 4.968410722259221e-06, + "loss": 0.6161, + "step": 4229 + }, + { + "epoch": 0.3109836788707543, + "grad_norm": 0.900547981262207, + "learning_rate": 4.968395451555203e-06, + "loss": 0.5687, + "step": 4230 + }, + { + "epoch": 0.31105719747096017, + "grad_norm": 0.8997505307197571, + "learning_rate": 4.968380177184519e-06, + "loss": 0.5842, + "step": 4231 + }, + { + "epoch": 0.311130716071166, + "grad_norm": 0.8542223572731018, + "learning_rate": 4.96836489914719e-06, + "loss": 0.5518, + "step": 4232 + }, + { + "epoch": 0.31120423467137187, + "grad_norm": 0.9480488896369934, + "learning_rate": 4.968349617443241e-06, + "loss": 0.6048, + "step": 4233 + }, + { + "epoch": 0.3112777532715777, + "grad_norm": 0.8662374019622803, + "learning_rate": 4.968334332072694e-06, + "loss": 0.5592, + "step": 4234 + }, + { + "epoch": 0.3113512718717836, + "grad_norm": 0.8216953277587891, + "learning_rate": 4.968319043035571e-06, + "loss": 0.5781, + "step": 4235 + }, + { + "epoch": 0.3114247904719894, + "grad_norm": 0.872084379196167, + "learning_rate": 4.968303750331895e-06, + "loss": 0.5922, + "step": 4236 + }, + { + "epoch": 0.3114983090721953, + "grad_norm": 0.8492376208305359, + "learning_rate": 4.96828845396169e-06, + "loss": 0.5493, + "step": 4237 + }, + { + "epoch": 0.3115718276724011, + "grad_norm": 0.8495681285858154, + "learning_rate": 4.968273153924977e-06, + "loss": 0.5844, + "step": 4238 + }, + { + "epoch": 0.311645346272607, + "grad_norm": 0.853556215763092, + "learning_rate": 4.968257850221779e-06, + "loss": 0.549, + "step": 4239 + }, + { + "epoch": 0.3117188648728128, + "grad_norm": 0.8951748013496399, + "learning_rate": 4.96824254285212e-06, + "loss": 0.5347, + "step": 4240 + }, + { + "epoch": 0.3117923834730187, + "grad_norm": 0.889530599117279, + "learning_rate": 4.968227231816021e-06, + "loss": 0.5703, + "step": 4241 + }, + { + "epoch": 0.3118659020732245, + "grad_norm": 0.8587749600410461, + "learning_rate": 4.968211917113506e-06, + "loss": 0.5805, + "step": 4242 + }, + { + "epoch": 0.3119394206734304, + "grad_norm": 0.9179521203041077, + "learning_rate": 4.968196598744598e-06, + "loss": 0.6175, + "step": 4243 + }, + { + "epoch": 0.3120129392736362, + "grad_norm": 0.8336641192436218, + "learning_rate": 4.9681812767093185e-06, + "loss": 0.578, + "step": 4244 + }, + { + "epoch": 0.3120864578738421, + "grad_norm": 0.8537348508834839, + "learning_rate": 4.968165951007691e-06, + "loss": 0.5437, + "step": 4245 + }, + { + "epoch": 0.3121599764740479, + "grad_norm": 0.8232987523078918, + "learning_rate": 4.968150621639737e-06, + "loss": 0.507, + "step": 4246 + }, + { + "epoch": 0.3122334950742538, + "grad_norm": 0.8402501344680786, + "learning_rate": 4.968135288605482e-06, + "loss": 0.5685, + "step": 4247 + }, + { + "epoch": 0.3123070136744596, + "grad_norm": 0.8948465585708618, + "learning_rate": 4.968119951904946e-06, + "loss": 0.5587, + "step": 4248 + }, + { + "epoch": 0.3123805322746655, + "grad_norm": 0.8223844766616821, + "learning_rate": 4.968104611538154e-06, + "loss": 0.5463, + "step": 4249 + }, + { + "epoch": 0.3124540508748713, + "grad_norm": 0.8118067979812622, + "learning_rate": 4.968089267505127e-06, + "loss": 0.5317, + "step": 4250 + }, + { + "epoch": 0.3125275694750772, + "grad_norm": 0.8117496371269226, + "learning_rate": 4.968073919805889e-06, + "loss": 0.5533, + "step": 4251 + }, + { + "epoch": 0.31260108807528303, + "grad_norm": 0.9400447607040405, + "learning_rate": 4.968058568440463e-06, + "loss": 0.6146, + "step": 4252 + }, + { + "epoch": 0.3126746066754889, + "grad_norm": 0.9014922380447388, + "learning_rate": 4.968043213408869e-06, + "loss": 0.6007, + "step": 4253 + }, + { + "epoch": 0.31274812527569473, + "grad_norm": 0.8490110039710999, + "learning_rate": 4.9680278547111334e-06, + "loss": 0.5391, + "step": 4254 + }, + { + "epoch": 0.3128216438759006, + "grad_norm": 0.8612743020057678, + "learning_rate": 4.968012492347278e-06, + "loss": 0.5854, + "step": 4255 + }, + { + "epoch": 0.31289516247610644, + "grad_norm": 0.8154244422912598, + "learning_rate": 4.9679971263173244e-06, + "loss": 0.5612, + "step": 4256 + }, + { + "epoch": 0.3129686810763123, + "grad_norm": 0.8516013622283936, + "learning_rate": 4.967981756621296e-06, + "loss": 0.5599, + "step": 4257 + }, + { + "epoch": 0.31304219967651814, + "grad_norm": 0.8938354849815369, + "learning_rate": 4.967966383259216e-06, + "loss": 0.5748, + "step": 4258 + }, + { + "epoch": 0.313115718276724, + "grad_norm": 0.8927688598632812, + "learning_rate": 4.967951006231108e-06, + "loss": 0.6386, + "step": 4259 + }, + { + "epoch": 0.31318923687692984, + "grad_norm": 0.8860495090484619, + "learning_rate": 4.967935625536993e-06, + "loss": 0.5953, + "step": 4260 + }, + { + "epoch": 0.3132627554771357, + "grad_norm": 0.913967490196228, + "learning_rate": 4.967920241176895e-06, + "loss": 0.6263, + "step": 4261 + }, + { + "epoch": 0.31333627407734155, + "grad_norm": 0.860304057598114, + "learning_rate": 4.967904853150837e-06, + "loss": 0.5574, + "step": 4262 + }, + { + "epoch": 0.3134097926775474, + "grad_norm": 0.830289363861084, + "learning_rate": 4.967889461458841e-06, + "loss": 0.5376, + "step": 4263 + }, + { + "epoch": 0.31348331127775325, + "grad_norm": 0.9061805009841919, + "learning_rate": 4.967874066100931e-06, + "loss": 0.5876, + "step": 4264 + }, + { + "epoch": 0.31355682987795913, + "grad_norm": 0.8795597553253174, + "learning_rate": 4.967858667077128e-06, + "loss": 0.5873, + "step": 4265 + }, + { + "epoch": 0.31363034847816496, + "grad_norm": 0.8691571354866028, + "learning_rate": 4.967843264387457e-06, + "loss": 0.5733, + "step": 4266 + }, + { + "epoch": 0.31370386707837083, + "grad_norm": 0.8538479208946228, + "learning_rate": 4.967827858031939e-06, + "loss": 0.5654, + "step": 4267 + }, + { + "epoch": 0.31377738567857666, + "grad_norm": 0.8648941516876221, + "learning_rate": 4.967812448010599e-06, + "loss": 0.5428, + "step": 4268 + }, + { + "epoch": 0.31385090427878254, + "grad_norm": 0.8801499605178833, + "learning_rate": 4.967797034323458e-06, + "loss": 0.5626, + "step": 4269 + }, + { + "epoch": 0.31392442287898836, + "grad_norm": 0.8845944404602051, + "learning_rate": 4.967781616970541e-06, + "loss": 0.598, + "step": 4270 + }, + { + "epoch": 0.31399794147919424, + "grad_norm": 0.870485246181488, + "learning_rate": 4.967766195951868e-06, + "loss": 0.5909, + "step": 4271 + }, + { + "epoch": 0.31407146007940007, + "grad_norm": 0.8380717039108276, + "learning_rate": 4.9677507712674646e-06, + "loss": 0.5523, + "step": 4272 + }, + { + "epoch": 0.31414497867960595, + "grad_norm": 0.8502963781356812, + "learning_rate": 4.9677353429173515e-06, + "loss": 0.5546, + "step": 4273 + }, + { + "epoch": 0.31421849727981177, + "grad_norm": 0.8489350080490112, + "learning_rate": 4.9677199109015525e-06, + "loss": 0.5527, + "step": 4274 + }, + { + "epoch": 0.31429201588001765, + "grad_norm": 0.8454988598823547, + "learning_rate": 4.967704475220092e-06, + "loss": 0.575, + "step": 4275 + }, + { + "epoch": 0.3143655344802235, + "grad_norm": 0.8594914078712463, + "learning_rate": 4.96768903587299e-06, + "loss": 0.6097, + "step": 4276 + }, + { + "epoch": 0.31443905308042935, + "grad_norm": 0.8728494048118591, + "learning_rate": 4.967673592860273e-06, + "loss": 0.5393, + "step": 4277 + }, + { + "epoch": 0.3145125716806352, + "grad_norm": 0.8073095679283142, + "learning_rate": 4.967658146181961e-06, + "loss": 0.5824, + "step": 4278 + }, + { + "epoch": 0.31458609028084106, + "grad_norm": 0.8817291259765625, + "learning_rate": 4.967642695838077e-06, + "loss": 0.564, + "step": 4279 + }, + { + "epoch": 0.3146596088810469, + "grad_norm": 0.8657675385475159, + "learning_rate": 4.967627241828646e-06, + "loss": 0.5671, + "step": 4280 + }, + { + "epoch": 0.31473312748125276, + "grad_norm": 0.862373411655426, + "learning_rate": 4.96761178415369e-06, + "loss": 0.5955, + "step": 4281 + }, + { + "epoch": 0.3148066460814586, + "grad_norm": 0.9014101028442383, + "learning_rate": 4.967596322813232e-06, + "loss": 0.583, + "step": 4282 + }, + { + "epoch": 0.31488016468166447, + "grad_norm": 0.8302887678146362, + "learning_rate": 4.9675808578072936e-06, + "loss": 0.5729, + "step": 4283 + }, + { + "epoch": 0.3149536832818703, + "grad_norm": 0.8861685395240784, + "learning_rate": 4.9675653891359e-06, + "loss": 0.5826, + "step": 4284 + }, + { + "epoch": 0.31502720188207617, + "grad_norm": 0.8728423714637756, + "learning_rate": 4.967549916799073e-06, + "loss": 0.5714, + "step": 4285 + }, + { + "epoch": 0.315100720482282, + "grad_norm": 0.8841899037361145, + "learning_rate": 4.967534440796835e-06, + "loss": 0.5834, + "step": 4286 + }, + { + "epoch": 0.3151742390824879, + "grad_norm": 0.8487553596496582, + "learning_rate": 4.967518961129211e-06, + "loss": 0.5399, + "step": 4287 + }, + { + "epoch": 0.3152477576826937, + "grad_norm": 0.8891316056251526, + "learning_rate": 4.967503477796222e-06, + "loss": 0.5988, + "step": 4288 + }, + { + "epoch": 0.3153212762828996, + "grad_norm": 0.9384140372276306, + "learning_rate": 4.967487990797892e-06, + "loss": 0.6359, + "step": 4289 + }, + { + "epoch": 0.3153947948831054, + "grad_norm": 0.9215573072433472, + "learning_rate": 4.967472500134243e-06, + "loss": 0.6096, + "step": 4290 + }, + { + "epoch": 0.3154683134833113, + "grad_norm": 0.7984583973884583, + "learning_rate": 4.967457005805298e-06, + "loss": 0.5319, + "step": 4291 + }, + { + "epoch": 0.3155418320835171, + "grad_norm": 0.8587648272514343, + "learning_rate": 4.967441507811082e-06, + "loss": 0.5561, + "step": 4292 + }, + { + "epoch": 0.315615350683723, + "grad_norm": 0.9402391314506531, + "learning_rate": 4.967426006151617e-06, + "loss": 0.6185, + "step": 4293 + }, + { + "epoch": 0.3156888692839288, + "grad_norm": 0.8607705235481262, + "learning_rate": 4.967410500826925e-06, + "loss": 0.5335, + "step": 4294 + }, + { + "epoch": 0.3157623878841347, + "grad_norm": 0.8215501308441162, + "learning_rate": 4.96739499183703e-06, + "loss": 0.5831, + "step": 4295 + }, + { + "epoch": 0.3158359064843405, + "grad_norm": 0.8589345216751099, + "learning_rate": 4.967379479181955e-06, + "loss": 0.5553, + "step": 4296 + }, + { + "epoch": 0.3159094250845464, + "grad_norm": 0.8546443581581116, + "learning_rate": 4.9673639628617235e-06, + "loss": 0.5894, + "step": 4297 + }, + { + "epoch": 0.3159829436847522, + "grad_norm": 0.9341171383857727, + "learning_rate": 4.967348442876357e-06, + "loss": 0.6485, + "step": 4298 + }, + { + "epoch": 0.3160564622849581, + "grad_norm": 0.855554461479187, + "learning_rate": 4.9673329192258795e-06, + "loss": 0.55, + "step": 4299 + }, + { + "epoch": 0.3161299808851639, + "grad_norm": 0.906363308429718, + "learning_rate": 4.967317391910315e-06, + "loss": 0.5785, + "step": 4300 + }, + { + "epoch": 0.3162034994853698, + "grad_norm": 0.8516570329666138, + "learning_rate": 4.967301860929684e-06, + "loss": 0.5504, + "step": 4301 + }, + { + "epoch": 0.3162770180855756, + "grad_norm": 0.8167484998703003, + "learning_rate": 4.967286326284012e-06, + "loss": 0.5488, + "step": 4302 + }, + { + "epoch": 0.3163505366857815, + "grad_norm": 0.8689941763877869, + "learning_rate": 4.967270787973322e-06, + "loss": 0.5348, + "step": 4303 + }, + { + "epoch": 0.3164240552859874, + "grad_norm": 0.8543239831924438, + "learning_rate": 4.967255245997636e-06, + "loss": 0.5454, + "step": 4304 + }, + { + "epoch": 0.3164975738861932, + "grad_norm": 0.8930913805961609, + "learning_rate": 4.967239700356977e-06, + "loss": 0.5711, + "step": 4305 + }, + { + "epoch": 0.3165710924863991, + "grad_norm": 0.8341824412345886, + "learning_rate": 4.967224151051369e-06, + "loss": 0.5835, + "step": 4306 + }, + { + "epoch": 0.3166446110866049, + "grad_norm": 0.9031514525413513, + "learning_rate": 4.967208598080835e-06, + "loss": 0.6261, + "step": 4307 + }, + { + "epoch": 0.3167181296868108, + "grad_norm": 0.8105534911155701, + "learning_rate": 4.967193041445396e-06, + "loss": 0.5273, + "step": 4308 + }, + { + "epoch": 0.3167916482870166, + "grad_norm": 0.8350682854652405, + "learning_rate": 4.9671774811450785e-06, + "loss": 0.5571, + "step": 4309 + }, + { + "epoch": 0.3168651668872225, + "grad_norm": 0.8588230609893799, + "learning_rate": 4.967161917179903e-06, + "loss": 0.5673, + "step": 4310 + }, + { + "epoch": 0.3169386854874283, + "grad_norm": 0.8686412572860718, + "learning_rate": 4.9671463495498935e-06, + "loss": 0.5716, + "step": 4311 + }, + { + "epoch": 0.3170122040876342, + "grad_norm": 0.9037911891937256, + "learning_rate": 4.9671307782550745e-06, + "loss": 0.6198, + "step": 4312 + }, + { + "epoch": 0.31708572268784, + "grad_norm": 0.8791512250900269, + "learning_rate": 4.967115203295468e-06, + "loss": 0.5641, + "step": 4313 + }, + { + "epoch": 0.3171592412880459, + "grad_norm": 0.8672962784767151, + "learning_rate": 4.967099624671095e-06, + "loss": 0.5713, + "step": 4314 + }, + { + "epoch": 0.3172327598882517, + "grad_norm": 0.8539873361587524, + "learning_rate": 4.967084042381981e-06, + "loss": 0.5635, + "step": 4315 + }, + { + "epoch": 0.3173062784884576, + "grad_norm": 0.8681338429450989, + "learning_rate": 4.96706845642815e-06, + "loss": 0.6041, + "step": 4316 + }, + { + "epoch": 0.31737979708866343, + "grad_norm": 0.9116312861442566, + "learning_rate": 4.967052866809623e-06, + "loss": 0.5961, + "step": 4317 + }, + { + "epoch": 0.3174533156888693, + "grad_norm": 0.8770429491996765, + "learning_rate": 4.967037273526424e-06, + "loss": 0.6002, + "step": 4318 + }, + { + "epoch": 0.31752683428907513, + "grad_norm": 0.8504251837730408, + "learning_rate": 4.967021676578576e-06, + "loss": 0.5244, + "step": 4319 + }, + { + "epoch": 0.317600352889281, + "grad_norm": 0.8564139008522034, + "learning_rate": 4.967006075966103e-06, + "loss": 0.5625, + "step": 4320 + }, + { + "epoch": 0.31767387148948684, + "grad_norm": 0.8673343658447266, + "learning_rate": 4.966990471689028e-06, + "loss": 0.5637, + "step": 4321 + }, + { + "epoch": 0.3177473900896927, + "grad_norm": 0.8485369086265564, + "learning_rate": 4.966974863747373e-06, + "loss": 0.5452, + "step": 4322 + }, + { + "epoch": 0.31782090868989854, + "grad_norm": 0.921635091304779, + "learning_rate": 4.966959252141162e-06, + "loss": 0.5991, + "step": 4323 + }, + { + "epoch": 0.3178944272901044, + "grad_norm": 0.8578601479530334, + "learning_rate": 4.966943636870418e-06, + "loss": 0.5671, + "step": 4324 + }, + { + "epoch": 0.31796794589031024, + "grad_norm": 0.9600050449371338, + "learning_rate": 4.966928017935165e-06, + "loss": 0.5635, + "step": 4325 + }, + { + "epoch": 0.3180414644905161, + "grad_norm": 0.8449795842170715, + "learning_rate": 4.966912395335425e-06, + "loss": 0.5649, + "step": 4326 + }, + { + "epoch": 0.31811498309072195, + "grad_norm": 0.8265968561172485, + "learning_rate": 4.966896769071221e-06, + "loss": 0.598, + "step": 4327 + }, + { + "epoch": 0.3181885016909278, + "grad_norm": 0.8759956955909729, + "learning_rate": 4.966881139142579e-06, + "loss": 0.5463, + "step": 4328 + }, + { + "epoch": 0.31826202029113365, + "grad_norm": 0.853374183177948, + "learning_rate": 4.966865505549519e-06, + "loss": 0.5803, + "step": 4329 + }, + { + "epoch": 0.31833553889133953, + "grad_norm": 0.8422298431396484, + "learning_rate": 4.966849868292064e-06, + "loss": 0.5846, + "step": 4330 + }, + { + "epoch": 0.31840905749154536, + "grad_norm": 0.9003772139549255, + "learning_rate": 4.966834227370241e-06, + "loss": 0.5787, + "step": 4331 + }, + { + "epoch": 0.31848257609175123, + "grad_norm": 0.8767066597938538, + "learning_rate": 4.966818582784069e-06, + "loss": 0.5996, + "step": 4332 + }, + { + "epoch": 0.31855609469195706, + "grad_norm": 0.9034563899040222, + "learning_rate": 4.9668029345335735e-06, + "loss": 0.5979, + "step": 4333 + }, + { + "epoch": 0.31862961329216294, + "grad_norm": 0.8921782374382019, + "learning_rate": 4.966787282618778e-06, + "loss": 0.5925, + "step": 4334 + }, + { + "epoch": 0.31870313189236876, + "grad_norm": 0.8330830335617065, + "learning_rate": 4.966771627039705e-06, + "loss": 0.5546, + "step": 4335 + }, + { + "epoch": 0.31877665049257464, + "grad_norm": 0.8667590618133545, + "learning_rate": 4.9667559677963775e-06, + "loss": 0.5804, + "step": 4336 + }, + { + "epoch": 0.31885016909278047, + "grad_norm": 0.9329108595848083, + "learning_rate": 4.966740304888819e-06, + "loss": 0.6013, + "step": 4337 + }, + { + "epoch": 0.31892368769298635, + "grad_norm": 0.8841165900230408, + "learning_rate": 4.966724638317052e-06, + "loss": 0.5769, + "step": 4338 + }, + { + "epoch": 0.31899720629319217, + "grad_norm": 0.9331912398338318, + "learning_rate": 4.966708968081102e-06, + "loss": 0.5874, + "step": 4339 + }, + { + "epoch": 0.31907072489339805, + "grad_norm": 0.8745338320732117, + "learning_rate": 4.9666932941809905e-06, + "loss": 0.5561, + "step": 4340 + }, + { + "epoch": 0.3191442434936039, + "grad_norm": 0.849960446357727, + "learning_rate": 4.966677616616741e-06, + "loss": 0.5594, + "step": 4341 + }, + { + "epoch": 0.31921776209380975, + "grad_norm": 0.879187285900116, + "learning_rate": 4.966661935388377e-06, + "loss": 0.6092, + "step": 4342 + }, + { + "epoch": 0.3192912806940156, + "grad_norm": 0.8038995265960693, + "learning_rate": 4.9666462504959216e-06, + "loss": 0.5588, + "step": 4343 + }, + { + "epoch": 0.31936479929422146, + "grad_norm": 0.9280381798744202, + "learning_rate": 4.966630561939399e-06, + "loss": 0.6307, + "step": 4344 + }, + { + "epoch": 0.3194383178944273, + "grad_norm": 0.8880375623703003, + "learning_rate": 4.966614869718831e-06, + "loss": 0.5588, + "step": 4345 + }, + { + "epoch": 0.31951183649463316, + "grad_norm": 0.8578158617019653, + "learning_rate": 4.966599173834242e-06, + "loss": 0.558, + "step": 4346 + }, + { + "epoch": 0.319585355094839, + "grad_norm": 0.8280487060546875, + "learning_rate": 4.966583474285655e-06, + "loss": 0.5585, + "step": 4347 + }, + { + "epoch": 0.31965887369504487, + "grad_norm": 0.8998667597770691, + "learning_rate": 4.966567771073093e-06, + "loss": 0.5975, + "step": 4348 + }, + { + "epoch": 0.3197323922952507, + "grad_norm": 0.8612572550773621, + "learning_rate": 4.96655206419658e-06, + "loss": 0.5875, + "step": 4349 + }, + { + "epoch": 0.31980591089545657, + "grad_norm": 0.9469992518424988, + "learning_rate": 4.966536353656139e-06, + "loss": 0.6082, + "step": 4350 + }, + { + "epoch": 0.3198794294956624, + "grad_norm": 0.9349981546401978, + "learning_rate": 4.966520639451793e-06, + "loss": 0.6214, + "step": 4351 + }, + { + "epoch": 0.3199529480958683, + "grad_norm": 0.9455004334449768, + "learning_rate": 4.966504921583566e-06, + "loss": 0.5591, + "step": 4352 + }, + { + "epoch": 0.3200264666960741, + "grad_norm": 0.8652238845825195, + "learning_rate": 4.9664892000514806e-06, + "loss": 0.5611, + "step": 4353 + }, + { + "epoch": 0.32009998529628, + "grad_norm": 0.8439163565635681, + "learning_rate": 4.966473474855561e-06, + "loss": 0.5751, + "step": 4354 + }, + { + "epoch": 0.3201735038964858, + "grad_norm": 0.8293033242225647, + "learning_rate": 4.96645774599583e-06, + "loss": 0.6071, + "step": 4355 + }, + { + "epoch": 0.3202470224966917, + "grad_norm": 0.8619861602783203, + "learning_rate": 4.966442013472311e-06, + "loss": 0.5482, + "step": 4356 + }, + { + "epoch": 0.3203205410968975, + "grad_norm": 0.8702002167701721, + "learning_rate": 4.966426277285027e-06, + "loss": 0.6141, + "step": 4357 + }, + { + "epoch": 0.3203940596971034, + "grad_norm": 0.7951069474220276, + "learning_rate": 4.966410537434003e-06, + "loss": 0.5381, + "step": 4358 + }, + { + "epoch": 0.3204675782973092, + "grad_norm": 0.9190537929534912, + "learning_rate": 4.96639479391926e-06, + "loss": 0.5316, + "step": 4359 + }, + { + "epoch": 0.3205410968975151, + "grad_norm": 0.8155707716941833, + "learning_rate": 4.9663790467408235e-06, + "loss": 0.5583, + "step": 4360 + }, + { + "epoch": 0.3206146154977209, + "grad_norm": 0.861619770526886, + "learning_rate": 4.9663632958987155e-06, + "loss": 0.5631, + "step": 4361 + }, + { + "epoch": 0.3206881340979268, + "grad_norm": 0.8736541271209717, + "learning_rate": 4.9663475413929595e-06, + "loss": 0.5748, + "step": 4362 + }, + { + "epoch": 0.3207616526981326, + "grad_norm": 0.8281785249710083, + "learning_rate": 4.966331783223579e-06, + "loss": 0.581, + "step": 4363 + }, + { + "epoch": 0.3208351712983385, + "grad_norm": 0.8587681651115417, + "learning_rate": 4.966316021390599e-06, + "loss": 0.5631, + "step": 4364 + }, + { + "epoch": 0.3209086898985443, + "grad_norm": 0.8782926201820374, + "learning_rate": 4.966300255894041e-06, + "loss": 0.5097, + "step": 4365 + }, + { + "epoch": 0.3209822084987502, + "grad_norm": 0.8586467504501343, + "learning_rate": 4.966284486733929e-06, + "loss": 0.5789, + "step": 4366 + }, + { + "epoch": 0.321055727098956, + "grad_norm": 0.8494811058044434, + "learning_rate": 4.9662687139102865e-06, + "loss": 0.5653, + "step": 4367 + }, + { + "epoch": 0.3211292456991619, + "grad_norm": 0.8599536418914795, + "learning_rate": 4.966252937423137e-06, + "loss": 0.5329, + "step": 4368 + }, + { + "epoch": 0.3212027642993677, + "grad_norm": 0.8276538252830505, + "learning_rate": 4.966237157272503e-06, + "loss": 0.5655, + "step": 4369 + }, + { + "epoch": 0.3212762828995736, + "grad_norm": 0.8421481251716614, + "learning_rate": 4.9662213734584095e-06, + "loss": 0.5841, + "step": 4370 + }, + { + "epoch": 0.32134980149977943, + "grad_norm": 0.8386508226394653, + "learning_rate": 4.96620558598088e-06, + "loss": 0.5513, + "step": 4371 + }, + { + "epoch": 0.3214233200999853, + "grad_norm": 0.8759320378303528, + "learning_rate": 4.966189794839936e-06, + "loss": 0.5953, + "step": 4372 + }, + { + "epoch": 0.32149683870019113, + "grad_norm": 0.8245579600334167, + "learning_rate": 4.966174000035602e-06, + "loss": 0.5435, + "step": 4373 + }, + { + "epoch": 0.321570357300397, + "grad_norm": 0.8803996443748474, + "learning_rate": 4.966158201567902e-06, + "loss": 0.5636, + "step": 4374 + }, + { + "epoch": 0.32164387590060284, + "grad_norm": 0.8847981095314026, + "learning_rate": 4.966142399436859e-06, + "loss": 0.608, + "step": 4375 + }, + { + "epoch": 0.3217173945008087, + "grad_norm": 0.8992866277694702, + "learning_rate": 4.9661265936424965e-06, + "loss": 0.5917, + "step": 4376 + }, + { + "epoch": 0.32179091310101454, + "grad_norm": 0.8559979200363159, + "learning_rate": 4.966110784184838e-06, + "loss": 0.5715, + "step": 4377 + }, + { + "epoch": 0.3218644317012204, + "grad_norm": 0.814283549785614, + "learning_rate": 4.966094971063906e-06, + "loss": 0.5287, + "step": 4378 + }, + { + "epoch": 0.32193795030142625, + "grad_norm": 0.8819839358329773, + "learning_rate": 4.966079154279727e-06, + "loss": 0.5548, + "step": 4379 + }, + { + "epoch": 0.3220114689016321, + "grad_norm": 0.8415680527687073, + "learning_rate": 4.9660633338323205e-06, + "loss": 0.5697, + "step": 4380 + }, + { + "epoch": 0.32208498750183795, + "grad_norm": 0.8178902864456177, + "learning_rate": 4.966047509721713e-06, + "loss": 0.5545, + "step": 4381 + }, + { + "epoch": 0.32215850610204383, + "grad_norm": 0.8486229181289673, + "learning_rate": 4.966031681947927e-06, + "loss": 0.5888, + "step": 4382 + }, + { + "epoch": 0.32223202470224965, + "grad_norm": 0.9037350416183472, + "learning_rate": 4.966015850510985e-06, + "loss": 0.6074, + "step": 4383 + }, + { + "epoch": 0.32230554330245553, + "grad_norm": 0.8790952563285828, + "learning_rate": 4.966000015410913e-06, + "loss": 0.5343, + "step": 4384 + }, + { + "epoch": 0.32237906190266136, + "grad_norm": 0.8735036849975586, + "learning_rate": 4.965984176647733e-06, + "loss": 0.5335, + "step": 4385 + }, + { + "epoch": 0.32245258050286724, + "grad_norm": 0.9271963238716125, + "learning_rate": 4.965968334221467e-06, + "loss": 0.577, + "step": 4386 + }, + { + "epoch": 0.32252609910307306, + "grad_norm": 0.8471961617469788, + "learning_rate": 4.9659524881321405e-06, + "loss": 0.6083, + "step": 4387 + }, + { + "epoch": 0.32259961770327894, + "grad_norm": 0.8548614978790283, + "learning_rate": 4.965936638379777e-06, + "loss": 0.5412, + "step": 4388 + }, + { + "epoch": 0.32267313630348476, + "grad_norm": 0.8570525050163269, + "learning_rate": 4.9659207849644e-06, + "loss": 0.566, + "step": 4389 + }, + { + "epoch": 0.32274665490369064, + "grad_norm": 0.8557136058807373, + "learning_rate": 4.965904927886032e-06, + "loss": 0.5748, + "step": 4390 + }, + { + "epoch": 0.32282017350389647, + "grad_norm": 0.8564863801002502, + "learning_rate": 4.965889067144697e-06, + "loss": 0.5675, + "step": 4391 + }, + { + "epoch": 0.32289369210410235, + "grad_norm": 0.8390181064605713, + "learning_rate": 4.96587320274042e-06, + "loss": 0.5572, + "step": 4392 + }, + { + "epoch": 0.32296721070430817, + "grad_norm": 0.8052465319633484, + "learning_rate": 4.965857334673222e-06, + "loss": 0.5867, + "step": 4393 + }, + { + "epoch": 0.32304072930451405, + "grad_norm": 0.8509655594825745, + "learning_rate": 4.9658414629431295e-06, + "loss": 0.5913, + "step": 4394 + }, + { + "epoch": 0.3231142479047199, + "grad_norm": 0.8431513905525208, + "learning_rate": 4.965825587550164e-06, + "loss": 0.5684, + "step": 4395 + }, + { + "epoch": 0.32318776650492576, + "grad_norm": 0.8681073188781738, + "learning_rate": 4.965809708494349e-06, + "loss": 0.5799, + "step": 4396 + }, + { + "epoch": 0.3232612851051316, + "grad_norm": 0.8832506537437439, + "learning_rate": 4.9657938257757084e-06, + "loss": 0.5741, + "step": 4397 + }, + { + "epoch": 0.32333480370533746, + "grad_norm": 0.8299573063850403, + "learning_rate": 4.965777939394267e-06, + "loss": 0.548, + "step": 4398 + }, + { + "epoch": 0.3234083223055433, + "grad_norm": 0.9786739349365234, + "learning_rate": 4.965762049350047e-06, + "loss": 0.5561, + "step": 4399 + }, + { + "epoch": 0.32348184090574916, + "grad_norm": 0.8601897954940796, + "learning_rate": 4.965746155643073e-06, + "loss": 0.564, + "step": 4400 + }, + { + "epoch": 0.323555359505955, + "grad_norm": 0.860901951789856, + "learning_rate": 4.965730258273368e-06, + "loss": 0.5812, + "step": 4401 + }, + { + "epoch": 0.32362887810616087, + "grad_norm": 0.8361808657646179, + "learning_rate": 4.965714357240956e-06, + "loss": 0.5633, + "step": 4402 + }, + { + "epoch": 0.3237023967063667, + "grad_norm": 0.8714155554771423, + "learning_rate": 4.96569845254586e-06, + "loss": 0.5968, + "step": 4403 + }, + { + "epoch": 0.32377591530657257, + "grad_norm": 0.9385797381401062, + "learning_rate": 4.965682544188103e-06, + "loss": 0.5881, + "step": 4404 + }, + { + "epoch": 0.3238494339067784, + "grad_norm": 0.852120041847229, + "learning_rate": 4.965666632167711e-06, + "loss": 0.5487, + "step": 4405 + }, + { + "epoch": 0.3239229525069843, + "grad_norm": 0.8391937017440796, + "learning_rate": 4.965650716484705e-06, + "loss": 0.5402, + "step": 4406 + }, + { + "epoch": 0.3239964711071901, + "grad_norm": 0.8379449844360352, + "learning_rate": 4.965634797139112e-06, + "loss": 0.6, + "step": 4407 + }, + { + "epoch": 0.324069989707396, + "grad_norm": 0.8855657577514648, + "learning_rate": 4.965618874130951e-06, + "loss": 0.5661, + "step": 4408 + }, + { + "epoch": 0.3241435083076018, + "grad_norm": 0.8403074741363525, + "learning_rate": 4.96560294746025e-06, + "loss": 0.5779, + "step": 4409 + }, + { + "epoch": 0.3242170269078077, + "grad_norm": 0.9343879818916321, + "learning_rate": 4.96558701712703e-06, + "loss": 0.5736, + "step": 4410 + }, + { + "epoch": 0.3242905455080135, + "grad_norm": 0.8278440833091736, + "learning_rate": 4.965571083131315e-06, + "loss": 0.553, + "step": 4411 + }, + { + "epoch": 0.3243640641082194, + "grad_norm": 0.9437693357467651, + "learning_rate": 4.9655551454731295e-06, + "loss": 0.6416, + "step": 4412 + }, + { + "epoch": 0.3244375827084252, + "grad_norm": 0.872194230556488, + "learning_rate": 4.965539204152498e-06, + "loss": 0.592, + "step": 4413 + }, + { + "epoch": 0.3245111013086311, + "grad_norm": 0.8989313244819641, + "learning_rate": 4.965523259169442e-06, + "loss": 0.546, + "step": 4414 + }, + { + "epoch": 0.3245846199088369, + "grad_norm": 0.8292286396026611, + "learning_rate": 4.965507310523986e-06, + "loss": 0.5321, + "step": 4415 + }, + { + "epoch": 0.3246581385090428, + "grad_norm": 0.8803378343582153, + "learning_rate": 4.965491358216155e-06, + "loss": 0.5953, + "step": 4416 + }, + { + "epoch": 0.3247316571092486, + "grad_norm": 0.847882866859436, + "learning_rate": 4.96547540224597e-06, + "loss": 0.6237, + "step": 4417 + }, + { + "epoch": 0.3248051757094545, + "grad_norm": 0.8567730188369751, + "learning_rate": 4.965459442613457e-06, + "loss": 0.6, + "step": 4418 + }, + { + "epoch": 0.3248786943096603, + "grad_norm": 0.8871641159057617, + "learning_rate": 4.96544347931864e-06, + "loss": 0.5415, + "step": 4419 + }, + { + "epoch": 0.3249522129098662, + "grad_norm": 0.8729745149612427, + "learning_rate": 4.9654275123615405e-06, + "loss": 0.5266, + "step": 4420 + }, + { + "epoch": 0.325025731510072, + "grad_norm": 0.8504700660705566, + "learning_rate": 4.965411541742184e-06, + "loss": 0.511, + "step": 4421 + }, + { + "epoch": 0.3250992501102779, + "grad_norm": 0.8677511215209961, + "learning_rate": 4.965395567460593e-06, + "loss": 0.5742, + "step": 4422 + }, + { + "epoch": 0.32517276871048373, + "grad_norm": 0.8668593168258667, + "learning_rate": 4.965379589516793e-06, + "loss": 0.5783, + "step": 4423 + }, + { + "epoch": 0.3252462873106896, + "grad_norm": 0.8511696457862854, + "learning_rate": 4.965363607910806e-06, + "loss": 0.5618, + "step": 4424 + }, + { + "epoch": 0.32531980591089543, + "grad_norm": 0.8675864338874817, + "learning_rate": 4.965347622642656e-06, + "loss": 0.579, + "step": 4425 + }, + { + "epoch": 0.3253933245111013, + "grad_norm": 0.8300654888153076, + "learning_rate": 4.965331633712368e-06, + "loss": 0.5294, + "step": 4426 + }, + { + "epoch": 0.32546684311130714, + "grad_norm": 0.8769707679748535, + "learning_rate": 4.965315641119964e-06, + "loss": 0.5668, + "step": 4427 + }, + { + "epoch": 0.325540361711513, + "grad_norm": 0.8674326539039612, + "learning_rate": 4.965299644865469e-06, + "loss": 0.5657, + "step": 4428 + }, + { + "epoch": 0.32561388031171884, + "grad_norm": 0.853434681892395, + "learning_rate": 4.9652836449489065e-06, + "loss": 0.5731, + "step": 4429 + }, + { + "epoch": 0.3256873989119247, + "grad_norm": 0.8849751353263855, + "learning_rate": 4.9652676413702995e-06, + "loss": 0.5849, + "step": 4430 + }, + { + "epoch": 0.32576091751213054, + "grad_norm": 0.8756359815597534, + "learning_rate": 4.9652516341296725e-06, + "loss": 0.5757, + "step": 4431 + }, + { + "epoch": 0.3258344361123364, + "grad_norm": 0.8520228862762451, + "learning_rate": 4.965235623227049e-06, + "loss": 0.5229, + "step": 4432 + }, + { + "epoch": 0.32590795471254225, + "grad_norm": 0.8729908466339111, + "learning_rate": 4.965219608662454e-06, + "loss": 0.5631, + "step": 4433 + }, + { + "epoch": 0.3259814733127481, + "grad_norm": 0.8409538269042969, + "learning_rate": 4.9652035904359094e-06, + "loss": 0.5435, + "step": 4434 + }, + { + "epoch": 0.32605499191295395, + "grad_norm": 0.8571044206619263, + "learning_rate": 4.96518756854744e-06, + "loss": 0.5605, + "step": 4435 + }, + { + "epoch": 0.32612851051315983, + "grad_norm": 0.8601775765419006, + "learning_rate": 4.96517154299707e-06, + "loss": 0.5953, + "step": 4436 + }, + { + "epoch": 0.32620202911336565, + "grad_norm": 0.8548752069473267, + "learning_rate": 4.965155513784823e-06, + "loss": 0.5732, + "step": 4437 + }, + { + "epoch": 0.32627554771357153, + "grad_norm": 0.795238196849823, + "learning_rate": 4.9651394809107215e-06, + "loss": 0.5581, + "step": 4438 + }, + { + "epoch": 0.32634906631377736, + "grad_norm": 0.834988534450531, + "learning_rate": 4.965123444374791e-06, + "loss": 0.5556, + "step": 4439 + }, + { + "epoch": 0.32642258491398324, + "grad_norm": 0.8560869693756104, + "learning_rate": 4.965107404177054e-06, + "loss": 0.6051, + "step": 4440 + }, + { + "epoch": 0.32649610351418906, + "grad_norm": 0.8435426354408264, + "learning_rate": 4.965091360317535e-06, + "loss": 0.569, + "step": 4441 + }, + { + "epoch": 0.32656962211439494, + "grad_norm": 0.8009468913078308, + "learning_rate": 4.965075312796258e-06, + "loss": 0.5124, + "step": 4442 + }, + { + "epoch": 0.3266431407146008, + "grad_norm": 0.903281569480896, + "learning_rate": 4.965059261613246e-06, + "loss": 0.6076, + "step": 4443 + }, + { + "epoch": 0.32671665931480665, + "grad_norm": 0.8613739013671875, + "learning_rate": 4.965043206768524e-06, + "loss": 0.545, + "step": 4444 + }, + { + "epoch": 0.3267901779150125, + "grad_norm": 0.8261101245880127, + "learning_rate": 4.965027148262116e-06, + "loss": 0.5244, + "step": 4445 + }, + { + "epoch": 0.32686369651521835, + "grad_norm": 0.8402925729751587, + "learning_rate": 4.965011086094044e-06, + "loss": 0.553, + "step": 4446 + }, + { + "epoch": 0.32693721511542423, + "grad_norm": 0.9064859747886658, + "learning_rate": 4.964995020264334e-06, + "loss": 0.5629, + "step": 4447 + }, + { + "epoch": 0.32701073371563005, + "grad_norm": 0.8452730774879456, + "learning_rate": 4.964978950773007e-06, + "loss": 0.5466, + "step": 4448 + }, + { + "epoch": 0.32708425231583593, + "grad_norm": 0.8583142161369324, + "learning_rate": 4.964962877620091e-06, + "loss": 0.6156, + "step": 4449 + }, + { + "epoch": 0.32715777091604176, + "grad_norm": 0.9270214438438416, + "learning_rate": 4.964946800805607e-06, + "loss": 0.5622, + "step": 4450 + }, + { + "epoch": 0.32723128951624764, + "grad_norm": 0.8419800996780396, + "learning_rate": 4.964930720329578e-06, + "loss": 0.5517, + "step": 4451 + }, + { + "epoch": 0.32730480811645346, + "grad_norm": 0.9359748959541321, + "learning_rate": 4.964914636192031e-06, + "loss": 0.6, + "step": 4452 + }, + { + "epoch": 0.32737832671665934, + "grad_norm": 0.8546661138534546, + "learning_rate": 4.964898548392988e-06, + "loss": 0.5518, + "step": 4453 + }, + { + "epoch": 0.32745184531686516, + "grad_norm": 0.8474684953689575, + "learning_rate": 4.964882456932473e-06, + "loss": 0.5281, + "step": 4454 + }, + { + "epoch": 0.32752536391707104, + "grad_norm": 0.8337804675102234, + "learning_rate": 4.96486636181051e-06, + "loss": 0.5819, + "step": 4455 + }, + { + "epoch": 0.32759888251727687, + "grad_norm": 0.8638193607330322, + "learning_rate": 4.964850263027123e-06, + "loss": 0.5751, + "step": 4456 + }, + { + "epoch": 0.32767240111748275, + "grad_norm": 0.9249865412712097, + "learning_rate": 4.9648341605823366e-06, + "loss": 0.5875, + "step": 4457 + }, + { + "epoch": 0.32774591971768857, + "grad_norm": 0.8729387521743774, + "learning_rate": 4.964818054476173e-06, + "loss": 0.5971, + "step": 4458 + }, + { + "epoch": 0.32781943831789445, + "grad_norm": 0.838030993938446, + "learning_rate": 4.964801944708658e-06, + "loss": 0.5553, + "step": 4459 + }, + { + "epoch": 0.3278929569181003, + "grad_norm": 0.8159907460212708, + "learning_rate": 4.964785831279813e-06, + "loss": 0.5599, + "step": 4460 + }, + { + "epoch": 0.32796647551830616, + "grad_norm": 0.8639986515045166, + "learning_rate": 4.964769714189665e-06, + "loss": 0.5642, + "step": 4461 + }, + { + "epoch": 0.328039994118512, + "grad_norm": 0.8887448310852051, + "learning_rate": 4.964753593438237e-06, + "loss": 0.5728, + "step": 4462 + }, + { + "epoch": 0.32811351271871786, + "grad_norm": 0.8662588000297546, + "learning_rate": 4.964737469025552e-06, + "loss": 0.5485, + "step": 4463 + }, + { + "epoch": 0.3281870313189237, + "grad_norm": 0.9023897647857666, + "learning_rate": 4.964721340951635e-06, + "loss": 0.6091, + "step": 4464 + }, + { + "epoch": 0.32826054991912956, + "grad_norm": 0.8481564521789551, + "learning_rate": 4.964705209216508e-06, + "loss": 0.5778, + "step": 4465 + }, + { + "epoch": 0.3283340685193354, + "grad_norm": 0.917716920375824, + "learning_rate": 4.964689073820198e-06, + "loss": 0.5885, + "step": 4466 + }, + { + "epoch": 0.32840758711954127, + "grad_norm": 0.8452950119972229, + "learning_rate": 4.9646729347627264e-06, + "loss": 0.6039, + "step": 4467 + }, + { + "epoch": 0.3284811057197471, + "grad_norm": 0.8604231476783752, + "learning_rate": 4.964656792044118e-06, + "loss": 0.6053, + "step": 4468 + }, + { + "epoch": 0.32855462431995297, + "grad_norm": 0.8836000561714172, + "learning_rate": 4.964640645664397e-06, + "loss": 0.579, + "step": 4469 + }, + { + "epoch": 0.3286281429201588, + "grad_norm": 0.8386307954788208, + "learning_rate": 4.964624495623588e-06, + "loss": 0.5639, + "step": 4470 + }, + { + "epoch": 0.3287016615203647, + "grad_norm": 0.8770734071731567, + "learning_rate": 4.964608341921713e-06, + "loss": 0.5731, + "step": 4471 + }, + { + "epoch": 0.3287751801205705, + "grad_norm": 0.8776385188102722, + "learning_rate": 4.964592184558799e-06, + "loss": 0.5664, + "step": 4472 + }, + { + "epoch": 0.3288486987207764, + "grad_norm": 0.8221646547317505, + "learning_rate": 4.964576023534866e-06, + "loss": 0.5528, + "step": 4473 + }, + { + "epoch": 0.3289222173209822, + "grad_norm": 0.8587406277656555, + "learning_rate": 4.964559858849942e-06, + "loss": 0.5828, + "step": 4474 + }, + { + "epoch": 0.3289957359211881, + "grad_norm": 0.8942182660102844, + "learning_rate": 4.964543690504049e-06, + "loss": 0.6275, + "step": 4475 + }, + { + "epoch": 0.3290692545213939, + "grad_norm": 0.8137276768684387, + "learning_rate": 4.964527518497211e-06, + "loss": 0.574, + "step": 4476 + }, + { + "epoch": 0.3291427731215998, + "grad_norm": 0.9152695536613464, + "learning_rate": 4.964511342829452e-06, + "loss": 0.6025, + "step": 4477 + }, + { + "epoch": 0.3292162917218056, + "grad_norm": 0.872092604637146, + "learning_rate": 4.964495163500797e-06, + "loss": 0.5823, + "step": 4478 + }, + { + "epoch": 0.3292898103220115, + "grad_norm": 0.8549174070358276, + "learning_rate": 4.964478980511269e-06, + "loss": 0.574, + "step": 4479 + }, + { + "epoch": 0.3293633289222173, + "grad_norm": 0.874172031879425, + "learning_rate": 4.964462793860893e-06, + "loss": 0.6146, + "step": 4480 + }, + { + "epoch": 0.3294368475224232, + "grad_norm": 0.8510291576385498, + "learning_rate": 4.964446603549692e-06, + "loss": 0.5921, + "step": 4481 + }, + { + "epoch": 0.329510366122629, + "grad_norm": 0.8094064593315125, + "learning_rate": 4.964430409577691e-06, + "loss": 0.5704, + "step": 4482 + }, + { + "epoch": 0.3295838847228349, + "grad_norm": 0.8823286294937134, + "learning_rate": 4.964414211944912e-06, + "loss": 0.5648, + "step": 4483 + }, + { + "epoch": 0.3296574033230407, + "grad_norm": 0.9443519115447998, + "learning_rate": 4.964398010651382e-06, + "loss": 0.5823, + "step": 4484 + }, + { + "epoch": 0.3297309219232466, + "grad_norm": 0.8708899617195129, + "learning_rate": 4.964381805697124e-06, + "loss": 0.5808, + "step": 4485 + }, + { + "epoch": 0.3298044405234524, + "grad_norm": 0.8331232666969299, + "learning_rate": 4.964365597082161e-06, + "loss": 0.5363, + "step": 4486 + }, + { + "epoch": 0.3298779591236583, + "grad_norm": 0.8435372114181519, + "learning_rate": 4.964349384806518e-06, + "loss": 0.5354, + "step": 4487 + }, + { + "epoch": 0.32995147772386413, + "grad_norm": 0.8816725015640259, + "learning_rate": 4.964333168870219e-06, + "loss": 0.5925, + "step": 4488 + }, + { + "epoch": 0.33002499632407, + "grad_norm": 0.8556358218193054, + "learning_rate": 4.964316949273288e-06, + "loss": 0.5767, + "step": 4489 + }, + { + "epoch": 0.33009851492427583, + "grad_norm": 0.8372395634651184, + "learning_rate": 4.964300726015749e-06, + "loss": 0.5776, + "step": 4490 + }, + { + "epoch": 0.3301720335244817, + "grad_norm": 0.8652081489562988, + "learning_rate": 4.964284499097627e-06, + "loss": 0.5751, + "step": 4491 + }, + { + "epoch": 0.33024555212468754, + "grad_norm": 0.9200541377067566, + "learning_rate": 4.964268268518945e-06, + "loss": 0.6167, + "step": 4492 + }, + { + "epoch": 0.3303190707248934, + "grad_norm": 0.8784170746803284, + "learning_rate": 4.964252034279726e-06, + "loss": 0.523, + "step": 4493 + }, + { + "epoch": 0.33039258932509924, + "grad_norm": 0.8282028436660767, + "learning_rate": 4.964235796379997e-06, + "loss": 0.5339, + "step": 4494 + }, + { + "epoch": 0.3304661079253051, + "grad_norm": 0.8613936305046082, + "learning_rate": 4.96421955481978e-06, + "loss": 0.606, + "step": 4495 + }, + { + "epoch": 0.33053962652551094, + "grad_norm": 0.8179067969322205, + "learning_rate": 4.9642033095991e-06, + "loss": 0.5572, + "step": 4496 + }, + { + "epoch": 0.3306131451257168, + "grad_norm": 0.9877486228942871, + "learning_rate": 4.964187060717982e-06, + "loss": 0.6223, + "step": 4497 + }, + { + "epoch": 0.33068666372592265, + "grad_norm": 0.8423711061477661, + "learning_rate": 4.964170808176448e-06, + "loss": 0.546, + "step": 4498 + }, + { + "epoch": 0.3307601823261285, + "grad_norm": 0.8659893274307251, + "learning_rate": 4.964154551974523e-06, + "loss": 0.6063, + "step": 4499 + }, + { + "epoch": 0.33083370092633435, + "grad_norm": 0.9336012005805969, + "learning_rate": 4.964138292112232e-06, + "loss": 0.5724, + "step": 4500 + }, + { + "epoch": 0.33090721952654023, + "grad_norm": 0.8772481083869934, + "learning_rate": 4.964122028589598e-06, + "loss": 0.5706, + "step": 4501 + }, + { + "epoch": 0.33098073812674605, + "grad_norm": 0.8608142137527466, + "learning_rate": 4.964105761406646e-06, + "loss": 0.5563, + "step": 4502 + }, + { + "epoch": 0.33105425672695193, + "grad_norm": 0.9413098096847534, + "learning_rate": 4.9640894905634e-06, + "loss": 0.627, + "step": 4503 + }, + { + "epoch": 0.33112777532715776, + "grad_norm": 0.8863834142684937, + "learning_rate": 4.964073216059883e-06, + "loss": 0.5727, + "step": 4504 + }, + { + "epoch": 0.33120129392736364, + "grad_norm": 0.8448939323425293, + "learning_rate": 4.9640569378961215e-06, + "loss": 0.5977, + "step": 4505 + }, + { + "epoch": 0.33127481252756946, + "grad_norm": 0.8640400171279907, + "learning_rate": 4.964040656072137e-06, + "loss": 0.5333, + "step": 4506 + }, + { + "epoch": 0.33134833112777534, + "grad_norm": 0.8295495510101318, + "learning_rate": 4.9640243705879556e-06, + "loss": 0.547, + "step": 4507 + }, + { + "epoch": 0.33142184972798117, + "grad_norm": 0.8770223259925842, + "learning_rate": 4.964008081443601e-06, + "loss": 0.6098, + "step": 4508 + }, + { + "epoch": 0.33149536832818705, + "grad_norm": 0.8980183005332947, + "learning_rate": 4.963991788639098e-06, + "loss": 0.5493, + "step": 4509 + }, + { + "epoch": 0.33156888692839287, + "grad_norm": 0.8668574690818787, + "learning_rate": 4.963975492174468e-06, + "loss": 0.5764, + "step": 4510 + }, + { + "epoch": 0.33164240552859875, + "grad_norm": 0.8497627973556519, + "learning_rate": 4.96395919204974e-06, + "loss": 0.5582, + "step": 4511 + }, + { + "epoch": 0.3317159241288046, + "grad_norm": 0.8518467545509338, + "learning_rate": 4.963942888264934e-06, + "loss": 0.5835, + "step": 4512 + }, + { + "epoch": 0.33178944272901045, + "grad_norm": 0.8527345657348633, + "learning_rate": 4.963926580820076e-06, + "loss": 0.5573, + "step": 4513 + }, + { + "epoch": 0.3318629613292163, + "grad_norm": 0.8410201668739319, + "learning_rate": 4.96391026971519e-06, + "loss": 0.5694, + "step": 4514 + }, + { + "epoch": 0.33193647992942216, + "grad_norm": 0.8142828941345215, + "learning_rate": 4.9638939549503006e-06, + "loss": 0.5506, + "step": 4515 + }, + { + "epoch": 0.332009998529628, + "grad_norm": 0.8676577210426331, + "learning_rate": 4.963877636525431e-06, + "loss": 0.5544, + "step": 4516 + }, + { + "epoch": 0.33208351712983386, + "grad_norm": 0.9007539749145508, + "learning_rate": 4.963861314440606e-06, + "loss": 0.6, + "step": 4517 + }, + { + "epoch": 0.3321570357300397, + "grad_norm": 0.8809939622879028, + "learning_rate": 4.96384498869585e-06, + "loss": 0.5138, + "step": 4518 + }, + { + "epoch": 0.33223055433024556, + "grad_norm": 0.8225247263908386, + "learning_rate": 4.9638286592911884e-06, + "loss": 0.527, + "step": 4519 + }, + { + "epoch": 0.3323040729304514, + "grad_norm": 0.8703388571739197, + "learning_rate": 4.963812326226644e-06, + "loss": 0.5692, + "step": 4520 + }, + { + "epoch": 0.33237759153065727, + "grad_norm": 0.8121532201766968, + "learning_rate": 4.96379598950224e-06, + "loss": 0.5452, + "step": 4521 + }, + { + "epoch": 0.3324511101308631, + "grad_norm": 0.8503285646438599, + "learning_rate": 4.963779649118003e-06, + "loss": 0.5715, + "step": 4522 + }, + { + "epoch": 0.33252462873106897, + "grad_norm": 0.8763834238052368, + "learning_rate": 4.963763305073955e-06, + "loss": 0.5791, + "step": 4523 + }, + { + "epoch": 0.3325981473312748, + "grad_norm": 0.8681154251098633, + "learning_rate": 4.9637469573701225e-06, + "loss": 0.6011, + "step": 4524 + }, + { + "epoch": 0.3326716659314807, + "grad_norm": 0.8969462513923645, + "learning_rate": 4.96373060600653e-06, + "loss": 0.5635, + "step": 4525 + }, + { + "epoch": 0.3327451845316865, + "grad_norm": 0.8856502771377563, + "learning_rate": 4.963714250983198e-06, + "loss": 0.6022, + "step": 4526 + }, + { + "epoch": 0.3328187031318924, + "grad_norm": 0.8644835948944092, + "learning_rate": 4.963697892300155e-06, + "loss": 0.5687, + "step": 4527 + }, + { + "epoch": 0.3328922217320982, + "grad_norm": 0.8693060278892517, + "learning_rate": 4.963681529957424e-06, + "loss": 0.5784, + "step": 4528 + }, + { + "epoch": 0.3329657403323041, + "grad_norm": 0.8347862958908081, + "learning_rate": 4.963665163955028e-06, + "loss": 0.5449, + "step": 4529 + }, + { + "epoch": 0.3330392589325099, + "grad_norm": 0.8719387054443359, + "learning_rate": 4.963648794292992e-06, + "loss": 0.5527, + "step": 4530 + }, + { + "epoch": 0.3331127775327158, + "grad_norm": 0.8393582701683044, + "learning_rate": 4.9636324209713415e-06, + "loss": 0.559, + "step": 4531 + }, + { + "epoch": 0.3331862961329216, + "grad_norm": 0.8241118788719177, + "learning_rate": 4.9636160439901e-06, + "loss": 0.5529, + "step": 4532 + }, + { + "epoch": 0.3332598147331275, + "grad_norm": 0.8527097105979919, + "learning_rate": 4.963599663349291e-06, + "loss": 0.5675, + "step": 4533 + }, + { + "epoch": 0.3333333333333333, + "grad_norm": 0.8557189106941223, + "learning_rate": 4.963583279048941e-06, + "loss": 0.5636, + "step": 4534 + }, + { + "epoch": 0.3334068519335392, + "grad_norm": 0.8586276173591614, + "learning_rate": 4.963566891089072e-06, + "loss": 0.5983, + "step": 4535 + }, + { + "epoch": 0.333480370533745, + "grad_norm": 0.8969571590423584, + "learning_rate": 4.963550499469709e-06, + "loss": 0.5919, + "step": 4536 + }, + { + "epoch": 0.3335538891339509, + "grad_norm": 0.8642018437385559, + "learning_rate": 4.963534104190877e-06, + "loss": 0.5812, + "step": 4537 + }, + { + "epoch": 0.3336274077341567, + "grad_norm": 0.8920226097106934, + "learning_rate": 4.9635177052526e-06, + "loss": 0.607, + "step": 4538 + }, + { + "epoch": 0.3337009263343626, + "grad_norm": 0.8891538381576538, + "learning_rate": 4.963501302654903e-06, + "loss": 0.5838, + "step": 4539 + }, + { + "epoch": 0.3337744449345684, + "grad_norm": 0.8798891305923462, + "learning_rate": 4.9634848963978084e-06, + "loss": 0.5632, + "step": 4540 + }, + { + "epoch": 0.3338479635347743, + "grad_norm": 0.8710957169532776, + "learning_rate": 4.963468486481343e-06, + "loss": 0.5839, + "step": 4541 + }, + { + "epoch": 0.33392148213498013, + "grad_norm": 0.8934955596923828, + "learning_rate": 4.963452072905529e-06, + "loss": 0.5786, + "step": 4542 + }, + { + "epoch": 0.333995000735186, + "grad_norm": 0.9170438051223755, + "learning_rate": 4.963435655670393e-06, + "loss": 0.6026, + "step": 4543 + }, + { + "epoch": 0.33406851933539183, + "grad_norm": 0.8676872849464417, + "learning_rate": 4.9634192347759574e-06, + "loss": 0.5502, + "step": 4544 + }, + { + "epoch": 0.3341420379355977, + "grad_norm": 0.8670379519462585, + "learning_rate": 4.9634028102222475e-06, + "loss": 0.559, + "step": 4545 + }, + { + "epoch": 0.33421555653580354, + "grad_norm": 0.8167371153831482, + "learning_rate": 4.9633863820092874e-06, + "loss": 0.5789, + "step": 4546 + }, + { + "epoch": 0.3342890751360094, + "grad_norm": 0.7848652601242065, + "learning_rate": 4.963369950137103e-06, + "loss": 0.5198, + "step": 4547 + }, + { + "epoch": 0.33436259373621524, + "grad_norm": 0.8293746113777161, + "learning_rate": 4.963353514605717e-06, + "loss": 0.5362, + "step": 4548 + }, + { + "epoch": 0.3344361123364211, + "grad_norm": 0.8859875202178955, + "learning_rate": 4.963337075415153e-06, + "loss": 0.5567, + "step": 4549 + }, + { + "epoch": 0.33450963093662694, + "grad_norm": 0.842463493347168, + "learning_rate": 4.963320632565437e-06, + "loss": 0.6079, + "step": 4550 + }, + { + "epoch": 0.3345831495368328, + "grad_norm": 0.8581362962722778, + "learning_rate": 4.963304186056594e-06, + "loss": 0.5532, + "step": 4551 + }, + { + "epoch": 0.33465666813703865, + "grad_norm": 0.9296174049377441, + "learning_rate": 4.9632877358886465e-06, + "loss": 0.5921, + "step": 4552 + }, + { + "epoch": 0.33473018673724453, + "grad_norm": 0.844852864742279, + "learning_rate": 4.963271282061621e-06, + "loss": 0.5602, + "step": 4553 + }, + { + "epoch": 0.33480370533745035, + "grad_norm": 0.9303575754165649, + "learning_rate": 4.963254824575541e-06, + "loss": 0.6017, + "step": 4554 + }, + { + "epoch": 0.33487722393765623, + "grad_norm": 0.9162569642066956, + "learning_rate": 4.96323836343043e-06, + "loss": 0.5787, + "step": 4555 + }, + { + "epoch": 0.33495074253786206, + "grad_norm": 0.8555418848991394, + "learning_rate": 4.963221898626313e-06, + "loss": 0.5524, + "step": 4556 + }, + { + "epoch": 0.33502426113806794, + "grad_norm": 0.84161776304245, + "learning_rate": 4.963205430163215e-06, + "loss": 0.5316, + "step": 4557 + }, + { + "epoch": 0.33509777973827376, + "grad_norm": 0.8705741167068481, + "learning_rate": 4.963188958041161e-06, + "loss": 0.5783, + "step": 4558 + }, + { + "epoch": 0.33517129833847964, + "grad_norm": 0.8288487195968628, + "learning_rate": 4.963172482260175e-06, + "loss": 0.5781, + "step": 4559 + }, + { + "epoch": 0.33524481693868546, + "grad_norm": 0.8367265462875366, + "learning_rate": 4.96315600282028e-06, + "loss": 0.5334, + "step": 4560 + }, + { + "epoch": 0.33531833553889134, + "grad_norm": 0.9200619459152222, + "learning_rate": 4.963139519721502e-06, + "loss": 0.595, + "step": 4561 + }, + { + "epoch": 0.33539185413909717, + "grad_norm": 0.9015363454818726, + "learning_rate": 4.963123032963865e-06, + "loss": 0.5716, + "step": 4562 + }, + { + "epoch": 0.33546537273930305, + "grad_norm": 0.8672231435775757, + "learning_rate": 4.9631065425473935e-06, + "loss": 0.5635, + "step": 4563 + }, + { + "epoch": 0.33553889133950887, + "grad_norm": 0.8290905356407166, + "learning_rate": 4.963090048472112e-06, + "loss": 0.5801, + "step": 4564 + }, + { + "epoch": 0.33561240993971475, + "grad_norm": 0.9064881205558777, + "learning_rate": 4.963073550738046e-06, + "loss": 0.6171, + "step": 4565 + }, + { + "epoch": 0.3356859285399206, + "grad_norm": 0.8395309448242188, + "learning_rate": 4.963057049345218e-06, + "loss": 0.5497, + "step": 4566 + }, + { + "epoch": 0.33575944714012645, + "grad_norm": 0.9650911688804626, + "learning_rate": 4.963040544293655e-06, + "loss": 0.6192, + "step": 4567 + }, + { + "epoch": 0.3358329657403323, + "grad_norm": 0.8766315579414368, + "learning_rate": 4.9630240355833785e-06, + "loss": 0.5843, + "step": 4568 + }, + { + "epoch": 0.33590648434053816, + "grad_norm": 0.8636158108711243, + "learning_rate": 4.963007523214416e-06, + "loss": 0.5766, + "step": 4569 + }, + { + "epoch": 0.335980002940744, + "grad_norm": 0.8261372447013855, + "learning_rate": 4.96299100718679e-06, + "loss": 0.5662, + "step": 4570 + }, + { + "epoch": 0.33605352154094986, + "grad_norm": 0.8223912119865417, + "learning_rate": 4.962974487500526e-06, + "loss": 0.5691, + "step": 4571 + }, + { + "epoch": 0.3361270401411557, + "grad_norm": 0.8456782102584839, + "learning_rate": 4.962957964155648e-06, + "loss": 0.5608, + "step": 4572 + }, + { + "epoch": 0.33620055874136157, + "grad_norm": 0.8995372653007507, + "learning_rate": 4.962941437152181e-06, + "loss": 0.5407, + "step": 4573 + }, + { + "epoch": 0.3362740773415674, + "grad_norm": 0.8643891215324402, + "learning_rate": 4.962924906490149e-06, + "loss": 0.5914, + "step": 4574 + }, + { + "epoch": 0.33634759594177327, + "grad_norm": 0.8384696245193481, + "learning_rate": 4.9629083721695775e-06, + "loss": 0.5479, + "step": 4575 + }, + { + "epoch": 0.3364211145419791, + "grad_norm": 0.832654595375061, + "learning_rate": 4.96289183419049e-06, + "loss": 0.5831, + "step": 4576 + }, + { + "epoch": 0.336494633142185, + "grad_norm": 0.9187278151512146, + "learning_rate": 4.962875292552912e-06, + "loss": 0.5692, + "step": 4577 + }, + { + "epoch": 0.3365681517423908, + "grad_norm": 0.9030089974403381, + "learning_rate": 4.962858747256868e-06, + "loss": 0.6085, + "step": 4578 + }, + { + "epoch": 0.3366416703425967, + "grad_norm": 0.891452968120575, + "learning_rate": 4.962842198302381e-06, + "loss": 0.5754, + "step": 4579 + }, + { + "epoch": 0.3367151889428025, + "grad_norm": 0.9602774977684021, + "learning_rate": 4.962825645689477e-06, + "loss": 0.6195, + "step": 4580 + }, + { + "epoch": 0.3367887075430084, + "grad_norm": 0.8954156041145325, + "learning_rate": 4.962809089418181e-06, + "loss": 0.5708, + "step": 4581 + }, + { + "epoch": 0.33686222614321426, + "grad_norm": 0.8917964696884155, + "learning_rate": 4.962792529488517e-06, + "loss": 0.5312, + "step": 4582 + }, + { + "epoch": 0.3369357447434201, + "grad_norm": 0.8378130793571472, + "learning_rate": 4.96277596590051e-06, + "loss": 0.5394, + "step": 4583 + }, + { + "epoch": 0.33700926334362596, + "grad_norm": 0.8973307013511658, + "learning_rate": 4.962759398654182e-06, + "loss": 0.5288, + "step": 4584 + }, + { + "epoch": 0.3370827819438318, + "grad_norm": 0.9295499920845032, + "learning_rate": 4.962742827749562e-06, + "loss": 0.6123, + "step": 4585 + }, + { + "epoch": 0.33715630054403767, + "grad_norm": 0.8287844061851501, + "learning_rate": 4.962726253186672e-06, + "loss": 0.5506, + "step": 4586 + }, + { + "epoch": 0.3372298191442435, + "grad_norm": 0.8299131989479065, + "learning_rate": 4.9627096749655355e-06, + "loss": 0.5315, + "step": 4587 + }, + { + "epoch": 0.33730333774444937, + "grad_norm": 0.8963624238967896, + "learning_rate": 4.96269309308618e-06, + "loss": 0.5756, + "step": 4588 + }, + { + "epoch": 0.3373768563446552, + "grad_norm": 0.8628765344619751, + "learning_rate": 4.962676507548629e-06, + "loss": 0.5702, + "step": 4589 + }, + { + "epoch": 0.3374503749448611, + "grad_norm": 0.8866403698921204, + "learning_rate": 4.962659918352906e-06, + "loss": 0.5864, + "step": 4590 + }, + { + "epoch": 0.3375238935450669, + "grad_norm": 0.8305349946022034, + "learning_rate": 4.962643325499037e-06, + "loss": 0.571, + "step": 4591 + }, + { + "epoch": 0.3375974121452728, + "grad_norm": 0.8707558512687683, + "learning_rate": 4.962626728987046e-06, + "loss": 0.5737, + "step": 4592 + }, + { + "epoch": 0.3376709307454786, + "grad_norm": 0.8932754993438721, + "learning_rate": 4.962610128816958e-06, + "loss": 0.6116, + "step": 4593 + }, + { + "epoch": 0.3377444493456845, + "grad_norm": 0.9090648293495178, + "learning_rate": 4.962593524988798e-06, + "loss": 0.561, + "step": 4594 + }, + { + "epoch": 0.3378179679458903, + "grad_norm": 0.8433675169944763, + "learning_rate": 4.962576917502589e-06, + "loss": 0.581, + "step": 4595 + }, + { + "epoch": 0.3378914865460962, + "grad_norm": 0.8658644556999207, + "learning_rate": 4.962560306358357e-06, + "loss": 0.6169, + "step": 4596 + }, + { + "epoch": 0.337965005146302, + "grad_norm": 0.8492559194564819, + "learning_rate": 4.962543691556128e-06, + "loss": 0.5667, + "step": 4597 + }, + { + "epoch": 0.3380385237465079, + "grad_norm": 0.8824112415313721, + "learning_rate": 4.962527073095924e-06, + "loss": 0.5458, + "step": 4598 + }, + { + "epoch": 0.3381120423467137, + "grad_norm": 0.9014942646026611, + "learning_rate": 4.962510450977771e-06, + "loss": 0.5748, + "step": 4599 + }, + { + "epoch": 0.3381855609469196, + "grad_norm": 0.8861005306243896, + "learning_rate": 4.962493825201693e-06, + "loss": 0.6008, + "step": 4600 + }, + { + "epoch": 0.3382590795471254, + "grad_norm": 0.8499997854232788, + "learning_rate": 4.962477195767716e-06, + "loss": 0.5909, + "step": 4601 + }, + { + "epoch": 0.3383325981473313, + "grad_norm": 0.8226455450057983, + "learning_rate": 4.962460562675864e-06, + "loss": 0.5255, + "step": 4602 + }, + { + "epoch": 0.3384061167475371, + "grad_norm": 0.9016053676605225, + "learning_rate": 4.962443925926161e-06, + "loss": 0.5765, + "step": 4603 + }, + { + "epoch": 0.338479635347743, + "grad_norm": 0.9061883091926575, + "learning_rate": 4.962427285518633e-06, + "loss": 0.5994, + "step": 4604 + }, + { + "epoch": 0.3385531539479488, + "grad_norm": 0.8647778630256653, + "learning_rate": 4.962410641453305e-06, + "loss": 0.5435, + "step": 4605 + }, + { + "epoch": 0.3386266725481547, + "grad_norm": 0.896408200263977, + "learning_rate": 4.962393993730199e-06, + "loss": 0.5936, + "step": 4606 + }, + { + "epoch": 0.33870019114836053, + "grad_norm": 0.8713698387145996, + "learning_rate": 4.962377342349343e-06, + "loss": 0.6079, + "step": 4607 + }, + { + "epoch": 0.3387737097485664, + "grad_norm": 0.8874958157539368, + "learning_rate": 4.962360687310759e-06, + "loss": 0.5348, + "step": 4608 + }, + { + "epoch": 0.33884722834877223, + "grad_norm": 0.8992979526519775, + "learning_rate": 4.962344028614474e-06, + "loss": 0.5926, + "step": 4609 + }, + { + "epoch": 0.3389207469489781, + "grad_norm": 0.9039971828460693, + "learning_rate": 4.962327366260511e-06, + "loss": 0.5455, + "step": 4610 + }, + { + "epoch": 0.33899426554918394, + "grad_norm": 0.8288872838020325, + "learning_rate": 4.962310700248897e-06, + "loss": 0.5443, + "step": 4611 + }, + { + "epoch": 0.3390677841493898, + "grad_norm": 0.8435302376747131, + "learning_rate": 4.962294030579654e-06, + "loss": 0.5513, + "step": 4612 + }, + { + "epoch": 0.33914130274959564, + "grad_norm": 0.8883662223815918, + "learning_rate": 4.962277357252808e-06, + "loss": 0.5843, + "step": 4613 + }, + { + "epoch": 0.3392148213498015, + "grad_norm": 0.8318282961845398, + "learning_rate": 4.9622606802683845e-06, + "loss": 0.5272, + "step": 4614 + }, + { + "epoch": 0.33928833995000734, + "grad_norm": 0.8322310447692871, + "learning_rate": 4.962243999626407e-06, + "loss": 0.5468, + "step": 4615 + }, + { + "epoch": 0.3393618585502132, + "grad_norm": 0.8951018452644348, + "learning_rate": 4.962227315326901e-06, + "loss": 0.5656, + "step": 4616 + }, + { + "epoch": 0.33943537715041905, + "grad_norm": 0.8642719388008118, + "learning_rate": 4.962210627369892e-06, + "loss": 0.6083, + "step": 4617 + }, + { + "epoch": 0.33950889575062493, + "grad_norm": 0.8293258547782898, + "learning_rate": 4.962193935755403e-06, + "loss": 0.5159, + "step": 4618 + }, + { + "epoch": 0.33958241435083075, + "grad_norm": 0.9238324761390686, + "learning_rate": 4.96217724048346e-06, + "loss": 0.5784, + "step": 4619 + }, + { + "epoch": 0.33965593295103663, + "grad_norm": 0.8982626795768738, + "learning_rate": 4.962160541554087e-06, + "loss": 0.6473, + "step": 4620 + }, + { + "epoch": 0.33972945155124246, + "grad_norm": 0.8127766847610474, + "learning_rate": 4.96214383896731e-06, + "loss": 0.5785, + "step": 4621 + }, + { + "epoch": 0.33980297015144834, + "grad_norm": 0.8718191385269165, + "learning_rate": 4.962127132723154e-06, + "loss": 0.5695, + "step": 4622 + }, + { + "epoch": 0.33987648875165416, + "grad_norm": 0.8293112516403198, + "learning_rate": 4.962110422821641e-06, + "loss": 0.5409, + "step": 4623 + }, + { + "epoch": 0.33995000735186004, + "grad_norm": 0.8746054768562317, + "learning_rate": 4.962093709262798e-06, + "loss": 0.5741, + "step": 4624 + }, + { + "epoch": 0.34002352595206586, + "grad_norm": 0.8504027724266052, + "learning_rate": 4.962076992046652e-06, + "loss": 0.5855, + "step": 4625 + }, + { + "epoch": 0.34009704455227174, + "grad_norm": 0.9319602847099304, + "learning_rate": 4.962060271173223e-06, + "loss": 0.5986, + "step": 4626 + }, + { + "epoch": 0.34017056315247757, + "grad_norm": 0.8611686825752258, + "learning_rate": 4.962043546642538e-06, + "loss": 0.5246, + "step": 4627 + }, + { + "epoch": 0.34024408175268345, + "grad_norm": 0.8535488247871399, + "learning_rate": 4.962026818454624e-06, + "loss": 0.5447, + "step": 4628 + }, + { + "epoch": 0.34031760035288927, + "grad_norm": 0.8235706686973572, + "learning_rate": 4.962010086609503e-06, + "loss": 0.5534, + "step": 4629 + }, + { + "epoch": 0.34039111895309515, + "grad_norm": 0.9438055753707886, + "learning_rate": 4.9619933511072016e-06, + "loss": 0.5894, + "step": 4630 + }, + { + "epoch": 0.340464637553301, + "grad_norm": 0.8893630504608154, + "learning_rate": 4.961976611947743e-06, + "loss": 0.5754, + "step": 4631 + }, + { + "epoch": 0.34053815615350685, + "grad_norm": 0.9248397946357727, + "learning_rate": 4.961959869131153e-06, + "loss": 0.5617, + "step": 4632 + }, + { + "epoch": 0.3406116747537127, + "grad_norm": 0.8808282613754272, + "learning_rate": 4.961943122657458e-06, + "loss": 0.5713, + "step": 4633 + }, + { + "epoch": 0.34068519335391856, + "grad_norm": 0.8053242564201355, + "learning_rate": 4.961926372526679e-06, + "loss": 0.5537, + "step": 4634 + }, + { + "epoch": 0.3407587119541244, + "grad_norm": 0.8728961944580078, + "learning_rate": 4.961909618738845e-06, + "loss": 0.5644, + "step": 4635 + }, + { + "epoch": 0.34083223055433026, + "grad_norm": 0.8698500394821167, + "learning_rate": 4.961892861293977e-06, + "loss": 0.5547, + "step": 4636 + }, + { + "epoch": 0.3409057491545361, + "grad_norm": 0.8524126410484314, + "learning_rate": 4.961876100192104e-06, + "loss": 0.5894, + "step": 4637 + }, + { + "epoch": 0.34097926775474197, + "grad_norm": 0.9026541113853455, + "learning_rate": 4.9618593354332475e-06, + "loss": 0.5906, + "step": 4638 + }, + { + "epoch": 0.3410527863549478, + "grad_norm": 0.8278653621673584, + "learning_rate": 4.961842567017435e-06, + "loss": 0.5446, + "step": 4639 + }, + { + "epoch": 0.34112630495515367, + "grad_norm": 0.829285740852356, + "learning_rate": 4.96182579494469e-06, + "loss": 0.5797, + "step": 4640 + }, + { + "epoch": 0.3411998235553595, + "grad_norm": 0.9089422225952148, + "learning_rate": 4.961809019215037e-06, + "loss": 0.6385, + "step": 4641 + }, + { + "epoch": 0.3412733421555654, + "grad_norm": 0.8279508352279663, + "learning_rate": 4.9617922398285015e-06, + "loss": 0.5832, + "step": 4642 + }, + { + "epoch": 0.3413468607557712, + "grad_norm": 0.8627976775169373, + "learning_rate": 4.9617754567851085e-06, + "loss": 0.4941, + "step": 4643 + }, + { + "epoch": 0.3414203793559771, + "grad_norm": 0.8939419984817505, + "learning_rate": 4.961758670084883e-06, + "loss": 0.559, + "step": 4644 + }, + { + "epoch": 0.3414938979561829, + "grad_norm": 0.8597356677055359, + "learning_rate": 4.96174187972785e-06, + "loss": 0.5956, + "step": 4645 + }, + { + "epoch": 0.3415674165563888, + "grad_norm": 0.8452478051185608, + "learning_rate": 4.961725085714034e-06, + "loss": 0.5447, + "step": 4646 + }, + { + "epoch": 0.3416409351565946, + "grad_norm": 0.8829392790794373, + "learning_rate": 4.961708288043459e-06, + "loss": 0.5222, + "step": 4647 + }, + { + "epoch": 0.3417144537568005, + "grad_norm": 0.8377543687820435, + "learning_rate": 4.961691486716153e-06, + "loss": 0.554, + "step": 4648 + }, + { + "epoch": 0.3417879723570063, + "grad_norm": 0.8344792723655701, + "learning_rate": 4.961674681732138e-06, + "loss": 0.5828, + "step": 4649 + }, + { + "epoch": 0.3418614909572122, + "grad_norm": 0.910408616065979, + "learning_rate": 4.961657873091441e-06, + "loss": 0.5616, + "step": 4650 + }, + { + "epoch": 0.341935009557418, + "grad_norm": 0.9263701438903809, + "learning_rate": 4.961641060794085e-06, + "loss": 0.6146, + "step": 4651 + }, + { + "epoch": 0.3420085281576239, + "grad_norm": 0.8848537802696228, + "learning_rate": 4.961624244840096e-06, + "loss": 0.5741, + "step": 4652 + }, + { + "epoch": 0.3420820467578297, + "grad_norm": 0.869797945022583, + "learning_rate": 4.9616074252294996e-06, + "loss": 0.5734, + "step": 4653 + }, + { + "epoch": 0.3421555653580356, + "grad_norm": 0.8461419939994812, + "learning_rate": 4.96159060196232e-06, + "loss": 0.5989, + "step": 4654 + }, + { + "epoch": 0.3422290839582414, + "grad_norm": 0.8414024114608765, + "learning_rate": 4.961573775038583e-06, + "loss": 0.5436, + "step": 4655 + }, + { + "epoch": 0.3423026025584473, + "grad_norm": 0.9009036421775818, + "learning_rate": 4.961556944458312e-06, + "loss": 0.5818, + "step": 4656 + }, + { + "epoch": 0.3423761211586531, + "grad_norm": 0.8240041732788086, + "learning_rate": 4.961540110221533e-06, + "loss": 0.5498, + "step": 4657 + }, + { + "epoch": 0.342449639758859, + "grad_norm": 0.8489848971366882, + "learning_rate": 4.961523272328272e-06, + "loss": 0.5527, + "step": 4658 + }, + { + "epoch": 0.3425231583590648, + "grad_norm": 0.8339333534240723, + "learning_rate": 4.961506430778552e-06, + "loss": 0.5595, + "step": 4659 + }, + { + "epoch": 0.3425966769592707, + "grad_norm": 0.8599769473075867, + "learning_rate": 4.961489585572399e-06, + "loss": 0.59, + "step": 4660 + }, + { + "epoch": 0.34267019555947653, + "grad_norm": 0.8077322840690613, + "learning_rate": 4.961472736709838e-06, + "loss": 0.5968, + "step": 4661 + }, + { + "epoch": 0.3427437141596824, + "grad_norm": 0.9129850268363953, + "learning_rate": 4.9614558841908944e-06, + "loss": 0.5234, + "step": 4662 + }, + { + "epoch": 0.34281723275988824, + "grad_norm": 0.8598368167877197, + "learning_rate": 4.961439028015593e-06, + "loss": 0.5603, + "step": 4663 + }, + { + "epoch": 0.3428907513600941, + "grad_norm": 0.8428186178207397, + "learning_rate": 4.961422168183958e-06, + "loss": 0.5636, + "step": 4664 + }, + { + "epoch": 0.34296426996029994, + "grad_norm": 0.8958212733268738, + "learning_rate": 4.961405304696016e-06, + "loss": 0.5676, + "step": 4665 + }, + { + "epoch": 0.3430377885605058, + "grad_norm": 0.8199888467788696, + "learning_rate": 4.961388437551791e-06, + "loss": 0.5593, + "step": 4666 + }, + { + "epoch": 0.34311130716071164, + "grad_norm": 0.8699430227279663, + "learning_rate": 4.961371566751307e-06, + "loss": 0.568, + "step": 4667 + }, + { + "epoch": 0.3431848257609175, + "grad_norm": 0.8774440288543701, + "learning_rate": 4.961354692294592e-06, + "loss": 0.5946, + "step": 4668 + }, + { + "epoch": 0.34325834436112335, + "grad_norm": 0.8417625427246094, + "learning_rate": 4.961337814181669e-06, + "loss": 0.5255, + "step": 4669 + }, + { + "epoch": 0.3433318629613292, + "grad_norm": 0.853598952293396, + "learning_rate": 4.961320932412563e-06, + "loss": 0.5641, + "step": 4670 + }, + { + "epoch": 0.34340538156153505, + "grad_norm": 0.8371121287345886, + "learning_rate": 4.9613040469873e-06, + "loss": 0.5942, + "step": 4671 + }, + { + "epoch": 0.34347890016174093, + "grad_norm": 0.8147546052932739, + "learning_rate": 4.9612871579059044e-06, + "loss": 0.5403, + "step": 4672 + }, + { + "epoch": 0.34355241876194675, + "grad_norm": 0.9099727869033813, + "learning_rate": 4.961270265168401e-06, + "loss": 0.5359, + "step": 4673 + }, + { + "epoch": 0.34362593736215263, + "grad_norm": 0.8961179852485657, + "learning_rate": 4.961253368774815e-06, + "loss": 0.57, + "step": 4674 + }, + { + "epoch": 0.34369945596235846, + "grad_norm": 0.9132282733917236, + "learning_rate": 4.9612364687251735e-06, + "loss": 0.5781, + "step": 4675 + }, + { + "epoch": 0.34377297456256434, + "grad_norm": 0.8393197655677795, + "learning_rate": 4.961219565019499e-06, + "loss": 0.5685, + "step": 4676 + }, + { + "epoch": 0.34384649316277016, + "grad_norm": 0.8550254702568054, + "learning_rate": 4.961202657657818e-06, + "loss": 0.5603, + "step": 4677 + }, + { + "epoch": 0.34392001176297604, + "grad_norm": 0.8250532150268555, + "learning_rate": 4.961185746640155e-06, + "loss": 0.5634, + "step": 4678 + }, + { + "epoch": 0.34399353036318187, + "grad_norm": 0.8913498520851135, + "learning_rate": 4.961168831966535e-06, + "loss": 0.5922, + "step": 4679 + }, + { + "epoch": 0.34406704896338774, + "grad_norm": 0.9292895793914795, + "learning_rate": 4.9611519136369844e-06, + "loss": 0.6213, + "step": 4680 + }, + { + "epoch": 0.34414056756359357, + "grad_norm": 0.8360369205474854, + "learning_rate": 4.961134991651527e-06, + "loss": 0.5716, + "step": 4681 + }, + { + "epoch": 0.34421408616379945, + "grad_norm": 0.8782820701599121, + "learning_rate": 4.961118066010188e-06, + "loss": 0.5683, + "step": 4682 + }, + { + "epoch": 0.3442876047640053, + "grad_norm": 0.8592291474342346, + "learning_rate": 4.961101136712994e-06, + "loss": 0.568, + "step": 4683 + }, + { + "epoch": 0.34436112336421115, + "grad_norm": 0.8579493165016174, + "learning_rate": 4.9610842037599675e-06, + "loss": 0.5583, + "step": 4684 + }, + { + "epoch": 0.344434641964417, + "grad_norm": 0.8108445405960083, + "learning_rate": 4.961067267151136e-06, + "loss": 0.5643, + "step": 4685 + }, + { + "epoch": 0.34450816056462286, + "grad_norm": 0.8865973949432373, + "learning_rate": 4.961050326886523e-06, + "loss": 0.5871, + "step": 4686 + }, + { + "epoch": 0.3445816791648287, + "grad_norm": 0.8068916201591492, + "learning_rate": 4.961033382966155e-06, + "loss": 0.5531, + "step": 4687 + }, + { + "epoch": 0.34465519776503456, + "grad_norm": 0.8720250129699707, + "learning_rate": 4.961016435390057e-06, + "loss": 0.5701, + "step": 4688 + }, + { + "epoch": 0.3447287163652404, + "grad_norm": 0.9062375426292419, + "learning_rate": 4.9609994841582535e-06, + "loss": 0.6067, + "step": 4689 + }, + { + "epoch": 0.34480223496544626, + "grad_norm": 0.917977511882782, + "learning_rate": 4.96098252927077e-06, + "loss": 0.6057, + "step": 4690 + }, + { + "epoch": 0.3448757535656521, + "grad_norm": 0.9513715505599976, + "learning_rate": 4.9609655707276325e-06, + "loss": 0.6214, + "step": 4691 + }, + { + "epoch": 0.34494927216585797, + "grad_norm": 0.8375378847122192, + "learning_rate": 4.960948608528865e-06, + "loss": 0.5388, + "step": 4692 + }, + { + "epoch": 0.3450227907660638, + "grad_norm": 0.870511531829834, + "learning_rate": 4.960931642674493e-06, + "loss": 0.5682, + "step": 4693 + }, + { + "epoch": 0.34509630936626967, + "grad_norm": 0.8475044369697571, + "learning_rate": 4.960914673164542e-06, + "loss": 0.5564, + "step": 4694 + }, + { + "epoch": 0.3451698279664755, + "grad_norm": 0.9083751440048218, + "learning_rate": 4.960897699999036e-06, + "loss": 0.5606, + "step": 4695 + }, + { + "epoch": 0.3452433465666814, + "grad_norm": 0.8281998038291931, + "learning_rate": 4.960880723178002e-06, + "loss": 0.557, + "step": 4696 + }, + { + "epoch": 0.3453168651668872, + "grad_norm": 0.8156173229217529, + "learning_rate": 4.960863742701465e-06, + "loss": 0.4726, + "step": 4697 + }, + { + "epoch": 0.3453903837670931, + "grad_norm": 0.8452776670455933, + "learning_rate": 4.960846758569449e-06, + "loss": 0.5085, + "step": 4698 + }, + { + "epoch": 0.3454639023672989, + "grad_norm": 0.8611462116241455, + "learning_rate": 4.960829770781979e-06, + "loss": 0.5419, + "step": 4699 + }, + { + "epoch": 0.3455374209675048, + "grad_norm": 0.8306328654289246, + "learning_rate": 4.960812779339082e-06, + "loss": 0.5631, + "step": 4700 + }, + { + "epoch": 0.3456109395677106, + "grad_norm": 0.8326524496078491, + "learning_rate": 4.960795784240783e-06, + "loss": 0.5402, + "step": 4701 + }, + { + "epoch": 0.3456844581679165, + "grad_norm": 0.7894123196601868, + "learning_rate": 4.960778785487106e-06, + "loss": 0.5327, + "step": 4702 + }, + { + "epoch": 0.3457579767681223, + "grad_norm": 0.8797257542610168, + "learning_rate": 4.960761783078076e-06, + "loss": 0.5891, + "step": 4703 + }, + { + "epoch": 0.3458314953683282, + "grad_norm": 0.8865137696266174, + "learning_rate": 4.960744777013721e-06, + "loss": 0.6249, + "step": 4704 + }, + { + "epoch": 0.345905013968534, + "grad_norm": 0.8898247480392456, + "learning_rate": 4.960727767294064e-06, + "loss": 0.5403, + "step": 4705 + }, + { + "epoch": 0.3459785325687399, + "grad_norm": 0.825171947479248, + "learning_rate": 4.960710753919129e-06, + "loss": 0.5395, + "step": 4706 + }, + { + "epoch": 0.3460520511689457, + "grad_norm": 0.9037418365478516, + "learning_rate": 4.960693736888944e-06, + "loss": 0.6405, + "step": 4707 + }, + { + "epoch": 0.3461255697691516, + "grad_norm": 0.8181114196777344, + "learning_rate": 4.960676716203533e-06, + "loss": 0.5057, + "step": 4708 + }, + { + "epoch": 0.3461990883693574, + "grad_norm": 0.8201615810394287, + "learning_rate": 4.960659691862922e-06, + "loss": 0.5227, + "step": 4709 + }, + { + "epoch": 0.3462726069695633, + "grad_norm": 0.851142168045044, + "learning_rate": 4.9606426638671354e-06, + "loss": 0.5416, + "step": 4710 + }, + { + "epoch": 0.3463461255697691, + "grad_norm": 0.8698261380195618, + "learning_rate": 4.960625632216199e-06, + "loss": 0.5347, + "step": 4711 + }, + { + "epoch": 0.346419644169975, + "grad_norm": 0.8434862494468689, + "learning_rate": 4.960608596910138e-06, + "loss": 0.5286, + "step": 4712 + }, + { + "epoch": 0.34649316277018083, + "grad_norm": 0.871705174446106, + "learning_rate": 4.960591557948977e-06, + "loss": 0.5815, + "step": 4713 + }, + { + "epoch": 0.3465666813703867, + "grad_norm": 0.8427960872650146, + "learning_rate": 4.960574515332742e-06, + "loss": 0.5512, + "step": 4714 + }, + { + "epoch": 0.34664019997059253, + "grad_norm": 0.8599200248718262, + "learning_rate": 4.960557469061459e-06, + "loss": 0.5708, + "step": 4715 + }, + { + "epoch": 0.3467137185707984, + "grad_norm": 0.8815653324127197, + "learning_rate": 4.960540419135152e-06, + "loss": 0.5829, + "step": 4716 + }, + { + "epoch": 0.34678723717100424, + "grad_norm": 0.8979823589324951, + "learning_rate": 4.960523365553848e-06, + "loss": 0.5602, + "step": 4717 + }, + { + "epoch": 0.3468607557712101, + "grad_norm": 0.8457053899765015, + "learning_rate": 4.960506308317571e-06, + "loss": 0.5415, + "step": 4718 + }, + { + "epoch": 0.34693427437141594, + "grad_norm": 0.8094039559364319, + "learning_rate": 4.960489247426346e-06, + "loss": 0.5229, + "step": 4719 + }, + { + "epoch": 0.3470077929716218, + "grad_norm": 0.8723036050796509, + "learning_rate": 4.9604721828802e-06, + "loss": 0.6053, + "step": 4720 + }, + { + "epoch": 0.3470813115718277, + "grad_norm": 0.844852089881897, + "learning_rate": 4.960455114679156e-06, + "loss": 0.5911, + "step": 4721 + }, + { + "epoch": 0.3471548301720335, + "grad_norm": 0.8863261342048645, + "learning_rate": 4.96043804282324e-06, + "loss": 0.5809, + "step": 4722 + }, + { + "epoch": 0.3472283487722394, + "grad_norm": 0.8770760893821716, + "learning_rate": 4.96042096731248e-06, + "loss": 0.5616, + "step": 4723 + }, + { + "epoch": 0.3473018673724452, + "grad_norm": 0.9035050868988037, + "learning_rate": 4.960403888146899e-06, + "loss": 0.5728, + "step": 4724 + }, + { + "epoch": 0.3473753859726511, + "grad_norm": 0.8932836651802063, + "learning_rate": 4.960386805326522e-06, + "loss": 0.5645, + "step": 4725 + }, + { + "epoch": 0.34744890457285693, + "grad_norm": 0.8382366895675659, + "learning_rate": 4.960369718851375e-06, + "loss": 0.5547, + "step": 4726 + }, + { + "epoch": 0.3475224231730628, + "grad_norm": 0.79499751329422, + "learning_rate": 4.9603526287214845e-06, + "loss": 0.5545, + "step": 4727 + }, + { + "epoch": 0.34759594177326864, + "grad_norm": 0.8936932682991028, + "learning_rate": 4.9603355349368745e-06, + "loss": 0.6243, + "step": 4728 + }, + { + "epoch": 0.3476694603734745, + "grad_norm": 0.8711003661155701, + "learning_rate": 4.96031843749757e-06, + "loss": 0.5397, + "step": 4729 + }, + { + "epoch": 0.34774297897368034, + "grad_norm": 0.9002156257629395, + "learning_rate": 4.960301336403598e-06, + "loss": 0.5885, + "step": 4730 + }, + { + "epoch": 0.3478164975738862, + "grad_norm": 0.8726794719696045, + "learning_rate": 4.9602842316549825e-06, + "loss": 0.6067, + "step": 4731 + }, + { + "epoch": 0.34789001617409204, + "grad_norm": 0.8289440870285034, + "learning_rate": 4.96026712325175e-06, + "loss": 0.5907, + "step": 4732 + }, + { + "epoch": 0.3479635347742979, + "grad_norm": 0.8215957880020142, + "learning_rate": 4.9602500111939254e-06, + "loss": 0.5731, + "step": 4733 + }, + { + "epoch": 0.34803705337450375, + "grad_norm": 0.8734919428825378, + "learning_rate": 4.960232895481534e-06, + "loss": 0.5675, + "step": 4734 + }, + { + "epoch": 0.3481105719747096, + "grad_norm": 0.8412034511566162, + "learning_rate": 4.960215776114602e-06, + "loss": 0.5982, + "step": 4735 + }, + { + "epoch": 0.34818409057491545, + "grad_norm": 0.9290422797203064, + "learning_rate": 4.960198653093154e-06, + "loss": 0.6088, + "step": 4736 + }, + { + "epoch": 0.34825760917512133, + "grad_norm": 0.8983630537986755, + "learning_rate": 4.960181526417214e-06, + "loss": 0.6111, + "step": 4737 + }, + { + "epoch": 0.34833112777532715, + "grad_norm": 0.7926350831985474, + "learning_rate": 4.96016439608681e-06, + "loss": 0.5655, + "step": 4738 + }, + { + "epoch": 0.34840464637553303, + "grad_norm": 0.8838599920272827, + "learning_rate": 4.960147262101967e-06, + "loss": 0.6142, + "step": 4739 + }, + { + "epoch": 0.34847816497573886, + "grad_norm": 0.8647797107696533, + "learning_rate": 4.96013012446271e-06, + "loss": 0.5673, + "step": 4740 + }, + { + "epoch": 0.34855168357594474, + "grad_norm": 0.7914909720420837, + "learning_rate": 4.960112983169064e-06, + "loss": 0.5373, + "step": 4741 + }, + { + "epoch": 0.34862520217615056, + "grad_norm": 0.8492546081542969, + "learning_rate": 4.960095838221055e-06, + "loss": 0.552, + "step": 4742 + }, + { + "epoch": 0.34869872077635644, + "grad_norm": 0.81171715259552, + "learning_rate": 4.960078689618708e-06, + "loss": 0.5399, + "step": 4743 + }, + { + "epoch": 0.34877223937656227, + "grad_norm": 0.8709095120429993, + "learning_rate": 4.96006153736205e-06, + "loss": 0.5975, + "step": 4744 + }, + { + "epoch": 0.34884575797676814, + "grad_norm": 0.8689814209938049, + "learning_rate": 4.960044381451104e-06, + "loss": 0.5578, + "step": 4745 + }, + { + "epoch": 0.34891927657697397, + "grad_norm": 0.8547964096069336, + "learning_rate": 4.960027221885897e-06, + "loss": 0.5819, + "step": 4746 + }, + { + "epoch": 0.34899279517717985, + "grad_norm": 0.8496105074882507, + "learning_rate": 4.960010058666455e-06, + "loss": 0.5951, + "step": 4747 + }, + { + "epoch": 0.3490663137773857, + "grad_norm": 0.8754352331161499, + "learning_rate": 4.959992891792802e-06, + "loss": 0.5793, + "step": 4748 + }, + { + "epoch": 0.34913983237759155, + "grad_norm": 0.8575917482376099, + "learning_rate": 4.959975721264965e-06, + "loss": 0.5682, + "step": 4749 + }, + { + "epoch": 0.3492133509777974, + "grad_norm": 0.871071994304657, + "learning_rate": 4.959958547082968e-06, + "loss": 0.5391, + "step": 4750 + }, + { + "epoch": 0.34928686957800326, + "grad_norm": 0.8784207105636597, + "learning_rate": 4.9599413692468374e-06, + "loss": 0.5519, + "step": 4751 + }, + { + "epoch": 0.3493603881782091, + "grad_norm": 0.90694260597229, + "learning_rate": 4.959924187756598e-06, + "loss": 0.5401, + "step": 4752 + }, + { + "epoch": 0.34943390677841496, + "grad_norm": 0.8830965161323547, + "learning_rate": 4.959907002612277e-06, + "loss": 0.5899, + "step": 4753 + }, + { + "epoch": 0.3495074253786208, + "grad_norm": 0.904812753200531, + "learning_rate": 4.959889813813899e-06, + "loss": 0.6252, + "step": 4754 + }, + { + "epoch": 0.34958094397882666, + "grad_norm": 0.8274221420288086, + "learning_rate": 4.959872621361489e-06, + "loss": 0.5216, + "step": 4755 + }, + { + "epoch": 0.3496544625790325, + "grad_norm": 0.8852218389511108, + "learning_rate": 4.959855425255072e-06, + "loss": 0.5569, + "step": 4756 + }, + { + "epoch": 0.34972798117923837, + "grad_norm": 0.7986953854560852, + "learning_rate": 4.959838225494676e-06, + "loss": 0.5641, + "step": 4757 + }, + { + "epoch": 0.3498014997794442, + "grad_norm": 0.8448214530944824, + "learning_rate": 4.959821022080325e-06, + "loss": 0.5817, + "step": 4758 + }, + { + "epoch": 0.34987501837965007, + "grad_norm": 0.8719046711921692, + "learning_rate": 4.9598038150120434e-06, + "loss": 0.6087, + "step": 4759 + }, + { + "epoch": 0.3499485369798559, + "grad_norm": 0.8783541321754456, + "learning_rate": 4.959786604289858e-06, + "loss": 0.5765, + "step": 4760 + }, + { + "epoch": 0.3500220555800618, + "grad_norm": 0.9330359101295471, + "learning_rate": 4.9597693899137946e-06, + "loss": 0.6076, + "step": 4761 + }, + { + "epoch": 0.3500955741802676, + "grad_norm": 0.8466233611106873, + "learning_rate": 4.9597521718838795e-06, + "loss": 0.5496, + "step": 4762 + }, + { + "epoch": 0.3501690927804735, + "grad_norm": 0.9394394159317017, + "learning_rate": 4.959734950200135e-06, + "loss": 0.5833, + "step": 4763 + }, + { + "epoch": 0.3502426113806793, + "grad_norm": 0.8479570746421814, + "learning_rate": 4.959717724862591e-06, + "loss": 0.5912, + "step": 4764 + }, + { + "epoch": 0.3503161299808852, + "grad_norm": 0.8708478212356567, + "learning_rate": 4.95970049587127e-06, + "loss": 0.5975, + "step": 4765 + }, + { + "epoch": 0.350389648581091, + "grad_norm": 0.8779246211051941, + "learning_rate": 4.959683263226199e-06, + "loss": 0.6061, + "step": 4766 + }, + { + "epoch": 0.3504631671812969, + "grad_norm": 0.8282793164253235, + "learning_rate": 4.959666026927403e-06, + "loss": 0.5202, + "step": 4767 + }, + { + "epoch": 0.3505366857815027, + "grad_norm": 0.8227545022964478, + "learning_rate": 4.9596487869749075e-06, + "loss": 0.56, + "step": 4768 + }, + { + "epoch": 0.3506102043817086, + "grad_norm": 0.9249321222305298, + "learning_rate": 4.959631543368739e-06, + "loss": 0.6048, + "step": 4769 + }, + { + "epoch": 0.3506837229819144, + "grad_norm": 0.7725911140441895, + "learning_rate": 4.959614296108923e-06, + "loss": 0.5506, + "step": 4770 + }, + { + "epoch": 0.3507572415821203, + "grad_norm": 0.8533499836921692, + "learning_rate": 4.9595970451954834e-06, + "loss": 0.5249, + "step": 4771 + }, + { + "epoch": 0.3508307601823261, + "grad_norm": 0.8292264938354492, + "learning_rate": 4.959579790628447e-06, + "loss": 0.5523, + "step": 4772 + }, + { + "epoch": 0.350904278782532, + "grad_norm": 0.8204984068870544, + "learning_rate": 4.959562532407841e-06, + "loss": 0.5687, + "step": 4773 + }, + { + "epoch": 0.3509777973827378, + "grad_norm": 0.9175398945808411, + "learning_rate": 4.959545270533689e-06, + "loss": 0.6096, + "step": 4774 + }, + { + "epoch": 0.3510513159829437, + "grad_norm": 0.8413866758346558, + "learning_rate": 4.959528005006016e-06, + "loss": 0.56, + "step": 4775 + }, + { + "epoch": 0.3511248345831495, + "grad_norm": 0.8692787885665894, + "learning_rate": 4.95951073582485e-06, + "loss": 0.5072, + "step": 4776 + }, + { + "epoch": 0.3511983531833554, + "grad_norm": 0.8689632415771484, + "learning_rate": 4.9594934629902145e-06, + "loss": 0.5931, + "step": 4777 + }, + { + "epoch": 0.35127187178356123, + "grad_norm": 0.8935190439224243, + "learning_rate": 4.9594761865021376e-06, + "loss": 0.573, + "step": 4778 + }, + { + "epoch": 0.3513453903837671, + "grad_norm": 0.8436724543571472, + "learning_rate": 4.959458906360642e-06, + "loss": 0.5832, + "step": 4779 + }, + { + "epoch": 0.35141890898397293, + "grad_norm": 0.8765549659729004, + "learning_rate": 4.959441622565756e-06, + "loss": 0.6089, + "step": 4780 + }, + { + "epoch": 0.3514924275841788, + "grad_norm": 0.9100958704948425, + "learning_rate": 4.959424335117504e-06, + "loss": 0.562, + "step": 4781 + }, + { + "epoch": 0.35156594618438464, + "grad_norm": 0.7927442193031311, + "learning_rate": 4.9594070440159116e-06, + "loss": 0.5202, + "step": 4782 + }, + { + "epoch": 0.3516394647845905, + "grad_norm": 0.8148897886276245, + "learning_rate": 4.959389749261004e-06, + "loss": 0.6062, + "step": 4783 + }, + { + "epoch": 0.35171298338479634, + "grad_norm": 0.862069308757782, + "learning_rate": 4.959372450852808e-06, + "loss": 0.5673, + "step": 4784 + }, + { + "epoch": 0.3517865019850022, + "grad_norm": 0.8861299753189087, + "learning_rate": 4.95935514879135e-06, + "loss": 0.5833, + "step": 4785 + }, + { + "epoch": 0.35186002058520804, + "grad_norm": 0.9099246859550476, + "learning_rate": 4.959337843076653e-06, + "loss": 0.5933, + "step": 4786 + }, + { + "epoch": 0.3519335391854139, + "grad_norm": 0.8570312857627869, + "learning_rate": 4.9593205337087454e-06, + "loss": 0.5698, + "step": 4787 + }, + { + "epoch": 0.35200705778561975, + "grad_norm": 0.8738184571266174, + "learning_rate": 4.959303220687651e-06, + "loss": 0.5795, + "step": 4788 + }, + { + "epoch": 0.3520805763858256, + "grad_norm": 0.8624793291091919, + "learning_rate": 4.959285904013397e-06, + "loss": 0.5805, + "step": 4789 + }, + { + "epoch": 0.35215409498603145, + "grad_norm": 0.8654090762138367, + "learning_rate": 4.959268583686009e-06, + "loss": 0.579, + "step": 4790 + }, + { + "epoch": 0.35222761358623733, + "grad_norm": 0.8823844194412231, + "learning_rate": 4.959251259705511e-06, + "loss": 0.5495, + "step": 4791 + }, + { + "epoch": 0.35230113218644316, + "grad_norm": 0.8356195092201233, + "learning_rate": 4.959233932071931e-06, + "loss": 0.6018, + "step": 4792 + }, + { + "epoch": 0.35237465078664904, + "grad_norm": 0.8193452954292297, + "learning_rate": 4.959216600785293e-06, + "loss": 0.5171, + "step": 4793 + }, + { + "epoch": 0.35244816938685486, + "grad_norm": 0.8091182112693787, + "learning_rate": 4.959199265845623e-06, + "loss": 0.5062, + "step": 4794 + }, + { + "epoch": 0.35252168798706074, + "grad_norm": 0.9093305468559265, + "learning_rate": 4.959181927252947e-06, + "loss": 0.6069, + "step": 4795 + }, + { + "epoch": 0.35259520658726656, + "grad_norm": 0.8589522242546082, + "learning_rate": 4.959164585007293e-06, + "loss": 0.6017, + "step": 4796 + }, + { + "epoch": 0.35266872518747244, + "grad_norm": 0.8502134084701538, + "learning_rate": 4.959147239108682e-06, + "loss": 0.5721, + "step": 4797 + }, + { + "epoch": 0.35274224378767827, + "grad_norm": 0.8990846276283264, + "learning_rate": 4.9591298895571435e-06, + "loss": 0.583, + "step": 4798 + }, + { + "epoch": 0.35281576238788415, + "grad_norm": 0.8531656265258789, + "learning_rate": 4.959112536352702e-06, + "loss": 0.5416, + "step": 4799 + }, + { + "epoch": 0.35288928098808997, + "grad_norm": 0.8283219933509827, + "learning_rate": 4.959095179495385e-06, + "loss": 0.5834, + "step": 4800 + }, + { + "epoch": 0.35296279958829585, + "grad_norm": 0.8309706449508667, + "learning_rate": 4.959077818985215e-06, + "loss": 0.5636, + "step": 4801 + }, + { + "epoch": 0.3530363181885017, + "grad_norm": 0.8569040298461914, + "learning_rate": 4.9590604548222195e-06, + "loss": 0.5939, + "step": 4802 + }, + { + "epoch": 0.35310983678870755, + "grad_norm": 0.8581138253211975, + "learning_rate": 4.959043087006425e-06, + "loss": 0.5866, + "step": 4803 + }, + { + "epoch": 0.3531833553889134, + "grad_norm": 0.8631120920181274, + "learning_rate": 4.959025715537857e-06, + "loss": 0.5788, + "step": 4804 + }, + { + "epoch": 0.35325687398911926, + "grad_norm": 0.994267463684082, + "learning_rate": 4.95900834041654e-06, + "loss": 0.6318, + "step": 4805 + }, + { + "epoch": 0.3533303925893251, + "grad_norm": 0.8587363362312317, + "learning_rate": 4.958990961642502e-06, + "loss": 0.567, + "step": 4806 + }, + { + "epoch": 0.35340391118953096, + "grad_norm": 0.8905619978904724, + "learning_rate": 4.958973579215767e-06, + "loss": 0.576, + "step": 4807 + }, + { + "epoch": 0.3534774297897368, + "grad_norm": 0.8174751996994019, + "learning_rate": 4.9589561931363605e-06, + "loss": 0.5567, + "step": 4808 + }, + { + "epoch": 0.35355094838994267, + "grad_norm": 0.8598952889442444, + "learning_rate": 4.95893880340431e-06, + "loss": 0.558, + "step": 4809 + }, + { + "epoch": 0.3536244669901485, + "grad_norm": 0.8148205280303955, + "learning_rate": 4.958921410019641e-06, + "loss": 0.586, + "step": 4810 + }, + { + "epoch": 0.35369798559035437, + "grad_norm": 0.8659611344337463, + "learning_rate": 4.9589040129823785e-06, + "loss": 0.556, + "step": 4811 + }, + { + "epoch": 0.3537715041905602, + "grad_norm": 0.8508137464523315, + "learning_rate": 4.9588866122925485e-06, + "loss": 0.5757, + "step": 4812 + }, + { + "epoch": 0.3538450227907661, + "grad_norm": 0.8174759745597839, + "learning_rate": 4.958869207950178e-06, + "loss": 0.5483, + "step": 4813 + }, + { + "epoch": 0.3539185413909719, + "grad_norm": 0.8691043257713318, + "learning_rate": 4.958851799955291e-06, + "loss": 0.6018, + "step": 4814 + }, + { + "epoch": 0.3539920599911778, + "grad_norm": 0.8746324777603149, + "learning_rate": 4.958834388307914e-06, + "loss": 0.5807, + "step": 4815 + }, + { + "epoch": 0.3540655785913836, + "grad_norm": 0.8221780061721802, + "learning_rate": 4.958816973008073e-06, + "loss": 0.5848, + "step": 4816 + }, + { + "epoch": 0.3541390971915895, + "grad_norm": 0.9514843821525574, + "learning_rate": 4.9587995540557955e-06, + "loss": 0.6119, + "step": 4817 + }, + { + "epoch": 0.3542126157917953, + "grad_norm": 0.8976331353187561, + "learning_rate": 4.9587821314511055e-06, + "loss": 0.5505, + "step": 4818 + }, + { + "epoch": 0.3542861343920012, + "grad_norm": 0.839257001876831, + "learning_rate": 4.958764705194028e-06, + "loss": 0.5675, + "step": 4819 + }, + { + "epoch": 0.354359652992207, + "grad_norm": 0.8516917824745178, + "learning_rate": 4.958747275284591e-06, + "loss": 0.5317, + "step": 4820 + }, + { + "epoch": 0.3544331715924129, + "grad_norm": 0.8694362044334412, + "learning_rate": 4.9587298417228204e-06, + "loss": 0.6036, + "step": 4821 + }, + { + "epoch": 0.3545066901926187, + "grad_norm": 0.8336926102638245, + "learning_rate": 4.95871240450874e-06, + "loss": 0.5547, + "step": 4822 + }, + { + "epoch": 0.3545802087928246, + "grad_norm": 0.8698325753211975, + "learning_rate": 4.958694963642377e-06, + "loss": 0.6143, + "step": 4823 + }, + { + "epoch": 0.3546537273930304, + "grad_norm": 0.9147480726242065, + "learning_rate": 4.958677519123758e-06, + "loss": 0.5657, + "step": 4824 + }, + { + "epoch": 0.3547272459932363, + "grad_norm": 0.8355149030685425, + "learning_rate": 4.9586600709529085e-06, + "loss": 0.5608, + "step": 4825 + }, + { + "epoch": 0.3548007645934421, + "grad_norm": 0.8619258403778076, + "learning_rate": 4.9586426191298535e-06, + "loss": 0.5691, + "step": 4826 + }, + { + "epoch": 0.354874283193648, + "grad_norm": 0.8760502338409424, + "learning_rate": 4.9586251636546186e-06, + "loss": 0.5529, + "step": 4827 + }, + { + "epoch": 0.3549478017938538, + "grad_norm": 0.8500862121582031, + "learning_rate": 4.958607704527232e-06, + "loss": 0.5786, + "step": 4828 + }, + { + "epoch": 0.3550213203940597, + "grad_norm": 0.8870114684104919, + "learning_rate": 4.958590241747717e-06, + "loss": 0.6086, + "step": 4829 + }, + { + "epoch": 0.3550948389942655, + "grad_norm": 0.8315834403038025, + "learning_rate": 4.958572775316102e-06, + "loss": 0.5431, + "step": 4830 + }, + { + "epoch": 0.3551683575944714, + "grad_norm": 0.9370827078819275, + "learning_rate": 4.958555305232412e-06, + "loss": 0.588, + "step": 4831 + }, + { + "epoch": 0.35524187619467723, + "grad_norm": 0.8351330757141113, + "learning_rate": 4.958537831496672e-06, + "loss": 0.5707, + "step": 4832 + }, + { + "epoch": 0.3553153947948831, + "grad_norm": 0.8276867866516113, + "learning_rate": 4.958520354108909e-06, + "loss": 0.5113, + "step": 4833 + }, + { + "epoch": 0.35538891339508893, + "grad_norm": 0.8909245133399963, + "learning_rate": 4.958502873069148e-06, + "loss": 0.5999, + "step": 4834 + }, + { + "epoch": 0.3554624319952948, + "grad_norm": 0.816241443157196, + "learning_rate": 4.958485388377417e-06, + "loss": 0.5392, + "step": 4835 + }, + { + "epoch": 0.35553595059550064, + "grad_norm": 0.8863404393196106, + "learning_rate": 4.958467900033739e-06, + "loss": 0.5923, + "step": 4836 + }, + { + "epoch": 0.3556094691957065, + "grad_norm": 0.8801902532577515, + "learning_rate": 4.9584504080381425e-06, + "loss": 0.5637, + "step": 4837 + }, + { + "epoch": 0.35568298779591234, + "grad_norm": 0.947746992111206, + "learning_rate": 4.958432912390651e-06, + "loss": 0.5925, + "step": 4838 + }, + { + "epoch": 0.3557565063961182, + "grad_norm": 0.8756659626960754, + "learning_rate": 4.958415413091295e-06, + "loss": 0.5534, + "step": 4839 + }, + { + "epoch": 0.35583002499632405, + "grad_norm": 0.8432943224906921, + "learning_rate": 4.958397910140095e-06, + "loss": 0.557, + "step": 4840 + }, + { + "epoch": 0.3559035435965299, + "grad_norm": 0.8261925578117371, + "learning_rate": 4.95838040353708e-06, + "loss": 0.5547, + "step": 4841 + }, + { + "epoch": 0.35597706219673575, + "grad_norm": 0.8327315449714661, + "learning_rate": 4.958362893282276e-06, + "loss": 0.5538, + "step": 4842 + }, + { + "epoch": 0.35605058079694163, + "grad_norm": 0.8378610014915466, + "learning_rate": 4.958345379375708e-06, + "loss": 0.5743, + "step": 4843 + }, + { + "epoch": 0.35612409939714745, + "grad_norm": 0.8682479858398438, + "learning_rate": 4.9583278618174035e-06, + "loss": 0.5398, + "step": 4844 + }, + { + "epoch": 0.35619761799735333, + "grad_norm": 0.8242632746696472, + "learning_rate": 4.958310340607386e-06, + "loss": 0.5314, + "step": 4845 + }, + { + "epoch": 0.35627113659755916, + "grad_norm": 0.9169785380363464, + "learning_rate": 4.958292815745684e-06, + "loss": 0.6156, + "step": 4846 + }, + { + "epoch": 0.35634465519776504, + "grad_norm": 0.9067801833152771, + "learning_rate": 4.958275287232324e-06, + "loss": 0.5367, + "step": 4847 + }, + { + "epoch": 0.35641817379797086, + "grad_norm": 0.9014294147491455, + "learning_rate": 4.958257755067329e-06, + "loss": 0.5449, + "step": 4848 + }, + { + "epoch": 0.35649169239817674, + "grad_norm": 0.8377835750579834, + "learning_rate": 4.958240219250727e-06, + "loss": 0.5846, + "step": 4849 + }, + { + "epoch": 0.35656521099838256, + "grad_norm": 0.8336018919944763, + "learning_rate": 4.958222679782544e-06, + "loss": 0.5597, + "step": 4850 + }, + { + "epoch": 0.35663872959858844, + "grad_norm": 0.8808287382125854, + "learning_rate": 4.958205136662806e-06, + "loss": 0.5599, + "step": 4851 + }, + { + "epoch": 0.35671224819879427, + "grad_norm": 0.8526450991630554, + "learning_rate": 4.958187589891539e-06, + "loss": 0.5702, + "step": 4852 + }, + { + "epoch": 0.35678576679900015, + "grad_norm": 0.8737280368804932, + "learning_rate": 4.958170039468768e-06, + "loss": 0.5563, + "step": 4853 + }, + { + "epoch": 0.35685928539920597, + "grad_norm": 0.8537328243255615, + "learning_rate": 4.9581524853945206e-06, + "loss": 0.572, + "step": 4854 + }, + { + "epoch": 0.35693280399941185, + "grad_norm": 0.8285911679267883, + "learning_rate": 4.958134927668822e-06, + "loss": 0.5574, + "step": 4855 + }, + { + "epoch": 0.3570063225996177, + "grad_norm": 0.9540696740150452, + "learning_rate": 4.958117366291698e-06, + "loss": 0.5706, + "step": 4856 + }, + { + "epoch": 0.35707984119982356, + "grad_norm": 0.8211883306503296, + "learning_rate": 4.958099801263176e-06, + "loss": 0.5548, + "step": 4857 + }, + { + "epoch": 0.35715335980002944, + "grad_norm": 0.8380770683288574, + "learning_rate": 4.958082232583282e-06, + "loss": 0.5923, + "step": 4858 + }, + { + "epoch": 0.35722687840023526, + "grad_norm": 0.8917102813720703, + "learning_rate": 4.958064660252041e-06, + "loss": 0.5401, + "step": 4859 + }, + { + "epoch": 0.35730039700044114, + "grad_norm": 0.8298209309577942, + "learning_rate": 4.958047084269479e-06, + "loss": 0.5475, + "step": 4860 + }, + { + "epoch": 0.35737391560064696, + "grad_norm": 0.8731881380081177, + "learning_rate": 4.958029504635624e-06, + "loss": 0.5919, + "step": 4861 + }, + { + "epoch": 0.35744743420085284, + "grad_norm": 0.8209697604179382, + "learning_rate": 4.9580119213505e-06, + "loss": 0.5326, + "step": 4862 + }, + { + "epoch": 0.35752095280105867, + "grad_norm": 0.8813408613204956, + "learning_rate": 4.957994334414133e-06, + "loss": 0.5366, + "step": 4863 + }, + { + "epoch": 0.35759447140126455, + "grad_norm": 0.9075714349746704, + "learning_rate": 4.957976743826551e-06, + "loss": 0.5668, + "step": 4864 + }, + { + "epoch": 0.35766799000147037, + "grad_norm": 0.8776890635490417, + "learning_rate": 4.957959149587779e-06, + "loss": 0.5433, + "step": 4865 + }, + { + "epoch": 0.35774150860167625, + "grad_norm": 0.8866822123527527, + "learning_rate": 4.9579415516978426e-06, + "loss": 0.5814, + "step": 4866 + }, + { + "epoch": 0.3578150272018821, + "grad_norm": 0.8661695718765259, + "learning_rate": 4.957923950156769e-06, + "loss": 0.5585, + "step": 4867 + }, + { + "epoch": 0.35788854580208795, + "grad_norm": 0.8688331246376038, + "learning_rate": 4.957906344964584e-06, + "loss": 0.5617, + "step": 4868 + }, + { + "epoch": 0.3579620644022938, + "grad_norm": 0.8819311261177063, + "learning_rate": 4.957888736121315e-06, + "loss": 0.5611, + "step": 4869 + }, + { + "epoch": 0.35803558300249966, + "grad_norm": 0.9333789348602295, + "learning_rate": 4.957871123626985e-06, + "loss": 0.5666, + "step": 4870 + }, + { + "epoch": 0.3581091016027055, + "grad_norm": 0.9049641489982605, + "learning_rate": 4.9578535074816225e-06, + "loss": 0.5433, + "step": 4871 + }, + { + "epoch": 0.35818262020291136, + "grad_norm": 0.8655918836593628, + "learning_rate": 4.9578358876852535e-06, + "loss": 0.626, + "step": 4872 + }, + { + "epoch": 0.3582561388031172, + "grad_norm": 0.8723995685577393, + "learning_rate": 4.957818264237904e-06, + "loss": 0.5552, + "step": 4873 + }, + { + "epoch": 0.35832965740332307, + "grad_norm": 0.8568547368049622, + "learning_rate": 4.9578006371396e-06, + "loss": 0.572, + "step": 4874 + }, + { + "epoch": 0.3584031760035289, + "grad_norm": 0.8395042419433594, + "learning_rate": 4.957783006390368e-06, + "loss": 0.5851, + "step": 4875 + }, + { + "epoch": 0.35847669460373477, + "grad_norm": 0.8692488074302673, + "learning_rate": 4.957765371990233e-06, + "loss": 0.5837, + "step": 4876 + }, + { + "epoch": 0.3585502132039406, + "grad_norm": 0.9164255857467651, + "learning_rate": 4.9577477339392226e-06, + "loss": 0.6149, + "step": 4877 + }, + { + "epoch": 0.3586237318041465, + "grad_norm": 0.9415677785873413, + "learning_rate": 4.957730092237363e-06, + "loss": 0.6037, + "step": 4878 + }, + { + "epoch": 0.3586972504043523, + "grad_norm": 0.8504351377487183, + "learning_rate": 4.957712446884679e-06, + "loss": 0.5441, + "step": 4879 + }, + { + "epoch": 0.3587707690045582, + "grad_norm": 0.8696678876876831, + "learning_rate": 4.9576947978811985e-06, + "loss": 0.5814, + "step": 4880 + }, + { + "epoch": 0.358844287604764, + "grad_norm": 0.9319431781768799, + "learning_rate": 4.957677145226947e-06, + "loss": 0.606, + "step": 4881 + }, + { + "epoch": 0.3589178062049699, + "grad_norm": 0.9680397510528564, + "learning_rate": 4.95765948892195e-06, + "loss": 0.634, + "step": 4882 + }, + { + "epoch": 0.3589913248051757, + "grad_norm": 0.8703407645225525, + "learning_rate": 4.9576418289662345e-06, + "loss": 0.5717, + "step": 4883 + }, + { + "epoch": 0.3590648434053816, + "grad_norm": 0.8124736547470093, + "learning_rate": 4.957624165359827e-06, + "loss": 0.5028, + "step": 4884 + }, + { + "epoch": 0.3591383620055874, + "grad_norm": 0.8633534908294678, + "learning_rate": 4.957606498102754e-06, + "loss": 0.572, + "step": 4885 + }, + { + "epoch": 0.3592118806057933, + "grad_norm": 0.8533469438552856, + "learning_rate": 4.95758882719504e-06, + "loss": 0.5895, + "step": 4886 + }, + { + "epoch": 0.3592853992059991, + "grad_norm": 0.7905944585800171, + "learning_rate": 4.957571152636713e-06, + "loss": 0.5597, + "step": 4887 + }, + { + "epoch": 0.359358917806205, + "grad_norm": 0.8858564496040344, + "learning_rate": 4.957553474427798e-06, + "loss": 0.5672, + "step": 4888 + }, + { + "epoch": 0.3594324364064108, + "grad_norm": 0.8469855189323425, + "learning_rate": 4.9575357925683225e-06, + "loss": 0.5759, + "step": 4889 + }, + { + "epoch": 0.3595059550066167, + "grad_norm": 0.8362587690353394, + "learning_rate": 4.957518107058312e-06, + "loss": 0.5352, + "step": 4890 + }, + { + "epoch": 0.3595794736068225, + "grad_norm": 0.8341770172119141, + "learning_rate": 4.957500417897793e-06, + "loss": 0.5359, + "step": 4891 + }, + { + "epoch": 0.3596529922070284, + "grad_norm": 0.8813852071762085, + "learning_rate": 4.9574827250867915e-06, + "loss": 0.5531, + "step": 4892 + }, + { + "epoch": 0.3597265108072342, + "grad_norm": 0.8718481063842773, + "learning_rate": 4.957465028625335e-06, + "loss": 0.5737, + "step": 4893 + }, + { + "epoch": 0.3598000294074401, + "grad_norm": 0.8903034925460815, + "learning_rate": 4.9574473285134476e-06, + "loss": 0.6372, + "step": 4894 + }, + { + "epoch": 0.3598735480076459, + "grad_norm": 0.8104434013366699, + "learning_rate": 4.9574296247511565e-06, + "loss": 0.5436, + "step": 4895 + }, + { + "epoch": 0.3599470666078518, + "grad_norm": 0.8404643535614014, + "learning_rate": 4.957411917338488e-06, + "loss": 0.568, + "step": 4896 + }, + { + "epoch": 0.36002058520805763, + "grad_norm": 0.8737446665763855, + "learning_rate": 4.95739420627547e-06, + "loss": 0.6296, + "step": 4897 + }, + { + "epoch": 0.3600941038082635, + "grad_norm": 0.923463761806488, + "learning_rate": 4.957376491562127e-06, + "loss": 0.5814, + "step": 4898 + }, + { + "epoch": 0.36016762240846933, + "grad_norm": 0.8851578831672668, + "learning_rate": 4.957358773198486e-06, + "loss": 0.5888, + "step": 4899 + }, + { + "epoch": 0.3602411410086752, + "grad_norm": 0.868629515171051, + "learning_rate": 4.957341051184572e-06, + "loss": 0.5913, + "step": 4900 + }, + { + "epoch": 0.36031465960888104, + "grad_norm": 0.8559620380401611, + "learning_rate": 4.957323325520413e-06, + "loss": 0.5713, + "step": 4901 + }, + { + "epoch": 0.3603881782090869, + "grad_norm": 0.8324640393257141, + "learning_rate": 4.957305596206036e-06, + "loss": 0.5779, + "step": 4902 + }, + { + "epoch": 0.36046169680929274, + "grad_norm": 0.8292014598846436, + "learning_rate": 4.957287863241464e-06, + "loss": 0.5684, + "step": 4903 + }, + { + "epoch": 0.3605352154094986, + "grad_norm": 0.9016004800796509, + "learning_rate": 4.9572701266267275e-06, + "loss": 0.5372, + "step": 4904 + }, + { + "epoch": 0.36060873400970445, + "grad_norm": 0.8623924255371094, + "learning_rate": 4.95725238636185e-06, + "loss": 0.6051, + "step": 4905 + }, + { + "epoch": 0.3606822526099103, + "grad_norm": 0.8122285008430481, + "learning_rate": 4.957234642446858e-06, + "loss": 0.5695, + "step": 4906 + }, + { + "epoch": 0.36075577121011615, + "grad_norm": 0.836424708366394, + "learning_rate": 4.957216894881779e-06, + "loss": 0.5502, + "step": 4907 + }, + { + "epoch": 0.36082928981032203, + "grad_norm": 0.8507296442985535, + "learning_rate": 4.957199143666639e-06, + "loss": 0.5685, + "step": 4908 + }, + { + "epoch": 0.36090280841052785, + "grad_norm": 0.8522238731384277, + "learning_rate": 4.957181388801464e-06, + "loss": 0.5346, + "step": 4909 + }, + { + "epoch": 0.36097632701073373, + "grad_norm": 0.9078484773635864, + "learning_rate": 4.95716363028628e-06, + "loss": 0.5817, + "step": 4910 + }, + { + "epoch": 0.36104984561093956, + "grad_norm": 0.8268055319786072, + "learning_rate": 4.957145868121115e-06, + "loss": 0.545, + "step": 4911 + }, + { + "epoch": 0.36112336421114544, + "grad_norm": 0.887952446937561, + "learning_rate": 4.957128102305993e-06, + "loss": 0.5683, + "step": 4912 + }, + { + "epoch": 0.36119688281135126, + "grad_norm": 0.8707730770111084, + "learning_rate": 4.957110332840943e-06, + "loss": 0.5637, + "step": 4913 + }, + { + "epoch": 0.36127040141155714, + "grad_norm": 0.9506030678749084, + "learning_rate": 4.95709255972599e-06, + "loss": 0.5981, + "step": 4914 + }, + { + "epoch": 0.36134392001176296, + "grad_norm": 0.8291738629341125, + "learning_rate": 4.957074782961161e-06, + "loss": 0.5704, + "step": 4915 + }, + { + "epoch": 0.36141743861196884, + "grad_norm": 0.8035950064659119, + "learning_rate": 4.957057002546481e-06, + "loss": 0.5379, + "step": 4916 + }, + { + "epoch": 0.36149095721217467, + "grad_norm": 0.8694557547569275, + "learning_rate": 4.957039218481978e-06, + "loss": 0.5589, + "step": 4917 + }, + { + "epoch": 0.36156447581238055, + "grad_norm": 0.860905647277832, + "learning_rate": 4.957021430767677e-06, + "loss": 0.5624, + "step": 4918 + }, + { + "epoch": 0.36163799441258637, + "grad_norm": 0.8275445103645325, + "learning_rate": 4.957003639403606e-06, + "loss": 0.5487, + "step": 4919 + }, + { + "epoch": 0.36171151301279225, + "grad_norm": 0.8677213788032532, + "learning_rate": 4.95698584438979e-06, + "loss": 0.5849, + "step": 4920 + }, + { + "epoch": 0.3617850316129981, + "grad_norm": 0.9472433924674988, + "learning_rate": 4.956968045726256e-06, + "loss": 0.5768, + "step": 4921 + }, + { + "epoch": 0.36185855021320396, + "grad_norm": 0.8484727144241333, + "learning_rate": 4.956950243413031e-06, + "loss": 0.5479, + "step": 4922 + }, + { + "epoch": 0.3619320688134098, + "grad_norm": 0.8967534899711609, + "learning_rate": 4.956932437450142e-06, + "loss": 0.5844, + "step": 4923 + }, + { + "epoch": 0.36200558741361566, + "grad_norm": 0.8888144493103027, + "learning_rate": 4.956914627837612e-06, + "loss": 0.5496, + "step": 4924 + }, + { + "epoch": 0.3620791060138215, + "grad_norm": 0.7855153679847717, + "learning_rate": 4.956896814575471e-06, + "loss": 0.5462, + "step": 4925 + }, + { + "epoch": 0.36215262461402736, + "grad_norm": 0.8445549607276917, + "learning_rate": 4.956878997663746e-06, + "loss": 0.5709, + "step": 4926 + }, + { + "epoch": 0.3622261432142332, + "grad_norm": 0.9130795001983643, + "learning_rate": 4.95686117710246e-06, + "loss": 0.6124, + "step": 4927 + }, + { + "epoch": 0.36229966181443907, + "grad_norm": 0.8606483340263367, + "learning_rate": 4.956843352891641e-06, + "loss": 0.5862, + "step": 4928 + }, + { + "epoch": 0.3623731804146449, + "grad_norm": 0.8687755465507507, + "learning_rate": 4.9568255250313155e-06, + "loss": 0.6036, + "step": 4929 + }, + { + "epoch": 0.36244669901485077, + "grad_norm": 0.9437105655670166, + "learning_rate": 4.956807693521512e-06, + "loss": 0.6271, + "step": 4930 + }, + { + "epoch": 0.3625202176150566, + "grad_norm": 0.9248873591423035, + "learning_rate": 4.956789858362254e-06, + "loss": 0.5377, + "step": 4931 + }, + { + "epoch": 0.3625937362152625, + "grad_norm": 0.8946888446807861, + "learning_rate": 4.956772019553569e-06, + "loss": 0.5828, + "step": 4932 + }, + { + "epoch": 0.3626672548154683, + "grad_norm": 0.8675443530082703, + "learning_rate": 4.956754177095484e-06, + "loss": 0.5415, + "step": 4933 + }, + { + "epoch": 0.3627407734156742, + "grad_norm": 0.8378772735595703, + "learning_rate": 4.956736330988026e-06, + "loss": 0.5472, + "step": 4934 + }, + { + "epoch": 0.36281429201588, + "grad_norm": 0.856203019618988, + "learning_rate": 4.956718481231219e-06, + "loss": 0.5768, + "step": 4935 + }, + { + "epoch": 0.3628878106160859, + "grad_norm": 0.9756702184677124, + "learning_rate": 4.9567006278250924e-06, + "loss": 0.6275, + "step": 4936 + }, + { + "epoch": 0.3629613292162917, + "grad_norm": 0.8914369344711304, + "learning_rate": 4.956682770769671e-06, + "loss": 0.6058, + "step": 4937 + }, + { + "epoch": 0.3630348478164976, + "grad_norm": 0.8782329559326172, + "learning_rate": 4.956664910064982e-06, + "loss": 0.5915, + "step": 4938 + }, + { + "epoch": 0.3631083664167034, + "grad_norm": 0.8846394419670105, + "learning_rate": 4.956647045711052e-06, + "loss": 0.6016, + "step": 4939 + }, + { + "epoch": 0.3631818850169093, + "grad_norm": 0.8339295983314514, + "learning_rate": 4.956629177707908e-06, + "loss": 0.5576, + "step": 4940 + }, + { + "epoch": 0.3632554036171151, + "grad_norm": 0.8310378193855286, + "learning_rate": 4.956611306055575e-06, + "loss": 0.531, + "step": 4941 + }, + { + "epoch": 0.363328922217321, + "grad_norm": 0.8726720809936523, + "learning_rate": 4.956593430754081e-06, + "loss": 0.5877, + "step": 4942 + }, + { + "epoch": 0.3634024408175268, + "grad_norm": 0.9210278987884521, + "learning_rate": 4.956575551803451e-06, + "loss": 0.5296, + "step": 4943 + }, + { + "epoch": 0.3634759594177327, + "grad_norm": 0.8174000382423401, + "learning_rate": 4.956557669203713e-06, + "loss": 0.5292, + "step": 4944 + }, + { + "epoch": 0.3635494780179385, + "grad_norm": 0.8400081396102905, + "learning_rate": 4.956539782954894e-06, + "loss": 0.5779, + "step": 4945 + }, + { + "epoch": 0.3636229966181444, + "grad_norm": 0.8730660676956177, + "learning_rate": 4.9565218930570185e-06, + "loss": 0.6145, + "step": 4946 + }, + { + "epoch": 0.3636965152183502, + "grad_norm": 0.9193313717842102, + "learning_rate": 4.956503999510114e-06, + "loss": 0.604, + "step": 4947 + }, + { + "epoch": 0.3637700338185561, + "grad_norm": 0.8244699239730835, + "learning_rate": 4.956486102314208e-06, + "loss": 0.5216, + "step": 4948 + }, + { + "epoch": 0.36384355241876193, + "grad_norm": 0.8302149176597595, + "learning_rate": 4.956468201469327e-06, + "loss": 0.4962, + "step": 4949 + }, + { + "epoch": 0.3639170710189678, + "grad_norm": 0.8613237738609314, + "learning_rate": 4.956450296975496e-06, + "loss": 0.5824, + "step": 4950 + }, + { + "epoch": 0.36399058961917363, + "grad_norm": 0.8365382552146912, + "learning_rate": 4.9564323888327425e-06, + "loss": 0.5714, + "step": 4951 + }, + { + "epoch": 0.3640641082193795, + "grad_norm": 0.8445557951927185, + "learning_rate": 4.956414477041094e-06, + "loss": 0.5746, + "step": 4952 + }, + { + "epoch": 0.36413762681958534, + "grad_norm": 0.8528575897216797, + "learning_rate": 4.956396561600576e-06, + "loss": 0.5236, + "step": 4953 + }, + { + "epoch": 0.3642111454197912, + "grad_norm": 0.8679909110069275, + "learning_rate": 4.956378642511216e-06, + "loss": 0.6027, + "step": 4954 + }, + { + "epoch": 0.36428466401999704, + "grad_norm": 0.8250327110290527, + "learning_rate": 4.956360719773039e-06, + "loss": 0.5614, + "step": 4955 + }, + { + "epoch": 0.3643581826202029, + "grad_norm": 0.8347751498222351, + "learning_rate": 4.956342793386073e-06, + "loss": 0.5934, + "step": 4956 + }, + { + "epoch": 0.36443170122040874, + "grad_norm": 0.8678663969039917, + "learning_rate": 4.956324863350345e-06, + "loss": 0.5516, + "step": 4957 + }, + { + "epoch": 0.3645052198206146, + "grad_norm": 0.8511024117469788, + "learning_rate": 4.956306929665879e-06, + "loss": 0.5866, + "step": 4958 + }, + { + "epoch": 0.36457873842082045, + "grad_norm": 0.8184733390808105, + "learning_rate": 4.956288992332705e-06, + "loss": 0.5335, + "step": 4959 + }, + { + "epoch": 0.3646522570210263, + "grad_norm": 0.865011990070343, + "learning_rate": 4.956271051350848e-06, + "loss": 0.5887, + "step": 4960 + }, + { + "epoch": 0.36472577562123215, + "grad_norm": 0.8519481420516968, + "learning_rate": 4.956253106720336e-06, + "loss": 0.5266, + "step": 4961 + }, + { + "epoch": 0.36479929422143803, + "grad_norm": 0.8166230320930481, + "learning_rate": 4.956235158441193e-06, + "loss": 0.5438, + "step": 4962 + }, + { + "epoch": 0.36487281282164385, + "grad_norm": 0.8379392027854919, + "learning_rate": 4.956217206513448e-06, + "loss": 0.5567, + "step": 4963 + }, + { + "epoch": 0.36494633142184973, + "grad_norm": 0.8645915389060974, + "learning_rate": 4.956199250937126e-06, + "loss": 0.5491, + "step": 4964 + }, + { + "epoch": 0.36501985002205556, + "grad_norm": 0.8208361268043518, + "learning_rate": 4.956181291712254e-06, + "loss": 0.568, + "step": 4965 + }, + { + "epoch": 0.36509336862226144, + "grad_norm": 0.8815531730651855, + "learning_rate": 4.956163328838861e-06, + "loss": 0.5576, + "step": 4966 + }, + { + "epoch": 0.36516688722246726, + "grad_norm": 0.8530023694038391, + "learning_rate": 4.956145362316971e-06, + "loss": 0.576, + "step": 4967 + }, + { + "epoch": 0.36524040582267314, + "grad_norm": 0.8342961668968201, + "learning_rate": 4.956127392146611e-06, + "loss": 0.5535, + "step": 4968 + }, + { + "epoch": 0.36531392442287897, + "grad_norm": 0.8356738686561584, + "learning_rate": 4.956109418327809e-06, + "loss": 0.5833, + "step": 4969 + }, + { + "epoch": 0.36538744302308485, + "grad_norm": 0.8766274452209473, + "learning_rate": 4.956091440860591e-06, + "loss": 0.5794, + "step": 4970 + }, + { + "epoch": 0.36546096162329067, + "grad_norm": 0.9172833561897278, + "learning_rate": 4.9560734597449835e-06, + "loss": 0.6002, + "step": 4971 + }, + { + "epoch": 0.36553448022349655, + "grad_norm": 0.8497419357299805, + "learning_rate": 4.956055474981013e-06, + "loss": 0.576, + "step": 4972 + }, + { + "epoch": 0.3656079988237024, + "grad_norm": 0.9907213449478149, + "learning_rate": 4.956037486568707e-06, + "loss": 0.5638, + "step": 4973 + }, + { + "epoch": 0.36568151742390825, + "grad_norm": 0.8522736430168152, + "learning_rate": 4.956019494508091e-06, + "loss": 0.5625, + "step": 4974 + }, + { + "epoch": 0.3657550360241141, + "grad_norm": 0.905447244644165, + "learning_rate": 4.956001498799193e-06, + "loss": 0.5683, + "step": 4975 + }, + { + "epoch": 0.36582855462431996, + "grad_norm": 0.8762950301170349, + "learning_rate": 4.955983499442039e-06, + "loss": 0.5215, + "step": 4976 + }, + { + "epoch": 0.3659020732245258, + "grad_norm": 0.8858057260513306, + "learning_rate": 4.955965496436658e-06, + "loss": 0.5848, + "step": 4977 + }, + { + "epoch": 0.36597559182473166, + "grad_norm": 0.8405357599258423, + "learning_rate": 4.955947489783073e-06, + "loss": 0.5448, + "step": 4978 + }, + { + "epoch": 0.3660491104249375, + "grad_norm": 0.8165714144706726, + "learning_rate": 4.955929479481312e-06, + "loss": 0.5702, + "step": 4979 + }, + { + "epoch": 0.36612262902514336, + "grad_norm": 0.8537527918815613, + "learning_rate": 4.955911465531404e-06, + "loss": 0.5273, + "step": 4980 + }, + { + "epoch": 0.3661961476253492, + "grad_norm": 0.8199896216392517, + "learning_rate": 4.955893447933372e-06, + "loss": 0.5837, + "step": 4981 + }, + { + "epoch": 0.36626966622555507, + "grad_norm": 0.891456127166748, + "learning_rate": 4.955875426687245e-06, + "loss": 0.6267, + "step": 4982 + }, + { + "epoch": 0.3663431848257609, + "grad_norm": 0.8913585543632507, + "learning_rate": 4.955857401793051e-06, + "loss": 0.587, + "step": 4983 + }, + { + "epoch": 0.36641670342596677, + "grad_norm": 0.8285171389579773, + "learning_rate": 4.9558393732508145e-06, + "loss": 0.562, + "step": 4984 + }, + { + "epoch": 0.3664902220261726, + "grad_norm": 0.8428564667701721, + "learning_rate": 4.955821341060563e-06, + "loss": 0.5658, + "step": 4985 + }, + { + "epoch": 0.3665637406263785, + "grad_norm": 0.8616759777069092, + "learning_rate": 4.955803305222324e-06, + "loss": 0.5497, + "step": 4986 + }, + { + "epoch": 0.3666372592265843, + "grad_norm": 0.8716629147529602, + "learning_rate": 4.955785265736123e-06, + "loss": 0.5651, + "step": 4987 + }, + { + "epoch": 0.3667107778267902, + "grad_norm": 0.8508442044258118, + "learning_rate": 4.955767222601987e-06, + "loss": 0.562, + "step": 4988 + }, + { + "epoch": 0.366784296426996, + "grad_norm": 0.8889966011047363, + "learning_rate": 4.955749175819944e-06, + "loss": 0.5538, + "step": 4989 + }, + { + "epoch": 0.3668578150272019, + "grad_norm": 0.8544955253601074, + "learning_rate": 4.9557311253900205e-06, + "loss": 0.5892, + "step": 4990 + }, + { + "epoch": 0.3669313336274077, + "grad_norm": 0.9392870664596558, + "learning_rate": 4.955713071312242e-06, + "loss": 0.5893, + "step": 4991 + }, + { + "epoch": 0.3670048522276136, + "grad_norm": 0.9309095740318298, + "learning_rate": 4.955695013586637e-06, + "loss": 0.5851, + "step": 4992 + }, + { + "epoch": 0.3670783708278194, + "grad_norm": 0.8385493159294128, + "learning_rate": 4.9556769522132305e-06, + "loss": 0.583, + "step": 4993 + }, + { + "epoch": 0.3671518894280253, + "grad_norm": 0.8742950558662415, + "learning_rate": 4.955658887192051e-06, + "loss": 0.5583, + "step": 4994 + }, + { + "epoch": 0.3672254080282311, + "grad_norm": 0.8598806262016296, + "learning_rate": 4.955640818523125e-06, + "loss": 0.5488, + "step": 4995 + }, + { + "epoch": 0.367298926628437, + "grad_norm": 0.8626855611801147, + "learning_rate": 4.9556227462064784e-06, + "loss": 0.5673, + "step": 4996 + }, + { + "epoch": 0.3673724452286429, + "grad_norm": 0.8440122008323669, + "learning_rate": 4.955604670242139e-06, + "loss": 0.6033, + "step": 4997 + }, + { + "epoch": 0.3674459638288487, + "grad_norm": 0.8503528833389282, + "learning_rate": 4.9555865906301335e-06, + "loss": 0.5207, + "step": 4998 + }, + { + "epoch": 0.3675194824290546, + "grad_norm": 0.897754967212677, + "learning_rate": 4.955568507370488e-06, + "loss": 0.6159, + "step": 4999 + }, + { + "epoch": 0.3675930010292604, + "grad_norm": 0.8986088037490845, + "learning_rate": 4.95555042046323e-06, + "loss": 0.5894, + "step": 5000 + }, + { + "epoch": 0.3676665196294663, + "grad_norm": 0.9523294568061829, + "learning_rate": 4.955532329908387e-06, + "loss": 0.6294, + "step": 5001 + }, + { + "epoch": 0.3677400382296721, + "grad_norm": 0.8162776827812195, + "learning_rate": 4.9555142357059845e-06, + "loss": 0.5484, + "step": 5002 + }, + { + "epoch": 0.367813556829878, + "grad_norm": 0.8742413520812988, + "learning_rate": 4.9554961378560506e-06, + "loss": 0.5351, + "step": 5003 + }, + { + "epoch": 0.3678870754300838, + "grad_norm": 0.8594333529472351, + "learning_rate": 4.9554780363586115e-06, + "loss": 0.5395, + "step": 5004 + }, + { + "epoch": 0.3679605940302897, + "grad_norm": 0.8405804634094238, + "learning_rate": 4.955459931213694e-06, + "loss": 0.5874, + "step": 5005 + }, + { + "epoch": 0.3680341126304955, + "grad_norm": 0.8580273985862732, + "learning_rate": 4.9554418224213255e-06, + "loss": 0.6065, + "step": 5006 + }, + { + "epoch": 0.3681076312307014, + "grad_norm": 0.8298083543777466, + "learning_rate": 4.955423709981532e-06, + "loss": 0.5793, + "step": 5007 + }, + { + "epoch": 0.3681811498309072, + "grad_norm": 0.8374712467193604, + "learning_rate": 4.955405593894342e-06, + "loss": 0.5528, + "step": 5008 + }, + { + "epoch": 0.3682546684311131, + "grad_norm": 0.8218320608139038, + "learning_rate": 4.955387474159781e-06, + "loss": 0.5726, + "step": 5009 + }, + { + "epoch": 0.3683281870313189, + "grad_norm": 0.8727290034294128, + "learning_rate": 4.955369350777876e-06, + "loss": 0.5794, + "step": 5010 + }, + { + "epoch": 0.3684017056315248, + "grad_norm": 0.9451023936271667, + "learning_rate": 4.955351223748654e-06, + "loss": 0.6081, + "step": 5011 + }, + { + "epoch": 0.3684752242317306, + "grad_norm": 0.8916710019111633, + "learning_rate": 4.955333093072143e-06, + "loss": 0.5627, + "step": 5012 + }, + { + "epoch": 0.3685487428319365, + "grad_norm": 0.9353955388069153, + "learning_rate": 4.95531495874837e-06, + "loss": 0.5892, + "step": 5013 + }, + { + "epoch": 0.36862226143214233, + "grad_norm": 0.8593958020210266, + "learning_rate": 4.955296820777359e-06, + "loss": 0.5414, + "step": 5014 + }, + { + "epoch": 0.3686957800323482, + "grad_norm": 0.802415132522583, + "learning_rate": 4.95527867915914e-06, + "loss": 0.5844, + "step": 5015 + }, + { + "epoch": 0.36876929863255403, + "grad_norm": 0.8522468209266663, + "learning_rate": 4.95526053389374e-06, + "loss": 0.5468, + "step": 5016 + }, + { + "epoch": 0.3688428172327599, + "grad_norm": 0.8070512413978577, + "learning_rate": 4.9552423849811835e-06, + "loss": 0.5432, + "step": 5017 + }, + { + "epoch": 0.36891633583296574, + "grad_norm": 0.9051820635795593, + "learning_rate": 4.955224232421499e-06, + "loss": 0.5883, + "step": 5018 + }, + { + "epoch": 0.3689898544331716, + "grad_norm": 0.8936783671379089, + "learning_rate": 4.9552060762147136e-06, + "loss": 0.6001, + "step": 5019 + }, + { + "epoch": 0.36906337303337744, + "grad_norm": 0.8879299163818359, + "learning_rate": 4.955187916360854e-06, + "loss": 0.615, + "step": 5020 + }, + { + "epoch": 0.3691368916335833, + "grad_norm": 0.8616184592247009, + "learning_rate": 4.955169752859948e-06, + "loss": 0.5559, + "step": 5021 + }, + { + "epoch": 0.36921041023378914, + "grad_norm": 0.842745304107666, + "learning_rate": 4.955151585712021e-06, + "loss": 0.5486, + "step": 5022 + }, + { + "epoch": 0.369283928833995, + "grad_norm": 0.854925274848938, + "learning_rate": 4.9551334149171e-06, + "loss": 0.5717, + "step": 5023 + }, + { + "epoch": 0.36935744743420085, + "grad_norm": 0.847323477268219, + "learning_rate": 4.955115240475214e-06, + "loss": 0.5552, + "step": 5024 + }, + { + "epoch": 0.3694309660344067, + "grad_norm": 0.8436806201934814, + "learning_rate": 4.955097062386388e-06, + "loss": 0.5709, + "step": 5025 + }, + { + "epoch": 0.36950448463461255, + "grad_norm": 0.8781603574752808, + "learning_rate": 4.955078880650651e-06, + "loss": 0.5979, + "step": 5026 + }, + { + "epoch": 0.36957800323481843, + "grad_norm": 0.8219835162162781, + "learning_rate": 4.955060695268027e-06, + "loss": 0.5453, + "step": 5027 + }, + { + "epoch": 0.36965152183502425, + "grad_norm": 0.8852593302726746, + "learning_rate": 4.955042506238546e-06, + "loss": 0.562, + "step": 5028 + }, + { + "epoch": 0.36972504043523013, + "grad_norm": 0.8798519968986511, + "learning_rate": 4.955024313562233e-06, + "loss": 0.6111, + "step": 5029 + }, + { + "epoch": 0.36979855903543596, + "grad_norm": 0.8630296587944031, + "learning_rate": 4.955006117239116e-06, + "loss": 0.5606, + "step": 5030 + }, + { + "epoch": 0.36987207763564184, + "grad_norm": 0.9029631614685059, + "learning_rate": 4.954987917269223e-06, + "loss": 0.617, + "step": 5031 + }, + { + "epoch": 0.36994559623584766, + "grad_norm": 0.8840370178222656, + "learning_rate": 4.9549697136525785e-06, + "loss": 0.5743, + "step": 5032 + }, + { + "epoch": 0.37001911483605354, + "grad_norm": 0.844547688961029, + "learning_rate": 4.954951506389212e-06, + "loss": 0.5317, + "step": 5033 + }, + { + "epoch": 0.37009263343625937, + "grad_norm": 0.8404814600944519, + "learning_rate": 4.954933295479149e-06, + "loss": 0.5636, + "step": 5034 + }, + { + "epoch": 0.37016615203646525, + "grad_norm": 0.837090253829956, + "learning_rate": 4.954915080922416e-06, + "loss": 0.5551, + "step": 5035 + }, + { + "epoch": 0.37023967063667107, + "grad_norm": 0.8584856986999512, + "learning_rate": 4.954896862719042e-06, + "loss": 0.573, + "step": 5036 + }, + { + "epoch": 0.37031318923687695, + "grad_norm": 0.9117465615272522, + "learning_rate": 4.954878640869053e-06, + "loss": 0.5852, + "step": 5037 + }, + { + "epoch": 0.3703867078370828, + "grad_norm": 0.819515585899353, + "learning_rate": 4.954860415372477e-06, + "loss": 0.5501, + "step": 5038 + }, + { + "epoch": 0.37046022643728865, + "grad_norm": 0.8579872250556946, + "learning_rate": 4.954842186229339e-06, + "loss": 0.5543, + "step": 5039 + }, + { + "epoch": 0.3705337450374945, + "grad_norm": 0.8370821475982666, + "learning_rate": 4.954823953439668e-06, + "loss": 0.5432, + "step": 5040 + }, + { + "epoch": 0.37060726363770036, + "grad_norm": 0.854739785194397, + "learning_rate": 4.9548057170034905e-06, + "loss": 0.5709, + "step": 5041 + }, + { + "epoch": 0.3706807822379062, + "grad_norm": 0.8830294013023376, + "learning_rate": 4.954787476920833e-06, + "loss": 0.5242, + "step": 5042 + }, + { + "epoch": 0.37075430083811206, + "grad_norm": 0.8964335918426514, + "learning_rate": 4.954769233191723e-06, + "loss": 0.5994, + "step": 5043 + }, + { + "epoch": 0.3708278194383179, + "grad_norm": 0.8219721913337708, + "learning_rate": 4.954750985816188e-06, + "loss": 0.556, + "step": 5044 + }, + { + "epoch": 0.37090133803852376, + "grad_norm": 0.9025956988334656, + "learning_rate": 4.954732734794256e-06, + "loss": 0.5789, + "step": 5045 + }, + { + "epoch": 0.3709748566387296, + "grad_norm": 0.879093587398529, + "learning_rate": 4.9547144801259515e-06, + "loss": 0.6117, + "step": 5046 + }, + { + "epoch": 0.37104837523893547, + "grad_norm": 0.867308497428894, + "learning_rate": 4.954696221811303e-06, + "loss": 0.5895, + "step": 5047 + }, + { + "epoch": 0.3711218938391413, + "grad_norm": 0.8642439842224121, + "learning_rate": 4.954677959850339e-06, + "loss": 0.6004, + "step": 5048 + }, + { + "epoch": 0.37119541243934717, + "grad_norm": 0.8853713870048523, + "learning_rate": 4.954659694243085e-06, + "loss": 0.5793, + "step": 5049 + }, + { + "epoch": 0.371268931039553, + "grad_norm": 0.8590825796127319, + "learning_rate": 4.954641424989567e-06, + "loss": 0.5336, + "step": 5050 + }, + { + "epoch": 0.3713424496397589, + "grad_norm": 0.9169995188713074, + "learning_rate": 4.954623152089814e-06, + "loss": 0.5596, + "step": 5051 + }, + { + "epoch": 0.3714159682399647, + "grad_norm": 0.8179082274436951, + "learning_rate": 4.954604875543854e-06, + "loss": 0.5762, + "step": 5052 + }, + { + "epoch": 0.3714894868401706, + "grad_norm": 0.8681473135948181, + "learning_rate": 4.954586595351712e-06, + "loss": 0.5352, + "step": 5053 + }, + { + "epoch": 0.3715630054403764, + "grad_norm": 0.8335408568382263, + "learning_rate": 4.954568311513416e-06, + "loss": 0.4952, + "step": 5054 + }, + { + "epoch": 0.3716365240405823, + "grad_norm": 0.8773470520973206, + "learning_rate": 4.954550024028993e-06, + "loss": 0.5485, + "step": 5055 + }, + { + "epoch": 0.3717100426407881, + "grad_norm": 0.860826313495636, + "learning_rate": 4.954531732898471e-06, + "loss": 0.5405, + "step": 5056 + }, + { + "epoch": 0.371783561240994, + "grad_norm": 0.8434218168258667, + "learning_rate": 4.954513438121877e-06, + "loss": 0.564, + "step": 5057 + }, + { + "epoch": 0.3718570798411998, + "grad_norm": 0.8476212024688721, + "learning_rate": 4.954495139699237e-06, + "loss": 0.6057, + "step": 5058 + }, + { + "epoch": 0.3719305984414057, + "grad_norm": 0.8446148633956909, + "learning_rate": 4.954476837630578e-06, + "loss": 0.5576, + "step": 5059 + }, + { + "epoch": 0.3720041170416115, + "grad_norm": 0.8303256630897522, + "learning_rate": 4.954458531915929e-06, + "loss": 0.5064, + "step": 5060 + }, + { + "epoch": 0.3720776356418174, + "grad_norm": 0.8557532429695129, + "learning_rate": 4.954440222555316e-06, + "loss": 0.5738, + "step": 5061 + }, + { + "epoch": 0.3721511542420232, + "grad_norm": 0.822523832321167, + "learning_rate": 4.9544219095487665e-06, + "loss": 0.5467, + "step": 5062 + }, + { + "epoch": 0.3722246728422291, + "grad_norm": 0.8624863624572754, + "learning_rate": 4.954403592896307e-06, + "loss": 0.5388, + "step": 5063 + }, + { + "epoch": 0.3722981914424349, + "grad_norm": 0.8499675393104553, + "learning_rate": 4.954385272597967e-06, + "loss": 0.5612, + "step": 5064 + }, + { + "epoch": 0.3723717100426408, + "grad_norm": 0.8633657693862915, + "learning_rate": 4.954366948653771e-06, + "loss": 0.6341, + "step": 5065 + }, + { + "epoch": 0.3724452286428466, + "grad_norm": 0.8448708057403564, + "learning_rate": 4.954348621063747e-06, + "loss": 0.5409, + "step": 5066 + }, + { + "epoch": 0.3725187472430525, + "grad_norm": 0.8479748964309692, + "learning_rate": 4.954330289827923e-06, + "loss": 0.5771, + "step": 5067 + }, + { + "epoch": 0.37259226584325833, + "grad_norm": 0.8590689897537231, + "learning_rate": 4.954311954946326e-06, + "loss": 0.5788, + "step": 5068 + }, + { + "epoch": 0.3726657844434642, + "grad_norm": 0.8607152700424194, + "learning_rate": 4.954293616418982e-06, + "loss": 0.5364, + "step": 5069 + }, + { + "epoch": 0.37273930304367003, + "grad_norm": 0.8387811183929443, + "learning_rate": 4.954275274245921e-06, + "loss": 0.5561, + "step": 5070 + }, + { + "epoch": 0.3728128216438759, + "grad_norm": 0.8005693554878235, + "learning_rate": 4.954256928427167e-06, + "loss": 0.5368, + "step": 5071 + }, + { + "epoch": 0.37288634024408174, + "grad_norm": 0.9077394008636475, + "learning_rate": 4.9542385789627486e-06, + "loss": 0.5963, + "step": 5072 + }, + { + "epoch": 0.3729598588442876, + "grad_norm": 0.8723526000976562, + "learning_rate": 4.954220225852694e-06, + "loss": 0.5826, + "step": 5073 + }, + { + "epoch": 0.37303337744449344, + "grad_norm": 0.8354877233505249, + "learning_rate": 4.954201869097029e-06, + "loss": 0.5656, + "step": 5074 + }, + { + "epoch": 0.3731068960446993, + "grad_norm": 0.8807774782180786, + "learning_rate": 4.954183508695782e-06, + "loss": 0.5543, + "step": 5075 + }, + { + "epoch": 0.37318041464490515, + "grad_norm": 0.8575526475906372, + "learning_rate": 4.954165144648979e-06, + "loss": 0.563, + "step": 5076 + }, + { + "epoch": 0.373253933245111, + "grad_norm": 0.854086697101593, + "learning_rate": 4.954146776956649e-06, + "loss": 0.5679, + "step": 5077 + }, + { + "epoch": 0.37332745184531685, + "grad_norm": 0.8490797281265259, + "learning_rate": 4.954128405618818e-06, + "loss": 0.5812, + "step": 5078 + }, + { + "epoch": 0.37340097044552273, + "grad_norm": 0.858744204044342, + "learning_rate": 4.9541100306355135e-06, + "loss": 0.5846, + "step": 5079 + }, + { + "epoch": 0.37347448904572855, + "grad_norm": 0.8803591132164001, + "learning_rate": 4.954091652006764e-06, + "loss": 0.617, + "step": 5080 + }, + { + "epoch": 0.37354800764593443, + "grad_norm": 0.9032207131385803, + "learning_rate": 4.954073269732595e-06, + "loss": 0.5527, + "step": 5081 + }, + { + "epoch": 0.37362152624614026, + "grad_norm": 0.8578789830207825, + "learning_rate": 4.9540548838130335e-06, + "loss": 0.5337, + "step": 5082 + }, + { + "epoch": 0.37369504484634614, + "grad_norm": 0.8074519038200378, + "learning_rate": 4.954036494248109e-06, + "loss": 0.5693, + "step": 5083 + }, + { + "epoch": 0.37376856344655196, + "grad_norm": 0.9015600085258484, + "learning_rate": 4.954018101037848e-06, + "loss": 0.5513, + "step": 5084 + }, + { + "epoch": 0.37384208204675784, + "grad_norm": 0.8945223093032837, + "learning_rate": 4.9539997041822765e-06, + "loss": 0.5896, + "step": 5085 + }, + { + "epoch": 0.37391560064696366, + "grad_norm": 0.8637062311172485, + "learning_rate": 4.9539813036814236e-06, + "loss": 0.5454, + "step": 5086 + }, + { + "epoch": 0.37398911924716954, + "grad_norm": 0.8347316980361938, + "learning_rate": 4.953962899535315e-06, + "loss": 0.5472, + "step": 5087 + }, + { + "epoch": 0.37406263784737537, + "grad_norm": 0.874896764755249, + "learning_rate": 4.953944491743979e-06, + "loss": 0.6071, + "step": 5088 + }, + { + "epoch": 0.37413615644758125, + "grad_norm": 0.8378825783729553, + "learning_rate": 4.953926080307443e-06, + "loss": 0.5387, + "step": 5089 + }, + { + "epoch": 0.37420967504778707, + "grad_norm": 0.8227342367172241, + "learning_rate": 4.9539076652257346e-06, + "loss": 0.5719, + "step": 5090 + }, + { + "epoch": 0.37428319364799295, + "grad_norm": 0.8996040225028992, + "learning_rate": 4.953889246498881e-06, + "loss": 0.5755, + "step": 5091 + }, + { + "epoch": 0.3743567122481988, + "grad_norm": 0.8083701729774475, + "learning_rate": 4.9538708241269086e-06, + "loss": 0.5566, + "step": 5092 + }, + { + "epoch": 0.37443023084840465, + "grad_norm": 0.8478320837020874, + "learning_rate": 4.953852398109845e-06, + "loss": 0.5434, + "step": 5093 + }, + { + "epoch": 0.3745037494486105, + "grad_norm": 0.8231631517410278, + "learning_rate": 4.953833968447719e-06, + "loss": 0.5339, + "step": 5094 + }, + { + "epoch": 0.37457726804881636, + "grad_norm": 0.8599284291267395, + "learning_rate": 4.953815535140557e-06, + "loss": 0.582, + "step": 5095 + }, + { + "epoch": 0.3746507866490222, + "grad_norm": 0.8423525094985962, + "learning_rate": 4.953797098188385e-06, + "loss": 0.5776, + "step": 5096 + }, + { + "epoch": 0.37472430524922806, + "grad_norm": 0.8684161305427551, + "learning_rate": 4.953778657591233e-06, + "loss": 0.5257, + "step": 5097 + }, + { + "epoch": 0.3747978238494339, + "grad_norm": 0.8960159420967102, + "learning_rate": 4.953760213349128e-06, + "loss": 0.5682, + "step": 5098 + }, + { + "epoch": 0.37487134244963977, + "grad_norm": 0.8761608004570007, + "learning_rate": 4.953741765462095e-06, + "loss": 0.5897, + "step": 5099 + }, + { + "epoch": 0.3749448610498456, + "grad_norm": 0.8882933855056763, + "learning_rate": 4.953723313930163e-06, + "loss": 0.5633, + "step": 5100 + }, + { + "epoch": 0.37501837965005147, + "grad_norm": 0.8905074596405029, + "learning_rate": 4.9537048587533606e-06, + "loss": 0.601, + "step": 5101 + }, + { + "epoch": 0.3750918982502573, + "grad_norm": 0.8288078904151917, + "learning_rate": 4.953686399931713e-06, + "loss": 0.5561, + "step": 5102 + }, + { + "epoch": 0.3751654168504632, + "grad_norm": 0.8342600464820862, + "learning_rate": 4.953667937465249e-06, + "loss": 0.545, + "step": 5103 + }, + { + "epoch": 0.375238935450669, + "grad_norm": 0.9437768459320068, + "learning_rate": 4.953649471353995e-06, + "loss": 0.5637, + "step": 5104 + }, + { + "epoch": 0.3753124540508749, + "grad_norm": 0.8513747453689575, + "learning_rate": 4.95363100159798e-06, + "loss": 0.5552, + "step": 5105 + }, + { + "epoch": 0.3753859726510807, + "grad_norm": 0.8386257886886597, + "learning_rate": 4.95361252819723e-06, + "loss": 0.6033, + "step": 5106 + }, + { + "epoch": 0.3754594912512866, + "grad_norm": 0.8883077502250671, + "learning_rate": 4.953594051151773e-06, + "loss": 0.6111, + "step": 5107 + }, + { + "epoch": 0.3755330098514924, + "grad_norm": 0.8495038747787476, + "learning_rate": 4.953575570461638e-06, + "loss": 0.5828, + "step": 5108 + }, + { + "epoch": 0.3756065284516983, + "grad_norm": 0.8491619229316711, + "learning_rate": 4.953557086126848e-06, + "loss": 0.5999, + "step": 5109 + }, + { + "epoch": 0.3756800470519041, + "grad_norm": 0.8894087672233582, + "learning_rate": 4.953538598147435e-06, + "loss": 0.6103, + "step": 5110 + }, + { + "epoch": 0.37575356565211, + "grad_norm": 0.8100324273109436, + "learning_rate": 4.953520106523425e-06, + "loss": 0.5322, + "step": 5111 + }, + { + "epoch": 0.3758270842523158, + "grad_norm": 0.8620578646659851, + "learning_rate": 4.953501611254844e-06, + "loss": 0.5508, + "step": 5112 + }, + { + "epoch": 0.3759006028525217, + "grad_norm": 0.8795405626296997, + "learning_rate": 4.953483112341722e-06, + "loss": 0.5858, + "step": 5113 + }, + { + "epoch": 0.3759741214527275, + "grad_norm": 0.8367680907249451, + "learning_rate": 4.953464609784084e-06, + "loss": 0.5216, + "step": 5114 + }, + { + "epoch": 0.3760476400529334, + "grad_norm": 0.784511148929596, + "learning_rate": 4.953446103581959e-06, + "loss": 0.5677, + "step": 5115 + }, + { + "epoch": 0.3761211586531392, + "grad_norm": 0.8917108774185181, + "learning_rate": 4.953427593735374e-06, + "loss": 0.5967, + "step": 5116 + }, + { + "epoch": 0.3761946772533451, + "grad_norm": 0.8121073246002197, + "learning_rate": 4.953409080244358e-06, + "loss": 0.513, + "step": 5117 + }, + { + "epoch": 0.3762681958535509, + "grad_norm": 0.8341931104660034, + "learning_rate": 4.953390563108935e-06, + "loss": 0.5749, + "step": 5118 + }, + { + "epoch": 0.3763417144537568, + "grad_norm": 0.8302233219146729, + "learning_rate": 4.9533720423291366e-06, + "loss": 0.5454, + "step": 5119 + }, + { + "epoch": 0.37641523305396263, + "grad_norm": 0.844351053237915, + "learning_rate": 4.9533535179049875e-06, + "loss": 0.5583, + "step": 5120 + }, + { + "epoch": 0.3764887516541685, + "grad_norm": 0.8448929786682129, + "learning_rate": 4.953334989836516e-06, + "loss": 0.5736, + "step": 5121 + }, + { + "epoch": 0.37656227025437433, + "grad_norm": 0.8412840366363525, + "learning_rate": 4.9533164581237505e-06, + "loss": 0.5373, + "step": 5122 + }, + { + "epoch": 0.3766357888545802, + "grad_norm": 0.863577663898468, + "learning_rate": 4.953297922766717e-06, + "loss": 0.5727, + "step": 5123 + }, + { + "epoch": 0.37670930745478604, + "grad_norm": 0.8760634660720825, + "learning_rate": 4.953279383765443e-06, + "loss": 0.6035, + "step": 5124 + }, + { + "epoch": 0.3767828260549919, + "grad_norm": 0.8882340788841248, + "learning_rate": 4.953260841119958e-06, + "loss": 0.6067, + "step": 5125 + }, + { + "epoch": 0.37685634465519774, + "grad_norm": 0.866111695766449, + "learning_rate": 4.9532422948302886e-06, + "loss": 0.5416, + "step": 5126 + }, + { + "epoch": 0.3769298632554036, + "grad_norm": 0.875210702419281, + "learning_rate": 4.9532237448964616e-06, + "loss": 0.5777, + "step": 5127 + }, + { + "epoch": 0.37700338185560944, + "grad_norm": 0.8238228559494019, + "learning_rate": 4.953205191318505e-06, + "loss": 0.5799, + "step": 5128 + }, + { + "epoch": 0.3770769004558153, + "grad_norm": 0.9534255862236023, + "learning_rate": 4.9531866340964464e-06, + "loss": 0.5355, + "step": 5129 + }, + { + "epoch": 0.37715041905602115, + "grad_norm": 0.8899615406990051, + "learning_rate": 4.953168073230313e-06, + "loss": 0.5542, + "step": 5130 + }, + { + "epoch": 0.377223937656227, + "grad_norm": 0.8356230854988098, + "learning_rate": 4.9531495087201335e-06, + "loss": 0.5587, + "step": 5131 + }, + { + "epoch": 0.37729745625643285, + "grad_norm": 0.7989831566810608, + "learning_rate": 4.953130940565934e-06, + "loss": 0.5174, + "step": 5132 + }, + { + "epoch": 0.37737097485663873, + "grad_norm": 0.8980981707572937, + "learning_rate": 4.953112368767744e-06, + "loss": 0.5276, + "step": 5133 + }, + { + "epoch": 0.37744449345684455, + "grad_norm": 0.824948787689209, + "learning_rate": 4.953093793325589e-06, + "loss": 0.568, + "step": 5134 + }, + { + "epoch": 0.37751801205705043, + "grad_norm": 0.9047276973724365, + "learning_rate": 4.953075214239498e-06, + "loss": 0.6207, + "step": 5135 + }, + { + "epoch": 0.3775915306572563, + "grad_norm": 0.842512845993042, + "learning_rate": 4.953056631509497e-06, + "loss": 0.5562, + "step": 5136 + }, + { + "epoch": 0.37766504925746214, + "grad_norm": 0.8752496838569641, + "learning_rate": 4.953038045135616e-06, + "loss": 0.5894, + "step": 5137 + }, + { + "epoch": 0.377738567857668, + "grad_norm": 0.8450636267662048, + "learning_rate": 4.9530194551178805e-06, + "loss": 0.5361, + "step": 5138 + }, + { + "epoch": 0.37781208645787384, + "grad_norm": 0.8650813698768616, + "learning_rate": 4.95300086145632e-06, + "loss": 0.5502, + "step": 5139 + }, + { + "epoch": 0.3778856050580797, + "grad_norm": 0.8753524422645569, + "learning_rate": 4.95298226415096e-06, + "loss": 0.6057, + "step": 5140 + }, + { + "epoch": 0.37795912365828555, + "grad_norm": 0.8399566411972046, + "learning_rate": 4.952963663201829e-06, + "loss": 0.545, + "step": 5141 + }, + { + "epoch": 0.3780326422584914, + "grad_norm": 0.8901600241661072, + "learning_rate": 4.952945058608956e-06, + "loss": 0.5496, + "step": 5142 + }, + { + "epoch": 0.37810616085869725, + "grad_norm": 0.8686105608940125, + "learning_rate": 4.952926450372366e-06, + "loss": 0.6154, + "step": 5143 + }, + { + "epoch": 0.37817967945890313, + "grad_norm": 0.8577929735183716, + "learning_rate": 4.952907838492088e-06, + "loss": 0.5286, + "step": 5144 + }, + { + "epoch": 0.37825319805910895, + "grad_norm": 0.8657265901565552, + "learning_rate": 4.952889222968152e-06, + "loss": 0.5286, + "step": 5145 + }, + { + "epoch": 0.37832671665931483, + "grad_norm": 0.8172129392623901, + "learning_rate": 4.9528706038005815e-06, + "loss": 0.5591, + "step": 5146 + }, + { + "epoch": 0.37840023525952066, + "grad_norm": 0.8156399726867676, + "learning_rate": 4.952851980989406e-06, + "loss": 0.5621, + "step": 5147 + }, + { + "epoch": 0.37847375385972654, + "grad_norm": 0.8342336416244507, + "learning_rate": 4.952833354534654e-06, + "loss": 0.5258, + "step": 5148 + }, + { + "epoch": 0.37854727245993236, + "grad_norm": 0.8936564326286316, + "learning_rate": 4.9528147244363514e-06, + "loss": 0.6031, + "step": 5149 + }, + { + "epoch": 0.37862079106013824, + "grad_norm": 0.8717545866966248, + "learning_rate": 4.9527960906945275e-06, + "loss": 0.5285, + "step": 5150 + }, + { + "epoch": 0.37869430966034406, + "grad_norm": 0.888857901096344, + "learning_rate": 4.952777453309209e-06, + "loss": 0.6195, + "step": 5151 + }, + { + "epoch": 0.37876782826054994, + "grad_norm": 0.8508867025375366, + "learning_rate": 4.952758812280424e-06, + "loss": 0.5254, + "step": 5152 + }, + { + "epoch": 0.37884134686075577, + "grad_norm": 0.8872334957122803, + "learning_rate": 4.9527401676082e-06, + "loss": 0.6254, + "step": 5153 + }, + { + "epoch": 0.37891486546096165, + "grad_norm": 0.8452281355857849, + "learning_rate": 4.952721519292565e-06, + "loss": 0.5434, + "step": 5154 + }, + { + "epoch": 0.37898838406116747, + "grad_norm": 0.8144627213478088, + "learning_rate": 4.952702867333547e-06, + "loss": 0.5885, + "step": 5155 + }, + { + "epoch": 0.37906190266137335, + "grad_norm": 0.8506197929382324, + "learning_rate": 4.952684211731172e-06, + "loss": 0.5732, + "step": 5156 + }, + { + "epoch": 0.3791354212615792, + "grad_norm": 0.8699508309364319, + "learning_rate": 4.95266555248547e-06, + "loss": 0.5847, + "step": 5157 + }, + { + "epoch": 0.37920893986178505, + "grad_norm": 0.838924765586853, + "learning_rate": 4.952646889596468e-06, + "loss": 0.5326, + "step": 5158 + }, + { + "epoch": 0.3792824584619909, + "grad_norm": 0.8188021183013916, + "learning_rate": 4.952628223064192e-06, + "loss": 0.5093, + "step": 5159 + }, + { + "epoch": 0.37935597706219676, + "grad_norm": 0.888565182685852, + "learning_rate": 4.952609552888671e-06, + "loss": 0.6147, + "step": 5160 + }, + { + "epoch": 0.3794294956624026, + "grad_norm": 0.871396005153656, + "learning_rate": 4.952590879069934e-06, + "loss": 0.5643, + "step": 5161 + }, + { + "epoch": 0.37950301426260846, + "grad_norm": 0.875594973564148, + "learning_rate": 4.952572201608007e-06, + "loss": 0.5575, + "step": 5162 + }, + { + "epoch": 0.3795765328628143, + "grad_norm": 0.816775381565094, + "learning_rate": 4.952553520502918e-06, + "loss": 0.5409, + "step": 5163 + }, + { + "epoch": 0.37965005146302017, + "grad_norm": 0.8575699925422668, + "learning_rate": 4.952534835754695e-06, + "loss": 0.5206, + "step": 5164 + }, + { + "epoch": 0.379723570063226, + "grad_norm": 0.8391170501708984, + "learning_rate": 4.952516147363367e-06, + "loss": 0.5807, + "step": 5165 + }, + { + "epoch": 0.37979708866343187, + "grad_norm": 0.7918140888214111, + "learning_rate": 4.952497455328959e-06, + "loss": 0.5168, + "step": 5166 + }, + { + "epoch": 0.3798706072636377, + "grad_norm": 0.8355668187141418, + "learning_rate": 4.9524787596515015e-06, + "loss": 0.5401, + "step": 5167 + }, + { + "epoch": 0.3799441258638436, + "grad_norm": 0.9224867224693298, + "learning_rate": 4.9524600603310205e-06, + "loss": 0.5505, + "step": 5168 + }, + { + "epoch": 0.3800176444640494, + "grad_norm": 0.8877074122428894, + "learning_rate": 4.952441357367545e-06, + "loss": 0.574, + "step": 5169 + }, + { + "epoch": 0.3800911630642553, + "grad_norm": 0.8733975291252136, + "learning_rate": 4.952422650761101e-06, + "loss": 0.5864, + "step": 5170 + }, + { + "epoch": 0.3801646816644611, + "grad_norm": 0.8948168754577637, + "learning_rate": 4.952403940511718e-06, + "loss": 0.5455, + "step": 5171 + }, + { + "epoch": 0.380238200264667, + "grad_norm": 0.910416305065155, + "learning_rate": 4.952385226619423e-06, + "loss": 0.5547, + "step": 5172 + }, + { + "epoch": 0.3803117188648728, + "grad_norm": 0.8628091216087341, + "learning_rate": 4.952366509084244e-06, + "loss": 0.5574, + "step": 5173 + }, + { + "epoch": 0.3803852374650787, + "grad_norm": 0.8473021388053894, + "learning_rate": 4.95234778790621e-06, + "loss": 0.5466, + "step": 5174 + }, + { + "epoch": 0.3804587560652845, + "grad_norm": 0.8247257471084595, + "learning_rate": 4.952329063085346e-06, + "loss": 0.5381, + "step": 5175 + }, + { + "epoch": 0.3805322746654904, + "grad_norm": 0.8508352041244507, + "learning_rate": 4.952310334621683e-06, + "loss": 0.5437, + "step": 5176 + }, + { + "epoch": 0.3806057932656962, + "grad_norm": 0.8785255551338196, + "learning_rate": 4.9522916025152466e-06, + "loss": 0.5741, + "step": 5177 + }, + { + "epoch": 0.3806793118659021, + "grad_norm": 0.8237895369529724, + "learning_rate": 4.9522728667660655e-06, + "loss": 0.5727, + "step": 5178 + }, + { + "epoch": 0.3807528304661079, + "grad_norm": 0.8249993324279785, + "learning_rate": 4.952254127374167e-06, + "loss": 0.5116, + "step": 5179 + }, + { + "epoch": 0.3808263490663138, + "grad_norm": 0.8742660284042358, + "learning_rate": 4.952235384339579e-06, + "loss": 0.5698, + "step": 5180 + }, + { + "epoch": 0.3808998676665196, + "grad_norm": 0.8490806818008423, + "learning_rate": 4.9522166376623305e-06, + "loss": 0.534, + "step": 5181 + }, + { + "epoch": 0.3809733862667255, + "grad_norm": 0.8896047472953796, + "learning_rate": 4.952197887342447e-06, + "loss": 0.548, + "step": 5182 + }, + { + "epoch": 0.3810469048669313, + "grad_norm": 0.849496066570282, + "learning_rate": 4.95217913337996e-06, + "loss": 0.5738, + "step": 5183 + }, + { + "epoch": 0.3811204234671372, + "grad_norm": 0.8528271913528442, + "learning_rate": 4.952160375774893e-06, + "loss": 0.5531, + "step": 5184 + }, + { + "epoch": 0.38119394206734303, + "grad_norm": 0.8448230624198914, + "learning_rate": 4.952141614527277e-06, + "loss": 0.5385, + "step": 5185 + }, + { + "epoch": 0.3812674606675489, + "grad_norm": 0.8242020010948181, + "learning_rate": 4.95212284963714e-06, + "loss": 0.5541, + "step": 5186 + }, + { + "epoch": 0.38134097926775473, + "grad_norm": 0.8608794808387756, + "learning_rate": 4.952104081104507e-06, + "loss": 0.5359, + "step": 5187 + }, + { + "epoch": 0.3814144978679606, + "grad_norm": 0.9209957718849182, + "learning_rate": 4.952085308929409e-06, + "loss": 0.6062, + "step": 5188 + }, + { + "epoch": 0.38148801646816644, + "grad_norm": 0.8016406297683716, + "learning_rate": 4.952066533111871e-06, + "loss": 0.5473, + "step": 5189 + }, + { + "epoch": 0.3815615350683723, + "grad_norm": 0.8277620077133179, + "learning_rate": 4.952047753651924e-06, + "loss": 0.5521, + "step": 5190 + }, + { + "epoch": 0.38163505366857814, + "grad_norm": 0.8358766436576843, + "learning_rate": 4.952028970549593e-06, + "loss": 0.5592, + "step": 5191 + }, + { + "epoch": 0.381708572268784, + "grad_norm": 0.9060627222061157, + "learning_rate": 4.952010183804909e-06, + "loss": 0.593, + "step": 5192 + }, + { + "epoch": 0.38178209086898984, + "grad_norm": 0.8340677618980408, + "learning_rate": 4.951991393417896e-06, + "loss": 0.5865, + "step": 5193 + }, + { + "epoch": 0.3818556094691957, + "grad_norm": 0.8421095609664917, + "learning_rate": 4.951972599388585e-06, + "loss": 0.5547, + "step": 5194 + }, + { + "epoch": 0.38192912806940155, + "grad_norm": 0.8548167943954468, + "learning_rate": 4.951953801717003e-06, + "loss": 0.5667, + "step": 5195 + }, + { + "epoch": 0.3820026466696074, + "grad_norm": 0.8372230529785156, + "learning_rate": 4.951935000403178e-06, + "loss": 0.5013, + "step": 5196 + }, + { + "epoch": 0.38207616526981325, + "grad_norm": 0.8092156052589417, + "learning_rate": 4.951916195447138e-06, + "loss": 0.5083, + "step": 5197 + }, + { + "epoch": 0.38214968387001913, + "grad_norm": 0.8430763483047485, + "learning_rate": 4.95189738684891e-06, + "loss": 0.5885, + "step": 5198 + }, + { + "epoch": 0.38222320247022495, + "grad_norm": 0.8885713815689087, + "learning_rate": 4.951878574608524e-06, + "loss": 0.5501, + "step": 5199 + }, + { + "epoch": 0.38229672107043083, + "grad_norm": 0.8336193561553955, + "learning_rate": 4.951859758726006e-06, + "loss": 0.5831, + "step": 5200 + }, + { + "epoch": 0.38237023967063666, + "grad_norm": 0.8676403760910034, + "learning_rate": 4.951840939201385e-06, + "loss": 0.5696, + "step": 5201 + }, + { + "epoch": 0.38244375827084254, + "grad_norm": 0.8961933255195618, + "learning_rate": 4.951822116034688e-06, + "loss": 0.5254, + "step": 5202 + }, + { + "epoch": 0.38251727687104836, + "grad_norm": 0.8874987363815308, + "learning_rate": 4.951803289225943e-06, + "loss": 0.5802, + "step": 5203 + }, + { + "epoch": 0.38259079547125424, + "grad_norm": 0.8412922024726868, + "learning_rate": 4.951784458775178e-06, + "loss": 0.5547, + "step": 5204 + }, + { + "epoch": 0.38266431407146007, + "grad_norm": 0.9827057719230652, + "learning_rate": 4.951765624682423e-06, + "loss": 0.5818, + "step": 5205 + }, + { + "epoch": 0.38273783267166595, + "grad_norm": 0.9026125073432922, + "learning_rate": 4.951746786947704e-06, + "loss": 0.5425, + "step": 5206 + }, + { + "epoch": 0.38281135127187177, + "grad_norm": 0.8198124170303345, + "learning_rate": 4.95172794557105e-06, + "loss": 0.5293, + "step": 5207 + }, + { + "epoch": 0.38288486987207765, + "grad_norm": 0.8287114500999451, + "learning_rate": 4.951709100552487e-06, + "loss": 0.5442, + "step": 5208 + }, + { + "epoch": 0.3829583884722835, + "grad_norm": 0.8657484650611877, + "learning_rate": 4.951690251892045e-06, + "loss": 0.5537, + "step": 5209 + }, + { + "epoch": 0.38303190707248935, + "grad_norm": 0.8768131732940674, + "learning_rate": 4.951671399589751e-06, + "loss": 0.6117, + "step": 5210 + }, + { + "epoch": 0.3831054256726952, + "grad_norm": 0.8894246220588684, + "learning_rate": 4.951652543645634e-06, + "loss": 0.5823, + "step": 5211 + }, + { + "epoch": 0.38317894427290106, + "grad_norm": 0.8644480109214783, + "learning_rate": 4.951633684059721e-06, + "loss": 0.598, + "step": 5212 + }, + { + "epoch": 0.3832524628731069, + "grad_norm": 0.8881182074546814, + "learning_rate": 4.9516148208320414e-06, + "loss": 0.5718, + "step": 5213 + }, + { + "epoch": 0.38332598147331276, + "grad_norm": 0.8288980722427368, + "learning_rate": 4.951595953962621e-06, + "loss": 0.5639, + "step": 5214 + }, + { + "epoch": 0.3833995000735186, + "grad_norm": 0.8290022015571594, + "learning_rate": 4.95157708345149e-06, + "loss": 0.57, + "step": 5215 + }, + { + "epoch": 0.38347301867372446, + "grad_norm": 0.8676572442054749, + "learning_rate": 4.9515582092986745e-06, + "loss": 0.5896, + "step": 5216 + }, + { + "epoch": 0.3835465372739303, + "grad_norm": 0.8098839521408081, + "learning_rate": 4.9515393315042036e-06, + "loss": 0.5758, + "step": 5217 + }, + { + "epoch": 0.38362005587413617, + "grad_norm": 0.8786696791648865, + "learning_rate": 4.951520450068106e-06, + "loss": 0.5494, + "step": 5218 + }, + { + "epoch": 0.383693574474342, + "grad_norm": 0.8265358805656433, + "learning_rate": 4.951501564990409e-06, + "loss": 0.5617, + "step": 5219 + }, + { + "epoch": 0.38376709307454787, + "grad_norm": 0.8556003570556641, + "learning_rate": 4.95148267627114e-06, + "loss": 0.5653, + "step": 5220 + }, + { + "epoch": 0.3838406116747537, + "grad_norm": 0.8589320182800293, + "learning_rate": 4.951463783910328e-06, + "loss": 0.5755, + "step": 5221 + }, + { + "epoch": 0.3839141302749596, + "grad_norm": 0.8806377649307251, + "learning_rate": 4.951444887908001e-06, + "loss": 0.6042, + "step": 5222 + }, + { + "epoch": 0.3839876488751654, + "grad_norm": 0.8726502060890198, + "learning_rate": 4.951425988264187e-06, + "loss": 0.5831, + "step": 5223 + }, + { + "epoch": 0.3840611674753713, + "grad_norm": 0.8576326966285706, + "learning_rate": 4.951407084978913e-06, + "loss": 0.5624, + "step": 5224 + }, + { + "epoch": 0.3841346860755771, + "grad_norm": 0.8362401723861694, + "learning_rate": 4.951388178052209e-06, + "loss": 0.5892, + "step": 5225 + }, + { + "epoch": 0.384208204675783, + "grad_norm": 0.8762145638465881, + "learning_rate": 4.9513692674841015e-06, + "loss": 0.6247, + "step": 5226 + }, + { + "epoch": 0.3842817232759888, + "grad_norm": 0.8642435669898987, + "learning_rate": 4.95135035327462e-06, + "loss": 0.5509, + "step": 5227 + }, + { + "epoch": 0.3843552418761947, + "grad_norm": 0.8267562389373779, + "learning_rate": 4.951331435423791e-06, + "loss": 0.5638, + "step": 5228 + }, + { + "epoch": 0.3844287604764005, + "grad_norm": 0.856096625328064, + "learning_rate": 4.951312513931643e-06, + "loss": 0.6039, + "step": 5229 + }, + { + "epoch": 0.3845022790766064, + "grad_norm": 0.815955638885498, + "learning_rate": 4.951293588798205e-06, + "loss": 0.5276, + "step": 5230 + }, + { + "epoch": 0.3845757976768122, + "grad_norm": 0.8790425658226013, + "learning_rate": 4.9512746600235055e-06, + "loss": 0.586, + "step": 5231 + }, + { + "epoch": 0.3846493162770181, + "grad_norm": 0.823010265827179, + "learning_rate": 4.95125572760757e-06, + "loss": 0.5838, + "step": 5232 + }, + { + "epoch": 0.3847228348772239, + "grad_norm": 0.8669293522834778, + "learning_rate": 4.95123679155043e-06, + "loss": 0.5459, + "step": 5233 + }, + { + "epoch": 0.3847963534774298, + "grad_norm": 0.9437379240989685, + "learning_rate": 4.9512178518521115e-06, + "loss": 0.5901, + "step": 5234 + }, + { + "epoch": 0.3848698720776356, + "grad_norm": 0.8710545301437378, + "learning_rate": 4.951198908512642e-06, + "loss": 0.568, + "step": 5235 + }, + { + "epoch": 0.3849433906778415, + "grad_norm": 0.8890810608863831, + "learning_rate": 4.9511799615320525e-06, + "loss": 0.5399, + "step": 5236 + }, + { + "epoch": 0.3850169092780473, + "grad_norm": 0.8939972519874573, + "learning_rate": 4.951161010910368e-06, + "loss": 0.5716, + "step": 5237 + }, + { + "epoch": 0.3850904278782532, + "grad_norm": 0.8663721084594727, + "learning_rate": 4.9511420566476185e-06, + "loss": 0.5888, + "step": 5238 + }, + { + "epoch": 0.38516394647845903, + "grad_norm": 0.8566877841949463, + "learning_rate": 4.951123098743832e-06, + "loss": 0.5466, + "step": 5239 + }, + { + "epoch": 0.3852374650786649, + "grad_norm": 0.8538244366645813, + "learning_rate": 4.951104137199036e-06, + "loss": 0.5389, + "step": 5240 + }, + { + "epoch": 0.38531098367887073, + "grad_norm": 0.8547864556312561, + "learning_rate": 4.951085172013259e-06, + "loss": 0.5778, + "step": 5241 + }, + { + "epoch": 0.3853845022790766, + "grad_norm": 0.8825634717941284, + "learning_rate": 4.95106620318653e-06, + "loss": 0.5932, + "step": 5242 + }, + { + "epoch": 0.38545802087928244, + "grad_norm": 0.8683826923370361, + "learning_rate": 4.951047230718875e-06, + "loss": 0.619, + "step": 5243 + }, + { + "epoch": 0.3855315394794883, + "grad_norm": 0.8670037388801575, + "learning_rate": 4.9510282546103236e-06, + "loss": 0.5803, + "step": 5244 + }, + { + "epoch": 0.38560505807969414, + "grad_norm": 0.8499712944030762, + "learning_rate": 4.951009274860905e-06, + "loss": 0.5602, + "step": 5245 + }, + { + "epoch": 0.3856785766799, + "grad_norm": 0.8181026577949524, + "learning_rate": 4.9509902914706455e-06, + "loss": 0.5435, + "step": 5246 + }, + { + "epoch": 0.38575209528010584, + "grad_norm": 0.8290568590164185, + "learning_rate": 4.950971304439574e-06, + "loss": 0.5397, + "step": 5247 + }, + { + "epoch": 0.3858256138803117, + "grad_norm": 0.9125316143035889, + "learning_rate": 4.95095231376772e-06, + "loss": 0.5454, + "step": 5248 + }, + { + "epoch": 0.38589913248051755, + "grad_norm": 0.8609066009521484, + "learning_rate": 4.9509333194551105e-06, + "loss": 0.638, + "step": 5249 + }, + { + "epoch": 0.38597265108072343, + "grad_norm": 0.9451175928115845, + "learning_rate": 4.950914321501773e-06, + "loss": 0.5847, + "step": 5250 + }, + { + "epoch": 0.38604616968092925, + "grad_norm": 0.8558393120765686, + "learning_rate": 4.950895319907736e-06, + "loss": 0.5978, + "step": 5251 + }, + { + "epoch": 0.38611968828113513, + "grad_norm": 0.8488638997077942, + "learning_rate": 4.950876314673029e-06, + "loss": 0.5955, + "step": 5252 + }, + { + "epoch": 0.38619320688134096, + "grad_norm": 0.86415034532547, + "learning_rate": 4.95085730579768e-06, + "loss": 0.6019, + "step": 5253 + }, + { + "epoch": 0.38626672548154684, + "grad_norm": 0.8348182439804077, + "learning_rate": 4.9508382932817155e-06, + "loss": 0.5283, + "step": 5254 + }, + { + "epoch": 0.38634024408175266, + "grad_norm": 0.8227891325950623, + "learning_rate": 4.950819277125165e-06, + "loss": 0.5761, + "step": 5255 + }, + { + "epoch": 0.38641376268195854, + "grad_norm": 0.8285584449768066, + "learning_rate": 4.950800257328058e-06, + "loss": 0.5602, + "step": 5256 + }, + { + "epoch": 0.38648728128216436, + "grad_norm": 0.8063985705375671, + "learning_rate": 4.95078123389042e-06, + "loss": 0.51, + "step": 5257 + }, + { + "epoch": 0.38656079988237024, + "grad_norm": 0.9314280152320862, + "learning_rate": 4.950762206812281e-06, + "loss": 0.6256, + "step": 5258 + }, + { + "epoch": 0.38663431848257607, + "grad_norm": 0.8096585273742676, + "learning_rate": 4.950743176093669e-06, + "loss": 0.5393, + "step": 5259 + }, + { + "epoch": 0.38670783708278195, + "grad_norm": 0.8443103432655334, + "learning_rate": 4.950724141734612e-06, + "loss": 0.5762, + "step": 5260 + }, + { + "epoch": 0.38678135568298777, + "grad_norm": 0.8447991609573364, + "learning_rate": 4.950705103735139e-06, + "loss": 0.5303, + "step": 5261 + }, + { + "epoch": 0.38685487428319365, + "grad_norm": 0.8287588953971863, + "learning_rate": 4.950686062095278e-06, + "loss": 0.566, + "step": 5262 + }, + { + "epoch": 0.3869283928833995, + "grad_norm": 0.838760256767273, + "learning_rate": 4.950667016815056e-06, + "loss": 0.5482, + "step": 5263 + }, + { + "epoch": 0.38700191148360535, + "grad_norm": 0.8478254079818726, + "learning_rate": 4.950647967894504e-06, + "loss": 0.546, + "step": 5264 + }, + { + "epoch": 0.3870754300838112, + "grad_norm": 0.8771262168884277, + "learning_rate": 4.950628915333648e-06, + "loss": 0.5584, + "step": 5265 + }, + { + "epoch": 0.38714894868401706, + "grad_norm": 0.8443684577941895, + "learning_rate": 4.950609859132516e-06, + "loss": 0.5462, + "step": 5266 + }, + { + "epoch": 0.3872224672842229, + "grad_norm": 0.8860298991203308, + "learning_rate": 4.950590799291137e-06, + "loss": 0.6111, + "step": 5267 + }, + { + "epoch": 0.38729598588442876, + "grad_norm": 0.833118200302124, + "learning_rate": 4.950571735809541e-06, + "loss": 0.5448, + "step": 5268 + }, + { + "epoch": 0.3873695044846346, + "grad_norm": 0.8467259407043457, + "learning_rate": 4.950552668687754e-06, + "loss": 0.5802, + "step": 5269 + }, + { + "epoch": 0.38744302308484047, + "grad_norm": 0.8584019541740417, + "learning_rate": 4.950533597925805e-06, + "loss": 0.6028, + "step": 5270 + }, + { + "epoch": 0.3875165416850463, + "grad_norm": 0.8193987607955933, + "learning_rate": 4.9505145235237236e-06, + "loss": 0.5063, + "step": 5271 + }, + { + "epoch": 0.38759006028525217, + "grad_norm": 0.8331886529922485, + "learning_rate": 4.950495445481536e-06, + "loss": 0.6155, + "step": 5272 + }, + { + "epoch": 0.387663578885458, + "grad_norm": 0.8540771007537842, + "learning_rate": 4.950476363799272e-06, + "loss": 0.5712, + "step": 5273 + }, + { + "epoch": 0.3877370974856639, + "grad_norm": 0.8853555917739868, + "learning_rate": 4.95045727847696e-06, + "loss": 0.5702, + "step": 5274 + }, + { + "epoch": 0.38781061608586975, + "grad_norm": 0.8465666174888611, + "learning_rate": 4.950438189514627e-06, + "loss": 0.5117, + "step": 5275 + }, + { + "epoch": 0.3878841346860756, + "grad_norm": 0.846652090549469, + "learning_rate": 4.950419096912303e-06, + "loss": 0.5699, + "step": 5276 + }, + { + "epoch": 0.38795765328628146, + "grad_norm": 0.887685239315033, + "learning_rate": 4.950400000670015e-06, + "loss": 0.5366, + "step": 5277 + }, + { + "epoch": 0.3880311718864873, + "grad_norm": 0.8627173900604248, + "learning_rate": 4.950380900787792e-06, + "loss": 0.5157, + "step": 5278 + }, + { + "epoch": 0.38810469048669316, + "grad_norm": 0.8393559455871582, + "learning_rate": 4.950361797265663e-06, + "loss": 0.5705, + "step": 5279 + }, + { + "epoch": 0.388178209086899, + "grad_norm": 0.84979647397995, + "learning_rate": 4.950342690103655e-06, + "loss": 0.5417, + "step": 5280 + }, + { + "epoch": 0.38825172768710486, + "grad_norm": 0.8276805281639099, + "learning_rate": 4.950323579301798e-06, + "loss": 0.5257, + "step": 5281 + }, + { + "epoch": 0.3883252462873107, + "grad_norm": 0.8692408204078674, + "learning_rate": 4.950304464860119e-06, + "loss": 0.5819, + "step": 5282 + }, + { + "epoch": 0.38839876488751657, + "grad_norm": 0.8309153318405151, + "learning_rate": 4.950285346778647e-06, + "loss": 0.5555, + "step": 5283 + }, + { + "epoch": 0.3884722834877224, + "grad_norm": 0.8828760981559753, + "learning_rate": 4.95026622505741e-06, + "loss": 0.5473, + "step": 5284 + }, + { + "epoch": 0.38854580208792827, + "grad_norm": 0.8524052500724792, + "learning_rate": 4.950247099696437e-06, + "loss": 0.5564, + "step": 5285 + }, + { + "epoch": 0.3886193206881341, + "grad_norm": 0.9367899894714355, + "learning_rate": 4.9502279706957555e-06, + "loss": 0.6328, + "step": 5286 + }, + { + "epoch": 0.38869283928834, + "grad_norm": 0.799746572971344, + "learning_rate": 4.950208838055396e-06, + "loss": 0.5381, + "step": 5287 + }, + { + "epoch": 0.3887663578885458, + "grad_norm": 0.8875378966331482, + "learning_rate": 4.950189701775384e-06, + "loss": 0.5683, + "step": 5288 + }, + { + "epoch": 0.3888398764887517, + "grad_norm": 0.8606201410293579, + "learning_rate": 4.95017056185575e-06, + "loss": 0.5543, + "step": 5289 + }, + { + "epoch": 0.3889133950889575, + "grad_norm": 0.9139611124992371, + "learning_rate": 4.950151418296522e-06, + "loss": 0.5739, + "step": 5290 + }, + { + "epoch": 0.3889869136891634, + "grad_norm": 0.8550909161567688, + "learning_rate": 4.950132271097727e-06, + "loss": 0.5366, + "step": 5291 + }, + { + "epoch": 0.3890604322893692, + "grad_norm": 0.8490029573440552, + "learning_rate": 4.950113120259396e-06, + "loss": 0.5801, + "step": 5292 + }, + { + "epoch": 0.3891339508895751, + "grad_norm": 0.925418496131897, + "learning_rate": 4.950093965781555e-06, + "loss": 0.5979, + "step": 5293 + }, + { + "epoch": 0.3892074694897809, + "grad_norm": 0.910542905330658, + "learning_rate": 4.950074807664235e-06, + "loss": 0.5553, + "step": 5294 + }, + { + "epoch": 0.3892809880899868, + "grad_norm": 0.8308764696121216, + "learning_rate": 4.950055645907461e-06, + "loss": 0.5838, + "step": 5295 + }, + { + "epoch": 0.3893545066901926, + "grad_norm": 0.8870161175727844, + "learning_rate": 4.950036480511265e-06, + "loss": 0.5944, + "step": 5296 + }, + { + "epoch": 0.3894280252903985, + "grad_norm": 0.9027101993560791, + "learning_rate": 4.950017311475674e-06, + "loss": 0.581, + "step": 5297 + }, + { + "epoch": 0.3895015438906043, + "grad_norm": 0.8695716857910156, + "learning_rate": 4.949998138800717e-06, + "loss": 0.5796, + "step": 5298 + }, + { + "epoch": 0.3895750624908102, + "grad_norm": 0.8940121531486511, + "learning_rate": 4.94997896248642e-06, + "loss": 0.5659, + "step": 5299 + }, + { + "epoch": 0.389648581091016, + "grad_norm": 0.9041800498962402, + "learning_rate": 4.949959782532815e-06, + "loss": 0.6055, + "step": 5300 + }, + { + "epoch": 0.3897220996912219, + "grad_norm": 0.8839337229728699, + "learning_rate": 4.949940598939929e-06, + "loss": 0.5471, + "step": 5301 + }, + { + "epoch": 0.3897956182914277, + "grad_norm": 0.8804837465286255, + "learning_rate": 4.949921411707789e-06, + "loss": 0.5441, + "step": 5302 + }, + { + "epoch": 0.3898691368916336, + "grad_norm": 0.8186084032058716, + "learning_rate": 4.949902220836425e-06, + "loss": 0.505, + "step": 5303 + }, + { + "epoch": 0.38994265549183943, + "grad_norm": 0.8784686326980591, + "learning_rate": 4.949883026325867e-06, + "loss": 0.5877, + "step": 5304 + }, + { + "epoch": 0.3900161740920453, + "grad_norm": 0.8628556728363037, + "learning_rate": 4.949863828176141e-06, + "loss": 0.5975, + "step": 5305 + }, + { + "epoch": 0.39008969269225113, + "grad_norm": 0.8233557343482971, + "learning_rate": 4.9498446263872765e-06, + "loss": 0.5715, + "step": 5306 + }, + { + "epoch": 0.390163211292457, + "grad_norm": 0.7926196455955505, + "learning_rate": 4.949825420959302e-06, + "loss": 0.4995, + "step": 5307 + }, + { + "epoch": 0.39023672989266284, + "grad_norm": 0.858781635761261, + "learning_rate": 4.949806211892245e-06, + "loss": 0.5759, + "step": 5308 + }, + { + "epoch": 0.3903102484928687, + "grad_norm": 0.8903806805610657, + "learning_rate": 4.949786999186137e-06, + "loss": 0.5791, + "step": 5309 + }, + { + "epoch": 0.39038376709307454, + "grad_norm": 0.844555139541626, + "learning_rate": 4.949767782841003e-06, + "loss": 0.5495, + "step": 5310 + }, + { + "epoch": 0.3904572856932804, + "grad_norm": 0.8643844723701477, + "learning_rate": 4.949748562856874e-06, + "loss": 0.538, + "step": 5311 + }, + { + "epoch": 0.39053080429348624, + "grad_norm": 0.8534306883811951, + "learning_rate": 4.949729339233776e-06, + "loss": 0.5655, + "step": 5312 + }, + { + "epoch": 0.3906043228936921, + "grad_norm": 0.8573969006538391, + "learning_rate": 4.949710111971741e-06, + "loss": 0.5639, + "step": 5313 + }, + { + "epoch": 0.39067784149389795, + "grad_norm": 0.8793839812278748, + "learning_rate": 4.9496908810707955e-06, + "loss": 0.5728, + "step": 5314 + }, + { + "epoch": 0.39075136009410383, + "grad_norm": 0.8423780798912048, + "learning_rate": 4.949671646530968e-06, + "loss": 0.5376, + "step": 5315 + }, + { + "epoch": 0.39082487869430965, + "grad_norm": 0.8603119850158691, + "learning_rate": 4.949652408352287e-06, + "loss": 0.5732, + "step": 5316 + }, + { + "epoch": 0.39089839729451553, + "grad_norm": 0.8052823543548584, + "learning_rate": 4.949633166534782e-06, + "loss": 0.5509, + "step": 5317 + }, + { + "epoch": 0.39097191589472136, + "grad_norm": 0.8909420967102051, + "learning_rate": 4.949613921078481e-06, + "loss": 0.6104, + "step": 5318 + }, + { + "epoch": 0.39104543449492724, + "grad_norm": 0.8399983644485474, + "learning_rate": 4.9495946719834136e-06, + "loss": 0.525, + "step": 5319 + }, + { + "epoch": 0.39111895309513306, + "grad_norm": 0.8541041016578674, + "learning_rate": 4.949575419249606e-06, + "loss": 0.5391, + "step": 5320 + }, + { + "epoch": 0.39119247169533894, + "grad_norm": 0.8481703996658325, + "learning_rate": 4.949556162877088e-06, + "loss": 0.5668, + "step": 5321 + }, + { + "epoch": 0.39126599029554476, + "grad_norm": 0.8226895928382874, + "learning_rate": 4.9495369028658905e-06, + "loss": 0.5525, + "step": 5322 + }, + { + "epoch": 0.39133950889575064, + "grad_norm": 0.8828909397125244, + "learning_rate": 4.949517639216038e-06, + "loss": 0.5823, + "step": 5323 + }, + { + "epoch": 0.39141302749595647, + "grad_norm": 0.8427041172981262, + "learning_rate": 4.949498371927563e-06, + "loss": 0.5848, + "step": 5324 + }, + { + "epoch": 0.39148654609616235, + "grad_norm": 0.9255278706550598, + "learning_rate": 4.9494791010004905e-06, + "loss": 0.5602, + "step": 5325 + }, + { + "epoch": 0.39156006469636817, + "grad_norm": 0.8726583123207092, + "learning_rate": 4.949459826434852e-06, + "loss": 0.5366, + "step": 5326 + }, + { + "epoch": 0.39163358329657405, + "grad_norm": 0.8141171336174011, + "learning_rate": 4.949440548230674e-06, + "loss": 0.5247, + "step": 5327 + }, + { + "epoch": 0.3917071018967799, + "grad_norm": 0.853008508682251, + "learning_rate": 4.949421266387987e-06, + "loss": 0.595, + "step": 5328 + }, + { + "epoch": 0.39178062049698575, + "grad_norm": 0.8649303317070007, + "learning_rate": 4.949401980906819e-06, + "loss": 0.5217, + "step": 5329 + }, + { + "epoch": 0.3918541390971916, + "grad_norm": 0.9274719953536987, + "learning_rate": 4.949382691787198e-06, + "loss": 0.6538, + "step": 5330 + }, + { + "epoch": 0.39192765769739746, + "grad_norm": 0.8667900562286377, + "learning_rate": 4.9493633990291535e-06, + "loss": 0.5404, + "step": 5331 + }, + { + "epoch": 0.3920011762976033, + "grad_norm": 0.9109351634979248, + "learning_rate": 4.949344102632714e-06, + "loss": 0.5952, + "step": 5332 + }, + { + "epoch": 0.39207469489780916, + "grad_norm": 0.8694210648536682, + "learning_rate": 4.949324802597907e-06, + "loss": 0.5307, + "step": 5333 + }, + { + "epoch": 0.392148213498015, + "grad_norm": 0.8918265104293823, + "learning_rate": 4.949305498924764e-06, + "loss": 0.6102, + "step": 5334 + }, + { + "epoch": 0.39222173209822087, + "grad_norm": 0.808394730091095, + "learning_rate": 4.949286191613309e-06, + "loss": 0.561, + "step": 5335 + }, + { + "epoch": 0.3922952506984267, + "grad_norm": 0.7870844006538391, + "learning_rate": 4.949266880663576e-06, + "loss": 0.509, + "step": 5336 + }, + { + "epoch": 0.39236876929863257, + "grad_norm": 0.9547329545021057, + "learning_rate": 4.94924756607559e-06, + "loss": 0.6212, + "step": 5337 + }, + { + "epoch": 0.3924422878988384, + "grad_norm": 0.8315987586975098, + "learning_rate": 4.949228247849381e-06, + "loss": 0.5411, + "step": 5338 + }, + { + "epoch": 0.3925158064990443, + "grad_norm": 0.834363579750061, + "learning_rate": 4.949208925984978e-06, + "loss": 0.5351, + "step": 5339 + }, + { + "epoch": 0.3925893250992501, + "grad_norm": 0.8273093700408936, + "learning_rate": 4.9491896004824085e-06, + "loss": 0.5595, + "step": 5340 + }, + { + "epoch": 0.392662843699456, + "grad_norm": 0.8460586071014404, + "learning_rate": 4.949170271341703e-06, + "loss": 0.5643, + "step": 5341 + }, + { + "epoch": 0.3927363622996618, + "grad_norm": 0.8557379245758057, + "learning_rate": 4.949150938562888e-06, + "loss": 0.6165, + "step": 5342 + }, + { + "epoch": 0.3928098808998677, + "grad_norm": 0.8605520129203796, + "learning_rate": 4.949131602145994e-06, + "loss": 0.5463, + "step": 5343 + }, + { + "epoch": 0.3928833995000735, + "grad_norm": 0.8396883606910706, + "learning_rate": 4.949112262091049e-06, + "loss": 0.5869, + "step": 5344 + }, + { + "epoch": 0.3929569181002794, + "grad_norm": 0.8879771828651428, + "learning_rate": 4.9490929183980815e-06, + "loss": 0.5698, + "step": 5345 + }, + { + "epoch": 0.3930304367004852, + "grad_norm": 0.8829185366630554, + "learning_rate": 4.949073571067121e-06, + "loss": 0.547, + "step": 5346 + }, + { + "epoch": 0.3931039553006911, + "grad_norm": 0.8190836310386658, + "learning_rate": 4.949054220098196e-06, + "loss": 0.5319, + "step": 5347 + }, + { + "epoch": 0.3931774739008969, + "grad_norm": 0.8609139919281006, + "learning_rate": 4.949034865491335e-06, + "loss": 0.5462, + "step": 5348 + }, + { + "epoch": 0.3932509925011028, + "grad_norm": 0.8576662540435791, + "learning_rate": 4.949015507246566e-06, + "loss": 0.5739, + "step": 5349 + }, + { + "epoch": 0.3933245111013086, + "grad_norm": 0.838678777217865, + "learning_rate": 4.94899614536392e-06, + "loss": 0.5049, + "step": 5350 + }, + { + "epoch": 0.3933980297015145, + "grad_norm": 0.8541538715362549, + "learning_rate": 4.948976779843423e-06, + "loss": 0.5714, + "step": 5351 + }, + { + "epoch": 0.3934715483017203, + "grad_norm": 0.85447096824646, + "learning_rate": 4.9489574106851055e-06, + "loss": 0.5475, + "step": 5352 + }, + { + "epoch": 0.3935450669019262, + "grad_norm": 0.8605865836143494, + "learning_rate": 4.948938037888996e-06, + "loss": 0.5566, + "step": 5353 + }, + { + "epoch": 0.393618585502132, + "grad_norm": 0.8419682383537292, + "learning_rate": 4.9489186614551235e-06, + "loss": 0.5692, + "step": 5354 + }, + { + "epoch": 0.3936921041023379, + "grad_norm": 1.0208542346954346, + "learning_rate": 4.948899281383517e-06, + "loss": 0.5701, + "step": 5355 + }, + { + "epoch": 0.3937656227025437, + "grad_norm": 0.8353664875030518, + "learning_rate": 4.948879897674203e-06, + "loss": 0.5899, + "step": 5356 + }, + { + "epoch": 0.3938391413027496, + "grad_norm": 0.8867056369781494, + "learning_rate": 4.9488605103272125e-06, + "loss": 0.5916, + "step": 5357 + }, + { + "epoch": 0.39391265990295543, + "grad_norm": 0.8875647187232971, + "learning_rate": 4.9488411193425746e-06, + "loss": 0.6078, + "step": 5358 + }, + { + "epoch": 0.3939861785031613, + "grad_norm": 0.8019676208496094, + "learning_rate": 4.948821724720316e-06, + "loss": 0.5477, + "step": 5359 + }, + { + "epoch": 0.39405969710336713, + "grad_norm": 0.8812392950057983, + "learning_rate": 4.948802326460468e-06, + "loss": 0.5359, + "step": 5360 + }, + { + "epoch": 0.394133215703573, + "grad_norm": 0.811538577079773, + "learning_rate": 4.948782924563058e-06, + "loss": 0.5463, + "step": 5361 + }, + { + "epoch": 0.39420673430377884, + "grad_norm": 0.9205713868141174, + "learning_rate": 4.948763519028115e-06, + "loss": 0.5643, + "step": 5362 + }, + { + "epoch": 0.3942802529039847, + "grad_norm": 0.8662441968917847, + "learning_rate": 4.948744109855668e-06, + "loss": 0.5691, + "step": 5363 + }, + { + "epoch": 0.39435377150419054, + "grad_norm": 0.8588101267814636, + "learning_rate": 4.9487246970457456e-06, + "loss": 0.5594, + "step": 5364 + }, + { + "epoch": 0.3944272901043964, + "grad_norm": 0.8775619268417358, + "learning_rate": 4.9487052805983765e-06, + "loss": 0.5288, + "step": 5365 + }, + { + "epoch": 0.39450080870460225, + "grad_norm": 0.8652393221855164, + "learning_rate": 4.9486858605135905e-06, + "loss": 0.5935, + "step": 5366 + }, + { + "epoch": 0.3945743273048081, + "grad_norm": 0.8494625091552734, + "learning_rate": 4.948666436791415e-06, + "loss": 0.5582, + "step": 5367 + }, + { + "epoch": 0.39464784590501395, + "grad_norm": 0.9451626539230347, + "learning_rate": 4.948647009431879e-06, + "loss": 0.6055, + "step": 5368 + }, + { + "epoch": 0.39472136450521983, + "grad_norm": 0.8729644417762756, + "learning_rate": 4.9486275784350135e-06, + "loss": 0.5345, + "step": 5369 + }, + { + "epoch": 0.39479488310542565, + "grad_norm": 0.8864854574203491, + "learning_rate": 4.948608143800845e-06, + "loss": 0.5941, + "step": 5370 + }, + { + "epoch": 0.39486840170563153, + "grad_norm": 0.8437178730964661, + "learning_rate": 4.948588705529404e-06, + "loss": 0.5461, + "step": 5371 + }, + { + "epoch": 0.39494192030583736, + "grad_norm": 0.853297770023346, + "learning_rate": 4.948569263620717e-06, + "loss": 0.5077, + "step": 5372 + }, + { + "epoch": 0.39501543890604324, + "grad_norm": 0.8399381637573242, + "learning_rate": 4.948549818074815e-06, + "loss": 0.5938, + "step": 5373 + }, + { + "epoch": 0.39508895750624906, + "grad_norm": 0.8111253976821899, + "learning_rate": 4.948530368891727e-06, + "loss": 0.511, + "step": 5374 + }, + { + "epoch": 0.39516247610645494, + "grad_norm": 0.857244610786438, + "learning_rate": 4.948510916071481e-06, + "loss": 0.6011, + "step": 5375 + }, + { + "epoch": 0.39523599470666076, + "grad_norm": 0.8811755776405334, + "learning_rate": 4.948491459614105e-06, + "loss": 0.6137, + "step": 5376 + }, + { + "epoch": 0.39530951330686664, + "grad_norm": 0.8631924390792847, + "learning_rate": 4.948471999519631e-06, + "loss": 0.5718, + "step": 5377 + }, + { + "epoch": 0.39538303190707247, + "grad_norm": 0.8465908169746399, + "learning_rate": 4.948452535788084e-06, + "loss": 0.6217, + "step": 5378 + }, + { + "epoch": 0.39545655050727835, + "grad_norm": 0.8897339701652527, + "learning_rate": 4.948433068419496e-06, + "loss": 0.5606, + "step": 5379 + }, + { + "epoch": 0.3955300691074842, + "grad_norm": 0.8913036584854126, + "learning_rate": 4.948413597413895e-06, + "loss": 0.5562, + "step": 5380 + }, + { + "epoch": 0.39560358770769005, + "grad_norm": 0.9096770286560059, + "learning_rate": 4.948394122771309e-06, + "loss": 0.5896, + "step": 5381 + }, + { + "epoch": 0.3956771063078959, + "grad_norm": 0.8535564541816711, + "learning_rate": 4.948374644491768e-06, + "loss": 0.615, + "step": 5382 + }, + { + "epoch": 0.39575062490810176, + "grad_norm": 0.879543662071228, + "learning_rate": 4.948355162575299e-06, + "loss": 0.5961, + "step": 5383 + }, + { + "epoch": 0.3958241435083076, + "grad_norm": 0.8651822209358215, + "learning_rate": 4.948335677021934e-06, + "loss": 0.5379, + "step": 5384 + }, + { + "epoch": 0.39589766210851346, + "grad_norm": 0.8269044160842896, + "learning_rate": 4.948316187831701e-06, + "loss": 0.5523, + "step": 5385 + }, + { + "epoch": 0.3959711807087193, + "grad_norm": 0.7996842861175537, + "learning_rate": 4.9482966950046274e-06, + "loss": 0.5634, + "step": 5386 + }, + { + "epoch": 0.39604469930892516, + "grad_norm": 0.8224093317985535, + "learning_rate": 4.948277198540743e-06, + "loss": 0.5377, + "step": 5387 + }, + { + "epoch": 0.396118217909131, + "grad_norm": 0.8478211760520935, + "learning_rate": 4.948257698440077e-06, + "loss": 0.5828, + "step": 5388 + }, + { + "epoch": 0.39619173650933687, + "grad_norm": 0.8738064169883728, + "learning_rate": 4.9482381947026585e-06, + "loss": 0.5528, + "step": 5389 + }, + { + "epoch": 0.3962652551095427, + "grad_norm": 0.8826322555541992, + "learning_rate": 4.948218687328516e-06, + "loss": 0.587, + "step": 5390 + }, + { + "epoch": 0.39633877370974857, + "grad_norm": 0.8360243439674377, + "learning_rate": 4.948199176317679e-06, + "loss": 0.5928, + "step": 5391 + }, + { + "epoch": 0.3964122923099544, + "grad_norm": 0.8590363264083862, + "learning_rate": 4.948179661670176e-06, + "loss": 0.5549, + "step": 5392 + }, + { + "epoch": 0.3964858109101603, + "grad_norm": 0.8481371998786926, + "learning_rate": 4.948160143386036e-06, + "loss": 0.5539, + "step": 5393 + }, + { + "epoch": 0.3965593295103661, + "grad_norm": 0.8300761580467224, + "learning_rate": 4.948140621465288e-06, + "loss": 0.5343, + "step": 5394 + }, + { + "epoch": 0.396632848110572, + "grad_norm": 0.8076537847518921, + "learning_rate": 4.948121095907962e-06, + "loss": 0.5445, + "step": 5395 + }, + { + "epoch": 0.3967063667107778, + "grad_norm": 0.8082634210586548, + "learning_rate": 4.948101566714085e-06, + "loss": 0.5421, + "step": 5396 + }, + { + "epoch": 0.3967798853109837, + "grad_norm": 0.9310968518257141, + "learning_rate": 4.948082033883688e-06, + "loss": 0.5985, + "step": 5397 + }, + { + "epoch": 0.3968534039111895, + "grad_norm": 0.8452551960945129, + "learning_rate": 4.948062497416799e-06, + "loss": 0.6108, + "step": 5398 + }, + { + "epoch": 0.3969269225113954, + "grad_norm": 0.8397094011306763, + "learning_rate": 4.948042957313448e-06, + "loss": 0.5544, + "step": 5399 + }, + { + "epoch": 0.3970004411116012, + "grad_norm": 0.8867619633674622, + "learning_rate": 4.9480234135736615e-06, + "loss": 0.5873, + "step": 5400 + }, + { + "epoch": 0.3970739597118071, + "grad_norm": 0.8313382863998413, + "learning_rate": 4.9480038661974715e-06, + "loss": 0.5777, + "step": 5401 + }, + { + "epoch": 0.3971474783120129, + "grad_norm": 0.8533156514167786, + "learning_rate": 4.9479843151849054e-06, + "loss": 0.5661, + "step": 5402 + }, + { + "epoch": 0.3972209969122188, + "grad_norm": 0.8613928556442261, + "learning_rate": 4.947964760535992e-06, + "loss": 0.575, + "step": 5403 + }, + { + "epoch": 0.3972945155124246, + "grad_norm": 0.8644111156463623, + "learning_rate": 4.947945202250763e-06, + "loss": 0.5606, + "step": 5404 + }, + { + "epoch": 0.3973680341126305, + "grad_norm": 0.9256121516227722, + "learning_rate": 4.947925640329244e-06, + "loss": 0.5663, + "step": 5405 + }, + { + "epoch": 0.3974415527128363, + "grad_norm": 0.7974121570587158, + "learning_rate": 4.947906074771466e-06, + "loss": 0.5516, + "step": 5406 + }, + { + "epoch": 0.3975150713130422, + "grad_norm": 0.8272190690040588, + "learning_rate": 4.947886505577457e-06, + "loss": 0.5269, + "step": 5407 + }, + { + "epoch": 0.397588589913248, + "grad_norm": 0.8817211389541626, + "learning_rate": 4.947866932747247e-06, + "loss": 0.5673, + "step": 5408 + }, + { + "epoch": 0.3976621085134539, + "grad_norm": 0.889664888381958, + "learning_rate": 4.947847356280865e-06, + "loss": 0.5991, + "step": 5409 + }, + { + "epoch": 0.39773562711365973, + "grad_norm": 0.8374505043029785, + "learning_rate": 4.947827776178339e-06, + "loss": 0.5392, + "step": 5410 + }, + { + "epoch": 0.3978091457138656, + "grad_norm": 0.8620784282684326, + "learning_rate": 4.947808192439699e-06, + "loss": 0.521, + "step": 5411 + }, + { + "epoch": 0.39788266431407143, + "grad_norm": 0.896178662776947, + "learning_rate": 4.9477886050649746e-06, + "loss": 0.5808, + "step": 5412 + }, + { + "epoch": 0.3979561829142773, + "grad_norm": 0.8537264466285706, + "learning_rate": 4.947769014054194e-06, + "loss": 0.6116, + "step": 5413 + }, + { + "epoch": 0.3980297015144832, + "grad_norm": 0.8796949982643127, + "learning_rate": 4.947749419407386e-06, + "loss": 0.5576, + "step": 5414 + }, + { + "epoch": 0.398103220114689, + "grad_norm": 0.9116522073745728, + "learning_rate": 4.947729821124581e-06, + "loss": 0.5879, + "step": 5415 + }, + { + "epoch": 0.3981767387148949, + "grad_norm": 0.9030027985572815, + "learning_rate": 4.947710219205808e-06, + "loss": 0.5725, + "step": 5416 + }, + { + "epoch": 0.3982502573151007, + "grad_norm": 0.8969836235046387, + "learning_rate": 4.9476906136510945e-06, + "loss": 0.5395, + "step": 5417 + }, + { + "epoch": 0.3983237759153066, + "grad_norm": 0.8462505340576172, + "learning_rate": 4.94767100446047e-06, + "loss": 0.5828, + "step": 5418 + }, + { + "epoch": 0.3983972945155124, + "grad_norm": 0.863811731338501, + "learning_rate": 4.947651391633966e-06, + "loss": 0.5117, + "step": 5419 + }, + { + "epoch": 0.3984708131157183, + "grad_norm": 0.918060302734375, + "learning_rate": 4.9476317751716085e-06, + "loss": 0.6037, + "step": 5420 + }, + { + "epoch": 0.3985443317159241, + "grad_norm": 0.8767993450164795, + "learning_rate": 4.9476121550734285e-06, + "loss": 0.6119, + "step": 5421 + }, + { + "epoch": 0.39861785031613, + "grad_norm": 0.7927995920181274, + "learning_rate": 4.947592531339454e-06, + "loss": 0.4842, + "step": 5422 + }, + { + "epoch": 0.39869136891633583, + "grad_norm": 0.8699789643287659, + "learning_rate": 4.947572903969716e-06, + "loss": 0.543, + "step": 5423 + }, + { + "epoch": 0.3987648875165417, + "grad_norm": 0.8533583879470825, + "learning_rate": 4.947553272964243e-06, + "loss": 0.5694, + "step": 5424 + }, + { + "epoch": 0.39883840611674753, + "grad_norm": 0.8773676156997681, + "learning_rate": 4.9475336383230625e-06, + "loss": 0.5905, + "step": 5425 + }, + { + "epoch": 0.3989119247169534, + "grad_norm": 0.8011319637298584, + "learning_rate": 4.947514000046204e-06, + "loss": 0.5315, + "step": 5426 + }, + { + "epoch": 0.39898544331715924, + "grad_norm": 0.8283319473266602, + "learning_rate": 4.947494358133699e-06, + "loss": 0.5936, + "step": 5427 + }, + { + "epoch": 0.3990589619173651, + "grad_norm": 0.8772026896476746, + "learning_rate": 4.947474712585574e-06, + "loss": 0.5444, + "step": 5428 + }, + { + "epoch": 0.39913248051757094, + "grad_norm": 0.8939775228500366, + "learning_rate": 4.94745506340186e-06, + "loss": 0.5625, + "step": 5429 + }, + { + "epoch": 0.3992059991177768, + "grad_norm": 0.8632132411003113, + "learning_rate": 4.947435410582585e-06, + "loss": 0.5711, + "step": 5430 + }, + { + "epoch": 0.39927951771798265, + "grad_norm": 0.8159066438674927, + "learning_rate": 4.947415754127779e-06, + "loss": 0.5374, + "step": 5431 + }, + { + "epoch": 0.3993530363181885, + "grad_norm": 0.8914837837219238, + "learning_rate": 4.947396094037471e-06, + "loss": 0.5593, + "step": 5432 + }, + { + "epoch": 0.39942655491839435, + "grad_norm": 0.8598623275756836, + "learning_rate": 4.947376430311691e-06, + "loss": 0.573, + "step": 5433 + }, + { + "epoch": 0.39950007351860023, + "grad_norm": 0.8422434329986572, + "learning_rate": 4.947356762950466e-06, + "loss": 0.54, + "step": 5434 + }, + { + "epoch": 0.39957359211880605, + "grad_norm": 0.8621357083320618, + "learning_rate": 4.947337091953827e-06, + "loss": 0.532, + "step": 5435 + }, + { + "epoch": 0.39964711071901193, + "grad_norm": 0.8031423687934875, + "learning_rate": 4.947317417321803e-06, + "loss": 0.5127, + "step": 5436 + }, + { + "epoch": 0.39972062931921776, + "grad_norm": 0.8604046702384949, + "learning_rate": 4.947297739054422e-06, + "loss": 0.5822, + "step": 5437 + }, + { + "epoch": 0.39979414791942364, + "grad_norm": 0.8893494606018066, + "learning_rate": 4.947278057151715e-06, + "loss": 0.5798, + "step": 5438 + }, + { + "epoch": 0.39986766651962946, + "grad_norm": 0.8800523281097412, + "learning_rate": 4.947258371613711e-06, + "loss": 0.5327, + "step": 5439 + }, + { + "epoch": 0.39994118511983534, + "grad_norm": 0.8682111501693726, + "learning_rate": 4.947238682440438e-06, + "loss": 0.5827, + "step": 5440 + }, + { + "epoch": 0.40001470372004116, + "grad_norm": 0.8336948156356812, + "learning_rate": 4.9472189896319255e-06, + "loss": 0.5323, + "step": 5441 + }, + { + "epoch": 0.40008822232024704, + "grad_norm": 0.8266518115997314, + "learning_rate": 4.947199293188204e-06, + "loss": 0.5482, + "step": 5442 + }, + { + "epoch": 0.40016174092045287, + "grad_norm": 0.8284192085266113, + "learning_rate": 4.947179593109302e-06, + "loss": 0.5173, + "step": 5443 + }, + { + "epoch": 0.40023525952065875, + "grad_norm": 0.8923625946044922, + "learning_rate": 4.947159889395248e-06, + "loss": 0.598, + "step": 5444 + }, + { + "epoch": 0.4003087781208646, + "grad_norm": 0.863575279712677, + "learning_rate": 4.9471401820460726e-06, + "loss": 0.5634, + "step": 5445 + }, + { + "epoch": 0.40038229672107045, + "grad_norm": 0.8169558048248291, + "learning_rate": 4.9471204710618045e-06, + "loss": 0.5569, + "step": 5446 + }, + { + "epoch": 0.4004558153212763, + "grad_norm": 0.8533003330230713, + "learning_rate": 4.9471007564424725e-06, + "loss": 0.5509, + "step": 5447 + }, + { + "epoch": 0.40052933392148216, + "grad_norm": 0.7962599992752075, + "learning_rate": 4.947081038188107e-06, + "loss": 0.5435, + "step": 5448 + }, + { + "epoch": 0.400602852521688, + "grad_norm": 0.8313368558883667, + "learning_rate": 4.947061316298736e-06, + "loss": 0.5684, + "step": 5449 + }, + { + "epoch": 0.40067637112189386, + "grad_norm": 0.8677752017974854, + "learning_rate": 4.947041590774389e-06, + "loss": 0.5745, + "step": 5450 + }, + { + "epoch": 0.4007498897220997, + "grad_norm": 0.9099029302597046, + "learning_rate": 4.9470218616150965e-06, + "loss": 0.5297, + "step": 5451 + }, + { + "epoch": 0.40082340832230556, + "grad_norm": 0.8718276619911194, + "learning_rate": 4.947002128820887e-06, + "loss": 0.6099, + "step": 5452 + }, + { + "epoch": 0.4008969269225114, + "grad_norm": 0.8075804114341736, + "learning_rate": 4.94698239239179e-06, + "loss": 0.5469, + "step": 5453 + }, + { + "epoch": 0.40097044552271727, + "grad_norm": 0.8027554154396057, + "learning_rate": 4.946962652327834e-06, + "loss": 0.5265, + "step": 5454 + }, + { + "epoch": 0.4010439641229231, + "grad_norm": 0.8157762289047241, + "learning_rate": 4.946942908629049e-06, + "loss": 0.5505, + "step": 5455 + }, + { + "epoch": 0.40111748272312897, + "grad_norm": 0.8471122980117798, + "learning_rate": 4.946923161295465e-06, + "loss": 0.5634, + "step": 5456 + }, + { + "epoch": 0.4011910013233348, + "grad_norm": 0.8287758827209473, + "learning_rate": 4.94690341032711e-06, + "loss": 0.5715, + "step": 5457 + }, + { + "epoch": 0.4012645199235407, + "grad_norm": 0.8443456292152405, + "learning_rate": 4.946883655724014e-06, + "loss": 0.574, + "step": 5458 + }, + { + "epoch": 0.4013380385237465, + "grad_norm": 0.8577566742897034, + "learning_rate": 4.946863897486207e-06, + "loss": 0.591, + "step": 5459 + }, + { + "epoch": 0.4014115571239524, + "grad_norm": 0.8268653750419617, + "learning_rate": 4.946844135613716e-06, + "loss": 0.5254, + "step": 5460 + }, + { + "epoch": 0.4014850757241582, + "grad_norm": 0.847804844379425, + "learning_rate": 4.9468243701065744e-06, + "loss": 0.5717, + "step": 5461 + }, + { + "epoch": 0.4015585943243641, + "grad_norm": 0.9006772637367249, + "learning_rate": 4.946804600964807e-06, + "loss": 0.593, + "step": 5462 + }, + { + "epoch": 0.4016321129245699, + "grad_norm": 0.7788833379745483, + "learning_rate": 4.9467848281884464e-06, + "loss": 0.5131, + "step": 5463 + }, + { + "epoch": 0.4017056315247758, + "grad_norm": 0.8845556974411011, + "learning_rate": 4.946765051777521e-06, + "loss": 0.5794, + "step": 5464 + }, + { + "epoch": 0.4017791501249816, + "grad_norm": 0.8641830086708069, + "learning_rate": 4.94674527173206e-06, + "loss": 0.5642, + "step": 5465 + }, + { + "epoch": 0.4018526687251875, + "grad_norm": 0.8422178626060486, + "learning_rate": 4.946725488052093e-06, + "loss": 0.5592, + "step": 5466 + }, + { + "epoch": 0.4019261873253933, + "grad_norm": 0.8340059518814087, + "learning_rate": 4.946705700737649e-06, + "loss": 0.5936, + "step": 5467 + }, + { + "epoch": 0.4019997059255992, + "grad_norm": 0.8072087168693542, + "learning_rate": 4.946685909788758e-06, + "loss": 0.6067, + "step": 5468 + }, + { + "epoch": 0.402073224525805, + "grad_norm": 0.8538738489151001, + "learning_rate": 4.946666115205448e-06, + "loss": 0.6104, + "step": 5469 + }, + { + "epoch": 0.4021467431260109, + "grad_norm": 0.8613632321357727, + "learning_rate": 4.946646316987751e-06, + "loss": 0.5455, + "step": 5470 + }, + { + "epoch": 0.4022202617262167, + "grad_norm": 0.8608142733573914, + "learning_rate": 4.946626515135694e-06, + "loss": 0.5654, + "step": 5471 + }, + { + "epoch": 0.4022937803264226, + "grad_norm": 0.879723310470581, + "learning_rate": 4.946606709649307e-06, + "loss": 0.6083, + "step": 5472 + }, + { + "epoch": 0.4023672989266284, + "grad_norm": 0.8151835799217224, + "learning_rate": 4.94658690052862e-06, + "loss": 0.5253, + "step": 5473 + }, + { + "epoch": 0.4024408175268343, + "grad_norm": 0.882381796836853, + "learning_rate": 4.946567087773663e-06, + "loss": 0.5896, + "step": 5474 + }, + { + "epoch": 0.40251433612704013, + "grad_norm": 0.8116175532341003, + "learning_rate": 4.946547271384463e-06, + "loss": 0.4994, + "step": 5475 + }, + { + "epoch": 0.402587854727246, + "grad_norm": 0.8114009499549866, + "learning_rate": 4.946527451361053e-06, + "loss": 0.5491, + "step": 5476 + }, + { + "epoch": 0.40266137332745183, + "grad_norm": 0.8751361966133118, + "learning_rate": 4.946507627703458e-06, + "loss": 0.5589, + "step": 5477 + }, + { + "epoch": 0.4027348919276577, + "grad_norm": 0.8840398788452148, + "learning_rate": 4.946487800411712e-06, + "loss": 0.6061, + "step": 5478 + }, + { + "epoch": 0.40280841052786354, + "grad_norm": 0.8555218577384949, + "learning_rate": 4.9464679694858405e-06, + "loss": 0.5656, + "step": 5479 + }, + { + "epoch": 0.4028819291280694, + "grad_norm": 0.8837113380432129, + "learning_rate": 4.9464481349258766e-06, + "loss": 0.6009, + "step": 5480 + }, + { + "epoch": 0.40295544772827524, + "grad_norm": 0.8306499123573303, + "learning_rate": 4.9464282967318466e-06, + "loss": 0.5352, + "step": 5481 + }, + { + "epoch": 0.4030289663284811, + "grad_norm": 0.8437321782112122, + "learning_rate": 4.946408454903782e-06, + "loss": 0.5198, + "step": 5482 + }, + { + "epoch": 0.40310248492868694, + "grad_norm": 0.8431016802787781, + "learning_rate": 4.946388609441711e-06, + "loss": 0.5998, + "step": 5483 + }, + { + "epoch": 0.4031760035288928, + "grad_norm": 0.862293541431427, + "learning_rate": 4.946368760345665e-06, + "loss": 0.5749, + "step": 5484 + }, + { + "epoch": 0.40324952212909865, + "grad_norm": 0.8375864624977112, + "learning_rate": 4.946348907615671e-06, + "loss": 0.5676, + "step": 5485 + }, + { + "epoch": 0.4033230407293045, + "grad_norm": 0.8765907287597656, + "learning_rate": 4.94632905125176e-06, + "loss": 0.5529, + "step": 5486 + }, + { + "epoch": 0.40339655932951035, + "grad_norm": 0.8401917815208435, + "learning_rate": 4.94630919125396e-06, + "loss": 0.4839, + "step": 5487 + }, + { + "epoch": 0.40347007792971623, + "grad_norm": 0.8522670269012451, + "learning_rate": 4.9462893276223036e-06, + "loss": 0.6009, + "step": 5488 + }, + { + "epoch": 0.40354359652992206, + "grad_norm": 0.82747483253479, + "learning_rate": 4.946269460356818e-06, + "loss": 0.57, + "step": 5489 + }, + { + "epoch": 0.40361711513012793, + "grad_norm": 0.8947484493255615, + "learning_rate": 4.946249589457532e-06, + "loss": 0.5893, + "step": 5490 + }, + { + "epoch": 0.40369063373033376, + "grad_norm": 0.864239513874054, + "learning_rate": 4.946229714924477e-06, + "loss": 0.5674, + "step": 5491 + }, + { + "epoch": 0.40376415233053964, + "grad_norm": 0.832816481590271, + "learning_rate": 4.946209836757682e-06, + "loss": 0.5149, + "step": 5492 + }, + { + "epoch": 0.40383767093074546, + "grad_norm": 0.8374007344245911, + "learning_rate": 4.946189954957176e-06, + "loss": 0.5873, + "step": 5493 + }, + { + "epoch": 0.40391118953095134, + "grad_norm": 0.8487154245376587, + "learning_rate": 4.946170069522988e-06, + "loss": 0.5841, + "step": 5494 + }, + { + "epoch": 0.40398470813115717, + "grad_norm": 0.8827086091041565, + "learning_rate": 4.946150180455148e-06, + "loss": 0.6073, + "step": 5495 + }, + { + "epoch": 0.40405822673136305, + "grad_norm": 0.8458613157272339, + "learning_rate": 4.946130287753688e-06, + "loss": 0.6186, + "step": 5496 + }, + { + "epoch": 0.40413174533156887, + "grad_norm": 0.8528395891189575, + "learning_rate": 4.946110391418633e-06, + "loss": 0.567, + "step": 5497 + }, + { + "epoch": 0.40420526393177475, + "grad_norm": 0.8495476841926575, + "learning_rate": 4.946090491450016e-06, + "loss": 0.5698, + "step": 5498 + }, + { + "epoch": 0.4042787825319806, + "grad_norm": 0.8510456681251526, + "learning_rate": 4.946070587847866e-06, + "loss": 0.5845, + "step": 5499 + }, + { + "epoch": 0.40435230113218645, + "grad_norm": 0.824834406375885, + "learning_rate": 4.9460506806122115e-06, + "loss": 0.5811, + "step": 5500 + }, + { + "epoch": 0.4044258197323923, + "grad_norm": 0.8359707593917847, + "learning_rate": 4.946030769743083e-06, + "loss": 0.5641, + "step": 5501 + }, + { + "epoch": 0.40449933833259816, + "grad_norm": 0.8393021821975708, + "learning_rate": 4.94601085524051e-06, + "loss": 0.5171, + "step": 5502 + }, + { + "epoch": 0.404572856932804, + "grad_norm": 0.8866618275642395, + "learning_rate": 4.945990937104521e-06, + "loss": 0.5802, + "step": 5503 + }, + { + "epoch": 0.40464637553300986, + "grad_norm": 0.8940759301185608, + "learning_rate": 4.9459710153351465e-06, + "loss": 0.6001, + "step": 5504 + }, + { + "epoch": 0.4047198941332157, + "grad_norm": 0.8945134282112122, + "learning_rate": 4.945951089932416e-06, + "loss": 0.5913, + "step": 5505 + }, + { + "epoch": 0.40479341273342156, + "grad_norm": 0.8070722818374634, + "learning_rate": 4.94593116089636e-06, + "loss": 0.5377, + "step": 5506 + }, + { + "epoch": 0.4048669313336274, + "grad_norm": 0.8416051268577576, + "learning_rate": 4.945911228227006e-06, + "loss": 0.59, + "step": 5507 + }, + { + "epoch": 0.40494044993383327, + "grad_norm": 0.8523047566413879, + "learning_rate": 4.945891291924385e-06, + "loss": 0.5517, + "step": 5508 + }, + { + "epoch": 0.4050139685340391, + "grad_norm": 0.7992328405380249, + "learning_rate": 4.945871351988527e-06, + "loss": 0.5334, + "step": 5509 + }, + { + "epoch": 0.405087487134245, + "grad_norm": 0.834603488445282, + "learning_rate": 4.945851408419461e-06, + "loss": 0.555, + "step": 5510 + }, + { + "epoch": 0.4051610057344508, + "grad_norm": 0.8330718874931335, + "learning_rate": 4.945831461217216e-06, + "loss": 0.5566, + "step": 5511 + }, + { + "epoch": 0.4052345243346567, + "grad_norm": 0.8891319632530212, + "learning_rate": 4.945811510381823e-06, + "loss": 0.5735, + "step": 5512 + }, + { + "epoch": 0.4053080429348625, + "grad_norm": 0.8737086057662964, + "learning_rate": 4.945791555913311e-06, + "loss": 0.5532, + "step": 5513 + }, + { + "epoch": 0.4053815615350684, + "grad_norm": 0.8555448055267334, + "learning_rate": 4.945771597811709e-06, + "loss": 0.5863, + "step": 5514 + }, + { + "epoch": 0.4054550801352742, + "grad_norm": 0.8838697075843811, + "learning_rate": 4.9457516360770474e-06, + "loss": 0.5433, + "step": 5515 + }, + { + "epoch": 0.4055285987354801, + "grad_norm": 0.8433166146278381, + "learning_rate": 4.945731670709355e-06, + "loss": 0.5451, + "step": 5516 + }, + { + "epoch": 0.4056021173356859, + "grad_norm": 0.8918185830116272, + "learning_rate": 4.945711701708663e-06, + "loss": 0.5632, + "step": 5517 + }, + { + "epoch": 0.4056756359358918, + "grad_norm": 0.8835387229919434, + "learning_rate": 4.945691729075e-06, + "loss": 0.5975, + "step": 5518 + }, + { + "epoch": 0.4057491545360976, + "grad_norm": 0.8249240517616272, + "learning_rate": 4.945671752808396e-06, + "loss": 0.5142, + "step": 5519 + }, + { + "epoch": 0.4058226731363035, + "grad_norm": 0.8711811304092407, + "learning_rate": 4.945651772908881e-06, + "loss": 0.5418, + "step": 5520 + }, + { + "epoch": 0.4058961917365093, + "grad_norm": 0.8776015639305115, + "learning_rate": 4.945631789376484e-06, + "loss": 0.5439, + "step": 5521 + }, + { + "epoch": 0.4059697103367152, + "grad_norm": 0.8578088283538818, + "learning_rate": 4.945611802211234e-06, + "loss": 0.5657, + "step": 5522 + }, + { + "epoch": 0.406043228936921, + "grad_norm": 0.8288078308105469, + "learning_rate": 4.945591811413162e-06, + "loss": 0.5404, + "step": 5523 + }, + { + "epoch": 0.4061167475371269, + "grad_norm": 0.8569355010986328, + "learning_rate": 4.945571816982297e-06, + "loss": 0.5346, + "step": 5524 + }, + { + "epoch": 0.4061902661373327, + "grad_norm": 0.8366249203681946, + "learning_rate": 4.94555181891867e-06, + "loss": 0.5688, + "step": 5525 + }, + { + "epoch": 0.4062637847375386, + "grad_norm": 0.84102863073349, + "learning_rate": 4.945531817222309e-06, + "loss": 0.5616, + "step": 5526 + }, + { + "epoch": 0.4063373033377444, + "grad_norm": 0.8687151074409485, + "learning_rate": 4.945511811893245e-06, + "loss": 0.5605, + "step": 5527 + }, + { + "epoch": 0.4064108219379503, + "grad_norm": 0.84203040599823, + "learning_rate": 4.945491802931507e-06, + "loss": 0.565, + "step": 5528 + }, + { + "epoch": 0.40648434053815613, + "grad_norm": 0.8094838857650757, + "learning_rate": 4.945471790337124e-06, + "loss": 0.5418, + "step": 5529 + }, + { + "epoch": 0.406557859138362, + "grad_norm": 0.8605569005012512, + "learning_rate": 4.945451774110126e-06, + "loss": 0.5456, + "step": 5530 + }, + { + "epoch": 0.40663137773856783, + "grad_norm": 0.8357288837432861, + "learning_rate": 4.945431754250545e-06, + "loss": 0.5534, + "step": 5531 + }, + { + "epoch": 0.4067048963387737, + "grad_norm": 0.815669059753418, + "learning_rate": 4.945411730758408e-06, + "loss": 0.51, + "step": 5532 + }, + { + "epoch": 0.40677841493897954, + "grad_norm": 0.8669511079788208, + "learning_rate": 4.945391703633746e-06, + "loss": 0.5867, + "step": 5533 + }, + { + "epoch": 0.4068519335391854, + "grad_norm": 0.8675908446311951, + "learning_rate": 4.945371672876589e-06, + "loss": 0.5681, + "step": 5534 + }, + { + "epoch": 0.40692545213939124, + "grad_norm": 0.9023345708847046, + "learning_rate": 4.945351638486966e-06, + "loss": 0.5993, + "step": 5535 + }, + { + "epoch": 0.4069989707395971, + "grad_norm": 0.851257860660553, + "learning_rate": 4.945331600464907e-06, + "loss": 0.5206, + "step": 5536 + }, + { + "epoch": 0.40707248933980295, + "grad_norm": 0.8338031768798828, + "learning_rate": 4.945311558810442e-06, + "loss": 0.5202, + "step": 5537 + }, + { + "epoch": 0.4071460079400088, + "grad_norm": 0.8547917008399963, + "learning_rate": 4.9452915135236e-06, + "loss": 0.5143, + "step": 5538 + }, + { + "epoch": 0.40721952654021465, + "grad_norm": 0.8747217655181885, + "learning_rate": 4.945271464604412e-06, + "loss": 0.6002, + "step": 5539 + }, + { + "epoch": 0.40729304514042053, + "grad_norm": 0.7834990620613098, + "learning_rate": 4.945251412052907e-06, + "loss": 0.5362, + "step": 5540 + }, + { + "epoch": 0.40736656374062635, + "grad_norm": 0.8442685008049011, + "learning_rate": 4.945231355869114e-06, + "loss": 0.5252, + "step": 5541 + }, + { + "epoch": 0.40744008234083223, + "grad_norm": 0.829853892326355, + "learning_rate": 4.945211296053065e-06, + "loss": 0.5719, + "step": 5542 + }, + { + "epoch": 0.40751360094103806, + "grad_norm": 0.8325871825218201, + "learning_rate": 4.945191232604788e-06, + "loss": 0.5645, + "step": 5543 + }, + { + "epoch": 0.40758711954124394, + "grad_norm": 0.8603687882423401, + "learning_rate": 4.945171165524313e-06, + "loss": 0.5847, + "step": 5544 + }, + { + "epoch": 0.40766063814144976, + "grad_norm": 0.825253427028656, + "learning_rate": 4.945151094811671e-06, + "loss": 0.573, + "step": 5545 + }, + { + "epoch": 0.40773415674165564, + "grad_norm": 0.8939588665962219, + "learning_rate": 4.94513102046689e-06, + "loss": 0.5653, + "step": 5546 + }, + { + "epoch": 0.40780767534186146, + "grad_norm": 0.8051988482475281, + "learning_rate": 4.945110942490001e-06, + "loss": 0.5103, + "step": 5547 + }, + { + "epoch": 0.40788119394206734, + "grad_norm": 0.9033169150352478, + "learning_rate": 4.945090860881034e-06, + "loss": 0.5521, + "step": 5548 + }, + { + "epoch": 0.40795471254227317, + "grad_norm": 0.8831459879875183, + "learning_rate": 4.945070775640019e-06, + "loss": 0.5601, + "step": 5549 + }, + { + "epoch": 0.40802823114247905, + "grad_norm": 0.8121309280395508, + "learning_rate": 4.945050686766984e-06, + "loss": 0.5123, + "step": 5550 + }, + { + "epoch": 0.40810174974268487, + "grad_norm": 0.8405619859695435, + "learning_rate": 4.94503059426196e-06, + "loss": 0.6176, + "step": 5551 + }, + { + "epoch": 0.40817526834289075, + "grad_norm": 0.8263376355171204, + "learning_rate": 4.945010498124978e-06, + "loss": 0.5467, + "step": 5552 + }, + { + "epoch": 0.40824878694309663, + "grad_norm": 0.8384341597557068, + "learning_rate": 4.944990398356067e-06, + "loss": 0.546, + "step": 5553 + }, + { + "epoch": 0.40832230554330246, + "grad_norm": 0.8794180750846863, + "learning_rate": 4.944970294955255e-06, + "loss": 0.5794, + "step": 5554 + }, + { + "epoch": 0.40839582414350833, + "grad_norm": 0.8267393112182617, + "learning_rate": 4.944950187922575e-06, + "loss": 0.5767, + "step": 5555 + }, + { + "epoch": 0.40846934274371416, + "grad_norm": 0.8385246396064758, + "learning_rate": 4.944930077258054e-06, + "loss": 0.5705, + "step": 5556 + }, + { + "epoch": 0.40854286134392004, + "grad_norm": 0.8523449897766113, + "learning_rate": 4.944909962961724e-06, + "loss": 0.5802, + "step": 5557 + }, + { + "epoch": 0.40861637994412586, + "grad_norm": 0.8665246367454529, + "learning_rate": 4.944889845033615e-06, + "loss": 0.5533, + "step": 5558 + }, + { + "epoch": 0.40868989854433174, + "grad_norm": 0.8590063452720642, + "learning_rate": 4.944869723473755e-06, + "loss": 0.5938, + "step": 5559 + }, + { + "epoch": 0.40876341714453757, + "grad_norm": 0.8583419322967529, + "learning_rate": 4.944849598282175e-06, + "loss": 0.5527, + "step": 5560 + }, + { + "epoch": 0.40883693574474345, + "grad_norm": 0.8679451942443848, + "learning_rate": 4.944829469458905e-06, + "loss": 0.5949, + "step": 5561 + }, + { + "epoch": 0.40891045434494927, + "grad_norm": 0.8388366103172302, + "learning_rate": 4.944809337003974e-06, + "loss": 0.5399, + "step": 5562 + }, + { + "epoch": 0.40898397294515515, + "grad_norm": 0.8988344669342041, + "learning_rate": 4.944789200917414e-06, + "loss": 0.5686, + "step": 5563 + }, + { + "epoch": 0.409057491545361, + "grad_norm": 0.8455119132995605, + "learning_rate": 4.944769061199252e-06, + "loss": 0.5646, + "step": 5564 + }, + { + "epoch": 0.40913101014556685, + "grad_norm": 0.8811810612678528, + "learning_rate": 4.9447489178495204e-06, + "loss": 0.5773, + "step": 5565 + }, + { + "epoch": 0.4092045287457727, + "grad_norm": 0.8450952768325806, + "learning_rate": 4.944728770868248e-06, + "loss": 0.5027, + "step": 5566 + }, + { + "epoch": 0.40927804734597856, + "grad_norm": 0.8261908292770386, + "learning_rate": 4.944708620255465e-06, + "loss": 0.5359, + "step": 5567 + }, + { + "epoch": 0.4093515659461844, + "grad_norm": 0.881325364112854, + "learning_rate": 4.9446884660112e-06, + "loss": 0.5428, + "step": 5568 + }, + { + "epoch": 0.40942508454639026, + "grad_norm": 0.8274286389350891, + "learning_rate": 4.9446683081354855e-06, + "loss": 0.5577, + "step": 5569 + }, + { + "epoch": 0.4094986031465961, + "grad_norm": 0.871252715587616, + "learning_rate": 4.944648146628349e-06, + "loss": 0.5722, + "step": 5570 + }, + { + "epoch": 0.40957212174680196, + "grad_norm": 0.8765962719917297, + "learning_rate": 4.944627981489823e-06, + "loss": 0.6012, + "step": 5571 + }, + { + "epoch": 0.4096456403470078, + "grad_norm": 0.8416531085968018, + "learning_rate": 4.944607812719936e-06, + "loss": 0.5654, + "step": 5572 + }, + { + "epoch": 0.40971915894721367, + "grad_norm": 0.8591434359550476, + "learning_rate": 4.944587640318717e-06, + "loss": 0.5784, + "step": 5573 + }, + { + "epoch": 0.4097926775474195, + "grad_norm": 0.8744601607322693, + "learning_rate": 4.944567464286196e-06, + "loss": 0.5888, + "step": 5574 + }, + { + "epoch": 0.4098661961476254, + "grad_norm": 0.8684166669845581, + "learning_rate": 4.9445472846224056e-06, + "loss": 0.5128, + "step": 5575 + }, + { + "epoch": 0.4099397147478312, + "grad_norm": 0.8544395565986633, + "learning_rate": 4.944527101327373e-06, + "loss": 0.5597, + "step": 5576 + }, + { + "epoch": 0.4100132333480371, + "grad_norm": 0.8425198197364807, + "learning_rate": 4.94450691440113e-06, + "loss": 0.5585, + "step": 5577 + }, + { + "epoch": 0.4100867519482429, + "grad_norm": 0.8994043469429016, + "learning_rate": 4.944486723843706e-06, + "loss": 0.6086, + "step": 5578 + }, + { + "epoch": 0.4101602705484488, + "grad_norm": 0.8649784326553345, + "learning_rate": 4.94446652965513e-06, + "loss": 0.5713, + "step": 5579 + }, + { + "epoch": 0.4102337891486546, + "grad_norm": 0.8698369264602661, + "learning_rate": 4.944446331835434e-06, + "loss": 0.5538, + "step": 5580 + }, + { + "epoch": 0.4103073077488605, + "grad_norm": 0.8363588452339172, + "learning_rate": 4.9444261303846455e-06, + "loss": 0.5739, + "step": 5581 + }, + { + "epoch": 0.4103808263490663, + "grad_norm": 0.856896162033081, + "learning_rate": 4.944405925302796e-06, + "loss": 0.5788, + "step": 5582 + }, + { + "epoch": 0.4104543449492722, + "grad_norm": 0.8760468363761902, + "learning_rate": 4.944385716589917e-06, + "loss": 0.52, + "step": 5583 + }, + { + "epoch": 0.410527863549478, + "grad_norm": 0.8698544502258301, + "learning_rate": 4.944365504246035e-06, + "loss": 0.5508, + "step": 5584 + }, + { + "epoch": 0.4106013821496839, + "grad_norm": 0.8349841237068176, + "learning_rate": 4.944345288271183e-06, + "loss": 0.5371, + "step": 5585 + }, + { + "epoch": 0.4106749007498897, + "grad_norm": 0.9564717411994934, + "learning_rate": 4.9443250686653886e-06, + "loss": 0.5754, + "step": 5586 + }, + { + "epoch": 0.4107484193500956, + "grad_norm": 0.8396409153938293, + "learning_rate": 4.944304845428685e-06, + "loss": 0.5634, + "step": 5587 + }, + { + "epoch": 0.4108219379503014, + "grad_norm": 0.9095085859298706, + "learning_rate": 4.9442846185610985e-06, + "loss": 0.5914, + "step": 5588 + }, + { + "epoch": 0.4108954565505073, + "grad_norm": 0.8370979428291321, + "learning_rate": 4.944264388062662e-06, + "loss": 0.5391, + "step": 5589 + }, + { + "epoch": 0.4109689751507131, + "grad_norm": 0.8190925717353821, + "learning_rate": 4.944244153933405e-06, + "loss": 0.5622, + "step": 5590 + }, + { + "epoch": 0.411042493750919, + "grad_norm": 0.8757154941558838, + "learning_rate": 4.944223916173356e-06, + "loss": 0.5747, + "step": 5591 + }, + { + "epoch": 0.4111160123511248, + "grad_norm": 0.8435576558113098, + "learning_rate": 4.944203674782547e-06, + "loss": 0.5951, + "step": 5592 + }, + { + "epoch": 0.4111895309513307, + "grad_norm": 0.8301160931587219, + "learning_rate": 4.944183429761007e-06, + "loss": 0.5705, + "step": 5593 + }, + { + "epoch": 0.41126304955153653, + "grad_norm": 0.8396345973014832, + "learning_rate": 4.944163181108767e-06, + "loss": 0.5427, + "step": 5594 + }, + { + "epoch": 0.4113365681517424, + "grad_norm": 0.8540611863136292, + "learning_rate": 4.944142928825856e-06, + "loss": 0.5706, + "step": 5595 + }, + { + "epoch": 0.41141008675194823, + "grad_norm": 0.839297890663147, + "learning_rate": 4.944122672912305e-06, + "loss": 0.5476, + "step": 5596 + }, + { + "epoch": 0.4114836053521541, + "grad_norm": 0.8771520853042603, + "learning_rate": 4.9441024133681425e-06, + "loss": 0.5957, + "step": 5597 + }, + { + "epoch": 0.41155712395235994, + "grad_norm": 0.8543042540550232, + "learning_rate": 4.9440821501934e-06, + "loss": 0.5748, + "step": 5598 + }, + { + "epoch": 0.4116306425525658, + "grad_norm": 0.8408507704734802, + "learning_rate": 4.9440618833881085e-06, + "loss": 0.5118, + "step": 5599 + }, + { + "epoch": 0.41170416115277164, + "grad_norm": 0.8071730732917786, + "learning_rate": 4.944041612952296e-06, + "loss": 0.5632, + "step": 5600 + }, + { + "epoch": 0.4117776797529775, + "grad_norm": 0.8718110918998718, + "learning_rate": 4.944021338885993e-06, + "loss": 0.5516, + "step": 5601 + }, + { + "epoch": 0.41185119835318335, + "grad_norm": 0.9320552349090576, + "learning_rate": 4.9440010611892305e-06, + "loss": 0.5838, + "step": 5602 + }, + { + "epoch": 0.4119247169533892, + "grad_norm": 0.8838436603546143, + "learning_rate": 4.943980779862038e-06, + "loss": 0.5299, + "step": 5603 + }, + { + "epoch": 0.41199823555359505, + "grad_norm": 0.9251457452774048, + "learning_rate": 4.943960494904447e-06, + "loss": 0.6174, + "step": 5604 + }, + { + "epoch": 0.41207175415380093, + "grad_norm": 0.8473591208457947, + "learning_rate": 4.943940206316486e-06, + "loss": 0.5537, + "step": 5605 + }, + { + "epoch": 0.41214527275400675, + "grad_norm": 0.8424149751663208, + "learning_rate": 4.943919914098185e-06, + "loss": 0.527, + "step": 5606 + }, + { + "epoch": 0.41221879135421263, + "grad_norm": 0.8269003629684448, + "learning_rate": 4.943899618249576e-06, + "loss": 0.5497, + "step": 5607 + }, + { + "epoch": 0.41229230995441846, + "grad_norm": 0.8657980561256409, + "learning_rate": 4.9438793187706864e-06, + "loss": 0.5677, + "step": 5608 + }, + { + "epoch": 0.41236582855462434, + "grad_norm": 0.9340149760246277, + "learning_rate": 4.943859015661549e-06, + "loss": 0.576, + "step": 5609 + }, + { + "epoch": 0.41243934715483016, + "grad_norm": 0.8447810411453247, + "learning_rate": 4.943838708922193e-06, + "loss": 0.6099, + "step": 5610 + }, + { + "epoch": 0.41251286575503604, + "grad_norm": 0.9055266380310059, + "learning_rate": 4.9438183985526475e-06, + "loss": 0.5464, + "step": 5611 + }, + { + "epoch": 0.41258638435524186, + "grad_norm": 0.9067651033401489, + "learning_rate": 4.943798084552944e-06, + "loss": 0.5851, + "step": 5612 + }, + { + "epoch": 0.41265990295544774, + "grad_norm": 0.8525153398513794, + "learning_rate": 4.943777766923113e-06, + "loss": 0.5477, + "step": 5613 + }, + { + "epoch": 0.41273342155565357, + "grad_norm": 0.8182719349861145, + "learning_rate": 4.9437574456631824e-06, + "loss": 0.5319, + "step": 5614 + }, + { + "epoch": 0.41280694015585945, + "grad_norm": 0.8169090747833252, + "learning_rate": 4.943737120773185e-06, + "loss": 0.5463, + "step": 5615 + }, + { + "epoch": 0.41288045875606527, + "grad_norm": 0.9293051362037659, + "learning_rate": 4.94371679225315e-06, + "loss": 0.595, + "step": 5616 + }, + { + "epoch": 0.41295397735627115, + "grad_norm": 0.8449007868766785, + "learning_rate": 4.943696460103107e-06, + "loss": 0.5619, + "step": 5617 + }, + { + "epoch": 0.413027495956477, + "grad_norm": 0.8354411721229553, + "learning_rate": 4.943676124323087e-06, + "loss": 0.5611, + "step": 5618 + }, + { + "epoch": 0.41310101455668286, + "grad_norm": 0.863073468208313, + "learning_rate": 4.94365578491312e-06, + "loss": 0.5561, + "step": 5619 + }, + { + "epoch": 0.4131745331568887, + "grad_norm": 0.8762817978858948, + "learning_rate": 4.943635441873236e-06, + "loss": 0.5518, + "step": 5620 + }, + { + "epoch": 0.41324805175709456, + "grad_norm": 0.851527214050293, + "learning_rate": 4.9436150952034656e-06, + "loss": 0.5355, + "step": 5621 + }, + { + "epoch": 0.4133215703573004, + "grad_norm": 0.8155947327613831, + "learning_rate": 4.943594744903838e-06, + "loss": 0.5302, + "step": 5622 + }, + { + "epoch": 0.41339508895750626, + "grad_norm": 0.8725613355636597, + "learning_rate": 4.943574390974385e-06, + "loss": 0.5953, + "step": 5623 + }, + { + "epoch": 0.4134686075577121, + "grad_norm": 0.8739736080169678, + "learning_rate": 4.943554033415136e-06, + "loss": 0.5609, + "step": 5624 + }, + { + "epoch": 0.41354212615791797, + "grad_norm": 0.8449422121047974, + "learning_rate": 4.943533672226121e-06, + "loss": 0.5665, + "step": 5625 + }, + { + "epoch": 0.4136156447581238, + "grad_norm": 0.8535363078117371, + "learning_rate": 4.943513307407371e-06, + "loss": 0.5724, + "step": 5626 + }, + { + "epoch": 0.41368916335832967, + "grad_norm": 0.8076558709144592, + "learning_rate": 4.943492938958915e-06, + "loss": 0.5748, + "step": 5627 + }, + { + "epoch": 0.4137626819585355, + "grad_norm": 0.8484912514686584, + "learning_rate": 4.943472566880785e-06, + "loss": 0.5494, + "step": 5628 + }, + { + "epoch": 0.4138362005587414, + "grad_norm": 0.9115769863128662, + "learning_rate": 4.943452191173009e-06, + "loss": 0.5698, + "step": 5629 + }, + { + "epoch": 0.4139097191589472, + "grad_norm": 0.8793432116508484, + "learning_rate": 4.9434318118356195e-06, + "loss": 0.5674, + "step": 5630 + }, + { + "epoch": 0.4139832377591531, + "grad_norm": 0.857602059841156, + "learning_rate": 4.943411428868646e-06, + "loss": 0.575, + "step": 5631 + }, + { + "epoch": 0.4140567563593589, + "grad_norm": 0.8666999340057373, + "learning_rate": 4.943391042272118e-06, + "loss": 0.5366, + "step": 5632 + }, + { + "epoch": 0.4141302749595648, + "grad_norm": 0.8165535926818848, + "learning_rate": 4.9433706520460665e-06, + "loss": 0.5711, + "step": 5633 + }, + { + "epoch": 0.4142037935597706, + "grad_norm": 0.8675072193145752, + "learning_rate": 4.943350258190522e-06, + "loss": 0.5882, + "step": 5634 + }, + { + "epoch": 0.4142773121599765, + "grad_norm": 0.8615362048149109, + "learning_rate": 4.943329860705515e-06, + "loss": 0.5742, + "step": 5635 + }, + { + "epoch": 0.4143508307601823, + "grad_norm": 0.8962957859039307, + "learning_rate": 4.943309459591074e-06, + "loss": 0.5729, + "step": 5636 + }, + { + "epoch": 0.4144243493603882, + "grad_norm": 0.8560360074043274, + "learning_rate": 4.943289054847232e-06, + "loss": 0.5781, + "step": 5637 + }, + { + "epoch": 0.414497867960594, + "grad_norm": 0.8622186183929443, + "learning_rate": 4.943268646474016e-06, + "loss": 0.556, + "step": 5638 + }, + { + "epoch": 0.4145713865607999, + "grad_norm": 0.8753817677497864, + "learning_rate": 4.943248234471461e-06, + "loss": 0.5253, + "step": 5639 + }, + { + "epoch": 0.4146449051610057, + "grad_norm": 0.83315110206604, + "learning_rate": 4.943227818839592e-06, + "loss": 0.5863, + "step": 5640 + }, + { + "epoch": 0.4147184237612116, + "grad_norm": 0.8523180484771729, + "learning_rate": 4.943207399578443e-06, + "loss": 0.5883, + "step": 5641 + }, + { + "epoch": 0.4147919423614174, + "grad_norm": 0.8713707327842712, + "learning_rate": 4.943186976688043e-06, + "loss": 0.5736, + "step": 5642 + }, + { + "epoch": 0.4148654609616233, + "grad_norm": 0.8568722009658813, + "learning_rate": 4.943166550168422e-06, + "loss": 0.5455, + "step": 5643 + }, + { + "epoch": 0.4149389795618291, + "grad_norm": 0.8127759099006653, + "learning_rate": 4.943146120019612e-06, + "loss": 0.5876, + "step": 5644 + }, + { + "epoch": 0.415012498162035, + "grad_norm": 0.8471834063529968, + "learning_rate": 4.943125686241641e-06, + "loss": 0.5607, + "step": 5645 + }, + { + "epoch": 0.41508601676224083, + "grad_norm": 0.895693302154541, + "learning_rate": 4.9431052488345415e-06, + "loss": 0.5805, + "step": 5646 + }, + { + "epoch": 0.4151595353624467, + "grad_norm": 0.865990400314331, + "learning_rate": 4.943084807798343e-06, + "loss": 0.5616, + "step": 5647 + }, + { + "epoch": 0.41523305396265253, + "grad_norm": 0.9740357398986816, + "learning_rate": 4.943064363133075e-06, + "loss": 0.5923, + "step": 5648 + }, + { + "epoch": 0.4153065725628584, + "grad_norm": 0.8550482392311096, + "learning_rate": 4.943043914838769e-06, + "loss": 0.5922, + "step": 5649 + }, + { + "epoch": 0.41538009116306424, + "grad_norm": 0.8833985924720764, + "learning_rate": 4.943023462915455e-06, + "loss": 0.5977, + "step": 5650 + }, + { + "epoch": 0.4154536097632701, + "grad_norm": 0.8616220355033875, + "learning_rate": 4.943003007363164e-06, + "loss": 0.5631, + "step": 5651 + }, + { + "epoch": 0.41552712836347594, + "grad_norm": 0.8954569697380066, + "learning_rate": 4.942982548181925e-06, + "loss": 0.5892, + "step": 5652 + }, + { + "epoch": 0.4156006469636818, + "grad_norm": 0.8488690257072449, + "learning_rate": 4.94296208537177e-06, + "loss": 0.5924, + "step": 5653 + }, + { + "epoch": 0.41567416556388764, + "grad_norm": 0.8515125513076782, + "learning_rate": 4.942941618932728e-06, + "loss": 0.5566, + "step": 5654 + }, + { + "epoch": 0.4157476841640935, + "grad_norm": 0.8895637392997742, + "learning_rate": 4.94292114886483e-06, + "loss": 0.5729, + "step": 5655 + }, + { + "epoch": 0.41582120276429935, + "grad_norm": 0.9179414510726929, + "learning_rate": 4.942900675168106e-06, + "loss": 0.5501, + "step": 5656 + }, + { + "epoch": 0.4158947213645052, + "grad_norm": 0.8330017328262329, + "learning_rate": 4.9428801978425875e-06, + "loss": 0.5798, + "step": 5657 + }, + { + "epoch": 0.41596823996471105, + "grad_norm": 0.8536288738250732, + "learning_rate": 4.942859716888304e-06, + "loss": 0.4947, + "step": 5658 + }, + { + "epoch": 0.41604175856491693, + "grad_norm": 0.8693055510520935, + "learning_rate": 4.942839232305286e-06, + "loss": 0.5275, + "step": 5659 + }, + { + "epoch": 0.41611527716512275, + "grad_norm": 0.8506203889846802, + "learning_rate": 4.942818744093565e-06, + "loss": 0.5544, + "step": 5660 + }, + { + "epoch": 0.41618879576532863, + "grad_norm": 0.9001123905181885, + "learning_rate": 4.94279825225317e-06, + "loss": 0.5578, + "step": 5661 + }, + { + "epoch": 0.41626231436553446, + "grad_norm": 0.8787826895713806, + "learning_rate": 4.942777756784132e-06, + "loss": 0.5262, + "step": 5662 + }, + { + "epoch": 0.41633583296574034, + "grad_norm": 0.8411694169044495, + "learning_rate": 4.942757257686481e-06, + "loss": 0.602, + "step": 5663 + }, + { + "epoch": 0.41640935156594616, + "grad_norm": 0.8735531568527222, + "learning_rate": 4.942736754960248e-06, + "loss": 0.5934, + "step": 5664 + }, + { + "epoch": 0.41648287016615204, + "grad_norm": 0.8581012487411499, + "learning_rate": 4.942716248605464e-06, + "loss": 0.5494, + "step": 5665 + }, + { + "epoch": 0.41655638876635787, + "grad_norm": 0.8441959619522095, + "learning_rate": 4.942695738622157e-06, + "loss": 0.5634, + "step": 5666 + }, + { + "epoch": 0.41662990736656375, + "grad_norm": 0.8578888177871704, + "learning_rate": 4.942675225010361e-06, + "loss": 0.5716, + "step": 5667 + }, + { + "epoch": 0.41670342596676957, + "grad_norm": 0.8654326796531677, + "learning_rate": 4.942654707770104e-06, + "loss": 0.6025, + "step": 5668 + }, + { + "epoch": 0.41677694456697545, + "grad_norm": 0.8395187258720398, + "learning_rate": 4.942634186901418e-06, + "loss": 0.5504, + "step": 5669 + }, + { + "epoch": 0.4168504631671813, + "grad_norm": 0.8962806463241577, + "learning_rate": 4.942613662404332e-06, + "loss": 0.5863, + "step": 5670 + }, + { + "epoch": 0.41692398176738715, + "grad_norm": 0.8125831484794617, + "learning_rate": 4.942593134278877e-06, + "loss": 0.5748, + "step": 5671 + }, + { + "epoch": 0.416997500367593, + "grad_norm": 0.82728111743927, + "learning_rate": 4.942572602525084e-06, + "loss": 0.5713, + "step": 5672 + }, + { + "epoch": 0.41707101896779886, + "grad_norm": 0.8705993294715881, + "learning_rate": 4.942552067142982e-06, + "loss": 0.5826, + "step": 5673 + }, + { + "epoch": 0.4171445375680047, + "grad_norm": 0.9215688705444336, + "learning_rate": 4.942531528132604e-06, + "loss": 0.5853, + "step": 5674 + }, + { + "epoch": 0.41721805616821056, + "grad_norm": 0.8908909559249878, + "learning_rate": 4.942510985493979e-06, + "loss": 0.5372, + "step": 5675 + }, + { + "epoch": 0.4172915747684164, + "grad_norm": 0.8709513545036316, + "learning_rate": 4.942490439227137e-06, + "loss": 0.5889, + "step": 5676 + }, + { + "epoch": 0.41736509336862226, + "grad_norm": 0.8469648361206055, + "learning_rate": 4.94246988933211e-06, + "loss": 0.5679, + "step": 5677 + }, + { + "epoch": 0.4174386119688281, + "grad_norm": 0.8383297920227051, + "learning_rate": 4.942449335808928e-06, + "loss": 0.5679, + "step": 5678 + }, + { + "epoch": 0.41751213056903397, + "grad_norm": 0.875877320766449, + "learning_rate": 4.94242877865762e-06, + "loss": 0.5915, + "step": 5679 + }, + { + "epoch": 0.4175856491692398, + "grad_norm": 0.8830773234367371, + "learning_rate": 4.942408217878218e-06, + "loss": 0.577, + "step": 5680 + }, + { + "epoch": 0.41765916776944567, + "grad_norm": 0.7892526984214783, + "learning_rate": 4.942387653470753e-06, + "loss": 0.5347, + "step": 5681 + }, + { + "epoch": 0.4177326863696515, + "grad_norm": 0.8873932957649231, + "learning_rate": 4.942367085435255e-06, + "loss": 0.5787, + "step": 5682 + }, + { + "epoch": 0.4178062049698574, + "grad_norm": 0.9010302424430847, + "learning_rate": 4.942346513771754e-06, + "loss": 0.6, + "step": 5683 + }, + { + "epoch": 0.4178797235700632, + "grad_norm": 0.8315152525901794, + "learning_rate": 4.94232593848028e-06, + "loss": 0.5501, + "step": 5684 + }, + { + "epoch": 0.4179532421702691, + "grad_norm": 0.8495314121246338, + "learning_rate": 4.942305359560866e-06, + "loss": 0.5871, + "step": 5685 + }, + { + "epoch": 0.4180267607704749, + "grad_norm": 0.7960696816444397, + "learning_rate": 4.942284777013541e-06, + "loss": 0.5017, + "step": 5686 + }, + { + "epoch": 0.4181002793706808, + "grad_norm": 0.8750170469284058, + "learning_rate": 4.942264190838336e-06, + "loss": 0.5676, + "step": 5687 + }, + { + "epoch": 0.4181737979708866, + "grad_norm": 0.8810212016105652, + "learning_rate": 4.942243601035281e-06, + "loss": 0.5123, + "step": 5688 + }, + { + "epoch": 0.4182473165710925, + "grad_norm": 0.835769534111023, + "learning_rate": 4.942223007604406e-06, + "loss": 0.5695, + "step": 5689 + }, + { + "epoch": 0.4183208351712983, + "grad_norm": 0.830689549446106, + "learning_rate": 4.942202410545743e-06, + "loss": 0.4425, + "step": 5690 + }, + { + "epoch": 0.4183943537715042, + "grad_norm": 0.8590388298034668, + "learning_rate": 4.942181809859322e-06, + "loss": 0.5848, + "step": 5691 + }, + { + "epoch": 0.41846787237171007, + "grad_norm": 0.9038001894950867, + "learning_rate": 4.942161205545174e-06, + "loss": 0.5941, + "step": 5692 + }, + { + "epoch": 0.4185413909719159, + "grad_norm": 0.8418582081794739, + "learning_rate": 4.942140597603329e-06, + "loss": 0.5303, + "step": 5693 + }, + { + "epoch": 0.4186149095721218, + "grad_norm": 0.9704475402832031, + "learning_rate": 4.942119986033818e-06, + "loss": 0.5901, + "step": 5694 + }, + { + "epoch": 0.4186884281723276, + "grad_norm": 0.8419850468635559, + "learning_rate": 4.942099370836671e-06, + "loss": 0.5592, + "step": 5695 + }, + { + "epoch": 0.4187619467725335, + "grad_norm": 0.8954502940177917, + "learning_rate": 4.94207875201192e-06, + "loss": 0.5572, + "step": 5696 + }, + { + "epoch": 0.4188354653727393, + "grad_norm": 0.8656160235404968, + "learning_rate": 4.942058129559594e-06, + "loss": 0.57, + "step": 5697 + }, + { + "epoch": 0.4189089839729452, + "grad_norm": 0.8888777494430542, + "learning_rate": 4.942037503479724e-06, + "loss": 0.5942, + "step": 5698 + }, + { + "epoch": 0.418982502573151, + "grad_norm": 0.8314371705055237, + "learning_rate": 4.9420168737723416e-06, + "loss": 0.56, + "step": 5699 + }, + { + "epoch": 0.4190560211733569, + "grad_norm": 0.9212138652801514, + "learning_rate": 4.9419962404374775e-06, + "loss": 0.5615, + "step": 5700 + }, + { + "epoch": 0.4191295397735627, + "grad_norm": 0.8297631144523621, + "learning_rate": 4.941975603475161e-06, + "loss": 0.5252, + "step": 5701 + }, + { + "epoch": 0.4192030583737686, + "grad_norm": 0.8148805499076843, + "learning_rate": 4.941954962885423e-06, + "loss": 0.5472, + "step": 5702 + }, + { + "epoch": 0.4192765769739744, + "grad_norm": 0.8402536511421204, + "learning_rate": 4.941934318668296e-06, + "loss": 0.5408, + "step": 5703 + }, + { + "epoch": 0.4193500955741803, + "grad_norm": 0.8260670304298401, + "learning_rate": 4.941913670823808e-06, + "loss": 0.5663, + "step": 5704 + }, + { + "epoch": 0.4194236141743861, + "grad_norm": 0.8729987144470215, + "learning_rate": 4.941893019351991e-06, + "loss": 0.5721, + "step": 5705 + }, + { + "epoch": 0.419497132774592, + "grad_norm": 0.8392453789710999, + "learning_rate": 4.941872364252877e-06, + "loss": 0.52, + "step": 5706 + }, + { + "epoch": 0.4195706513747978, + "grad_norm": 0.8822004199028015, + "learning_rate": 4.941851705526493e-06, + "loss": 0.5813, + "step": 5707 + }, + { + "epoch": 0.4196441699750037, + "grad_norm": 0.8283119797706604, + "learning_rate": 4.941831043172874e-06, + "loss": 0.5102, + "step": 5708 + }, + { + "epoch": 0.4197176885752095, + "grad_norm": 0.8880718350410461, + "learning_rate": 4.9418103771920475e-06, + "loss": 0.6009, + "step": 5709 + }, + { + "epoch": 0.4197912071754154, + "grad_norm": 0.8509794473648071, + "learning_rate": 4.941789707584046e-06, + "loss": 0.557, + "step": 5710 + }, + { + "epoch": 0.41986472577562123, + "grad_norm": 0.9380194544792175, + "learning_rate": 4.9417690343489e-06, + "loss": 0.6051, + "step": 5711 + }, + { + "epoch": 0.4199382443758271, + "grad_norm": 0.8209242820739746, + "learning_rate": 4.941748357486639e-06, + "loss": 0.5488, + "step": 5712 + }, + { + "epoch": 0.42001176297603293, + "grad_norm": 0.8372262120246887, + "learning_rate": 4.9417276769972945e-06, + "loss": 0.5705, + "step": 5713 + }, + { + "epoch": 0.4200852815762388, + "grad_norm": 0.8409395813941956, + "learning_rate": 4.941706992880898e-06, + "loss": 0.5525, + "step": 5714 + }, + { + "epoch": 0.42015880017644464, + "grad_norm": 0.870317280292511, + "learning_rate": 4.941686305137479e-06, + "loss": 0.5856, + "step": 5715 + }, + { + "epoch": 0.4202323187766505, + "grad_norm": 0.9025053977966309, + "learning_rate": 4.941665613767069e-06, + "loss": 0.5751, + "step": 5716 + }, + { + "epoch": 0.42030583737685634, + "grad_norm": 0.8157532215118408, + "learning_rate": 4.941644918769698e-06, + "loss": 0.5449, + "step": 5717 + }, + { + "epoch": 0.4203793559770622, + "grad_norm": 0.8426591157913208, + "learning_rate": 4.941624220145397e-06, + "loss": 0.5437, + "step": 5718 + }, + { + "epoch": 0.42045287457726804, + "grad_norm": 0.8126623034477234, + "learning_rate": 4.941603517894198e-06, + "loss": 0.5734, + "step": 5719 + }, + { + "epoch": 0.4205263931774739, + "grad_norm": 0.8885464668273926, + "learning_rate": 4.941582812016129e-06, + "loss": 0.5912, + "step": 5720 + }, + { + "epoch": 0.42059991177767975, + "grad_norm": 0.7968602776527405, + "learning_rate": 4.9415621025112236e-06, + "loss": 0.5581, + "step": 5721 + }, + { + "epoch": 0.4206734303778856, + "grad_norm": 0.8390501737594604, + "learning_rate": 4.9415413893795116e-06, + "loss": 0.5167, + "step": 5722 + }, + { + "epoch": 0.42074694897809145, + "grad_norm": 0.8331729173660278, + "learning_rate": 4.941520672621023e-06, + "loss": 0.5363, + "step": 5723 + }, + { + "epoch": 0.42082046757829733, + "grad_norm": 0.8332390785217285, + "learning_rate": 4.94149995223579e-06, + "loss": 0.5449, + "step": 5724 + }, + { + "epoch": 0.42089398617850315, + "grad_norm": 0.8408259153366089, + "learning_rate": 4.941479228223841e-06, + "loss": 0.5156, + "step": 5725 + }, + { + "epoch": 0.42096750477870903, + "grad_norm": 0.837531566619873, + "learning_rate": 4.941458500585209e-06, + "loss": 0.553, + "step": 5726 + }, + { + "epoch": 0.42104102337891486, + "grad_norm": 0.8646148443222046, + "learning_rate": 4.941437769319924e-06, + "loss": 0.5458, + "step": 5727 + }, + { + "epoch": 0.42111454197912074, + "grad_norm": 0.8385757207870483, + "learning_rate": 4.941417034428018e-06, + "loss": 0.5286, + "step": 5728 + }, + { + "epoch": 0.42118806057932656, + "grad_norm": 0.8311551809310913, + "learning_rate": 4.94139629590952e-06, + "loss": 0.5623, + "step": 5729 + }, + { + "epoch": 0.42126157917953244, + "grad_norm": 0.8102257251739502, + "learning_rate": 4.94137555376446e-06, + "loss": 0.5479, + "step": 5730 + }, + { + "epoch": 0.42133509777973827, + "grad_norm": 0.829332709312439, + "learning_rate": 4.941354807992872e-06, + "loss": 0.5245, + "step": 5731 + }, + { + "epoch": 0.42140861637994415, + "grad_norm": 0.9050908088684082, + "learning_rate": 4.941334058594784e-06, + "loss": 0.5787, + "step": 5732 + }, + { + "epoch": 0.42148213498014997, + "grad_norm": 0.8456541299819946, + "learning_rate": 4.941313305570229e-06, + "loss": 0.5768, + "step": 5733 + }, + { + "epoch": 0.42155565358035585, + "grad_norm": 0.8542787432670593, + "learning_rate": 4.941292548919237e-06, + "loss": 0.5808, + "step": 5734 + }, + { + "epoch": 0.4216291721805617, + "grad_norm": 0.8402213454246521, + "learning_rate": 4.941271788641838e-06, + "loss": 0.568, + "step": 5735 + }, + { + "epoch": 0.42170269078076755, + "grad_norm": 0.8888499736785889, + "learning_rate": 4.941251024738063e-06, + "loss": 0.5222, + "step": 5736 + }, + { + "epoch": 0.4217762093809734, + "grad_norm": 0.8376493453979492, + "learning_rate": 4.941230257207944e-06, + "loss": 0.567, + "step": 5737 + }, + { + "epoch": 0.42184972798117926, + "grad_norm": 0.8661887645721436, + "learning_rate": 4.94120948605151e-06, + "loss": 0.5451, + "step": 5738 + }, + { + "epoch": 0.4219232465813851, + "grad_norm": 0.853888988494873, + "learning_rate": 4.941188711268794e-06, + "loss": 0.5517, + "step": 5739 + }, + { + "epoch": 0.42199676518159096, + "grad_norm": 0.8577131032943726, + "learning_rate": 4.941167932859826e-06, + "loss": 0.5722, + "step": 5740 + }, + { + "epoch": 0.4220702837817968, + "grad_norm": 0.8511307835578918, + "learning_rate": 4.941147150824636e-06, + "loss": 0.5896, + "step": 5741 + }, + { + "epoch": 0.42214380238200266, + "grad_norm": 0.8544118404388428, + "learning_rate": 4.941126365163256e-06, + "loss": 0.556, + "step": 5742 + }, + { + "epoch": 0.4222173209822085, + "grad_norm": 0.8402457237243652, + "learning_rate": 4.9411055758757165e-06, + "loss": 0.561, + "step": 5743 + }, + { + "epoch": 0.42229083958241437, + "grad_norm": 0.8710503578186035, + "learning_rate": 4.941084782962048e-06, + "loss": 0.5679, + "step": 5744 + }, + { + "epoch": 0.4223643581826202, + "grad_norm": 0.8844261765480042, + "learning_rate": 4.941063986422282e-06, + "loss": 0.5972, + "step": 5745 + }, + { + "epoch": 0.42243787678282607, + "grad_norm": 0.834429144859314, + "learning_rate": 4.941043186256449e-06, + "loss": 0.5216, + "step": 5746 + }, + { + "epoch": 0.4225113953830319, + "grad_norm": 0.8508890867233276, + "learning_rate": 4.9410223824645805e-06, + "loss": 0.5329, + "step": 5747 + }, + { + "epoch": 0.4225849139832378, + "grad_norm": 0.9168327450752258, + "learning_rate": 4.941001575046706e-06, + "loss": 0.6026, + "step": 5748 + }, + { + "epoch": 0.4226584325834436, + "grad_norm": 0.8222439885139465, + "learning_rate": 4.940980764002859e-06, + "loss": 0.5423, + "step": 5749 + }, + { + "epoch": 0.4227319511836495, + "grad_norm": 0.9523693323135376, + "learning_rate": 4.940959949333067e-06, + "loss": 0.6452, + "step": 5750 + }, + { + "epoch": 0.4228054697838553, + "grad_norm": 0.8188104033470154, + "learning_rate": 4.940939131037364e-06, + "loss": 0.5307, + "step": 5751 + }, + { + "epoch": 0.4228789883840612, + "grad_norm": 0.8750057816505432, + "learning_rate": 4.940918309115777e-06, + "loss": 0.5725, + "step": 5752 + }, + { + "epoch": 0.422952506984267, + "grad_norm": 0.8704073429107666, + "learning_rate": 4.940897483568342e-06, + "loss": 0.556, + "step": 5753 + }, + { + "epoch": 0.4230260255844729, + "grad_norm": 0.8366780877113342, + "learning_rate": 4.940876654395087e-06, + "loss": 0.56, + "step": 5754 + }, + { + "epoch": 0.4230995441846787, + "grad_norm": 0.8623337149620056, + "learning_rate": 4.940855821596044e-06, + "loss": 0.5512, + "step": 5755 + }, + { + "epoch": 0.4231730627848846, + "grad_norm": 0.8567788600921631, + "learning_rate": 4.940834985171241e-06, + "loss": 0.5689, + "step": 5756 + }, + { + "epoch": 0.4232465813850904, + "grad_norm": 0.8122424483299255, + "learning_rate": 4.940814145120713e-06, + "loss": 0.4974, + "step": 5757 + }, + { + "epoch": 0.4233200999852963, + "grad_norm": 0.8965974450111389, + "learning_rate": 4.940793301444489e-06, + "loss": 0.6137, + "step": 5758 + }, + { + "epoch": 0.4233936185855021, + "grad_norm": 0.8168135285377502, + "learning_rate": 4.9407724541426e-06, + "loss": 0.5812, + "step": 5759 + }, + { + "epoch": 0.423467137185708, + "grad_norm": 0.8416956663131714, + "learning_rate": 4.940751603215078e-06, + "loss": 0.5483, + "step": 5760 + }, + { + "epoch": 0.4235406557859138, + "grad_norm": 0.808059573173523, + "learning_rate": 4.940730748661953e-06, + "loss": 0.5207, + "step": 5761 + }, + { + "epoch": 0.4236141743861197, + "grad_norm": 0.8177242875099182, + "learning_rate": 4.940709890483255e-06, + "loss": 0.5539, + "step": 5762 + }, + { + "epoch": 0.4236876929863255, + "grad_norm": 0.8117708563804626, + "learning_rate": 4.940689028679017e-06, + "loss": 0.5502, + "step": 5763 + }, + { + "epoch": 0.4237612115865314, + "grad_norm": 0.8492175936698914, + "learning_rate": 4.940668163249269e-06, + "loss": 0.5916, + "step": 5764 + }, + { + "epoch": 0.42383473018673723, + "grad_norm": 0.846267580986023, + "learning_rate": 4.940647294194042e-06, + "loss": 0.5584, + "step": 5765 + }, + { + "epoch": 0.4239082487869431, + "grad_norm": 0.879347562789917, + "learning_rate": 4.940626421513367e-06, + "loss": 0.5938, + "step": 5766 + }, + { + "epoch": 0.42398176738714893, + "grad_norm": 0.8036531805992126, + "learning_rate": 4.940605545207276e-06, + "loss": 0.5182, + "step": 5767 + }, + { + "epoch": 0.4240552859873548, + "grad_norm": 0.8278072476387024, + "learning_rate": 4.940584665275797e-06, + "loss": 0.5236, + "step": 5768 + }, + { + "epoch": 0.42412880458756064, + "grad_norm": 0.823492705821991, + "learning_rate": 4.9405637817189645e-06, + "loss": 0.5618, + "step": 5769 + }, + { + "epoch": 0.4242023231877665, + "grad_norm": 0.8851661086082458, + "learning_rate": 4.940542894536808e-06, + "loss": 0.5776, + "step": 5770 + }, + { + "epoch": 0.42427584178797234, + "grad_norm": 0.8645790815353394, + "learning_rate": 4.940522003729359e-06, + "loss": 0.5507, + "step": 5771 + }, + { + "epoch": 0.4243493603881782, + "grad_norm": 0.8350909352302551, + "learning_rate": 4.940501109296648e-06, + "loss": 0.5346, + "step": 5772 + }, + { + "epoch": 0.42442287898838404, + "grad_norm": 0.86342853307724, + "learning_rate": 4.9404802112387064e-06, + "loss": 0.5713, + "step": 5773 + }, + { + "epoch": 0.4244963975885899, + "grad_norm": 0.8935312032699585, + "learning_rate": 4.940459309555564e-06, + "loss": 0.5684, + "step": 5774 + }, + { + "epoch": 0.42456991618879575, + "grad_norm": 0.863854706287384, + "learning_rate": 4.940438404247254e-06, + "loss": 0.5755, + "step": 5775 + }, + { + "epoch": 0.42464343478900163, + "grad_norm": 0.8751413822174072, + "learning_rate": 4.940417495313806e-06, + "loss": 0.5496, + "step": 5776 + }, + { + "epoch": 0.42471695338920745, + "grad_norm": 0.8687617182731628, + "learning_rate": 4.940396582755252e-06, + "loss": 0.5621, + "step": 5777 + }, + { + "epoch": 0.42479047198941333, + "grad_norm": 0.8040814399719238, + "learning_rate": 4.940375666571622e-06, + "loss": 0.5547, + "step": 5778 + }, + { + "epoch": 0.42486399058961916, + "grad_norm": 0.8663309812545776, + "learning_rate": 4.940354746762947e-06, + "loss": 0.5521, + "step": 5779 + }, + { + "epoch": 0.42493750918982504, + "grad_norm": 0.8593219518661499, + "learning_rate": 4.94033382332926e-06, + "loss": 0.5973, + "step": 5780 + }, + { + "epoch": 0.42501102779003086, + "grad_norm": 0.877897322177887, + "learning_rate": 4.94031289627059e-06, + "loss": 0.5832, + "step": 5781 + }, + { + "epoch": 0.42508454639023674, + "grad_norm": 0.8052195310592651, + "learning_rate": 4.940291965586969e-06, + "loss": 0.544, + "step": 5782 + }, + { + "epoch": 0.42515806499044256, + "grad_norm": 0.8854167461395264, + "learning_rate": 4.940271031278428e-06, + "loss": 0.6161, + "step": 5783 + }, + { + "epoch": 0.42523158359064844, + "grad_norm": 0.8263436555862427, + "learning_rate": 4.9402500933449974e-06, + "loss": 0.5316, + "step": 5784 + }, + { + "epoch": 0.42530510219085427, + "grad_norm": 0.9357253909111023, + "learning_rate": 4.94022915178671e-06, + "loss": 0.5824, + "step": 5785 + }, + { + "epoch": 0.42537862079106015, + "grad_norm": 0.8893451690673828, + "learning_rate": 4.940208206603594e-06, + "loss": 0.5624, + "step": 5786 + }, + { + "epoch": 0.42545213939126597, + "grad_norm": 0.8615387678146362, + "learning_rate": 4.940187257795684e-06, + "loss": 0.5936, + "step": 5787 + }, + { + "epoch": 0.42552565799147185, + "grad_norm": 0.8726805448532104, + "learning_rate": 4.940166305363009e-06, + "loss": 0.5511, + "step": 5788 + }, + { + "epoch": 0.4255991765916777, + "grad_norm": 0.8500005602836609, + "learning_rate": 4.940145349305601e-06, + "loss": 0.5546, + "step": 5789 + }, + { + "epoch": 0.42567269519188355, + "grad_norm": 0.8612450957298279, + "learning_rate": 4.94012438962349e-06, + "loss": 0.5237, + "step": 5790 + }, + { + "epoch": 0.4257462137920894, + "grad_norm": 0.8162636756896973, + "learning_rate": 4.940103426316707e-06, + "loss": 0.5549, + "step": 5791 + }, + { + "epoch": 0.42581973239229526, + "grad_norm": 0.9294846057891846, + "learning_rate": 4.940082459385286e-06, + "loss": 0.5913, + "step": 5792 + }, + { + "epoch": 0.4258932509925011, + "grad_norm": 0.8274019360542297, + "learning_rate": 4.940061488829255e-06, + "loss": 0.54, + "step": 5793 + }, + { + "epoch": 0.42596676959270696, + "grad_norm": 0.867026686668396, + "learning_rate": 4.940040514648646e-06, + "loss": 0.589, + "step": 5794 + }, + { + "epoch": 0.4260402881929128, + "grad_norm": 0.848020076751709, + "learning_rate": 4.940019536843491e-06, + "loss": 0.5508, + "step": 5795 + }, + { + "epoch": 0.42611380679311867, + "grad_norm": 0.9305757284164429, + "learning_rate": 4.9399985554138206e-06, + "loss": 0.6384, + "step": 5796 + }, + { + "epoch": 0.4261873253933245, + "grad_norm": 0.8269542455673218, + "learning_rate": 4.939977570359665e-06, + "loss": 0.5415, + "step": 5797 + }, + { + "epoch": 0.42626084399353037, + "grad_norm": 0.8509697318077087, + "learning_rate": 4.939956581681058e-06, + "loss": 0.548, + "step": 5798 + }, + { + "epoch": 0.4263343625937362, + "grad_norm": 0.7937617301940918, + "learning_rate": 4.939935589378028e-06, + "loss": 0.5323, + "step": 5799 + }, + { + "epoch": 0.4264078811939421, + "grad_norm": 0.8560966849327087, + "learning_rate": 4.939914593450607e-06, + "loss": 0.5327, + "step": 5800 + }, + { + "epoch": 0.4264813997941479, + "grad_norm": 0.8793072700500488, + "learning_rate": 4.939893593898827e-06, + "loss": 0.5729, + "step": 5801 + }, + { + "epoch": 0.4265549183943538, + "grad_norm": 0.8621581792831421, + "learning_rate": 4.939872590722719e-06, + "loss": 0.5337, + "step": 5802 + }, + { + "epoch": 0.4266284369945596, + "grad_norm": 0.8565388917922974, + "learning_rate": 4.939851583922313e-06, + "loss": 0.5989, + "step": 5803 + }, + { + "epoch": 0.4267019555947655, + "grad_norm": 0.8570975065231323, + "learning_rate": 4.939830573497642e-06, + "loss": 0.558, + "step": 5804 + }, + { + "epoch": 0.4267754741949713, + "grad_norm": 0.8796389698982239, + "learning_rate": 4.939809559448736e-06, + "loss": 0.5595, + "step": 5805 + }, + { + "epoch": 0.4268489927951772, + "grad_norm": 0.8942188024520874, + "learning_rate": 4.939788541775627e-06, + "loss": 0.5867, + "step": 5806 + }, + { + "epoch": 0.426922511395383, + "grad_norm": 0.8625896573066711, + "learning_rate": 4.9397675204783445e-06, + "loss": 0.5457, + "step": 5807 + }, + { + "epoch": 0.4269960299955889, + "grad_norm": 0.8417090177536011, + "learning_rate": 4.939746495556922e-06, + "loss": 0.5613, + "step": 5808 + }, + { + "epoch": 0.4270695485957947, + "grad_norm": 0.8692400455474854, + "learning_rate": 4.939725467011388e-06, + "loss": 0.5296, + "step": 5809 + }, + { + "epoch": 0.4271430671960006, + "grad_norm": 0.8659632205963135, + "learning_rate": 4.939704434841777e-06, + "loss": 0.5664, + "step": 5810 + }, + { + "epoch": 0.4272165857962064, + "grad_norm": 0.8626614212989807, + "learning_rate": 4.939683399048118e-06, + "loss": 0.5682, + "step": 5811 + }, + { + "epoch": 0.4272901043964123, + "grad_norm": 0.8075825572013855, + "learning_rate": 4.939662359630444e-06, + "loss": 0.5144, + "step": 5812 + }, + { + "epoch": 0.4273636229966181, + "grad_norm": 0.8947359323501587, + "learning_rate": 4.939641316588784e-06, + "loss": 0.5929, + "step": 5813 + }, + { + "epoch": 0.427437141596824, + "grad_norm": 0.921930730342865, + "learning_rate": 4.939620269923171e-06, + "loss": 0.6289, + "step": 5814 + }, + { + "epoch": 0.4275106601970298, + "grad_norm": 0.8842116594314575, + "learning_rate": 4.939599219633635e-06, + "loss": 0.5389, + "step": 5815 + }, + { + "epoch": 0.4275841787972357, + "grad_norm": 0.8541107773780823, + "learning_rate": 4.939578165720209e-06, + "loss": 0.5757, + "step": 5816 + }, + { + "epoch": 0.4276576973974415, + "grad_norm": 0.8609312176704407, + "learning_rate": 4.939557108182922e-06, + "loss": 0.5568, + "step": 5817 + }, + { + "epoch": 0.4277312159976474, + "grad_norm": 0.847599983215332, + "learning_rate": 4.939536047021808e-06, + "loss": 0.5628, + "step": 5818 + }, + { + "epoch": 0.42780473459785323, + "grad_norm": 0.8432541489601135, + "learning_rate": 4.939514982236895e-06, + "loss": 0.5034, + "step": 5819 + }, + { + "epoch": 0.4278782531980591, + "grad_norm": 0.8541356921195984, + "learning_rate": 4.939493913828217e-06, + "loss": 0.512, + "step": 5820 + }, + { + "epoch": 0.42795177179826493, + "grad_norm": 0.8829058408737183, + "learning_rate": 4.939472841795805e-06, + "loss": 0.5637, + "step": 5821 + }, + { + "epoch": 0.4280252903984708, + "grad_norm": 0.8115208745002747, + "learning_rate": 4.939451766139688e-06, + "loss": 0.5333, + "step": 5822 + }, + { + "epoch": 0.42809880899867664, + "grad_norm": 0.8683201670646667, + "learning_rate": 4.9394306868599005e-06, + "loss": 0.6096, + "step": 5823 + }, + { + "epoch": 0.4281723275988825, + "grad_norm": 0.9070594906806946, + "learning_rate": 4.939409603956472e-06, + "loss": 0.5944, + "step": 5824 + }, + { + "epoch": 0.42824584619908834, + "grad_norm": 0.8285245299339294, + "learning_rate": 4.939388517429434e-06, + "loss": 0.5524, + "step": 5825 + }, + { + "epoch": 0.4283193647992942, + "grad_norm": 0.8227688074111938, + "learning_rate": 4.939367427278818e-06, + "loss": 0.5664, + "step": 5826 + }, + { + "epoch": 0.42839288339950005, + "grad_norm": 0.8349943161010742, + "learning_rate": 4.939346333504655e-06, + "loss": 0.5288, + "step": 5827 + }, + { + "epoch": 0.4284664019997059, + "grad_norm": 0.8752535581588745, + "learning_rate": 4.939325236106977e-06, + "loss": 0.5632, + "step": 5828 + }, + { + "epoch": 0.42853992059991175, + "grad_norm": 0.8564954400062561, + "learning_rate": 4.939304135085814e-06, + "loss": 0.5465, + "step": 5829 + }, + { + "epoch": 0.42861343920011763, + "grad_norm": 0.8581334352493286, + "learning_rate": 4.939283030441199e-06, + "loss": 0.5336, + "step": 5830 + }, + { + "epoch": 0.4286869578003235, + "grad_norm": 0.8451259732246399, + "learning_rate": 4.939261922173163e-06, + "loss": 0.5618, + "step": 5831 + }, + { + "epoch": 0.42876047640052933, + "grad_norm": 0.8538471460342407, + "learning_rate": 4.939240810281736e-06, + "loss": 0.5188, + "step": 5832 + }, + { + "epoch": 0.4288339950007352, + "grad_norm": 0.8340693116188049, + "learning_rate": 4.939219694766952e-06, + "loss": 0.5265, + "step": 5833 + }, + { + "epoch": 0.42890751360094104, + "grad_norm": 0.877341628074646, + "learning_rate": 4.939198575628839e-06, + "loss": 0.5505, + "step": 5834 + }, + { + "epoch": 0.4289810322011469, + "grad_norm": 0.8573412895202637, + "learning_rate": 4.93917745286743e-06, + "loss": 0.609, + "step": 5835 + }, + { + "epoch": 0.42905455080135274, + "grad_norm": 0.8544580936431885, + "learning_rate": 4.939156326482757e-06, + "loss": 0.5758, + "step": 5836 + }, + { + "epoch": 0.4291280694015586, + "grad_norm": 0.934984564781189, + "learning_rate": 4.939135196474851e-06, + "loss": 0.6175, + "step": 5837 + }, + { + "epoch": 0.42920158800176444, + "grad_norm": 0.8279903531074524, + "learning_rate": 4.939114062843744e-06, + "loss": 0.5409, + "step": 5838 + }, + { + "epoch": 0.4292751066019703, + "grad_norm": 0.9157954454421997, + "learning_rate": 4.939092925589466e-06, + "loss": 0.5838, + "step": 5839 + }, + { + "epoch": 0.42934862520217615, + "grad_norm": 0.8341495394706726, + "learning_rate": 4.939071784712048e-06, + "loss": 0.5793, + "step": 5840 + }, + { + "epoch": 0.42942214380238203, + "grad_norm": 0.8234595656394958, + "learning_rate": 4.939050640211523e-06, + "loss": 0.5548, + "step": 5841 + }, + { + "epoch": 0.42949566240258785, + "grad_norm": 0.8829561471939087, + "learning_rate": 4.939029492087923e-06, + "loss": 0.5588, + "step": 5842 + }, + { + "epoch": 0.42956918100279373, + "grad_norm": 0.9337680339813232, + "learning_rate": 4.939008340341277e-06, + "loss": 0.608, + "step": 5843 + }, + { + "epoch": 0.42964269960299956, + "grad_norm": 0.9208091497421265, + "learning_rate": 4.9389871849716185e-06, + "loss": 0.6199, + "step": 5844 + }, + { + "epoch": 0.42971621820320544, + "grad_norm": 0.8309066295623779, + "learning_rate": 4.938966025978977e-06, + "loss": 0.5498, + "step": 5845 + }, + { + "epoch": 0.42978973680341126, + "grad_norm": 0.8200415372848511, + "learning_rate": 4.9389448633633865e-06, + "loss": 0.5334, + "step": 5846 + }, + { + "epoch": 0.42986325540361714, + "grad_norm": 0.8072905540466309, + "learning_rate": 4.938923697124876e-06, + "loss": 0.5605, + "step": 5847 + }, + { + "epoch": 0.42993677400382296, + "grad_norm": 0.8450892567634583, + "learning_rate": 4.938902527263477e-06, + "loss": 0.5603, + "step": 5848 + }, + { + "epoch": 0.43001029260402884, + "grad_norm": 0.868007481098175, + "learning_rate": 4.938881353779224e-06, + "loss": 0.6345, + "step": 5849 + }, + { + "epoch": 0.43008381120423467, + "grad_norm": 0.8651533722877502, + "learning_rate": 4.938860176672144e-06, + "loss": 0.5357, + "step": 5850 + }, + { + "epoch": 0.43015732980444055, + "grad_norm": 0.8412454128265381, + "learning_rate": 4.938838995942273e-06, + "loss": 0.5476, + "step": 5851 + }, + { + "epoch": 0.43023084840464637, + "grad_norm": 0.8336376547813416, + "learning_rate": 4.938817811589639e-06, + "loss": 0.565, + "step": 5852 + }, + { + "epoch": 0.43030436700485225, + "grad_norm": 0.8670269846916199, + "learning_rate": 4.938796623614274e-06, + "loss": 0.531, + "step": 5853 + }, + { + "epoch": 0.4303778856050581, + "grad_norm": 0.88372403383255, + "learning_rate": 4.938775432016211e-06, + "loss": 0.5988, + "step": 5854 + }, + { + "epoch": 0.43045140420526395, + "grad_norm": 0.8768195509910583, + "learning_rate": 4.938754236795481e-06, + "loss": 0.5414, + "step": 5855 + }, + { + "epoch": 0.4305249228054698, + "grad_norm": 0.8616595268249512, + "learning_rate": 4.938733037952115e-06, + "loss": 0.5695, + "step": 5856 + }, + { + "epoch": 0.43059844140567566, + "grad_norm": 0.8498494625091553, + "learning_rate": 4.938711835486144e-06, + "loss": 0.5492, + "step": 5857 + }, + { + "epoch": 0.4306719600058815, + "grad_norm": 0.848477303981781, + "learning_rate": 4.9386906293976e-06, + "loss": 0.5403, + "step": 5858 + }, + { + "epoch": 0.43074547860608736, + "grad_norm": 0.864628791809082, + "learning_rate": 4.938669419686516e-06, + "loss": 0.5488, + "step": 5859 + }, + { + "epoch": 0.4308189972062932, + "grad_norm": 0.8903564810752869, + "learning_rate": 4.938648206352921e-06, + "loss": 0.5549, + "step": 5860 + }, + { + "epoch": 0.43089251580649907, + "grad_norm": 0.8709189891815186, + "learning_rate": 4.938626989396848e-06, + "loss": 0.5988, + "step": 5861 + }, + { + "epoch": 0.4309660344067049, + "grad_norm": 0.8113354444503784, + "learning_rate": 4.938605768818327e-06, + "loss": 0.5279, + "step": 5862 + }, + { + "epoch": 0.43103955300691077, + "grad_norm": 0.881168007850647, + "learning_rate": 4.938584544617392e-06, + "loss": 0.5365, + "step": 5863 + }, + { + "epoch": 0.4311130716071166, + "grad_norm": 0.8289696574211121, + "learning_rate": 4.938563316794073e-06, + "loss": 0.5525, + "step": 5864 + }, + { + "epoch": 0.4311865902073225, + "grad_norm": 0.8989803791046143, + "learning_rate": 4.938542085348401e-06, + "loss": 0.5744, + "step": 5865 + }, + { + "epoch": 0.4312601088075283, + "grad_norm": 0.8390684127807617, + "learning_rate": 4.93852085028041e-06, + "loss": 0.5303, + "step": 5866 + }, + { + "epoch": 0.4313336274077342, + "grad_norm": 0.8233065605163574, + "learning_rate": 4.938499611590128e-06, + "loss": 0.5581, + "step": 5867 + }, + { + "epoch": 0.43140714600794, + "grad_norm": 0.8441178202629089, + "learning_rate": 4.938478369277589e-06, + "loss": 0.5464, + "step": 5868 + }, + { + "epoch": 0.4314806646081459, + "grad_norm": 0.886667788028717, + "learning_rate": 4.938457123342824e-06, + "loss": 0.5709, + "step": 5869 + }, + { + "epoch": 0.4315541832083517, + "grad_norm": 0.8705106973648071, + "learning_rate": 4.938435873785864e-06, + "loss": 0.5411, + "step": 5870 + }, + { + "epoch": 0.4316277018085576, + "grad_norm": 0.8875359892845154, + "learning_rate": 4.938414620606742e-06, + "loss": 0.5883, + "step": 5871 + }, + { + "epoch": 0.4317012204087634, + "grad_norm": 0.8324692845344543, + "learning_rate": 4.9383933638054874e-06, + "loss": 0.5418, + "step": 5872 + }, + { + "epoch": 0.4317747390089693, + "grad_norm": 0.8471485376358032, + "learning_rate": 4.938372103382134e-06, + "loss": 0.569, + "step": 5873 + }, + { + "epoch": 0.4318482576091751, + "grad_norm": 0.8837981820106506, + "learning_rate": 4.938350839336711e-06, + "loss": 0.6092, + "step": 5874 + }, + { + "epoch": 0.431921776209381, + "grad_norm": 0.79567551612854, + "learning_rate": 4.938329571669252e-06, + "loss": 0.5013, + "step": 5875 + }, + { + "epoch": 0.4319952948095868, + "grad_norm": 0.9095391035079956, + "learning_rate": 4.938308300379789e-06, + "loss": 0.5444, + "step": 5876 + }, + { + "epoch": 0.4320688134097927, + "grad_norm": 0.8349650502204895, + "learning_rate": 4.938287025468351e-06, + "loss": 0.5658, + "step": 5877 + }, + { + "epoch": 0.4321423320099985, + "grad_norm": 0.8516146540641785, + "learning_rate": 4.938265746934973e-06, + "loss": 0.5479, + "step": 5878 + }, + { + "epoch": 0.4322158506102044, + "grad_norm": 0.8262496590614319, + "learning_rate": 4.938244464779684e-06, + "loss": 0.5741, + "step": 5879 + }, + { + "epoch": 0.4322893692104102, + "grad_norm": 0.8315979242324829, + "learning_rate": 4.938223179002515e-06, + "loss": 0.5562, + "step": 5880 + }, + { + "epoch": 0.4323628878106161, + "grad_norm": 0.8547444343566895, + "learning_rate": 4.9382018896035e-06, + "loss": 0.5552, + "step": 5881 + }, + { + "epoch": 0.4324364064108219, + "grad_norm": 0.8980317115783691, + "learning_rate": 4.9381805965826705e-06, + "loss": 0.5674, + "step": 5882 + }, + { + "epoch": 0.4325099250110278, + "grad_norm": 0.8346752524375916, + "learning_rate": 4.938159299940055e-06, + "loss": 0.5622, + "step": 5883 + }, + { + "epoch": 0.43258344361123363, + "grad_norm": 0.8630633354187012, + "learning_rate": 4.93813799967569e-06, + "loss": 0.5363, + "step": 5884 + }, + { + "epoch": 0.4326569622114395, + "grad_norm": 0.8395256400108337, + "learning_rate": 4.938116695789602e-06, + "loss": 0.5624, + "step": 5885 + }, + { + "epoch": 0.43273048081164533, + "grad_norm": 0.8389791250228882, + "learning_rate": 4.938095388281827e-06, + "loss": 0.584, + "step": 5886 + }, + { + "epoch": 0.4328039994118512, + "grad_norm": 0.810396134853363, + "learning_rate": 4.938074077152394e-06, + "loss": 0.5325, + "step": 5887 + }, + { + "epoch": 0.43287751801205704, + "grad_norm": 0.8300068378448486, + "learning_rate": 4.938052762401335e-06, + "loss": 0.5377, + "step": 5888 + }, + { + "epoch": 0.4329510366122629, + "grad_norm": 0.8151875734329224, + "learning_rate": 4.938031444028683e-06, + "loss": 0.5408, + "step": 5889 + }, + { + "epoch": 0.43302455521246874, + "grad_norm": 0.8249801397323608, + "learning_rate": 4.938010122034469e-06, + "loss": 0.5843, + "step": 5890 + }, + { + "epoch": 0.4330980738126746, + "grad_norm": 0.8394193649291992, + "learning_rate": 4.937988796418724e-06, + "loss": 0.592, + "step": 5891 + }, + { + "epoch": 0.43317159241288045, + "grad_norm": 0.907817006111145, + "learning_rate": 4.93796746718148e-06, + "loss": 0.567, + "step": 5892 + }, + { + "epoch": 0.4332451110130863, + "grad_norm": 0.8985273241996765, + "learning_rate": 4.937946134322769e-06, + "loss": 0.5919, + "step": 5893 + }, + { + "epoch": 0.43331862961329215, + "grad_norm": 0.86811763048172, + "learning_rate": 4.937924797842622e-06, + "loss": 0.5552, + "step": 5894 + }, + { + "epoch": 0.43339214821349803, + "grad_norm": 0.8117223381996155, + "learning_rate": 4.9379034577410724e-06, + "loss": 0.5412, + "step": 5895 + }, + { + "epoch": 0.43346566681370385, + "grad_norm": 0.8180073499679565, + "learning_rate": 4.93788211401815e-06, + "loss": 0.5292, + "step": 5896 + }, + { + "epoch": 0.43353918541390973, + "grad_norm": 0.8163172602653503, + "learning_rate": 4.937860766673887e-06, + "loss": 0.5486, + "step": 5897 + }, + { + "epoch": 0.43361270401411556, + "grad_norm": 0.8287296295166016, + "learning_rate": 4.937839415708315e-06, + "loss": 0.5743, + "step": 5898 + }, + { + "epoch": 0.43368622261432144, + "grad_norm": 0.8903658986091614, + "learning_rate": 4.937818061121468e-06, + "loss": 0.5974, + "step": 5899 + }, + { + "epoch": 0.43375974121452726, + "grad_norm": 0.7923168540000916, + "learning_rate": 4.937796702913373e-06, + "loss": 0.5189, + "step": 5900 + }, + { + "epoch": 0.43383325981473314, + "grad_norm": 0.8488616943359375, + "learning_rate": 4.937775341084067e-06, + "loss": 0.5755, + "step": 5901 + }, + { + "epoch": 0.43390677841493897, + "grad_norm": 0.8210403919219971, + "learning_rate": 4.937753975633577e-06, + "loss": 0.5544, + "step": 5902 + }, + { + "epoch": 0.43398029701514484, + "grad_norm": 0.8970739841461182, + "learning_rate": 4.937732606561939e-06, + "loss": 0.5918, + "step": 5903 + }, + { + "epoch": 0.43405381561535067, + "grad_norm": 0.7945690751075745, + "learning_rate": 4.937711233869181e-06, + "loss": 0.5274, + "step": 5904 + }, + { + "epoch": 0.43412733421555655, + "grad_norm": 0.8515077233314514, + "learning_rate": 4.937689857555338e-06, + "loss": 0.541, + "step": 5905 + }, + { + "epoch": 0.4342008528157624, + "grad_norm": 0.8944291472434998, + "learning_rate": 4.937668477620439e-06, + "loss": 0.5959, + "step": 5906 + }, + { + "epoch": 0.43427437141596825, + "grad_norm": 0.8327376246452332, + "learning_rate": 4.9376470940645176e-06, + "loss": 0.5382, + "step": 5907 + }, + { + "epoch": 0.4343478900161741, + "grad_norm": 0.8797633051872253, + "learning_rate": 4.937625706887604e-06, + "loss": 0.604, + "step": 5908 + }, + { + "epoch": 0.43442140861637996, + "grad_norm": 0.8296888470649719, + "learning_rate": 4.937604316089731e-06, + "loss": 0.5748, + "step": 5909 + }, + { + "epoch": 0.4344949272165858, + "grad_norm": 0.8530277609825134, + "learning_rate": 4.9375829216709316e-06, + "loss": 0.5394, + "step": 5910 + }, + { + "epoch": 0.43456844581679166, + "grad_norm": 0.8510040640830994, + "learning_rate": 4.9375615236312355e-06, + "loss": 0.5802, + "step": 5911 + }, + { + "epoch": 0.4346419644169975, + "grad_norm": 0.8566755056381226, + "learning_rate": 4.937540121970675e-06, + "loss": 0.5466, + "step": 5912 + }, + { + "epoch": 0.43471548301720336, + "grad_norm": 0.8658506870269775, + "learning_rate": 4.937518716689283e-06, + "loss": 0.5563, + "step": 5913 + }, + { + "epoch": 0.4347890016174092, + "grad_norm": 0.9122067093849182, + "learning_rate": 4.9374973077870895e-06, + "loss": 0.5741, + "step": 5914 + }, + { + "epoch": 0.43486252021761507, + "grad_norm": 0.8129492998123169, + "learning_rate": 4.937475895264127e-06, + "loss": 0.5312, + "step": 5915 + }, + { + "epoch": 0.4349360388178209, + "grad_norm": 0.845777153968811, + "learning_rate": 4.937454479120428e-06, + "loss": 0.5581, + "step": 5916 + }, + { + "epoch": 0.43500955741802677, + "grad_norm": 0.8635559678077698, + "learning_rate": 4.937433059356024e-06, + "loss": 0.561, + "step": 5917 + }, + { + "epoch": 0.4350830760182326, + "grad_norm": 0.8812333345413208, + "learning_rate": 4.9374116359709466e-06, + "loss": 0.62, + "step": 5918 + }, + { + "epoch": 0.4351565946184385, + "grad_norm": 0.7991049885749817, + "learning_rate": 4.9373902089652275e-06, + "loss": 0.5628, + "step": 5919 + }, + { + "epoch": 0.4352301132186443, + "grad_norm": 0.8227559924125671, + "learning_rate": 4.937368778338899e-06, + "loss": 0.5281, + "step": 5920 + }, + { + "epoch": 0.4353036318188502, + "grad_norm": 0.8182511329650879, + "learning_rate": 4.937347344091992e-06, + "loss": 0.5362, + "step": 5921 + }, + { + "epoch": 0.435377150419056, + "grad_norm": 0.8300790786743164, + "learning_rate": 4.937325906224539e-06, + "loss": 0.553, + "step": 5922 + }, + { + "epoch": 0.4354506690192619, + "grad_norm": 0.820948600769043, + "learning_rate": 4.937304464736573e-06, + "loss": 0.4719, + "step": 5923 + }, + { + "epoch": 0.4355241876194677, + "grad_norm": 0.8538265824317932, + "learning_rate": 4.937283019628124e-06, + "loss": 0.5446, + "step": 5924 + }, + { + "epoch": 0.4355977062196736, + "grad_norm": 0.8707088232040405, + "learning_rate": 4.937261570899224e-06, + "loss": 0.5084, + "step": 5925 + }, + { + "epoch": 0.4356712248198794, + "grad_norm": 0.8091280460357666, + "learning_rate": 4.9372401185499065e-06, + "loss": 0.5334, + "step": 5926 + }, + { + "epoch": 0.4357447434200853, + "grad_norm": 0.8695529103279114, + "learning_rate": 4.937218662580201e-06, + "loss": 0.5512, + "step": 5927 + }, + { + "epoch": 0.4358182620202911, + "grad_norm": 0.8287887573242188, + "learning_rate": 4.9371972029901414e-06, + "loss": 0.5442, + "step": 5928 + }, + { + "epoch": 0.435891780620497, + "grad_norm": 0.8031046390533447, + "learning_rate": 4.937175739779759e-06, + "loss": 0.5305, + "step": 5929 + }, + { + "epoch": 0.4359652992207028, + "grad_norm": 0.8489088416099548, + "learning_rate": 4.937154272949085e-06, + "loss": 0.5522, + "step": 5930 + }, + { + "epoch": 0.4360388178209087, + "grad_norm": 0.8191258907318115, + "learning_rate": 4.937132802498153e-06, + "loss": 0.5625, + "step": 5931 + }, + { + "epoch": 0.4361123364211145, + "grad_norm": 0.8358453512191772, + "learning_rate": 4.937111328426992e-06, + "loss": 0.553, + "step": 5932 + }, + { + "epoch": 0.4361858550213204, + "grad_norm": 0.8507808446884155, + "learning_rate": 4.937089850735635e-06, + "loss": 0.5241, + "step": 5933 + }, + { + "epoch": 0.4362593736215262, + "grad_norm": 0.8470038771629333, + "learning_rate": 4.937068369424117e-06, + "loss": 0.5777, + "step": 5934 + }, + { + "epoch": 0.4363328922217321, + "grad_norm": 0.9024307131767273, + "learning_rate": 4.9370468844924655e-06, + "loss": 0.5618, + "step": 5935 + }, + { + "epoch": 0.43640641082193793, + "grad_norm": 0.7840368747711182, + "learning_rate": 4.937025395940716e-06, + "loss": 0.5262, + "step": 5936 + }, + { + "epoch": 0.4364799294221438, + "grad_norm": 0.8507412672042847, + "learning_rate": 4.9370039037688965e-06, + "loss": 0.5297, + "step": 5937 + }, + { + "epoch": 0.43655344802234963, + "grad_norm": 0.8343524932861328, + "learning_rate": 4.936982407977042e-06, + "loss": 0.5804, + "step": 5938 + }, + { + "epoch": 0.4366269666225555, + "grad_norm": 0.8237587213516235, + "learning_rate": 4.936960908565184e-06, + "loss": 0.5644, + "step": 5939 + }, + { + "epoch": 0.43670048522276134, + "grad_norm": 0.8189142942428589, + "learning_rate": 4.9369394055333544e-06, + "loss": 0.5336, + "step": 5940 + }, + { + "epoch": 0.4367740038229672, + "grad_norm": 0.8576098084449768, + "learning_rate": 4.936917898881584e-06, + "loss": 0.5365, + "step": 5941 + }, + { + "epoch": 0.43684752242317304, + "grad_norm": 0.8129881620407104, + "learning_rate": 4.936896388609905e-06, + "loss": 0.5507, + "step": 5942 + }, + { + "epoch": 0.4369210410233789, + "grad_norm": 0.9173382520675659, + "learning_rate": 4.936874874718351e-06, + "loss": 0.5739, + "step": 5943 + }, + { + "epoch": 0.43699455962358474, + "grad_norm": 0.8909888863563538, + "learning_rate": 4.936853357206952e-06, + "loss": 0.5926, + "step": 5944 + }, + { + "epoch": 0.4370680782237906, + "grad_norm": 0.9226690530776978, + "learning_rate": 4.9368318360757414e-06, + "loss": 0.6116, + "step": 5945 + }, + { + "epoch": 0.43714159682399645, + "grad_norm": 0.8766199350357056, + "learning_rate": 4.9368103113247504e-06, + "loss": 0.5209, + "step": 5946 + }, + { + "epoch": 0.4372151154242023, + "grad_norm": 0.8767815232276917, + "learning_rate": 4.936788782954012e-06, + "loss": 0.5453, + "step": 5947 + }, + { + "epoch": 0.43728863402440815, + "grad_norm": 0.9196457862854004, + "learning_rate": 4.936767250963555e-06, + "loss": 0.5616, + "step": 5948 + }, + { + "epoch": 0.43736215262461403, + "grad_norm": 0.8247069120407104, + "learning_rate": 4.936745715353416e-06, + "loss": 0.5478, + "step": 5949 + }, + { + "epoch": 0.43743567122481986, + "grad_norm": 0.8428139090538025, + "learning_rate": 4.936724176123623e-06, + "loss": 0.5459, + "step": 5950 + }, + { + "epoch": 0.43750918982502573, + "grad_norm": 0.8397913575172424, + "learning_rate": 4.936702633274211e-06, + "loss": 0.6211, + "step": 5951 + }, + { + "epoch": 0.43758270842523156, + "grad_norm": 0.8341753482818604, + "learning_rate": 4.93668108680521e-06, + "loss": 0.5655, + "step": 5952 + }, + { + "epoch": 0.43765622702543744, + "grad_norm": 0.848676860332489, + "learning_rate": 4.936659536716653e-06, + "loss": 0.5238, + "step": 5953 + }, + { + "epoch": 0.43772974562564326, + "grad_norm": 0.85426265001297, + "learning_rate": 4.936637983008571e-06, + "loss": 0.5653, + "step": 5954 + }, + { + "epoch": 0.43780326422584914, + "grad_norm": 0.8181531429290771, + "learning_rate": 4.936616425680997e-06, + "loss": 0.5535, + "step": 5955 + }, + { + "epoch": 0.43787678282605497, + "grad_norm": 0.88297039270401, + "learning_rate": 4.936594864733962e-06, + "loss": 0.5394, + "step": 5956 + }, + { + "epoch": 0.43795030142626085, + "grad_norm": 0.8738917112350464, + "learning_rate": 4.9365733001674995e-06, + "loss": 0.5495, + "step": 5957 + }, + { + "epoch": 0.43802382002646667, + "grad_norm": 0.8088390231132507, + "learning_rate": 4.9365517319816415e-06, + "loss": 0.5326, + "step": 5958 + }, + { + "epoch": 0.43809733862667255, + "grad_norm": 0.8471123576164246, + "learning_rate": 4.9365301601764186e-06, + "loss": 0.5552, + "step": 5959 + }, + { + "epoch": 0.4381708572268784, + "grad_norm": 0.8408938646316528, + "learning_rate": 4.936508584751864e-06, + "loss": 0.5457, + "step": 5960 + }, + { + "epoch": 0.43824437582708425, + "grad_norm": 0.876057505607605, + "learning_rate": 4.936487005708008e-06, + "loss": 0.5498, + "step": 5961 + }, + { + "epoch": 0.4383178944272901, + "grad_norm": 0.8583795428276062, + "learning_rate": 4.936465423044885e-06, + "loss": 0.561, + "step": 5962 + }, + { + "epoch": 0.43839141302749596, + "grad_norm": 0.8338358998298645, + "learning_rate": 4.936443836762526e-06, + "loss": 0.5457, + "step": 5963 + }, + { + "epoch": 0.4384649316277018, + "grad_norm": 0.8493192195892334, + "learning_rate": 4.9364222468609636e-06, + "loss": 0.5854, + "step": 5964 + }, + { + "epoch": 0.43853845022790766, + "grad_norm": 0.9184139966964722, + "learning_rate": 4.936400653340229e-06, + "loss": 0.5534, + "step": 5965 + }, + { + "epoch": 0.4386119688281135, + "grad_norm": 0.829724133014679, + "learning_rate": 4.936379056200354e-06, + "loss": 0.5524, + "step": 5966 + }, + { + "epoch": 0.43868548742831937, + "grad_norm": 0.8361853361129761, + "learning_rate": 4.936357455441372e-06, + "loss": 0.5711, + "step": 5967 + }, + { + "epoch": 0.4387590060285252, + "grad_norm": 0.8350392580032349, + "learning_rate": 4.936335851063314e-06, + "loss": 0.5349, + "step": 5968 + }, + { + "epoch": 0.43883252462873107, + "grad_norm": 0.8975895643234253, + "learning_rate": 4.936314243066212e-06, + "loss": 0.5971, + "step": 5969 + }, + { + "epoch": 0.43890604322893695, + "grad_norm": 0.8160673379898071, + "learning_rate": 4.9362926314501e-06, + "loss": 0.5299, + "step": 5970 + }, + { + "epoch": 0.4389795618291428, + "grad_norm": 0.9040906429290771, + "learning_rate": 4.936271016215008e-06, + "loss": 0.5666, + "step": 5971 + }, + { + "epoch": 0.43905308042934865, + "grad_norm": 0.8590342402458191, + "learning_rate": 4.936249397360969e-06, + "loss": 0.5645, + "step": 5972 + }, + { + "epoch": 0.4391265990295545, + "grad_norm": 0.8712059855461121, + "learning_rate": 4.9362277748880145e-06, + "loss": 0.5772, + "step": 5973 + }, + { + "epoch": 0.43920011762976036, + "grad_norm": 0.8424125909805298, + "learning_rate": 4.936206148796177e-06, + "loss": 0.5471, + "step": 5974 + }, + { + "epoch": 0.4392736362299662, + "grad_norm": 0.8407922983169556, + "learning_rate": 4.93618451908549e-06, + "loss": 0.5457, + "step": 5975 + }, + { + "epoch": 0.43934715483017206, + "grad_norm": 0.8758879899978638, + "learning_rate": 4.936162885755982e-06, + "loss": 0.5828, + "step": 5976 + }, + { + "epoch": 0.4394206734303779, + "grad_norm": 0.8341915607452393, + "learning_rate": 4.936141248807689e-06, + "loss": 0.5478, + "step": 5977 + }, + { + "epoch": 0.43949419203058376, + "grad_norm": 0.8489541411399841, + "learning_rate": 4.936119608240641e-06, + "loss": 0.5003, + "step": 5978 + }, + { + "epoch": 0.4395677106307896, + "grad_norm": 0.8768338561058044, + "learning_rate": 4.9360979640548715e-06, + "loss": 0.5353, + "step": 5979 + }, + { + "epoch": 0.43964122923099547, + "grad_norm": 0.807565450668335, + "learning_rate": 4.936076316250411e-06, + "loss": 0.5375, + "step": 5980 + }, + { + "epoch": 0.4397147478312013, + "grad_norm": 0.8443534970283508, + "learning_rate": 4.936054664827294e-06, + "loss": 0.535, + "step": 5981 + }, + { + "epoch": 0.43978826643140717, + "grad_norm": 0.8514692187309265, + "learning_rate": 4.936033009785549e-06, + "loss": 0.5437, + "step": 5982 + }, + { + "epoch": 0.439861785031613, + "grad_norm": 0.7944656014442444, + "learning_rate": 4.936011351125212e-06, + "loss": 0.5013, + "step": 5983 + }, + { + "epoch": 0.4399353036318189, + "grad_norm": 0.788613498210907, + "learning_rate": 4.935989688846312e-06, + "loss": 0.5582, + "step": 5984 + }, + { + "epoch": 0.4400088222320247, + "grad_norm": 0.8814897537231445, + "learning_rate": 4.935968022948884e-06, + "loss": 0.5883, + "step": 5985 + }, + { + "epoch": 0.4400823408322306, + "grad_norm": 0.8324216604232788, + "learning_rate": 4.935946353432959e-06, + "loss": 0.5828, + "step": 5986 + }, + { + "epoch": 0.4401558594324364, + "grad_norm": 0.8653540015220642, + "learning_rate": 4.935924680298568e-06, + "loss": 0.5648, + "step": 5987 + }, + { + "epoch": 0.4402293780326423, + "grad_norm": 0.8634859323501587, + "learning_rate": 4.9359030035457455e-06, + "loss": 0.5438, + "step": 5988 + }, + { + "epoch": 0.4403028966328481, + "grad_norm": 0.8252637386322021, + "learning_rate": 4.9358813231745216e-06, + "loss": 0.5252, + "step": 5989 + }, + { + "epoch": 0.440376415233054, + "grad_norm": 0.8897470235824585, + "learning_rate": 4.9358596391849304e-06, + "loss": 0.5671, + "step": 5990 + }, + { + "epoch": 0.4404499338332598, + "grad_norm": 0.8194985389709473, + "learning_rate": 4.935837951577002e-06, + "loss": 0.5448, + "step": 5991 + }, + { + "epoch": 0.4405234524334657, + "grad_norm": 0.8191813826560974, + "learning_rate": 4.935816260350769e-06, + "loss": 0.5427, + "step": 5992 + }, + { + "epoch": 0.4405969710336715, + "grad_norm": 0.8208812475204468, + "learning_rate": 4.935794565506266e-06, + "loss": 0.5492, + "step": 5993 + }, + { + "epoch": 0.4406704896338774, + "grad_norm": 0.8950857520103455, + "learning_rate": 4.935772867043523e-06, + "loss": 0.6242, + "step": 5994 + }, + { + "epoch": 0.4407440082340832, + "grad_norm": 0.856421172618866, + "learning_rate": 4.935751164962573e-06, + "loss": 0.5389, + "step": 5995 + }, + { + "epoch": 0.4408175268342891, + "grad_norm": 0.8450967669487, + "learning_rate": 4.935729459263448e-06, + "loss": 0.5574, + "step": 5996 + }, + { + "epoch": 0.4408910454344949, + "grad_norm": 0.8292104601860046, + "learning_rate": 4.935707749946179e-06, + "loss": 0.5268, + "step": 5997 + }, + { + "epoch": 0.4409645640347008, + "grad_norm": 0.9185115098953247, + "learning_rate": 4.935686037010801e-06, + "loss": 0.5505, + "step": 5998 + }, + { + "epoch": 0.4410380826349066, + "grad_norm": 0.8447003960609436, + "learning_rate": 4.935664320457344e-06, + "loss": 0.5743, + "step": 5999 + }, + { + "epoch": 0.4411116012351125, + "grad_norm": 0.8302648067474365, + "learning_rate": 4.935642600285842e-06, + "loss": 0.5112, + "step": 6000 + }, + { + "epoch": 0.44118511983531833, + "grad_norm": 0.8681151866912842, + "learning_rate": 4.9356208764963255e-06, + "loss": 0.5717, + "step": 6001 + }, + { + "epoch": 0.4412586384355242, + "grad_norm": 0.8953342437744141, + "learning_rate": 4.9355991490888275e-06, + "loss": 0.5648, + "step": 6002 + }, + { + "epoch": 0.44133215703573003, + "grad_norm": 0.9523429274559021, + "learning_rate": 4.93557741806338e-06, + "loss": 0.5935, + "step": 6003 + }, + { + "epoch": 0.4414056756359359, + "grad_norm": 0.7991346120834351, + "learning_rate": 4.935555683420017e-06, + "loss": 0.5406, + "step": 6004 + }, + { + "epoch": 0.44147919423614174, + "grad_norm": 0.8151861429214478, + "learning_rate": 4.935533945158768e-06, + "loss": 0.5543, + "step": 6005 + }, + { + "epoch": 0.4415527128363476, + "grad_norm": 0.8633672595024109, + "learning_rate": 4.935512203279667e-06, + "loss": 0.5758, + "step": 6006 + }, + { + "epoch": 0.44162623143655344, + "grad_norm": 0.8580055832862854, + "learning_rate": 4.9354904577827466e-06, + "loss": 0.5233, + "step": 6007 + }, + { + "epoch": 0.4416997500367593, + "grad_norm": 0.8094178438186646, + "learning_rate": 4.935468708668037e-06, + "loss": 0.5103, + "step": 6008 + }, + { + "epoch": 0.44177326863696514, + "grad_norm": 0.8268056511878967, + "learning_rate": 4.935446955935574e-06, + "loss": 0.5934, + "step": 6009 + }, + { + "epoch": 0.441846787237171, + "grad_norm": 0.8623610138893127, + "learning_rate": 4.935425199585387e-06, + "loss": 0.5787, + "step": 6010 + }, + { + "epoch": 0.44192030583737685, + "grad_norm": 0.8722565174102783, + "learning_rate": 4.935403439617509e-06, + "loss": 0.5724, + "step": 6011 + }, + { + "epoch": 0.4419938244375827, + "grad_norm": 0.838323175907135, + "learning_rate": 4.935381676031973e-06, + "loss": 0.542, + "step": 6012 + }, + { + "epoch": 0.44206734303778855, + "grad_norm": 0.8148189783096313, + "learning_rate": 4.935359908828811e-06, + "loss": 0.5614, + "step": 6013 + }, + { + "epoch": 0.44214086163799443, + "grad_norm": 0.863937497138977, + "learning_rate": 4.9353381380080545e-06, + "loss": 0.5781, + "step": 6014 + }, + { + "epoch": 0.44221438023820026, + "grad_norm": 0.870824933052063, + "learning_rate": 4.9353163635697365e-06, + "loss": 0.5724, + "step": 6015 + }, + { + "epoch": 0.44228789883840613, + "grad_norm": 0.8459540009498596, + "learning_rate": 4.93529458551389e-06, + "loss": 0.5358, + "step": 6016 + }, + { + "epoch": 0.44236141743861196, + "grad_norm": 0.855694055557251, + "learning_rate": 4.935272803840547e-06, + "loss": 0.6276, + "step": 6017 + }, + { + "epoch": 0.44243493603881784, + "grad_norm": 0.7812016010284424, + "learning_rate": 4.935251018549739e-06, + "loss": 0.5053, + "step": 6018 + }, + { + "epoch": 0.44250845463902366, + "grad_norm": 0.8995918035507202, + "learning_rate": 4.9352292296414985e-06, + "loss": 0.5718, + "step": 6019 + }, + { + "epoch": 0.44258197323922954, + "grad_norm": 0.8326776027679443, + "learning_rate": 4.935207437115859e-06, + "loss": 0.5363, + "step": 6020 + }, + { + "epoch": 0.44265549183943537, + "grad_norm": 0.8432713150978088, + "learning_rate": 4.935185640972852e-06, + "loss": 0.5567, + "step": 6021 + }, + { + "epoch": 0.44272901043964125, + "grad_norm": 0.9029448628425598, + "learning_rate": 4.935163841212511e-06, + "loss": 0.5352, + "step": 6022 + }, + { + "epoch": 0.44280252903984707, + "grad_norm": 0.86225825548172, + "learning_rate": 4.9351420378348665e-06, + "loss": 0.552, + "step": 6023 + }, + { + "epoch": 0.44287604764005295, + "grad_norm": 0.8426873683929443, + "learning_rate": 4.935120230839951e-06, + "loss": 0.541, + "step": 6024 + }, + { + "epoch": 0.4429495662402588, + "grad_norm": 0.8405120968818665, + "learning_rate": 4.9350984202277995e-06, + "loss": 0.5194, + "step": 6025 + }, + { + "epoch": 0.44302308484046465, + "grad_norm": 0.8521713614463806, + "learning_rate": 4.935076605998442e-06, + "loss": 0.5778, + "step": 6026 + }, + { + "epoch": 0.4430966034406705, + "grad_norm": 0.8645545840263367, + "learning_rate": 4.935054788151911e-06, + "loss": 0.5814, + "step": 6027 + }, + { + "epoch": 0.44317012204087636, + "grad_norm": 0.8300573229789734, + "learning_rate": 4.93503296668824e-06, + "loss": 0.5662, + "step": 6028 + }, + { + "epoch": 0.4432436406410822, + "grad_norm": 0.8864428997039795, + "learning_rate": 4.9350111416074605e-06, + "loss": 0.5812, + "step": 6029 + }, + { + "epoch": 0.44331715924128806, + "grad_norm": 0.8698586821556091, + "learning_rate": 4.934989312909606e-06, + "loss": 0.5424, + "step": 6030 + }, + { + "epoch": 0.4433906778414939, + "grad_norm": 0.8345034122467041, + "learning_rate": 4.934967480594708e-06, + "loss": 0.5816, + "step": 6031 + }, + { + "epoch": 0.44346419644169977, + "grad_norm": 0.8992088437080383, + "learning_rate": 4.934945644662799e-06, + "loss": 0.58, + "step": 6032 + }, + { + "epoch": 0.4435377150419056, + "grad_norm": 0.858595609664917, + "learning_rate": 4.934923805113911e-06, + "loss": 0.5381, + "step": 6033 + }, + { + "epoch": 0.44361123364211147, + "grad_norm": 0.8972746133804321, + "learning_rate": 4.934901961948078e-06, + "loss": 0.514, + "step": 6034 + }, + { + "epoch": 0.4436847522423173, + "grad_norm": 0.8729269504547119, + "learning_rate": 4.934880115165331e-06, + "loss": 0.5813, + "step": 6035 + }, + { + "epoch": 0.4437582708425232, + "grad_norm": 0.9142420291900635, + "learning_rate": 4.934858264765703e-06, + "loss": 0.5305, + "step": 6036 + }, + { + "epoch": 0.443831789442729, + "grad_norm": 0.8772768974304199, + "learning_rate": 4.934836410749226e-06, + "loss": 0.6019, + "step": 6037 + }, + { + "epoch": 0.4439053080429349, + "grad_norm": 0.8322816491127014, + "learning_rate": 4.934814553115933e-06, + "loss": 0.5525, + "step": 6038 + }, + { + "epoch": 0.4439788266431407, + "grad_norm": 0.825485348701477, + "learning_rate": 4.934792691865856e-06, + "loss": 0.5397, + "step": 6039 + }, + { + "epoch": 0.4440523452433466, + "grad_norm": 0.8458081483840942, + "learning_rate": 4.934770826999029e-06, + "loss": 0.5577, + "step": 6040 + }, + { + "epoch": 0.4441258638435524, + "grad_norm": 0.8585337996482849, + "learning_rate": 4.9347489585154825e-06, + "loss": 0.5543, + "step": 6041 + }, + { + "epoch": 0.4441993824437583, + "grad_norm": 0.9130807518959045, + "learning_rate": 4.93472708641525e-06, + "loss": 0.5606, + "step": 6042 + }, + { + "epoch": 0.4442729010439641, + "grad_norm": 0.914743959903717, + "learning_rate": 4.934705210698363e-06, + "loss": 0.5794, + "step": 6043 + }, + { + "epoch": 0.44434641964417, + "grad_norm": 0.869157612323761, + "learning_rate": 4.934683331364855e-06, + "loss": 0.6168, + "step": 6044 + }, + { + "epoch": 0.4444199382443758, + "grad_norm": 0.8328548073768616, + "learning_rate": 4.9346614484147584e-06, + "loss": 0.5405, + "step": 6045 + }, + { + "epoch": 0.4444934568445817, + "grad_norm": 0.874456524848938, + "learning_rate": 4.934639561848106e-06, + "loss": 0.5191, + "step": 6046 + }, + { + "epoch": 0.4445669754447875, + "grad_norm": 0.8321410417556763, + "learning_rate": 4.934617671664929e-06, + "loss": 0.5332, + "step": 6047 + }, + { + "epoch": 0.4446404940449934, + "grad_norm": 0.8301422595977783, + "learning_rate": 4.934595777865261e-06, + "loss": 0.5045, + "step": 6048 + }, + { + "epoch": 0.4447140126451992, + "grad_norm": 0.9388987421989441, + "learning_rate": 4.934573880449135e-06, + "loss": 0.5829, + "step": 6049 + }, + { + "epoch": 0.4447875312454051, + "grad_norm": 0.8586767911911011, + "learning_rate": 4.9345519794165815e-06, + "loss": 0.5411, + "step": 6050 + }, + { + "epoch": 0.4448610498456109, + "grad_norm": 0.8715449571609497, + "learning_rate": 4.934530074767636e-06, + "loss": 0.5588, + "step": 6051 + }, + { + "epoch": 0.4449345684458168, + "grad_norm": 0.8174730539321899, + "learning_rate": 4.934508166502328e-06, + "loss": 0.5452, + "step": 6052 + }, + { + "epoch": 0.4450080870460226, + "grad_norm": 0.8410118818283081, + "learning_rate": 4.934486254620692e-06, + "loss": 0.5873, + "step": 6053 + }, + { + "epoch": 0.4450816056462285, + "grad_norm": 0.8516241312026978, + "learning_rate": 4.93446433912276e-06, + "loss": 0.5889, + "step": 6054 + }, + { + "epoch": 0.44515512424643433, + "grad_norm": 0.8919525146484375, + "learning_rate": 4.934442420008564e-06, + "loss": 0.5377, + "step": 6055 + }, + { + "epoch": 0.4452286428466402, + "grad_norm": 0.8909178972244263, + "learning_rate": 4.9344204972781375e-06, + "loss": 0.5567, + "step": 6056 + }, + { + "epoch": 0.44530216144684603, + "grad_norm": 0.8767356276512146, + "learning_rate": 4.934398570931513e-06, + "loss": 0.5924, + "step": 6057 + }, + { + "epoch": 0.4453756800470519, + "grad_norm": 0.8215172290802002, + "learning_rate": 4.9343766409687225e-06, + "loss": 0.5547, + "step": 6058 + }, + { + "epoch": 0.44544919864725774, + "grad_norm": 0.8028683066368103, + "learning_rate": 4.9343547073897985e-06, + "loss": 0.5346, + "step": 6059 + }, + { + "epoch": 0.4455227172474636, + "grad_norm": 0.810564398765564, + "learning_rate": 4.934332770194774e-06, + "loss": 0.5218, + "step": 6060 + }, + { + "epoch": 0.44559623584766944, + "grad_norm": 0.8090018630027771, + "learning_rate": 4.934310829383682e-06, + "loss": 0.544, + "step": 6061 + }, + { + "epoch": 0.4456697544478753, + "grad_norm": 0.8143542408943176, + "learning_rate": 4.934288884956554e-06, + "loss": 0.5422, + "step": 6062 + }, + { + "epoch": 0.44574327304808115, + "grad_norm": 0.8261503577232361, + "learning_rate": 4.934266936913423e-06, + "loss": 0.5573, + "step": 6063 + }, + { + "epoch": 0.445816791648287, + "grad_norm": 0.8088696599006653, + "learning_rate": 4.934244985254322e-06, + "loss": 0.5286, + "step": 6064 + }, + { + "epoch": 0.44589031024849285, + "grad_norm": 0.8921037912368774, + "learning_rate": 4.934223029979283e-06, + "loss": 0.6103, + "step": 6065 + }, + { + "epoch": 0.44596382884869873, + "grad_norm": 0.8366177678108215, + "learning_rate": 4.93420107108834e-06, + "loss": 0.5842, + "step": 6066 + }, + { + "epoch": 0.44603734744890455, + "grad_norm": 0.83306485414505, + "learning_rate": 4.934179108581524e-06, + "loss": 0.5461, + "step": 6067 + }, + { + "epoch": 0.44611086604911043, + "grad_norm": 0.8507281541824341, + "learning_rate": 4.934157142458868e-06, + "loss": 0.5424, + "step": 6068 + }, + { + "epoch": 0.44618438464931626, + "grad_norm": 0.8303073048591614, + "learning_rate": 4.934135172720405e-06, + "loss": 0.5205, + "step": 6069 + }, + { + "epoch": 0.44625790324952214, + "grad_norm": 0.8365828394889832, + "learning_rate": 4.934113199366168e-06, + "loss": 0.5666, + "step": 6070 + }, + { + "epoch": 0.44633142184972796, + "grad_norm": 0.824725866317749, + "learning_rate": 4.934091222396188e-06, + "loss": 0.5199, + "step": 6071 + }, + { + "epoch": 0.44640494044993384, + "grad_norm": 0.87254399061203, + "learning_rate": 4.9340692418105e-06, + "loss": 0.5771, + "step": 6072 + }, + { + "epoch": 0.44647845905013966, + "grad_norm": 0.8344103097915649, + "learning_rate": 4.934047257609135e-06, + "loss": 0.5531, + "step": 6073 + }, + { + "epoch": 0.44655197765034554, + "grad_norm": 0.8445865511894226, + "learning_rate": 4.934025269792126e-06, + "loss": 0.5667, + "step": 6074 + }, + { + "epoch": 0.44662549625055137, + "grad_norm": 0.8381909728050232, + "learning_rate": 4.934003278359505e-06, + "loss": 0.5453, + "step": 6075 + }, + { + "epoch": 0.44669901485075725, + "grad_norm": 0.8565877079963684, + "learning_rate": 4.933981283311306e-06, + "loss": 0.5715, + "step": 6076 + }, + { + "epoch": 0.44677253345096307, + "grad_norm": 0.876899778842926, + "learning_rate": 4.933959284647562e-06, + "loss": 0.5216, + "step": 6077 + }, + { + "epoch": 0.44684605205116895, + "grad_norm": 0.8780990839004517, + "learning_rate": 4.933937282368304e-06, + "loss": 0.5919, + "step": 6078 + }, + { + "epoch": 0.4469195706513748, + "grad_norm": 0.8694236278533936, + "learning_rate": 4.933915276473565e-06, + "loss": 0.5967, + "step": 6079 + }, + { + "epoch": 0.44699308925158066, + "grad_norm": 0.8490318059921265, + "learning_rate": 4.933893266963379e-06, + "loss": 0.5974, + "step": 6080 + }, + { + "epoch": 0.4470666078517865, + "grad_norm": 0.8629139065742493, + "learning_rate": 4.9338712538377775e-06, + "loss": 0.5644, + "step": 6081 + }, + { + "epoch": 0.44714012645199236, + "grad_norm": 0.861901044845581, + "learning_rate": 4.933849237096794e-06, + "loss": 0.5599, + "step": 6082 + }, + { + "epoch": 0.4472136450521982, + "grad_norm": 0.9124665260314941, + "learning_rate": 4.933827216740459e-06, + "loss": 0.5881, + "step": 6083 + }, + { + "epoch": 0.44728716365240406, + "grad_norm": 0.8763757348060608, + "learning_rate": 4.933805192768809e-06, + "loss": 0.5354, + "step": 6084 + }, + { + "epoch": 0.4473606822526099, + "grad_norm": 0.7998202443122864, + "learning_rate": 4.933783165181873e-06, + "loss": 0.5778, + "step": 6085 + }, + { + "epoch": 0.44743420085281577, + "grad_norm": 0.8783031105995178, + "learning_rate": 4.933761133979686e-06, + "loss": 0.566, + "step": 6086 + }, + { + "epoch": 0.4475077194530216, + "grad_norm": 0.8800479173660278, + "learning_rate": 4.933739099162281e-06, + "loss": 0.5943, + "step": 6087 + }, + { + "epoch": 0.44758123805322747, + "grad_norm": 0.8925976753234863, + "learning_rate": 4.933717060729689e-06, + "loss": 0.5818, + "step": 6088 + }, + { + "epoch": 0.4476547566534333, + "grad_norm": 0.8586766719818115, + "learning_rate": 4.933695018681943e-06, + "loss": 0.5523, + "step": 6089 + }, + { + "epoch": 0.4477282752536392, + "grad_norm": 0.7904914617538452, + "learning_rate": 4.933672973019078e-06, + "loss": 0.5792, + "step": 6090 + }, + { + "epoch": 0.447801793853845, + "grad_norm": 0.8410845994949341, + "learning_rate": 4.933650923741123e-06, + "loss": 0.4956, + "step": 6091 + }, + { + "epoch": 0.4478753124540509, + "grad_norm": 0.8119987845420837, + "learning_rate": 4.933628870848114e-06, + "loss": 0.5455, + "step": 6092 + }, + { + "epoch": 0.4479488310542567, + "grad_norm": 0.8158290982246399, + "learning_rate": 4.933606814340083e-06, + "loss": 0.538, + "step": 6093 + }, + { + "epoch": 0.4480223496544626, + "grad_norm": 0.8389366269111633, + "learning_rate": 4.9335847542170616e-06, + "loss": 0.5592, + "step": 6094 + }, + { + "epoch": 0.4480958682546684, + "grad_norm": 0.8293508887290955, + "learning_rate": 4.933562690479083e-06, + "loss": 0.5716, + "step": 6095 + }, + { + "epoch": 0.4481693868548743, + "grad_norm": 0.8078495860099792, + "learning_rate": 4.933540623126182e-06, + "loss": 0.5443, + "step": 6096 + }, + { + "epoch": 0.4482429054550801, + "grad_norm": 0.8861211538314819, + "learning_rate": 4.933518552158387e-06, + "loss": 0.5175, + "step": 6097 + }, + { + "epoch": 0.448316424055286, + "grad_norm": 0.9051473140716553, + "learning_rate": 4.933496477575735e-06, + "loss": 0.5498, + "step": 6098 + }, + { + "epoch": 0.4483899426554918, + "grad_norm": 0.8777109980583191, + "learning_rate": 4.933474399378257e-06, + "loss": 0.5314, + "step": 6099 + }, + { + "epoch": 0.4484634612556977, + "grad_norm": 0.8515744805335999, + "learning_rate": 4.9334523175659866e-06, + "loss": 0.5416, + "step": 6100 + }, + { + "epoch": 0.4485369798559035, + "grad_norm": 0.8636679649353027, + "learning_rate": 4.933430232138956e-06, + "loss": 0.5521, + "step": 6101 + }, + { + "epoch": 0.4486104984561094, + "grad_norm": 0.8391525745391846, + "learning_rate": 4.933408143097197e-06, + "loss": 0.5012, + "step": 6102 + }, + { + "epoch": 0.4486840170563152, + "grad_norm": 0.8892884254455566, + "learning_rate": 4.933386050440744e-06, + "loss": 0.5518, + "step": 6103 + }, + { + "epoch": 0.4487575356565211, + "grad_norm": 0.8297102451324463, + "learning_rate": 4.93336395416963e-06, + "loss": 0.5744, + "step": 6104 + }, + { + "epoch": 0.4488310542567269, + "grad_norm": 0.8175090551376343, + "learning_rate": 4.933341854283886e-06, + "loss": 0.5303, + "step": 6105 + }, + { + "epoch": 0.4489045728569328, + "grad_norm": 0.9281814694404602, + "learning_rate": 4.933319750783546e-06, + "loss": 0.5357, + "step": 6106 + }, + { + "epoch": 0.44897809145713863, + "grad_norm": 0.894835889339447, + "learning_rate": 4.9332976436686434e-06, + "loss": 0.5476, + "step": 6107 + }, + { + "epoch": 0.4490516100573445, + "grad_norm": 0.8293629288673401, + "learning_rate": 4.93327553293921e-06, + "loss": 0.5577, + "step": 6108 + }, + { + "epoch": 0.4491251286575504, + "grad_norm": 0.881370484828949, + "learning_rate": 4.9332534185952796e-06, + "loss": 0.5636, + "step": 6109 + }, + { + "epoch": 0.4491986472577562, + "grad_norm": 0.8604033589363098, + "learning_rate": 4.933231300636883e-06, + "loss": 0.5972, + "step": 6110 + }, + { + "epoch": 0.4492721658579621, + "grad_norm": 0.8900611400604248, + "learning_rate": 4.933209179064056e-06, + "loss": 0.5621, + "step": 6111 + }, + { + "epoch": 0.4493456844581679, + "grad_norm": 0.8375688791275024, + "learning_rate": 4.93318705387683e-06, + "loss": 0.548, + "step": 6112 + }, + { + "epoch": 0.4494192030583738, + "grad_norm": 0.851661205291748, + "learning_rate": 4.933164925075237e-06, + "loss": 0.5602, + "step": 6113 + }, + { + "epoch": 0.4494927216585796, + "grad_norm": 0.8587968349456787, + "learning_rate": 4.9331427926593114e-06, + "loss": 0.6029, + "step": 6114 + }, + { + "epoch": 0.4495662402587855, + "grad_norm": 0.828054666519165, + "learning_rate": 4.933120656629085e-06, + "loss": 0.6055, + "step": 6115 + }, + { + "epoch": 0.4496397588589913, + "grad_norm": 0.8859608173370361, + "learning_rate": 4.9330985169845915e-06, + "loss": 0.5666, + "step": 6116 + }, + { + "epoch": 0.4497132774591972, + "grad_norm": 0.8618389368057251, + "learning_rate": 4.933076373725863e-06, + "loss": 0.5497, + "step": 6117 + }, + { + "epoch": 0.449786796059403, + "grad_norm": 0.8217050433158875, + "learning_rate": 4.933054226852933e-06, + "loss": 0.5749, + "step": 6118 + }, + { + "epoch": 0.4498603146596089, + "grad_norm": 0.8658814430236816, + "learning_rate": 4.933032076365834e-06, + "loss": 0.5437, + "step": 6119 + }, + { + "epoch": 0.44993383325981473, + "grad_norm": 0.8560968041419983, + "learning_rate": 4.9330099222646e-06, + "loss": 0.5322, + "step": 6120 + }, + { + "epoch": 0.4500073518600206, + "grad_norm": 0.8745714426040649, + "learning_rate": 4.932987764549262e-06, + "loss": 0.5477, + "step": 6121 + }, + { + "epoch": 0.45008087046022643, + "grad_norm": 0.9123449921607971, + "learning_rate": 4.932965603219853e-06, + "loss": 0.5943, + "step": 6122 + }, + { + "epoch": 0.4501543890604323, + "grad_norm": 0.8376361727714539, + "learning_rate": 4.932943438276409e-06, + "loss": 0.5491, + "step": 6123 + }, + { + "epoch": 0.45022790766063814, + "grad_norm": 0.8592362403869629, + "learning_rate": 4.93292126971896e-06, + "loss": 0.567, + "step": 6124 + }, + { + "epoch": 0.450301426260844, + "grad_norm": 0.8342194557189941, + "learning_rate": 4.9328990975475384e-06, + "loss": 0.5457, + "step": 6125 + }, + { + "epoch": 0.45037494486104984, + "grad_norm": 0.8609989881515503, + "learning_rate": 4.93287692176218e-06, + "loss": 0.5507, + "step": 6126 + }, + { + "epoch": 0.4504484634612557, + "grad_norm": 0.821334958076477, + "learning_rate": 4.932854742362916e-06, + "loss": 0.5153, + "step": 6127 + }, + { + "epoch": 0.45052198206146155, + "grad_norm": 0.8426330089569092, + "learning_rate": 4.9328325593497785e-06, + "loss": 0.5149, + "step": 6128 + }, + { + "epoch": 0.4505955006616674, + "grad_norm": 0.8935385346412659, + "learning_rate": 4.9328103727228015e-06, + "loss": 0.566, + "step": 6129 + }, + { + "epoch": 0.45066901926187325, + "grad_norm": 0.9248542785644531, + "learning_rate": 4.932788182482019e-06, + "loss": 0.5814, + "step": 6130 + }, + { + "epoch": 0.45074253786207913, + "grad_norm": 0.9329686164855957, + "learning_rate": 4.932765988627461e-06, + "loss": 0.5733, + "step": 6131 + }, + { + "epoch": 0.45081605646228495, + "grad_norm": 0.8225057721138, + "learning_rate": 4.932743791159165e-06, + "loss": 0.5593, + "step": 6132 + }, + { + "epoch": 0.45088957506249083, + "grad_norm": 0.8543820977210999, + "learning_rate": 4.93272159007716e-06, + "loss": 0.5145, + "step": 6133 + }, + { + "epoch": 0.45096309366269666, + "grad_norm": 0.8733291625976562, + "learning_rate": 4.93269938538148e-06, + "loss": 0.5463, + "step": 6134 + }, + { + "epoch": 0.45103661226290254, + "grad_norm": 0.8371374011039734, + "learning_rate": 4.932677177072158e-06, + "loss": 0.5733, + "step": 6135 + }, + { + "epoch": 0.45111013086310836, + "grad_norm": 0.8144261837005615, + "learning_rate": 4.932654965149228e-06, + "loss": 0.5361, + "step": 6136 + }, + { + "epoch": 0.45118364946331424, + "grad_norm": 0.8691345453262329, + "learning_rate": 4.932632749612722e-06, + "loss": 0.5704, + "step": 6137 + }, + { + "epoch": 0.45125716806352006, + "grad_norm": 0.8823772668838501, + "learning_rate": 4.932610530462673e-06, + "loss": 0.5612, + "step": 6138 + }, + { + "epoch": 0.45133068666372594, + "grad_norm": 0.8553526997566223, + "learning_rate": 4.932588307699114e-06, + "loss": 0.5608, + "step": 6139 + }, + { + "epoch": 0.45140420526393177, + "grad_norm": 0.8961080312728882, + "learning_rate": 4.9325660813220784e-06, + "loss": 0.6105, + "step": 6140 + }, + { + "epoch": 0.45147772386413765, + "grad_norm": 0.8401738405227661, + "learning_rate": 4.9325438513316e-06, + "loss": 0.5448, + "step": 6141 + }, + { + "epoch": 0.45155124246434347, + "grad_norm": 0.8274914026260376, + "learning_rate": 4.932521617727711e-06, + "loss": 0.5813, + "step": 6142 + }, + { + "epoch": 0.45162476106454935, + "grad_norm": 0.8358171582221985, + "learning_rate": 4.932499380510444e-06, + "loss": 0.5368, + "step": 6143 + }, + { + "epoch": 0.4516982796647552, + "grad_norm": 0.8085956573486328, + "learning_rate": 4.932477139679831e-06, + "loss": 0.5329, + "step": 6144 + }, + { + "epoch": 0.45177179826496106, + "grad_norm": 0.830424964427948, + "learning_rate": 4.932454895235908e-06, + "loss": 0.5496, + "step": 6145 + }, + { + "epoch": 0.4518453168651669, + "grad_norm": 0.8538293838500977, + "learning_rate": 4.932432647178706e-06, + "loss": 0.5366, + "step": 6146 + }, + { + "epoch": 0.45191883546537276, + "grad_norm": 0.8211454749107361, + "learning_rate": 4.932410395508258e-06, + "loss": 0.551, + "step": 6147 + }, + { + "epoch": 0.4519923540655786, + "grad_norm": 0.8435774445533752, + "learning_rate": 4.9323881402245986e-06, + "loss": 0.5347, + "step": 6148 + }, + { + "epoch": 0.45206587266578446, + "grad_norm": 0.8763788342475891, + "learning_rate": 4.932365881327758e-06, + "loss": 0.5768, + "step": 6149 + }, + { + "epoch": 0.4521393912659903, + "grad_norm": 0.8142327666282654, + "learning_rate": 4.932343618817773e-06, + "loss": 0.5617, + "step": 6150 + }, + { + "epoch": 0.45221290986619617, + "grad_norm": 0.8842673301696777, + "learning_rate": 4.932321352694674e-06, + "loss": 0.6093, + "step": 6151 + }, + { + "epoch": 0.452286428466402, + "grad_norm": 0.7949696183204651, + "learning_rate": 4.932299082958495e-06, + "loss": 0.5227, + "step": 6152 + }, + { + "epoch": 0.45235994706660787, + "grad_norm": 0.8604792952537537, + "learning_rate": 4.9322768096092685e-06, + "loss": 0.524, + "step": 6153 + }, + { + "epoch": 0.4524334656668137, + "grad_norm": 0.8225197792053223, + "learning_rate": 4.932254532647028e-06, + "loss": 0.5917, + "step": 6154 + }, + { + "epoch": 0.4525069842670196, + "grad_norm": 0.808046817779541, + "learning_rate": 4.932232252071808e-06, + "loss": 0.5228, + "step": 6155 + }, + { + "epoch": 0.4525805028672254, + "grad_norm": 0.8490544557571411, + "learning_rate": 4.9322099678836386e-06, + "loss": 0.5695, + "step": 6156 + }, + { + "epoch": 0.4526540214674313, + "grad_norm": 0.8465648889541626, + "learning_rate": 4.9321876800825545e-06, + "loss": 0.5682, + "step": 6157 + }, + { + "epoch": 0.4527275400676371, + "grad_norm": 0.8612837791442871, + "learning_rate": 4.9321653886685896e-06, + "loss": 0.5429, + "step": 6158 + }, + { + "epoch": 0.452801058667843, + "grad_norm": 0.8193693161010742, + "learning_rate": 4.932143093641776e-06, + "loss": 0.5132, + "step": 6159 + }, + { + "epoch": 0.4528745772680488, + "grad_norm": 0.9022364616394043, + "learning_rate": 4.932120795002146e-06, + "loss": 0.5927, + "step": 6160 + }, + { + "epoch": 0.4529480958682547, + "grad_norm": 0.8462862372398376, + "learning_rate": 4.932098492749735e-06, + "loss": 0.5696, + "step": 6161 + }, + { + "epoch": 0.4530216144684605, + "grad_norm": 0.8715428113937378, + "learning_rate": 4.932076186884575e-06, + "loss": 0.5529, + "step": 6162 + }, + { + "epoch": 0.4530951330686664, + "grad_norm": 0.8748509287834167, + "learning_rate": 4.932053877406697e-06, + "loss": 0.5554, + "step": 6163 + }, + { + "epoch": 0.4531686516688722, + "grad_norm": 0.8723131418228149, + "learning_rate": 4.932031564316138e-06, + "loss": 0.562, + "step": 6164 + }, + { + "epoch": 0.4532421702690781, + "grad_norm": 0.8541145920753479, + "learning_rate": 4.932009247612929e-06, + "loss": 0.5737, + "step": 6165 + }, + { + "epoch": 0.4533156888692839, + "grad_norm": 0.8804247379302979, + "learning_rate": 4.931986927297104e-06, + "loss": 0.5577, + "step": 6166 + }, + { + "epoch": 0.4533892074694898, + "grad_norm": 0.8694369792938232, + "learning_rate": 4.931964603368694e-06, + "loss": 0.5798, + "step": 6167 + }, + { + "epoch": 0.4534627260696956, + "grad_norm": 0.8734681010246277, + "learning_rate": 4.9319422758277345e-06, + "loss": 0.5927, + "step": 6168 + }, + { + "epoch": 0.4535362446699015, + "grad_norm": 0.8223910331726074, + "learning_rate": 4.931919944674258e-06, + "loss": 0.5858, + "step": 6169 + }, + { + "epoch": 0.4536097632701073, + "grad_norm": 0.8713114261627197, + "learning_rate": 4.9318976099082974e-06, + "loss": 0.5533, + "step": 6170 + }, + { + "epoch": 0.4536832818703132, + "grad_norm": 0.8416121006011963, + "learning_rate": 4.931875271529887e-06, + "loss": 0.5718, + "step": 6171 + }, + { + "epoch": 0.45375680047051903, + "grad_norm": 0.8112354874610901, + "learning_rate": 4.931852929539058e-06, + "loss": 0.5706, + "step": 6172 + }, + { + "epoch": 0.4538303190707249, + "grad_norm": 0.8312424421310425, + "learning_rate": 4.931830583935845e-06, + "loss": 0.5596, + "step": 6173 + }, + { + "epoch": 0.45390383767093073, + "grad_norm": 0.8493066430091858, + "learning_rate": 4.93180823472028e-06, + "loss": 0.5607, + "step": 6174 + }, + { + "epoch": 0.4539773562711366, + "grad_norm": 0.8358021378517151, + "learning_rate": 4.931785881892398e-06, + "loss": 0.5427, + "step": 6175 + }, + { + "epoch": 0.45405087487134244, + "grad_norm": 0.8099457025527954, + "learning_rate": 4.931763525452231e-06, + "loss": 0.5579, + "step": 6176 + }, + { + "epoch": 0.4541243934715483, + "grad_norm": 0.835097074508667, + "learning_rate": 4.931741165399813e-06, + "loss": 0.5444, + "step": 6177 + }, + { + "epoch": 0.45419791207175414, + "grad_norm": 0.8785600662231445, + "learning_rate": 4.931718801735176e-06, + "loss": 0.5707, + "step": 6178 + }, + { + "epoch": 0.45427143067196, + "grad_norm": 0.8930386900901794, + "learning_rate": 4.931696434458354e-06, + "loss": 0.5603, + "step": 6179 + }, + { + "epoch": 0.45434494927216584, + "grad_norm": 0.9181625843048096, + "learning_rate": 4.931674063569381e-06, + "loss": 0.5821, + "step": 6180 + }, + { + "epoch": 0.4544184678723717, + "grad_norm": 0.8777456283569336, + "learning_rate": 4.931651689068288e-06, + "loss": 0.5945, + "step": 6181 + }, + { + "epoch": 0.45449198647257755, + "grad_norm": 0.8676781058311462, + "learning_rate": 4.9316293109551094e-06, + "loss": 0.5524, + "step": 6182 + }, + { + "epoch": 0.4545655050727834, + "grad_norm": 0.8566883206367493, + "learning_rate": 4.931606929229879e-06, + "loss": 0.5333, + "step": 6183 + }, + { + "epoch": 0.45463902367298925, + "grad_norm": 0.7887613773345947, + "learning_rate": 4.93158454389263e-06, + "loss": 0.5252, + "step": 6184 + }, + { + "epoch": 0.45471254227319513, + "grad_norm": 0.8047403693199158, + "learning_rate": 4.931562154943396e-06, + "loss": 0.5238, + "step": 6185 + }, + { + "epoch": 0.45478606087340095, + "grad_norm": 0.7977640628814697, + "learning_rate": 4.931539762382208e-06, + "loss": 0.5191, + "step": 6186 + }, + { + "epoch": 0.45485957947360683, + "grad_norm": 0.8441499471664429, + "learning_rate": 4.931517366209102e-06, + "loss": 0.5955, + "step": 6187 + }, + { + "epoch": 0.45493309807381266, + "grad_norm": 0.8554752469062805, + "learning_rate": 4.931494966424109e-06, + "loss": 0.5889, + "step": 6188 + }, + { + "epoch": 0.45500661667401854, + "grad_norm": 0.8241404294967651, + "learning_rate": 4.931472563027265e-06, + "loss": 0.5398, + "step": 6189 + }, + { + "epoch": 0.45508013527422436, + "grad_norm": 0.8977396488189697, + "learning_rate": 4.9314501560186e-06, + "loss": 0.6007, + "step": 6190 + }, + { + "epoch": 0.45515365387443024, + "grad_norm": 0.9056172370910645, + "learning_rate": 4.9314277453981505e-06, + "loss": 0.5831, + "step": 6191 + }, + { + "epoch": 0.45522717247463607, + "grad_norm": 0.8912062048912048, + "learning_rate": 4.931405331165947e-06, + "loss": 0.5166, + "step": 6192 + }, + { + "epoch": 0.45530069107484195, + "grad_norm": 0.8291208744049072, + "learning_rate": 4.931382913322025e-06, + "loss": 0.5906, + "step": 6193 + }, + { + "epoch": 0.45537420967504777, + "grad_norm": 0.8521814346313477, + "learning_rate": 4.931360491866416e-06, + "loss": 0.5372, + "step": 6194 + }, + { + "epoch": 0.45544772827525365, + "grad_norm": 0.8333451151847839, + "learning_rate": 4.931338066799154e-06, + "loss": 0.4859, + "step": 6195 + }, + { + "epoch": 0.4555212468754595, + "grad_norm": 0.8377135396003723, + "learning_rate": 4.9313156381202735e-06, + "loss": 0.5846, + "step": 6196 + }, + { + "epoch": 0.45559476547566535, + "grad_norm": 0.8574617505073547, + "learning_rate": 4.9312932058298055e-06, + "loss": 0.545, + "step": 6197 + }, + { + "epoch": 0.4556682840758712, + "grad_norm": 0.9246452450752258, + "learning_rate": 4.931270769927785e-06, + "loss": 0.5803, + "step": 6198 + }, + { + "epoch": 0.45574180267607706, + "grad_norm": 0.8312580585479736, + "learning_rate": 4.931248330414245e-06, + "loss": 0.5413, + "step": 6199 + }, + { + "epoch": 0.4558153212762829, + "grad_norm": 0.8214659094810486, + "learning_rate": 4.9312258872892184e-06, + "loss": 0.5137, + "step": 6200 + }, + { + "epoch": 0.45588883987648876, + "grad_norm": 0.850787878036499, + "learning_rate": 4.93120344055274e-06, + "loss": 0.5708, + "step": 6201 + }, + { + "epoch": 0.4559623584766946, + "grad_norm": 0.8426433801651001, + "learning_rate": 4.931180990204841e-06, + "loss": 0.5582, + "step": 6202 + }, + { + "epoch": 0.45603587707690046, + "grad_norm": 0.8478251695632935, + "learning_rate": 4.931158536245556e-06, + "loss": 0.5448, + "step": 6203 + }, + { + "epoch": 0.4561093956771063, + "grad_norm": 0.8117929100990295, + "learning_rate": 4.931136078674918e-06, + "loss": 0.5341, + "step": 6204 + }, + { + "epoch": 0.45618291427731217, + "grad_norm": 0.838887631893158, + "learning_rate": 4.931113617492961e-06, + "loss": 0.5433, + "step": 6205 + }, + { + "epoch": 0.456256432877518, + "grad_norm": 0.8325961828231812, + "learning_rate": 4.931091152699717e-06, + "loss": 0.5723, + "step": 6206 + }, + { + "epoch": 0.45632995147772387, + "grad_norm": 0.8416414856910706, + "learning_rate": 4.93106868429522e-06, + "loss": 0.597, + "step": 6207 + }, + { + "epoch": 0.4564034700779297, + "grad_norm": 0.8352144360542297, + "learning_rate": 4.931046212279505e-06, + "loss": 0.5983, + "step": 6208 + }, + { + "epoch": 0.4564769886781356, + "grad_norm": 0.8969170451164246, + "learning_rate": 4.931023736652604e-06, + "loss": 0.5381, + "step": 6209 + }, + { + "epoch": 0.4565505072783414, + "grad_norm": 0.8300245404243469, + "learning_rate": 4.9310012574145495e-06, + "loss": 0.5787, + "step": 6210 + }, + { + "epoch": 0.4566240258785473, + "grad_norm": 0.8091853857040405, + "learning_rate": 4.930978774565375e-06, + "loss": 0.5174, + "step": 6211 + }, + { + "epoch": 0.4566975444787531, + "grad_norm": 0.8465105295181274, + "learning_rate": 4.930956288105116e-06, + "loss": 0.5134, + "step": 6212 + }, + { + "epoch": 0.456771063078959, + "grad_norm": 0.7618150115013123, + "learning_rate": 4.930933798033804e-06, + "loss": 0.5243, + "step": 6213 + }, + { + "epoch": 0.4568445816791648, + "grad_norm": 0.8681797385215759, + "learning_rate": 4.930911304351473e-06, + "loss": 0.5417, + "step": 6214 + }, + { + "epoch": 0.4569181002793707, + "grad_norm": 0.8553387522697449, + "learning_rate": 4.930888807058156e-06, + "loss": 0.5271, + "step": 6215 + }, + { + "epoch": 0.4569916188795765, + "grad_norm": 0.8169595003128052, + "learning_rate": 4.930866306153887e-06, + "loss": 0.5617, + "step": 6216 + }, + { + "epoch": 0.4570651374797824, + "grad_norm": 0.8923660516738892, + "learning_rate": 4.9308438016386995e-06, + "loss": 0.5654, + "step": 6217 + }, + { + "epoch": 0.4571386560799882, + "grad_norm": 0.8471712470054626, + "learning_rate": 4.930821293512626e-06, + "loss": 0.5625, + "step": 6218 + }, + { + "epoch": 0.4572121746801941, + "grad_norm": 0.8477069735527039, + "learning_rate": 4.930798781775702e-06, + "loss": 0.539, + "step": 6219 + }, + { + "epoch": 0.4572856932803999, + "grad_norm": 0.8772934675216675, + "learning_rate": 4.930776266427959e-06, + "loss": 0.5317, + "step": 6220 + }, + { + "epoch": 0.4573592118806058, + "grad_norm": 0.8213449716567993, + "learning_rate": 4.93075374746943e-06, + "loss": 0.5615, + "step": 6221 + }, + { + "epoch": 0.4574327304808116, + "grad_norm": 0.8479688763618469, + "learning_rate": 4.93073122490015e-06, + "loss": 0.5687, + "step": 6222 + }, + { + "epoch": 0.4575062490810175, + "grad_norm": 0.8271901607513428, + "learning_rate": 4.9307086987201526e-06, + "loss": 0.5477, + "step": 6223 + }, + { + "epoch": 0.4575797676812233, + "grad_norm": 0.8637251853942871, + "learning_rate": 4.9306861689294695e-06, + "loss": 0.5769, + "step": 6224 + }, + { + "epoch": 0.4576532862814292, + "grad_norm": 0.8371850252151489, + "learning_rate": 4.930663635528136e-06, + "loss": 0.5738, + "step": 6225 + }, + { + "epoch": 0.45772680488163503, + "grad_norm": 0.8090464472770691, + "learning_rate": 4.930641098516183e-06, + "loss": 0.5402, + "step": 6226 + }, + { + "epoch": 0.4578003234818409, + "grad_norm": 0.8708049654960632, + "learning_rate": 4.930618557893647e-06, + "loss": 0.532, + "step": 6227 + }, + { + "epoch": 0.45787384208204673, + "grad_norm": 0.8324411511421204, + "learning_rate": 4.930596013660561e-06, + "loss": 0.59, + "step": 6228 + }, + { + "epoch": 0.4579473606822526, + "grad_norm": 0.8918265104293823, + "learning_rate": 4.930573465816957e-06, + "loss": 0.5498, + "step": 6229 + }, + { + "epoch": 0.45802087928245844, + "grad_norm": 0.8467661142349243, + "learning_rate": 4.930550914362869e-06, + "loss": 0.5743, + "step": 6230 + }, + { + "epoch": 0.4580943978826643, + "grad_norm": 0.9005281329154968, + "learning_rate": 4.9305283592983315e-06, + "loss": 0.5653, + "step": 6231 + }, + { + "epoch": 0.45816791648287014, + "grad_norm": 0.9341692328453064, + "learning_rate": 4.930505800623376e-06, + "loss": 0.593, + "step": 6232 + }, + { + "epoch": 0.458241435083076, + "grad_norm": 0.8751522898674011, + "learning_rate": 4.930483238338037e-06, + "loss": 0.5648, + "step": 6233 + }, + { + "epoch": 0.45831495368328184, + "grad_norm": 0.8891525864601135, + "learning_rate": 4.9304606724423495e-06, + "loss": 0.6188, + "step": 6234 + }, + { + "epoch": 0.4583884722834877, + "grad_norm": 0.8174684643745422, + "learning_rate": 4.9304381029363456e-06, + "loss": 0.5394, + "step": 6235 + }, + { + "epoch": 0.45846199088369355, + "grad_norm": 0.8772435784339905, + "learning_rate": 4.9304155298200585e-06, + "loss": 0.567, + "step": 6236 + }, + { + "epoch": 0.45853550948389943, + "grad_norm": 0.8008195161819458, + "learning_rate": 4.930392953093522e-06, + "loss": 0.579, + "step": 6237 + }, + { + "epoch": 0.45860902808410525, + "grad_norm": 0.8376789093017578, + "learning_rate": 4.9303703727567705e-06, + "loss": 0.5693, + "step": 6238 + }, + { + "epoch": 0.45868254668431113, + "grad_norm": 0.7625346779823303, + "learning_rate": 4.930347788809837e-06, + "loss": 0.5167, + "step": 6239 + }, + { + "epoch": 0.45875606528451696, + "grad_norm": 0.8691253662109375, + "learning_rate": 4.930325201252755e-06, + "loss": 0.5318, + "step": 6240 + }, + { + "epoch": 0.45882958388472284, + "grad_norm": 0.8610232472419739, + "learning_rate": 4.930302610085557e-06, + "loss": 0.5448, + "step": 6241 + }, + { + "epoch": 0.45890310248492866, + "grad_norm": 0.8820167183876038, + "learning_rate": 4.9302800153082786e-06, + "loss": 0.5767, + "step": 6242 + }, + { + "epoch": 0.45897662108513454, + "grad_norm": 0.8359949588775635, + "learning_rate": 4.9302574169209515e-06, + "loss": 0.5612, + "step": 6243 + }, + { + "epoch": 0.45905013968534036, + "grad_norm": 0.8384299278259277, + "learning_rate": 4.930234814923611e-06, + "loss": 0.5164, + "step": 6244 + }, + { + "epoch": 0.45912365828554624, + "grad_norm": 0.87615567445755, + "learning_rate": 4.930212209316288e-06, + "loss": 0.6112, + "step": 6245 + }, + { + "epoch": 0.4591971768857521, + "grad_norm": 0.8324925303459167, + "learning_rate": 4.93018960009902e-06, + "loss": 0.5564, + "step": 6246 + }, + { + "epoch": 0.45927069548595795, + "grad_norm": 0.8259670734405518, + "learning_rate": 4.930166987271837e-06, + "loss": 0.5689, + "step": 6247 + }, + { + "epoch": 0.4593442140861638, + "grad_norm": 0.8771891593933105, + "learning_rate": 4.930144370834775e-06, + "loss": 0.5894, + "step": 6248 + }, + { + "epoch": 0.45941773268636965, + "grad_norm": 0.8694626092910767, + "learning_rate": 4.930121750787866e-06, + "loss": 0.5563, + "step": 6249 + }, + { + "epoch": 0.45949125128657553, + "grad_norm": 0.8113878965377808, + "learning_rate": 4.930099127131144e-06, + "loss": 0.5479, + "step": 6250 + }, + { + "epoch": 0.45956476988678135, + "grad_norm": 0.7813351154327393, + "learning_rate": 4.930076499864644e-06, + "loss": 0.5332, + "step": 6251 + }, + { + "epoch": 0.45963828848698723, + "grad_norm": 0.9477707147598267, + "learning_rate": 4.930053868988397e-06, + "loss": 0.585, + "step": 6252 + }, + { + "epoch": 0.45971180708719306, + "grad_norm": 0.8489460349082947, + "learning_rate": 4.930031234502438e-06, + "loss": 0.5754, + "step": 6253 + }, + { + "epoch": 0.45978532568739894, + "grad_norm": 0.8984528183937073, + "learning_rate": 4.930008596406802e-06, + "loss": 0.5926, + "step": 6254 + }, + { + "epoch": 0.45985884428760476, + "grad_norm": 0.8487734198570251, + "learning_rate": 4.92998595470152e-06, + "loss": 0.5426, + "step": 6255 + }, + { + "epoch": 0.45993236288781064, + "grad_norm": 0.841109573841095, + "learning_rate": 4.929963309386628e-06, + "loss": 0.5727, + "step": 6256 + }, + { + "epoch": 0.46000588148801647, + "grad_norm": 0.8731792569160461, + "learning_rate": 4.929940660462158e-06, + "loss": 0.5896, + "step": 6257 + }, + { + "epoch": 0.46007940008822235, + "grad_norm": 0.8657434582710266, + "learning_rate": 4.929918007928144e-06, + "loss": 0.5612, + "step": 6258 + }, + { + "epoch": 0.46015291868842817, + "grad_norm": 0.9109693765640259, + "learning_rate": 4.929895351784621e-06, + "loss": 0.5647, + "step": 6259 + }, + { + "epoch": 0.46022643728863405, + "grad_norm": 0.8979546427726746, + "learning_rate": 4.929872692031619e-06, + "loss": 0.5968, + "step": 6260 + }, + { + "epoch": 0.4602999558888399, + "grad_norm": 0.8254179954528809, + "learning_rate": 4.929850028669177e-06, + "loss": 0.5459, + "step": 6261 + }, + { + "epoch": 0.46037347448904575, + "grad_norm": 0.8239338397979736, + "learning_rate": 4.929827361697324e-06, + "loss": 0.5694, + "step": 6262 + }, + { + "epoch": 0.4604469930892516, + "grad_norm": 0.8528491258621216, + "learning_rate": 4.929804691116096e-06, + "loss": 0.5413, + "step": 6263 + }, + { + "epoch": 0.46052051168945746, + "grad_norm": 0.8300550580024719, + "learning_rate": 4.929782016925526e-06, + "loss": 0.5687, + "step": 6264 + }, + { + "epoch": 0.4605940302896633, + "grad_norm": 0.8706163763999939, + "learning_rate": 4.929759339125649e-06, + "loss": 0.5632, + "step": 6265 + }, + { + "epoch": 0.46066754888986916, + "grad_norm": 0.8767881989479065, + "learning_rate": 4.929736657716497e-06, + "loss": 0.5245, + "step": 6266 + }, + { + "epoch": 0.460741067490075, + "grad_norm": 0.8300466537475586, + "learning_rate": 4.929713972698103e-06, + "loss": 0.5388, + "step": 6267 + }, + { + "epoch": 0.46081458609028086, + "grad_norm": 0.8974681496620178, + "learning_rate": 4.929691284070503e-06, + "loss": 0.5669, + "step": 6268 + }, + { + "epoch": 0.4608881046904867, + "grad_norm": 0.8218626976013184, + "learning_rate": 4.92966859183373e-06, + "loss": 0.5446, + "step": 6269 + }, + { + "epoch": 0.46096162329069257, + "grad_norm": 0.8421556353569031, + "learning_rate": 4.929645895987817e-06, + "loss": 0.5328, + "step": 6270 + }, + { + "epoch": 0.4610351418908984, + "grad_norm": 0.8322173953056335, + "learning_rate": 4.929623196532798e-06, + "loss": 0.5711, + "step": 6271 + }, + { + "epoch": 0.46110866049110427, + "grad_norm": 0.8571609854698181, + "learning_rate": 4.929600493468707e-06, + "loss": 0.5766, + "step": 6272 + }, + { + "epoch": 0.4611821790913101, + "grad_norm": 0.799650251865387, + "learning_rate": 4.929577786795577e-06, + "loss": 0.5438, + "step": 6273 + }, + { + "epoch": 0.461255697691516, + "grad_norm": 0.8433086276054382, + "learning_rate": 4.929555076513443e-06, + "loss": 0.5667, + "step": 6274 + }, + { + "epoch": 0.4613292162917218, + "grad_norm": 0.8506143689155579, + "learning_rate": 4.9295323626223366e-06, + "loss": 0.5801, + "step": 6275 + }, + { + "epoch": 0.4614027348919277, + "grad_norm": 0.8106059432029724, + "learning_rate": 4.929509645122295e-06, + "loss": 0.5283, + "step": 6276 + }, + { + "epoch": 0.4614762534921335, + "grad_norm": 0.8719093799591064, + "learning_rate": 4.929486924013348e-06, + "loss": 0.5462, + "step": 6277 + }, + { + "epoch": 0.4615497720923394, + "grad_norm": 0.8308266997337341, + "learning_rate": 4.929464199295532e-06, + "loss": 0.5416, + "step": 6278 + }, + { + "epoch": 0.4616232906925452, + "grad_norm": 0.8402702212333679, + "learning_rate": 4.92944147096888e-06, + "loss": 0.5356, + "step": 6279 + }, + { + "epoch": 0.4616968092927511, + "grad_norm": 0.814490795135498, + "learning_rate": 4.929418739033425e-06, + "loss": 0.5315, + "step": 6280 + }, + { + "epoch": 0.4617703278929569, + "grad_norm": 0.85410076379776, + "learning_rate": 4.929396003489202e-06, + "loss": 0.5343, + "step": 6281 + }, + { + "epoch": 0.4618438464931628, + "grad_norm": 0.9029418230056763, + "learning_rate": 4.929373264336244e-06, + "loss": 0.5736, + "step": 6282 + }, + { + "epoch": 0.4619173650933686, + "grad_norm": 0.8448193669319153, + "learning_rate": 4.929350521574587e-06, + "loss": 0.55, + "step": 6283 + }, + { + "epoch": 0.4619908836935745, + "grad_norm": 0.9048749804496765, + "learning_rate": 4.92932777520426e-06, + "loss": 0.5859, + "step": 6284 + }, + { + "epoch": 0.4620644022937803, + "grad_norm": 0.8896695971488953, + "learning_rate": 4.929305025225301e-06, + "loss": 0.5642, + "step": 6285 + }, + { + "epoch": 0.4621379208939862, + "grad_norm": 0.8630073666572571, + "learning_rate": 4.929282271637742e-06, + "loss": 0.581, + "step": 6286 + }, + { + "epoch": 0.462211439494192, + "grad_norm": 0.8002333045005798, + "learning_rate": 4.929259514441617e-06, + "loss": 0.5475, + "step": 6287 + }, + { + "epoch": 0.4622849580943979, + "grad_norm": 0.8781028389930725, + "learning_rate": 4.929236753636961e-06, + "loss": 0.5473, + "step": 6288 + }, + { + "epoch": 0.4623584766946037, + "grad_norm": 0.8122872114181519, + "learning_rate": 4.929213989223806e-06, + "loss": 0.5275, + "step": 6289 + }, + { + "epoch": 0.4624319952948096, + "grad_norm": 0.892083466053009, + "learning_rate": 4.929191221202187e-06, + "loss": 0.5794, + "step": 6290 + }, + { + "epoch": 0.46250551389501543, + "grad_norm": 0.8779491782188416, + "learning_rate": 4.929168449572137e-06, + "loss": 0.5112, + "step": 6291 + }, + { + "epoch": 0.4625790324952213, + "grad_norm": 0.8635448813438416, + "learning_rate": 4.929145674333691e-06, + "loss": 0.5848, + "step": 6292 + }, + { + "epoch": 0.46265255109542713, + "grad_norm": 0.8387857675552368, + "learning_rate": 4.929122895486882e-06, + "loss": 0.5358, + "step": 6293 + }, + { + "epoch": 0.462726069695633, + "grad_norm": 0.805679440498352, + "learning_rate": 4.929100113031744e-06, + "loss": 0.5187, + "step": 6294 + }, + { + "epoch": 0.46279958829583884, + "grad_norm": 0.8442584872245789, + "learning_rate": 4.92907732696831e-06, + "loss": 0.5568, + "step": 6295 + }, + { + "epoch": 0.4628731068960447, + "grad_norm": 0.855218768119812, + "learning_rate": 4.929054537296616e-06, + "loss": 0.5398, + "step": 6296 + }, + { + "epoch": 0.46294662549625054, + "grad_norm": 0.8708183169364929, + "learning_rate": 4.929031744016694e-06, + "loss": 0.5105, + "step": 6297 + }, + { + "epoch": 0.4630201440964564, + "grad_norm": 0.8772466778755188, + "learning_rate": 4.929008947128577e-06, + "loss": 0.5819, + "step": 6298 + }, + { + "epoch": 0.46309366269666224, + "grad_norm": 0.8227020502090454, + "learning_rate": 4.928986146632302e-06, + "loss": 0.5678, + "step": 6299 + }, + { + "epoch": 0.4631671812968681, + "grad_norm": 0.8420547246932983, + "learning_rate": 4.9289633425279e-06, + "loss": 0.5021, + "step": 6300 + }, + { + "epoch": 0.46324069989707395, + "grad_norm": 0.797308623790741, + "learning_rate": 4.928940534815407e-06, + "loss": 0.5808, + "step": 6301 + }, + { + "epoch": 0.46331421849727983, + "grad_norm": 0.8553985953330994, + "learning_rate": 4.928917723494854e-06, + "loss": 0.5677, + "step": 6302 + }, + { + "epoch": 0.46338773709748565, + "grad_norm": 0.8864274621009827, + "learning_rate": 4.928894908566278e-06, + "loss": 0.5778, + "step": 6303 + }, + { + "epoch": 0.46346125569769153, + "grad_norm": 0.8413373827934265, + "learning_rate": 4.928872090029712e-06, + "loss": 0.5707, + "step": 6304 + }, + { + "epoch": 0.46353477429789736, + "grad_norm": 0.8185926079750061, + "learning_rate": 4.928849267885189e-06, + "loss": 0.5643, + "step": 6305 + }, + { + "epoch": 0.46360829289810324, + "grad_norm": 0.8682550191879272, + "learning_rate": 4.928826442132743e-06, + "loss": 0.5827, + "step": 6306 + }, + { + "epoch": 0.46368181149830906, + "grad_norm": 0.7894120812416077, + "learning_rate": 4.928803612772408e-06, + "loss": 0.52, + "step": 6307 + }, + { + "epoch": 0.46375533009851494, + "grad_norm": 0.8644710779190063, + "learning_rate": 4.92878077980422e-06, + "loss": 0.5592, + "step": 6308 + }, + { + "epoch": 0.46382884869872076, + "grad_norm": 0.842380940914154, + "learning_rate": 4.92875794322821e-06, + "loss": 0.6149, + "step": 6309 + }, + { + "epoch": 0.46390236729892664, + "grad_norm": 0.8315920233726501, + "learning_rate": 4.928735103044413e-06, + "loss": 0.549, + "step": 6310 + }, + { + "epoch": 0.46397588589913247, + "grad_norm": 0.8612805604934692, + "learning_rate": 4.928712259252863e-06, + "loss": 0.5673, + "step": 6311 + }, + { + "epoch": 0.46404940449933835, + "grad_norm": 0.8395251631736755, + "learning_rate": 4.928689411853594e-06, + "loss": 0.5584, + "step": 6312 + }, + { + "epoch": 0.46412292309954417, + "grad_norm": 0.8279275894165039, + "learning_rate": 4.92866656084664e-06, + "loss": 0.6016, + "step": 6313 + }, + { + "epoch": 0.46419644169975005, + "grad_norm": 0.8967268466949463, + "learning_rate": 4.928643706232035e-06, + "loss": 0.5329, + "step": 6314 + }, + { + "epoch": 0.4642699602999559, + "grad_norm": 0.8498127460479736, + "learning_rate": 4.928620848009812e-06, + "loss": 0.577, + "step": 6315 + }, + { + "epoch": 0.46434347890016175, + "grad_norm": 0.8309645056724548, + "learning_rate": 4.928597986180006e-06, + "loss": 0.5656, + "step": 6316 + }, + { + "epoch": 0.4644169975003676, + "grad_norm": 0.8382485508918762, + "learning_rate": 4.9285751207426515e-06, + "loss": 0.5613, + "step": 6317 + }, + { + "epoch": 0.46449051610057346, + "grad_norm": 0.8917602300643921, + "learning_rate": 4.928552251697781e-06, + "loss": 0.6033, + "step": 6318 + }, + { + "epoch": 0.4645640347007793, + "grad_norm": 0.8941105008125305, + "learning_rate": 4.928529379045429e-06, + "loss": 0.5845, + "step": 6319 + }, + { + "epoch": 0.46463755330098516, + "grad_norm": 0.8376110196113586, + "learning_rate": 4.92850650278563e-06, + "loss": 0.5568, + "step": 6320 + }, + { + "epoch": 0.464711071901191, + "grad_norm": 0.9007048010826111, + "learning_rate": 4.928483622918417e-06, + "loss": 0.5764, + "step": 6321 + }, + { + "epoch": 0.46478459050139687, + "grad_norm": 0.8613220453262329, + "learning_rate": 4.928460739443825e-06, + "loss": 0.5606, + "step": 6322 + }, + { + "epoch": 0.4648581091016027, + "grad_norm": 0.8995384573936462, + "learning_rate": 4.928437852361888e-06, + "loss": 0.6029, + "step": 6323 + }, + { + "epoch": 0.46493162770180857, + "grad_norm": 0.793840765953064, + "learning_rate": 4.928414961672639e-06, + "loss": 0.5325, + "step": 6324 + }, + { + "epoch": 0.4650051463020144, + "grad_norm": 0.8150179982185364, + "learning_rate": 4.928392067376112e-06, + "loss": 0.5401, + "step": 6325 + }, + { + "epoch": 0.4650786649022203, + "grad_norm": 0.8624694347381592, + "learning_rate": 4.928369169472342e-06, + "loss": 0.5868, + "step": 6326 + }, + { + "epoch": 0.4651521835024261, + "grad_norm": 0.8325203061103821, + "learning_rate": 4.928346267961363e-06, + "loss": 0.515, + "step": 6327 + }, + { + "epoch": 0.465225702102632, + "grad_norm": 0.8432071805000305, + "learning_rate": 4.928323362843208e-06, + "loss": 0.5258, + "step": 6328 + }, + { + "epoch": 0.4652992207028378, + "grad_norm": 0.8183289766311646, + "learning_rate": 4.928300454117912e-06, + "loss": 0.5599, + "step": 6329 + }, + { + "epoch": 0.4653727393030437, + "grad_norm": 0.8248687386512756, + "learning_rate": 4.928277541785509e-06, + "loss": 0.5268, + "step": 6330 + }, + { + "epoch": 0.4654462579032495, + "grad_norm": 0.8692880868911743, + "learning_rate": 4.928254625846032e-06, + "loss": 0.5314, + "step": 6331 + }, + { + "epoch": 0.4655197765034554, + "grad_norm": 0.8882659077644348, + "learning_rate": 4.928231706299516e-06, + "loss": 0.559, + "step": 6332 + }, + { + "epoch": 0.4655932951036612, + "grad_norm": 0.9010633230209351, + "learning_rate": 4.9282087831459945e-06, + "loss": 0.5544, + "step": 6333 + }, + { + "epoch": 0.4656668137038671, + "grad_norm": 0.8055613040924072, + "learning_rate": 4.9281858563855025e-06, + "loss": 0.5573, + "step": 6334 + }, + { + "epoch": 0.4657403323040729, + "grad_norm": 0.8493824005126953, + "learning_rate": 4.928162926018073e-06, + "loss": 0.5014, + "step": 6335 + }, + { + "epoch": 0.4658138509042788, + "grad_norm": 0.8269669413566589, + "learning_rate": 4.9281399920437405e-06, + "loss": 0.5636, + "step": 6336 + }, + { + "epoch": 0.4658873695044846, + "grad_norm": 0.8373110890388489, + "learning_rate": 4.928117054462538e-06, + "loss": 0.5423, + "step": 6337 + }, + { + "epoch": 0.4659608881046905, + "grad_norm": 0.8657724857330322, + "learning_rate": 4.928094113274502e-06, + "loss": 0.5415, + "step": 6338 + }, + { + "epoch": 0.4660344067048963, + "grad_norm": 0.8220646977424622, + "learning_rate": 4.9280711684796645e-06, + "loss": 0.5315, + "step": 6339 + }, + { + "epoch": 0.4661079253051022, + "grad_norm": 0.8428412079811096, + "learning_rate": 4.92804822007806e-06, + "loss": 0.5692, + "step": 6340 + }, + { + "epoch": 0.466181443905308, + "grad_norm": 0.8191500306129456, + "learning_rate": 4.928025268069723e-06, + "loss": 0.5313, + "step": 6341 + }, + { + "epoch": 0.4662549625055139, + "grad_norm": 0.9220864176750183, + "learning_rate": 4.928002312454688e-06, + "loss": 0.6064, + "step": 6342 + }, + { + "epoch": 0.4663284811057197, + "grad_norm": 0.8947920203208923, + "learning_rate": 4.9279793532329875e-06, + "loss": 0.5521, + "step": 6343 + }, + { + "epoch": 0.4664019997059256, + "grad_norm": 0.8979071378707886, + "learning_rate": 4.927956390404658e-06, + "loss": 0.5727, + "step": 6344 + }, + { + "epoch": 0.46647551830613143, + "grad_norm": 0.8289715051651001, + "learning_rate": 4.92793342396973e-06, + "loss": 0.5314, + "step": 6345 + }, + { + "epoch": 0.4665490369063373, + "grad_norm": 0.8201852440834045, + "learning_rate": 4.927910453928243e-06, + "loss": 0.5214, + "step": 6346 + }, + { + "epoch": 0.46662255550654314, + "grad_norm": 0.8741574883460999, + "learning_rate": 4.927887480280226e-06, + "loss": 0.5686, + "step": 6347 + }, + { + "epoch": 0.466696074106749, + "grad_norm": 0.8925459980964661, + "learning_rate": 4.927864503025715e-06, + "loss": 0.5539, + "step": 6348 + }, + { + "epoch": 0.46676959270695484, + "grad_norm": 0.8653926849365234, + "learning_rate": 4.927841522164745e-06, + "loss": 0.5292, + "step": 6349 + }, + { + "epoch": 0.4668431113071607, + "grad_norm": 0.8731087446212769, + "learning_rate": 4.927818537697349e-06, + "loss": 0.546, + "step": 6350 + }, + { + "epoch": 0.46691662990736654, + "grad_norm": 0.8117690682411194, + "learning_rate": 4.927795549623561e-06, + "loss": 0.5419, + "step": 6351 + }, + { + "epoch": 0.4669901485075724, + "grad_norm": 0.8614317178726196, + "learning_rate": 4.927772557943416e-06, + "loss": 0.5946, + "step": 6352 + }, + { + "epoch": 0.46706366710777825, + "grad_norm": 0.8393071889877319, + "learning_rate": 4.927749562656948e-06, + "loss": 0.5844, + "step": 6353 + }, + { + "epoch": 0.4671371857079841, + "grad_norm": 0.921568751335144, + "learning_rate": 4.927726563764191e-06, + "loss": 0.5936, + "step": 6354 + }, + { + "epoch": 0.46721070430818995, + "grad_norm": 0.8280810117721558, + "learning_rate": 4.927703561265179e-06, + "loss": 0.5185, + "step": 6355 + }, + { + "epoch": 0.46728422290839583, + "grad_norm": 0.8288780450820923, + "learning_rate": 4.927680555159946e-06, + "loss": 0.5524, + "step": 6356 + }, + { + "epoch": 0.46735774150860165, + "grad_norm": 0.849687397480011, + "learning_rate": 4.927657545448527e-06, + "loss": 0.4512, + "step": 6357 + }, + { + "epoch": 0.46743126010880753, + "grad_norm": 0.8603774309158325, + "learning_rate": 4.927634532130955e-06, + "loss": 0.5452, + "step": 6358 + }, + { + "epoch": 0.46750477870901336, + "grad_norm": 0.8073082566261292, + "learning_rate": 4.927611515207266e-06, + "loss": 0.5144, + "step": 6359 + }, + { + "epoch": 0.46757829730921924, + "grad_norm": 0.8576544523239136, + "learning_rate": 4.927588494677492e-06, + "loss": 0.563, + "step": 6360 + }, + { + "epoch": 0.46765181590942506, + "grad_norm": 0.8646279573440552, + "learning_rate": 4.927565470541669e-06, + "loss": 0.5598, + "step": 6361 + }, + { + "epoch": 0.46772533450963094, + "grad_norm": 0.8491887450218201, + "learning_rate": 4.927542442799829e-06, + "loss": 0.5285, + "step": 6362 + }, + { + "epoch": 0.46779885310983677, + "grad_norm": 0.8338109254837036, + "learning_rate": 4.927519411452009e-06, + "loss": 0.5571, + "step": 6363 + }, + { + "epoch": 0.46787237171004264, + "grad_norm": 0.8061416149139404, + "learning_rate": 4.927496376498241e-06, + "loss": 0.5411, + "step": 6364 + }, + { + "epoch": 0.46794589031024847, + "grad_norm": 0.8439573049545288, + "learning_rate": 4.927473337938561e-06, + "loss": 0.5311, + "step": 6365 + }, + { + "epoch": 0.46801940891045435, + "grad_norm": 0.865712583065033, + "learning_rate": 4.927450295773001e-06, + "loss": 0.5461, + "step": 6366 + }, + { + "epoch": 0.4680929275106602, + "grad_norm": 0.8339320421218872, + "learning_rate": 4.9274272500015975e-06, + "loss": 0.5906, + "step": 6367 + }, + { + "epoch": 0.46816644611086605, + "grad_norm": 0.8822190761566162, + "learning_rate": 4.927404200624384e-06, + "loss": 0.5286, + "step": 6368 + }, + { + "epoch": 0.4682399647110719, + "grad_norm": 0.8262592554092407, + "learning_rate": 4.927381147641394e-06, + "loss": 0.5497, + "step": 6369 + }, + { + "epoch": 0.46831348331127776, + "grad_norm": 0.8299071788787842, + "learning_rate": 4.927358091052662e-06, + "loss": 0.543, + "step": 6370 + }, + { + "epoch": 0.4683870019114836, + "grad_norm": 0.8392353057861328, + "learning_rate": 4.927335030858223e-06, + "loss": 0.5684, + "step": 6371 + }, + { + "epoch": 0.46846052051168946, + "grad_norm": 0.8850454092025757, + "learning_rate": 4.92731196705811e-06, + "loss": 0.5834, + "step": 6372 + }, + { + "epoch": 0.4685340391118953, + "grad_norm": 0.9239068627357483, + "learning_rate": 4.927288899652358e-06, + "loss": 0.5819, + "step": 6373 + }, + { + "epoch": 0.46860755771210116, + "grad_norm": 0.827019989490509, + "learning_rate": 4.927265828641002e-06, + "loss": 0.517, + "step": 6374 + }, + { + "epoch": 0.468681076312307, + "grad_norm": 0.8556103706359863, + "learning_rate": 4.927242754024075e-06, + "loss": 0.5665, + "step": 6375 + }, + { + "epoch": 0.46875459491251287, + "grad_norm": 0.8498051762580872, + "learning_rate": 4.927219675801612e-06, + "loss": 0.556, + "step": 6376 + }, + { + "epoch": 0.4688281135127187, + "grad_norm": 0.8164974451065063, + "learning_rate": 4.927196593973647e-06, + "loss": 0.522, + "step": 6377 + }, + { + "epoch": 0.46890163211292457, + "grad_norm": 0.8464735150337219, + "learning_rate": 4.927173508540215e-06, + "loss": 0.5593, + "step": 6378 + }, + { + "epoch": 0.4689751507131304, + "grad_norm": 0.8015729188919067, + "learning_rate": 4.9271504195013485e-06, + "loss": 0.5003, + "step": 6379 + }, + { + "epoch": 0.4690486693133363, + "grad_norm": 0.8070688247680664, + "learning_rate": 4.927127326857084e-06, + "loss": 0.5439, + "step": 6380 + }, + { + "epoch": 0.4691221879135421, + "grad_norm": 0.8433266282081604, + "learning_rate": 4.927104230607453e-06, + "loss": 0.5261, + "step": 6381 + }, + { + "epoch": 0.469195706513748, + "grad_norm": 0.8288190364837646, + "learning_rate": 4.927081130752494e-06, + "loss": 0.5484, + "step": 6382 + }, + { + "epoch": 0.4692692251139538, + "grad_norm": 0.9150087833404541, + "learning_rate": 4.9270580272922375e-06, + "loss": 0.5726, + "step": 6383 + }, + { + "epoch": 0.4693427437141597, + "grad_norm": 0.8967560529708862, + "learning_rate": 4.9270349202267194e-06, + "loss": 0.5325, + "step": 6384 + }, + { + "epoch": 0.46941626231436556, + "grad_norm": 0.8573803901672363, + "learning_rate": 4.927011809555974e-06, + "loss": 0.5742, + "step": 6385 + }, + { + "epoch": 0.4694897809145714, + "grad_norm": 0.8405991196632385, + "learning_rate": 4.926988695280035e-06, + "loss": 0.5359, + "step": 6386 + }, + { + "epoch": 0.46956329951477727, + "grad_norm": 0.8406454920768738, + "learning_rate": 4.926965577398938e-06, + "loss": 0.5552, + "step": 6387 + }, + { + "epoch": 0.4696368181149831, + "grad_norm": 0.8408031463623047, + "learning_rate": 4.926942455912716e-06, + "loss": 0.5252, + "step": 6388 + }, + { + "epoch": 0.46971033671518897, + "grad_norm": 0.8714610934257507, + "learning_rate": 4.926919330821403e-06, + "loss": 0.5749, + "step": 6389 + }, + { + "epoch": 0.4697838553153948, + "grad_norm": 0.827675461769104, + "learning_rate": 4.926896202125036e-06, + "loss": 0.5502, + "step": 6390 + }, + { + "epoch": 0.4698573739156007, + "grad_norm": 0.8420812487602234, + "learning_rate": 4.926873069823647e-06, + "loss": 0.5569, + "step": 6391 + }, + { + "epoch": 0.4699308925158065, + "grad_norm": 0.8393003344535828, + "learning_rate": 4.92684993391727e-06, + "loss": 0.5615, + "step": 6392 + }, + { + "epoch": 0.4700044111160124, + "grad_norm": 0.8873884677886963, + "learning_rate": 4.926826794405941e-06, + "loss": 0.6024, + "step": 6393 + }, + { + "epoch": 0.4700779297162182, + "grad_norm": 0.8952465653419495, + "learning_rate": 4.926803651289694e-06, + "loss": 0.6147, + "step": 6394 + }, + { + "epoch": 0.4701514483164241, + "grad_norm": 0.8305584192276001, + "learning_rate": 4.926780504568563e-06, + "loss": 0.5449, + "step": 6395 + }, + { + "epoch": 0.4702249669166299, + "grad_norm": 0.8583222031593323, + "learning_rate": 4.926757354242583e-06, + "loss": 0.5758, + "step": 6396 + }, + { + "epoch": 0.4702984855168358, + "grad_norm": 0.8581609129905701, + "learning_rate": 4.926734200311786e-06, + "loss": 0.564, + "step": 6397 + }, + { + "epoch": 0.4703720041170416, + "grad_norm": 0.8456708788871765, + "learning_rate": 4.92671104277621e-06, + "loss": 0.5401, + "step": 6398 + }, + { + "epoch": 0.4704455227172475, + "grad_norm": 0.9116605520248413, + "learning_rate": 4.9266878816358864e-06, + "loss": 0.5751, + "step": 6399 + }, + { + "epoch": 0.4705190413174533, + "grad_norm": 0.8772822618484497, + "learning_rate": 4.926664716890851e-06, + "loss": 0.5992, + "step": 6400 + }, + { + "epoch": 0.4705925599176592, + "grad_norm": 0.8839959502220154, + "learning_rate": 4.926641548541139e-06, + "loss": 0.5995, + "step": 6401 + }, + { + "epoch": 0.470666078517865, + "grad_norm": 0.861404299736023, + "learning_rate": 4.926618376586783e-06, + "loss": 0.5698, + "step": 6402 + }, + { + "epoch": 0.4707395971180709, + "grad_norm": 0.8701748847961426, + "learning_rate": 4.9265952010278185e-06, + "loss": 0.4889, + "step": 6403 + }, + { + "epoch": 0.4708131157182767, + "grad_norm": 0.8251132965087891, + "learning_rate": 4.92657202186428e-06, + "loss": 0.5364, + "step": 6404 + }, + { + "epoch": 0.4708866343184826, + "grad_norm": 0.8936349153518677, + "learning_rate": 4.926548839096201e-06, + "loss": 0.5859, + "step": 6405 + }, + { + "epoch": 0.4709601529186884, + "grad_norm": 0.7959588170051575, + "learning_rate": 4.926525652723617e-06, + "loss": 0.5395, + "step": 6406 + }, + { + "epoch": 0.4710336715188943, + "grad_norm": 0.8543835282325745, + "learning_rate": 4.926502462746562e-06, + "loss": 0.603, + "step": 6407 + }, + { + "epoch": 0.4711071901191001, + "grad_norm": 0.8915494680404663, + "learning_rate": 4.92647926916507e-06, + "loss": 0.547, + "step": 6408 + }, + { + "epoch": 0.471180708719306, + "grad_norm": 0.8075950741767883, + "learning_rate": 4.926456071979177e-06, + "loss": 0.5484, + "step": 6409 + }, + { + "epoch": 0.47125422731951183, + "grad_norm": 0.8044339418411255, + "learning_rate": 4.926432871188916e-06, + "loss": 0.5529, + "step": 6410 + }, + { + "epoch": 0.4713277459197177, + "grad_norm": 0.8300646543502808, + "learning_rate": 4.9264096667943205e-06, + "loss": 0.5372, + "step": 6411 + }, + { + "epoch": 0.47140126451992354, + "grad_norm": 0.8380259275436401, + "learning_rate": 4.926386458795428e-06, + "loss": 0.564, + "step": 6412 + }, + { + "epoch": 0.4714747831201294, + "grad_norm": 0.8327149152755737, + "learning_rate": 4.926363247192271e-06, + "loss": 0.561, + "step": 6413 + }, + { + "epoch": 0.47154830172033524, + "grad_norm": 0.8802539110183716, + "learning_rate": 4.926340031984883e-06, + "loss": 0.5241, + "step": 6414 + }, + { + "epoch": 0.4716218203205411, + "grad_norm": 0.838855504989624, + "learning_rate": 4.926316813173301e-06, + "loss": 0.5342, + "step": 6415 + }, + { + "epoch": 0.47169533892074694, + "grad_norm": 0.8218684196472168, + "learning_rate": 4.926293590757558e-06, + "loss": 0.5189, + "step": 6416 + }, + { + "epoch": 0.4717688575209528, + "grad_norm": 0.8250632882118225, + "learning_rate": 4.926270364737689e-06, + "loss": 0.5047, + "step": 6417 + }, + { + "epoch": 0.47184237612115865, + "grad_norm": 0.8065334558486938, + "learning_rate": 4.926247135113728e-06, + "loss": 0.5443, + "step": 6418 + }, + { + "epoch": 0.4719158947213645, + "grad_norm": 0.8522593379020691, + "learning_rate": 4.926223901885709e-06, + "loss": 0.5361, + "step": 6419 + }, + { + "epoch": 0.47198941332157035, + "grad_norm": 0.8261318802833557, + "learning_rate": 4.926200665053668e-06, + "loss": 0.5799, + "step": 6420 + }, + { + "epoch": 0.47206293192177623, + "grad_norm": 0.8981754183769226, + "learning_rate": 4.9261774246176384e-06, + "loss": 0.5607, + "step": 6421 + }, + { + "epoch": 0.47213645052198205, + "grad_norm": 0.9068091511726379, + "learning_rate": 4.926154180577655e-06, + "loss": 0.5695, + "step": 6422 + }, + { + "epoch": 0.47220996912218793, + "grad_norm": 0.8372043967247009, + "learning_rate": 4.926130932933753e-06, + "loss": 0.5416, + "step": 6423 + }, + { + "epoch": 0.47228348772239376, + "grad_norm": 0.8400736451148987, + "learning_rate": 4.926107681685967e-06, + "loss": 0.5543, + "step": 6424 + }, + { + "epoch": 0.47235700632259964, + "grad_norm": 0.8380042314529419, + "learning_rate": 4.9260844268343285e-06, + "loss": 0.5906, + "step": 6425 + }, + { + "epoch": 0.47243052492280546, + "grad_norm": 0.8136682510375977, + "learning_rate": 4.926061168378876e-06, + "loss": 0.5428, + "step": 6426 + }, + { + "epoch": 0.47250404352301134, + "grad_norm": 0.8348698019981384, + "learning_rate": 4.926037906319643e-06, + "loss": 0.5441, + "step": 6427 + }, + { + "epoch": 0.47257756212321717, + "grad_norm": 0.8881003260612488, + "learning_rate": 4.926014640656663e-06, + "loss": 0.6336, + "step": 6428 + }, + { + "epoch": 0.47265108072342304, + "grad_norm": 0.8358798623085022, + "learning_rate": 4.925991371389971e-06, + "loss": 0.5095, + "step": 6429 + }, + { + "epoch": 0.47272459932362887, + "grad_norm": 0.8274900913238525, + "learning_rate": 4.925968098519601e-06, + "loss": 0.5619, + "step": 6430 + }, + { + "epoch": 0.47279811792383475, + "grad_norm": 0.877854585647583, + "learning_rate": 4.925944822045589e-06, + "loss": 0.6018, + "step": 6431 + }, + { + "epoch": 0.4728716365240406, + "grad_norm": 0.8212347626686096, + "learning_rate": 4.925921541967969e-06, + "loss": 0.5458, + "step": 6432 + }, + { + "epoch": 0.47294515512424645, + "grad_norm": 0.8099265694618225, + "learning_rate": 4.925898258286776e-06, + "loss": 0.5038, + "step": 6433 + }, + { + "epoch": 0.4730186737244523, + "grad_norm": 0.8399066925048828, + "learning_rate": 4.925874971002042e-06, + "loss": 0.5059, + "step": 6434 + }, + { + "epoch": 0.47309219232465816, + "grad_norm": 0.8311896324157715, + "learning_rate": 4.9258516801138046e-06, + "loss": 0.5668, + "step": 6435 + }, + { + "epoch": 0.473165710924864, + "grad_norm": 0.9054107666015625, + "learning_rate": 4.925828385622098e-06, + "loss": 0.5623, + "step": 6436 + }, + { + "epoch": 0.47323922952506986, + "grad_norm": 0.859846830368042, + "learning_rate": 4.925805087526955e-06, + "loss": 0.5284, + "step": 6437 + }, + { + "epoch": 0.4733127481252757, + "grad_norm": 0.8571858406066895, + "learning_rate": 4.925781785828412e-06, + "loss": 0.5813, + "step": 6438 + }, + { + "epoch": 0.47338626672548156, + "grad_norm": 0.9071868062019348, + "learning_rate": 4.925758480526503e-06, + "loss": 0.6379, + "step": 6439 + }, + { + "epoch": 0.4734597853256874, + "grad_norm": 0.833840548992157, + "learning_rate": 4.925735171621263e-06, + "loss": 0.5284, + "step": 6440 + }, + { + "epoch": 0.47353330392589327, + "grad_norm": 0.8250616192817688, + "learning_rate": 4.925711859112725e-06, + "loss": 0.5362, + "step": 6441 + }, + { + "epoch": 0.4736068225260991, + "grad_norm": 0.8285875916481018, + "learning_rate": 4.925688543000926e-06, + "loss": 0.534, + "step": 6442 + }, + { + "epoch": 0.47368034112630497, + "grad_norm": 0.8240660429000854, + "learning_rate": 4.925665223285899e-06, + "loss": 0.5674, + "step": 6443 + }, + { + "epoch": 0.4737538597265108, + "grad_norm": 0.9051286578178406, + "learning_rate": 4.925641899967679e-06, + "loss": 0.573, + "step": 6444 + }, + { + "epoch": 0.4738273783267167, + "grad_norm": 0.8639701008796692, + "learning_rate": 4.925618573046302e-06, + "loss": 0.6001, + "step": 6445 + }, + { + "epoch": 0.4739008969269225, + "grad_norm": 0.8598604798316956, + "learning_rate": 4.9255952425218005e-06, + "loss": 0.5388, + "step": 6446 + }, + { + "epoch": 0.4739744155271284, + "grad_norm": 0.8855161070823669, + "learning_rate": 4.925571908394209e-06, + "loss": 0.5656, + "step": 6447 + }, + { + "epoch": 0.4740479341273342, + "grad_norm": 0.8853552937507629, + "learning_rate": 4.925548570663565e-06, + "loss": 0.5928, + "step": 6448 + }, + { + "epoch": 0.4741214527275401, + "grad_norm": 0.8270121812820435, + "learning_rate": 4.9255252293299e-06, + "loss": 0.5475, + "step": 6449 + }, + { + "epoch": 0.4741949713277459, + "grad_norm": 0.8687707781791687, + "learning_rate": 4.925501884393251e-06, + "loss": 0.5714, + "step": 6450 + }, + { + "epoch": 0.4742684899279518, + "grad_norm": 0.8589413166046143, + "learning_rate": 4.925478535853652e-06, + "loss": 0.5632, + "step": 6451 + }, + { + "epoch": 0.4743420085281576, + "grad_norm": 0.8629832863807678, + "learning_rate": 4.925455183711137e-06, + "loss": 0.5559, + "step": 6452 + }, + { + "epoch": 0.4744155271283635, + "grad_norm": 0.845632016658783, + "learning_rate": 4.925431827965741e-06, + "loss": 0.5514, + "step": 6453 + }, + { + "epoch": 0.4744890457285693, + "grad_norm": 0.8532368540763855, + "learning_rate": 4.925408468617499e-06, + "loss": 0.5626, + "step": 6454 + }, + { + "epoch": 0.4745625643287752, + "grad_norm": 0.8266406655311584, + "learning_rate": 4.9253851056664455e-06, + "loss": 0.5629, + "step": 6455 + }, + { + "epoch": 0.474636082928981, + "grad_norm": 0.817279577255249, + "learning_rate": 4.925361739112615e-06, + "loss": 0.5555, + "step": 6456 + }, + { + "epoch": 0.4747096015291869, + "grad_norm": 0.9014142751693726, + "learning_rate": 4.925338368956043e-06, + "loss": 0.5978, + "step": 6457 + }, + { + "epoch": 0.4747831201293927, + "grad_norm": 0.8564525246620178, + "learning_rate": 4.925314995196764e-06, + "loss": 0.5643, + "step": 6458 + }, + { + "epoch": 0.4748566387295986, + "grad_norm": 0.8123669624328613, + "learning_rate": 4.925291617834811e-06, + "loss": 0.5462, + "step": 6459 + }, + { + "epoch": 0.4749301573298044, + "grad_norm": 0.8380401134490967, + "learning_rate": 4.925268236870221e-06, + "loss": 0.5108, + "step": 6460 + }, + { + "epoch": 0.4750036759300103, + "grad_norm": 0.8663331270217896, + "learning_rate": 4.925244852303028e-06, + "loss": 0.5336, + "step": 6461 + }, + { + "epoch": 0.47507719453021613, + "grad_norm": 0.8279498815536499, + "learning_rate": 4.925221464133266e-06, + "loss": 0.572, + "step": 6462 + }, + { + "epoch": 0.475150713130422, + "grad_norm": 0.8568340539932251, + "learning_rate": 4.92519807236097e-06, + "loss": 0.5558, + "step": 6463 + }, + { + "epoch": 0.47522423173062783, + "grad_norm": 0.8506157398223877, + "learning_rate": 4.925174676986177e-06, + "loss": 0.5149, + "step": 6464 + }, + { + "epoch": 0.4752977503308337, + "grad_norm": 0.8454721570014954, + "learning_rate": 4.925151278008918e-06, + "loss": 0.5595, + "step": 6465 + }, + { + "epoch": 0.47537126893103954, + "grad_norm": 0.855911135673523, + "learning_rate": 4.92512787542923e-06, + "loss": 0.5671, + "step": 6466 + }, + { + "epoch": 0.4754447875312454, + "grad_norm": 0.820223867893219, + "learning_rate": 4.925104469247147e-06, + "loss": 0.5707, + "step": 6467 + }, + { + "epoch": 0.47551830613145124, + "grad_norm": 0.8518300652503967, + "learning_rate": 4.925081059462705e-06, + "loss": 0.5366, + "step": 6468 + }, + { + "epoch": 0.4755918247316571, + "grad_norm": 0.830791175365448, + "learning_rate": 4.925057646075937e-06, + "loss": 0.5574, + "step": 6469 + }, + { + "epoch": 0.47566534333186294, + "grad_norm": 0.8944136500358582, + "learning_rate": 4.925034229086879e-06, + "loss": 0.5754, + "step": 6470 + }, + { + "epoch": 0.4757388619320688, + "grad_norm": 0.889289915561676, + "learning_rate": 4.9250108084955665e-06, + "loss": 0.5489, + "step": 6471 + }, + { + "epoch": 0.47581238053227465, + "grad_norm": 0.8413439393043518, + "learning_rate": 4.924987384302031e-06, + "loss": 0.532, + "step": 6472 + }, + { + "epoch": 0.4758858991324805, + "grad_norm": 0.8277692198753357, + "learning_rate": 4.924963956506311e-06, + "loss": 0.5767, + "step": 6473 + }, + { + "epoch": 0.47595941773268635, + "grad_norm": 0.878512978553772, + "learning_rate": 4.92494052510844e-06, + "loss": 0.6023, + "step": 6474 + }, + { + "epoch": 0.47603293633289223, + "grad_norm": 0.85612952709198, + "learning_rate": 4.9249170901084516e-06, + "loss": 0.5923, + "step": 6475 + }, + { + "epoch": 0.47610645493309806, + "grad_norm": 0.8508586883544922, + "learning_rate": 4.9248936515063826e-06, + "loss": 0.594, + "step": 6476 + }, + { + "epoch": 0.47617997353330394, + "grad_norm": 0.8574453592300415, + "learning_rate": 4.924870209302267e-06, + "loss": 0.565, + "step": 6477 + }, + { + "epoch": 0.47625349213350976, + "grad_norm": 0.8557705283164978, + "learning_rate": 4.924846763496139e-06, + "loss": 0.5453, + "step": 6478 + }, + { + "epoch": 0.47632701073371564, + "grad_norm": 0.9119986891746521, + "learning_rate": 4.924823314088033e-06, + "loss": 0.5497, + "step": 6479 + }, + { + "epoch": 0.47640052933392146, + "grad_norm": 0.8576581478118896, + "learning_rate": 4.924799861077986e-06, + "loss": 0.5539, + "step": 6480 + }, + { + "epoch": 0.47647404793412734, + "grad_norm": 0.8579146862030029, + "learning_rate": 4.924776404466032e-06, + "loss": 0.5468, + "step": 6481 + }, + { + "epoch": 0.47654756653433317, + "grad_norm": 0.8968143463134766, + "learning_rate": 4.924752944252204e-06, + "loss": 0.5959, + "step": 6482 + }, + { + "epoch": 0.47662108513453905, + "grad_norm": 0.815956175327301, + "learning_rate": 4.9247294804365394e-06, + "loss": 0.5443, + "step": 6483 + }, + { + "epoch": 0.47669460373474487, + "grad_norm": 0.8145313262939453, + "learning_rate": 4.924706013019072e-06, + "loss": 0.5558, + "step": 6484 + }, + { + "epoch": 0.47676812233495075, + "grad_norm": 0.7846149206161499, + "learning_rate": 4.924682541999836e-06, + "loss": 0.5359, + "step": 6485 + }, + { + "epoch": 0.4768416409351566, + "grad_norm": 0.8311910033226013, + "learning_rate": 4.924659067378866e-06, + "loss": 0.5339, + "step": 6486 + }, + { + "epoch": 0.47691515953536245, + "grad_norm": 0.8683142066001892, + "learning_rate": 4.924635589156199e-06, + "loss": 0.5781, + "step": 6487 + }, + { + "epoch": 0.4769886781355683, + "grad_norm": 0.8020312190055847, + "learning_rate": 4.924612107331869e-06, + "loss": 0.5373, + "step": 6488 + }, + { + "epoch": 0.47706219673577416, + "grad_norm": 0.8637334704399109, + "learning_rate": 4.924588621905909e-06, + "loss": 0.5395, + "step": 6489 + }, + { + "epoch": 0.47713571533598, + "grad_norm": 0.8837231397628784, + "learning_rate": 4.924565132878356e-06, + "loss": 0.6028, + "step": 6490 + }, + { + "epoch": 0.47720923393618586, + "grad_norm": 0.839522659778595, + "learning_rate": 4.924541640249245e-06, + "loss": 0.527, + "step": 6491 + }, + { + "epoch": 0.4772827525363917, + "grad_norm": 0.8449727296829224, + "learning_rate": 4.924518144018609e-06, + "loss": 0.5435, + "step": 6492 + }, + { + "epoch": 0.47735627113659757, + "grad_norm": 0.8622677326202393, + "learning_rate": 4.924494644186485e-06, + "loss": 0.5704, + "step": 6493 + }, + { + "epoch": 0.4774297897368034, + "grad_norm": 0.8420141935348511, + "learning_rate": 4.924471140752906e-06, + "loss": 0.5489, + "step": 6494 + }, + { + "epoch": 0.47750330833700927, + "grad_norm": 0.8037816882133484, + "learning_rate": 4.924447633717909e-06, + "loss": 0.538, + "step": 6495 + }, + { + "epoch": 0.4775768269372151, + "grad_norm": 0.8026167750358582, + "learning_rate": 4.924424123081527e-06, + "loss": 0.5693, + "step": 6496 + }, + { + "epoch": 0.477650345537421, + "grad_norm": 0.8964582085609436, + "learning_rate": 4.924400608843796e-06, + "loss": 0.5495, + "step": 6497 + }, + { + "epoch": 0.4777238641376268, + "grad_norm": 0.8222034573554993, + "learning_rate": 4.924377091004751e-06, + "loss": 0.5544, + "step": 6498 + }, + { + "epoch": 0.4777973827378327, + "grad_norm": 0.8451371788978577, + "learning_rate": 4.924353569564427e-06, + "loss": 0.5648, + "step": 6499 + }, + { + "epoch": 0.4778709013380385, + "grad_norm": 0.8243440985679626, + "learning_rate": 4.924330044522857e-06, + "loss": 0.4935, + "step": 6500 + }, + { + "epoch": 0.4779444199382444, + "grad_norm": 0.8675200939178467, + "learning_rate": 4.924306515880078e-06, + "loss": 0.516, + "step": 6501 + }, + { + "epoch": 0.4780179385384502, + "grad_norm": 0.8378247618675232, + "learning_rate": 4.9242829836361245e-06, + "loss": 0.5807, + "step": 6502 + }, + { + "epoch": 0.4780914571386561, + "grad_norm": 0.8289957642555237, + "learning_rate": 4.924259447791032e-06, + "loss": 0.5467, + "step": 6503 + }, + { + "epoch": 0.4781649757388619, + "grad_norm": 0.8351048827171326, + "learning_rate": 4.9242359083448335e-06, + "loss": 0.5332, + "step": 6504 + }, + { + "epoch": 0.4782384943390678, + "grad_norm": 0.8651754856109619, + "learning_rate": 4.924212365297566e-06, + "loss": 0.5442, + "step": 6505 + }, + { + "epoch": 0.4783120129392736, + "grad_norm": 0.8301790356636047, + "learning_rate": 4.924188818649264e-06, + "loss": 0.5094, + "step": 6506 + }, + { + "epoch": 0.4783855315394795, + "grad_norm": 0.8464133739471436, + "learning_rate": 4.924165268399963e-06, + "loss": 0.529, + "step": 6507 + }, + { + "epoch": 0.4784590501396853, + "grad_norm": 0.8412655591964722, + "learning_rate": 4.924141714549696e-06, + "loss": 0.5185, + "step": 6508 + }, + { + "epoch": 0.4785325687398912, + "grad_norm": 0.8394877314567566, + "learning_rate": 4.9241181570984995e-06, + "loss": 0.545, + "step": 6509 + }, + { + "epoch": 0.478606087340097, + "grad_norm": 0.8850834965705872, + "learning_rate": 4.924094596046408e-06, + "loss": 0.5776, + "step": 6510 + }, + { + "epoch": 0.4786796059403029, + "grad_norm": 0.8687035441398621, + "learning_rate": 4.9240710313934574e-06, + "loss": 0.5998, + "step": 6511 + }, + { + "epoch": 0.4787531245405087, + "grad_norm": 0.8346076011657715, + "learning_rate": 4.924047463139681e-06, + "loss": 0.5459, + "step": 6512 + }, + { + "epoch": 0.4788266431407146, + "grad_norm": 0.8432566523551941, + "learning_rate": 4.9240238912851165e-06, + "loss": 0.5477, + "step": 6513 + }, + { + "epoch": 0.4789001617409204, + "grad_norm": 0.8937879800796509, + "learning_rate": 4.924000315829797e-06, + "loss": 0.5836, + "step": 6514 + }, + { + "epoch": 0.4789736803411263, + "grad_norm": 0.9063152074813843, + "learning_rate": 4.923976736773757e-06, + "loss": 0.6012, + "step": 6515 + }, + { + "epoch": 0.47904719894133213, + "grad_norm": 0.8027490973472595, + "learning_rate": 4.923953154117032e-06, + "loss": 0.56, + "step": 6516 + }, + { + "epoch": 0.479120717541538, + "grad_norm": 0.850821852684021, + "learning_rate": 4.923929567859658e-06, + "loss": 0.537, + "step": 6517 + }, + { + "epoch": 0.47919423614174383, + "grad_norm": 0.8668879866600037, + "learning_rate": 4.9239059780016685e-06, + "loss": 0.5666, + "step": 6518 + }, + { + "epoch": 0.4792677547419497, + "grad_norm": 0.8573931455612183, + "learning_rate": 4.9238823845431e-06, + "loss": 0.5579, + "step": 6519 + }, + { + "epoch": 0.47934127334215554, + "grad_norm": 0.8029609322547913, + "learning_rate": 4.923858787483986e-06, + "loss": 0.4863, + "step": 6520 + }, + { + "epoch": 0.4794147919423614, + "grad_norm": 0.8688554763793945, + "learning_rate": 4.9238351868243635e-06, + "loss": 0.5506, + "step": 6521 + }, + { + "epoch": 0.47948831054256724, + "grad_norm": 0.8082684278488159, + "learning_rate": 4.923811582564267e-06, + "loss": 0.5356, + "step": 6522 + }, + { + "epoch": 0.4795618291427731, + "grad_norm": 0.869727373123169, + "learning_rate": 4.923787974703731e-06, + "loss": 0.5802, + "step": 6523 + }, + { + "epoch": 0.479635347742979, + "grad_norm": 0.8698345422744751, + "learning_rate": 4.92376436324279e-06, + "loss": 0.5448, + "step": 6524 + }, + { + "epoch": 0.4797088663431848, + "grad_norm": 0.856268584728241, + "learning_rate": 4.923740748181479e-06, + "loss": 0.5543, + "step": 6525 + }, + { + "epoch": 0.4797823849433907, + "grad_norm": 0.8798048496246338, + "learning_rate": 4.923717129519836e-06, + "loss": 0.5828, + "step": 6526 + }, + { + "epoch": 0.47985590354359653, + "grad_norm": 0.8574510216712952, + "learning_rate": 4.923693507257892e-06, + "loss": 0.5454, + "step": 6527 + }, + { + "epoch": 0.4799294221438024, + "grad_norm": 0.840950608253479, + "learning_rate": 4.923669881395685e-06, + "loss": 0.5431, + "step": 6528 + }, + { + "epoch": 0.48000294074400823, + "grad_norm": 0.8688692450523376, + "learning_rate": 4.923646251933249e-06, + "loss": 0.5915, + "step": 6529 + }, + { + "epoch": 0.4800764593442141, + "grad_norm": 0.8351396918296814, + "learning_rate": 4.92362261887062e-06, + "loss": 0.5699, + "step": 6530 + }, + { + "epoch": 0.48014997794441994, + "grad_norm": 0.8452564477920532, + "learning_rate": 4.92359898220783e-06, + "loss": 0.5665, + "step": 6531 + }, + { + "epoch": 0.4802234965446258, + "grad_norm": 0.8749881982803345, + "learning_rate": 4.923575341944919e-06, + "loss": 0.5925, + "step": 6532 + }, + { + "epoch": 0.48029701514483164, + "grad_norm": 0.8339272141456604, + "learning_rate": 4.923551698081918e-06, + "loss": 0.5709, + "step": 6533 + }, + { + "epoch": 0.4803705337450375, + "grad_norm": 0.856990396976471, + "learning_rate": 4.923528050618864e-06, + "loss": 0.5849, + "step": 6534 + }, + { + "epoch": 0.48044405234524334, + "grad_norm": 0.8535630702972412, + "learning_rate": 4.923504399555792e-06, + "loss": 0.567, + "step": 6535 + }, + { + "epoch": 0.4805175709454492, + "grad_norm": 0.8195483684539795, + "learning_rate": 4.923480744892736e-06, + "loss": 0.5505, + "step": 6536 + }, + { + "epoch": 0.48059108954565505, + "grad_norm": 0.8842929005622864, + "learning_rate": 4.9234570866297325e-06, + "loss": 0.5427, + "step": 6537 + }, + { + "epoch": 0.4806646081458609, + "grad_norm": 0.8654270172119141, + "learning_rate": 4.923433424766817e-06, + "loss": 0.5503, + "step": 6538 + }, + { + "epoch": 0.48073812674606675, + "grad_norm": 0.8887842297554016, + "learning_rate": 4.9234097593040235e-06, + "loss": 0.5283, + "step": 6539 + }, + { + "epoch": 0.48081164534627263, + "grad_norm": 0.8742406964302063, + "learning_rate": 4.923386090241387e-06, + "loss": 0.5897, + "step": 6540 + }, + { + "epoch": 0.48088516394647846, + "grad_norm": 0.8705165982246399, + "learning_rate": 4.923362417578944e-06, + "loss": 0.6062, + "step": 6541 + }, + { + "epoch": 0.48095868254668434, + "grad_norm": 0.8421162962913513, + "learning_rate": 4.923338741316727e-06, + "loss": 0.5083, + "step": 6542 + }, + { + "epoch": 0.48103220114689016, + "grad_norm": 0.8643703460693359, + "learning_rate": 4.923315061454776e-06, + "loss": 0.5202, + "step": 6543 + }, + { + "epoch": 0.48110571974709604, + "grad_norm": 0.8531194925308228, + "learning_rate": 4.923291377993121e-06, + "loss": 0.5746, + "step": 6544 + }, + { + "epoch": 0.48117923834730186, + "grad_norm": 0.8185316324234009, + "learning_rate": 4.9232676909318e-06, + "loss": 0.4939, + "step": 6545 + }, + { + "epoch": 0.48125275694750774, + "grad_norm": 0.8338977694511414, + "learning_rate": 4.923244000270847e-06, + "loss": 0.5603, + "step": 6546 + }, + { + "epoch": 0.48132627554771357, + "grad_norm": 0.8202105164527893, + "learning_rate": 4.9232203060102986e-06, + "loss": 0.559, + "step": 6547 + }, + { + "epoch": 0.48139979414791945, + "grad_norm": 0.8209802508354187, + "learning_rate": 4.923196608150188e-06, + "loss": 0.5047, + "step": 6548 + }, + { + "epoch": 0.48147331274812527, + "grad_norm": 0.8535754084587097, + "learning_rate": 4.923172906690552e-06, + "loss": 0.5638, + "step": 6549 + }, + { + "epoch": 0.48154683134833115, + "grad_norm": 0.8452247977256775, + "learning_rate": 4.923149201631426e-06, + "loss": 0.5121, + "step": 6550 + }, + { + "epoch": 0.481620349948537, + "grad_norm": 0.9037220478057861, + "learning_rate": 4.9231254929728445e-06, + "loss": 0.6198, + "step": 6551 + }, + { + "epoch": 0.48169386854874285, + "grad_norm": 0.8397136926651001, + "learning_rate": 4.923101780714842e-06, + "loss": 0.5379, + "step": 6552 + }, + { + "epoch": 0.4817673871489487, + "grad_norm": 0.8887553811073303, + "learning_rate": 4.923078064857455e-06, + "loss": 0.5706, + "step": 6553 + }, + { + "epoch": 0.48184090574915456, + "grad_norm": 0.9137533903121948, + "learning_rate": 4.923054345400718e-06, + "loss": 0.553, + "step": 6554 + }, + { + "epoch": 0.4819144243493604, + "grad_norm": 0.8268388509750366, + "learning_rate": 4.923030622344667e-06, + "loss": 0.5055, + "step": 6555 + }, + { + "epoch": 0.48198794294956626, + "grad_norm": 0.8468559384346008, + "learning_rate": 4.923006895689336e-06, + "loss": 0.5616, + "step": 6556 + }, + { + "epoch": 0.4820614615497721, + "grad_norm": 0.8497968912124634, + "learning_rate": 4.922983165434761e-06, + "loss": 0.5477, + "step": 6557 + }, + { + "epoch": 0.48213498014997797, + "grad_norm": 0.8032820224761963, + "learning_rate": 4.922959431580977e-06, + "loss": 0.5332, + "step": 6558 + }, + { + "epoch": 0.4822084987501838, + "grad_norm": 0.901915967464447, + "learning_rate": 4.92293569412802e-06, + "loss": 0.604, + "step": 6559 + }, + { + "epoch": 0.48228201735038967, + "grad_norm": 0.9060603976249695, + "learning_rate": 4.922911953075924e-06, + "loss": 0.5403, + "step": 6560 + }, + { + "epoch": 0.4823555359505955, + "grad_norm": 0.8322463631629944, + "learning_rate": 4.9228882084247255e-06, + "loss": 0.5248, + "step": 6561 + }, + { + "epoch": 0.4824290545508014, + "grad_norm": 0.8563789129257202, + "learning_rate": 4.922864460174459e-06, + "loss": 0.5533, + "step": 6562 + }, + { + "epoch": 0.4825025731510072, + "grad_norm": 0.8604317903518677, + "learning_rate": 4.92284070832516e-06, + "loss": 0.5806, + "step": 6563 + }, + { + "epoch": 0.4825760917512131, + "grad_norm": 0.835394561290741, + "learning_rate": 4.922816952876865e-06, + "loss": 0.549, + "step": 6564 + }, + { + "epoch": 0.4826496103514189, + "grad_norm": 0.8600257635116577, + "learning_rate": 4.922793193829607e-06, + "loss": 0.5936, + "step": 6565 + }, + { + "epoch": 0.4827231289516248, + "grad_norm": 0.8561573028564453, + "learning_rate": 4.922769431183423e-06, + "loss": 0.5609, + "step": 6566 + }, + { + "epoch": 0.4827966475518306, + "grad_norm": 0.820091962814331, + "learning_rate": 4.9227456649383464e-06, + "loss": 0.5009, + "step": 6567 + }, + { + "epoch": 0.4828701661520365, + "grad_norm": 0.904185950756073, + "learning_rate": 4.922721895094414e-06, + "loss": 0.5527, + "step": 6568 + }, + { + "epoch": 0.4829436847522423, + "grad_norm": 0.8627462387084961, + "learning_rate": 4.922698121651662e-06, + "loss": 0.5261, + "step": 6569 + }, + { + "epoch": 0.4830172033524482, + "grad_norm": 0.8347588181495667, + "learning_rate": 4.922674344610124e-06, + "loss": 0.5659, + "step": 6570 + }, + { + "epoch": 0.483090721952654, + "grad_norm": 0.8268899917602539, + "learning_rate": 4.9226505639698364e-06, + "loss": 0.5754, + "step": 6571 + }, + { + "epoch": 0.4831642405528599, + "grad_norm": 0.8733866214752197, + "learning_rate": 4.922626779730833e-06, + "loss": 0.569, + "step": 6572 + }, + { + "epoch": 0.4832377591530657, + "grad_norm": 0.833548367023468, + "learning_rate": 4.922602991893151e-06, + "loss": 0.5894, + "step": 6573 + }, + { + "epoch": 0.4833112777532716, + "grad_norm": 0.802757203578949, + "learning_rate": 4.922579200456824e-06, + "loss": 0.5226, + "step": 6574 + }, + { + "epoch": 0.4833847963534774, + "grad_norm": 0.8931647539138794, + "learning_rate": 4.922555405421889e-06, + "loss": 0.5324, + "step": 6575 + }, + { + "epoch": 0.4834583149536833, + "grad_norm": 0.8411686420440674, + "learning_rate": 4.922531606788381e-06, + "loss": 0.5358, + "step": 6576 + }, + { + "epoch": 0.4835318335538891, + "grad_norm": 0.8919180035591125, + "learning_rate": 4.9225078045563344e-06, + "loss": 0.5513, + "step": 6577 + }, + { + "epoch": 0.483605352154095, + "grad_norm": 0.8305515050888062, + "learning_rate": 4.922483998725786e-06, + "loss": 0.5489, + "step": 6578 + }, + { + "epoch": 0.4836788707543008, + "grad_norm": 0.8707249760627747, + "learning_rate": 4.922460189296769e-06, + "loss": 0.5556, + "step": 6579 + }, + { + "epoch": 0.4837523893545067, + "grad_norm": 0.8664899468421936, + "learning_rate": 4.92243637626932e-06, + "loss": 0.5428, + "step": 6580 + }, + { + "epoch": 0.48382590795471253, + "grad_norm": 0.8570267558097839, + "learning_rate": 4.922412559643475e-06, + "loss": 0.5893, + "step": 6581 + }, + { + "epoch": 0.4838994265549184, + "grad_norm": 0.8527770042419434, + "learning_rate": 4.922388739419269e-06, + "loss": 0.5846, + "step": 6582 + }, + { + "epoch": 0.48397294515512423, + "grad_norm": 0.8495640158653259, + "learning_rate": 4.922364915596737e-06, + "loss": 0.5558, + "step": 6583 + }, + { + "epoch": 0.4840464637553301, + "grad_norm": 0.8811667561531067, + "learning_rate": 4.922341088175915e-06, + "loss": 0.5257, + "step": 6584 + }, + { + "epoch": 0.48411998235553594, + "grad_norm": 0.8519558906555176, + "learning_rate": 4.922317257156837e-06, + "loss": 0.5217, + "step": 6585 + }, + { + "epoch": 0.4841935009557418, + "grad_norm": 0.8984209895133972, + "learning_rate": 4.922293422539539e-06, + "loss": 0.5839, + "step": 6586 + }, + { + "epoch": 0.48426701955594764, + "grad_norm": 0.8316890597343445, + "learning_rate": 4.922269584324058e-06, + "loss": 0.554, + "step": 6587 + }, + { + "epoch": 0.4843405381561535, + "grad_norm": 0.8461958169937134, + "learning_rate": 4.922245742510427e-06, + "loss": 0.6152, + "step": 6588 + }, + { + "epoch": 0.48441405675635935, + "grad_norm": 0.7990990877151489, + "learning_rate": 4.922221897098684e-06, + "loss": 0.5294, + "step": 6589 + }, + { + "epoch": 0.4844875753565652, + "grad_norm": 0.8434154987335205, + "learning_rate": 4.922198048088862e-06, + "loss": 0.4771, + "step": 6590 + }, + { + "epoch": 0.48456109395677105, + "grad_norm": 0.8346431255340576, + "learning_rate": 4.9221741954809976e-06, + "loss": 0.5536, + "step": 6591 + }, + { + "epoch": 0.48463461255697693, + "grad_norm": 0.844813346862793, + "learning_rate": 4.922150339275127e-06, + "loss": 0.6031, + "step": 6592 + }, + { + "epoch": 0.48470813115718275, + "grad_norm": 0.8481226563453674, + "learning_rate": 4.922126479471283e-06, + "loss": 0.5606, + "step": 6593 + }, + { + "epoch": 0.48478164975738863, + "grad_norm": 0.8373385667800903, + "learning_rate": 4.922102616069504e-06, + "loss": 0.5369, + "step": 6594 + }, + { + "epoch": 0.48485516835759446, + "grad_norm": 0.8723721504211426, + "learning_rate": 4.922078749069824e-06, + "loss": 0.5565, + "step": 6595 + }, + { + "epoch": 0.48492868695780034, + "grad_norm": 0.7891162633895874, + "learning_rate": 4.922054878472279e-06, + "loss": 0.5029, + "step": 6596 + }, + { + "epoch": 0.48500220555800616, + "grad_norm": 0.8373776078224182, + "learning_rate": 4.922031004276903e-06, + "loss": 0.5971, + "step": 6597 + }, + { + "epoch": 0.48507572415821204, + "grad_norm": 0.8476256132125854, + "learning_rate": 4.922007126483733e-06, + "loss": 0.5933, + "step": 6598 + }, + { + "epoch": 0.48514924275841786, + "grad_norm": 0.8782491683959961, + "learning_rate": 4.921983245092805e-06, + "loss": 0.5773, + "step": 6599 + }, + { + "epoch": 0.48522276135862374, + "grad_norm": 0.8695361614227295, + "learning_rate": 4.9219593601041525e-06, + "loss": 0.5282, + "step": 6600 + }, + { + "epoch": 0.48529627995882957, + "grad_norm": 0.8295812010765076, + "learning_rate": 4.921935471517812e-06, + "loss": 0.5391, + "step": 6601 + }, + { + "epoch": 0.48536979855903545, + "grad_norm": 0.8439521193504333, + "learning_rate": 4.921911579333819e-06, + "loss": 0.5827, + "step": 6602 + }, + { + "epoch": 0.48544331715924127, + "grad_norm": 0.8546578884124756, + "learning_rate": 4.921887683552209e-06, + "loss": 0.5663, + "step": 6603 + }, + { + "epoch": 0.48551683575944715, + "grad_norm": 0.8435836434364319, + "learning_rate": 4.921863784173018e-06, + "loss": 0.5426, + "step": 6604 + }, + { + "epoch": 0.485590354359653, + "grad_norm": 0.8080605268478394, + "learning_rate": 4.921839881196281e-06, + "loss": 0.5263, + "step": 6605 + }, + { + "epoch": 0.48566387295985886, + "grad_norm": 0.8710407018661499, + "learning_rate": 4.921815974622033e-06, + "loss": 0.528, + "step": 6606 + }, + { + "epoch": 0.4857373915600647, + "grad_norm": 0.8446439504623413, + "learning_rate": 4.921792064450309e-06, + "loss": 0.5389, + "step": 6607 + }, + { + "epoch": 0.48581091016027056, + "grad_norm": 0.8504392504692078, + "learning_rate": 4.921768150681147e-06, + "loss": 0.5505, + "step": 6608 + }, + { + "epoch": 0.4858844287604764, + "grad_norm": 0.8891419172286987, + "learning_rate": 4.92174423331458e-06, + "loss": 0.5455, + "step": 6609 + }, + { + "epoch": 0.48595794736068226, + "grad_norm": 0.8639221787452698, + "learning_rate": 4.921720312350646e-06, + "loss": 0.5351, + "step": 6610 + }, + { + "epoch": 0.4860314659608881, + "grad_norm": 0.8857309222221375, + "learning_rate": 4.921696387789378e-06, + "loss": 0.584, + "step": 6611 + }, + { + "epoch": 0.48610498456109397, + "grad_norm": 0.8563494682312012, + "learning_rate": 4.921672459630813e-06, + "loss": 0.5482, + "step": 6612 + }, + { + "epoch": 0.4861785031612998, + "grad_norm": 0.8302930593490601, + "learning_rate": 4.921648527874986e-06, + "loss": 0.5652, + "step": 6613 + }, + { + "epoch": 0.48625202176150567, + "grad_norm": 0.8397381901741028, + "learning_rate": 4.921624592521932e-06, + "loss": 0.5351, + "step": 6614 + }, + { + "epoch": 0.4863255403617115, + "grad_norm": 0.8332838416099548, + "learning_rate": 4.921600653571689e-06, + "loss": 0.5313, + "step": 6615 + }, + { + "epoch": 0.4863990589619174, + "grad_norm": 0.844402551651001, + "learning_rate": 4.92157671102429e-06, + "loss": 0.5192, + "step": 6616 + }, + { + "epoch": 0.4864725775621232, + "grad_norm": 0.820965588092804, + "learning_rate": 4.9215527648797705e-06, + "loss": 0.5324, + "step": 6617 + }, + { + "epoch": 0.4865460961623291, + "grad_norm": 0.8403169512748718, + "learning_rate": 4.921528815138168e-06, + "loss": 0.5842, + "step": 6618 + }, + { + "epoch": 0.4866196147625349, + "grad_norm": 0.8608630895614624, + "learning_rate": 4.921504861799516e-06, + "loss": 0.5132, + "step": 6619 + }, + { + "epoch": 0.4866931333627408, + "grad_norm": 0.8082786202430725, + "learning_rate": 4.921480904863852e-06, + "loss": 0.5305, + "step": 6620 + }, + { + "epoch": 0.4867666519629466, + "grad_norm": 0.8352646231651306, + "learning_rate": 4.921456944331211e-06, + "loss": 0.5643, + "step": 6621 + }, + { + "epoch": 0.4868401705631525, + "grad_norm": 0.8651692867279053, + "learning_rate": 4.921432980201628e-06, + "loss": 0.5231, + "step": 6622 + }, + { + "epoch": 0.4869136891633583, + "grad_norm": 0.872032642364502, + "learning_rate": 4.921409012475139e-06, + "loss": 0.5637, + "step": 6623 + }, + { + "epoch": 0.4869872077635642, + "grad_norm": 0.8115562796592712, + "learning_rate": 4.921385041151779e-06, + "loss": 0.5501, + "step": 6624 + }, + { + "epoch": 0.48706072636377, + "grad_norm": 0.9256522059440613, + "learning_rate": 4.921361066231585e-06, + "loss": 0.5915, + "step": 6625 + }, + { + "epoch": 0.4871342449639759, + "grad_norm": 0.8453470468521118, + "learning_rate": 4.92133708771459e-06, + "loss": 0.5731, + "step": 6626 + }, + { + "epoch": 0.4872077635641817, + "grad_norm": 0.8067240715026855, + "learning_rate": 4.921313105600834e-06, + "loss": 0.5364, + "step": 6627 + }, + { + "epoch": 0.4872812821643876, + "grad_norm": 0.8393539786338806, + "learning_rate": 4.9212891198903475e-06, + "loss": 0.5725, + "step": 6628 + }, + { + "epoch": 0.4873548007645934, + "grad_norm": 0.8953565955162048, + "learning_rate": 4.92126513058317e-06, + "loss": 0.55, + "step": 6629 + }, + { + "epoch": 0.4874283193647993, + "grad_norm": 0.8553810715675354, + "learning_rate": 4.921241137679335e-06, + "loss": 0.5735, + "step": 6630 + }, + { + "epoch": 0.4875018379650051, + "grad_norm": 0.8760797381401062, + "learning_rate": 4.921217141178879e-06, + "loss": 0.5528, + "step": 6631 + }, + { + "epoch": 0.487575356565211, + "grad_norm": 0.8292677998542786, + "learning_rate": 4.921193141081838e-06, + "loss": 0.5727, + "step": 6632 + }, + { + "epoch": 0.48764887516541683, + "grad_norm": 0.905945360660553, + "learning_rate": 4.921169137388247e-06, + "loss": 0.5302, + "step": 6633 + }, + { + "epoch": 0.4877223937656227, + "grad_norm": 0.8241288661956787, + "learning_rate": 4.921145130098141e-06, + "loss": 0.5264, + "step": 6634 + }, + { + "epoch": 0.48779591236582853, + "grad_norm": 0.8457471132278442, + "learning_rate": 4.921121119211557e-06, + "loss": 0.5586, + "step": 6635 + }, + { + "epoch": 0.4878694309660344, + "grad_norm": 0.8821233510971069, + "learning_rate": 4.92109710472853e-06, + "loss": 0.5558, + "step": 6636 + }, + { + "epoch": 0.48794294956624024, + "grad_norm": 0.8313044905662537, + "learning_rate": 4.921073086649096e-06, + "loss": 0.5823, + "step": 6637 + }, + { + "epoch": 0.4880164681664461, + "grad_norm": 0.8798543214797974, + "learning_rate": 4.921049064973291e-06, + "loss": 0.5482, + "step": 6638 + }, + { + "epoch": 0.48808998676665194, + "grad_norm": 0.8411762118339539, + "learning_rate": 4.921025039701151e-06, + "loss": 0.5356, + "step": 6639 + }, + { + "epoch": 0.4881635053668578, + "grad_norm": 0.8425402641296387, + "learning_rate": 4.921001010832709e-06, + "loss": 0.5605, + "step": 6640 + }, + { + "epoch": 0.48823702396706364, + "grad_norm": 0.8221021294593811, + "learning_rate": 4.920976978368003e-06, + "loss": 0.5327, + "step": 6641 + }, + { + "epoch": 0.4883105425672695, + "grad_norm": 0.9163788557052612, + "learning_rate": 4.920952942307069e-06, + "loss": 0.5321, + "step": 6642 + }, + { + "epoch": 0.48838406116747535, + "grad_norm": 0.8163363337516785, + "learning_rate": 4.920928902649941e-06, + "loss": 0.5357, + "step": 6643 + }, + { + "epoch": 0.4884575797676812, + "grad_norm": 0.8536263108253479, + "learning_rate": 4.920904859396656e-06, + "loss": 0.5645, + "step": 6644 + }, + { + "epoch": 0.48853109836788705, + "grad_norm": 0.8199924230575562, + "learning_rate": 4.920880812547249e-06, + "loss": 0.5433, + "step": 6645 + }, + { + "epoch": 0.48860461696809293, + "grad_norm": 0.9045820832252502, + "learning_rate": 4.920856762101758e-06, + "loss": 0.5908, + "step": 6646 + }, + { + "epoch": 0.48867813556829875, + "grad_norm": 0.8103812336921692, + "learning_rate": 4.920832708060216e-06, + "loss": 0.5394, + "step": 6647 + }, + { + "epoch": 0.48875165416850463, + "grad_norm": 0.8744783401489258, + "learning_rate": 4.920808650422658e-06, + "loss": 0.5593, + "step": 6648 + }, + { + "epoch": 0.48882517276871046, + "grad_norm": 0.8173433542251587, + "learning_rate": 4.920784589189122e-06, + "loss": 0.4946, + "step": 6649 + }, + { + "epoch": 0.48889869136891634, + "grad_norm": 0.7888323664665222, + "learning_rate": 4.920760524359644e-06, + "loss": 0.5221, + "step": 6650 + }, + { + "epoch": 0.48897220996912216, + "grad_norm": 0.8551151156425476, + "learning_rate": 4.920736455934259e-06, + "loss": 0.5406, + "step": 6651 + }, + { + "epoch": 0.48904572856932804, + "grad_norm": 0.866715133190155, + "learning_rate": 4.9207123839130014e-06, + "loss": 0.583, + "step": 6652 + }, + { + "epoch": 0.48911924716953387, + "grad_norm": 0.8586180210113525, + "learning_rate": 4.920688308295908e-06, + "loss": 0.6038, + "step": 6653 + }, + { + "epoch": 0.48919276576973975, + "grad_norm": 0.8794726729393005, + "learning_rate": 4.920664229083015e-06, + "loss": 0.5621, + "step": 6654 + }, + { + "epoch": 0.48926628436994557, + "grad_norm": 0.8598440289497375, + "learning_rate": 4.920640146274359e-06, + "loss": 0.5018, + "step": 6655 + }, + { + "epoch": 0.48933980297015145, + "grad_norm": 0.8699421882629395, + "learning_rate": 4.920616059869974e-06, + "loss": 0.5476, + "step": 6656 + }, + { + "epoch": 0.4894133215703573, + "grad_norm": 0.834108829498291, + "learning_rate": 4.920591969869896e-06, + "loss": 0.5788, + "step": 6657 + }, + { + "epoch": 0.48948684017056315, + "grad_norm": 0.783550500869751, + "learning_rate": 4.920567876274161e-06, + "loss": 0.5301, + "step": 6658 + }, + { + "epoch": 0.489560358770769, + "grad_norm": 0.8322531580924988, + "learning_rate": 4.9205437790828045e-06, + "loss": 0.5236, + "step": 6659 + }, + { + "epoch": 0.48963387737097486, + "grad_norm": 0.8594717979431152, + "learning_rate": 4.9205196782958645e-06, + "loss": 0.5594, + "step": 6660 + }, + { + "epoch": 0.4897073959711807, + "grad_norm": 0.8756075501441956, + "learning_rate": 4.920495573913373e-06, + "loss": 0.5346, + "step": 6661 + }, + { + "epoch": 0.48978091457138656, + "grad_norm": 0.8278168439865112, + "learning_rate": 4.920471465935369e-06, + "loss": 0.5132, + "step": 6662 + }, + { + "epoch": 0.48985443317159244, + "grad_norm": 0.8215349912643433, + "learning_rate": 4.920447354361887e-06, + "loss": 0.5769, + "step": 6663 + }, + { + "epoch": 0.48992795177179826, + "grad_norm": 0.8427077531814575, + "learning_rate": 4.920423239192964e-06, + "loss": 0.5792, + "step": 6664 + }, + { + "epoch": 0.49000147037200414, + "grad_norm": 0.8164279460906982, + "learning_rate": 4.920399120428633e-06, + "loss": 0.5304, + "step": 6665 + }, + { + "epoch": 0.49007498897220997, + "grad_norm": 0.8624511361122131, + "learning_rate": 4.920374998068933e-06, + "loss": 0.5753, + "step": 6666 + }, + { + "epoch": 0.49014850757241585, + "grad_norm": 0.8721962571144104, + "learning_rate": 4.920350872113898e-06, + "loss": 0.5466, + "step": 6667 + }, + { + "epoch": 0.49022202617262167, + "grad_norm": 0.8462709188461304, + "learning_rate": 4.920326742563564e-06, + "loss": 0.5818, + "step": 6668 + }, + { + "epoch": 0.49029554477282755, + "grad_norm": 0.8220327496528625, + "learning_rate": 4.920302609417967e-06, + "loss": 0.5357, + "step": 6669 + }, + { + "epoch": 0.4903690633730334, + "grad_norm": 0.8625971078872681, + "learning_rate": 4.920278472677144e-06, + "loss": 0.5116, + "step": 6670 + }, + { + "epoch": 0.49044258197323926, + "grad_norm": 0.8594661951065063, + "learning_rate": 4.92025433234113e-06, + "loss": 0.5191, + "step": 6671 + }, + { + "epoch": 0.4905161005734451, + "grad_norm": 0.8576031923294067, + "learning_rate": 4.9202301884099594e-06, + "loss": 0.6059, + "step": 6672 + }, + { + "epoch": 0.49058961917365096, + "grad_norm": 0.8452153205871582, + "learning_rate": 4.92020604088367e-06, + "loss": 0.595, + "step": 6673 + }, + { + "epoch": 0.4906631377738568, + "grad_norm": 0.8261720538139343, + "learning_rate": 4.920181889762297e-06, + "loss": 0.5549, + "step": 6674 + }, + { + "epoch": 0.49073665637406266, + "grad_norm": 0.8707422018051147, + "learning_rate": 4.920157735045877e-06, + "loss": 0.5806, + "step": 6675 + }, + { + "epoch": 0.4908101749742685, + "grad_norm": 0.8284003138542175, + "learning_rate": 4.920133576734444e-06, + "loss": 0.5997, + "step": 6676 + }, + { + "epoch": 0.49088369357447437, + "grad_norm": 0.8347165584564209, + "learning_rate": 4.920109414828036e-06, + "loss": 0.531, + "step": 6677 + }, + { + "epoch": 0.4909572121746802, + "grad_norm": 0.854557454586029, + "learning_rate": 4.920085249326688e-06, + "loss": 0.5784, + "step": 6678 + }, + { + "epoch": 0.49103073077488607, + "grad_norm": 0.8851974606513977, + "learning_rate": 4.920061080230435e-06, + "loss": 0.5815, + "step": 6679 + }, + { + "epoch": 0.4911042493750919, + "grad_norm": 0.8880935907363892, + "learning_rate": 4.920036907539315e-06, + "loss": 0.5717, + "step": 6680 + }, + { + "epoch": 0.4911777679752978, + "grad_norm": 0.90240877866745, + "learning_rate": 4.920012731253362e-06, + "loss": 0.5612, + "step": 6681 + }, + { + "epoch": 0.4912512865755036, + "grad_norm": 0.878246545791626, + "learning_rate": 4.919988551372612e-06, + "loss": 0.619, + "step": 6682 + }, + { + "epoch": 0.4913248051757095, + "grad_norm": 0.8350247740745544, + "learning_rate": 4.919964367897102e-06, + "loss": 0.5621, + "step": 6683 + }, + { + "epoch": 0.4913983237759153, + "grad_norm": 0.8285530805587769, + "learning_rate": 4.919940180826868e-06, + "loss": 0.581, + "step": 6684 + }, + { + "epoch": 0.4914718423761212, + "grad_norm": 0.8976007699966431, + "learning_rate": 4.919915990161945e-06, + "loss": 0.5758, + "step": 6685 + }, + { + "epoch": 0.491545360976327, + "grad_norm": 0.8718271851539612, + "learning_rate": 4.91989179590237e-06, + "loss": 0.5461, + "step": 6686 + }, + { + "epoch": 0.4916188795765329, + "grad_norm": 0.7976516485214233, + "learning_rate": 4.919867598048177e-06, + "loss": 0.5163, + "step": 6687 + }, + { + "epoch": 0.4916923981767387, + "grad_norm": 0.8920926451683044, + "learning_rate": 4.919843396599404e-06, + "loss": 0.5558, + "step": 6688 + }, + { + "epoch": 0.4917659167769446, + "grad_norm": 0.8254573345184326, + "learning_rate": 4.919819191556085e-06, + "loss": 0.4891, + "step": 6689 + }, + { + "epoch": 0.4918394353771504, + "grad_norm": 0.8141077756881714, + "learning_rate": 4.919794982918259e-06, + "loss": 0.5156, + "step": 6690 + }, + { + "epoch": 0.4919129539773563, + "grad_norm": 0.8661051988601685, + "learning_rate": 4.919770770685959e-06, + "loss": 0.565, + "step": 6691 + }, + { + "epoch": 0.4919864725775621, + "grad_norm": 0.8477216362953186, + "learning_rate": 4.919746554859222e-06, + "loss": 0.519, + "step": 6692 + }, + { + "epoch": 0.492059991177768, + "grad_norm": 0.8031551837921143, + "learning_rate": 4.9197223354380845e-06, + "loss": 0.5364, + "step": 6693 + }, + { + "epoch": 0.4921335097779738, + "grad_norm": 0.9566237330436707, + "learning_rate": 4.919698112422581e-06, + "loss": 0.5743, + "step": 6694 + }, + { + "epoch": 0.4922070283781797, + "grad_norm": 0.8798445463180542, + "learning_rate": 4.919673885812749e-06, + "loss": 0.5752, + "step": 6695 + }, + { + "epoch": 0.4922805469783855, + "grad_norm": 0.8096166849136353, + "learning_rate": 4.919649655608623e-06, + "loss": 0.5218, + "step": 6696 + }, + { + "epoch": 0.4923540655785914, + "grad_norm": 0.8579367399215698, + "learning_rate": 4.919625421810242e-06, + "loss": 0.5372, + "step": 6697 + }, + { + "epoch": 0.49242758417879723, + "grad_norm": 0.8597109317779541, + "learning_rate": 4.919601184417639e-06, + "loss": 0.6052, + "step": 6698 + }, + { + "epoch": 0.4925011027790031, + "grad_norm": 0.8937738537788391, + "learning_rate": 4.91957694343085e-06, + "loss": 0.594, + "step": 6699 + }, + { + "epoch": 0.49257462137920893, + "grad_norm": 0.8397363424301147, + "learning_rate": 4.919552698849913e-06, + "loss": 0.5585, + "step": 6700 + }, + { + "epoch": 0.4926481399794148, + "grad_norm": 0.8351534008979797, + "learning_rate": 4.919528450674863e-06, + "loss": 0.5611, + "step": 6701 + }, + { + "epoch": 0.49272165857962064, + "grad_norm": 0.8185324668884277, + "learning_rate": 4.919504198905736e-06, + "loss": 0.5314, + "step": 6702 + }, + { + "epoch": 0.4927951771798265, + "grad_norm": 0.8643665313720703, + "learning_rate": 4.919479943542567e-06, + "loss": 0.5738, + "step": 6703 + }, + { + "epoch": 0.49286869578003234, + "grad_norm": 0.9127417206764221, + "learning_rate": 4.919455684585394e-06, + "loss": 0.5476, + "step": 6704 + }, + { + "epoch": 0.4929422143802382, + "grad_norm": 0.8388448357582092, + "learning_rate": 4.919431422034251e-06, + "loss": 0.5568, + "step": 6705 + }, + { + "epoch": 0.49301573298044404, + "grad_norm": 0.8323571085929871, + "learning_rate": 4.919407155889177e-06, + "loss": 0.5249, + "step": 6706 + }, + { + "epoch": 0.4930892515806499, + "grad_norm": 0.8635116815567017, + "learning_rate": 4.919382886150205e-06, + "loss": 0.5306, + "step": 6707 + }, + { + "epoch": 0.49316277018085575, + "grad_norm": 0.8264122605323792, + "learning_rate": 4.919358612817372e-06, + "loss": 0.5344, + "step": 6708 + }, + { + "epoch": 0.4932362887810616, + "grad_norm": 0.8948179483413696, + "learning_rate": 4.919334335890714e-06, + "loss": 0.5701, + "step": 6709 + }, + { + "epoch": 0.49330980738126745, + "grad_norm": 0.8700992465019226, + "learning_rate": 4.919310055370268e-06, + "loss": 0.5842, + "step": 6710 + }, + { + "epoch": 0.49338332598147333, + "grad_norm": 0.8415957689285278, + "learning_rate": 4.919285771256069e-06, + "loss": 0.585, + "step": 6711 + }, + { + "epoch": 0.49345684458167915, + "grad_norm": 0.8390329480171204, + "learning_rate": 4.919261483548154e-06, + "loss": 0.5469, + "step": 6712 + }, + { + "epoch": 0.49353036318188503, + "grad_norm": 0.8009175658226013, + "learning_rate": 4.919237192246557e-06, + "loss": 0.5484, + "step": 6713 + }, + { + "epoch": 0.49360388178209086, + "grad_norm": 0.8398445844650269, + "learning_rate": 4.919212897351318e-06, + "loss": 0.5576, + "step": 6714 + }, + { + "epoch": 0.49367740038229674, + "grad_norm": 0.8605840802192688, + "learning_rate": 4.919188598862469e-06, + "loss": 0.5403, + "step": 6715 + }, + { + "epoch": 0.49375091898250256, + "grad_norm": 0.8640074133872986, + "learning_rate": 4.919164296780048e-06, + "loss": 0.5353, + "step": 6716 + }, + { + "epoch": 0.49382443758270844, + "grad_norm": 0.7986822128295898, + "learning_rate": 4.919139991104092e-06, + "loss": 0.5597, + "step": 6717 + }, + { + "epoch": 0.49389795618291427, + "grad_norm": 0.8407573103904724, + "learning_rate": 4.919115681834635e-06, + "loss": 0.5199, + "step": 6718 + }, + { + "epoch": 0.49397147478312015, + "grad_norm": 0.8570771813392639, + "learning_rate": 4.919091368971714e-06, + "loss": 0.5646, + "step": 6719 + }, + { + "epoch": 0.49404499338332597, + "grad_norm": 0.8681983351707458, + "learning_rate": 4.919067052515366e-06, + "loss": 0.5797, + "step": 6720 + }, + { + "epoch": 0.49411851198353185, + "grad_norm": 0.8423882722854614, + "learning_rate": 4.919042732465625e-06, + "loss": 0.5641, + "step": 6721 + }, + { + "epoch": 0.4941920305837377, + "grad_norm": 0.8189439177513123, + "learning_rate": 4.9190184088225305e-06, + "loss": 0.5611, + "step": 6722 + }, + { + "epoch": 0.49426554918394355, + "grad_norm": 0.8860821723937988, + "learning_rate": 4.9189940815861145e-06, + "loss": 0.5314, + "step": 6723 + }, + { + "epoch": 0.4943390677841494, + "grad_norm": 0.8503178358078003, + "learning_rate": 4.918969750756417e-06, + "loss": 0.5719, + "step": 6724 + }, + { + "epoch": 0.49441258638435526, + "grad_norm": 0.8387562036514282, + "learning_rate": 4.918945416333472e-06, + "loss": 0.5158, + "step": 6725 + }, + { + "epoch": 0.4944861049845611, + "grad_norm": 0.8253615498542786, + "learning_rate": 4.9189210783173155e-06, + "loss": 0.4881, + "step": 6726 + }, + { + "epoch": 0.49455962358476696, + "grad_norm": 0.7980785369873047, + "learning_rate": 4.9188967367079845e-06, + "loss": 0.5138, + "step": 6727 + }, + { + "epoch": 0.4946331421849728, + "grad_norm": 0.8834146857261658, + "learning_rate": 4.918872391505515e-06, + "loss": 0.6006, + "step": 6728 + }, + { + "epoch": 0.49470666078517866, + "grad_norm": 0.8565592169761658, + "learning_rate": 4.918848042709942e-06, + "loss": 0.5688, + "step": 6729 + }, + { + "epoch": 0.4947801793853845, + "grad_norm": 0.8634769916534424, + "learning_rate": 4.918823690321304e-06, + "loss": 0.5222, + "step": 6730 + }, + { + "epoch": 0.49485369798559037, + "grad_norm": 0.8508370518684387, + "learning_rate": 4.918799334339635e-06, + "loss": 0.5574, + "step": 6731 + }, + { + "epoch": 0.4949272165857962, + "grad_norm": 0.7881825566291809, + "learning_rate": 4.918774974764973e-06, + "loss": 0.5274, + "step": 6732 + }, + { + "epoch": 0.49500073518600207, + "grad_norm": 0.8903505802154541, + "learning_rate": 4.918750611597351e-06, + "loss": 0.5702, + "step": 6733 + }, + { + "epoch": 0.4950742537862079, + "grad_norm": 0.8705099821090698, + "learning_rate": 4.91872624483681e-06, + "loss": 0.567, + "step": 6734 + }, + { + "epoch": 0.4951477723864138, + "grad_norm": 0.8429398536682129, + "learning_rate": 4.918701874483383e-06, + "loss": 0.5464, + "step": 6735 + }, + { + "epoch": 0.4952212909866196, + "grad_norm": 0.8397588729858398, + "learning_rate": 4.9186775005371054e-06, + "loss": 0.4956, + "step": 6736 + }, + { + "epoch": 0.4952948095868255, + "grad_norm": 0.8543627858161926, + "learning_rate": 4.918653122998016e-06, + "loss": 0.5815, + "step": 6737 + }, + { + "epoch": 0.4953683281870313, + "grad_norm": 0.8376911282539368, + "learning_rate": 4.91862874186615e-06, + "loss": 0.587, + "step": 6738 + }, + { + "epoch": 0.4954418467872372, + "grad_norm": 0.8298311233520508, + "learning_rate": 4.918604357141543e-06, + "loss": 0.5018, + "step": 6739 + }, + { + "epoch": 0.495515365387443, + "grad_norm": 0.8503391742706299, + "learning_rate": 4.918579968824232e-06, + "loss": 0.5453, + "step": 6740 + }, + { + "epoch": 0.4955888839876489, + "grad_norm": 0.8253580927848816, + "learning_rate": 4.918555576914253e-06, + "loss": 0.5448, + "step": 6741 + }, + { + "epoch": 0.4956624025878547, + "grad_norm": 0.8389142751693726, + "learning_rate": 4.918531181411642e-06, + "loss": 0.5633, + "step": 6742 + }, + { + "epoch": 0.4957359211880606, + "grad_norm": 0.8176791667938232, + "learning_rate": 4.918506782316434e-06, + "loss": 0.5624, + "step": 6743 + }, + { + "epoch": 0.4958094397882664, + "grad_norm": 0.8336679339408875, + "learning_rate": 4.918482379628669e-06, + "loss": 0.5412, + "step": 6744 + }, + { + "epoch": 0.4958829583884723, + "grad_norm": 0.863921582698822, + "learning_rate": 4.91845797334838e-06, + "loss": 0.5193, + "step": 6745 + }, + { + "epoch": 0.4959564769886781, + "grad_norm": 0.8234433531761169, + "learning_rate": 4.918433563475603e-06, + "loss": 0.5103, + "step": 6746 + }, + { + "epoch": 0.496029995588884, + "grad_norm": 0.8670532703399658, + "learning_rate": 4.918409150010376e-06, + "loss": 0.5081, + "step": 6747 + }, + { + "epoch": 0.4961035141890898, + "grad_norm": 0.8378722667694092, + "learning_rate": 4.9183847329527345e-06, + "loss": 0.577, + "step": 6748 + }, + { + "epoch": 0.4961770327892957, + "grad_norm": 0.821810245513916, + "learning_rate": 4.918360312302715e-06, + "loss": 0.571, + "step": 6749 + }, + { + "epoch": 0.4962505513895015, + "grad_norm": 0.8600327372550964, + "learning_rate": 4.918335888060354e-06, + "loss": 0.6179, + "step": 6750 + }, + { + "epoch": 0.4963240699897074, + "grad_norm": 0.864086925983429, + "learning_rate": 4.918311460225687e-06, + "loss": 0.609, + "step": 6751 + }, + { + "epoch": 0.49639758858991323, + "grad_norm": 0.894615888595581, + "learning_rate": 4.918287028798752e-06, + "loss": 0.5914, + "step": 6752 + }, + { + "epoch": 0.4964711071901191, + "grad_norm": 0.8244901895523071, + "learning_rate": 4.918262593779582e-06, + "loss": 0.5552, + "step": 6753 + }, + { + "epoch": 0.49654462579032493, + "grad_norm": 0.8597402572631836, + "learning_rate": 4.9182381551682176e-06, + "loss": 0.5396, + "step": 6754 + }, + { + "epoch": 0.4966181443905308, + "grad_norm": 0.8733044266700745, + "learning_rate": 4.918213712964691e-06, + "loss": 0.5701, + "step": 6755 + }, + { + "epoch": 0.49669166299073664, + "grad_norm": 0.8726484775543213, + "learning_rate": 4.918189267169041e-06, + "loss": 0.5202, + "step": 6756 + }, + { + "epoch": 0.4967651815909425, + "grad_norm": 0.8127583265304565, + "learning_rate": 4.9181648177813036e-06, + "loss": 0.5327, + "step": 6757 + }, + { + "epoch": 0.49683870019114834, + "grad_norm": 0.8647104501724243, + "learning_rate": 4.918140364801515e-06, + "loss": 0.5399, + "step": 6758 + }, + { + "epoch": 0.4969122187913542, + "grad_norm": 0.825189471244812, + "learning_rate": 4.91811590822971e-06, + "loss": 0.5571, + "step": 6759 + }, + { + "epoch": 0.49698573739156005, + "grad_norm": 0.854555606842041, + "learning_rate": 4.918091448065927e-06, + "loss": 0.561, + "step": 6760 + }, + { + "epoch": 0.4970592559917659, + "grad_norm": 0.8710049986839294, + "learning_rate": 4.918066984310202e-06, + "loss": 0.5882, + "step": 6761 + }, + { + "epoch": 0.49713277459197175, + "grad_norm": 0.8425012230873108, + "learning_rate": 4.91804251696257e-06, + "loss": 0.5639, + "step": 6762 + }, + { + "epoch": 0.49720629319217763, + "grad_norm": 0.8783018589019775, + "learning_rate": 4.9180180460230696e-06, + "loss": 0.5563, + "step": 6763 + }, + { + "epoch": 0.49727981179238345, + "grad_norm": 0.8417856693267822, + "learning_rate": 4.917993571491735e-06, + "loss": 0.5579, + "step": 6764 + }, + { + "epoch": 0.49735333039258933, + "grad_norm": 0.7639203667640686, + "learning_rate": 4.917969093368602e-06, + "loss": 0.525, + "step": 6765 + }, + { + "epoch": 0.49742684899279516, + "grad_norm": 0.8587158918380737, + "learning_rate": 4.91794461165371e-06, + "loss": 0.5425, + "step": 6766 + }, + { + "epoch": 0.49750036759300104, + "grad_norm": 0.8453246355056763, + "learning_rate": 4.917920126347093e-06, + "loss": 0.5688, + "step": 6767 + }, + { + "epoch": 0.49757388619320686, + "grad_norm": 0.9056903719902039, + "learning_rate": 4.917895637448789e-06, + "loss": 0.5826, + "step": 6768 + }, + { + "epoch": 0.49764740479341274, + "grad_norm": 0.8849014639854431, + "learning_rate": 4.917871144958832e-06, + "loss": 0.5591, + "step": 6769 + }, + { + "epoch": 0.49772092339361856, + "grad_norm": 0.8562685251235962, + "learning_rate": 4.917846648877261e-06, + "loss": 0.5384, + "step": 6770 + }, + { + "epoch": 0.49779444199382444, + "grad_norm": 0.8288081884384155, + "learning_rate": 4.917822149204112e-06, + "loss": 0.5417, + "step": 6771 + }, + { + "epoch": 0.49786796059403027, + "grad_norm": 0.8056303262710571, + "learning_rate": 4.917797645939418e-06, + "loss": 0.5595, + "step": 6772 + }, + { + "epoch": 0.49794147919423615, + "grad_norm": 0.8413621783256531, + "learning_rate": 4.917773139083219e-06, + "loss": 0.5619, + "step": 6773 + }, + { + "epoch": 0.49801499779444197, + "grad_norm": 0.8163940906524658, + "learning_rate": 4.917748628635551e-06, + "loss": 0.5474, + "step": 6774 + }, + { + "epoch": 0.49808851639464785, + "grad_norm": 0.8896793723106384, + "learning_rate": 4.91772411459645e-06, + "loss": 0.5925, + "step": 6775 + }, + { + "epoch": 0.4981620349948537, + "grad_norm": 0.8261660933494568, + "learning_rate": 4.9176995969659515e-06, + "loss": 0.5295, + "step": 6776 + }, + { + "epoch": 0.49823555359505955, + "grad_norm": 0.8564607501029968, + "learning_rate": 4.917675075744093e-06, + "loss": 0.5816, + "step": 6777 + }, + { + "epoch": 0.4983090721952654, + "grad_norm": 0.8469114303588867, + "learning_rate": 4.91765055093091e-06, + "loss": 0.5586, + "step": 6778 + }, + { + "epoch": 0.49838259079547126, + "grad_norm": 0.7926656007766724, + "learning_rate": 4.91762602252644e-06, + "loss": 0.554, + "step": 6779 + }, + { + "epoch": 0.4984561093956771, + "grad_norm": 0.8619980812072754, + "learning_rate": 4.91760149053072e-06, + "loss": 0.5306, + "step": 6780 + }, + { + "epoch": 0.49852962799588296, + "grad_norm": 0.8234326839447021, + "learning_rate": 4.917576954943784e-06, + "loss": 0.5697, + "step": 6781 + }, + { + "epoch": 0.4986031465960888, + "grad_norm": 0.8839605450630188, + "learning_rate": 4.9175524157656695e-06, + "loss": 0.5489, + "step": 6782 + }, + { + "epoch": 0.49867666519629467, + "grad_norm": 0.8598839640617371, + "learning_rate": 4.917527872996414e-06, + "loss": 0.5655, + "step": 6783 + }, + { + "epoch": 0.4987501837965005, + "grad_norm": 0.8639554381370544, + "learning_rate": 4.9175033266360535e-06, + "loss": 0.5221, + "step": 6784 + }, + { + "epoch": 0.49882370239670637, + "grad_norm": 0.919954776763916, + "learning_rate": 4.917478776684623e-06, + "loss": 0.5827, + "step": 6785 + }, + { + "epoch": 0.4988972209969122, + "grad_norm": 0.8955155611038208, + "learning_rate": 4.917454223142162e-06, + "loss": 0.5589, + "step": 6786 + }, + { + "epoch": 0.4989707395971181, + "grad_norm": 0.8443817496299744, + "learning_rate": 4.917429666008704e-06, + "loss": 0.5422, + "step": 6787 + }, + { + "epoch": 0.4990442581973239, + "grad_norm": 0.8008097410202026, + "learning_rate": 4.917405105284286e-06, + "loss": 0.5091, + "step": 6788 + }, + { + "epoch": 0.4991177767975298, + "grad_norm": 0.8655639886856079, + "learning_rate": 4.917380540968945e-06, + "loss": 0.5523, + "step": 6789 + }, + { + "epoch": 0.4991912953977356, + "grad_norm": 0.8496471643447876, + "learning_rate": 4.917355973062719e-06, + "loss": 0.5662, + "step": 6790 + }, + { + "epoch": 0.4992648139979415, + "grad_norm": 0.846208930015564, + "learning_rate": 4.917331401565643e-06, + "loss": 0.5586, + "step": 6791 + }, + { + "epoch": 0.4993383325981473, + "grad_norm": 0.8128052353858948, + "learning_rate": 4.917306826477752e-06, + "loss": 0.5049, + "step": 6792 + }, + { + "epoch": 0.4994118511983532, + "grad_norm": 0.8428284525871277, + "learning_rate": 4.917282247799086e-06, + "loss": 0.5508, + "step": 6793 + }, + { + "epoch": 0.499485369798559, + "grad_norm": 0.8594085574150085, + "learning_rate": 4.917257665529678e-06, + "loss": 0.5405, + "step": 6794 + }, + { + "epoch": 0.4995588883987649, + "grad_norm": 0.7960380911827087, + "learning_rate": 4.917233079669566e-06, + "loss": 0.5122, + "step": 6795 + }, + { + "epoch": 0.4996324069989707, + "grad_norm": 0.819756031036377, + "learning_rate": 4.917208490218788e-06, + "loss": 0.5174, + "step": 6796 + }, + { + "epoch": 0.4997059255991766, + "grad_norm": 0.8297457098960876, + "learning_rate": 4.9171838971773785e-06, + "loss": 0.5349, + "step": 6797 + }, + { + "epoch": 0.4997794441993824, + "grad_norm": 0.8568331599235535, + "learning_rate": 4.917159300545375e-06, + "loss": 0.5646, + "step": 6798 + }, + { + "epoch": 0.4998529627995883, + "grad_norm": 0.8445714712142944, + "learning_rate": 4.9171347003228136e-06, + "loss": 0.5453, + "step": 6799 + }, + { + "epoch": 0.4999264813997941, + "grad_norm": 0.8101001381874084, + "learning_rate": 4.917110096509731e-06, + "loss": 0.522, + "step": 6800 + }, + { + "epoch": 0.5, + "grad_norm": 0.8274605870246887, + "learning_rate": 4.917085489106163e-06, + "loss": 0.512, + "step": 6801 + }, + { + "epoch": 0.5000735186002059, + "grad_norm": 0.8316836953163147, + "learning_rate": 4.917060878112148e-06, + "loss": 0.5605, + "step": 6802 + }, + { + "epoch": 0.5001470372004118, + "grad_norm": 0.8463109731674194, + "learning_rate": 4.917036263527721e-06, + "loss": 0.5839, + "step": 6803 + }, + { + "epoch": 0.5002205558006175, + "grad_norm": 0.9099034667015076, + "learning_rate": 4.917011645352918e-06, + "loss": 0.6272, + "step": 6804 + }, + { + "epoch": 0.5002940744008234, + "grad_norm": 0.8255887031555176, + "learning_rate": 4.916987023587777e-06, + "loss": 0.5129, + "step": 6805 + }, + { + "epoch": 0.5003675930010293, + "grad_norm": 0.8115993738174438, + "learning_rate": 4.916962398232334e-06, + "loss": 0.4827, + "step": 6806 + }, + { + "epoch": 0.5004411116012352, + "grad_norm": 0.8349897265434265, + "learning_rate": 4.916937769286627e-06, + "loss": 0.6092, + "step": 6807 + }, + { + "epoch": 0.5005146302014409, + "grad_norm": 0.7985877394676208, + "learning_rate": 4.91691313675069e-06, + "loss": 0.5519, + "step": 6808 + }, + { + "epoch": 0.5005881488016468, + "grad_norm": 0.8026227951049805, + "learning_rate": 4.916888500624561e-06, + "loss": 0.5054, + "step": 6809 + }, + { + "epoch": 0.5006616674018527, + "grad_norm": 0.8246350288391113, + "learning_rate": 4.916863860908278e-06, + "loss": 0.5256, + "step": 6810 + }, + { + "epoch": 0.5007351860020586, + "grad_norm": 0.8087736368179321, + "learning_rate": 4.916839217601874e-06, + "loss": 0.4872, + "step": 6811 + }, + { + "epoch": 0.5008087046022643, + "grad_norm": 0.852928638458252, + "learning_rate": 4.916814570705389e-06, + "loss": 0.5234, + "step": 6812 + }, + { + "epoch": 0.5008822232024702, + "grad_norm": 0.888599157333374, + "learning_rate": 4.916789920218857e-06, + "loss": 0.5618, + "step": 6813 + }, + { + "epoch": 0.5009557418026761, + "grad_norm": 0.8294469118118286, + "learning_rate": 4.916765266142317e-06, + "loss": 0.5695, + "step": 6814 + }, + { + "epoch": 0.501029260402882, + "grad_norm": 0.8544323444366455, + "learning_rate": 4.916740608475805e-06, + "loss": 0.5166, + "step": 6815 + }, + { + "epoch": 0.5011027790030878, + "grad_norm": 0.8334459066390991, + "learning_rate": 4.916715947219356e-06, + "loss": 0.5226, + "step": 6816 + }, + { + "epoch": 0.5011762976032936, + "grad_norm": 0.8086405396461487, + "learning_rate": 4.916691282373008e-06, + "loss": 0.5401, + "step": 6817 + }, + { + "epoch": 0.5012498162034995, + "grad_norm": 0.8314017057418823, + "learning_rate": 4.916666613936798e-06, + "loss": 0.5307, + "step": 6818 + }, + { + "epoch": 0.5013233348037054, + "grad_norm": 0.8463442325592041, + "learning_rate": 4.916641941910762e-06, + "loss": 0.531, + "step": 6819 + }, + { + "epoch": 0.5013968534039112, + "grad_norm": 0.8809993267059326, + "learning_rate": 4.916617266294936e-06, + "loss": 0.5368, + "step": 6820 + }, + { + "epoch": 0.501470372004117, + "grad_norm": 0.8145313262939453, + "learning_rate": 4.9165925870893586e-06, + "loss": 0.5369, + "step": 6821 + }, + { + "epoch": 0.5015438906043229, + "grad_norm": 0.8518773317337036, + "learning_rate": 4.916567904294064e-06, + "loss": 0.5693, + "step": 6822 + }, + { + "epoch": 0.5016174092045288, + "grad_norm": 0.8509002923965454, + "learning_rate": 4.9165432179090915e-06, + "loss": 0.561, + "step": 6823 + }, + { + "epoch": 0.5016909278047346, + "grad_norm": 0.8936302065849304, + "learning_rate": 4.916518527934475e-06, + "loss": 0.5771, + "step": 6824 + }, + { + "epoch": 0.5017644464049404, + "grad_norm": 0.8924044370651245, + "learning_rate": 4.916493834370253e-06, + "loss": 0.5537, + "step": 6825 + }, + { + "epoch": 0.5018379650051463, + "grad_norm": 0.902014434337616, + "learning_rate": 4.916469137216462e-06, + "loss": 0.5522, + "step": 6826 + }, + { + "epoch": 0.5019114836053522, + "grad_norm": 0.8615565299987793, + "learning_rate": 4.916444436473138e-06, + "loss": 0.5524, + "step": 6827 + }, + { + "epoch": 0.501985002205558, + "grad_norm": 0.7942250967025757, + "learning_rate": 4.916419732140319e-06, + "loss": 0.5057, + "step": 6828 + }, + { + "epoch": 0.5020585208057639, + "grad_norm": 0.8257637619972229, + "learning_rate": 4.916395024218039e-06, + "loss": 0.5364, + "step": 6829 + }, + { + "epoch": 0.5021320394059697, + "grad_norm": 0.8419570922851562, + "learning_rate": 4.916370312706338e-06, + "loss": 0.534, + "step": 6830 + }, + { + "epoch": 0.5022055580061756, + "grad_norm": 0.8306810259819031, + "learning_rate": 4.916345597605251e-06, + "loss": 0.5123, + "step": 6831 + }, + { + "epoch": 0.5022790766063814, + "grad_norm": 0.8376314640045166, + "learning_rate": 4.916320878914815e-06, + "loss": 0.5542, + "step": 6832 + }, + { + "epoch": 0.5023525952065873, + "grad_norm": 0.8450117111206055, + "learning_rate": 4.916296156635066e-06, + "loss": 0.554, + "step": 6833 + }, + { + "epoch": 0.5024261138067931, + "grad_norm": 0.8678677082061768, + "learning_rate": 4.916271430766042e-06, + "loss": 0.5881, + "step": 6834 + }, + { + "epoch": 0.502499632406999, + "grad_norm": 0.8362794518470764, + "learning_rate": 4.916246701307778e-06, + "loss": 0.5559, + "step": 6835 + }, + { + "epoch": 0.5025731510072048, + "grad_norm": 0.8193661570549011, + "learning_rate": 4.916221968260313e-06, + "loss": 0.5496, + "step": 6836 + }, + { + "epoch": 0.5026466696074107, + "grad_norm": 0.8207920789718628, + "learning_rate": 4.9161972316236825e-06, + "loss": 0.5431, + "step": 6837 + }, + { + "epoch": 0.5027201882076165, + "grad_norm": 0.8903096914291382, + "learning_rate": 4.916172491397923e-06, + "loss": 0.5699, + "step": 6838 + }, + { + "epoch": 0.5027937068078224, + "grad_norm": 0.8330907821655273, + "learning_rate": 4.916147747583072e-06, + "loss": 0.5366, + "step": 6839 + }, + { + "epoch": 0.5028672254080282, + "grad_norm": 0.8396929502487183, + "learning_rate": 4.916123000179166e-06, + "loss": 0.5257, + "step": 6840 + }, + { + "epoch": 0.5029407440082341, + "grad_norm": 0.8285074830055237, + "learning_rate": 4.916098249186241e-06, + "loss": 0.492, + "step": 6841 + }, + { + "epoch": 0.50301426260844, + "grad_norm": 0.8457141518592834, + "learning_rate": 4.916073494604334e-06, + "loss": 0.5536, + "step": 6842 + }, + { + "epoch": 0.5030877812086458, + "grad_norm": 0.8738320469856262, + "learning_rate": 4.916048736433483e-06, + "loss": 0.5489, + "step": 6843 + }, + { + "epoch": 0.5031612998088516, + "grad_norm": 0.8155305981636047, + "learning_rate": 4.916023974673724e-06, + "loss": 0.5504, + "step": 6844 + }, + { + "epoch": 0.5032348184090575, + "grad_norm": 0.857870876789093, + "learning_rate": 4.915999209325092e-06, + "loss": 0.552, + "step": 6845 + }, + { + "epoch": 0.5033083370092634, + "grad_norm": 0.832937479019165, + "learning_rate": 4.915974440387627e-06, + "loss": 0.5636, + "step": 6846 + }, + { + "epoch": 0.5033818556094692, + "grad_norm": 0.8720632791519165, + "learning_rate": 4.9159496678613636e-06, + "loss": 0.5735, + "step": 6847 + }, + { + "epoch": 0.503455374209675, + "grad_norm": 0.8391405344009399, + "learning_rate": 4.91592489174634e-06, + "loss": 0.5374, + "step": 6848 + }, + { + "epoch": 0.5035288928098809, + "grad_norm": 0.8247716426849365, + "learning_rate": 4.9159001120425916e-06, + "loss": 0.5582, + "step": 6849 + }, + { + "epoch": 0.5036024114100868, + "grad_norm": 0.8778427243232727, + "learning_rate": 4.915875328750156e-06, + "loss": 0.509, + "step": 6850 + }, + { + "epoch": 0.5036759300102926, + "grad_norm": 0.8728204369544983, + "learning_rate": 4.915850541869071e-06, + "loss": 0.6099, + "step": 6851 + }, + { + "epoch": 0.5037494486104984, + "grad_norm": 0.8925386667251587, + "learning_rate": 4.9158257513993705e-06, + "loss": 0.5644, + "step": 6852 + }, + { + "epoch": 0.5038229672107043, + "grad_norm": 0.7961198091506958, + "learning_rate": 4.915800957341094e-06, + "loss": 0.5513, + "step": 6853 + }, + { + "epoch": 0.5038964858109102, + "grad_norm": 0.8938006162643433, + "learning_rate": 4.915776159694277e-06, + "loss": 0.5723, + "step": 6854 + }, + { + "epoch": 0.5039700044111161, + "grad_norm": 0.8235172629356384, + "learning_rate": 4.9157513584589576e-06, + "loss": 0.5257, + "step": 6855 + }, + { + "epoch": 0.5040435230113218, + "grad_norm": 0.8489853739738464, + "learning_rate": 4.915726553635172e-06, + "loss": 0.5119, + "step": 6856 + }, + { + "epoch": 0.5041170416115277, + "grad_norm": 0.8100699186325073, + "learning_rate": 4.915701745222956e-06, + "loss": 0.5304, + "step": 6857 + }, + { + "epoch": 0.5041905602117336, + "grad_norm": 0.8634759187698364, + "learning_rate": 4.915676933222347e-06, + "loss": 0.544, + "step": 6858 + }, + { + "epoch": 0.5042640788119395, + "grad_norm": 0.845569908618927, + "learning_rate": 4.915652117633384e-06, + "loss": 0.5656, + "step": 6859 + }, + { + "epoch": 0.5043375974121452, + "grad_norm": 0.8343470692634583, + "learning_rate": 4.915627298456101e-06, + "loss": 0.5331, + "step": 6860 + }, + { + "epoch": 0.5044111160123511, + "grad_norm": 0.8804770708084106, + "learning_rate": 4.915602475690535e-06, + "loss": 0.592, + "step": 6861 + }, + { + "epoch": 0.504484634612557, + "grad_norm": 0.8757964372634888, + "learning_rate": 4.915577649336725e-06, + "loss": 0.5115, + "step": 6862 + }, + { + "epoch": 0.5045581532127629, + "grad_norm": 0.8701205253601074, + "learning_rate": 4.915552819394706e-06, + "loss": 0.5858, + "step": 6863 + }, + { + "epoch": 0.5046316718129686, + "grad_norm": 0.8304445743560791, + "learning_rate": 4.915527985864516e-06, + "loss": 0.5376, + "step": 6864 + }, + { + "epoch": 0.5047051904131745, + "grad_norm": 0.832233726978302, + "learning_rate": 4.915503148746191e-06, + "loss": 0.5461, + "step": 6865 + }, + { + "epoch": 0.5047787090133804, + "grad_norm": 0.8388919234275818, + "learning_rate": 4.915478308039769e-06, + "loss": 0.5281, + "step": 6866 + }, + { + "epoch": 0.5048522276135863, + "grad_norm": 0.8192275762557983, + "learning_rate": 4.915453463745286e-06, + "loss": 0.5723, + "step": 6867 + }, + { + "epoch": 0.504925746213792, + "grad_norm": 0.8124808669090271, + "learning_rate": 4.91542861586278e-06, + "loss": 0.5582, + "step": 6868 + }, + { + "epoch": 0.5049992648139979, + "grad_norm": 0.8687469363212585, + "learning_rate": 4.915403764392286e-06, + "loss": 0.5354, + "step": 6869 + }, + { + "epoch": 0.5050727834142038, + "grad_norm": 0.8506252765655518, + "learning_rate": 4.915378909333841e-06, + "loss": 0.5815, + "step": 6870 + }, + { + "epoch": 0.5051463020144097, + "grad_norm": 0.8021840453147888, + "learning_rate": 4.9153540506874845e-06, + "loss": 0.5483, + "step": 6871 + }, + { + "epoch": 0.5052198206146155, + "grad_norm": 0.8512057065963745, + "learning_rate": 4.915329188453251e-06, + "loss": 0.5795, + "step": 6872 + }, + { + "epoch": 0.5052933392148213, + "grad_norm": 0.8626983165740967, + "learning_rate": 4.915304322631179e-06, + "loss": 0.5612, + "step": 6873 + }, + { + "epoch": 0.5053668578150272, + "grad_norm": 0.9060139060020447, + "learning_rate": 4.9152794532213035e-06, + "loss": 0.5968, + "step": 6874 + }, + { + "epoch": 0.5054403764152331, + "grad_norm": 0.8387018442153931, + "learning_rate": 4.915254580223664e-06, + "loss": 0.5491, + "step": 6875 + }, + { + "epoch": 0.5055138950154389, + "grad_norm": 0.880510151386261, + "learning_rate": 4.915229703638295e-06, + "loss": 0.5503, + "step": 6876 + }, + { + "epoch": 0.5055874136156447, + "grad_norm": 0.8616777658462524, + "learning_rate": 4.915204823465235e-06, + "loss": 0.5443, + "step": 6877 + }, + { + "epoch": 0.5056609322158506, + "grad_norm": 0.8359262943267822, + "learning_rate": 4.915179939704521e-06, + "loss": 0.5706, + "step": 6878 + }, + { + "epoch": 0.5057344508160565, + "grad_norm": 0.8474477529525757, + "learning_rate": 4.915155052356188e-06, + "loss": 0.5389, + "step": 6879 + }, + { + "epoch": 0.5058079694162623, + "grad_norm": 0.8607267141342163, + "learning_rate": 4.915130161420276e-06, + "loss": 0.5601, + "step": 6880 + }, + { + "epoch": 0.5058814880164682, + "grad_norm": 0.8208725452423096, + "learning_rate": 4.91510526689682e-06, + "loss": 0.5462, + "step": 6881 + }, + { + "epoch": 0.505955006616674, + "grad_norm": 0.7867773175239563, + "learning_rate": 4.9150803687858565e-06, + "loss": 0.5329, + "step": 6882 + }, + { + "epoch": 0.5060285252168799, + "grad_norm": 0.8526594042778015, + "learning_rate": 4.915055467087424e-06, + "loss": 0.5564, + "step": 6883 + }, + { + "epoch": 0.5061020438170857, + "grad_norm": 0.8500291705131531, + "learning_rate": 4.915030561801559e-06, + "loss": 0.5773, + "step": 6884 + }, + { + "epoch": 0.5061755624172916, + "grad_norm": 0.7881303429603577, + "learning_rate": 4.915005652928298e-06, + "loss": 0.5381, + "step": 6885 + }, + { + "epoch": 0.5062490810174974, + "grad_norm": 0.8419769406318665, + "learning_rate": 4.914980740467679e-06, + "loss": 0.5564, + "step": 6886 + }, + { + "epoch": 0.5063225996177033, + "grad_norm": 0.8279970288276672, + "learning_rate": 4.914955824419737e-06, + "loss": 0.5566, + "step": 6887 + }, + { + "epoch": 0.5063961182179091, + "grad_norm": 0.9032385349273682, + "learning_rate": 4.914930904784511e-06, + "loss": 0.5945, + "step": 6888 + }, + { + "epoch": 0.506469636818115, + "grad_norm": 0.8708629608154297, + "learning_rate": 4.914905981562038e-06, + "loss": 0.5777, + "step": 6889 + }, + { + "epoch": 0.5065431554183208, + "grad_norm": 0.8820542693138123, + "learning_rate": 4.914881054752354e-06, + "loss": 0.5611, + "step": 6890 + }, + { + "epoch": 0.5066166740185267, + "grad_norm": 0.8293461799621582, + "learning_rate": 4.914856124355496e-06, + "loss": 0.5596, + "step": 6891 + }, + { + "epoch": 0.5066901926187325, + "grad_norm": 0.8042560815811157, + "learning_rate": 4.9148311903715025e-06, + "loss": 0.5368, + "step": 6892 + }, + { + "epoch": 0.5067637112189384, + "grad_norm": 0.8443880081176758, + "learning_rate": 4.914806252800409e-06, + "loss": 0.5838, + "step": 6893 + }, + { + "epoch": 0.5068372298191443, + "grad_norm": 0.8429650664329529, + "learning_rate": 4.914781311642253e-06, + "loss": 0.5799, + "step": 6894 + }, + { + "epoch": 0.5069107484193501, + "grad_norm": 0.834589421749115, + "learning_rate": 4.914756366897071e-06, + "loss": 0.5609, + "step": 6895 + }, + { + "epoch": 0.5069842670195559, + "grad_norm": 0.9192515015602112, + "learning_rate": 4.9147314185649015e-06, + "loss": 0.5536, + "step": 6896 + }, + { + "epoch": 0.5070577856197618, + "grad_norm": 0.8543354868888855, + "learning_rate": 4.91470646664578e-06, + "loss": 0.5058, + "step": 6897 + }, + { + "epoch": 0.5071313042199677, + "grad_norm": 0.8422306180000305, + "learning_rate": 4.914681511139745e-06, + "loss": 0.5017, + "step": 6898 + }, + { + "epoch": 0.5072048228201735, + "grad_norm": 0.8794522285461426, + "learning_rate": 4.914656552046833e-06, + "loss": 0.5663, + "step": 6899 + }, + { + "epoch": 0.5072783414203793, + "grad_norm": 0.8058120012283325, + "learning_rate": 4.91463158936708e-06, + "loss": 0.5328, + "step": 6900 + }, + { + "epoch": 0.5073518600205852, + "grad_norm": 0.8848984837532043, + "learning_rate": 4.914606623100524e-06, + "loss": 0.5685, + "step": 6901 + }, + { + "epoch": 0.5074253786207911, + "grad_norm": 0.7917881608009338, + "learning_rate": 4.914581653247202e-06, + "loss": 0.5262, + "step": 6902 + }, + { + "epoch": 0.507498897220997, + "grad_norm": 0.8913101553916931, + "learning_rate": 4.914556679807152e-06, + "loss": 0.5514, + "step": 6903 + }, + { + "epoch": 0.5075724158212027, + "grad_norm": 0.8716957569122314, + "learning_rate": 4.9145317027804106e-06, + "loss": 0.5625, + "step": 6904 + }, + { + "epoch": 0.5076459344214086, + "grad_norm": 0.8334651589393616, + "learning_rate": 4.914506722167014e-06, + "loss": 0.5556, + "step": 6905 + }, + { + "epoch": 0.5077194530216145, + "grad_norm": 0.8260417580604553, + "learning_rate": 4.914481737966999e-06, + "loss": 0.5058, + "step": 6906 + }, + { + "epoch": 0.5077929716218204, + "grad_norm": 0.8159055709838867, + "learning_rate": 4.914456750180405e-06, + "loss": 0.5367, + "step": 6907 + }, + { + "epoch": 0.5078664902220261, + "grad_norm": 0.886212944984436, + "learning_rate": 4.9144317588072666e-06, + "loss": 0.5744, + "step": 6908 + }, + { + "epoch": 0.507940008822232, + "grad_norm": 0.8588460683822632, + "learning_rate": 4.9144067638476225e-06, + "loss": 0.5748, + "step": 6909 + }, + { + "epoch": 0.5080135274224379, + "grad_norm": 0.8555459380149841, + "learning_rate": 4.914381765301509e-06, + "loss": 0.5884, + "step": 6910 + }, + { + "epoch": 0.5080870460226438, + "grad_norm": 0.8099327683448792, + "learning_rate": 4.914356763168964e-06, + "loss": 0.4882, + "step": 6911 + }, + { + "epoch": 0.5081605646228495, + "grad_norm": 0.8399486541748047, + "learning_rate": 4.914331757450025e-06, + "loss": 0.5744, + "step": 6912 + }, + { + "epoch": 0.5082340832230554, + "grad_norm": 0.8282365798950195, + "learning_rate": 4.9143067481447275e-06, + "loss": 0.5465, + "step": 6913 + }, + { + "epoch": 0.5083076018232613, + "grad_norm": 0.8810256719589233, + "learning_rate": 4.914281735253109e-06, + "loss": 0.5664, + "step": 6914 + }, + { + "epoch": 0.5083811204234672, + "grad_norm": 0.8672999143600464, + "learning_rate": 4.9142567187752075e-06, + "loss": 0.5785, + "step": 6915 + }, + { + "epoch": 0.5084546390236729, + "grad_norm": 0.8302225470542908, + "learning_rate": 4.91423169871106e-06, + "loss": 0.518, + "step": 6916 + }, + { + "epoch": 0.5085281576238788, + "grad_norm": 0.8692728281021118, + "learning_rate": 4.914206675060704e-06, + "loss": 0.5457, + "step": 6917 + }, + { + "epoch": 0.5086016762240847, + "grad_norm": 0.8269931077957153, + "learning_rate": 4.914181647824176e-06, + "loss": 0.5445, + "step": 6918 + }, + { + "epoch": 0.5086751948242906, + "grad_norm": 0.8149473667144775, + "learning_rate": 4.914156617001513e-06, + "loss": 0.5484, + "step": 6919 + }, + { + "epoch": 0.5087487134244963, + "grad_norm": 0.928070068359375, + "learning_rate": 4.9141315825927525e-06, + "loss": 0.5601, + "step": 6920 + }, + { + "epoch": 0.5088222320247022, + "grad_norm": 0.8474403619766235, + "learning_rate": 4.9141065445979325e-06, + "loss": 0.5309, + "step": 6921 + }, + { + "epoch": 0.5088957506249081, + "grad_norm": 0.8174158334732056, + "learning_rate": 4.9140815030170885e-06, + "loss": 0.5096, + "step": 6922 + }, + { + "epoch": 0.508969269225114, + "grad_norm": 0.9502517580986023, + "learning_rate": 4.914056457850259e-06, + "loss": 0.6025, + "step": 6923 + }, + { + "epoch": 0.5090427878253198, + "grad_norm": 0.846075713634491, + "learning_rate": 4.914031409097481e-06, + "loss": 0.5549, + "step": 6924 + }, + { + "epoch": 0.5091163064255256, + "grad_norm": 0.7973523139953613, + "learning_rate": 4.914006356758791e-06, + "loss": 0.5029, + "step": 6925 + }, + { + "epoch": 0.5091898250257315, + "grad_norm": 0.8277109861373901, + "learning_rate": 4.913981300834228e-06, + "loss": 0.5877, + "step": 6926 + }, + { + "epoch": 0.5092633436259374, + "grad_norm": 0.8037904500961304, + "learning_rate": 4.9139562413238265e-06, + "loss": 0.5465, + "step": 6927 + }, + { + "epoch": 0.5093368622261432, + "grad_norm": 0.8561519980430603, + "learning_rate": 4.913931178227626e-06, + "loss": 0.5414, + "step": 6928 + }, + { + "epoch": 0.509410380826349, + "grad_norm": 0.8645528554916382, + "learning_rate": 4.913906111545662e-06, + "loss": 0.5426, + "step": 6929 + }, + { + "epoch": 0.5094838994265549, + "grad_norm": 0.8605754971504211, + "learning_rate": 4.913881041277974e-06, + "loss": 0.5876, + "step": 6930 + }, + { + "epoch": 0.5095574180267608, + "grad_norm": 0.8945801854133606, + "learning_rate": 4.913855967424597e-06, + "loss": 0.5466, + "step": 6931 + }, + { + "epoch": 0.5096309366269666, + "grad_norm": 0.8642796874046326, + "learning_rate": 4.913830889985569e-06, + "loss": 0.5468, + "step": 6932 + }, + { + "epoch": 0.5097044552271724, + "grad_norm": 0.8496505618095398, + "learning_rate": 4.913805808960928e-06, + "loss": 0.6121, + "step": 6933 + }, + { + "epoch": 0.5097779738273783, + "grad_norm": 0.8157097697257996, + "learning_rate": 4.913780724350711e-06, + "loss": 0.5301, + "step": 6934 + }, + { + "epoch": 0.5098514924275842, + "grad_norm": 0.8404610753059387, + "learning_rate": 4.9137556361549535e-06, + "loss": 0.5677, + "step": 6935 + }, + { + "epoch": 0.50992501102779, + "grad_norm": 0.8238793611526489, + "learning_rate": 4.913730544373695e-06, + "loss": 0.5584, + "step": 6936 + }, + { + "epoch": 0.5099985296279959, + "grad_norm": 0.8713434338569641, + "learning_rate": 4.913705449006973e-06, + "loss": 0.5571, + "step": 6937 + }, + { + "epoch": 0.5100720482282017, + "grad_norm": 0.830376148223877, + "learning_rate": 4.913680350054821e-06, + "loss": 0.5196, + "step": 6938 + }, + { + "epoch": 0.5101455668284076, + "grad_norm": 0.8557092547416687, + "learning_rate": 4.913655247517281e-06, + "loss": 0.5637, + "step": 6939 + }, + { + "epoch": 0.5102190854286135, + "grad_norm": 0.8434821963310242, + "learning_rate": 4.9136301413943885e-06, + "loss": 0.582, + "step": 6940 + }, + { + "epoch": 0.5102926040288193, + "grad_norm": 0.8097763061523438, + "learning_rate": 4.91360503168618e-06, + "loss": 0.5135, + "step": 6941 + }, + { + "epoch": 0.5103661226290251, + "grad_norm": 0.8257768750190735, + "learning_rate": 4.913579918392694e-06, + "loss": 0.5093, + "step": 6942 + }, + { + "epoch": 0.510439641229231, + "grad_norm": 0.8096591830253601, + "learning_rate": 4.913554801513966e-06, + "loss": 0.5284, + "step": 6943 + }, + { + "epoch": 0.5105131598294369, + "grad_norm": 0.9064376354217529, + "learning_rate": 4.913529681050035e-06, + "loss": 0.5556, + "step": 6944 + }, + { + "epoch": 0.5105866784296427, + "grad_norm": 0.8908004760742188, + "learning_rate": 4.913504557000938e-06, + "loss": 0.5546, + "step": 6945 + }, + { + "epoch": 0.5106601970298486, + "grad_norm": 0.8338992595672607, + "learning_rate": 4.913479429366712e-06, + "loss": 0.5527, + "step": 6946 + }, + { + "epoch": 0.5107337156300544, + "grad_norm": 0.8360468149185181, + "learning_rate": 4.913454298147394e-06, + "loss": 0.5488, + "step": 6947 + }, + { + "epoch": 0.5108072342302603, + "grad_norm": 0.8459481000900269, + "learning_rate": 4.913429163343023e-06, + "loss": 0.5763, + "step": 6948 + }, + { + "epoch": 0.5108807528304661, + "grad_norm": 0.8303934335708618, + "learning_rate": 4.913404024953634e-06, + "loss": 0.5753, + "step": 6949 + }, + { + "epoch": 0.510954271430672, + "grad_norm": 0.8634257316589355, + "learning_rate": 4.913378882979266e-06, + "loss": 0.5794, + "step": 6950 + }, + { + "epoch": 0.5110277900308778, + "grad_norm": 0.81779944896698, + "learning_rate": 4.913353737419956e-06, + "loss": 0.535, + "step": 6951 + }, + { + "epoch": 0.5111013086310837, + "grad_norm": 0.8742894530296326, + "learning_rate": 4.9133285882757405e-06, + "loss": 0.5938, + "step": 6952 + }, + { + "epoch": 0.5111748272312895, + "grad_norm": 0.8621854782104492, + "learning_rate": 4.913303435546659e-06, + "loss": 0.5487, + "step": 6953 + }, + { + "epoch": 0.5112483458314954, + "grad_norm": 0.8337787389755249, + "learning_rate": 4.913278279232746e-06, + "loss": 0.5709, + "step": 6954 + }, + { + "epoch": 0.5113218644317012, + "grad_norm": 0.8873703479766846, + "learning_rate": 4.91325311933404e-06, + "loss": 0.553, + "step": 6955 + }, + { + "epoch": 0.5113953830319071, + "grad_norm": 0.8344628214836121, + "learning_rate": 4.91322795585058e-06, + "loss": 0.5642, + "step": 6956 + }, + { + "epoch": 0.5114689016321129, + "grad_norm": 0.8134127855300903, + "learning_rate": 4.913202788782401e-06, + "loss": 0.5219, + "step": 6957 + }, + { + "epoch": 0.5115424202323188, + "grad_norm": 0.8189787268638611, + "learning_rate": 4.913177618129542e-06, + "loss": 0.5726, + "step": 6958 + }, + { + "epoch": 0.5116159388325247, + "grad_norm": 0.8499162197113037, + "learning_rate": 4.913152443892039e-06, + "loss": 0.5403, + "step": 6959 + }, + { + "epoch": 0.5116894574327305, + "grad_norm": 0.8875407576560974, + "learning_rate": 4.913127266069931e-06, + "loss": 0.5546, + "step": 6960 + }, + { + "epoch": 0.5117629760329363, + "grad_norm": 0.9288690686225891, + "learning_rate": 4.913102084663255e-06, + "loss": 0.5798, + "step": 6961 + }, + { + "epoch": 0.5118364946331422, + "grad_norm": 0.8556601405143738, + "learning_rate": 4.913076899672047e-06, + "loss": 0.567, + "step": 6962 + }, + { + "epoch": 0.5119100132333481, + "grad_norm": 0.8426167964935303, + "learning_rate": 4.913051711096345e-06, + "loss": 0.55, + "step": 6963 + }, + { + "epoch": 0.5119835318335539, + "grad_norm": 0.8821260333061218, + "learning_rate": 4.913026518936188e-06, + "loss": 0.5551, + "step": 6964 + }, + { + "epoch": 0.5120570504337597, + "grad_norm": 0.847575843334198, + "learning_rate": 4.913001323191612e-06, + "loss": 0.5601, + "step": 6965 + }, + { + "epoch": 0.5121305690339656, + "grad_norm": 0.8366155624389648, + "learning_rate": 4.912976123862654e-06, + "loss": 0.5424, + "step": 6966 + }, + { + "epoch": 0.5122040876341715, + "grad_norm": 0.8748670816421509, + "learning_rate": 4.912950920949353e-06, + "loss": 0.5651, + "step": 6967 + }, + { + "epoch": 0.5122776062343773, + "grad_norm": 0.9087255001068115, + "learning_rate": 4.912925714451745e-06, + "loss": 0.5805, + "step": 6968 + }, + { + "epoch": 0.5123511248345831, + "grad_norm": 0.8421562910079956, + "learning_rate": 4.912900504369867e-06, + "loss": 0.5494, + "step": 6969 + }, + { + "epoch": 0.512424643434789, + "grad_norm": 0.8418018221855164, + "learning_rate": 4.912875290703759e-06, + "loss": 0.562, + "step": 6970 + }, + { + "epoch": 0.5124981620349949, + "grad_norm": 0.8411720991134644, + "learning_rate": 4.912850073453456e-06, + "loss": 0.5287, + "step": 6971 + }, + { + "epoch": 0.5125716806352008, + "grad_norm": 0.810722827911377, + "learning_rate": 4.912824852618997e-06, + "loss": 0.5263, + "step": 6972 + }, + { + "epoch": 0.5126451992354065, + "grad_norm": 0.8075962662696838, + "learning_rate": 4.912799628200419e-06, + "loss": 0.5744, + "step": 6973 + }, + { + "epoch": 0.5127187178356124, + "grad_norm": 0.8498373031616211, + "learning_rate": 4.9127744001977586e-06, + "loss": 0.5093, + "step": 6974 + }, + { + "epoch": 0.5127922364358183, + "grad_norm": 0.8814647197723389, + "learning_rate": 4.912749168611054e-06, + "loss": 0.5407, + "step": 6975 + }, + { + "epoch": 0.5128657550360242, + "grad_norm": 0.8671673536300659, + "learning_rate": 4.912723933440342e-06, + "loss": 0.5426, + "step": 6976 + }, + { + "epoch": 0.5129392736362299, + "grad_norm": 0.8276809453964233, + "learning_rate": 4.9126986946856614e-06, + "loss": 0.5902, + "step": 6977 + }, + { + "epoch": 0.5130127922364358, + "grad_norm": 0.830857515335083, + "learning_rate": 4.912673452347049e-06, + "loss": 0.5751, + "step": 6978 + }, + { + "epoch": 0.5130863108366417, + "grad_norm": 0.8399289846420288, + "learning_rate": 4.912648206424543e-06, + "loss": 0.5415, + "step": 6979 + }, + { + "epoch": 0.5131598294368476, + "grad_norm": 0.8796810507774353, + "learning_rate": 4.9126229569181795e-06, + "loss": 0.5575, + "step": 6980 + }, + { + "epoch": 0.5132333480370533, + "grad_norm": 0.8354654908180237, + "learning_rate": 4.912597703827997e-06, + "loss": 0.533, + "step": 6981 + }, + { + "epoch": 0.5133068666372592, + "grad_norm": 0.8459544777870178, + "learning_rate": 4.912572447154031e-06, + "loss": 0.5752, + "step": 6982 + }, + { + "epoch": 0.5133803852374651, + "grad_norm": 0.851004958152771, + "learning_rate": 4.912547186896323e-06, + "loss": 0.5606, + "step": 6983 + }, + { + "epoch": 0.513453903837671, + "grad_norm": 0.8275269269943237, + "learning_rate": 4.912521923054907e-06, + "loss": 0.5715, + "step": 6984 + }, + { + "epoch": 0.5135274224378767, + "grad_norm": 0.8643697500228882, + "learning_rate": 4.912496655629823e-06, + "loss": 0.5122, + "step": 6985 + }, + { + "epoch": 0.5136009410380826, + "grad_norm": 0.8448425531387329, + "learning_rate": 4.912471384621107e-06, + "loss": 0.5103, + "step": 6986 + }, + { + "epoch": 0.5136744596382885, + "grad_norm": 0.8158522844314575, + "learning_rate": 4.912446110028796e-06, + "loss": 0.5319, + "step": 6987 + }, + { + "epoch": 0.5137479782384944, + "grad_norm": 0.8566840291023254, + "learning_rate": 4.912420831852929e-06, + "loss": 0.6032, + "step": 6988 + }, + { + "epoch": 0.5138214968387002, + "grad_norm": 0.8630392551422119, + "learning_rate": 4.912395550093543e-06, + "loss": 0.5596, + "step": 6989 + }, + { + "epoch": 0.513895015438906, + "grad_norm": 0.8433971405029297, + "learning_rate": 4.912370264750674e-06, + "loss": 0.5646, + "step": 6990 + }, + { + "epoch": 0.5139685340391119, + "grad_norm": 0.844366192817688, + "learning_rate": 4.912344975824363e-06, + "loss": 0.5674, + "step": 6991 + }, + { + "epoch": 0.5140420526393178, + "grad_norm": 0.8298133611679077, + "learning_rate": 4.912319683314645e-06, + "loss": 0.5417, + "step": 6992 + }, + { + "epoch": 0.5141155712395236, + "grad_norm": 0.8414552211761475, + "learning_rate": 4.912294387221558e-06, + "loss": 0.5751, + "step": 6993 + }, + { + "epoch": 0.5141890898397294, + "grad_norm": 0.8506165146827698, + "learning_rate": 4.912269087545141e-06, + "loss": 0.5191, + "step": 6994 + }, + { + "epoch": 0.5142626084399353, + "grad_norm": 0.8740043044090271, + "learning_rate": 4.912243784285429e-06, + "loss": 0.5932, + "step": 6995 + }, + { + "epoch": 0.5143361270401412, + "grad_norm": 0.8050557971000671, + "learning_rate": 4.9122184774424605e-06, + "loss": 0.5241, + "step": 6996 + }, + { + "epoch": 0.514409645640347, + "grad_norm": 0.8292415738105774, + "learning_rate": 4.9121931670162745e-06, + "loss": 0.5154, + "step": 6997 + }, + { + "epoch": 0.5144831642405528, + "grad_norm": 0.8712044358253479, + "learning_rate": 4.912167853006908e-06, + "loss": 0.5738, + "step": 6998 + }, + { + "epoch": 0.5145566828407587, + "grad_norm": 0.877734899520874, + "learning_rate": 4.912142535414398e-06, + "loss": 0.5754, + "step": 6999 + }, + { + "epoch": 0.5146302014409646, + "grad_norm": 0.8340710401535034, + "learning_rate": 4.912117214238782e-06, + "loss": 0.5656, + "step": 7000 + }, + { + "epoch": 0.5147037200411704, + "grad_norm": 0.7907268404960632, + "learning_rate": 4.912091889480097e-06, + "loss": 0.5391, + "step": 7001 + }, + { + "epoch": 0.5147772386413763, + "grad_norm": 0.8498433828353882, + "learning_rate": 4.912066561138383e-06, + "loss": 0.533, + "step": 7002 + }, + { + "epoch": 0.5148507572415821, + "grad_norm": 0.7905449271202087, + "learning_rate": 4.912041229213676e-06, + "loss": 0.546, + "step": 7003 + }, + { + "epoch": 0.514924275841788, + "grad_norm": 0.8844752311706543, + "learning_rate": 4.9120158937060135e-06, + "loss": 0.5936, + "step": 7004 + }, + { + "epoch": 0.5149977944419938, + "grad_norm": 0.8385818004608154, + "learning_rate": 4.911990554615434e-06, + "loss": 0.5928, + "step": 7005 + }, + { + "epoch": 0.5150713130421997, + "grad_norm": 0.836679995059967, + "learning_rate": 4.911965211941974e-06, + "loss": 0.5564, + "step": 7006 + }, + { + "epoch": 0.5151448316424055, + "grad_norm": 0.8144569396972656, + "learning_rate": 4.911939865685672e-06, + "loss": 0.5263, + "step": 7007 + }, + { + "epoch": 0.5152183502426114, + "grad_norm": 0.8360192179679871, + "learning_rate": 4.911914515846565e-06, + "loss": 0.5363, + "step": 7008 + }, + { + "epoch": 0.5152918688428172, + "grad_norm": 0.8606711030006409, + "learning_rate": 4.911889162424692e-06, + "loss": 0.5238, + "step": 7009 + }, + { + "epoch": 0.5153653874430231, + "grad_norm": 0.83980393409729, + "learning_rate": 4.911863805420089e-06, + "loss": 0.5744, + "step": 7010 + }, + { + "epoch": 0.515438906043229, + "grad_norm": 0.8086807727813721, + "learning_rate": 4.911838444832795e-06, + "loss": 0.528, + "step": 7011 + }, + { + "epoch": 0.5155124246434348, + "grad_norm": 0.8192518949508667, + "learning_rate": 4.911813080662846e-06, + "loss": 0.5448, + "step": 7012 + }, + { + "epoch": 0.5155859432436406, + "grad_norm": 0.865616500377655, + "learning_rate": 4.911787712910282e-06, + "loss": 0.5722, + "step": 7013 + }, + { + "epoch": 0.5156594618438465, + "grad_norm": 0.8295899629592896, + "learning_rate": 4.911762341575138e-06, + "loss": 0.5715, + "step": 7014 + }, + { + "epoch": 0.5157329804440524, + "grad_norm": 0.8693717122077942, + "learning_rate": 4.911736966657455e-06, + "loss": 0.585, + "step": 7015 + }, + { + "epoch": 0.5158064990442582, + "grad_norm": 0.8870978951454163, + "learning_rate": 4.911711588157267e-06, + "loss": 0.5675, + "step": 7016 + }, + { + "epoch": 0.515880017644464, + "grad_norm": 0.8495530486106873, + "learning_rate": 4.911686206074614e-06, + "loss": 0.5247, + "step": 7017 + }, + { + "epoch": 0.5159535362446699, + "grad_norm": 0.8689393997192383, + "learning_rate": 4.911660820409533e-06, + "loss": 0.6155, + "step": 7018 + }, + { + "epoch": 0.5160270548448758, + "grad_norm": 0.8410409688949585, + "learning_rate": 4.911635431162063e-06, + "loss": 0.5537, + "step": 7019 + }, + { + "epoch": 0.5161005734450816, + "grad_norm": 0.8652913570404053, + "learning_rate": 4.911610038332239e-06, + "loss": 0.5638, + "step": 7020 + }, + { + "epoch": 0.5161740920452874, + "grad_norm": 0.860251247882843, + "learning_rate": 4.911584641920102e-06, + "loss": 0.5532, + "step": 7021 + }, + { + "epoch": 0.5162476106454933, + "grad_norm": 0.8436646461486816, + "learning_rate": 4.911559241925687e-06, + "loss": 0.5546, + "step": 7022 + }, + { + "epoch": 0.5163211292456992, + "grad_norm": 0.8691819310188293, + "learning_rate": 4.911533838349034e-06, + "loss": 0.5855, + "step": 7023 + }, + { + "epoch": 0.516394647845905, + "grad_norm": 0.8055794835090637, + "learning_rate": 4.9115084311901785e-06, + "loss": 0.518, + "step": 7024 + }, + { + "epoch": 0.5164681664461108, + "grad_norm": 0.8108629584312439, + "learning_rate": 4.911483020449159e-06, + "loss": 0.539, + "step": 7025 + }, + { + "epoch": 0.5165416850463167, + "grad_norm": 0.7930851578712463, + "learning_rate": 4.911457606126014e-06, + "loss": 0.5321, + "step": 7026 + }, + { + "epoch": 0.5166152036465226, + "grad_norm": 0.8411803245544434, + "learning_rate": 4.91143218822078e-06, + "loss": 0.4964, + "step": 7027 + }, + { + "epoch": 0.5166887222467285, + "grad_norm": 0.8424232602119446, + "learning_rate": 4.911406766733497e-06, + "loss": 0.5832, + "step": 7028 + }, + { + "epoch": 0.5167622408469342, + "grad_norm": 0.8516883850097656, + "learning_rate": 4.911381341664201e-06, + "loss": 0.5524, + "step": 7029 + }, + { + "epoch": 0.5168357594471401, + "grad_norm": 0.8928271532058716, + "learning_rate": 4.91135591301293e-06, + "loss": 0.5731, + "step": 7030 + }, + { + "epoch": 0.516909278047346, + "grad_norm": 0.8936083912849426, + "learning_rate": 4.9113304807797204e-06, + "loss": 0.5714, + "step": 7031 + }, + { + "epoch": 0.5169827966475519, + "grad_norm": 0.8037853837013245, + "learning_rate": 4.911305044964613e-06, + "loss": 0.5378, + "step": 7032 + }, + { + "epoch": 0.5170563152477576, + "grad_norm": 0.8376741409301758, + "learning_rate": 4.911279605567643e-06, + "loss": 0.4967, + "step": 7033 + }, + { + "epoch": 0.5171298338479635, + "grad_norm": 0.8657670021057129, + "learning_rate": 4.91125416258885e-06, + "loss": 0.5657, + "step": 7034 + }, + { + "epoch": 0.5172033524481694, + "grad_norm": 0.8409833312034607, + "learning_rate": 4.91122871602827e-06, + "loss": 0.5652, + "step": 7035 + }, + { + "epoch": 0.5172768710483753, + "grad_norm": 0.8855292201042175, + "learning_rate": 4.911203265885943e-06, + "loss": 0.6281, + "step": 7036 + }, + { + "epoch": 0.517350389648581, + "grad_norm": 0.8442057371139526, + "learning_rate": 4.9111778121619045e-06, + "loss": 0.5245, + "step": 7037 + }, + { + "epoch": 0.5174239082487869, + "grad_norm": 0.8415868282318115, + "learning_rate": 4.911152354856195e-06, + "loss": 0.5528, + "step": 7038 + }, + { + "epoch": 0.5174974268489928, + "grad_norm": 0.8443600535392761, + "learning_rate": 4.9111268939688485e-06, + "loss": 0.5518, + "step": 7039 + }, + { + "epoch": 0.5175709454491987, + "grad_norm": 0.8453041315078735, + "learning_rate": 4.911101429499906e-06, + "loss": 0.5465, + "step": 7040 + }, + { + "epoch": 0.5176444640494045, + "grad_norm": 0.7755591869354248, + "learning_rate": 4.911075961449405e-06, + "loss": 0.5147, + "step": 7041 + }, + { + "epoch": 0.5177179826496103, + "grad_norm": 0.8706019520759583, + "learning_rate": 4.9110504898173814e-06, + "loss": 0.5472, + "step": 7042 + }, + { + "epoch": 0.5177915012498162, + "grad_norm": 0.8609167337417603, + "learning_rate": 4.911025014603876e-06, + "loss": 0.498, + "step": 7043 + }, + { + "epoch": 0.5178650198500221, + "grad_norm": 0.845934271812439, + "learning_rate": 4.910999535808924e-06, + "loss": 0.5382, + "step": 7044 + }, + { + "epoch": 0.5179385384502279, + "grad_norm": 0.8377081155776978, + "learning_rate": 4.910974053432564e-06, + "loss": 0.5232, + "step": 7045 + }, + { + "epoch": 0.5180120570504337, + "grad_norm": 0.8427677750587463, + "learning_rate": 4.910948567474834e-06, + "loss": 0.5381, + "step": 7046 + }, + { + "epoch": 0.5180855756506396, + "grad_norm": 0.7985289096832275, + "learning_rate": 4.910923077935773e-06, + "loss": 0.4944, + "step": 7047 + }, + { + "epoch": 0.5181590942508455, + "grad_norm": 0.8187490105628967, + "learning_rate": 4.910897584815416e-06, + "loss": 0.5421, + "step": 7048 + }, + { + "epoch": 0.5182326128510513, + "grad_norm": 0.888708233833313, + "learning_rate": 4.910872088113804e-06, + "loss": 0.5603, + "step": 7049 + }, + { + "epoch": 0.5183061314512571, + "grad_norm": 0.7956041693687439, + "learning_rate": 4.910846587830973e-06, + "loss": 0.5485, + "step": 7050 + }, + { + "epoch": 0.518379650051463, + "grad_norm": 0.8325605988502502, + "learning_rate": 4.910821083966963e-06, + "loss": 0.5288, + "step": 7051 + }, + { + "epoch": 0.5184531686516689, + "grad_norm": 0.8673542737960815, + "learning_rate": 4.9107955765218076e-06, + "loss": 0.5801, + "step": 7052 + }, + { + "epoch": 0.5185266872518747, + "grad_norm": 0.8674647212028503, + "learning_rate": 4.91077006549555e-06, + "loss": 0.5348, + "step": 7053 + }, + { + "epoch": 0.5186002058520806, + "grad_norm": 0.8617333769798279, + "learning_rate": 4.910744550888224e-06, + "loss": 0.5522, + "step": 7054 + }, + { + "epoch": 0.5186737244522864, + "grad_norm": 0.8543192148208618, + "learning_rate": 4.9107190326998684e-06, + "loss": 0.5655, + "step": 7055 + }, + { + "epoch": 0.5187472430524923, + "grad_norm": 0.8380985856056213, + "learning_rate": 4.910693510930522e-06, + "loss": 0.5708, + "step": 7056 + }, + { + "epoch": 0.5188207616526981, + "grad_norm": 0.8173675537109375, + "learning_rate": 4.910667985580223e-06, + "loss": 0.5539, + "step": 7057 + }, + { + "epoch": 0.518894280252904, + "grad_norm": 0.8116873502731323, + "learning_rate": 4.910642456649008e-06, + "loss": 0.5606, + "step": 7058 + }, + { + "epoch": 0.5189677988531098, + "grad_norm": 0.8211826682090759, + "learning_rate": 4.910616924136917e-06, + "loss": 0.5532, + "step": 7059 + }, + { + "epoch": 0.5190413174533157, + "grad_norm": 0.8165102601051331, + "learning_rate": 4.910591388043986e-06, + "loss": 0.5732, + "step": 7060 + }, + { + "epoch": 0.5191148360535215, + "grad_norm": 0.8553032875061035, + "learning_rate": 4.910565848370253e-06, + "loss": 0.5249, + "step": 7061 + }, + { + "epoch": 0.5191883546537274, + "grad_norm": 0.8211163282394409, + "learning_rate": 4.910540305115756e-06, + "loss": 0.5363, + "step": 7062 + }, + { + "epoch": 0.5192618732539332, + "grad_norm": 0.8684572577476501, + "learning_rate": 4.910514758280535e-06, + "loss": 0.4802, + "step": 7063 + }, + { + "epoch": 0.5193353918541391, + "grad_norm": 0.8483703136444092, + "learning_rate": 4.910489207864625e-06, + "loss": 0.5827, + "step": 7064 + }, + { + "epoch": 0.5194089104543449, + "grad_norm": 0.846531331539154, + "learning_rate": 4.910463653868066e-06, + "loss": 0.567, + "step": 7065 + }, + { + "epoch": 0.5194824290545508, + "grad_norm": 0.7709220051765442, + "learning_rate": 4.9104380962908946e-06, + "loss": 0.5111, + "step": 7066 + }, + { + "epoch": 0.5195559476547567, + "grad_norm": 0.8754871487617493, + "learning_rate": 4.91041253513315e-06, + "loss": 0.5607, + "step": 7067 + }, + { + "epoch": 0.5196294662549625, + "grad_norm": 0.8467587232589722, + "learning_rate": 4.910386970394869e-06, + "loss": 0.5192, + "step": 7068 + }, + { + "epoch": 0.5197029848551683, + "grad_norm": 0.8763861060142517, + "learning_rate": 4.910361402076091e-06, + "loss": 0.5886, + "step": 7069 + }, + { + "epoch": 0.5197765034553742, + "grad_norm": 0.822246789932251, + "learning_rate": 4.910335830176852e-06, + "loss": 0.5547, + "step": 7070 + }, + { + "epoch": 0.5198500220555801, + "grad_norm": 1.0203418731689453, + "learning_rate": 4.9103102546971915e-06, + "loss": 0.5892, + "step": 7071 + }, + { + "epoch": 0.5199235406557859, + "grad_norm": 0.8443164229393005, + "learning_rate": 4.910284675637147e-06, + "loss": 0.5632, + "step": 7072 + }, + { + "epoch": 0.5199970592559917, + "grad_norm": 0.80675208568573, + "learning_rate": 4.910259092996757e-06, + "loss": 0.5186, + "step": 7073 + }, + { + "epoch": 0.5200705778561976, + "grad_norm": 0.898744523525238, + "learning_rate": 4.910233506776059e-06, + "loss": 0.6106, + "step": 7074 + }, + { + "epoch": 0.5201440964564035, + "grad_norm": 0.8389249444007874, + "learning_rate": 4.91020791697509e-06, + "loss": 0.5417, + "step": 7075 + }, + { + "epoch": 0.5202176150566094, + "grad_norm": 0.8600838780403137, + "learning_rate": 4.910182323593891e-06, + "loss": 0.5468, + "step": 7076 + }, + { + "epoch": 0.5202911336568151, + "grad_norm": 0.8384804129600525, + "learning_rate": 4.910156726632497e-06, + "loss": 0.5053, + "step": 7077 + }, + { + "epoch": 0.520364652257021, + "grad_norm": 0.8851485848426819, + "learning_rate": 4.910131126090948e-06, + "loss": 0.5169, + "step": 7078 + }, + { + "epoch": 0.5204381708572269, + "grad_norm": 0.84156733751297, + "learning_rate": 4.910105521969281e-06, + "loss": 0.5209, + "step": 7079 + }, + { + "epoch": 0.5205116894574328, + "grad_norm": 0.8350622653961182, + "learning_rate": 4.910079914267533e-06, + "loss": 0.5558, + "step": 7080 + }, + { + "epoch": 0.5205852080576386, + "grad_norm": 0.853598415851593, + "learning_rate": 4.910054302985744e-06, + "loss": 0.5611, + "step": 7081 + }, + { + "epoch": 0.5206587266578444, + "grad_norm": 0.8635367751121521, + "learning_rate": 4.910028688123952e-06, + "loss": 0.5412, + "step": 7082 + }, + { + "epoch": 0.5207322452580503, + "grad_norm": 0.8377785682678223, + "learning_rate": 4.910003069682193e-06, + "loss": 0.5397, + "step": 7083 + }, + { + "epoch": 0.5208057638582562, + "grad_norm": 0.827852189540863, + "learning_rate": 4.9099774476605074e-06, + "loss": 0.5449, + "step": 7084 + }, + { + "epoch": 0.520879282458462, + "grad_norm": 0.8469928503036499, + "learning_rate": 4.909951822058933e-06, + "loss": 0.5439, + "step": 7085 + }, + { + "epoch": 0.5209528010586678, + "grad_norm": 0.852078378200531, + "learning_rate": 4.909926192877505e-06, + "loss": 0.5788, + "step": 7086 + }, + { + "epoch": 0.5210263196588737, + "grad_norm": 0.8849294781684875, + "learning_rate": 4.909900560116265e-06, + "loss": 0.5533, + "step": 7087 + }, + { + "epoch": 0.5210998382590796, + "grad_norm": 0.8569811582565308, + "learning_rate": 4.909874923775249e-06, + "loss": 0.5721, + "step": 7088 + }, + { + "epoch": 0.5211733568592855, + "grad_norm": 0.8541024923324585, + "learning_rate": 4.909849283854497e-06, + "loss": 0.5538, + "step": 7089 + }, + { + "epoch": 0.5212468754594912, + "grad_norm": 0.8427575826644897, + "learning_rate": 4.909823640354045e-06, + "loss": 0.5259, + "step": 7090 + }, + { + "epoch": 0.5213203940596971, + "grad_norm": 0.8888862729072571, + "learning_rate": 4.909797993273932e-06, + "loss": 0.5673, + "step": 7091 + }, + { + "epoch": 0.521393912659903, + "grad_norm": 0.8167763352394104, + "learning_rate": 4.909772342614196e-06, + "loss": 0.5529, + "step": 7092 + }, + { + "epoch": 0.5214674312601089, + "grad_norm": 0.90582674741745, + "learning_rate": 4.909746688374875e-06, + "loss": 0.6036, + "step": 7093 + }, + { + "epoch": 0.5215409498603146, + "grad_norm": 0.8387399911880493, + "learning_rate": 4.909721030556008e-06, + "loss": 0.5505, + "step": 7094 + }, + { + "epoch": 0.5216144684605205, + "grad_norm": 0.8726805448532104, + "learning_rate": 4.909695369157631e-06, + "loss": 0.5736, + "step": 7095 + }, + { + "epoch": 0.5216879870607264, + "grad_norm": 0.8233128190040588, + "learning_rate": 4.909669704179784e-06, + "loss": 0.5235, + "step": 7096 + }, + { + "epoch": 0.5217615056609323, + "grad_norm": 0.8268699645996094, + "learning_rate": 4.909644035622506e-06, + "loss": 0.5285, + "step": 7097 + }, + { + "epoch": 0.521835024261138, + "grad_norm": 0.8327262997627258, + "learning_rate": 4.909618363485832e-06, + "loss": 0.5393, + "step": 7098 + }, + { + "epoch": 0.5219085428613439, + "grad_norm": 0.8432707190513611, + "learning_rate": 4.909592687769803e-06, + "loss": 0.5418, + "step": 7099 + }, + { + "epoch": 0.5219820614615498, + "grad_norm": 0.857673168182373, + "learning_rate": 4.9095670084744555e-06, + "loss": 0.5211, + "step": 7100 + }, + { + "epoch": 0.5220555800617557, + "grad_norm": 0.8985885381698608, + "learning_rate": 4.909541325599828e-06, + "loss": 0.623, + "step": 7101 + }, + { + "epoch": 0.5221290986619614, + "grad_norm": 0.8451283574104309, + "learning_rate": 4.909515639145959e-06, + "loss": 0.5872, + "step": 7102 + }, + { + "epoch": 0.5222026172621673, + "grad_norm": 0.8320543169975281, + "learning_rate": 4.909489949112887e-06, + "loss": 0.5134, + "step": 7103 + }, + { + "epoch": 0.5222761358623732, + "grad_norm": 0.82646244764328, + "learning_rate": 4.909464255500648e-06, + "loss": 0.5526, + "step": 7104 + }, + { + "epoch": 0.5223496544625791, + "grad_norm": 0.8454843163490295, + "learning_rate": 4.909438558309283e-06, + "loss": 0.5493, + "step": 7105 + }, + { + "epoch": 0.5224231730627849, + "grad_norm": 0.8438200950622559, + "learning_rate": 4.909412857538829e-06, + "loss": 0.5285, + "step": 7106 + }, + { + "epoch": 0.5224966916629907, + "grad_norm": 0.889375627040863, + "learning_rate": 4.909387153189324e-06, + "loss": 0.5561, + "step": 7107 + }, + { + "epoch": 0.5225702102631966, + "grad_norm": 0.7799162864685059, + "learning_rate": 4.909361445260806e-06, + "loss": 0.5237, + "step": 7108 + }, + { + "epoch": 0.5226437288634025, + "grad_norm": 0.8206755518913269, + "learning_rate": 4.909335733753314e-06, + "loss": 0.5436, + "step": 7109 + }, + { + "epoch": 0.5227172474636083, + "grad_norm": 0.908315896987915, + "learning_rate": 4.909310018666885e-06, + "loss": 0.6016, + "step": 7110 + }, + { + "epoch": 0.5227907660638141, + "grad_norm": 0.8344796895980835, + "learning_rate": 4.9092843000015585e-06, + "loss": 0.5777, + "step": 7111 + }, + { + "epoch": 0.52286428466402, + "grad_norm": 0.9054419994354248, + "learning_rate": 4.909258577757372e-06, + "loss": 0.5611, + "step": 7112 + }, + { + "epoch": 0.5229378032642259, + "grad_norm": 0.9090638160705566, + "learning_rate": 4.909232851934363e-06, + "loss": 0.577, + "step": 7113 + }, + { + "epoch": 0.5230113218644317, + "grad_norm": 0.8122974634170532, + "learning_rate": 4.9092071225325725e-06, + "loss": 0.5081, + "step": 7114 + }, + { + "epoch": 0.5230848404646375, + "grad_norm": 0.7990195155143738, + "learning_rate": 4.909181389552035e-06, + "loss": 0.519, + "step": 7115 + }, + { + "epoch": 0.5231583590648434, + "grad_norm": 0.8459122776985168, + "learning_rate": 4.909155652992791e-06, + "loss": 0.5363, + "step": 7116 + }, + { + "epoch": 0.5232318776650493, + "grad_norm": 0.8306003212928772, + "learning_rate": 4.909129912854878e-06, + "loss": 0.5436, + "step": 7117 + }, + { + "epoch": 0.5233053962652551, + "grad_norm": 0.846583366394043, + "learning_rate": 4.909104169138335e-06, + "loss": 0.5162, + "step": 7118 + }, + { + "epoch": 0.523378914865461, + "grad_norm": 0.8183870315551758, + "learning_rate": 4.9090784218432e-06, + "loss": 0.5329, + "step": 7119 + }, + { + "epoch": 0.5234524334656668, + "grad_norm": 0.8255444765090942, + "learning_rate": 4.9090526709695094e-06, + "loss": 0.4965, + "step": 7120 + }, + { + "epoch": 0.5235259520658727, + "grad_norm": 0.8684021234512329, + "learning_rate": 4.909026916517304e-06, + "loss": 0.5841, + "step": 7121 + }, + { + "epoch": 0.5235994706660785, + "grad_norm": 0.8401899337768555, + "learning_rate": 4.9090011584866205e-06, + "loss": 0.5683, + "step": 7122 + }, + { + "epoch": 0.5236729892662844, + "grad_norm": 0.858308732509613, + "learning_rate": 4.908975396877498e-06, + "loss": 0.5434, + "step": 7123 + }, + { + "epoch": 0.5237465078664902, + "grad_norm": 0.8518227934837341, + "learning_rate": 4.908949631689975e-06, + "loss": 0.5972, + "step": 7124 + }, + { + "epoch": 0.5238200264666961, + "grad_norm": 0.887390673160553, + "learning_rate": 4.9089238629240885e-06, + "loss": 0.5861, + "step": 7125 + }, + { + "epoch": 0.5238935450669019, + "grad_norm": 0.8181896805763245, + "learning_rate": 4.908898090579878e-06, + "loss": 0.5438, + "step": 7126 + }, + { + "epoch": 0.5239670636671078, + "grad_norm": 0.8551415801048279, + "learning_rate": 4.9088723146573804e-06, + "loss": 0.5462, + "step": 7127 + }, + { + "epoch": 0.5240405822673136, + "grad_norm": 0.8058627247810364, + "learning_rate": 4.908846535156636e-06, + "loss": 0.5651, + "step": 7128 + }, + { + "epoch": 0.5241141008675195, + "grad_norm": 0.8789483308792114, + "learning_rate": 4.908820752077681e-06, + "loss": 0.5364, + "step": 7129 + }, + { + "epoch": 0.5241876194677253, + "grad_norm": 0.8860745429992676, + "learning_rate": 4.908794965420556e-06, + "loss": 0.548, + "step": 7130 + }, + { + "epoch": 0.5242611380679312, + "grad_norm": 0.7948374152183533, + "learning_rate": 4.908769175185296e-06, + "loss": 0.5195, + "step": 7131 + }, + { + "epoch": 0.5243346566681371, + "grad_norm": 0.8105024695396423, + "learning_rate": 4.908743381371943e-06, + "loss": 0.5385, + "step": 7132 + }, + { + "epoch": 0.5244081752683429, + "grad_norm": 0.8185162544250488, + "learning_rate": 4.908717583980533e-06, + "loss": 0.511, + "step": 7133 + }, + { + "epoch": 0.5244816938685487, + "grad_norm": 0.8534353375434875, + "learning_rate": 4.908691783011105e-06, + "loss": 0.5055, + "step": 7134 + }, + { + "epoch": 0.5245552124687546, + "grad_norm": 0.8662156462669373, + "learning_rate": 4.908665978463697e-06, + "loss": 0.5285, + "step": 7135 + }, + { + "epoch": 0.5246287310689605, + "grad_norm": 0.9770722985267639, + "learning_rate": 4.908640170338348e-06, + "loss": 0.5924, + "step": 7136 + }, + { + "epoch": 0.5247022496691663, + "grad_norm": 0.8238306641578674, + "learning_rate": 4.908614358635096e-06, + "loss": 0.5287, + "step": 7137 + }, + { + "epoch": 0.5247757682693721, + "grad_norm": 0.8458572626113892, + "learning_rate": 4.908588543353979e-06, + "loss": 0.5806, + "step": 7138 + }, + { + "epoch": 0.524849286869578, + "grad_norm": 0.8530046939849854, + "learning_rate": 4.9085627244950355e-06, + "loss": 0.5471, + "step": 7139 + }, + { + "epoch": 0.5249228054697839, + "grad_norm": 0.8462518453598022, + "learning_rate": 4.908536902058304e-06, + "loss": 0.5314, + "step": 7140 + }, + { + "epoch": 0.5249963240699898, + "grad_norm": 0.8648687601089478, + "learning_rate": 4.9085110760438226e-06, + "loss": 0.5613, + "step": 7141 + }, + { + "epoch": 0.5250698426701955, + "grad_norm": 0.8061139583587646, + "learning_rate": 4.90848524645163e-06, + "loss": 0.5176, + "step": 7142 + }, + { + "epoch": 0.5251433612704014, + "grad_norm": 0.8464365601539612, + "learning_rate": 4.908459413281764e-06, + "loss": 0.5823, + "step": 7143 + }, + { + "epoch": 0.5252168798706073, + "grad_norm": 0.9102874994277954, + "learning_rate": 4.908433576534265e-06, + "loss": 0.6095, + "step": 7144 + }, + { + "epoch": 0.5252903984708132, + "grad_norm": 0.8193724751472473, + "learning_rate": 4.9084077362091684e-06, + "loss": 0.5317, + "step": 7145 + }, + { + "epoch": 0.5253639170710189, + "grad_norm": 0.828357458114624, + "learning_rate": 4.908381892306514e-06, + "loss": 0.5484, + "step": 7146 + }, + { + "epoch": 0.5254374356712248, + "grad_norm": 0.8217025399208069, + "learning_rate": 4.908356044826341e-06, + "loss": 0.5503, + "step": 7147 + }, + { + "epoch": 0.5255109542714307, + "grad_norm": 0.817631721496582, + "learning_rate": 4.9083301937686864e-06, + "loss": 0.5595, + "step": 7148 + }, + { + "epoch": 0.5255844728716366, + "grad_norm": 0.842433750629425, + "learning_rate": 4.90830433913359e-06, + "loss": 0.535, + "step": 7149 + }, + { + "epoch": 0.5256579914718423, + "grad_norm": 0.8580867052078247, + "learning_rate": 4.908278480921088e-06, + "loss": 0.5608, + "step": 7150 + }, + { + "epoch": 0.5257315100720482, + "grad_norm": 0.836259663105011, + "learning_rate": 4.908252619131222e-06, + "loss": 0.5539, + "step": 7151 + }, + { + "epoch": 0.5258050286722541, + "grad_norm": 0.8076220154762268, + "learning_rate": 4.908226753764027e-06, + "loss": 0.5289, + "step": 7152 + }, + { + "epoch": 0.52587854727246, + "grad_norm": 0.8149564266204834, + "learning_rate": 4.908200884819544e-06, + "loss": 0.5496, + "step": 7153 + }, + { + "epoch": 0.5259520658726657, + "grad_norm": 0.8476876020431519, + "learning_rate": 4.9081750122978094e-06, + "loss": 0.565, + "step": 7154 + }, + { + "epoch": 0.5260255844728716, + "grad_norm": 0.8922572731971741, + "learning_rate": 4.908149136198863e-06, + "loss": 0.5454, + "step": 7155 + }, + { + "epoch": 0.5260991030730775, + "grad_norm": 0.8729925751686096, + "learning_rate": 4.908123256522744e-06, + "loss": 0.5623, + "step": 7156 + }, + { + "epoch": 0.5261726216732834, + "grad_norm": 0.8263548612594604, + "learning_rate": 4.908097373269489e-06, + "loss": 0.5424, + "step": 7157 + }, + { + "epoch": 0.5262461402734891, + "grad_norm": 0.837825357913971, + "learning_rate": 4.908071486439136e-06, + "loss": 0.4994, + "step": 7158 + }, + { + "epoch": 0.526319658873695, + "grad_norm": 0.8038219809532166, + "learning_rate": 4.9080455960317265e-06, + "loss": 0.552, + "step": 7159 + }, + { + "epoch": 0.5263931774739009, + "grad_norm": 0.8601519465446472, + "learning_rate": 4.908019702047297e-06, + "loss": 0.5464, + "step": 7160 + }, + { + "epoch": 0.5264666960741068, + "grad_norm": 0.8390891551971436, + "learning_rate": 4.9079938044858854e-06, + "loss": 0.5402, + "step": 7161 + }, + { + "epoch": 0.5265402146743126, + "grad_norm": 0.8508782982826233, + "learning_rate": 4.907967903347531e-06, + "loss": 0.5738, + "step": 7162 + }, + { + "epoch": 0.5266137332745184, + "grad_norm": 0.8759168386459351, + "learning_rate": 4.907941998632272e-06, + "loss": 0.5758, + "step": 7163 + }, + { + "epoch": 0.5266872518747243, + "grad_norm": 0.852066159248352, + "learning_rate": 4.907916090340147e-06, + "loss": 0.5798, + "step": 7164 + }, + { + "epoch": 0.5267607704749302, + "grad_norm": 0.8220332264900208, + "learning_rate": 4.907890178471195e-06, + "loss": 0.5193, + "step": 7165 + }, + { + "epoch": 0.526834289075136, + "grad_norm": 0.8836582899093628, + "learning_rate": 4.907864263025454e-06, + "loss": 0.5245, + "step": 7166 + }, + { + "epoch": 0.5269078076753418, + "grad_norm": 0.7851231694221497, + "learning_rate": 4.907838344002962e-06, + "loss": 0.5288, + "step": 7167 + }, + { + "epoch": 0.5269813262755477, + "grad_norm": 0.8421019315719604, + "learning_rate": 4.9078124214037585e-06, + "loss": 0.5236, + "step": 7168 + }, + { + "epoch": 0.5270548448757536, + "grad_norm": 0.8570948839187622, + "learning_rate": 4.907786495227881e-06, + "loss": 0.5848, + "step": 7169 + }, + { + "epoch": 0.5271283634759594, + "grad_norm": 0.8082157373428345, + "learning_rate": 4.907760565475368e-06, + "loss": 0.4963, + "step": 7170 + }, + { + "epoch": 0.5272018820761653, + "grad_norm": 0.8514195084571838, + "learning_rate": 4.90773463214626e-06, + "loss": 0.5473, + "step": 7171 + }, + { + "epoch": 0.5272754006763711, + "grad_norm": 0.8573423027992249, + "learning_rate": 4.9077086952405924e-06, + "loss": 0.5375, + "step": 7172 + }, + { + "epoch": 0.527348919276577, + "grad_norm": 0.8170181512832642, + "learning_rate": 4.907682754758407e-06, + "loss": 0.5802, + "step": 7173 + }, + { + "epoch": 0.5274224378767828, + "grad_norm": 0.8627784252166748, + "learning_rate": 4.907656810699739e-06, + "loss": 0.5551, + "step": 7174 + }, + { + "epoch": 0.5274959564769887, + "grad_norm": 0.8816397190093994, + "learning_rate": 4.90763086306463e-06, + "loss": 0.5517, + "step": 7175 + }, + { + "epoch": 0.5275694750771945, + "grad_norm": 0.8001824617385864, + "learning_rate": 4.907604911853117e-06, + "loss": 0.5299, + "step": 7176 + }, + { + "epoch": 0.5276429936774004, + "grad_norm": 0.8771700263023376, + "learning_rate": 4.9075789570652385e-06, + "loss": 0.5336, + "step": 7177 + }, + { + "epoch": 0.5277165122776062, + "grad_norm": 0.8145037293434143, + "learning_rate": 4.9075529987010335e-06, + "loss": 0.5048, + "step": 7178 + }, + { + "epoch": 0.5277900308778121, + "grad_norm": 0.812615692615509, + "learning_rate": 4.90752703676054e-06, + "loss": 0.5302, + "step": 7179 + }, + { + "epoch": 0.527863549478018, + "grad_norm": 0.8668370246887207, + "learning_rate": 4.9075010712437975e-06, + "loss": 0.5949, + "step": 7180 + }, + { + "epoch": 0.5279370680782238, + "grad_norm": 0.8180181980133057, + "learning_rate": 4.9074751021508436e-06, + "loss": 0.5371, + "step": 7181 + }, + { + "epoch": 0.5280105866784296, + "grad_norm": 0.9070309996604919, + "learning_rate": 4.907449129481717e-06, + "loss": 0.546, + "step": 7182 + }, + { + "epoch": 0.5280841052786355, + "grad_norm": 0.8474376201629639, + "learning_rate": 4.907423153236457e-06, + "loss": 0.5403, + "step": 7183 + }, + { + "epoch": 0.5281576238788414, + "grad_norm": 0.792716920375824, + "learning_rate": 4.907397173415102e-06, + "loss": 0.4697, + "step": 7184 + }, + { + "epoch": 0.5282311424790472, + "grad_norm": 0.8641507625579834, + "learning_rate": 4.907371190017689e-06, + "loss": 0.5715, + "step": 7185 + }, + { + "epoch": 0.528304661079253, + "grad_norm": 0.8230507969856262, + "learning_rate": 4.907345203044259e-06, + "loss": 0.5445, + "step": 7186 + }, + { + "epoch": 0.5283781796794589, + "grad_norm": 0.8422389626502991, + "learning_rate": 4.90731921249485e-06, + "loss": 0.5842, + "step": 7187 + }, + { + "epoch": 0.5284516982796648, + "grad_norm": 0.8534636497497559, + "learning_rate": 4.907293218369499e-06, + "loss": 0.5339, + "step": 7188 + }, + { + "epoch": 0.5285252168798706, + "grad_norm": 0.8321546912193298, + "learning_rate": 4.907267220668246e-06, + "loss": 0.5515, + "step": 7189 + }, + { + "epoch": 0.5285987354800764, + "grad_norm": 0.7977197766304016, + "learning_rate": 4.907241219391129e-06, + "loss": 0.5721, + "step": 7190 + }, + { + "epoch": 0.5286722540802823, + "grad_norm": 0.8746935725212097, + "learning_rate": 4.907215214538188e-06, + "loss": 0.6132, + "step": 7191 + }, + { + "epoch": 0.5287457726804882, + "grad_norm": 0.8347777128219604, + "learning_rate": 4.90718920610946e-06, + "loss": 0.5565, + "step": 7192 + }, + { + "epoch": 0.528819291280694, + "grad_norm": 0.8556181788444519, + "learning_rate": 4.907163194104985e-06, + "loss": 0.5233, + "step": 7193 + }, + { + "epoch": 0.5288928098808998, + "grad_norm": 0.8644651770591736, + "learning_rate": 4.9071371785247996e-06, + "loss": 0.5327, + "step": 7194 + }, + { + "epoch": 0.5289663284811057, + "grad_norm": 0.876439094543457, + "learning_rate": 4.907111159368945e-06, + "loss": 0.5052, + "step": 7195 + }, + { + "epoch": 0.5290398470813116, + "grad_norm": 0.8525221943855286, + "learning_rate": 4.907085136637457e-06, + "loss": 0.52, + "step": 7196 + }, + { + "epoch": 0.5291133656815175, + "grad_norm": 0.8145563006401062, + "learning_rate": 4.907059110330377e-06, + "loss": 0.5319, + "step": 7197 + }, + { + "epoch": 0.5291868842817232, + "grad_norm": 0.8982133269309998, + "learning_rate": 4.907033080447743e-06, + "loss": 0.6092, + "step": 7198 + }, + { + "epoch": 0.5292604028819291, + "grad_norm": 0.7734351754188538, + "learning_rate": 4.907007046989592e-06, + "loss": 0.5472, + "step": 7199 + }, + { + "epoch": 0.529333921482135, + "grad_norm": 0.8646948337554932, + "learning_rate": 4.906981009955965e-06, + "loss": 0.6043, + "step": 7200 + }, + { + "epoch": 0.5294074400823409, + "grad_norm": 0.8671430349349976, + "learning_rate": 4.906954969346898e-06, + "loss": 0.5398, + "step": 7201 + }, + { + "epoch": 0.5294809586825466, + "grad_norm": 0.8285473585128784, + "learning_rate": 4.906928925162432e-06, + "loss": 0.5586, + "step": 7202 + }, + { + "epoch": 0.5295544772827525, + "grad_norm": 0.830195426940918, + "learning_rate": 4.9069028774026055e-06, + "loss": 0.52, + "step": 7203 + }, + { + "epoch": 0.5296279958829584, + "grad_norm": 0.8335383534431458, + "learning_rate": 4.906876826067456e-06, + "loss": 0.5263, + "step": 7204 + }, + { + "epoch": 0.5297015144831643, + "grad_norm": 0.8870127201080322, + "learning_rate": 4.906850771157023e-06, + "loss": 0.6017, + "step": 7205 + }, + { + "epoch": 0.52977503308337, + "grad_norm": 0.8015517592430115, + "learning_rate": 4.906824712671344e-06, + "loss": 0.4885, + "step": 7206 + }, + { + "epoch": 0.5298485516835759, + "grad_norm": 0.8418801426887512, + "learning_rate": 4.906798650610459e-06, + "loss": 0.5474, + "step": 7207 + }, + { + "epoch": 0.5299220702837818, + "grad_norm": 0.8720914721488953, + "learning_rate": 4.906772584974407e-06, + "loss": 0.5628, + "step": 7208 + }, + { + "epoch": 0.5299955888839877, + "grad_norm": 0.8296825885772705, + "learning_rate": 4.906746515763227e-06, + "loss": 0.5183, + "step": 7209 + }, + { + "epoch": 0.5300691074841934, + "grad_norm": 0.8400160670280457, + "learning_rate": 4.906720442976955e-06, + "loss": 0.5304, + "step": 7210 + }, + { + "epoch": 0.5301426260843993, + "grad_norm": 0.8284702301025391, + "learning_rate": 4.906694366615632e-06, + "loss": 0.559, + "step": 7211 + }, + { + "epoch": 0.5302161446846052, + "grad_norm": 0.833324670791626, + "learning_rate": 4.906668286679297e-06, + "loss": 0.5477, + "step": 7212 + }, + { + "epoch": 0.5302896632848111, + "grad_norm": 0.8698702454566956, + "learning_rate": 4.9066422031679874e-06, + "loss": 0.5748, + "step": 7213 + }, + { + "epoch": 0.5303631818850169, + "grad_norm": 0.8287404775619507, + "learning_rate": 4.906616116081743e-06, + "loss": 0.4739, + "step": 7214 + }, + { + "epoch": 0.5304367004852227, + "grad_norm": 0.891847550868988, + "learning_rate": 4.906590025420602e-06, + "loss": 0.5557, + "step": 7215 + }, + { + "epoch": 0.5305102190854286, + "grad_norm": 0.8728455901145935, + "learning_rate": 4.9065639311846035e-06, + "loss": 0.5121, + "step": 7216 + }, + { + "epoch": 0.5305837376856345, + "grad_norm": 0.8431978821754456, + "learning_rate": 4.906537833373785e-06, + "loss": 0.5241, + "step": 7217 + }, + { + "epoch": 0.5306572562858404, + "grad_norm": 0.8517115116119385, + "learning_rate": 4.906511731988188e-06, + "loss": 0.5556, + "step": 7218 + }, + { + "epoch": 0.5307307748860461, + "grad_norm": 0.8351109027862549, + "learning_rate": 4.90648562702785e-06, + "loss": 0.553, + "step": 7219 + }, + { + "epoch": 0.530804293486252, + "grad_norm": 0.8511225581169128, + "learning_rate": 4.9064595184928074e-06, + "loss": 0.5372, + "step": 7220 + }, + { + "epoch": 0.5308778120864579, + "grad_norm": 0.904900312423706, + "learning_rate": 4.9064334063831016e-06, + "loss": 0.5824, + "step": 7221 + }, + { + "epoch": 0.5309513306866638, + "grad_norm": 0.8622738122940063, + "learning_rate": 4.906407290698771e-06, + "loss": 0.58, + "step": 7222 + }, + { + "epoch": 0.5310248492868695, + "grad_norm": 0.8534431457519531, + "learning_rate": 4.906381171439854e-06, + "loss": 0.5474, + "step": 7223 + }, + { + "epoch": 0.5310983678870754, + "grad_norm": 0.8542900681495667, + "learning_rate": 4.90635504860639e-06, + "loss": 0.565, + "step": 7224 + }, + { + "epoch": 0.5311718864872813, + "grad_norm": 0.8538702130317688, + "learning_rate": 4.906328922198417e-06, + "loss": 0.5575, + "step": 7225 + }, + { + "epoch": 0.5312454050874872, + "grad_norm": 0.8156202435493469, + "learning_rate": 4.906302792215973e-06, + "loss": 0.5162, + "step": 7226 + }, + { + "epoch": 0.531318923687693, + "grad_norm": 0.8542548418045044, + "learning_rate": 4.906276658659099e-06, + "loss": 0.5616, + "step": 7227 + }, + { + "epoch": 0.5313924422878988, + "grad_norm": 0.9077259302139282, + "learning_rate": 4.906250521527833e-06, + "loss": 0.5798, + "step": 7228 + }, + { + "epoch": 0.5314659608881047, + "grad_norm": 0.7935370802879333, + "learning_rate": 4.906224380822214e-06, + "loss": 0.5452, + "step": 7229 + }, + { + "epoch": 0.5315394794883106, + "grad_norm": 0.8390198945999146, + "learning_rate": 4.9061982365422795e-06, + "loss": 0.5066, + "step": 7230 + }, + { + "epoch": 0.5316129980885164, + "grad_norm": 0.8687477707862854, + "learning_rate": 4.9061720886880695e-06, + "loss": 0.6028, + "step": 7231 + }, + { + "epoch": 0.5316865166887222, + "grad_norm": 0.8997647166252136, + "learning_rate": 4.906145937259623e-06, + "loss": 0.5583, + "step": 7232 + }, + { + "epoch": 0.5317600352889281, + "grad_norm": 0.84250408411026, + "learning_rate": 4.906119782256978e-06, + "loss": 0.5375, + "step": 7233 + }, + { + "epoch": 0.531833553889134, + "grad_norm": 0.8241019248962402, + "learning_rate": 4.906093623680174e-06, + "loss": 0.5047, + "step": 7234 + }, + { + "epoch": 0.5319070724893398, + "grad_norm": 0.8626338243484497, + "learning_rate": 4.906067461529251e-06, + "loss": 0.5726, + "step": 7235 + }, + { + "epoch": 0.5319805910895457, + "grad_norm": 0.8502580523490906, + "learning_rate": 4.906041295804245e-06, + "loss": 0.5718, + "step": 7236 + }, + { + "epoch": 0.5320541096897515, + "grad_norm": 0.8329111337661743, + "learning_rate": 4.906015126505196e-06, + "loss": 0.5013, + "step": 7237 + }, + { + "epoch": 0.5321276282899574, + "grad_norm": 0.924628496170044, + "learning_rate": 4.905988953632143e-06, + "loss": 0.6079, + "step": 7238 + }, + { + "epoch": 0.5322011468901632, + "grad_norm": 0.8268237113952637, + "learning_rate": 4.905962777185127e-06, + "loss": 0.5082, + "step": 7239 + }, + { + "epoch": 0.5322746654903691, + "grad_norm": 0.7899359464645386, + "learning_rate": 4.905936597164184e-06, + "loss": 0.5142, + "step": 7240 + }, + { + "epoch": 0.5323481840905749, + "grad_norm": 0.8560640215873718, + "learning_rate": 4.905910413569355e-06, + "loss": 0.5119, + "step": 7241 + }, + { + "epoch": 0.5324217026907808, + "grad_norm": 0.8205233812332153, + "learning_rate": 4.905884226400677e-06, + "loss": 0.5424, + "step": 7242 + }, + { + "epoch": 0.5324952212909866, + "grad_norm": 0.8833664655685425, + "learning_rate": 4.905858035658189e-06, + "loss": 0.5642, + "step": 7243 + }, + { + "epoch": 0.5325687398911925, + "grad_norm": 0.82571941614151, + "learning_rate": 4.905831841341931e-06, + "loss": 0.5771, + "step": 7244 + }, + { + "epoch": 0.5326422584913983, + "grad_norm": 0.8271648287773132, + "learning_rate": 4.905805643451942e-06, + "loss": 0.5272, + "step": 7245 + }, + { + "epoch": 0.5327157770916042, + "grad_norm": 0.7846462726593018, + "learning_rate": 4.90577944198826e-06, + "loss": 0.4848, + "step": 7246 + }, + { + "epoch": 0.53278929569181, + "grad_norm": 0.899658203125, + "learning_rate": 4.905753236950926e-06, + "loss": 0.5285, + "step": 7247 + }, + { + "epoch": 0.5328628142920159, + "grad_norm": 0.8447715640068054, + "learning_rate": 4.905727028339975e-06, + "loss": 0.533, + "step": 7248 + }, + { + "epoch": 0.5329363328922218, + "grad_norm": 0.8563879728317261, + "learning_rate": 4.905700816155449e-06, + "loss": 0.5709, + "step": 7249 + }, + { + "epoch": 0.5330098514924276, + "grad_norm": 0.8082693815231323, + "learning_rate": 4.905674600397387e-06, + "loss": 0.513, + "step": 7250 + }, + { + "epoch": 0.5330833700926334, + "grad_norm": 0.8476765155792236, + "learning_rate": 4.9056483810658265e-06, + "loss": 0.5213, + "step": 7251 + }, + { + "epoch": 0.5331568886928393, + "grad_norm": 0.8460866808891296, + "learning_rate": 4.905622158160807e-06, + "loss": 0.5783, + "step": 7252 + }, + { + "epoch": 0.5332304072930452, + "grad_norm": 0.8367021679878235, + "learning_rate": 4.905595931682367e-06, + "loss": 0.591, + "step": 7253 + }, + { + "epoch": 0.533303925893251, + "grad_norm": 0.878348708152771, + "learning_rate": 4.905569701630547e-06, + "loss": 0.539, + "step": 7254 + }, + { + "epoch": 0.5333774444934568, + "grad_norm": 0.8336567878723145, + "learning_rate": 4.905543468005383e-06, + "loss": 0.5187, + "step": 7255 + }, + { + "epoch": 0.5334509630936627, + "grad_norm": 0.8673830032348633, + "learning_rate": 4.905517230806919e-06, + "loss": 0.5859, + "step": 7256 + }, + { + "epoch": 0.5335244816938686, + "grad_norm": 0.9015780091285706, + "learning_rate": 4.905490990035189e-06, + "loss": 0.5505, + "step": 7257 + }, + { + "epoch": 0.5335980002940744, + "grad_norm": 0.9018529057502747, + "learning_rate": 4.905464745690234e-06, + "loss": 0.584, + "step": 7258 + }, + { + "epoch": 0.5336715188942802, + "grad_norm": 0.8972885012626648, + "learning_rate": 4.905438497772092e-06, + "loss": 0.586, + "step": 7259 + }, + { + "epoch": 0.5337450374944861, + "grad_norm": 0.8709143400192261, + "learning_rate": 4.905412246280804e-06, + "loss": 0.5732, + "step": 7260 + }, + { + "epoch": 0.533818556094692, + "grad_norm": 0.8707026243209839, + "learning_rate": 4.905385991216408e-06, + "loss": 0.5821, + "step": 7261 + }, + { + "epoch": 0.5338920746948979, + "grad_norm": 0.8452028036117554, + "learning_rate": 4.9053597325789425e-06, + "loss": 0.5699, + "step": 7262 + }, + { + "epoch": 0.5339655932951036, + "grad_norm": 0.8892844915390015, + "learning_rate": 4.9053334703684466e-06, + "loss": 0.5277, + "step": 7263 + }, + { + "epoch": 0.5340391118953095, + "grad_norm": 0.8341946601867676, + "learning_rate": 4.905307204584959e-06, + "loss": 0.5622, + "step": 7264 + }, + { + "epoch": 0.5341126304955154, + "grad_norm": 0.8308964371681213, + "learning_rate": 4.9052809352285204e-06, + "loss": 0.5321, + "step": 7265 + }, + { + "epoch": 0.5341861490957213, + "grad_norm": 0.9225189089775085, + "learning_rate": 4.905254662299168e-06, + "loss": 0.6322, + "step": 7266 + }, + { + "epoch": 0.534259667695927, + "grad_norm": 0.8216723203659058, + "learning_rate": 4.905228385796942e-06, + "loss": 0.5049, + "step": 7267 + }, + { + "epoch": 0.5343331862961329, + "grad_norm": 0.8699127435684204, + "learning_rate": 4.905202105721881e-06, + "loss": 0.5428, + "step": 7268 + }, + { + "epoch": 0.5344067048963388, + "grad_norm": 0.8231623768806458, + "learning_rate": 4.905175822074023e-06, + "loss": 0.5369, + "step": 7269 + }, + { + "epoch": 0.5344802234965447, + "grad_norm": 0.8579986095428467, + "learning_rate": 4.905149534853409e-06, + "loss": 0.5335, + "step": 7270 + }, + { + "epoch": 0.5345537420967504, + "grad_norm": 0.9144521951675415, + "learning_rate": 4.905123244060077e-06, + "loss": 0.5342, + "step": 7271 + }, + { + "epoch": 0.5346272606969563, + "grad_norm": 0.9030277729034424, + "learning_rate": 4.905096949694066e-06, + "loss": 0.5422, + "step": 7272 + }, + { + "epoch": 0.5347007792971622, + "grad_norm": 0.8322221636772156, + "learning_rate": 4.905070651755415e-06, + "loss": 0.4851, + "step": 7273 + }, + { + "epoch": 0.5347742978973681, + "grad_norm": 0.7946834564208984, + "learning_rate": 4.905044350244162e-06, + "loss": 0.5346, + "step": 7274 + }, + { + "epoch": 0.5348478164975738, + "grad_norm": 0.8457018733024597, + "learning_rate": 4.905018045160349e-06, + "loss": 0.5565, + "step": 7275 + }, + { + "epoch": 0.5349213350977797, + "grad_norm": 0.8412285447120667, + "learning_rate": 4.9049917365040135e-06, + "loss": 0.5728, + "step": 7276 + }, + { + "epoch": 0.5349948536979856, + "grad_norm": 0.8933072686195374, + "learning_rate": 4.904965424275195e-06, + "loss": 0.55, + "step": 7277 + }, + { + "epoch": 0.5350683722981915, + "grad_norm": 0.8671562671661377, + "learning_rate": 4.9049391084739296e-06, + "loss": 0.5247, + "step": 7278 + }, + { + "epoch": 0.5351418908983973, + "grad_norm": 0.8776398301124573, + "learning_rate": 4.90491278910026e-06, + "loss": 0.5375, + "step": 7279 + }, + { + "epoch": 0.5352154094986031, + "grad_norm": 0.87315434217453, + "learning_rate": 4.904886466154225e-06, + "loss": 0.6167, + "step": 7280 + }, + { + "epoch": 0.535288928098809, + "grad_norm": 0.8123823404312134, + "learning_rate": 4.904860139635862e-06, + "loss": 0.5283, + "step": 7281 + }, + { + "epoch": 0.5353624466990149, + "grad_norm": 0.7896045446395874, + "learning_rate": 4.9048338095452116e-06, + "loss": 0.5749, + "step": 7282 + }, + { + "epoch": 0.5354359652992207, + "grad_norm": 0.9044662117958069, + "learning_rate": 4.904807475882312e-06, + "loss": 0.5941, + "step": 7283 + }, + { + "epoch": 0.5355094838994265, + "grad_norm": 0.7985972166061401, + "learning_rate": 4.904781138647202e-06, + "loss": 0.4923, + "step": 7284 + }, + { + "epoch": 0.5355830024996324, + "grad_norm": 0.8279371857643127, + "learning_rate": 4.9047547978399215e-06, + "loss": 0.567, + "step": 7285 + }, + { + "epoch": 0.5356565210998383, + "grad_norm": 0.874261200428009, + "learning_rate": 4.90472845346051e-06, + "loss": 0.5416, + "step": 7286 + }, + { + "epoch": 0.5357300397000441, + "grad_norm": 0.8253615498542786, + "learning_rate": 4.904702105509005e-06, + "loss": 0.5132, + "step": 7287 + }, + { + "epoch": 0.53580355830025, + "grad_norm": 0.8368448615074158, + "learning_rate": 4.9046757539854486e-06, + "loss": 0.5294, + "step": 7288 + }, + { + "epoch": 0.5358770769004558, + "grad_norm": 0.8396816253662109, + "learning_rate": 4.904649398889876e-06, + "loss": 0.5474, + "step": 7289 + }, + { + "epoch": 0.5359505955006617, + "grad_norm": 0.8473378419876099, + "learning_rate": 4.904623040222329e-06, + "loss": 0.5579, + "step": 7290 + }, + { + "epoch": 0.5360241141008675, + "grad_norm": 0.7915855050086975, + "learning_rate": 4.904596677982847e-06, + "loss": 0.5118, + "step": 7291 + }, + { + "epoch": 0.5360976327010734, + "grad_norm": 0.8514422178268433, + "learning_rate": 4.904570312171467e-06, + "loss": 0.5875, + "step": 7292 + }, + { + "epoch": 0.5361711513012792, + "grad_norm": 0.8484582304954529, + "learning_rate": 4.904543942788231e-06, + "loss": 0.566, + "step": 7293 + }, + { + "epoch": 0.5362446699014851, + "grad_norm": 0.88576740026474, + "learning_rate": 4.904517569833176e-06, + "loss": 0.5636, + "step": 7294 + }, + { + "epoch": 0.5363181885016909, + "grad_norm": 0.855175256729126, + "learning_rate": 4.904491193306341e-06, + "loss": 0.5498, + "step": 7295 + }, + { + "epoch": 0.5363917071018968, + "grad_norm": 0.8135848045349121, + "learning_rate": 4.9044648132077665e-06, + "loss": 0.5779, + "step": 7296 + }, + { + "epoch": 0.5364652257021026, + "grad_norm": 0.8447319269180298, + "learning_rate": 4.904438429537491e-06, + "loss": 0.5865, + "step": 7297 + }, + { + "epoch": 0.5365387443023085, + "grad_norm": 0.8176134824752808, + "learning_rate": 4.904412042295555e-06, + "loss": 0.569, + "step": 7298 + }, + { + "epoch": 0.5366122629025143, + "grad_norm": 0.9039896130561829, + "learning_rate": 4.904385651481995e-06, + "loss": 0.5771, + "step": 7299 + }, + { + "epoch": 0.5366857815027202, + "grad_norm": 0.891705334186554, + "learning_rate": 4.904359257096853e-06, + "loss": 0.5381, + "step": 7300 + }, + { + "epoch": 0.536759300102926, + "grad_norm": 0.8488550782203674, + "learning_rate": 4.904332859140167e-06, + "loss": 0.5654, + "step": 7301 + }, + { + "epoch": 0.5368328187031319, + "grad_norm": 0.8399471640586853, + "learning_rate": 4.904306457611975e-06, + "loss": 0.5689, + "step": 7302 + }, + { + "epoch": 0.5369063373033377, + "grad_norm": 0.8166784048080444, + "learning_rate": 4.904280052512318e-06, + "loss": 0.5513, + "step": 7303 + }, + { + "epoch": 0.5369798559035436, + "grad_norm": 0.8061221241950989, + "learning_rate": 4.904253643841235e-06, + "loss": 0.4991, + "step": 7304 + }, + { + "epoch": 0.5370533745037495, + "grad_norm": 0.8738861083984375, + "learning_rate": 4.904227231598765e-06, + "loss": 0.5674, + "step": 7305 + }, + { + "epoch": 0.5371268931039553, + "grad_norm": 0.8543864488601685, + "learning_rate": 4.904200815784946e-06, + "loss": 0.5234, + "step": 7306 + }, + { + "epoch": 0.5372004117041611, + "grad_norm": 0.904176652431488, + "learning_rate": 4.90417439639982e-06, + "loss": 0.5471, + "step": 7307 + }, + { + "epoch": 0.537273930304367, + "grad_norm": 0.8756328225135803, + "learning_rate": 4.904147973443423e-06, + "loss": 0.5762, + "step": 7308 + }, + { + "epoch": 0.5373474489045729, + "grad_norm": 0.8437320590019226, + "learning_rate": 4.904121546915796e-06, + "loss": 0.5414, + "step": 7309 + }, + { + "epoch": 0.5374209675047787, + "grad_norm": 0.8291122913360596, + "learning_rate": 4.904095116816979e-06, + "loss": 0.5725, + "step": 7310 + }, + { + "epoch": 0.5374944861049845, + "grad_norm": 0.8195613026618958, + "learning_rate": 4.90406868314701e-06, + "loss": 0.5933, + "step": 7311 + }, + { + "epoch": 0.5375680047051904, + "grad_norm": 0.8973590135574341, + "learning_rate": 4.904042245905929e-06, + "loss": 0.5186, + "step": 7312 + }, + { + "epoch": 0.5376415233053963, + "grad_norm": 0.8155051469802856, + "learning_rate": 4.904015805093775e-06, + "loss": 0.5398, + "step": 7313 + }, + { + "epoch": 0.5377150419056022, + "grad_norm": 0.8015062808990479, + "learning_rate": 4.903989360710586e-06, + "loss": 0.5203, + "step": 7314 + }, + { + "epoch": 0.5377885605058079, + "grad_norm": 0.7793501615524292, + "learning_rate": 4.903962912756402e-06, + "loss": 0.5225, + "step": 7315 + }, + { + "epoch": 0.5378620791060138, + "grad_norm": 0.8582054376602173, + "learning_rate": 4.903936461231264e-06, + "loss": 0.5473, + "step": 7316 + }, + { + "epoch": 0.5379355977062197, + "grad_norm": 0.823205828666687, + "learning_rate": 4.903910006135211e-06, + "loss": 0.5344, + "step": 7317 + }, + { + "epoch": 0.5380091163064256, + "grad_norm": 0.8533278107643127, + "learning_rate": 4.90388354746828e-06, + "loss": 0.5893, + "step": 7318 + }, + { + "epoch": 0.5380826349066313, + "grad_norm": 0.8066454529762268, + "learning_rate": 4.903857085230511e-06, + "loss": 0.5221, + "step": 7319 + }, + { + "epoch": 0.5381561535068372, + "grad_norm": 0.8384581208229065, + "learning_rate": 4.903830619421945e-06, + "loss": 0.5594, + "step": 7320 + }, + { + "epoch": 0.5382296721070431, + "grad_norm": 0.8687387704849243, + "learning_rate": 4.9038041500426206e-06, + "loss": 0.5389, + "step": 7321 + }, + { + "epoch": 0.538303190707249, + "grad_norm": 0.8155416250228882, + "learning_rate": 4.903777677092575e-06, + "loss": 0.5455, + "step": 7322 + }, + { + "epoch": 0.5383767093074547, + "grad_norm": 0.8760262131690979, + "learning_rate": 4.903751200571851e-06, + "loss": 0.5412, + "step": 7323 + }, + { + "epoch": 0.5384502279076606, + "grad_norm": 0.8345479965209961, + "learning_rate": 4.9037247204804846e-06, + "loss": 0.5575, + "step": 7324 + }, + { + "epoch": 0.5385237465078665, + "grad_norm": 0.8650375008583069, + "learning_rate": 4.9036982368185185e-06, + "loss": 0.5723, + "step": 7325 + }, + { + "epoch": 0.5385972651080724, + "grad_norm": 0.8471035361289978, + "learning_rate": 4.90367174958599e-06, + "loss": 0.5445, + "step": 7326 + }, + { + "epoch": 0.5386707837082781, + "grad_norm": 0.8208979368209839, + "learning_rate": 4.9036452587829375e-06, + "loss": 0.5304, + "step": 7327 + }, + { + "epoch": 0.538744302308484, + "grad_norm": 0.884165346622467, + "learning_rate": 4.903618764409402e-06, + "loss": 0.555, + "step": 7328 + }, + { + "epoch": 0.5388178209086899, + "grad_norm": 0.8487053513526917, + "learning_rate": 4.903592266465423e-06, + "loss": 0.5572, + "step": 7329 + }, + { + "epoch": 0.5388913395088958, + "grad_norm": 0.7863051891326904, + "learning_rate": 4.903565764951039e-06, + "loss": 0.5523, + "step": 7330 + }, + { + "epoch": 0.5389648581091016, + "grad_norm": 0.8055100440979004, + "learning_rate": 4.90353925986629e-06, + "loss": 0.5305, + "step": 7331 + }, + { + "epoch": 0.5390383767093074, + "grad_norm": 0.8000940680503845, + "learning_rate": 4.903512751211214e-06, + "loss": 0.5421, + "step": 7332 + }, + { + "epoch": 0.5391118953095133, + "grad_norm": 0.8734196424484253, + "learning_rate": 4.903486238985853e-06, + "loss": 0.566, + "step": 7333 + }, + { + "epoch": 0.5391854139097192, + "grad_norm": 0.7994245886802673, + "learning_rate": 4.903459723190244e-06, + "loss": 0.5241, + "step": 7334 + }, + { + "epoch": 0.539258932509925, + "grad_norm": 0.859464704990387, + "learning_rate": 4.903433203824428e-06, + "loss": 0.5331, + "step": 7335 + }, + { + "epoch": 0.5393324511101308, + "grad_norm": 0.8597770929336548, + "learning_rate": 4.903406680888443e-06, + "loss": 0.5547, + "step": 7336 + }, + { + "epoch": 0.5394059697103367, + "grad_norm": 0.8781493306159973, + "learning_rate": 4.903380154382328e-06, + "loss": 0.5874, + "step": 7337 + }, + { + "epoch": 0.5394794883105426, + "grad_norm": 0.8260395526885986, + "learning_rate": 4.903353624306124e-06, + "loss": 0.5471, + "step": 7338 + }, + { + "epoch": 0.5395530069107484, + "grad_norm": 0.7824047207832336, + "learning_rate": 4.903327090659871e-06, + "loss": 0.5344, + "step": 7339 + }, + { + "epoch": 0.5396265255109542, + "grad_norm": 0.8403849005699158, + "learning_rate": 4.9033005534436055e-06, + "loss": 0.5958, + "step": 7340 + }, + { + "epoch": 0.5397000441111601, + "grad_norm": 0.8798538446426392, + "learning_rate": 4.90327401265737e-06, + "loss": 0.6181, + "step": 7341 + }, + { + "epoch": 0.539773562711366, + "grad_norm": 0.8626687526702881, + "learning_rate": 4.903247468301201e-06, + "loss": 0.5838, + "step": 7342 + }, + { + "epoch": 0.5398470813115718, + "grad_norm": 0.863251805305481, + "learning_rate": 4.90322092037514e-06, + "loss": 0.584, + "step": 7343 + }, + { + "epoch": 0.5399205999117777, + "grad_norm": 0.8843042850494385, + "learning_rate": 4.903194368879227e-06, + "loss": 0.533, + "step": 7344 + }, + { + "epoch": 0.5399941185119835, + "grad_norm": 0.8522303700447083, + "learning_rate": 4.9031678138135e-06, + "loss": 0.5968, + "step": 7345 + }, + { + "epoch": 0.5400676371121894, + "grad_norm": 0.8348379135131836, + "learning_rate": 4.903141255177998e-06, + "loss": 0.5153, + "step": 7346 + }, + { + "epoch": 0.5401411557123952, + "grad_norm": 0.8668313026428223, + "learning_rate": 4.9031146929727615e-06, + "loss": 0.5934, + "step": 7347 + }, + { + "epoch": 0.5402146743126011, + "grad_norm": 0.8497680425643921, + "learning_rate": 4.90308812719783e-06, + "loss": 0.5304, + "step": 7348 + }, + { + "epoch": 0.5402881929128069, + "grad_norm": 0.8231236338615417, + "learning_rate": 4.9030615578532424e-06, + "loss": 0.5331, + "step": 7349 + }, + { + "epoch": 0.5403617115130128, + "grad_norm": 0.8041205406188965, + "learning_rate": 4.903034984939039e-06, + "loss": 0.5903, + "step": 7350 + }, + { + "epoch": 0.5404352301132186, + "grad_norm": 0.8810940980911255, + "learning_rate": 4.9030084084552585e-06, + "loss": 0.5529, + "step": 7351 + }, + { + "epoch": 0.5405087487134245, + "grad_norm": 0.8242585062980652, + "learning_rate": 4.90298182840194e-06, + "loss": 0.5151, + "step": 7352 + }, + { + "epoch": 0.5405822673136303, + "grad_norm": 0.8031716346740723, + "learning_rate": 4.902955244779124e-06, + "loss": 0.5436, + "step": 7353 + }, + { + "epoch": 0.5406557859138362, + "grad_norm": 0.8224453926086426, + "learning_rate": 4.902928657586849e-06, + "loss": 0.5365, + "step": 7354 + }, + { + "epoch": 0.540729304514042, + "grad_norm": 0.8830243349075317, + "learning_rate": 4.902902066825156e-06, + "loss": 0.565, + "step": 7355 + }, + { + "epoch": 0.5408028231142479, + "grad_norm": 0.8166176080703735, + "learning_rate": 4.902875472494084e-06, + "loss": 0.5313, + "step": 7356 + }, + { + "epoch": 0.5408763417144538, + "grad_norm": 0.8299870491027832, + "learning_rate": 4.902848874593671e-06, + "loss": 0.5516, + "step": 7357 + }, + { + "epoch": 0.5409498603146596, + "grad_norm": 0.8131978511810303, + "learning_rate": 4.902822273123958e-06, + "loss": 0.5512, + "step": 7358 + }, + { + "epoch": 0.5410233789148655, + "grad_norm": 0.8120273351669312, + "learning_rate": 4.9027956680849835e-06, + "loss": 0.5378, + "step": 7359 + }, + { + "epoch": 0.5410968975150713, + "grad_norm": 0.8683211803436279, + "learning_rate": 4.9027690594767885e-06, + "loss": 0.5859, + "step": 7360 + }, + { + "epoch": 0.5411704161152772, + "grad_norm": 0.8312329649925232, + "learning_rate": 4.902742447299411e-06, + "loss": 0.5302, + "step": 7361 + }, + { + "epoch": 0.541243934715483, + "grad_norm": 0.8152020573616028, + "learning_rate": 4.9027158315528915e-06, + "loss": 0.552, + "step": 7362 + }, + { + "epoch": 0.5413174533156889, + "grad_norm": 0.8592310547828674, + "learning_rate": 4.902689212237269e-06, + "loss": 0.5465, + "step": 7363 + }, + { + "epoch": 0.5413909719158947, + "grad_norm": 0.8589658141136169, + "learning_rate": 4.902662589352583e-06, + "loss": 0.5658, + "step": 7364 + }, + { + "epoch": 0.5414644905161006, + "grad_norm": 0.8742044568061829, + "learning_rate": 4.902635962898873e-06, + "loss": 0.5256, + "step": 7365 + }, + { + "epoch": 0.5415380091163065, + "grad_norm": 0.8409419059753418, + "learning_rate": 4.90260933287618e-06, + "loss": 0.5637, + "step": 7366 + }, + { + "epoch": 0.5416115277165123, + "grad_norm": 0.8833799362182617, + "learning_rate": 4.9025826992845416e-06, + "loss": 0.532, + "step": 7367 + }, + { + "epoch": 0.5416850463167181, + "grad_norm": 0.8554703593254089, + "learning_rate": 4.902556062123998e-06, + "loss": 0.5198, + "step": 7368 + }, + { + "epoch": 0.541758564916924, + "grad_norm": 0.8596144914627075, + "learning_rate": 4.902529421394589e-06, + "loss": 0.5502, + "step": 7369 + }, + { + "epoch": 0.5418320835171299, + "grad_norm": 0.8154865503311157, + "learning_rate": 4.902502777096355e-06, + "loss": 0.54, + "step": 7370 + }, + { + "epoch": 0.5419056021173357, + "grad_norm": 0.8210517168045044, + "learning_rate": 4.902476129229334e-06, + "loss": 0.5466, + "step": 7371 + }, + { + "epoch": 0.5419791207175415, + "grad_norm": 0.8546174168586731, + "learning_rate": 4.902449477793567e-06, + "loss": 0.5227, + "step": 7372 + }, + { + "epoch": 0.5420526393177474, + "grad_norm": 0.8523893356323242, + "learning_rate": 4.9024228227890915e-06, + "loss": 0.542, + "step": 7373 + }, + { + "epoch": 0.5421261579179533, + "grad_norm": 0.8539914488792419, + "learning_rate": 4.902396164215949e-06, + "loss": 0.56, + "step": 7374 + }, + { + "epoch": 0.5421996765181591, + "grad_norm": 0.814677357673645, + "learning_rate": 4.902369502074179e-06, + "loss": 0.5541, + "step": 7375 + }, + { + "epoch": 0.5422731951183649, + "grad_norm": 0.8310657143592834, + "learning_rate": 4.9023428363638206e-06, + "loss": 0.5377, + "step": 7376 + }, + { + "epoch": 0.5423467137185708, + "grad_norm": 0.8414857387542725, + "learning_rate": 4.902316167084914e-06, + "loss": 0.5455, + "step": 7377 + }, + { + "epoch": 0.5424202323187767, + "grad_norm": 0.8314030766487122, + "learning_rate": 4.902289494237497e-06, + "loss": 0.5197, + "step": 7378 + }, + { + "epoch": 0.5424937509189826, + "grad_norm": 0.8599029183387756, + "learning_rate": 4.902262817821611e-06, + "loss": 0.5584, + "step": 7379 + }, + { + "epoch": 0.5425672695191883, + "grad_norm": 0.8257334232330322, + "learning_rate": 4.902236137837295e-06, + "loss": 0.561, + "step": 7380 + }, + { + "epoch": 0.5426407881193942, + "grad_norm": 0.8731462955474854, + "learning_rate": 4.902209454284589e-06, + "loss": 0.5725, + "step": 7381 + }, + { + "epoch": 0.5427143067196001, + "grad_norm": 0.8365134000778198, + "learning_rate": 4.9021827671635325e-06, + "loss": 0.533, + "step": 7382 + }, + { + "epoch": 0.542787825319806, + "grad_norm": 0.8387839198112488, + "learning_rate": 4.9021560764741646e-06, + "loss": 0.5469, + "step": 7383 + }, + { + "epoch": 0.5428613439200117, + "grad_norm": 0.8363453149795532, + "learning_rate": 4.9021293822165254e-06, + "loss": 0.5315, + "step": 7384 + }, + { + "epoch": 0.5429348625202176, + "grad_norm": 0.8305526971817017, + "learning_rate": 4.902102684390655e-06, + "loss": 0.5499, + "step": 7385 + }, + { + "epoch": 0.5430083811204235, + "grad_norm": 0.8378708958625793, + "learning_rate": 4.902075982996592e-06, + "loss": 0.5318, + "step": 7386 + }, + { + "epoch": 0.5430818997206294, + "grad_norm": 0.8937927484512329, + "learning_rate": 4.902049278034378e-06, + "loss": 0.6028, + "step": 7387 + }, + { + "epoch": 0.5431554183208351, + "grad_norm": 0.8474279642105103, + "learning_rate": 4.90202256950405e-06, + "loss": 0.5476, + "step": 7388 + }, + { + "epoch": 0.543228936921041, + "grad_norm": 0.8323319554328918, + "learning_rate": 4.9019958574056495e-06, + "loss": 0.5816, + "step": 7389 + }, + { + "epoch": 0.5433024555212469, + "grad_norm": 0.8091368675231934, + "learning_rate": 4.901969141739215e-06, + "loss": 0.5772, + "step": 7390 + }, + { + "epoch": 0.5433759741214528, + "grad_norm": 0.8327074646949768, + "learning_rate": 4.901942422504787e-06, + "loss": 0.5667, + "step": 7391 + }, + { + "epoch": 0.5434494927216585, + "grad_norm": 0.8385477662086487, + "learning_rate": 4.901915699702406e-06, + "loss": 0.5637, + "step": 7392 + }, + { + "epoch": 0.5435230113218644, + "grad_norm": 0.8869376182556152, + "learning_rate": 4.90188897333211e-06, + "loss": 0.5912, + "step": 7393 + }, + { + "epoch": 0.5435965299220703, + "grad_norm": 0.8745696544647217, + "learning_rate": 4.90186224339394e-06, + "loss": 0.5613, + "step": 7394 + }, + { + "epoch": 0.5436700485222762, + "grad_norm": 0.8730836510658264, + "learning_rate": 4.901835509887935e-06, + "loss": 0.5316, + "step": 7395 + }, + { + "epoch": 0.543743567122482, + "grad_norm": 0.8013554811477661, + "learning_rate": 4.9018087728141345e-06, + "loss": 0.534, + "step": 7396 + }, + { + "epoch": 0.5438170857226878, + "grad_norm": 0.8006407618522644, + "learning_rate": 4.901782032172578e-06, + "loss": 0.555, + "step": 7397 + }, + { + "epoch": 0.5438906043228937, + "grad_norm": 0.8389438390731812, + "learning_rate": 4.901755287963307e-06, + "loss": 0.5305, + "step": 7398 + }, + { + "epoch": 0.5439641229230996, + "grad_norm": 0.8683653473854065, + "learning_rate": 4.90172854018636e-06, + "loss": 0.5395, + "step": 7399 + }, + { + "epoch": 0.5440376415233054, + "grad_norm": 0.876187801361084, + "learning_rate": 4.901701788841776e-06, + "loss": 0.5831, + "step": 7400 + }, + { + "epoch": 0.5441111601235112, + "grad_norm": 0.8337395787239075, + "learning_rate": 4.901675033929596e-06, + "loss": 0.5314, + "step": 7401 + }, + { + "epoch": 0.5441846787237171, + "grad_norm": 0.8568874597549438, + "learning_rate": 4.90164827544986e-06, + "loss": 0.5968, + "step": 7402 + }, + { + "epoch": 0.544258197323923, + "grad_norm": 0.8517863154411316, + "learning_rate": 4.901621513402606e-06, + "loss": 0.4996, + "step": 7403 + }, + { + "epoch": 0.5443317159241288, + "grad_norm": 0.87135249376297, + "learning_rate": 4.901594747787875e-06, + "loss": 0.5685, + "step": 7404 + }, + { + "epoch": 0.5444052345243346, + "grad_norm": 0.8317119479179382, + "learning_rate": 4.901567978605706e-06, + "loss": 0.5212, + "step": 7405 + }, + { + "epoch": 0.5444787531245405, + "grad_norm": 0.829646110534668, + "learning_rate": 4.901541205856139e-06, + "loss": 0.5043, + "step": 7406 + }, + { + "epoch": 0.5445522717247464, + "grad_norm": 0.7953717708587646, + "learning_rate": 4.901514429539216e-06, + "loss": 0.4768, + "step": 7407 + }, + { + "epoch": 0.5446257903249522, + "grad_norm": 0.8535495400428772, + "learning_rate": 4.901487649654973e-06, + "loss": 0.5167, + "step": 7408 + }, + { + "epoch": 0.544699308925158, + "grad_norm": 0.8613768219947815, + "learning_rate": 4.901460866203453e-06, + "loss": 0.5849, + "step": 7409 + }, + { + "epoch": 0.5447728275253639, + "grad_norm": 0.8488555550575256, + "learning_rate": 4.901434079184693e-06, + "loss": 0.5172, + "step": 7410 + }, + { + "epoch": 0.5448463461255698, + "grad_norm": 0.806003987789154, + "learning_rate": 4.901407288598735e-06, + "loss": 0.508, + "step": 7411 + }, + { + "epoch": 0.5449198647257756, + "grad_norm": 0.8449246287345886, + "learning_rate": 4.901380494445617e-06, + "loss": 0.584, + "step": 7412 + }, + { + "epoch": 0.5449933833259815, + "grad_norm": 0.8373808264732361, + "learning_rate": 4.90135369672538e-06, + "loss": 0.5919, + "step": 7413 + }, + { + "epoch": 0.5450669019261873, + "grad_norm": 0.7904672622680664, + "learning_rate": 4.901326895438064e-06, + "loss": 0.5225, + "step": 7414 + }, + { + "epoch": 0.5451404205263932, + "grad_norm": 0.79067063331604, + "learning_rate": 4.901300090583709e-06, + "loss": 0.5022, + "step": 7415 + }, + { + "epoch": 0.545213939126599, + "grad_norm": 0.836906373500824, + "learning_rate": 4.901273282162354e-06, + "loss": 0.5252, + "step": 7416 + }, + { + "epoch": 0.5452874577268049, + "grad_norm": 0.811057448387146, + "learning_rate": 4.901246470174038e-06, + "loss": 0.5432, + "step": 7417 + }, + { + "epoch": 0.5453609763270107, + "grad_norm": 0.8835916519165039, + "learning_rate": 4.901219654618803e-06, + "loss": 0.5529, + "step": 7418 + }, + { + "epoch": 0.5454344949272166, + "grad_norm": 0.7988160252571106, + "learning_rate": 4.9011928354966876e-06, + "loss": 0.5217, + "step": 7419 + }, + { + "epoch": 0.5455080135274224, + "grad_norm": 0.8179652690887451, + "learning_rate": 4.901166012807731e-06, + "loss": 0.5609, + "step": 7420 + }, + { + "epoch": 0.5455815321276283, + "grad_norm": 0.8508458137512207, + "learning_rate": 4.901139186551974e-06, + "loss": 0.5966, + "step": 7421 + }, + { + "epoch": 0.5456550507278342, + "grad_norm": 0.8288016319274902, + "learning_rate": 4.901112356729456e-06, + "loss": 0.5514, + "step": 7422 + }, + { + "epoch": 0.54572856932804, + "grad_norm": 0.8692492246627808, + "learning_rate": 4.901085523340218e-06, + "loss": 0.5963, + "step": 7423 + }, + { + "epoch": 0.5458020879282458, + "grad_norm": 0.8213505148887634, + "learning_rate": 4.901058686384298e-06, + "loss": 0.506, + "step": 7424 + }, + { + "epoch": 0.5458756065284517, + "grad_norm": 0.8677130341529846, + "learning_rate": 4.901031845861737e-06, + "loss": 0.5351, + "step": 7425 + }, + { + "epoch": 0.5459491251286576, + "grad_norm": 0.849595308303833, + "learning_rate": 4.901005001772575e-06, + "loss": 0.5427, + "step": 7426 + }, + { + "epoch": 0.5460226437288634, + "grad_norm": 0.8478572964668274, + "learning_rate": 4.900978154116851e-06, + "loss": 0.5524, + "step": 7427 + }, + { + "epoch": 0.5460961623290692, + "grad_norm": 0.7880478501319885, + "learning_rate": 4.9009513028946065e-06, + "loss": 0.5417, + "step": 7428 + }, + { + "epoch": 0.5461696809292751, + "grad_norm": 0.8045113682746887, + "learning_rate": 4.90092444810588e-06, + "loss": 0.554, + "step": 7429 + }, + { + "epoch": 0.546243199529481, + "grad_norm": 0.8388193845748901, + "learning_rate": 4.9008975897507105e-06, + "loss": 0.5365, + "step": 7430 + }, + { + "epoch": 0.5463167181296869, + "grad_norm": 0.8463746905326843, + "learning_rate": 4.90087072782914e-06, + "loss": 0.536, + "step": 7431 + }, + { + "epoch": 0.5463902367298926, + "grad_norm": 0.8385171890258789, + "learning_rate": 4.9008438623412076e-06, + "loss": 0.578, + "step": 7432 + }, + { + "epoch": 0.5464637553300985, + "grad_norm": 0.8687260746955872, + "learning_rate": 4.9008169932869534e-06, + "loss": 0.551, + "step": 7433 + }, + { + "epoch": 0.5465372739303044, + "grad_norm": 0.8134276866912842, + "learning_rate": 4.900790120666416e-06, + "loss": 0.5401, + "step": 7434 + }, + { + "epoch": 0.5466107925305103, + "grad_norm": 0.8163163661956787, + "learning_rate": 4.900763244479637e-06, + "loss": 0.5615, + "step": 7435 + }, + { + "epoch": 0.546684311130716, + "grad_norm": 0.8429960608482361, + "learning_rate": 4.900736364726656e-06, + "loss": 0.5417, + "step": 7436 + }, + { + "epoch": 0.5467578297309219, + "grad_norm": 0.8088297247886658, + "learning_rate": 4.900709481407512e-06, + "loss": 0.5705, + "step": 7437 + }, + { + "epoch": 0.5468313483311278, + "grad_norm": 0.8799097537994385, + "learning_rate": 4.9006825945222455e-06, + "loss": 0.5382, + "step": 7438 + }, + { + "epoch": 0.5469048669313337, + "grad_norm": 0.8672287464141846, + "learning_rate": 4.9006557040708964e-06, + "loss": 0.5362, + "step": 7439 + }, + { + "epoch": 0.5469783855315394, + "grad_norm": 0.8881980776786804, + "learning_rate": 4.900628810053505e-06, + "loss": 0.5722, + "step": 7440 + }, + { + "epoch": 0.5470519041317453, + "grad_norm": 0.8334965109825134, + "learning_rate": 4.900601912470111e-06, + "loss": 0.5524, + "step": 7441 + }, + { + "epoch": 0.5471254227319512, + "grad_norm": 0.8030849695205688, + "learning_rate": 4.900575011320754e-06, + "loss": 0.5273, + "step": 7442 + }, + { + "epoch": 0.5471989413321571, + "grad_norm": 0.8268588781356812, + "learning_rate": 4.900548106605474e-06, + "loss": 0.5687, + "step": 7443 + }, + { + "epoch": 0.5472724599323628, + "grad_norm": 0.8989033102989197, + "learning_rate": 4.900521198324312e-06, + "loss": 0.5763, + "step": 7444 + }, + { + "epoch": 0.5473459785325687, + "grad_norm": 0.7921287417411804, + "learning_rate": 4.900494286477307e-06, + "loss": 0.5192, + "step": 7445 + }, + { + "epoch": 0.5474194971327746, + "grad_norm": 0.8154789805412292, + "learning_rate": 4.900467371064499e-06, + "loss": 0.5755, + "step": 7446 + }, + { + "epoch": 0.5474930157329805, + "grad_norm": 0.8411282300949097, + "learning_rate": 4.9004404520859284e-06, + "loss": 0.5079, + "step": 7447 + }, + { + "epoch": 0.5475665343331863, + "grad_norm": 0.8238452672958374, + "learning_rate": 4.900413529541634e-06, + "loss": 0.5275, + "step": 7448 + }, + { + "epoch": 0.5476400529333921, + "grad_norm": 0.8298611640930176, + "learning_rate": 4.9003866034316585e-06, + "loss": 0.5425, + "step": 7449 + }, + { + "epoch": 0.547713571533598, + "grad_norm": 0.8201438188552856, + "learning_rate": 4.900359673756039e-06, + "loss": 0.5227, + "step": 7450 + }, + { + "epoch": 0.5477870901338039, + "grad_norm": 0.8140427470207214, + "learning_rate": 4.900332740514817e-06, + "loss": 0.5161, + "step": 7451 + }, + { + "epoch": 0.5478606087340097, + "grad_norm": 0.8737766742706299, + "learning_rate": 4.900305803708032e-06, + "loss": 0.5922, + "step": 7452 + }, + { + "epoch": 0.5479341273342155, + "grad_norm": 0.8154763579368591, + "learning_rate": 4.900278863335724e-06, + "loss": 0.5879, + "step": 7453 + }, + { + "epoch": 0.5480076459344214, + "grad_norm": 0.8425807356834412, + "learning_rate": 4.900251919397934e-06, + "loss": 0.542, + "step": 7454 + }, + { + "epoch": 0.5480811645346273, + "grad_norm": 0.8160684704780579, + "learning_rate": 4.900224971894701e-06, + "loss": 0.5172, + "step": 7455 + }, + { + "epoch": 0.5481546831348331, + "grad_norm": 0.9687092900276184, + "learning_rate": 4.900198020826064e-06, + "loss": 0.6072, + "step": 7456 + }, + { + "epoch": 0.548228201735039, + "grad_norm": 0.8580155968666077, + "learning_rate": 4.900171066192065e-06, + "loss": 0.5841, + "step": 7457 + }, + { + "epoch": 0.5483017203352448, + "grad_norm": 0.8178898692131042, + "learning_rate": 4.900144107992744e-06, + "loss": 0.5573, + "step": 7458 + }, + { + "epoch": 0.5483752389354507, + "grad_norm": 0.8629224300384521, + "learning_rate": 4.900117146228139e-06, + "loss": 0.5556, + "step": 7459 + }, + { + "epoch": 0.5484487575356565, + "grad_norm": 0.9059383869171143, + "learning_rate": 4.900090180898292e-06, + "loss": 0.5057, + "step": 7460 + }, + { + "epoch": 0.5485222761358624, + "grad_norm": 0.8388679623603821, + "learning_rate": 4.900063212003242e-06, + "loss": 0.5196, + "step": 7461 + }, + { + "epoch": 0.5485957947360682, + "grad_norm": 0.8957772254943848, + "learning_rate": 4.9000362395430315e-06, + "loss": 0.5288, + "step": 7462 + }, + { + "epoch": 0.5486693133362741, + "grad_norm": 0.8869755864143372, + "learning_rate": 4.900009263517696e-06, + "loss": 0.5387, + "step": 7463 + }, + { + "epoch": 0.5487428319364799, + "grad_norm": 0.8383581638336182, + "learning_rate": 4.899982283927281e-06, + "loss": 0.5724, + "step": 7464 + }, + { + "epoch": 0.5488163505366858, + "grad_norm": 0.834895133972168, + "learning_rate": 4.899955300771821e-06, + "loss": 0.5353, + "step": 7465 + }, + { + "epoch": 0.5488898691368916, + "grad_norm": 0.8311089873313904, + "learning_rate": 4.89992831405136e-06, + "loss": 0.5898, + "step": 7466 + }, + { + "epoch": 0.5489633877370975, + "grad_norm": 0.8087198138237, + "learning_rate": 4.8999013237659364e-06, + "loss": 0.5704, + "step": 7467 + }, + { + "epoch": 0.5490369063373033, + "grad_norm": 0.7971131205558777, + "learning_rate": 4.899874329915592e-06, + "loss": 0.5119, + "step": 7468 + }, + { + "epoch": 0.5491104249375092, + "grad_norm": 0.817422091960907, + "learning_rate": 4.899847332500365e-06, + "loss": 0.5113, + "step": 7469 + }, + { + "epoch": 0.549183943537715, + "grad_norm": 0.830955982208252, + "learning_rate": 4.899820331520296e-06, + "loss": 0.555, + "step": 7470 + }, + { + "epoch": 0.5492574621379209, + "grad_norm": 0.847828209400177, + "learning_rate": 4.899793326975425e-06, + "loss": 0.5821, + "step": 7471 + }, + { + "epoch": 0.5493309807381267, + "grad_norm": 0.8412435054779053, + "learning_rate": 4.899766318865793e-06, + "loss": 0.559, + "step": 7472 + }, + { + "epoch": 0.5494044993383326, + "grad_norm": 0.809637725353241, + "learning_rate": 4.89973930719144e-06, + "loss": 0.5646, + "step": 7473 + }, + { + "epoch": 0.5494780179385385, + "grad_norm": 0.8870561718940735, + "learning_rate": 4.899712291952404e-06, + "loss": 0.5289, + "step": 7474 + }, + { + "epoch": 0.5495515365387443, + "grad_norm": 0.8816589117050171, + "learning_rate": 4.899685273148729e-06, + "loss": 0.5697, + "step": 7475 + }, + { + "epoch": 0.5496250551389501, + "grad_norm": 0.807648777961731, + "learning_rate": 4.899658250780451e-06, + "loss": 0.5062, + "step": 7476 + }, + { + "epoch": 0.549698573739156, + "grad_norm": 0.8347017168998718, + "learning_rate": 4.899631224847613e-06, + "loss": 0.5483, + "step": 7477 + }, + { + "epoch": 0.5497720923393619, + "grad_norm": 0.8092883825302124, + "learning_rate": 4.899604195350255e-06, + "loss": 0.5626, + "step": 7478 + }, + { + "epoch": 0.5498456109395677, + "grad_norm": 0.8284129500389099, + "learning_rate": 4.899577162288414e-06, + "loss": 0.5205, + "step": 7479 + }, + { + "epoch": 0.5499191295397735, + "grad_norm": 0.8617380857467651, + "learning_rate": 4.899550125662135e-06, + "loss": 0.5516, + "step": 7480 + }, + { + "epoch": 0.5499926481399794, + "grad_norm": 0.8064405918121338, + "learning_rate": 4.899523085471454e-06, + "loss": 0.5499, + "step": 7481 + }, + { + "epoch": 0.5500661667401853, + "grad_norm": 0.8341809511184692, + "learning_rate": 4.899496041716414e-06, + "loss": 0.5511, + "step": 7482 + }, + { + "epoch": 0.5501396853403911, + "grad_norm": 0.8073792457580566, + "learning_rate": 4.899468994397053e-06, + "loss": 0.4811, + "step": 7483 + }, + { + "epoch": 0.5502132039405969, + "grad_norm": 0.8599108457565308, + "learning_rate": 4.899441943513413e-06, + "loss": 0.5594, + "step": 7484 + }, + { + "epoch": 0.5502867225408028, + "grad_norm": 0.8451476693153381, + "learning_rate": 4.899414889065533e-06, + "loss": 0.5598, + "step": 7485 + }, + { + "epoch": 0.5503602411410087, + "grad_norm": 0.8552488684654236, + "learning_rate": 4.899387831053453e-06, + "loss": 0.565, + "step": 7486 + }, + { + "epoch": 0.5504337597412146, + "grad_norm": 0.7904579639434814, + "learning_rate": 4.899360769477215e-06, + "loss": 0.5277, + "step": 7487 + }, + { + "epoch": 0.5505072783414203, + "grad_norm": 0.8169418573379517, + "learning_rate": 4.8993337043368575e-06, + "loss": 0.5443, + "step": 7488 + }, + { + "epoch": 0.5505807969416262, + "grad_norm": 0.8410421013832092, + "learning_rate": 4.899306635632421e-06, + "loss": 0.592, + "step": 7489 + }, + { + "epoch": 0.5506543155418321, + "grad_norm": 0.8452143669128418, + "learning_rate": 4.899279563363946e-06, + "loss": 0.5424, + "step": 7490 + }, + { + "epoch": 0.550727834142038, + "grad_norm": 0.8373218774795532, + "learning_rate": 4.8992524875314725e-06, + "loss": 0.5652, + "step": 7491 + }, + { + "epoch": 0.5508013527422437, + "grad_norm": 0.8074889183044434, + "learning_rate": 4.899225408135041e-06, + "loss": 0.5295, + "step": 7492 + }, + { + "epoch": 0.5508748713424496, + "grad_norm": 0.8177749514579773, + "learning_rate": 4.8991983251746914e-06, + "loss": 0.5737, + "step": 7493 + }, + { + "epoch": 0.5509483899426555, + "grad_norm": 0.8202979564666748, + "learning_rate": 4.899171238650464e-06, + "loss": 0.5511, + "step": 7494 + }, + { + "epoch": 0.5510219085428614, + "grad_norm": 0.8631581664085388, + "learning_rate": 4.899144148562399e-06, + "loss": 0.5173, + "step": 7495 + }, + { + "epoch": 0.5510954271430673, + "grad_norm": 0.8516996502876282, + "learning_rate": 4.899117054910537e-06, + "loss": 0.5439, + "step": 7496 + }, + { + "epoch": 0.551168945743273, + "grad_norm": 0.8608637452125549, + "learning_rate": 4.899089957694918e-06, + "loss": 0.5953, + "step": 7497 + }, + { + "epoch": 0.5512424643434789, + "grad_norm": 0.898249089717865, + "learning_rate": 4.899062856915583e-06, + "loss": 0.5987, + "step": 7498 + }, + { + "epoch": 0.5513159829436848, + "grad_norm": 0.8376590013504028, + "learning_rate": 4.89903575257257e-06, + "loss": 0.5267, + "step": 7499 + }, + { + "epoch": 0.5513895015438907, + "grad_norm": 0.7921758890151978, + "learning_rate": 4.899008644665922e-06, + "loss": 0.4928, + "step": 7500 + }, + { + "epoch": 0.5514630201440964, + "grad_norm": 0.8570200204849243, + "learning_rate": 4.898981533195677e-06, + "loss": 0.5548, + "step": 7501 + }, + { + "epoch": 0.5515365387443023, + "grad_norm": 0.8440523743629456, + "learning_rate": 4.898954418161876e-06, + "loss": 0.5736, + "step": 7502 + }, + { + "epoch": 0.5516100573445082, + "grad_norm": 0.7822224497795105, + "learning_rate": 4.898927299564561e-06, + "loss": 0.5226, + "step": 7503 + }, + { + "epoch": 0.5516835759447141, + "grad_norm": 0.8376975059509277, + "learning_rate": 4.8989001774037695e-06, + "loss": 0.5393, + "step": 7504 + }, + { + "epoch": 0.5517570945449198, + "grad_norm": 0.8459351062774658, + "learning_rate": 4.8988730516795444e-06, + "loss": 0.5133, + "step": 7505 + }, + { + "epoch": 0.5518306131451257, + "grad_norm": 0.8407958745956421, + "learning_rate": 4.898845922391924e-06, + "loss": 0.5377, + "step": 7506 + }, + { + "epoch": 0.5519041317453316, + "grad_norm": 0.8340442180633545, + "learning_rate": 4.898818789540949e-06, + "loss": 0.531, + "step": 7507 + }, + { + "epoch": 0.5519776503455375, + "grad_norm": 0.8366067409515381, + "learning_rate": 4.898791653126661e-06, + "loss": 0.5427, + "step": 7508 + }, + { + "epoch": 0.5520511689457432, + "grad_norm": 0.8332499265670776, + "learning_rate": 4.898764513149099e-06, + "loss": 0.5584, + "step": 7509 + }, + { + "epoch": 0.5521246875459491, + "grad_norm": 0.8163657188415527, + "learning_rate": 4.898737369608303e-06, + "loss": 0.564, + "step": 7510 + }, + { + "epoch": 0.552198206146155, + "grad_norm": 0.8317967653274536, + "learning_rate": 4.898710222504315e-06, + "loss": 0.5603, + "step": 7511 + }, + { + "epoch": 0.5522717247463609, + "grad_norm": 0.8818846940994263, + "learning_rate": 4.898683071837174e-06, + "loss": 0.5655, + "step": 7512 + }, + { + "epoch": 0.5523452433465667, + "grad_norm": 0.8947253227233887, + "learning_rate": 4.89865591760692e-06, + "loss": 0.5472, + "step": 7513 + }, + { + "epoch": 0.5524187619467725, + "grad_norm": 0.8675657510757446, + "learning_rate": 4.898628759813594e-06, + "loss": 0.5041, + "step": 7514 + }, + { + "epoch": 0.5524922805469784, + "grad_norm": 0.8209342360496521, + "learning_rate": 4.898601598457237e-06, + "loss": 0.5032, + "step": 7515 + }, + { + "epoch": 0.5525657991471843, + "grad_norm": 0.8035435676574707, + "learning_rate": 4.898574433537888e-06, + "loss": 0.5385, + "step": 7516 + }, + { + "epoch": 0.5526393177473901, + "grad_norm": 0.8327635526657104, + "learning_rate": 4.898547265055589e-06, + "loss": 0.5467, + "step": 7517 + }, + { + "epoch": 0.5527128363475959, + "grad_norm": 0.8469045758247375, + "learning_rate": 4.898520093010378e-06, + "loss": 0.5361, + "step": 7518 + }, + { + "epoch": 0.5527863549478018, + "grad_norm": 0.8590689301490784, + "learning_rate": 4.8984929174022985e-06, + "loss": 0.5311, + "step": 7519 + }, + { + "epoch": 0.5528598735480077, + "grad_norm": 0.8691157698631287, + "learning_rate": 4.898465738231388e-06, + "loss": 0.5524, + "step": 7520 + }, + { + "epoch": 0.5529333921482135, + "grad_norm": 0.8033909797668457, + "learning_rate": 4.898438555497688e-06, + "loss": 0.5442, + "step": 7521 + }, + { + "epoch": 0.5530069107484193, + "grad_norm": 0.80409175157547, + "learning_rate": 4.898411369201239e-06, + "loss": 0.5288, + "step": 7522 + }, + { + "epoch": 0.5530804293486252, + "grad_norm": 0.8306796550750732, + "learning_rate": 4.898384179342082e-06, + "loss": 0.5627, + "step": 7523 + }, + { + "epoch": 0.5531539479488311, + "grad_norm": 0.8443827033042908, + "learning_rate": 4.898356985920255e-06, + "loss": 0.5635, + "step": 7524 + }, + { + "epoch": 0.5532274665490369, + "grad_norm": 0.8109288215637207, + "learning_rate": 4.898329788935801e-06, + "loss": 0.5787, + "step": 7525 + }, + { + "epoch": 0.5533009851492428, + "grad_norm": 0.8072315454483032, + "learning_rate": 4.89830258838876e-06, + "loss": 0.5415, + "step": 7526 + }, + { + "epoch": 0.5533745037494486, + "grad_norm": 0.805678129196167, + "learning_rate": 4.898275384279171e-06, + "loss": 0.5497, + "step": 7527 + }, + { + "epoch": 0.5534480223496545, + "grad_norm": 0.7926591634750366, + "learning_rate": 4.8982481766070755e-06, + "loss": 0.4885, + "step": 7528 + }, + { + "epoch": 0.5535215409498603, + "grad_norm": 0.9057445526123047, + "learning_rate": 4.898220965372513e-06, + "loss": 0.5886, + "step": 7529 + }, + { + "epoch": 0.5535950595500662, + "grad_norm": 0.8321449160575867, + "learning_rate": 4.898193750575525e-06, + "loss": 0.5582, + "step": 7530 + }, + { + "epoch": 0.553668578150272, + "grad_norm": 0.8342463970184326, + "learning_rate": 4.898166532216152e-06, + "loss": 0.5346, + "step": 7531 + }, + { + "epoch": 0.5537420967504779, + "grad_norm": 0.8432776927947998, + "learning_rate": 4.898139310294434e-06, + "loss": 0.5452, + "step": 7532 + }, + { + "epoch": 0.5538156153506837, + "grad_norm": 0.8197382688522339, + "learning_rate": 4.8981120848104114e-06, + "loss": 0.5487, + "step": 7533 + }, + { + "epoch": 0.5538891339508896, + "grad_norm": 0.9075015187263489, + "learning_rate": 4.898084855764123e-06, + "loss": 0.5678, + "step": 7534 + }, + { + "epoch": 0.5539626525510954, + "grad_norm": 0.8399832248687744, + "learning_rate": 4.898057623155613e-06, + "loss": 0.5067, + "step": 7535 + }, + { + "epoch": 0.5540361711513013, + "grad_norm": 0.9251586198806763, + "learning_rate": 4.898030386984919e-06, + "loss": 0.5181, + "step": 7536 + }, + { + "epoch": 0.5541096897515071, + "grad_norm": 0.9035869836807251, + "learning_rate": 4.898003147252082e-06, + "loss": 0.6063, + "step": 7537 + }, + { + "epoch": 0.554183208351713, + "grad_norm": 0.8358448147773743, + "learning_rate": 4.897975903957142e-06, + "loss": 0.5101, + "step": 7538 + }, + { + "epoch": 0.5542567269519189, + "grad_norm": 0.8101109862327576, + "learning_rate": 4.897948657100141e-06, + "loss": 0.485, + "step": 7539 + }, + { + "epoch": 0.5543302455521247, + "grad_norm": 0.8811442852020264, + "learning_rate": 4.897921406681118e-06, + "loss": 0.5592, + "step": 7540 + }, + { + "epoch": 0.5544037641523305, + "grad_norm": 0.8881211280822754, + "learning_rate": 4.897894152700114e-06, + "loss": 0.5923, + "step": 7541 + }, + { + "epoch": 0.5544772827525364, + "grad_norm": 0.8225483894348145, + "learning_rate": 4.89786689515717e-06, + "loss": 0.5181, + "step": 7542 + }, + { + "epoch": 0.5545508013527423, + "grad_norm": 0.8211756348609924, + "learning_rate": 4.897839634052326e-06, + "loss": 0.5299, + "step": 7543 + }, + { + "epoch": 0.5546243199529481, + "grad_norm": 0.8197619318962097, + "learning_rate": 4.897812369385623e-06, + "loss": 0.5271, + "step": 7544 + }, + { + "epoch": 0.5546978385531539, + "grad_norm": 0.8375295400619507, + "learning_rate": 4.8977851011571e-06, + "loss": 0.5805, + "step": 7545 + }, + { + "epoch": 0.5547713571533598, + "grad_norm": 0.8225491046905518, + "learning_rate": 4.897757829366799e-06, + "loss": 0.5331, + "step": 7546 + }, + { + "epoch": 0.5548448757535657, + "grad_norm": 0.8227545022964478, + "learning_rate": 4.89773055401476e-06, + "loss": 0.5594, + "step": 7547 + }, + { + "epoch": 0.5549183943537715, + "grad_norm": 0.8185699582099915, + "learning_rate": 4.897703275101024e-06, + "loss": 0.5436, + "step": 7548 + }, + { + "epoch": 0.5549919129539773, + "grad_norm": 0.8537350296974182, + "learning_rate": 4.89767599262563e-06, + "loss": 0.5748, + "step": 7549 + }, + { + "epoch": 0.5550654315541832, + "grad_norm": 0.8653262853622437, + "learning_rate": 4.897648706588621e-06, + "loss": 0.5645, + "step": 7550 + }, + { + "epoch": 0.5551389501543891, + "grad_norm": 0.838525652885437, + "learning_rate": 4.897621416990035e-06, + "loss": 0.5702, + "step": 7551 + }, + { + "epoch": 0.555212468754595, + "grad_norm": 0.7843332290649414, + "learning_rate": 4.897594123829914e-06, + "loss": 0.5241, + "step": 7552 + }, + { + "epoch": 0.5552859873548007, + "grad_norm": 0.8104695677757263, + "learning_rate": 4.897566827108299e-06, + "loss": 0.4949, + "step": 7553 + }, + { + "epoch": 0.5553595059550066, + "grad_norm": 0.8375505805015564, + "learning_rate": 4.897539526825229e-06, + "loss": 0.5271, + "step": 7554 + }, + { + "epoch": 0.5554330245552125, + "grad_norm": 0.9071950316429138, + "learning_rate": 4.8975122229807445e-06, + "loss": 0.5924, + "step": 7555 + }, + { + "epoch": 0.5555065431554184, + "grad_norm": 0.8320985436439514, + "learning_rate": 4.897484915574889e-06, + "loss": 0.5107, + "step": 7556 + }, + { + "epoch": 0.5555800617556241, + "grad_norm": 0.810740053653717, + "learning_rate": 4.8974576046077e-06, + "loss": 0.5552, + "step": 7557 + }, + { + "epoch": 0.55565358035583, + "grad_norm": 0.8260281085968018, + "learning_rate": 4.897430290079218e-06, + "loss": 0.5112, + "step": 7558 + }, + { + "epoch": 0.5557270989560359, + "grad_norm": 0.85053950548172, + "learning_rate": 4.897402971989485e-06, + "loss": 0.5674, + "step": 7559 + }, + { + "epoch": 0.5558006175562418, + "grad_norm": 0.85274338722229, + "learning_rate": 4.897375650338542e-06, + "loss": 0.5654, + "step": 7560 + }, + { + "epoch": 0.5558741361564475, + "grad_norm": 0.8171094059944153, + "learning_rate": 4.897348325126428e-06, + "loss": 0.5192, + "step": 7561 + }, + { + "epoch": 0.5559476547566534, + "grad_norm": 0.8358718752861023, + "learning_rate": 4.897320996353184e-06, + "loss": 0.5644, + "step": 7562 + }, + { + "epoch": 0.5560211733568593, + "grad_norm": 0.8886316418647766, + "learning_rate": 4.897293664018852e-06, + "loss": 0.5962, + "step": 7563 + }, + { + "epoch": 0.5560946919570652, + "grad_norm": 0.8020240068435669, + "learning_rate": 4.8972663281234715e-06, + "loss": 0.5548, + "step": 7564 + }, + { + "epoch": 0.556168210557271, + "grad_norm": 0.8388903737068176, + "learning_rate": 4.8972389886670815e-06, + "loss": 0.5298, + "step": 7565 + }, + { + "epoch": 0.5562417291574768, + "grad_norm": 0.8674479126930237, + "learning_rate": 4.897211645649726e-06, + "loss": 0.5463, + "step": 7566 + }, + { + "epoch": 0.5563152477576827, + "grad_norm": 0.8612646460533142, + "learning_rate": 4.897184299071443e-06, + "loss": 0.6371, + "step": 7567 + }, + { + "epoch": 0.5563887663578886, + "grad_norm": 0.8191538453102112, + "learning_rate": 4.8971569489322745e-06, + "loss": 0.5695, + "step": 7568 + }, + { + "epoch": 0.5564622849580944, + "grad_norm": 0.8077065944671631, + "learning_rate": 4.897129595232261e-06, + "loss": 0.5105, + "step": 7569 + }, + { + "epoch": 0.5565358035583002, + "grad_norm": 0.7586472630500793, + "learning_rate": 4.897102237971441e-06, + "loss": 0.4868, + "step": 7570 + }, + { + "epoch": 0.5566093221585061, + "grad_norm": 0.854392409324646, + "learning_rate": 4.897074877149859e-06, + "loss": 0.5268, + "step": 7571 + }, + { + "epoch": 0.556682840758712, + "grad_norm": 0.8088271021842957, + "learning_rate": 4.897047512767552e-06, + "loss": 0.5303, + "step": 7572 + }, + { + "epoch": 0.5567563593589178, + "grad_norm": 0.8041417002677917, + "learning_rate": 4.897020144824562e-06, + "loss": 0.5041, + "step": 7573 + }, + { + "epoch": 0.5568298779591236, + "grad_norm": 0.8433414697647095, + "learning_rate": 4.896992773320931e-06, + "loss": 0.5577, + "step": 7574 + }, + { + "epoch": 0.5569033965593295, + "grad_norm": 0.8527549505233765, + "learning_rate": 4.896965398256698e-06, + "loss": 0.5477, + "step": 7575 + }, + { + "epoch": 0.5569769151595354, + "grad_norm": 0.8556081056594849, + "learning_rate": 4.896938019631905e-06, + "loss": 0.5545, + "step": 7576 + }, + { + "epoch": 0.5570504337597412, + "grad_norm": 0.8779119253158569, + "learning_rate": 4.8969106374465905e-06, + "loss": 0.5804, + "step": 7577 + }, + { + "epoch": 0.557123952359947, + "grad_norm": 0.8814209699630737, + "learning_rate": 4.896883251700797e-06, + "loss": 0.5832, + "step": 7578 + }, + { + "epoch": 0.5571974709601529, + "grad_norm": 0.8096378445625305, + "learning_rate": 4.896855862394565e-06, + "loss": 0.5462, + "step": 7579 + }, + { + "epoch": 0.5572709895603588, + "grad_norm": 0.8900346159934998, + "learning_rate": 4.8968284695279346e-06, + "loss": 0.5636, + "step": 7580 + }, + { + "epoch": 0.5573445081605646, + "grad_norm": 0.7980438470840454, + "learning_rate": 4.896801073100946e-06, + "loss": 0.5076, + "step": 7581 + }, + { + "epoch": 0.5574180267607705, + "grad_norm": 0.7868141531944275, + "learning_rate": 4.896773673113641e-06, + "loss": 0.5556, + "step": 7582 + }, + { + "epoch": 0.5574915453609763, + "grad_norm": 0.8281459212303162, + "learning_rate": 4.896746269566062e-06, + "loss": 0.5475, + "step": 7583 + }, + { + "epoch": 0.5575650639611822, + "grad_norm": 0.8113718628883362, + "learning_rate": 4.896718862458245e-06, + "loss": 0.535, + "step": 7584 + }, + { + "epoch": 0.557638582561388, + "grad_norm": 0.7872015833854675, + "learning_rate": 4.896691451790235e-06, + "loss": 0.519, + "step": 7585 + }, + { + "epoch": 0.5577121011615939, + "grad_norm": 0.8824781179428101, + "learning_rate": 4.89666403756207e-06, + "loss": 0.5667, + "step": 7586 + }, + { + "epoch": 0.5577856197617997, + "grad_norm": 0.8641833066940308, + "learning_rate": 4.896636619773792e-06, + "loss": 0.5307, + "step": 7587 + }, + { + "epoch": 0.5578591383620056, + "grad_norm": 0.8316683173179626, + "learning_rate": 4.896609198425442e-06, + "loss": 0.522, + "step": 7588 + }, + { + "epoch": 0.5579326569622114, + "grad_norm": 0.8007581830024719, + "learning_rate": 4.896581773517061e-06, + "loss": 0.5634, + "step": 7589 + }, + { + "epoch": 0.5580061755624173, + "grad_norm": 0.8551180958747864, + "learning_rate": 4.896554345048687e-06, + "loss": 0.5284, + "step": 7590 + }, + { + "epoch": 0.5580796941626232, + "grad_norm": 0.8577991724014282, + "learning_rate": 4.896526913020364e-06, + "loss": 0.6132, + "step": 7591 + }, + { + "epoch": 0.558153212762829, + "grad_norm": 0.7945630550384521, + "learning_rate": 4.896499477432132e-06, + "loss": 0.5216, + "step": 7592 + }, + { + "epoch": 0.5582267313630348, + "grad_norm": 0.8394166231155396, + "learning_rate": 4.8964720382840305e-06, + "loss": 0.5319, + "step": 7593 + }, + { + "epoch": 0.5583002499632407, + "grad_norm": 0.8311812877655029, + "learning_rate": 4.8964445955761005e-06, + "loss": 0.5878, + "step": 7594 + }, + { + "epoch": 0.5583737685634466, + "grad_norm": 0.8097630143165588, + "learning_rate": 4.896417149308385e-06, + "loss": 0.5253, + "step": 7595 + }, + { + "epoch": 0.5584472871636524, + "grad_norm": 0.8027758002281189, + "learning_rate": 4.896389699480921e-06, + "loss": 0.533, + "step": 7596 + }, + { + "epoch": 0.5585208057638582, + "grad_norm": 0.8307938575744629, + "learning_rate": 4.896362246093752e-06, + "loss": 0.5181, + "step": 7597 + }, + { + "epoch": 0.5585943243640641, + "grad_norm": 0.8264908790588379, + "learning_rate": 4.896334789146919e-06, + "loss": 0.551, + "step": 7598 + }, + { + "epoch": 0.55866784296427, + "grad_norm": 0.904229462146759, + "learning_rate": 4.8963073286404614e-06, + "loss": 0.6317, + "step": 7599 + }, + { + "epoch": 0.5587413615644758, + "grad_norm": 0.8567285537719727, + "learning_rate": 4.89627986457442e-06, + "loss": 0.5357, + "step": 7600 + }, + { + "epoch": 0.5588148801646816, + "grad_norm": 0.8383865356445312, + "learning_rate": 4.896252396948836e-06, + "loss": 0.554, + "step": 7601 + }, + { + "epoch": 0.5588883987648875, + "grad_norm": 0.786189615726471, + "learning_rate": 4.8962249257637505e-06, + "loss": 0.5358, + "step": 7602 + }, + { + "epoch": 0.5589619173650934, + "grad_norm": 0.8331820964813232, + "learning_rate": 4.896197451019205e-06, + "loss": 0.567, + "step": 7603 + }, + { + "epoch": 0.5590354359652993, + "grad_norm": 0.8312704563140869, + "learning_rate": 4.896169972715239e-06, + "loss": 0.5253, + "step": 7604 + }, + { + "epoch": 0.559108954565505, + "grad_norm": 0.818594753742218, + "learning_rate": 4.8961424908518926e-06, + "loss": 0.5471, + "step": 7605 + }, + { + "epoch": 0.5591824731657109, + "grad_norm": 0.8700255751609802, + "learning_rate": 4.896115005429209e-06, + "loss": 0.5251, + "step": 7606 + }, + { + "epoch": 0.5592559917659168, + "grad_norm": 0.865626871585846, + "learning_rate": 4.896087516447227e-06, + "loss": 0.5545, + "step": 7607 + }, + { + "epoch": 0.5593295103661227, + "grad_norm": 0.8377863764762878, + "learning_rate": 4.896060023905989e-06, + "loss": 0.4998, + "step": 7608 + }, + { + "epoch": 0.5594030289663284, + "grad_norm": 0.8532816171646118, + "learning_rate": 4.896032527805534e-06, + "loss": 0.5732, + "step": 7609 + }, + { + "epoch": 0.5594765475665343, + "grad_norm": 0.7735000848770142, + "learning_rate": 4.896005028145905e-06, + "loss": 0.5237, + "step": 7610 + }, + { + "epoch": 0.5595500661667402, + "grad_norm": 0.7883667945861816, + "learning_rate": 4.895977524927141e-06, + "loss": 0.5244, + "step": 7611 + }, + { + "epoch": 0.5596235847669461, + "grad_norm": 0.8059868216514587, + "learning_rate": 4.895950018149284e-06, + "loss": 0.5534, + "step": 7612 + }, + { + "epoch": 0.5596971033671518, + "grad_norm": 0.8375779390335083, + "learning_rate": 4.895922507812374e-06, + "loss": 0.5316, + "step": 7613 + }, + { + "epoch": 0.5597706219673577, + "grad_norm": 0.8264373540878296, + "learning_rate": 4.895894993916452e-06, + "loss": 0.5494, + "step": 7614 + }, + { + "epoch": 0.5598441405675636, + "grad_norm": 0.7938170433044434, + "learning_rate": 4.89586747646156e-06, + "loss": 0.5379, + "step": 7615 + }, + { + "epoch": 0.5599176591677695, + "grad_norm": 0.8484510779380798, + "learning_rate": 4.895839955447737e-06, + "loss": 0.5616, + "step": 7616 + }, + { + "epoch": 0.5599911777679752, + "grad_norm": 0.8007338643074036, + "learning_rate": 4.895812430875027e-06, + "loss": 0.541, + "step": 7617 + }, + { + "epoch": 0.5600646963681811, + "grad_norm": 0.8371747136116028, + "learning_rate": 4.8957849027434665e-06, + "loss": 0.571, + "step": 7618 + }, + { + "epoch": 0.560138214968387, + "grad_norm": 0.7923346161842346, + "learning_rate": 4.895757371053101e-06, + "loss": 0.5323, + "step": 7619 + }, + { + "epoch": 0.5602117335685929, + "grad_norm": 0.785272479057312, + "learning_rate": 4.895729835803967e-06, + "loss": 0.5208, + "step": 7620 + }, + { + "epoch": 0.5602852521687987, + "grad_norm": 0.8607359528541565, + "learning_rate": 4.8957022969961085e-06, + "loss": 0.614, + "step": 7621 + }, + { + "epoch": 0.5603587707690045, + "grad_norm": 0.8568984866142273, + "learning_rate": 4.895674754629565e-06, + "loss": 0.6011, + "step": 7622 + }, + { + "epoch": 0.5604322893692104, + "grad_norm": 0.8185001015663147, + "learning_rate": 4.8956472087043785e-06, + "loss": 0.5424, + "step": 7623 + }, + { + "epoch": 0.5605058079694163, + "grad_norm": 0.8959346413612366, + "learning_rate": 4.895619659220588e-06, + "loss": 0.5641, + "step": 7624 + }, + { + "epoch": 0.5605793265696221, + "grad_norm": 0.8296692371368408, + "learning_rate": 4.895592106178237e-06, + "loss": 0.5298, + "step": 7625 + }, + { + "epoch": 0.5606528451698279, + "grad_norm": 0.8603593111038208, + "learning_rate": 4.895564549577365e-06, + "loss": 0.5594, + "step": 7626 + }, + { + "epoch": 0.5607263637700338, + "grad_norm": 0.7736011147499084, + "learning_rate": 4.895536989418013e-06, + "loss": 0.541, + "step": 7627 + }, + { + "epoch": 0.5607998823702397, + "grad_norm": 0.7993500232696533, + "learning_rate": 4.8955094257002216e-06, + "loss": 0.4919, + "step": 7628 + }, + { + "epoch": 0.5608734009704455, + "grad_norm": 0.8513904809951782, + "learning_rate": 4.8954818584240315e-06, + "loss": 0.5492, + "step": 7629 + }, + { + "epoch": 0.5609469195706513, + "grad_norm": 0.8047094345092773, + "learning_rate": 4.895454287589485e-06, + "loss": 0.528, + "step": 7630 + }, + { + "epoch": 0.5610204381708572, + "grad_norm": 0.9093722701072693, + "learning_rate": 4.895426713196623e-06, + "loss": 0.5627, + "step": 7631 + }, + { + "epoch": 0.5610939567710631, + "grad_norm": 0.8349389433860779, + "learning_rate": 4.895399135245484e-06, + "loss": 0.5263, + "step": 7632 + }, + { + "epoch": 0.5611674753712689, + "grad_norm": 0.8191705942153931, + "learning_rate": 4.8953715537361125e-06, + "loss": 0.508, + "step": 7633 + }, + { + "epoch": 0.5612409939714748, + "grad_norm": 0.8984976410865784, + "learning_rate": 4.895343968668547e-06, + "loss": 0.5305, + "step": 7634 + }, + { + "epoch": 0.5613145125716806, + "grad_norm": 0.8390412330627441, + "learning_rate": 4.8953163800428295e-06, + "loss": 0.5413, + "step": 7635 + }, + { + "epoch": 0.5613880311718865, + "grad_norm": 0.8499994277954102, + "learning_rate": 4.895288787859e-06, + "loss": 0.5545, + "step": 7636 + }, + { + "epoch": 0.5614615497720924, + "grad_norm": 0.8530221581459045, + "learning_rate": 4.895261192117101e-06, + "loss": 0.5568, + "step": 7637 + }, + { + "epoch": 0.5615350683722982, + "grad_norm": 0.8083328604698181, + "learning_rate": 4.895233592817172e-06, + "loss": 0.5104, + "step": 7638 + }, + { + "epoch": 0.561608586972504, + "grad_norm": 0.8697389960289001, + "learning_rate": 4.895205989959255e-06, + "loss": 0.5206, + "step": 7639 + }, + { + "epoch": 0.5616821055727099, + "grad_norm": 0.8188719153404236, + "learning_rate": 4.8951783835433905e-06, + "loss": 0.5589, + "step": 7640 + }, + { + "epoch": 0.5617556241729158, + "grad_norm": 0.8076352477073669, + "learning_rate": 4.89515077356962e-06, + "loss": 0.5754, + "step": 7641 + }, + { + "epoch": 0.5618291427731216, + "grad_norm": 0.8042243719100952, + "learning_rate": 4.895123160037984e-06, + "loss": 0.5498, + "step": 7642 + }, + { + "epoch": 0.5619026613733275, + "grad_norm": 0.7828338742256165, + "learning_rate": 4.895095542948524e-06, + "loss": 0.5033, + "step": 7643 + }, + { + "epoch": 0.5619761799735333, + "grad_norm": 0.8322115540504456, + "learning_rate": 4.89506792230128e-06, + "loss": 0.5467, + "step": 7644 + }, + { + "epoch": 0.5620496985737392, + "grad_norm": 0.8506466746330261, + "learning_rate": 4.895040298096293e-06, + "loss": 0.4899, + "step": 7645 + }, + { + "epoch": 0.562123217173945, + "grad_norm": 0.866471529006958, + "learning_rate": 4.895012670333607e-06, + "loss": 0.5594, + "step": 7646 + }, + { + "epoch": 0.5621967357741509, + "grad_norm": 0.8596031069755554, + "learning_rate": 4.894985039013259e-06, + "loss": 0.5555, + "step": 7647 + }, + { + "epoch": 0.5622702543743567, + "grad_norm": 0.8828059434890747, + "learning_rate": 4.894957404135293e-06, + "loss": 0.5999, + "step": 7648 + }, + { + "epoch": 0.5623437729745626, + "grad_norm": 0.8388007283210754, + "learning_rate": 4.894929765699749e-06, + "loss": 0.5688, + "step": 7649 + }, + { + "epoch": 0.5624172915747684, + "grad_norm": 0.825590193271637, + "learning_rate": 4.8949021237066666e-06, + "loss": 0.5056, + "step": 7650 + }, + { + "epoch": 0.5624908101749743, + "grad_norm": 0.8410060405731201, + "learning_rate": 4.89487447815609e-06, + "loss": 0.5938, + "step": 7651 + }, + { + "epoch": 0.5625643287751801, + "grad_norm": 0.8583136796951294, + "learning_rate": 4.894846829048057e-06, + "loss": 0.5779, + "step": 7652 + }, + { + "epoch": 0.562637847375386, + "grad_norm": 0.8422438502311707, + "learning_rate": 4.894819176382611e-06, + "loss": 0.5417, + "step": 7653 + }, + { + "epoch": 0.5627113659755918, + "grad_norm": 0.8370105028152466, + "learning_rate": 4.894791520159792e-06, + "loss": 0.5365, + "step": 7654 + }, + { + "epoch": 0.5627848845757977, + "grad_norm": 0.8403592705726624, + "learning_rate": 4.894763860379641e-06, + "loss": 0.5355, + "step": 7655 + }, + { + "epoch": 0.5628584031760036, + "grad_norm": 0.8519876003265381, + "learning_rate": 4.8947361970422e-06, + "loss": 0.5608, + "step": 7656 + }, + { + "epoch": 0.5629319217762094, + "grad_norm": 0.8209717869758606, + "learning_rate": 4.894708530147509e-06, + "loss": 0.5778, + "step": 7657 + }, + { + "epoch": 0.5630054403764152, + "grad_norm": 0.8482524752616882, + "learning_rate": 4.894680859695611e-06, + "loss": 0.5866, + "step": 7658 + }, + { + "epoch": 0.5630789589766211, + "grad_norm": 0.8257632255554199, + "learning_rate": 4.894653185686545e-06, + "loss": 0.5521, + "step": 7659 + }, + { + "epoch": 0.563152477576827, + "grad_norm": 0.88035649061203, + "learning_rate": 4.894625508120352e-06, + "loss": 0.5028, + "step": 7660 + }, + { + "epoch": 0.5632259961770328, + "grad_norm": 0.8182226419448853, + "learning_rate": 4.894597826997074e-06, + "loss": 0.511, + "step": 7661 + }, + { + "epoch": 0.5632995147772386, + "grad_norm": 0.8359242081642151, + "learning_rate": 4.894570142316752e-06, + "loss": 0.5304, + "step": 7662 + }, + { + "epoch": 0.5633730333774445, + "grad_norm": 0.864807665348053, + "learning_rate": 4.8945424540794275e-06, + "loss": 0.5324, + "step": 7663 + }, + { + "epoch": 0.5634465519776504, + "grad_norm": 0.8207464814186096, + "learning_rate": 4.894514762285142e-06, + "loss": 0.5351, + "step": 7664 + }, + { + "epoch": 0.5635200705778562, + "grad_norm": 0.7874698042869568, + "learning_rate": 4.894487066933935e-06, + "loss": 0.4999, + "step": 7665 + }, + { + "epoch": 0.563593589178062, + "grad_norm": 0.8649342656135559, + "learning_rate": 4.894459368025849e-06, + "loss": 0.5907, + "step": 7666 + }, + { + "epoch": 0.5636671077782679, + "grad_norm": 0.8655877709388733, + "learning_rate": 4.894431665560925e-06, + "loss": 0.5875, + "step": 7667 + }, + { + "epoch": 0.5637406263784738, + "grad_norm": 0.8524664044380188, + "learning_rate": 4.894403959539203e-06, + "loss": 0.5479, + "step": 7668 + }, + { + "epoch": 0.5638141449786797, + "grad_norm": 0.7900020480155945, + "learning_rate": 4.894376249960726e-06, + "loss": 0.5327, + "step": 7669 + }, + { + "epoch": 0.5638876635788854, + "grad_norm": 0.8486982583999634, + "learning_rate": 4.894348536825533e-06, + "loss": 0.5629, + "step": 7670 + }, + { + "epoch": 0.5639611821790913, + "grad_norm": 0.8551309108734131, + "learning_rate": 4.894320820133668e-06, + "loss": 0.5399, + "step": 7671 + }, + { + "epoch": 0.5640347007792972, + "grad_norm": 0.8311700224876404, + "learning_rate": 4.894293099885169e-06, + "loss": 0.5799, + "step": 7672 + }, + { + "epoch": 0.5641082193795031, + "grad_norm": 0.8492332100868225, + "learning_rate": 4.89426537608008e-06, + "loss": 0.5419, + "step": 7673 + }, + { + "epoch": 0.5641817379797088, + "grad_norm": 0.7926174402236938, + "learning_rate": 4.89423764871844e-06, + "loss": 0.518, + "step": 7674 + }, + { + "epoch": 0.5642552565799147, + "grad_norm": 0.8223817944526672, + "learning_rate": 4.894209917800291e-06, + "loss": 0.5071, + "step": 7675 + }, + { + "epoch": 0.5643287751801206, + "grad_norm": 0.9024696350097656, + "learning_rate": 4.894182183325675e-06, + "loss": 0.5809, + "step": 7676 + }, + { + "epoch": 0.5644022937803265, + "grad_norm": 0.870566189289093, + "learning_rate": 4.894154445294632e-06, + "loss": 0.5139, + "step": 7677 + }, + { + "epoch": 0.5644758123805322, + "grad_norm": 0.8829197883605957, + "learning_rate": 4.894126703707204e-06, + "loss": 0.5431, + "step": 7678 + }, + { + "epoch": 0.5645493309807381, + "grad_norm": 0.9335896372795105, + "learning_rate": 4.894098958563431e-06, + "loss": 0.5818, + "step": 7679 + }, + { + "epoch": 0.564622849580944, + "grad_norm": 0.7970061898231506, + "learning_rate": 4.894071209863356e-06, + "loss": 0.5206, + "step": 7680 + }, + { + "epoch": 0.5646963681811499, + "grad_norm": 0.8679705262184143, + "learning_rate": 4.894043457607019e-06, + "loss": 0.5672, + "step": 7681 + }, + { + "epoch": 0.5647698867813556, + "grad_norm": 0.8613421320915222, + "learning_rate": 4.894015701794462e-06, + "loss": 0.5687, + "step": 7682 + }, + { + "epoch": 0.5648434053815615, + "grad_norm": 0.8337295651435852, + "learning_rate": 4.893987942425725e-06, + "loss": 0.5793, + "step": 7683 + }, + { + "epoch": 0.5649169239817674, + "grad_norm": 0.8544402122497559, + "learning_rate": 4.89396017950085e-06, + "loss": 0.598, + "step": 7684 + }, + { + "epoch": 0.5649904425819733, + "grad_norm": 0.8250648379325867, + "learning_rate": 4.893932413019879e-06, + "loss": 0.5578, + "step": 7685 + }, + { + "epoch": 0.565063961182179, + "grad_norm": 0.8881555795669556, + "learning_rate": 4.893904642982852e-06, + "loss": 0.5382, + "step": 7686 + }, + { + "epoch": 0.5651374797823849, + "grad_norm": 0.8147713541984558, + "learning_rate": 4.893876869389811e-06, + "loss": 0.5453, + "step": 7687 + }, + { + "epoch": 0.5652109983825908, + "grad_norm": 0.8141970038414001, + "learning_rate": 4.8938490922407965e-06, + "loss": 0.5562, + "step": 7688 + }, + { + "epoch": 0.5652845169827967, + "grad_norm": 0.8537650108337402, + "learning_rate": 4.89382131153585e-06, + "loss": 0.5487, + "step": 7689 + }, + { + "epoch": 0.5653580355830025, + "grad_norm": 0.8701884150505066, + "learning_rate": 4.893793527275014e-06, + "loss": 0.5686, + "step": 7690 + }, + { + "epoch": 0.5654315541832083, + "grad_norm": 0.8306875228881836, + "learning_rate": 4.893765739458328e-06, + "loss": 0.5277, + "step": 7691 + }, + { + "epoch": 0.5655050727834142, + "grad_norm": 0.8639206290245056, + "learning_rate": 4.893737948085835e-06, + "loss": 0.5426, + "step": 7692 + }, + { + "epoch": 0.5655785913836201, + "grad_norm": 0.8457142114639282, + "learning_rate": 4.8937101531575745e-06, + "loss": 0.5553, + "step": 7693 + }, + { + "epoch": 0.5656521099838259, + "grad_norm": 0.8188959956169128, + "learning_rate": 4.893682354673589e-06, + "loss": 0.4976, + "step": 7694 + }, + { + "epoch": 0.5657256285840317, + "grad_norm": 0.8260761499404907, + "learning_rate": 4.89365455263392e-06, + "loss": 0.5657, + "step": 7695 + }, + { + "epoch": 0.5657991471842376, + "grad_norm": 0.8426594734191895, + "learning_rate": 4.8936267470386075e-06, + "loss": 0.5791, + "step": 7696 + }, + { + "epoch": 0.5658726657844435, + "grad_norm": 0.79121333360672, + "learning_rate": 4.893598937887694e-06, + "loss": 0.5303, + "step": 7697 + }, + { + "epoch": 0.5659461843846493, + "grad_norm": 0.8256922364234924, + "learning_rate": 4.8935711251812195e-06, + "loss": 0.5964, + "step": 7698 + }, + { + "epoch": 0.5660197029848552, + "grad_norm": 0.8219789266586304, + "learning_rate": 4.893543308919226e-06, + "loss": 0.5326, + "step": 7699 + }, + { + "epoch": 0.566093221585061, + "grad_norm": 0.8267701864242554, + "learning_rate": 4.893515489101757e-06, + "loss": 0.5207, + "step": 7700 + }, + { + "epoch": 0.5661667401852669, + "grad_norm": 0.8681022524833679, + "learning_rate": 4.89348766572885e-06, + "loss": 0.5551, + "step": 7701 + }, + { + "epoch": 0.5662402587854727, + "grad_norm": 0.8108463287353516, + "learning_rate": 4.893459838800549e-06, + "loss": 0.526, + "step": 7702 + }, + { + "epoch": 0.5663137773856786, + "grad_norm": 0.8319451212882996, + "learning_rate": 4.893432008316893e-06, + "loss": 0.52, + "step": 7703 + }, + { + "epoch": 0.5663872959858844, + "grad_norm": 0.8720823526382446, + "learning_rate": 4.8934041742779265e-06, + "loss": 0.5415, + "step": 7704 + }, + { + "epoch": 0.5664608145860903, + "grad_norm": 0.8662034273147583, + "learning_rate": 4.893376336683689e-06, + "loss": 0.5479, + "step": 7705 + }, + { + "epoch": 0.5665343331862961, + "grad_norm": 0.8347812294960022, + "learning_rate": 4.893348495534222e-06, + "loss": 0.5812, + "step": 7706 + }, + { + "epoch": 0.566607851786502, + "grad_norm": 0.8770520687103271, + "learning_rate": 4.893320650829566e-06, + "loss": 0.541, + "step": 7707 + }, + { + "epoch": 0.5666813703867078, + "grad_norm": 0.8232636451721191, + "learning_rate": 4.893292802569764e-06, + "loss": 0.5296, + "step": 7708 + }, + { + "epoch": 0.5667548889869137, + "grad_norm": 0.8963871598243713, + "learning_rate": 4.893264950754856e-06, + "loss": 0.5644, + "step": 7709 + }, + { + "epoch": 0.5668284075871195, + "grad_norm": 0.9138287901878357, + "learning_rate": 4.893237095384884e-06, + "loss": 0.5566, + "step": 7710 + }, + { + "epoch": 0.5669019261873254, + "grad_norm": 0.8032906651496887, + "learning_rate": 4.893209236459889e-06, + "loss": 0.534, + "step": 7711 + }, + { + "epoch": 0.5669754447875313, + "grad_norm": 0.8618490099906921, + "learning_rate": 4.893181373979914e-06, + "loss": 0.5739, + "step": 7712 + }, + { + "epoch": 0.5670489633877371, + "grad_norm": 0.8092986345291138, + "learning_rate": 4.893153507944997e-06, + "loss": 0.5645, + "step": 7713 + }, + { + "epoch": 0.5671224819879429, + "grad_norm": 0.8912092447280884, + "learning_rate": 4.893125638355184e-06, + "loss": 0.6267, + "step": 7714 + }, + { + "epoch": 0.5671960005881488, + "grad_norm": 0.8646382689476013, + "learning_rate": 4.893097765210512e-06, + "loss": 0.5799, + "step": 7715 + }, + { + "epoch": 0.5672695191883547, + "grad_norm": 0.8186856508255005, + "learning_rate": 4.893069888511026e-06, + "loss": 0.5713, + "step": 7716 + }, + { + "epoch": 0.5673430377885605, + "grad_norm": 0.8747276663780212, + "learning_rate": 4.893042008256764e-06, + "loss": 0.5683, + "step": 7717 + }, + { + "epoch": 0.5674165563887663, + "grad_norm": 0.8321971297264099, + "learning_rate": 4.89301412444777e-06, + "loss": 0.5977, + "step": 7718 + }, + { + "epoch": 0.5674900749889722, + "grad_norm": 0.8384507894515991, + "learning_rate": 4.892986237084085e-06, + "loss": 0.5751, + "step": 7719 + }, + { + "epoch": 0.5675635935891781, + "grad_norm": 0.8175206184387207, + "learning_rate": 4.892958346165749e-06, + "loss": 0.5657, + "step": 7720 + }, + { + "epoch": 0.567637112189384, + "grad_norm": 0.8342339396476746, + "learning_rate": 4.892930451692806e-06, + "loss": 0.5467, + "step": 7721 + }, + { + "epoch": 0.5677106307895897, + "grad_norm": 0.867856502532959, + "learning_rate": 4.892902553665294e-06, + "loss": 0.5536, + "step": 7722 + }, + { + "epoch": 0.5677841493897956, + "grad_norm": 0.8520252704620361, + "learning_rate": 4.892874652083257e-06, + "loss": 0.5723, + "step": 7723 + }, + { + "epoch": 0.5678576679900015, + "grad_norm": 0.805672287940979, + "learning_rate": 4.892846746946736e-06, + "loss": 0.5084, + "step": 7724 + }, + { + "epoch": 0.5679311865902074, + "grad_norm": 0.8068552613258362, + "learning_rate": 4.8928188382557725e-06, + "loss": 0.5682, + "step": 7725 + }, + { + "epoch": 0.5680047051904131, + "grad_norm": 0.8390793204307556, + "learning_rate": 4.892790926010407e-06, + "loss": 0.5937, + "step": 7726 + }, + { + "epoch": 0.568078223790619, + "grad_norm": 0.8488858342170715, + "learning_rate": 4.892763010210682e-06, + "loss": 0.5573, + "step": 7727 + }, + { + "epoch": 0.5681517423908249, + "grad_norm": 0.8742102980613708, + "learning_rate": 4.892735090856638e-06, + "loss": 0.5475, + "step": 7728 + }, + { + "epoch": 0.5682252609910308, + "grad_norm": 0.8171707391738892, + "learning_rate": 4.892707167948317e-06, + "loss": 0.5292, + "step": 7729 + }, + { + "epoch": 0.5682987795912365, + "grad_norm": 0.8320571780204773, + "learning_rate": 4.892679241485762e-06, + "loss": 0.522, + "step": 7730 + }, + { + "epoch": 0.5683722981914424, + "grad_norm": 0.784369707107544, + "learning_rate": 4.892651311469011e-06, + "loss": 0.5121, + "step": 7731 + }, + { + "epoch": 0.5684458167916483, + "grad_norm": 0.8071209788322449, + "learning_rate": 4.892623377898109e-06, + "loss": 0.5041, + "step": 7732 + }, + { + "epoch": 0.5685193353918542, + "grad_norm": 0.8096908330917358, + "learning_rate": 4.8925954407730945e-06, + "loss": 0.5373, + "step": 7733 + }, + { + "epoch": 0.5685928539920599, + "grad_norm": 0.8373882174491882, + "learning_rate": 4.892567500094012e-06, + "loss": 0.5662, + "step": 7734 + }, + { + "epoch": 0.5686663725922658, + "grad_norm": 0.8361291885375977, + "learning_rate": 4.8925395558609e-06, + "loss": 0.5573, + "step": 7735 + }, + { + "epoch": 0.5687398911924717, + "grad_norm": 0.8862065672874451, + "learning_rate": 4.892511608073804e-06, + "loss": 0.5507, + "step": 7736 + }, + { + "epoch": 0.5688134097926776, + "grad_norm": 0.8257156014442444, + "learning_rate": 4.892483656732761e-06, + "loss": 0.5254, + "step": 7737 + }, + { + "epoch": 0.5688869283928834, + "grad_norm": 0.8169510960578918, + "learning_rate": 4.892455701837814e-06, + "loss": 0.548, + "step": 7738 + }, + { + "epoch": 0.5689604469930892, + "grad_norm": 0.9441107511520386, + "learning_rate": 4.892427743389006e-06, + "loss": 0.5837, + "step": 7739 + }, + { + "epoch": 0.5690339655932951, + "grad_norm": 0.8715334534645081, + "learning_rate": 4.892399781386377e-06, + "loss": 0.5487, + "step": 7740 + }, + { + "epoch": 0.569107484193501, + "grad_norm": 0.8270304203033447, + "learning_rate": 4.89237181582997e-06, + "loss": 0.5222, + "step": 7741 + }, + { + "epoch": 0.5691810027937068, + "grad_norm": 0.8147298693656921, + "learning_rate": 4.892343846719826e-06, + "loss": 0.5033, + "step": 7742 + }, + { + "epoch": 0.5692545213939126, + "grad_norm": 0.7908262610435486, + "learning_rate": 4.892315874055985e-06, + "loss": 0.5363, + "step": 7743 + }, + { + "epoch": 0.5693280399941185, + "grad_norm": 0.8694459795951843, + "learning_rate": 4.892287897838491e-06, + "loss": 0.5674, + "step": 7744 + }, + { + "epoch": 0.5694015585943244, + "grad_norm": 0.8506972193717957, + "learning_rate": 4.892259918067383e-06, + "loss": 0.5379, + "step": 7745 + }, + { + "epoch": 0.5694750771945302, + "grad_norm": 0.8196699619293213, + "learning_rate": 4.892231934742705e-06, + "loss": 0.5314, + "step": 7746 + }, + { + "epoch": 0.569548595794736, + "grad_norm": 0.7981112599372864, + "learning_rate": 4.892203947864497e-06, + "loss": 0.5389, + "step": 7747 + }, + { + "epoch": 0.5696221143949419, + "grad_norm": 0.8313735723495483, + "learning_rate": 4.892175957432801e-06, + "loss": 0.5141, + "step": 7748 + }, + { + "epoch": 0.5696956329951478, + "grad_norm": 0.8720634579658508, + "learning_rate": 4.892147963447658e-06, + "loss": 0.5198, + "step": 7749 + }, + { + "epoch": 0.5697691515953536, + "grad_norm": 0.8370186686515808, + "learning_rate": 4.892119965909112e-06, + "loss": 0.5341, + "step": 7750 + }, + { + "epoch": 0.5698426701955595, + "grad_norm": 0.8670873641967773, + "learning_rate": 4.892091964817201e-06, + "loss": 0.5498, + "step": 7751 + }, + { + "epoch": 0.5699161887957653, + "grad_norm": 0.8414497971534729, + "learning_rate": 4.892063960171969e-06, + "loss": 0.5367, + "step": 7752 + }, + { + "epoch": 0.5699897073959712, + "grad_norm": 0.8241210579872131, + "learning_rate": 4.892035951973457e-06, + "loss": 0.5585, + "step": 7753 + }, + { + "epoch": 0.570063225996177, + "grad_norm": 0.82374107837677, + "learning_rate": 4.892007940221707e-06, + "loss": 0.542, + "step": 7754 + }, + { + "epoch": 0.5701367445963829, + "grad_norm": 0.879878580570221, + "learning_rate": 4.891979924916759e-06, + "loss": 0.549, + "step": 7755 + }, + { + "epoch": 0.5702102631965887, + "grad_norm": 0.8469676971435547, + "learning_rate": 4.891951906058658e-06, + "loss": 0.5706, + "step": 7756 + }, + { + "epoch": 0.5702837817967946, + "grad_norm": 0.8850369453430176, + "learning_rate": 4.891923883647441e-06, + "loss": 0.5639, + "step": 7757 + }, + { + "epoch": 0.5703573003970004, + "grad_norm": 0.7886282205581665, + "learning_rate": 4.8918958576831535e-06, + "loss": 0.4942, + "step": 7758 + }, + { + "epoch": 0.5704308189972063, + "grad_norm": 0.804046094417572, + "learning_rate": 4.891867828165835e-06, + "loss": 0.5169, + "step": 7759 + }, + { + "epoch": 0.5705043375974121, + "grad_norm": 0.8243350386619568, + "learning_rate": 4.8918397950955275e-06, + "loss": 0.5294, + "step": 7760 + }, + { + "epoch": 0.570577856197618, + "grad_norm": 0.878578245639801, + "learning_rate": 4.891811758472273e-06, + "loss": 0.5759, + "step": 7761 + }, + { + "epoch": 0.5706513747978238, + "grad_norm": 0.8392253518104553, + "learning_rate": 4.891783718296114e-06, + "loss": 0.5549, + "step": 7762 + }, + { + "epoch": 0.5707248933980297, + "grad_norm": 0.8526713848114014, + "learning_rate": 4.891755674567091e-06, + "loss": 0.5462, + "step": 7763 + }, + { + "epoch": 0.5707984119982356, + "grad_norm": 0.8233692049980164, + "learning_rate": 4.891727627285246e-06, + "loss": 0.5214, + "step": 7764 + }, + { + "epoch": 0.5708719305984414, + "grad_norm": 0.9389340281486511, + "learning_rate": 4.89169957645062e-06, + "loss": 0.6141, + "step": 7765 + }, + { + "epoch": 0.5709454491986472, + "grad_norm": 0.8631511926651001, + "learning_rate": 4.891671522063256e-06, + "loss": 0.5615, + "step": 7766 + }, + { + "epoch": 0.5710189677988531, + "grad_norm": 0.8184564709663391, + "learning_rate": 4.891643464123194e-06, + "loss": 0.5478, + "step": 7767 + }, + { + "epoch": 0.571092486399059, + "grad_norm": 0.8608691096305847, + "learning_rate": 4.891615402630478e-06, + "loss": 0.5313, + "step": 7768 + }, + { + "epoch": 0.5711660049992648, + "grad_norm": 0.8420376777648926, + "learning_rate": 4.8915873375851465e-06, + "loss": 0.5658, + "step": 7769 + }, + { + "epoch": 0.5712395235994706, + "grad_norm": 0.8318824768066406, + "learning_rate": 4.891559268987244e-06, + "loss": 0.5094, + "step": 7770 + }, + { + "epoch": 0.5713130421996765, + "grad_norm": 0.854723334312439, + "learning_rate": 4.891531196836811e-06, + "loss": 0.5408, + "step": 7771 + }, + { + "epoch": 0.5713865607998824, + "grad_norm": 0.8252547383308411, + "learning_rate": 4.891503121133889e-06, + "loss": 0.5767, + "step": 7772 + }, + { + "epoch": 0.5714600794000882, + "grad_norm": 0.8185114860534668, + "learning_rate": 4.891475041878521e-06, + "loss": 0.5653, + "step": 7773 + }, + { + "epoch": 0.5715335980002941, + "grad_norm": 0.7839376926422119, + "learning_rate": 4.891446959070747e-06, + "loss": 0.5521, + "step": 7774 + }, + { + "epoch": 0.5716071166004999, + "grad_norm": 0.8309715390205383, + "learning_rate": 4.891418872710609e-06, + "loss": 0.5466, + "step": 7775 + }, + { + "epoch": 0.5716806352007058, + "grad_norm": 0.8020691275596619, + "learning_rate": 4.891390782798151e-06, + "loss": 0.4822, + "step": 7776 + }, + { + "epoch": 0.5717541538009117, + "grad_norm": 0.8326172828674316, + "learning_rate": 4.891362689333412e-06, + "loss": 0.5298, + "step": 7777 + }, + { + "epoch": 0.5718276724011175, + "grad_norm": 0.8277500867843628, + "learning_rate": 4.891334592316434e-06, + "loss": 0.5774, + "step": 7778 + }, + { + "epoch": 0.5719011910013233, + "grad_norm": 0.8530204892158508, + "learning_rate": 4.89130649174726e-06, + "loss": 0.558, + "step": 7779 + }, + { + "epoch": 0.5719747096015292, + "grad_norm": 0.8495906591415405, + "learning_rate": 4.891278387625931e-06, + "loss": 0.5555, + "step": 7780 + }, + { + "epoch": 0.5720482282017351, + "grad_norm": 0.8742639422416687, + "learning_rate": 4.891250279952489e-06, + "loss": 0.5871, + "step": 7781 + }, + { + "epoch": 0.572121746801941, + "grad_norm": 0.8842779994010925, + "learning_rate": 4.891222168726976e-06, + "loss": 0.5474, + "step": 7782 + }, + { + "epoch": 0.5721952654021467, + "grad_norm": 0.8156639337539673, + "learning_rate": 4.891194053949433e-06, + "loss": 0.5374, + "step": 7783 + }, + { + "epoch": 0.5722687840023526, + "grad_norm": 0.8280581831932068, + "learning_rate": 4.891165935619902e-06, + "loss": 0.5621, + "step": 7784 + }, + { + "epoch": 0.5723423026025585, + "grad_norm": 0.8233302235603333, + "learning_rate": 4.891137813738425e-06, + "loss": 0.5142, + "step": 7785 + }, + { + "epoch": 0.5724158212027644, + "grad_norm": 0.8317558765411377, + "learning_rate": 4.891109688305044e-06, + "loss": 0.5489, + "step": 7786 + }, + { + "epoch": 0.5724893398029701, + "grad_norm": 0.8187234997749329, + "learning_rate": 4.8910815593198e-06, + "loss": 0.5357, + "step": 7787 + }, + { + "epoch": 0.572562858403176, + "grad_norm": 0.8571476340293884, + "learning_rate": 4.891053426782736e-06, + "loss": 0.5861, + "step": 7788 + }, + { + "epoch": 0.5726363770033819, + "grad_norm": 0.8228201270103455, + "learning_rate": 4.891025290693892e-06, + "loss": 0.516, + "step": 7789 + }, + { + "epoch": 0.5727098956035878, + "grad_norm": 0.8020432591438293, + "learning_rate": 4.890997151053312e-06, + "loss": 0.5245, + "step": 7790 + }, + { + "epoch": 0.5727834142037935, + "grad_norm": 0.8471818566322327, + "learning_rate": 4.890969007861036e-06, + "loss": 0.5941, + "step": 7791 + }, + { + "epoch": 0.5728569328039994, + "grad_norm": 0.8088469505310059, + "learning_rate": 4.890940861117107e-06, + "loss": 0.5394, + "step": 7792 + }, + { + "epoch": 0.5729304514042053, + "grad_norm": 0.8516833782196045, + "learning_rate": 4.890912710821566e-06, + "loss": 0.5479, + "step": 7793 + }, + { + "epoch": 0.5730039700044112, + "grad_norm": 0.8009675145149231, + "learning_rate": 4.890884556974456e-06, + "loss": 0.5418, + "step": 7794 + }, + { + "epoch": 0.5730774886046169, + "grad_norm": 0.7951147556304932, + "learning_rate": 4.890856399575816e-06, + "loss": 0.5027, + "step": 7795 + }, + { + "epoch": 0.5731510072048228, + "grad_norm": 0.8073939681053162, + "learning_rate": 4.890828238625691e-06, + "loss": 0.5504, + "step": 7796 + }, + { + "epoch": 0.5732245258050287, + "grad_norm": 0.8302463889122009, + "learning_rate": 4.8908000741241216e-06, + "loss": 0.5291, + "step": 7797 + }, + { + "epoch": 0.5732980444052346, + "grad_norm": 0.8427172899246216, + "learning_rate": 4.890771906071149e-06, + "loss": 0.563, + "step": 7798 + }, + { + "epoch": 0.5733715630054403, + "grad_norm": 0.8022068738937378, + "learning_rate": 4.890743734466816e-06, + "loss": 0.5399, + "step": 7799 + }, + { + "epoch": 0.5734450816056462, + "grad_norm": 0.7880488634109497, + "learning_rate": 4.890715559311164e-06, + "loss": 0.5101, + "step": 7800 + }, + { + "epoch": 0.5735186002058521, + "grad_norm": 0.9182371497154236, + "learning_rate": 4.890687380604235e-06, + "loss": 0.5573, + "step": 7801 + }, + { + "epoch": 0.573592118806058, + "grad_norm": 0.8213628530502319, + "learning_rate": 4.890659198346071e-06, + "loss": 0.5407, + "step": 7802 + }, + { + "epoch": 0.5736656374062638, + "grad_norm": 0.7738112211227417, + "learning_rate": 4.890631012536713e-06, + "loss": 0.5078, + "step": 7803 + }, + { + "epoch": 0.5737391560064696, + "grad_norm": 0.7923648357391357, + "learning_rate": 4.890602823176204e-06, + "loss": 0.53, + "step": 7804 + }, + { + "epoch": 0.5738126746066755, + "grad_norm": 0.865445077419281, + "learning_rate": 4.890574630264585e-06, + "loss": 0.5931, + "step": 7805 + }, + { + "epoch": 0.5738861932068814, + "grad_norm": 0.8540164232254028, + "learning_rate": 4.890546433801899e-06, + "loss": 0.559, + "step": 7806 + }, + { + "epoch": 0.5739597118070872, + "grad_norm": 0.8623993396759033, + "learning_rate": 4.8905182337881874e-06, + "loss": 0.5192, + "step": 7807 + }, + { + "epoch": 0.574033230407293, + "grad_norm": 0.8990058898925781, + "learning_rate": 4.890490030223491e-06, + "loss": 0.6247, + "step": 7808 + }, + { + "epoch": 0.5741067490074989, + "grad_norm": 0.8487695455551147, + "learning_rate": 4.890461823107853e-06, + "loss": 0.5773, + "step": 7809 + }, + { + "epoch": 0.5741802676077048, + "grad_norm": 0.8663483262062073, + "learning_rate": 4.8904336124413145e-06, + "loss": 0.5366, + "step": 7810 + }, + { + "epoch": 0.5742537862079106, + "grad_norm": 0.8220470547676086, + "learning_rate": 4.890405398223918e-06, + "loss": 0.5267, + "step": 7811 + }, + { + "epoch": 0.5743273048081164, + "grad_norm": 0.8340222239494324, + "learning_rate": 4.890377180455705e-06, + "loss": 0.5218, + "step": 7812 + }, + { + "epoch": 0.5744008234083223, + "grad_norm": 0.8765743374824524, + "learning_rate": 4.890348959136718e-06, + "loss": 0.546, + "step": 7813 + }, + { + "epoch": 0.5744743420085282, + "grad_norm": 0.7930842041969299, + "learning_rate": 4.890320734266999e-06, + "loss": 0.4882, + "step": 7814 + }, + { + "epoch": 0.574547860608734, + "grad_norm": 0.8074560761451721, + "learning_rate": 4.890292505846588e-06, + "loss": 0.4515, + "step": 7815 + }, + { + "epoch": 0.5746213792089399, + "grad_norm": 0.8301766514778137, + "learning_rate": 4.890264273875529e-06, + "loss": 0.5583, + "step": 7816 + }, + { + "epoch": 0.5746948978091457, + "grad_norm": 0.8434063196182251, + "learning_rate": 4.890236038353864e-06, + "loss": 0.5377, + "step": 7817 + }, + { + "epoch": 0.5747684164093516, + "grad_norm": 0.8020269870758057, + "learning_rate": 4.890207799281634e-06, + "loss": 0.5728, + "step": 7818 + }, + { + "epoch": 0.5748419350095574, + "grad_norm": 0.8744384050369263, + "learning_rate": 4.890179556658881e-06, + "loss": 0.5633, + "step": 7819 + }, + { + "epoch": 0.5749154536097633, + "grad_norm": 0.8196881413459778, + "learning_rate": 4.890151310485646e-06, + "loss": 0.5296, + "step": 7820 + }, + { + "epoch": 0.5749889722099691, + "grad_norm": 0.8382379412651062, + "learning_rate": 4.890123060761973e-06, + "loss": 0.5721, + "step": 7821 + }, + { + "epoch": 0.575062490810175, + "grad_norm": 0.8189162611961365, + "learning_rate": 4.890094807487904e-06, + "loss": 0.5458, + "step": 7822 + }, + { + "epoch": 0.5751360094103808, + "grad_norm": 0.8722888827323914, + "learning_rate": 4.890066550663479e-06, + "loss": 0.5879, + "step": 7823 + }, + { + "epoch": 0.5752095280105867, + "grad_norm": 0.8287116885185242, + "learning_rate": 4.890038290288741e-06, + "loss": 0.6003, + "step": 7824 + }, + { + "epoch": 0.5752830466107925, + "grad_norm": 0.8423541188240051, + "learning_rate": 4.890010026363732e-06, + "loss": 0.5702, + "step": 7825 + }, + { + "epoch": 0.5753565652109984, + "grad_norm": 0.7877045273780823, + "learning_rate": 4.889981758888494e-06, + "loss": 0.5459, + "step": 7826 + }, + { + "epoch": 0.5754300838112042, + "grad_norm": 0.8220041394233704, + "learning_rate": 4.889953487863069e-06, + "loss": 0.5421, + "step": 7827 + }, + { + "epoch": 0.5755036024114101, + "grad_norm": 0.8303972482681274, + "learning_rate": 4.889925213287499e-06, + "loss": 0.5151, + "step": 7828 + }, + { + "epoch": 0.575577121011616, + "grad_norm": 0.8381295204162598, + "learning_rate": 4.889896935161827e-06, + "loss": 0.5621, + "step": 7829 + }, + { + "epoch": 0.5756506396118218, + "grad_norm": 0.786014199256897, + "learning_rate": 4.889868653486093e-06, + "loss": 0.4996, + "step": 7830 + }, + { + "epoch": 0.5757241582120276, + "grad_norm": 0.8112921714782715, + "learning_rate": 4.88984036826034e-06, + "loss": 0.5424, + "step": 7831 + }, + { + "epoch": 0.5757976768122335, + "grad_norm": 0.8529176115989685, + "learning_rate": 4.88981207948461e-06, + "loss": 0.5531, + "step": 7832 + }, + { + "epoch": 0.5758711954124394, + "grad_norm": 0.8273711204528809, + "learning_rate": 4.889783787158946e-06, + "loss": 0.5564, + "step": 7833 + }, + { + "epoch": 0.5759447140126452, + "grad_norm": 0.8001328706741333, + "learning_rate": 4.889755491283388e-06, + "loss": 0.515, + "step": 7834 + }, + { + "epoch": 0.576018232612851, + "grad_norm": 0.8169440627098083, + "learning_rate": 4.889727191857979e-06, + "loss": 0.545, + "step": 7835 + }, + { + "epoch": 0.5760917512130569, + "grad_norm": 0.8305037021636963, + "learning_rate": 4.889698888882761e-06, + "loss": 0.4979, + "step": 7836 + }, + { + "epoch": 0.5761652698132628, + "grad_norm": 0.8149951696395874, + "learning_rate": 4.889670582357777e-06, + "loss": 0.5441, + "step": 7837 + }, + { + "epoch": 0.5762387884134686, + "grad_norm": 0.7783148884773254, + "learning_rate": 4.889642272283067e-06, + "loss": 0.5032, + "step": 7838 + }, + { + "epoch": 0.5763123070136744, + "grad_norm": 0.8488932251930237, + "learning_rate": 4.8896139586586755e-06, + "loss": 0.5679, + "step": 7839 + }, + { + "epoch": 0.5763858256138803, + "grad_norm": 0.8307912349700928, + "learning_rate": 4.889585641484642e-06, + "loss": 0.4746, + "step": 7840 + }, + { + "epoch": 0.5764593442140862, + "grad_norm": 0.8703557848930359, + "learning_rate": 4.889557320761012e-06, + "loss": 0.5756, + "step": 7841 + }, + { + "epoch": 0.5765328628142921, + "grad_norm": 0.8379648923873901, + "learning_rate": 4.889528996487823e-06, + "loss": 0.54, + "step": 7842 + }, + { + "epoch": 0.5766063814144978, + "grad_norm": 0.8527658581733704, + "learning_rate": 4.8895006686651214e-06, + "loss": 0.5533, + "step": 7843 + }, + { + "epoch": 0.5766799000147037, + "grad_norm": 0.867982029914856, + "learning_rate": 4.889472337292947e-06, + "loss": 0.5419, + "step": 7844 + }, + { + "epoch": 0.5767534186149096, + "grad_norm": 0.8670473694801331, + "learning_rate": 4.88944400237134e-06, + "loss": 0.5254, + "step": 7845 + }, + { + "epoch": 0.5768269372151155, + "grad_norm": 0.8580642938613892, + "learning_rate": 4.889415663900348e-06, + "loss": 0.5, + "step": 7846 + }, + { + "epoch": 0.5769004558153212, + "grad_norm": 0.8492301106452942, + "learning_rate": 4.889387321880008e-06, + "loss": 0.5568, + "step": 7847 + }, + { + "epoch": 0.5769739744155271, + "grad_norm": 0.8904550671577454, + "learning_rate": 4.889358976310365e-06, + "loss": 0.5467, + "step": 7848 + }, + { + "epoch": 0.577047493015733, + "grad_norm": 0.8358099460601807, + "learning_rate": 4.88933062719146e-06, + "loss": 0.5493, + "step": 7849 + }, + { + "epoch": 0.5771210116159389, + "grad_norm": 0.8584602475166321, + "learning_rate": 4.8893022745233346e-06, + "loss": 0.5937, + "step": 7850 + }, + { + "epoch": 0.5771945302161446, + "grad_norm": 0.8729233741760254, + "learning_rate": 4.889273918306031e-06, + "loss": 0.5759, + "step": 7851 + }, + { + "epoch": 0.5772680488163505, + "grad_norm": 0.9079208970069885, + "learning_rate": 4.889245558539592e-06, + "loss": 0.5886, + "step": 7852 + }, + { + "epoch": 0.5773415674165564, + "grad_norm": 0.8172531723976135, + "learning_rate": 4.889217195224061e-06, + "loss": 0.5793, + "step": 7853 + }, + { + "epoch": 0.5774150860167623, + "grad_norm": 0.8746456503868103, + "learning_rate": 4.889188828359478e-06, + "loss": 0.5725, + "step": 7854 + }, + { + "epoch": 0.577488604616968, + "grad_norm": 0.8240705728530884, + "learning_rate": 4.889160457945885e-06, + "loss": 0.5661, + "step": 7855 + }, + { + "epoch": 0.5775621232171739, + "grad_norm": 0.8192330598831177, + "learning_rate": 4.889132083983326e-06, + "loss": 0.4885, + "step": 7856 + }, + { + "epoch": 0.5776356418173798, + "grad_norm": 0.8351190090179443, + "learning_rate": 4.889103706471841e-06, + "loss": 0.5223, + "step": 7857 + }, + { + "epoch": 0.5777091604175857, + "grad_norm": 0.8505154848098755, + "learning_rate": 4.889075325411475e-06, + "loss": 0.5711, + "step": 7858 + }, + { + "epoch": 0.5777826790177915, + "grad_norm": 0.8140507936477661, + "learning_rate": 4.889046940802267e-06, + "loss": 0.5424, + "step": 7859 + }, + { + "epoch": 0.5778561976179973, + "grad_norm": 0.9018070101737976, + "learning_rate": 4.889018552644261e-06, + "loss": 0.5782, + "step": 7860 + }, + { + "epoch": 0.5779297162182032, + "grad_norm": 0.8882020115852356, + "learning_rate": 4.888990160937499e-06, + "loss": 0.5345, + "step": 7861 + }, + { + "epoch": 0.5780032348184091, + "grad_norm": 0.8739079236984253, + "learning_rate": 4.888961765682022e-06, + "loss": 0.5993, + "step": 7862 + }, + { + "epoch": 0.5780767534186149, + "grad_norm": 0.8678872585296631, + "learning_rate": 4.888933366877874e-06, + "loss": 0.5304, + "step": 7863 + }, + { + "epoch": 0.5781502720188207, + "grad_norm": 0.8639071583747864, + "learning_rate": 4.888904964525097e-06, + "loss": 0.554, + "step": 7864 + }, + { + "epoch": 0.5782237906190266, + "grad_norm": 0.8268415927886963, + "learning_rate": 4.888876558623731e-06, + "loss": 0.549, + "step": 7865 + }, + { + "epoch": 0.5782973092192325, + "grad_norm": 0.8302285671234131, + "learning_rate": 4.88884814917382e-06, + "loss": 0.5455, + "step": 7866 + }, + { + "epoch": 0.5783708278194383, + "grad_norm": 0.8445565700531006, + "learning_rate": 4.888819736175406e-06, + "loss": 0.5324, + "step": 7867 + }, + { + "epoch": 0.5784443464196442, + "grad_norm": 0.8601003289222717, + "learning_rate": 4.8887913196285315e-06, + "loss": 0.5784, + "step": 7868 + }, + { + "epoch": 0.57851786501985, + "grad_norm": 0.8935721516609192, + "learning_rate": 4.8887628995332385e-06, + "loss": 0.5928, + "step": 7869 + }, + { + "epoch": 0.5785913836200559, + "grad_norm": 0.7975568175315857, + "learning_rate": 4.8887344758895685e-06, + "loss": 0.578, + "step": 7870 + }, + { + "epoch": 0.5786649022202617, + "grad_norm": 0.8484026789665222, + "learning_rate": 4.888706048697563e-06, + "loss": 0.5804, + "step": 7871 + }, + { + "epoch": 0.5787384208204676, + "grad_norm": 0.8825498223304749, + "learning_rate": 4.888677617957267e-06, + "loss": 0.5864, + "step": 7872 + }, + { + "epoch": 0.5788119394206734, + "grad_norm": 0.9387716054916382, + "learning_rate": 4.888649183668721e-06, + "loss": 0.5644, + "step": 7873 + }, + { + "epoch": 0.5788854580208793, + "grad_norm": 0.8363198637962341, + "learning_rate": 4.888620745831968e-06, + "loss": 0.5352, + "step": 7874 + }, + { + "epoch": 0.5789589766210851, + "grad_norm": 0.8598242402076721, + "learning_rate": 4.888592304447049e-06, + "loss": 0.5442, + "step": 7875 + }, + { + "epoch": 0.579032495221291, + "grad_norm": 0.8687200546264648, + "learning_rate": 4.888563859514007e-06, + "loss": 0.5313, + "step": 7876 + }, + { + "epoch": 0.5791060138214968, + "grad_norm": 0.8788819909095764, + "learning_rate": 4.888535411032884e-06, + "loss": 0.5567, + "step": 7877 + }, + { + "epoch": 0.5791795324217027, + "grad_norm": 0.8881429433822632, + "learning_rate": 4.888506959003723e-06, + "loss": 0.5915, + "step": 7878 + }, + { + "epoch": 0.5792530510219085, + "grad_norm": 0.8357433676719666, + "learning_rate": 4.8884785034265644e-06, + "loss": 0.5296, + "step": 7879 + }, + { + "epoch": 0.5793265696221144, + "grad_norm": 0.8367363214492798, + "learning_rate": 4.8884500443014534e-06, + "loss": 0.5524, + "step": 7880 + }, + { + "epoch": 0.5794000882223203, + "grad_norm": 0.8602146506309509, + "learning_rate": 4.88842158162843e-06, + "loss": 0.5719, + "step": 7881 + }, + { + "epoch": 0.5794736068225261, + "grad_norm": 0.8689234852790833, + "learning_rate": 4.888393115407537e-06, + "loss": 0.5924, + "step": 7882 + }, + { + "epoch": 0.5795471254227319, + "grad_norm": 0.851992130279541, + "learning_rate": 4.888364645638817e-06, + "loss": 0.5342, + "step": 7883 + }, + { + "epoch": 0.5796206440229378, + "grad_norm": 0.8485741019248962, + "learning_rate": 4.888336172322312e-06, + "loss": 0.5501, + "step": 7884 + }, + { + "epoch": 0.5796941626231437, + "grad_norm": 0.8570536971092224, + "learning_rate": 4.888307695458064e-06, + "loss": 0.5644, + "step": 7885 + }, + { + "epoch": 0.5797676812233495, + "grad_norm": 0.786894679069519, + "learning_rate": 4.888279215046116e-06, + "loss": 0.512, + "step": 7886 + }, + { + "epoch": 0.5798411998235553, + "grad_norm": 0.9227602481842041, + "learning_rate": 4.88825073108651e-06, + "loss": 0.5842, + "step": 7887 + }, + { + "epoch": 0.5799147184237612, + "grad_norm": 0.8100396394729614, + "learning_rate": 4.888222243579289e-06, + "loss": 0.5538, + "step": 7888 + }, + { + "epoch": 0.5799882370239671, + "grad_norm": 0.8680819272994995, + "learning_rate": 4.888193752524494e-06, + "loss": 0.5327, + "step": 7889 + }, + { + "epoch": 0.580061755624173, + "grad_norm": 0.8673941493034363, + "learning_rate": 4.888165257922167e-06, + "loss": 0.5733, + "step": 7890 + }, + { + "epoch": 0.5801352742243787, + "grad_norm": 0.8544667363166809, + "learning_rate": 4.8881367597723525e-06, + "loss": 0.5446, + "step": 7891 + }, + { + "epoch": 0.5802087928245846, + "grad_norm": 0.8518598675727844, + "learning_rate": 4.888108258075092e-06, + "loss": 0.5392, + "step": 7892 + }, + { + "epoch": 0.5802823114247905, + "grad_norm": 0.8825533986091614, + "learning_rate": 4.888079752830427e-06, + "loss": 0.5476, + "step": 7893 + }, + { + "epoch": 0.5803558300249964, + "grad_norm": 0.8099077939987183, + "learning_rate": 4.8880512440384e-06, + "loss": 0.5114, + "step": 7894 + }, + { + "epoch": 0.5804293486252021, + "grad_norm": 0.8321529626846313, + "learning_rate": 4.888022731699054e-06, + "loss": 0.575, + "step": 7895 + }, + { + "epoch": 0.580502867225408, + "grad_norm": 0.8599364757537842, + "learning_rate": 4.88799421581243e-06, + "loss": 0.5376, + "step": 7896 + }, + { + "epoch": 0.5805763858256139, + "grad_norm": 0.8105636239051819, + "learning_rate": 4.887965696378573e-06, + "loss": 0.5856, + "step": 7897 + }, + { + "epoch": 0.5806499044258198, + "grad_norm": 0.899621844291687, + "learning_rate": 4.887937173397522e-06, + "loss": 0.5763, + "step": 7898 + }, + { + "epoch": 0.5807234230260255, + "grad_norm": 0.9010258913040161, + "learning_rate": 4.887908646869322e-06, + "loss": 0.5707, + "step": 7899 + }, + { + "epoch": 0.5807969416262314, + "grad_norm": 0.8129716515541077, + "learning_rate": 4.887880116794015e-06, + "loss": 0.5123, + "step": 7900 + }, + { + "epoch": 0.5808704602264373, + "grad_norm": 0.8790016770362854, + "learning_rate": 4.8878515831716415e-06, + "loss": 0.5874, + "step": 7901 + }, + { + "epoch": 0.5809439788266432, + "grad_norm": 0.7950918078422546, + "learning_rate": 4.8878230460022455e-06, + "loss": 0.5157, + "step": 7902 + }, + { + "epoch": 0.5810174974268489, + "grad_norm": 0.873823881149292, + "learning_rate": 4.88779450528587e-06, + "loss": 0.5594, + "step": 7903 + }, + { + "epoch": 0.5810910160270548, + "grad_norm": 0.8850948214530945, + "learning_rate": 4.887765961022556e-06, + "loss": 0.5472, + "step": 7904 + }, + { + "epoch": 0.5811645346272607, + "grad_norm": 0.8747403621673584, + "learning_rate": 4.887737413212347e-06, + "loss": 0.5446, + "step": 7905 + }, + { + "epoch": 0.5812380532274666, + "grad_norm": 0.9052395224571228, + "learning_rate": 4.887708861855284e-06, + "loss": 0.5542, + "step": 7906 + }, + { + "epoch": 0.5813115718276723, + "grad_norm": 0.8508394956588745, + "learning_rate": 4.8876803069514104e-06, + "loss": 0.5559, + "step": 7907 + }, + { + "epoch": 0.5813850904278782, + "grad_norm": 0.858822762966156, + "learning_rate": 4.887651748500769e-06, + "loss": 0.5327, + "step": 7908 + }, + { + "epoch": 0.5814586090280841, + "grad_norm": 0.8748155832290649, + "learning_rate": 4.8876231865034015e-06, + "loss": 0.5215, + "step": 7909 + }, + { + "epoch": 0.58153212762829, + "grad_norm": 0.8350159525871277, + "learning_rate": 4.88759462095935e-06, + "loss": 0.5343, + "step": 7910 + }, + { + "epoch": 0.5816056462284958, + "grad_norm": 0.8046433925628662, + "learning_rate": 4.887566051868657e-06, + "loss": 0.5547, + "step": 7911 + }, + { + "epoch": 0.5816791648287016, + "grad_norm": 0.8416337370872498, + "learning_rate": 4.8875374792313665e-06, + "loss": 0.534, + "step": 7912 + }, + { + "epoch": 0.5817526834289075, + "grad_norm": 0.7805633544921875, + "learning_rate": 4.887508903047519e-06, + "loss": 0.5084, + "step": 7913 + }, + { + "epoch": 0.5818262020291134, + "grad_norm": 0.8780969381332397, + "learning_rate": 4.887480323317159e-06, + "loss": 0.5218, + "step": 7914 + }, + { + "epoch": 0.5818997206293193, + "grad_norm": 0.7955661416053772, + "learning_rate": 4.8874517400403265e-06, + "loss": 0.5408, + "step": 7915 + }, + { + "epoch": 0.581973239229525, + "grad_norm": 0.9501274228096008, + "learning_rate": 4.887423153217065e-06, + "loss": 0.5668, + "step": 7916 + }, + { + "epoch": 0.5820467578297309, + "grad_norm": 0.7996043562889099, + "learning_rate": 4.887394562847419e-06, + "loss": 0.5258, + "step": 7917 + }, + { + "epoch": 0.5821202764299368, + "grad_norm": 0.8955947160720825, + "learning_rate": 4.887365968931427e-06, + "loss": 0.5384, + "step": 7918 + }, + { + "epoch": 0.5821937950301427, + "grad_norm": 0.8635971546173096, + "learning_rate": 4.887337371469135e-06, + "loss": 0.5615, + "step": 7919 + }, + { + "epoch": 0.5822673136303484, + "grad_norm": 0.8788906335830688, + "learning_rate": 4.8873087704605835e-06, + "loss": 0.5373, + "step": 7920 + }, + { + "epoch": 0.5823408322305543, + "grad_norm": 0.8512932658195496, + "learning_rate": 4.887280165905815e-06, + "loss": 0.5654, + "step": 7921 + }, + { + "epoch": 0.5824143508307602, + "grad_norm": 0.8642678260803223, + "learning_rate": 4.887251557804873e-06, + "loss": 0.5927, + "step": 7922 + }, + { + "epoch": 0.5824878694309661, + "grad_norm": 0.9098982214927673, + "learning_rate": 4.887222946157799e-06, + "loss": 0.615, + "step": 7923 + }, + { + "epoch": 0.5825613880311719, + "grad_norm": 0.7921795845031738, + "learning_rate": 4.8871943309646375e-06, + "loss": 0.5393, + "step": 7924 + }, + { + "epoch": 0.5826349066313777, + "grad_norm": 0.7994734048843384, + "learning_rate": 4.887165712225428e-06, + "loss": 0.4996, + "step": 7925 + }, + { + "epoch": 0.5827084252315836, + "grad_norm": 0.8369468450546265, + "learning_rate": 4.887137089940215e-06, + "loss": 0.5215, + "step": 7926 + }, + { + "epoch": 0.5827819438317895, + "grad_norm": 0.8364464640617371, + "learning_rate": 4.8871084641090415e-06, + "loss": 0.5681, + "step": 7927 + }, + { + "epoch": 0.5828554624319953, + "grad_norm": 0.830049991607666, + "learning_rate": 4.887079834731948e-06, + "loss": 0.5466, + "step": 7928 + }, + { + "epoch": 0.5829289810322011, + "grad_norm": 0.8375780582427979, + "learning_rate": 4.887051201808979e-06, + "loss": 0.5763, + "step": 7929 + }, + { + "epoch": 0.583002499632407, + "grad_norm": 0.8460109829902649, + "learning_rate": 4.887022565340175e-06, + "loss": 0.5678, + "step": 7930 + }, + { + "epoch": 0.5830760182326129, + "grad_norm": 0.8881986737251282, + "learning_rate": 4.886993925325581e-06, + "loss": 0.6134, + "step": 7931 + }, + { + "epoch": 0.5831495368328187, + "grad_norm": 0.8586279153823853, + "learning_rate": 4.886965281765237e-06, + "loss": 0.5581, + "step": 7932 + }, + { + "epoch": 0.5832230554330246, + "grad_norm": 0.8380564451217651, + "learning_rate": 4.886936634659188e-06, + "loss": 0.5807, + "step": 7933 + }, + { + "epoch": 0.5832965740332304, + "grad_norm": 0.8503848910331726, + "learning_rate": 4.886907984007474e-06, + "loss": 0.4952, + "step": 7934 + }, + { + "epoch": 0.5833700926334363, + "grad_norm": 0.8144093155860901, + "learning_rate": 4.886879329810139e-06, + "loss": 0.5495, + "step": 7935 + }, + { + "epoch": 0.5834436112336421, + "grad_norm": 0.8907766342163086, + "learning_rate": 4.886850672067227e-06, + "loss": 0.6005, + "step": 7936 + }, + { + "epoch": 0.583517129833848, + "grad_norm": 0.7966369390487671, + "learning_rate": 4.886822010778778e-06, + "loss": 0.5037, + "step": 7937 + }, + { + "epoch": 0.5835906484340538, + "grad_norm": 0.8290678262710571, + "learning_rate": 4.886793345944835e-06, + "loss": 0.5457, + "step": 7938 + }, + { + "epoch": 0.5836641670342597, + "grad_norm": 0.8451236486434937, + "learning_rate": 4.886764677565442e-06, + "loss": 0.5515, + "step": 7939 + }, + { + "epoch": 0.5837376856344655, + "grad_norm": 0.8624398112297058, + "learning_rate": 4.88673600564064e-06, + "loss": 0.5304, + "step": 7940 + }, + { + "epoch": 0.5838112042346714, + "grad_norm": 0.9001532196998596, + "learning_rate": 4.886707330170474e-06, + "loss": 0.5841, + "step": 7941 + }, + { + "epoch": 0.5838847228348772, + "grad_norm": 0.8235920071601868, + "learning_rate": 4.886678651154983e-06, + "loss": 0.5622, + "step": 7942 + }, + { + "epoch": 0.5839582414350831, + "grad_norm": 0.8364502191543579, + "learning_rate": 4.886649968594212e-06, + "loss": 0.5418, + "step": 7943 + }, + { + "epoch": 0.5840317600352889, + "grad_norm": 0.8329149484634399, + "learning_rate": 4.886621282488204e-06, + "loss": 0.5437, + "step": 7944 + }, + { + "epoch": 0.5841052786354948, + "grad_norm": 0.836245059967041, + "learning_rate": 4.886592592837001e-06, + "loss": 0.5391, + "step": 7945 + }, + { + "epoch": 0.5841787972357007, + "grad_norm": 0.8481488823890686, + "learning_rate": 4.8865638996406444e-06, + "loss": 0.5515, + "step": 7946 + }, + { + "epoch": 0.5842523158359065, + "grad_norm": 0.8070008158683777, + "learning_rate": 4.886535202899179e-06, + "loss": 0.5533, + "step": 7947 + }, + { + "epoch": 0.5843258344361123, + "grad_norm": 0.7823443412780762, + "learning_rate": 4.886506502612644e-06, + "loss": 0.4968, + "step": 7948 + }, + { + "epoch": 0.5843993530363182, + "grad_norm": 0.833154022693634, + "learning_rate": 4.886477798781086e-06, + "loss": 0.5554, + "step": 7949 + }, + { + "epoch": 0.5844728716365241, + "grad_norm": 0.8056250810623169, + "learning_rate": 4.886449091404546e-06, + "loss": 0.5281, + "step": 7950 + }, + { + "epoch": 0.5845463902367299, + "grad_norm": 0.8432762622833252, + "learning_rate": 4.8864203804830664e-06, + "loss": 0.4973, + "step": 7951 + }, + { + "epoch": 0.5846199088369357, + "grad_norm": 0.8329969048500061, + "learning_rate": 4.8863916660166895e-06, + "loss": 0.5442, + "step": 7952 + }, + { + "epoch": 0.5846934274371416, + "grad_norm": 0.7814478278160095, + "learning_rate": 4.8863629480054585e-06, + "loss": 0.5066, + "step": 7953 + }, + { + "epoch": 0.5847669460373475, + "grad_norm": 0.8306936621665955, + "learning_rate": 4.886334226449417e-06, + "loss": 0.5563, + "step": 7954 + }, + { + "epoch": 0.5848404646375533, + "grad_norm": 0.7907067537307739, + "learning_rate": 4.886305501348605e-06, + "loss": 0.519, + "step": 7955 + }, + { + "epoch": 0.5849139832377591, + "grad_norm": 0.7687763571739197, + "learning_rate": 4.886276772703068e-06, + "loss": 0.548, + "step": 7956 + }, + { + "epoch": 0.584987501837965, + "grad_norm": 0.8509412407875061, + "learning_rate": 4.886248040512848e-06, + "loss": 0.5643, + "step": 7957 + }, + { + "epoch": 0.5850610204381709, + "grad_norm": 0.8120266795158386, + "learning_rate": 4.886219304777986e-06, + "loss": 0.5484, + "step": 7958 + }, + { + "epoch": 0.5851345390383768, + "grad_norm": 0.8538793325424194, + "learning_rate": 4.886190565498527e-06, + "loss": 0.5591, + "step": 7959 + }, + { + "epoch": 0.5852080576385825, + "grad_norm": 0.8749871850013733, + "learning_rate": 4.8861618226745115e-06, + "loss": 0.5715, + "step": 7960 + }, + { + "epoch": 0.5852815762387884, + "grad_norm": 0.8834674954414368, + "learning_rate": 4.886133076305984e-06, + "loss": 0.5546, + "step": 7961 + }, + { + "epoch": 0.5853550948389943, + "grad_norm": 0.8472704291343689, + "learning_rate": 4.8861043263929855e-06, + "loss": 0.5538, + "step": 7962 + }, + { + "epoch": 0.5854286134392002, + "grad_norm": 0.8881286382675171, + "learning_rate": 4.88607557293556e-06, + "loss": 0.5697, + "step": 7963 + }, + { + "epoch": 0.5855021320394059, + "grad_norm": 0.8817333579063416, + "learning_rate": 4.88604681593375e-06, + "loss": 0.5682, + "step": 7964 + }, + { + "epoch": 0.5855756506396118, + "grad_norm": 0.8210345506668091, + "learning_rate": 4.886018055387599e-06, + "loss": 0.5788, + "step": 7965 + }, + { + "epoch": 0.5856491692398177, + "grad_norm": 0.8256205916404724, + "learning_rate": 4.885989291297148e-06, + "loss": 0.5363, + "step": 7966 + }, + { + "epoch": 0.5857226878400236, + "grad_norm": 0.8636623024940491, + "learning_rate": 4.885960523662441e-06, + "loss": 0.5193, + "step": 7967 + }, + { + "epoch": 0.5857962064402293, + "grad_norm": 0.897546648979187, + "learning_rate": 4.88593175248352e-06, + "loss": 0.5606, + "step": 7968 + }, + { + "epoch": 0.5858697250404352, + "grad_norm": 0.7759339809417725, + "learning_rate": 4.885902977760428e-06, + "loss": 0.5272, + "step": 7969 + }, + { + "epoch": 0.5859432436406411, + "grad_norm": 0.8008965253829956, + "learning_rate": 4.885874199493208e-06, + "loss": 0.5498, + "step": 7970 + }, + { + "epoch": 0.586016762240847, + "grad_norm": 0.8296404480934143, + "learning_rate": 4.885845417681903e-06, + "loss": 0.5317, + "step": 7971 + }, + { + "epoch": 0.5860902808410527, + "grad_norm": 0.8403415083885193, + "learning_rate": 4.885816632326554e-06, + "loss": 0.555, + "step": 7972 + }, + { + "epoch": 0.5861637994412586, + "grad_norm": 0.8049166798591614, + "learning_rate": 4.885787843427206e-06, + "loss": 0.5448, + "step": 7973 + }, + { + "epoch": 0.5862373180414645, + "grad_norm": 0.8544232249259949, + "learning_rate": 4.8857590509839e-06, + "loss": 0.5464, + "step": 7974 + }, + { + "epoch": 0.5863108366416704, + "grad_norm": 0.8367577791213989, + "learning_rate": 4.88573025499668e-06, + "loss": 0.5391, + "step": 7975 + }, + { + "epoch": 0.5863843552418762, + "grad_norm": 0.8365288972854614, + "learning_rate": 4.885701455465589e-06, + "loss": 0.5359, + "step": 7976 + }, + { + "epoch": 0.586457873842082, + "grad_norm": 0.8778634071350098, + "learning_rate": 4.8856726523906685e-06, + "loss": 0.5862, + "step": 7977 + }, + { + "epoch": 0.5865313924422879, + "grad_norm": 0.8363059163093567, + "learning_rate": 4.885643845771962e-06, + "loss": 0.5538, + "step": 7978 + }, + { + "epoch": 0.5866049110424938, + "grad_norm": 0.8436989784240723, + "learning_rate": 4.885615035609512e-06, + "loss": 0.5343, + "step": 7979 + }, + { + "epoch": 0.5866784296426996, + "grad_norm": 0.8185303807258606, + "learning_rate": 4.8855862219033625e-06, + "loss": 0.5059, + "step": 7980 + }, + { + "epoch": 0.5867519482429054, + "grad_norm": 0.8205084204673767, + "learning_rate": 4.885557404653554e-06, + "loss": 0.5382, + "step": 7981 + }, + { + "epoch": 0.5868254668431113, + "grad_norm": 0.9389173984527588, + "learning_rate": 4.885528583860131e-06, + "loss": 0.6446, + "step": 7982 + }, + { + "epoch": 0.5868989854433172, + "grad_norm": 0.8160080909729004, + "learning_rate": 4.885499759523136e-06, + "loss": 0.5315, + "step": 7983 + }, + { + "epoch": 0.586972504043523, + "grad_norm": 0.829311728477478, + "learning_rate": 4.8854709316426115e-06, + "loss": 0.5391, + "step": 7984 + }, + { + "epoch": 0.5870460226437288, + "grad_norm": 0.8463708162307739, + "learning_rate": 4.885442100218601e-06, + "loss": 0.5699, + "step": 7985 + }, + { + "epoch": 0.5871195412439347, + "grad_norm": 0.8433619737625122, + "learning_rate": 4.885413265251147e-06, + "loss": 0.5476, + "step": 7986 + }, + { + "epoch": 0.5871930598441406, + "grad_norm": 0.8639633655548096, + "learning_rate": 4.8853844267402914e-06, + "loss": 0.5726, + "step": 7987 + }, + { + "epoch": 0.5872665784443464, + "grad_norm": 0.8421056270599365, + "learning_rate": 4.8853555846860786e-06, + "loss": 0.5638, + "step": 7988 + }, + { + "epoch": 0.5873400970445523, + "grad_norm": 0.8221138715744019, + "learning_rate": 4.885326739088551e-06, + "loss": 0.5077, + "step": 7989 + }, + { + "epoch": 0.5874136156447581, + "grad_norm": 0.8662703633308411, + "learning_rate": 4.88529788994775e-06, + "loss": 0.5531, + "step": 7990 + }, + { + "epoch": 0.587487134244964, + "grad_norm": 0.8379670977592468, + "learning_rate": 4.88526903726372e-06, + "loss": 0.5272, + "step": 7991 + }, + { + "epoch": 0.5875606528451698, + "grad_norm": 0.8466066718101501, + "learning_rate": 4.885240181036504e-06, + "loss": 0.5757, + "step": 7992 + }, + { + "epoch": 0.5876341714453757, + "grad_norm": 0.841617226600647, + "learning_rate": 4.885211321266143e-06, + "loss": 0.5436, + "step": 7993 + }, + { + "epoch": 0.5877076900455815, + "grad_norm": 0.8436275124549866, + "learning_rate": 4.885182457952683e-06, + "loss": 0.5269, + "step": 7994 + }, + { + "epoch": 0.5877812086457874, + "grad_norm": 0.8619831800460815, + "learning_rate": 4.885153591096164e-06, + "loss": 0.5391, + "step": 7995 + }, + { + "epoch": 0.5878547272459932, + "grad_norm": 0.8847826719284058, + "learning_rate": 4.88512472069663e-06, + "loss": 0.5629, + "step": 7996 + }, + { + "epoch": 0.5879282458461991, + "grad_norm": 0.899716854095459, + "learning_rate": 4.885095846754124e-06, + "loss": 0.5577, + "step": 7997 + }, + { + "epoch": 0.588001764446405, + "grad_norm": 0.8389831185340881, + "learning_rate": 4.885066969268689e-06, + "loss": 0.5712, + "step": 7998 + }, + { + "epoch": 0.5880752830466108, + "grad_norm": 0.8381837606430054, + "learning_rate": 4.885038088240367e-06, + "loss": 0.5679, + "step": 7999 + }, + { + "epoch": 0.5881488016468166, + "grad_norm": 0.8633168339729309, + "learning_rate": 4.885009203669202e-06, + "loss": 0.5462, + "step": 8000 + }, + { + "epoch": 0.5882223202470225, + "grad_norm": 0.8475168943405151, + "learning_rate": 4.8849803155552365e-06, + "loss": 0.5495, + "step": 8001 + }, + { + "epoch": 0.5882958388472284, + "grad_norm": 0.8232252597808838, + "learning_rate": 4.8849514238985125e-06, + "loss": 0.5773, + "step": 8002 + }, + { + "epoch": 0.5883693574474342, + "grad_norm": 0.8333099484443665, + "learning_rate": 4.884922528699075e-06, + "loss": 0.5576, + "step": 8003 + }, + { + "epoch": 0.58844287604764, + "grad_norm": 0.8160027265548706, + "learning_rate": 4.884893629956965e-06, + "loss": 0.532, + "step": 8004 + }, + { + "epoch": 0.5885163946478459, + "grad_norm": 0.8595901727676392, + "learning_rate": 4.884864727672226e-06, + "loss": 0.5266, + "step": 8005 + }, + { + "epoch": 0.5885899132480518, + "grad_norm": 0.8328739404678345, + "learning_rate": 4.8848358218449014e-06, + "loss": 0.5727, + "step": 8006 + }, + { + "epoch": 0.5886634318482576, + "grad_norm": 0.9083075523376465, + "learning_rate": 4.884806912475034e-06, + "loss": 0.587, + "step": 8007 + }, + { + "epoch": 0.5887369504484634, + "grad_norm": 0.8318718671798706, + "learning_rate": 4.884777999562665e-06, + "loss": 0.5211, + "step": 8008 + }, + { + "epoch": 0.5888104690486693, + "grad_norm": 0.8567113876342773, + "learning_rate": 4.88474908310784e-06, + "loss": 0.5719, + "step": 8009 + }, + { + "epoch": 0.5888839876488752, + "grad_norm": 0.8453391194343567, + "learning_rate": 4.884720163110601e-06, + "loss": 0.5689, + "step": 8010 + }, + { + "epoch": 0.588957506249081, + "grad_norm": 0.7895583510398865, + "learning_rate": 4.884691239570991e-06, + "loss": 0.4897, + "step": 8011 + }, + { + "epoch": 0.5890310248492868, + "grad_norm": 0.8564037680625916, + "learning_rate": 4.884662312489053e-06, + "loss": 0.5509, + "step": 8012 + }, + { + "epoch": 0.5891045434494927, + "grad_norm": 0.8692104816436768, + "learning_rate": 4.884633381864829e-06, + "loss": 0.5921, + "step": 8013 + }, + { + "epoch": 0.5891780620496986, + "grad_norm": 0.8749783635139465, + "learning_rate": 4.884604447698362e-06, + "loss": 0.5321, + "step": 8014 + }, + { + "epoch": 0.5892515806499045, + "grad_norm": 0.8227005004882812, + "learning_rate": 4.884575509989696e-06, + "loss": 0.5366, + "step": 8015 + }, + { + "epoch": 0.5893250992501102, + "grad_norm": 0.8617095351219177, + "learning_rate": 4.884546568738875e-06, + "loss": 0.5745, + "step": 8016 + }, + { + "epoch": 0.5893986178503161, + "grad_norm": 0.8370508551597595, + "learning_rate": 4.884517623945939e-06, + "loss": 0.4958, + "step": 8017 + }, + { + "epoch": 0.589472136450522, + "grad_norm": 0.8000183701515198, + "learning_rate": 4.884488675610934e-06, + "loss": 0.5401, + "step": 8018 + }, + { + "epoch": 0.5895456550507279, + "grad_norm": 0.8068054914474487, + "learning_rate": 4.884459723733901e-06, + "loss": 0.4816, + "step": 8019 + }, + { + "epoch": 0.5896191736509336, + "grad_norm": 0.9055860638618469, + "learning_rate": 4.884430768314884e-06, + "loss": 0.5574, + "step": 8020 + }, + { + "epoch": 0.5896926922511395, + "grad_norm": 0.8565131425857544, + "learning_rate": 4.8844018093539245e-06, + "loss": 0.558, + "step": 8021 + }, + { + "epoch": 0.5897662108513454, + "grad_norm": 0.8134201765060425, + "learning_rate": 4.8843728468510675e-06, + "loss": 0.5793, + "step": 8022 + }, + { + "epoch": 0.5898397294515513, + "grad_norm": 0.8601264953613281, + "learning_rate": 4.884343880806355e-06, + "loss": 0.5404, + "step": 8023 + }, + { + "epoch": 0.589913248051757, + "grad_norm": 0.8213011026382446, + "learning_rate": 4.884314911219831e-06, + "loss": 0.569, + "step": 8024 + }, + { + "epoch": 0.5899867666519629, + "grad_norm": 0.8133310079574585, + "learning_rate": 4.884285938091537e-06, + "loss": 0.5113, + "step": 8025 + }, + { + "epoch": 0.5900602852521688, + "grad_norm": 0.7768744230270386, + "learning_rate": 4.884256961421517e-06, + "loss": 0.5188, + "step": 8026 + }, + { + "epoch": 0.5901338038523747, + "grad_norm": 0.8484397530555725, + "learning_rate": 4.884227981209814e-06, + "loss": 0.5665, + "step": 8027 + }, + { + "epoch": 0.5902073224525805, + "grad_norm": 0.8115279078483582, + "learning_rate": 4.8841989974564705e-06, + "loss": 0.5308, + "step": 8028 + }, + { + "epoch": 0.5902808410527863, + "grad_norm": 0.8850377202033997, + "learning_rate": 4.88417001016153e-06, + "loss": 0.544, + "step": 8029 + }, + { + "epoch": 0.5903543596529922, + "grad_norm": 0.8731047511100769, + "learning_rate": 4.884141019325036e-06, + "loss": 0.547, + "step": 8030 + }, + { + "epoch": 0.5904278782531981, + "grad_norm": 0.8851649761199951, + "learning_rate": 4.884112024947031e-06, + "loss": 0.5986, + "step": 8031 + }, + { + "epoch": 0.5905013968534039, + "grad_norm": 0.8057652711868286, + "learning_rate": 4.884083027027557e-06, + "loss": 0.5049, + "step": 8032 + }, + { + "epoch": 0.5905749154536097, + "grad_norm": 0.8428675532341003, + "learning_rate": 4.884054025566659e-06, + "loss": 0.5318, + "step": 8033 + }, + { + "epoch": 0.5906484340538156, + "grad_norm": 0.8454442620277405, + "learning_rate": 4.88402502056438e-06, + "loss": 0.5715, + "step": 8034 + }, + { + "epoch": 0.5907219526540215, + "grad_norm": 0.8317438960075378, + "learning_rate": 4.883996012020761e-06, + "loss": 0.5335, + "step": 8035 + }, + { + "epoch": 0.5907954712542273, + "grad_norm": 0.8104897141456604, + "learning_rate": 4.883966999935847e-06, + "loss": 0.5202, + "step": 8036 + }, + { + "epoch": 0.5908689898544331, + "grad_norm": 0.8776252269744873, + "learning_rate": 4.883937984309681e-06, + "loss": 0.5726, + "step": 8037 + }, + { + "epoch": 0.590942508454639, + "grad_norm": 0.8095578551292419, + "learning_rate": 4.883908965142305e-06, + "loss": 0.5151, + "step": 8038 + }, + { + "epoch": 0.5910160270548449, + "grad_norm": 0.8654858469963074, + "learning_rate": 4.883879942433764e-06, + "loss": 0.5831, + "step": 8039 + }, + { + "epoch": 0.5910895456550507, + "grad_norm": 0.7958062887191772, + "learning_rate": 4.883850916184099e-06, + "loss": 0.543, + "step": 8040 + }, + { + "epoch": 0.5911630642552566, + "grad_norm": 0.8065274357795715, + "learning_rate": 4.883821886393354e-06, + "loss": 0.503, + "step": 8041 + }, + { + "epoch": 0.5912365828554624, + "grad_norm": 0.8450739979743958, + "learning_rate": 4.8837928530615725e-06, + "loss": 0.5189, + "step": 8042 + }, + { + "epoch": 0.5913101014556683, + "grad_norm": 0.8085793852806091, + "learning_rate": 4.883763816188797e-06, + "loss": 0.5266, + "step": 8043 + }, + { + "epoch": 0.5913836200558741, + "grad_norm": 0.812489926815033, + "learning_rate": 4.883734775775071e-06, + "loss": 0.5666, + "step": 8044 + }, + { + "epoch": 0.59145713865608, + "grad_norm": 0.8208580613136292, + "learning_rate": 4.883705731820437e-06, + "loss": 0.5325, + "step": 8045 + }, + { + "epoch": 0.5915306572562858, + "grad_norm": 0.8539313077926636, + "learning_rate": 4.88367668432494e-06, + "loss": 0.5406, + "step": 8046 + }, + { + "epoch": 0.5916041758564917, + "grad_norm": 0.8127798438072205, + "learning_rate": 4.88364763328862e-06, + "loss": 0.5292, + "step": 8047 + }, + { + "epoch": 0.5916776944566975, + "grad_norm": 0.8450517058372498, + "learning_rate": 4.883618578711523e-06, + "loss": 0.5406, + "step": 8048 + }, + { + "epoch": 0.5917512130569034, + "grad_norm": 0.8620615601539612, + "learning_rate": 4.8835895205936916e-06, + "loss": 0.5353, + "step": 8049 + }, + { + "epoch": 0.5918247316571092, + "grad_norm": 0.8396292328834534, + "learning_rate": 4.883560458935167e-06, + "loss": 0.5568, + "step": 8050 + }, + { + "epoch": 0.5918982502573151, + "grad_norm": 0.8527743816375732, + "learning_rate": 4.883531393735994e-06, + "loss": 0.5465, + "step": 8051 + }, + { + "epoch": 0.591971768857521, + "grad_norm": 0.8187996745109558, + "learning_rate": 4.883502324996217e-06, + "loss": 0.5328, + "step": 8052 + }, + { + "epoch": 0.5920452874577268, + "grad_norm": 0.8757977485656738, + "learning_rate": 4.8834732527158765e-06, + "loss": 0.553, + "step": 8053 + }, + { + "epoch": 0.5921188060579327, + "grad_norm": 0.8384924530982971, + "learning_rate": 4.883444176895018e-06, + "loss": 0.5215, + "step": 8054 + }, + { + "epoch": 0.5921923246581385, + "grad_norm": 0.8025791049003601, + "learning_rate": 4.883415097533684e-06, + "loss": 0.5234, + "step": 8055 + }, + { + "epoch": 0.5922658432583444, + "grad_norm": 0.8473466634750366, + "learning_rate": 4.883386014631916e-06, + "loss": 0.5392, + "step": 8056 + }, + { + "epoch": 0.5923393618585502, + "grad_norm": 0.8759574294090271, + "learning_rate": 4.88335692818976e-06, + "loss": 0.5606, + "step": 8057 + }, + { + "epoch": 0.5924128804587561, + "grad_norm": 0.826541006565094, + "learning_rate": 4.883327838207257e-06, + "loss": 0.5437, + "step": 8058 + }, + { + "epoch": 0.5924863990589619, + "grad_norm": 0.8693739175796509, + "learning_rate": 4.88329874468445e-06, + "loss": 0.526, + "step": 8059 + }, + { + "epoch": 0.5925599176591678, + "grad_norm": 0.84260094165802, + "learning_rate": 4.883269647621385e-06, + "loss": 0.5278, + "step": 8060 + }, + { + "epoch": 0.5926334362593736, + "grad_norm": 0.8525712490081787, + "learning_rate": 4.883240547018103e-06, + "loss": 0.5693, + "step": 8061 + }, + { + "epoch": 0.5927069548595795, + "grad_norm": 0.8084606528282166, + "learning_rate": 4.883211442874647e-06, + "loss": 0.5297, + "step": 8062 + }, + { + "epoch": 0.5927804734597854, + "grad_norm": 0.7833832502365112, + "learning_rate": 4.883182335191061e-06, + "loss": 0.4849, + "step": 8063 + }, + { + "epoch": 0.5928539920599912, + "grad_norm": 0.8900926113128662, + "learning_rate": 4.883153223967389e-06, + "loss": 0.5373, + "step": 8064 + }, + { + "epoch": 0.592927510660197, + "grad_norm": 0.8649987578392029, + "learning_rate": 4.883124109203673e-06, + "loss": 0.5433, + "step": 8065 + }, + { + "epoch": 0.5930010292604029, + "grad_norm": 0.8254950046539307, + "learning_rate": 4.883094990899956e-06, + "loss": 0.5452, + "step": 8066 + }, + { + "epoch": 0.5930745478606088, + "grad_norm": 0.8563669919967651, + "learning_rate": 4.8830658690562824e-06, + "loss": 0.5655, + "step": 8067 + }, + { + "epoch": 0.5931480664608146, + "grad_norm": 0.8349326848983765, + "learning_rate": 4.8830367436726954e-06, + "loss": 0.4834, + "step": 8068 + }, + { + "epoch": 0.5932215850610204, + "grad_norm": 0.8151076436042786, + "learning_rate": 4.8830076147492375e-06, + "loss": 0.5147, + "step": 8069 + }, + { + "epoch": 0.5932951036612263, + "grad_norm": 0.8505677580833435, + "learning_rate": 4.882978482285952e-06, + "loss": 0.584, + "step": 8070 + }, + { + "epoch": 0.5933686222614322, + "grad_norm": 0.8275824785232544, + "learning_rate": 4.882949346282882e-06, + "loss": 0.4979, + "step": 8071 + }, + { + "epoch": 0.593442140861638, + "grad_norm": 0.8314801454544067, + "learning_rate": 4.882920206740072e-06, + "loss": 0.5536, + "step": 8072 + }, + { + "epoch": 0.5935156594618438, + "grad_norm": 0.8619317412376404, + "learning_rate": 4.882891063657564e-06, + "loss": 0.5518, + "step": 8073 + }, + { + "epoch": 0.5935891780620497, + "grad_norm": 0.8108440041542053, + "learning_rate": 4.882861917035403e-06, + "loss": 0.5449, + "step": 8074 + }, + { + "epoch": 0.5936626966622556, + "grad_norm": 0.8472460508346558, + "learning_rate": 4.8828327668736295e-06, + "loss": 0.512, + "step": 8075 + }, + { + "epoch": 0.5937362152624615, + "grad_norm": 0.8474302291870117, + "learning_rate": 4.8828036131722886e-06, + "loss": 0.5736, + "step": 8076 + }, + { + "epoch": 0.5938097338626672, + "grad_norm": 0.8100407123565674, + "learning_rate": 4.882774455931425e-06, + "loss": 0.5411, + "step": 8077 + }, + { + "epoch": 0.5938832524628731, + "grad_norm": 0.8952091336250305, + "learning_rate": 4.882745295151079e-06, + "loss": 0.5594, + "step": 8078 + }, + { + "epoch": 0.593956771063079, + "grad_norm": 0.7764789462089539, + "learning_rate": 4.882716130831296e-06, + "loss": 0.5475, + "step": 8079 + }, + { + "epoch": 0.5940302896632849, + "grad_norm": 0.835589587688446, + "learning_rate": 4.882686962972118e-06, + "loss": 0.5543, + "step": 8080 + }, + { + "epoch": 0.5941038082634906, + "grad_norm": 0.8933616876602173, + "learning_rate": 4.882657791573589e-06, + "loss": 0.5805, + "step": 8081 + }, + { + "epoch": 0.5941773268636965, + "grad_norm": 0.8167561888694763, + "learning_rate": 4.8826286166357525e-06, + "loss": 0.5033, + "step": 8082 + }, + { + "epoch": 0.5942508454639024, + "grad_norm": 0.8214067816734314, + "learning_rate": 4.882599438158652e-06, + "loss": 0.5671, + "step": 8083 + }, + { + "epoch": 0.5943243640641083, + "grad_norm": 0.7822413444519043, + "learning_rate": 4.8825702561423305e-06, + "loss": 0.5301, + "step": 8084 + }, + { + "epoch": 0.594397882664314, + "grad_norm": 0.8423383831977844, + "learning_rate": 4.882541070586832e-06, + "loss": 0.5592, + "step": 8085 + }, + { + "epoch": 0.5944714012645199, + "grad_norm": 0.8086497187614441, + "learning_rate": 4.882511881492198e-06, + "loss": 0.5394, + "step": 8086 + }, + { + "epoch": 0.5945449198647258, + "grad_norm": 0.8592672944068909, + "learning_rate": 4.882482688858473e-06, + "loss": 0.5297, + "step": 8087 + }, + { + "epoch": 0.5946184384649317, + "grad_norm": 0.8089136481285095, + "learning_rate": 4.882453492685702e-06, + "loss": 0.5094, + "step": 8088 + }, + { + "epoch": 0.5946919570651374, + "grad_norm": 0.8327713012695312, + "learning_rate": 4.882424292973925e-06, + "loss": 0.5182, + "step": 8089 + }, + { + "epoch": 0.5947654756653433, + "grad_norm": 0.8762706518173218, + "learning_rate": 4.882395089723188e-06, + "loss": 0.5607, + "step": 8090 + }, + { + "epoch": 0.5948389942655492, + "grad_norm": 0.8319636583328247, + "learning_rate": 4.882365882933533e-06, + "loss": 0.5517, + "step": 8091 + }, + { + "epoch": 0.5949125128657551, + "grad_norm": 0.8530774712562561, + "learning_rate": 4.882336672605004e-06, + "loss": 0.5729, + "step": 8092 + }, + { + "epoch": 0.5949860314659609, + "grad_norm": 0.8573547005653381, + "learning_rate": 4.882307458737645e-06, + "loss": 0.6122, + "step": 8093 + }, + { + "epoch": 0.5950595500661667, + "grad_norm": 0.8564916253089905, + "learning_rate": 4.882278241331498e-06, + "loss": 0.5813, + "step": 8094 + }, + { + "epoch": 0.5951330686663726, + "grad_norm": 0.8864686489105225, + "learning_rate": 4.882249020386608e-06, + "loss": 0.5063, + "step": 8095 + }, + { + "epoch": 0.5952065872665785, + "grad_norm": 0.8572306036949158, + "learning_rate": 4.882219795903017e-06, + "loss": 0.5668, + "step": 8096 + }, + { + "epoch": 0.5952801058667843, + "grad_norm": 0.8240888118743896, + "learning_rate": 4.882190567880769e-06, + "loss": 0.5644, + "step": 8097 + }, + { + "epoch": 0.5953536244669901, + "grad_norm": 0.8343793153762817, + "learning_rate": 4.882161336319907e-06, + "loss": 0.5373, + "step": 8098 + }, + { + "epoch": 0.595427143067196, + "grad_norm": 0.8644894957542419, + "learning_rate": 4.882132101220475e-06, + "loss": 0.5725, + "step": 8099 + }, + { + "epoch": 0.5955006616674019, + "grad_norm": 0.7758488059043884, + "learning_rate": 4.882102862582516e-06, + "loss": 0.5371, + "step": 8100 + }, + { + "epoch": 0.5955741802676077, + "grad_norm": 0.8141983151435852, + "learning_rate": 4.882073620406075e-06, + "loss": 0.5655, + "step": 8101 + }, + { + "epoch": 0.5956476988678135, + "grad_norm": 0.8320913314819336, + "learning_rate": 4.882044374691193e-06, + "loss": 0.5202, + "step": 8102 + }, + { + "epoch": 0.5957212174680194, + "grad_norm": 0.7877049446105957, + "learning_rate": 4.882015125437914e-06, + "loss": 0.5129, + "step": 8103 + }, + { + "epoch": 0.5957947360682253, + "grad_norm": 0.7863125801086426, + "learning_rate": 4.881985872646282e-06, + "loss": 0.508, + "step": 8104 + }, + { + "epoch": 0.5958682546684311, + "grad_norm": 0.8598261475563049, + "learning_rate": 4.881956616316342e-06, + "loss": 0.4546, + "step": 8105 + }, + { + "epoch": 0.595941773268637, + "grad_norm": 0.80042564868927, + "learning_rate": 4.881927356448134e-06, + "loss": 0.5266, + "step": 8106 + }, + { + "epoch": 0.5960152918688428, + "grad_norm": 0.8310795426368713, + "learning_rate": 4.881898093041704e-06, + "loss": 0.5523, + "step": 8107 + }, + { + "epoch": 0.5960888104690487, + "grad_norm": 0.8493480682373047, + "learning_rate": 4.881868826097095e-06, + "loss": 0.5486, + "step": 8108 + }, + { + "epoch": 0.5961623290692545, + "grad_norm": 0.8528892397880554, + "learning_rate": 4.8818395556143505e-06, + "loss": 0.5422, + "step": 8109 + }, + { + "epoch": 0.5962358476694604, + "grad_norm": 0.8011596202850342, + "learning_rate": 4.881810281593513e-06, + "loss": 0.514, + "step": 8110 + }, + { + "epoch": 0.5963093662696662, + "grad_norm": 0.9361591339111328, + "learning_rate": 4.881781004034628e-06, + "loss": 0.5661, + "step": 8111 + }, + { + "epoch": 0.5963828848698721, + "grad_norm": 0.8407625555992126, + "learning_rate": 4.881751722937736e-06, + "loss": 0.5456, + "step": 8112 + }, + { + "epoch": 0.5964564034700779, + "grad_norm": 0.7794384956359863, + "learning_rate": 4.881722438302884e-06, + "loss": 0.486, + "step": 8113 + }, + { + "epoch": 0.5965299220702838, + "grad_norm": 0.8795315027236938, + "learning_rate": 4.881693150130113e-06, + "loss": 0.5653, + "step": 8114 + }, + { + "epoch": 0.5966034406704896, + "grad_norm": 0.8569689393043518, + "learning_rate": 4.881663858419468e-06, + "loss": 0.6031, + "step": 8115 + }, + { + "epoch": 0.5966769592706955, + "grad_norm": 0.8819539546966553, + "learning_rate": 4.881634563170991e-06, + "loss": 0.5271, + "step": 8116 + }, + { + "epoch": 0.5967504778709013, + "grad_norm": 0.8689881563186646, + "learning_rate": 4.881605264384725e-06, + "loss": 0.5694, + "step": 8117 + }, + { + "epoch": 0.5968239964711072, + "grad_norm": 0.8789385557174683, + "learning_rate": 4.881575962060716e-06, + "loss": 0.5934, + "step": 8118 + }, + { + "epoch": 0.5968975150713131, + "grad_norm": 0.7848215699195862, + "learning_rate": 4.881546656199007e-06, + "loss": 0.5503, + "step": 8119 + }, + { + "epoch": 0.5969710336715189, + "grad_norm": 0.8143073916435242, + "learning_rate": 4.88151734679964e-06, + "loss": 0.535, + "step": 8120 + }, + { + "epoch": 0.5970445522717247, + "grad_norm": 0.8912351727485657, + "learning_rate": 4.88148803386266e-06, + "loss": 0.56, + "step": 8121 + }, + { + "epoch": 0.5971180708719306, + "grad_norm": 0.8510034680366516, + "learning_rate": 4.881458717388109e-06, + "loss": 0.5589, + "step": 8122 + }, + { + "epoch": 0.5971915894721365, + "grad_norm": 0.8431478142738342, + "learning_rate": 4.881429397376032e-06, + "loss": 0.5435, + "step": 8123 + }, + { + "epoch": 0.5972651080723423, + "grad_norm": 0.8127920031547546, + "learning_rate": 4.881400073826472e-06, + "loss": 0.5528, + "step": 8124 + }, + { + "epoch": 0.5973386266725481, + "grad_norm": 0.7740603089332581, + "learning_rate": 4.881370746739473e-06, + "loss": 0.5362, + "step": 8125 + }, + { + "epoch": 0.597412145272754, + "grad_norm": 0.8268492817878723, + "learning_rate": 4.8813414161150775e-06, + "loss": 0.5763, + "step": 8126 + }, + { + "epoch": 0.5974856638729599, + "grad_norm": 0.8150094151496887, + "learning_rate": 4.88131208195333e-06, + "loss": 0.5689, + "step": 8127 + }, + { + "epoch": 0.5975591824731658, + "grad_norm": 0.8466824293136597, + "learning_rate": 4.8812827442542745e-06, + "loss": 0.5605, + "step": 8128 + }, + { + "epoch": 0.5976327010733715, + "grad_norm": 0.8348325490951538, + "learning_rate": 4.881253403017953e-06, + "loss": 0.5482, + "step": 8129 + }, + { + "epoch": 0.5977062196735774, + "grad_norm": 0.8844063878059387, + "learning_rate": 4.88122405824441e-06, + "loss": 0.5693, + "step": 8130 + }, + { + "epoch": 0.5977797382737833, + "grad_norm": 0.8602885007858276, + "learning_rate": 4.88119470993369e-06, + "loss": 0.5378, + "step": 8131 + }, + { + "epoch": 0.5978532568739892, + "grad_norm": 0.8447596430778503, + "learning_rate": 4.881165358085834e-06, + "loss": 0.5424, + "step": 8132 + }, + { + "epoch": 0.5979267754741949, + "grad_norm": 0.8616815209388733, + "learning_rate": 4.881136002700888e-06, + "loss": 0.5335, + "step": 8133 + }, + { + "epoch": 0.5980002940744008, + "grad_norm": 0.8579034209251404, + "learning_rate": 4.881106643778895e-06, + "loss": 0.5735, + "step": 8134 + }, + { + "epoch": 0.5980738126746067, + "grad_norm": 0.8898789882659912, + "learning_rate": 4.881077281319899e-06, + "loss": 0.558, + "step": 8135 + }, + { + "epoch": 0.5981473312748126, + "grad_norm": 0.8025586605072021, + "learning_rate": 4.8810479153239425e-06, + "loss": 0.5494, + "step": 8136 + }, + { + "epoch": 0.5982208498750183, + "grad_norm": 0.8344362378120422, + "learning_rate": 4.88101854579107e-06, + "loss": 0.5535, + "step": 8137 + }, + { + "epoch": 0.5982943684752242, + "grad_norm": 0.8189972043037415, + "learning_rate": 4.8809891727213236e-06, + "loss": 0.5658, + "step": 8138 + }, + { + "epoch": 0.5983678870754301, + "grad_norm": 0.8358983397483826, + "learning_rate": 4.880959796114749e-06, + "loss": 0.6123, + "step": 8139 + }, + { + "epoch": 0.598441405675636, + "grad_norm": 0.8339546918869019, + "learning_rate": 4.880930415971389e-06, + "loss": 0.5058, + "step": 8140 + }, + { + "epoch": 0.5985149242758417, + "grad_norm": 0.7962789535522461, + "learning_rate": 4.8809010322912875e-06, + "loss": 0.5167, + "step": 8141 + }, + { + "epoch": 0.5985884428760476, + "grad_norm": 0.7792835831642151, + "learning_rate": 4.880871645074488e-06, + "loss": 0.5182, + "step": 8142 + }, + { + "epoch": 0.5986619614762535, + "grad_norm": 0.8684979677200317, + "learning_rate": 4.880842254321034e-06, + "loss": 0.5793, + "step": 8143 + }, + { + "epoch": 0.5987354800764594, + "grad_norm": 0.8706232905387878, + "learning_rate": 4.880812860030969e-06, + "loss": 0.5785, + "step": 8144 + }, + { + "epoch": 0.5988089986766651, + "grad_norm": 0.8457674980163574, + "learning_rate": 4.880783462204337e-06, + "loss": 0.5415, + "step": 8145 + }, + { + "epoch": 0.598882517276871, + "grad_norm": 0.8822994828224182, + "learning_rate": 4.880754060841181e-06, + "loss": 0.5408, + "step": 8146 + }, + { + "epoch": 0.5989560358770769, + "grad_norm": 0.8252171874046326, + "learning_rate": 4.880724655941546e-06, + "loss": 0.5335, + "step": 8147 + }, + { + "epoch": 0.5990295544772828, + "grad_norm": 0.875001072883606, + "learning_rate": 4.8806952475054745e-06, + "loss": 0.5616, + "step": 8148 + }, + { + "epoch": 0.5991030730774886, + "grad_norm": 0.8865826725959778, + "learning_rate": 4.88066583553301e-06, + "loss": 0.5643, + "step": 8149 + }, + { + "epoch": 0.5991765916776944, + "grad_norm": 0.8335224390029907, + "learning_rate": 4.880636420024197e-06, + "loss": 0.5346, + "step": 8150 + }, + { + "epoch": 0.5992501102779003, + "grad_norm": 0.8490098714828491, + "learning_rate": 4.88060700097908e-06, + "loss": 0.5531, + "step": 8151 + }, + { + "epoch": 0.5993236288781062, + "grad_norm": 0.8407742381095886, + "learning_rate": 4.880577578397701e-06, + "loss": 0.5013, + "step": 8152 + }, + { + "epoch": 0.599397147478312, + "grad_norm": 0.8591493964195251, + "learning_rate": 4.880548152280105e-06, + "loss": 0.5377, + "step": 8153 + }, + { + "epoch": 0.5994706660785178, + "grad_norm": 0.8306421637535095, + "learning_rate": 4.880518722626334e-06, + "loss": 0.5541, + "step": 8154 + }, + { + "epoch": 0.5995441846787237, + "grad_norm": 0.8039858937263489, + "learning_rate": 4.880489289436433e-06, + "loss": 0.5294, + "step": 8155 + }, + { + "epoch": 0.5996177032789296, + "grad_norm": 0.812833845615387, + "learning_rate": 4.880459852710446e-06, + "loss": 0.4935, + "step": 8156 + }, + { + "epoch": 0.5996912218791354, + "grad_norm": 0.7758691906929016, + "learning_rate": 4.8804304124484155e-06, + "loss": 0.5524, + "step": 8157 + }, + { + "epoch": 0.5997647404793413, + "grad_norm": 0.8152886629104614, + "learning_rate": 4.880400968650386e-06, + "loss": 0.5604, + "step": 8158 + }, + { + "epoch": 0.5998382590795471, + "grad_norm": 0.8109773397445679, + "learning_rate": 4.880371521316402e-06, + "loss": 0.5177, + "step": 8159 + }, + { + "epoch": 0.599911777679753, + "grad_norm": 0.7850215435028076, + "learning_rate": 4.880342070446507e-06, + "loss": 0.5016, + "step": 8160 + }, + { + "epoch": 0.5999852962799588, + "grad_norm": 0.7963110208511353, + "learning_rate": 4.880312616040742e-06, + "loss": 0.5018, + "step": 8161 + }, + { + "epoch": 0.6000588148801647, + "grad_norm": 0.8263002038002014, + "learning_rate": 4.880283158099155e-06, + "loss": 0.5583, + "step": 8162 + }, + { + "epoch": 0.6001323334803705, + "grad_norm": 0.9123395085334778, + "learning_rate": 4.880253696621786e-06, + "loss": 0.5388, + "step": 8163 + }, + { + "epoch": 0.6002058520805764, + "grad_norm": 0.8444212675094604, + "learning_rate": 4.880224231608682e-06, + "loss": 0.5222, + "step": 8164 + }, + { + "epoch": 0.6002793706807822, + "grad_norm": 0.8346475958824158, + "learning_rate": 4.8801947630598844e-06, + "loss": 0.56, + "step": 8165 + }, + { + "epoch": 0.6003528892809881, + "grad_norm": 0.8308640122413635, + "learning_rate": 4.8801652909754384e-06, + "loss": 0.543, + "step": 8166 + }, + { + "epoch": 0.600426407881194, + "grad_norm": 0.8645997643470764, + "learning_rate": 4.880135815355387e-06, + "loss": 0.5469, + "step": 8167 + }, + { + "epoch": 0.6004999264813998, + "grad_norm": 0.8606894612312317, + "learning_rate": 4.880106336199774e-06, + "loss": 0.5147, + "step": 8168 + }, + { + "epoch": 0.6005734450816056, + "grad_norm": 0.844582736492157, + "learning_rate": 4.880076853508643e-06, + "loss": 0.5485, + "step": 8169 + }, + { + "epoch": 0.6006469636818115, + "grad_norm": 0.8736292719841003, + "learning_rate": 4.8800473672820385e-06, + "loss": 0.6113, + "step": 8170 + }, + { + "epoch": 0.6007204822820174, + "grad_norm": 0.8456955552101135, + "learning_rate": 4.880017877520004e-06, + "loss": 0.5471, + "step": 8171 + }, + { + "epoch": 0.6007940008822232, + "grad_norm": 0.8804194927215576, + "learning_rate": 4.879988384222583e-06, + "loss": 0.5939, + "step": 8172 + }, + { + "epoch": 0.600867519482429, + "grad_norm": 0.8231454491615295, + "learning_rate": 4.879958887389821e-06, + "loss": 0.5565, + "step": 8173 + }, + { + "epoch": 0.6009410380826349, + "grad_norm": 0.8207378387451172, + "learning_rate": 4.879929387021759e-06, + "loss": 0.4746, + "step": 8174 + }, + { + "epoch": 0.6010145566828408, + "grad_norm": 0.8113421201705933, + "learning_rate": 4.879899883118442e-06, + "loss": 0.5856, + "step": 8175 + }, + { + "epoch": 0.6010880752830466, + "grad_norm": 0.8165158033370972, + "learning_rate": 4.8798703756799145e-06, + "loss": 0.4992, + "step": 8176 + }, + { + "epoch": 0.6011615938832524, + "grad_norm": 0.8304612040519714, + "learning_rate": 4.87984086470622e-06, + "loss": 0.6002, + "step": 8177 + }, + { + "epoch": 0.6012351124834583, + "grad_norm": 0.8554861545562744, + "learning_rate": 4.879811350197402e-06, + "loss": 0.5544, + "step": 8178 + }, + { + "epoch": 0.6013086310836642, + "grad_norm": 0.8546152710914612, + "learning_rate": 4.879781832153505e-06, + "loss": 0.5192, + "step": 8179 + }, + { + "epoch": 0.60138214968387, + "grad_norm": 0.8162432312965393, + "learning_rate": 4.879752310574572e-06, + "loss": 0.557, + "step": 8180 + }, + { + "epoch": 0.6014556682840758, + "grad_norm": 0.8638714551925659, + "learning_rate": 4.879722785460648e-06, + "loss": 0.5509, + "step": 8181 + }, + { + "epoch": 0.6015291868842817, + "grad_norm": 0.8693324327468872, + "learning_rate": 4.879693256811775e-06, + "loss": 0.6004, + "step": 8182 + }, + { + "epoch": 0.6016027054844876, + "grad_norm": 0.8575549721717834, + "learning_rate": 4.879663724627998e-06, + "loss": 0.5823, + "step": 8183 + }, + { + "epoch": 0.6016762240846935, + "grad_norm": 0.8440104126930237, + "learning_rate": 4.879634188909362e-06, + "loss": 0.5366, + "step": 8184 + }, + { + "epoch": 0.6017497426848992, + "grad_norm": 0.8320025205612183, + "learning_rate": 4.8796046496559085e-06, + "loss": 0.5474, + "step": 8185 + }, + { + "epoch": 0.6018232612851051, + "grad_norm": 0.8634149432182312, + "learning_rate": 4.8795751068676836e-06, + "loss": 0.5841, + "step": 8186 + }, + { + "epoch": 0.601896779885311, + "grad_norm": 0.8712909817695618, + "learning_rate": 4.879545560544729e-06, + "loss": 0.5367, + "step": 8187 + }, + { + "epoch": 0.6019702984855169, + "grad_norm": 0.8290266990661621, + "learning_rate": 4.879516010687091e-06, + "loss": 0.5216, + "step": 8188 + }, + { + "epoch": 0.6020438170857227, + "grad_norm": 0.8546011447906494, + "learning_rate": 4.879486457294811e-06, + "loss": 0.5538, + "step": 8189 + }, + { + "epoch": 0.6021173356859285, + "grad_norm": 0.8723077774047852, + "learning_rate": 4.8794569003679346e-06, + "loss": 0.5752, + "step": 8190 + }, + { + "epoch": 0.6021908542861344, + "grad_norm": 0.8342640399932861, + "learning_rate": 4.879427339906506e-06, + "loss": 0.5731, + "step": 8191 + }, + { + "epoch": 0.6022643728863403, + "grad_norm": 0.8046355843544006, + "learning_rate": 4.879397775910568e-06, + "loss": 0.5087, + "step": 8192 + }, + { + "epoch": 0.6023378914865462, + "grad_norm": 0.8287876844406128, + "learning_rate": 4.879368208380165e-06, + "loss": 0.5225, + "step": 8193 + }, + { + "epoch": 0.6024114100867519, + "grad_norm": 0.878824770450592, + "learning_rate": 4.8793386373153405e-06, + "loss": 0.568, + "step": 8194 + }, + { + "epoch": 0.6024849286869578, + "grad_norm": 0.872440755367279, + "learning_rate": 4.8793090627161385e-06, + "loss": 0.5759, + "step": 8195 + }, + { + "epoch": 0.6025584472871637, + "grad_norm": 0.8688493371009827, + "learning_rate": 4.879279484582603e-06, + "loss": 0.5509, + "step": 8196 + }, + { + "epoch": 0.6026319658873696, + "grad_norm": 0.8661640286445618, + "learning_rate": 4.879249902914779e-06, + "loss": 0.5039, + "step": 8197 + }, + { + "epoch": 0.6027054844875753, + "grad_norm": 0.8882476687431335, + "learning_rate": 4.879220317712709e-06, + "loss": 0.5848, + "step": 8198 + }, + { + "epoch": 0.6027790030877812, + "grad_norm": 0.8692573308944702, + "learning_rate": 4.8791907289764385e-06, + "loss": 0.5492, + "step": 8199 + }, + { + "epoch": 0.6028525216879871, + "grad_norm": 0.8675456643104553, + "learning_rate": 4.87916113670601e-06, + "loss": 0.5305, + "step": 8200 + }, + { + "epoch": 0.602926040288193, + "grad_norm": 0.863921582698822, + "learning_rate": 4.879131540901468e-06, + "loss": 0.5692, + "step": 8201 + }, + { + "epoch": 0.6029995588883987, + "grad_norm": 0.8213576078414917, + "learning_rate": 4.879101941562855e-06, + "loss": 0.4982, + "step": 8202 + }, + { + "epoch": 0.6030730774886046, + "grad_norm": 0.8180848360061646, + "learning_rate": 4.879072338690217e-06, + "loss": 0.5309, + "step": 8203 + }, + { + "epoch": 0.6031465960888105, + "grad_norm": 0.8940820097923279, + "learning_rate": 4.8790427322835974e-06, + "loss": 0.5897, + "step": 8204 + }, + { + "epoch": 0.6032201146890164, + "grad_norm": 0.8509474992752075, + "learning_rate": 4.87901312234304e-06, + "loss": 0.5435, + "step": 8205 + }, + { + "epoch": 0.6032936332892221, + "grad_norm": 0.838840663433075, + "learning_rate": 4.87898350886859e-06, + "loss": 0.5653, + "step": 8206 + }, + { + "epoch": 0.603367151889428, + "grad_norm": 0.787865161895752, + "learning_rate": 4.87895389186029e-06, + "loss": 0.5168, + "step": 8207 + }, + { + "epoch": 0.6034406704896339, + "grad_norm": 0.8159606456756592, + "learning_rate": 4.8789242713181825e-06, + "loss": 0.5507, + "step": 8208 + }, + { + "epoch": 0.6035141890898398, + "grad_norm": 0.9192243218421936, + "learning_rate": 4.878894647242315e-06, + "loss": 0.5816, + "step": 8209 + }, + { + "epoch": 0.6035877076900455, + "grad_norm": 0.8123262524604797, + "learning_rate": 4.878865019632729e-06, + "loss": 0.5764, + "step": 8210 + }, + { + "epoch": 0.6036612262902514, + "grad_norm": 0.8708851933479309, + "learning_rate": 4.8788353884894705e-06, + "loss": 0.5366, + "step": 8211 + }, + { + "epoch": 0.6037347448904573, + "grad_norm": 0.8137749433517456, + "learning_rate": 4.8788057538125805e-06, + "loss": 0.5435, + "step": 8212 + }, + { + "epoch": 0.6038082634906632, + "grad_norm": 0.8177232146263123, + "learning_rate": 4.878776115602105e-06, + "loss": 0.5685, + "step": 8213 + }, + { + "epoch": 0.603881782090869, + "grad_norm": 0.8090692758560181, + "learning_rate": 4.878746473858089e-06, + "loss": 0.5704, + "step": 8214 + }, + { + "epoch": 0.6039553006910748, + "grad_norm": 0.8639929890632629, + "learning_rate": 4.878716828580575e-06, + "loss": 0.5807, + "step": 8215 + }, + { + "epoch": 0.6040288192912807, + "grad_norm": 0.8635073900222778, + "learning_rate": 4.878687179769607e-06, + "loss": 0.5805, + "step": 8216 + }, + { + "epoch": 0.6041023378914866, + "grad_norm": 0.7822147607803345, + "learning_rate": 4.87865752742523e-06, + "loss": 0.5131, + "step": 8217 + }, + { + "epoch": 0.6041758564916924, + "grad_norm": 0.834973156452179, + "learning_rate": 4.878627871547487e-06, + "loss": 0.5385, + "step": 8218 + }, + { + "epoch": 0.6042493750918982, + "grad_norm": 0.8563823699951172, + "learning_rate": 4.878598212136423e-06, + "loss": 0.5157, + "step": 8219 + }, + { + "epoch": 0.6043228936921041, + "grad_norm": 0.8266501426696777, + "learning_rate": 4.878568549192081e-06, + "loss": 0.5724, + "step": 8220 + }, + { + "epoch": 0.60439641229231, + "grad_norm": 0.8940772414207458, + "learning_rate": 4.878538882714506e-06, + "loss": 0.5961, + "step": 8221 + }, + { + "epoch": 0.6044699308925158, + "grad_norm": 0.8261977434158325, + "learning_rate": 4.878509212703741e-06, + "loss": 0.5351, + "step": 8222 + }, + { + "epoch": 0.6045434494927217, + "grad_norm": 0.8986411094665527, + "learning_rate": 4.878479539159832e-06, + "loss": 0.5832, + "step": 8223 + }, + { + "epoch": 0.6046169680929275, + "grad_norm": 0.8639916777610779, + "learning_rate": 4.878449862082822e-06, + "loss": 0.5576, + "step": 8224 + }, + { + "epoch": 0.6046904866931334, + "grad_norm": 0.8409141302108765, + "learning_rate": 4.878420181472754e-06, + "loss": 0.5347, + "step": 8225 + }, + { + "epoch": 0.6047640052933392, + "grad_norm": 0.8253182172775269, + "learning_rate": 4.878390497329674e-06, + "loss": 0.5556, + "step": 8226 + }, + { + "epoch": 0.6048375238935451, + "grad_norm": 0.84140545129776, + "learning_rate": 4.878360809653624e-06, + "loss": 0.5592, + "step": 8227 + }, + { + "epoch": 0.6049110424937509, + "grad_norm": 0.8264115452766418, + "learning_rate": 4.87833111844465e-06, + "loss": 0.5319, + "step": 8228 + }, + { + "epoch": 0.6049845610939568, + "grad_norm": 0.8268153667449951, + "learning_rate": 4.878301423702794e-06, + "loss": 0.5645, + "step": 8229 + }, + { + "epoch": 0.6050580796941626, + "grad_norm": 0.8509644865989685, + "learning_rate": 4.878271725428104e-06, + "loss": 0.5246, + "step": 8230 + }, + { + "epoch": 0.6051315982943685, + "grad_norm": 0.8269049525260925, + "learning_rate": 4.87824202362062e-06, + "loss": 0.4982, + "step": 8231 + }, + { + "epoch": 0.6052051168945743, + "grad_norm": 0.8281665444374084, + "learning_rate": 4.878212318280388e-06, + "loss": 0.5367, + "step": 8232 + }, + { + "epoch": 0.6052786354947802, + "grad_norm": 0.8345290422439575, + "learning_rate": 4.878182609407452e-06, + "loss": 0.6003, + "step": 8233 + }, + { + "epoch": 0.605352154094986, + "grad_norm": 0.8066402077674866, + "learning_rate": 4.878152897001856e-06, + "loss": 0.4974, + "step": 8234 + }, + { + "epoch": 0.6054256726951919, + "grad_norm": 0.82523512840271, + "learning_rate": 4.878123181063643e-06, + "loss": 0.5245, + "step": 8235 + }, + { + "epoch": 0.6054991912953978, + "grad_norm": 0.7927917242050171, + "learning_rate": 4.87809346159286e-06, + "loss": 0.5688, + "step": 8236 + }, + { + "epoch": 0.6055727098956036, + "grad_norm": 0.8087112307548523, + "learning_rate": 4.878063738589548e-06, + "loss": 0.5441, + "step": 8237 + }, + { + "epoch": 0.6056462284958094, + "grad_norm": 0.8668555021286011, + "learning_rate": 4.8780340120537526e-06, + "loss": 0.5873, + "step": 8238 + }, + { + "epoch": 0.6057197470960153, + "grad_norm": 0.8393281698226929, + "learning_rate": 4.878004281985519e-06, + "loss": 0.5281, + "step": 8239 + }, + { + "epoch": 0.6057932656962212, + "grad_norm": 0.7908028960227966, + "learning_rate": 4.877974548384889e-06, + "loss": 0.5064, + "step": 8240 + }, + { + "epoch": 0.605866784296427, + "grad_norm": 0.8367143273353577, + "learning_rate": 4.877944811251909e-06, + "loss": 0.5444, + "step": 8241 + }, + { + "epoch": 0.6059403028966328, + "grad_norm": 0.83806973695755, + "learning_rate": 4.877915070586622e-06, + "loss": 0.5185, + "step": 8242 + }, + { + "epoch": 0.6060138214968387, + "grad_norm": 0.8691368103027344, + "learning_rate": 4.877885326389072e-06, + "loss": 0.5675, + "step": 8243 + }, + { + "epoch": 0.6060873400970446, + "grad_norm": 0.8429267406463623, + "learning_rate": 4.877855578659303e-06, + "loss": 0.5578, + "step": 8244 + }, + { + "epoch": 0.6061608586972504, + "grad_norm": 0.8118954300880432, + "learning_rate": 4.877825827397361e-06, + "loss": 0.5234, + "step": 8245 + }, + { + "epoch": 0.6062343772974562, + "grad_norm": 0.8665241599082947, + "learning_rate": 4.877796072603288e-06, + "loss": 0.5565, + "step": 8246 + }, + { + "epoch": 0.6063078958976621, + "grad_norm": 0.9192641973495483, + "learning_rate": 4.877766314277129e-06, + "loss": 0.5777, + "step": 8247 + }, + { + "epoch": 0.606381414497868, + "grad_norm": 0.827052652835846, + "learning_rate": 4.877736552418929e-06, + "loss": 0.5508, + "step": 8248 + }, + { + "epoch": 0.6064549330980739, + "grad_norm": 0.8123875260353088, + "learning_rate": 4.877706787028732e-06, + "loss": 0.4824, + "step": 8249 + }, + { + "epoch": 0.6065284516982796, + "grad_norm": 0.8892994523048401, + "learning_rate": 4.8776770181065815e-06, + "loss": 0.5726, + "step": 8250 + }, + { + "epoch": 0.6066019702984855, + "grad_norm": 0.789219319820404, + "learning_rate": 4.877647245652522e-06, + "loss": 0.533, + "step": 8251 + }, + { + "epoch": 0.6066754888986914, + "grad_norm": 0.8172328472137451, + "learning_rate": 4.8776174696665966e-06, + "loss": 0.5313, + "step": 8252 + }, + { + "epoch": 0.6067490074988973, + "grad_norm": 0.8235572576522827, + "learning_rate": 4.877587690148851e-06, + "loss": 0.5586, + "step": 8253 + }, + { + "epoch": 0.606822526099103, + "grad_norm": 0.813670814037323, + "learning_rate": 4.87755790709933e-06, + "loss": 0.5345, + "step": 8254 + }, + { + "epoch": 0.6068960446993089, + "grad_norm": 0.8197185397148132, + "learning_rate": 4.8775281205180765e-06, + "loss": 0.5432, + "step": 8255 + }, + { + "epoch": 0.6069695632995148, + "grad_norm": 0.7470952272415161, + "learning_rate": 4.877498330405135e-06, + "loss": 0.5223, + "step": 8256 + }, + { + "epoch": 0.6070430818997207, + "grad_norm": 0.8592533469200134, + "learning_rate": 4.87746853676055e-06, + "loss": 0.5138, + "step": 8257 + }, + { + "epoch": 0.6071166004999264, + "grad_norm": 0.7943236231803894, + "learning_rate": 4.877438739584366e-06, + "loss": 0.5226, + "step": 8258 + }, + { + "epoch": 0.6071901191001323, + "grad_norm": 0.8857846260070801, + "learning_rate": 4.877408938876627e-06, + "loss": 0.5923, + "step": 8259 + }, + { + "epoch": 0.6072636377003382, + "grad_norm": 0.8110754489898682, + "learning_rate": 4.877379134637377e-06, + "loss": 0.5344, + "step": 8260 + }, + { + "epoch": 0.6073371563005441, + "grad_norm": 0.8405297994613647, + "learning_rate": 4.877349326866659e-06, + "loss": 0.5927, + "step": 8261 + }, + { + "epoch": 0.6074106749007498, + "grad_norm": 0.8460606336593628, + "learning_rate": 4.877319515564521e-06, + "loss": 0.5274, + "step": 8262 + }, + { + "epoch": 0.6074841935009557, + "grad_norm": 0.8311708569526672, + "learning_rate": 4.877289700731004e-06, + "loss": 0.5462, + "step": 8263 + }, + { + "epoch": 0.6075577121011616, + "grad_norm": 0.8875831961631775, + "learning_rate": 4.8772598823661535e-06, + "loss": 0.5916, + "step": 8264 + }, + { + "epoch": 0.6076312307013675, + "grad_norm": 0.7908024787902832, + "learning_rate": 4.877230060470013e-06, + "loss": 0.4922, + "step": 8265 + }, + { + "epoch": 0.6077047493015733, + "grad_norm": 0.9111143350601196, + "learning_rate": 4.877200235042628e-06, + "loss": 0.6269, + "step": 8266 + }, + { + "epoch": 0.6077782679017791, + "grad_norm": 0.8031562566757202, + "learning_rate": 4.877170406084043e-06, + "loss": 0.5451, + "step": 8267 + }, + { + "epoch": 0.607851786501985, + "grad_norm": 0.8744503259658813, + "learning_rate": 4.8771405735943e-06, + "loss": 0.5591, + "step": 8268 + }, + { + "epoch": 0.6079253051021909, + "grad_norm": 0.9073666930198669, + "learning_rate": 4.877110737573446e-06, + "loss": 0.5214, + "step": 8269 + }, + { + "epoch": 0.6079988237023967, + "grad_norm": 0.8139211535453796, + "learning_rate": 4.877080898021523e-06, + "loss": 0.5436, + "step": 8270 + }, + { + "epoch": 0.6080723423026025, + "grad_norm": 0.8631243109703064, + "learning_rate": 4.877051054938578e-06, + "loss": 0.5537, + "step": 8271 + }, + { + "epoch": 0.6081458609028084, + "grad_norm": 0.8297969102859497, + "learning_rate": 4.877021208324652e-06, + "loss": 0.5404, + "step": 8272 + }, + { + "epoch": 0.6082193795030143, + "grad_norm": 0.8084414601325989, + "learning_rate": 4.876991358179793e-06, + "loss": 0.526, + "step": 8273 + }, + { + "epoch": 0.6082928981032201, + "grad_norm": 0.7845690250396729, + "learning_rate": 4.876961504504043e-06, + "loss": 0.5612, + "step": 8274 + }, + { + "epoch": 0.608366416703426, + "grad_norm": 0.8493770956993103, + "learning_rate": 4.876931647297447e-06, + "loss": 0.5352, + "step": 8275 + }, + { + "epoch": 0.6084399353036318, + "grad_norm": 0.8247410655021667, + "learning_rate": 4.876901786560048e-06, + "loss": 0.5622, + "step": 8276 + }, + { + "epoch": 0.6085134539038377, + "grad_norm": 0.8472274541854858, + "learning_rate": 4.876871922291893e-06, + "loss": 0.5586, + "step": 8277 + }, + { + "epoch": 0.6085869725040435, + "grad_norm": 0.7925957441329956, + "learning_rate": 4.876842054493024e-06, + "loss": 0.5104, + "step": 8278 + }, + { + "epoch": 0.6086604911042494, + "grad_norm": 0.8330312371253967, + "learning_rate": 4.876812183163486e-06, + "loss": 0.5278, + "step": 8279 + }, + { + "epoch": 0.6087340097044552, + "grad_norm": 0.8572019934654236, + "learning_rate": 4.876782308303324e-06, + "loss": 0.5617, + "step": 8280 + }, + { + "epoch": 0.6088075283046611, + "grad_norm": 0.8491873741149902, + "learning_rate": 4.876752429912582e-06, + "loss": 0.5657, + "step": 8281 + }, + { + "epoch": 0.6088810469048669, + "grad_norm": 0.8234464526176453, + "learning_rate": 4.876722547991305e-06, + "loss": 0.5223, + "step": 8282 + }, + { + "epoch": 0.6089545655050728, + "grad_norm": 0.8389483690261841, + "learning_rate": 4.876692662539536e-06, + "loss": 0.5526, + "step": 8283 + }, + { + "epoch": 0.6090280841052786, + "grad_norm": 0.8023913502693176, + "learning_rate": 4.876662773557321e-06, + "loss": 0.5593, + "step": 8284 + }, + { + "epoch": 0.6091016027054845, + "grad_norm": 0.8456333875656128, + "learning_rate": 4.876632881044703e-06, + "loss": 0.5529, + "step": 8285 + }, + { + "epoch": 0.6091751213056903, + "grad_norm": 0.8625261783599854, + "learning_rate": 4.876602985001727e-06, + "loss": 0.5298, + "step": 8286 + }, + { + "epoch": 0.6092486399058962, + "grad_norm": 0.847687840461731, + "learning_rate": 4.8765730854284374e-06, + "loss": 0.5622, + "step": 8287 + }, + { + "epoch": 0.609322158506102, + "grad_norm": 0.7970453500747681, + "learning_rate": 4.8765431823248795e-06, + "loss": 0.5585, + "step": 8288 + }, + { + "epoch": 0.6093956771063079, + "grad_norm": 0.7922688126564026, + "learning_rate": 4.876513275691096e-06, + "loss": 0.4998, + "step": 8289 + }, + { + "epoch": 0.6094691957065137, + "grad_norm": 0.8581867218017578, + "learning_rate": 4.876483365527133e-06, + "loss": 0.5433, + "step": 8290 + }, + { + "epoch": 0.6095427143067196, + "grad_norm": 0.8074532747268677, + "learning_rate": 4.876453451833033e-06, + "loss": 0.533, + "step": 8291 + }, + { + "epoch": 0.6096162329069255, + "grad_norm": 0.8553849458694458, + "learning_rate": 4.876423534608842e-06, + "loss": 0.5275, + "step": 8292 + }, + { + "epoch": 0.6096897515071313, + "grad_norm": 0.8726967573165894, + "learning_rate": 4.876393613854604e-06, + "loss": 0.594, + "step": 8293 + }, + { + "epoch": 0.6097632701073371, + "grad_norm": 0.8201259970664978, + "learning_rate": 4.876363689570363e-06, + "loss": 0.5183, + "step": 8294 + }, + { + "epoch": 0.609836788707543, + "grad_norm": 0.8391554951667786, + "learning_rate": 4.876333761756164e-06, + "loss": 0.5642, + "step": 8295 + }, + { + "epoch": 0.6099103073077489, + "grad_norm": 0.8816940784454346, + "learning_rate": 4.876303830412051e-06, + "loss": 0.598, + "step": 8296 + }, + { + "epoch": 0.6099838259079547, + "grad_norm": 0.8380608558654785, + "learning_rate": 4.876273895538069e-06, + "loss": 0.5833, + "step": 8297 + }, + { + "epoch": 0.6100573445081605, + "grad_norm": 0.8245167136192322, + "learning_rate": 4.876243957134263e-06, + "loss": 0.5391, + "step": 8298 + }, + { + "epoch": 0.6101308631083664, + "grad_norm": 0.8618939518928528, + "learning_rate": 4.876214015200676e-06, + "loss": 0.5287, + "step": 8299 + }, + { + "epoch": 0.6102043817085723, + "grad_norm": 0.788407027721405, + "learning_rate": 4.876184069737354e-06, + "loss": 0.5301, + "step": 8300 + }, + { + "epoch": 0.6102779003087782, + "grad_norm": 0.8390190601348877, + "learning_rate": 4.87615412074434e-06, + "loss": 0.5108, + "step": 8301 + }, + { + "epoch": 0.6103514189089839, + "grad_norm": 0.8423217535018921, + "learning_rate": 4.87612416822168e-06, + "loss": 0.6019, + "step": 8302 + }, + { + "epoch": 0.6104249375091898, + "grad_norm": 0.8612040877342224, + "learning_rate": 4.876094212169416e-06, + "loss": 0.5798, + "step": 8303 + }, + { + "epoch": 0.6104984561093957, + "grad_norm": 0.8886818289756775, + "learning_rate": 4.876064252587595e-06, + "loss": 0.5946, + "step": 8304 + }, + { + "epoch": 0.6105719747096016, + "grad_norm": 0.8442580699920654, + "learning_rate": 4.876034289476261e-06, + "loss": 0.5311, + "step": 8305 + }, + { + "epoch": 0.6106454933098073, + "grad_norm": 0.8676368594169617, + "learning_rate": 4.876004322835458e-06, + "loss": 0.5676, + "step": 8306 + }, + { + "epoch": 0.6107190119100132, + "grad_norm": 0.8194884657859802, + "learning_rate": 4.875974352665231e-06, + "loss": 0.5547, + "step": 8307 + }, + { + "epoch": 0.6107925305102191, + "grad_norm": 0.8792180418968201, + "learning_rate": 4.8759443789656235e-06, + "loss": 0.5556, + "step": 8308 + }, + { + "epoch": 0.610866049110425, + "grad_norm": 0.8203222155570984, + "learning_rate": 4.875914401736681e-06, + "loss": 0.5218, + "step": 8309 + }, + { + "epoch": 0.6109395677106307, + "grad_norm": 0.8026325702667236, + "learning_rate": 4.875884420978448e-06, + "loss": 0.5169, + "step": 8310 + }, + { + "epoch": 0.6110130863108366, + "grad_norm": 0.8032217025756836, + "learning_rate": 4.8758544366909685e-06, + "loss": 0.4981, + "step": 8311 + }, + { + "epoch": 0.6110866049110425, + "grad_norm": 0.8046801090240479, + "learning_rate": 4.875824448874288e-06, + "loss": 0.5293, + "step": 8312 + }, + { + "epoch": 0.6111601235112484, + "grad_norm": 0.8353023529052734, + "learning_rate": 4.875794457528449e-06, + "loss": 0.5183, + "step": 8313 + }, + { + "epoch": 0.6112336421114541, + "grad_norm": 0.8243984580039978, + "learning_rate": 4.875764462653499e-06, + "loss": 0.5528, + "step": 8314 + }, + { + "epoch": 0.61130716071166, + "grad_norm": 0.7858119606971741, + "learning_rate": 4.875734464249479e-06, + "loss": 0.5162, + "step": 8315 + }, + { + "epoch": 0.6113806793118659, + "grad_norm": 0.8747279047966003, + "learning_rate": 4.875704462316437e-06, + "loss": 0.6017, + "step": 8316 + }, + { + "epoch": 0.6114541979120718, + "grad_norm": 0.8022816181182861, + "learning_rate": 4.875674456854417e-06, + "loss": 0.5574, + "step": 8317 + }, + { + "epoch": 0.6115277165122776, + "grad_norm": 0.8725928068161011, + "learning_rate": 4.8756444478634615e-06, + "loss": 0.5637, + "step": 8318 + }, + { + "epoch": 0.6116012351124834, + "grad_norm": 0.8442485928535461, + "learning_rate": 4.875614435343616e-06, + "loss": 0.5436, + "step": 8319 + }, + { + "epoch": 0.6116747537126893, + "grad_norm": 0.8617422580718994, + "learning_rate": 4.875584419294926e-06, + "loss": 0.5005, + "step": 8320 + }, + { + "epoch": 0.6117482723128952, + "grad_norm": 0.8629777431488037, + "learning_rate": 4.875554399717435e-06, + "loss": 0.5267, + "step": 8321 + }, + { + "epoch": 0.611821790913101, + "grad_norm": 0.7869702577590942, + "learning_rate": 4.875524376611188e-06, + "loss": 0.5297, + "step": 8322 + }, + { + "epoch": 0.6118953095133068, + "grad_norm": 0.9057312607765198, + "learning_rate": 4.87549434997623e-06, + "loss": 0.5715, + "step": 8323 + }, + { + "epoch": 0.6119688281135127, + "grad_norm": 0.8556485772132874, + "learning_rate": 4.8754643198126045e-06, + "loss": 0.5213, + "step": 8324 + }, + { + "epoch": 0.6120423467137186, + "grad_norm": 0.8297311067581177, + "learning_rate": 4.8754342861203575e-06, + "loss": 0.5761, + "step": 8325 + }, + { + "epoch": 0.6121158653139244, + "grad_norm": 0.8594458699226379, + "learning_rate": 4.875404248899532e-06, + "loss": 0.551, + "step": 8326 + }, + { + "epoch": 0.6121893839141302, + "grad_norm": 0.8549926280975342, + "learning_rate": 4.875374208150175e-06, + "loss": 0.5321, + "step": 8327 + }, + { + "epoch": 0.6122629025143361, + "grad_norm": 0.8794614672660828, + "learning_rate": 4.875344163872329e-06, + "loss": 0.5414, + "step": 8328 + }, + { + "epoch": 0.612336421114542, + "grad_norm": 0.7757973670959473, + "learning_rate": 4.875314116066039e-06, + "loss": 0.5187, + "step": 8329 + }, + { + "epoch": 0.6124099397147479, + "grad_norm": 0.8396130800247192, + "learning_rate": 4.875284064731351e-06, + "loss": 0.5224, + "step": 8330 + }, + { + "epoch": 0.6124834583149537, + "grad_norm": 0.8241153955459595, + "learning_rate": 4.875254009868308e-06, + "loss": 0.5589, + "step": 8331 + }, + { + "epoch": 0.6125569769151595, + "grad_norm": 0.8228017687797546, + "learning_rate": 4.875223951476955e-06, + "loss": 0.554, + "step": 8332 + }, + { + "epoch": 0.6126304955153654, + "grad_norm": 0.842910647392273, + "learning_rate": 4.875193889557336e-06, + "loss": 0.5265, + "step": 8333 + }, + { + "epoch": 0.6127040141155713, + "grad_norm": 0.8229979872703552, + "learning_rate": 4.875163824109498e-06, + "loss": 0.5278, + "step": 8334 + }, + { + "epoch": 0.6127775327157771, + "grad_norm": 0.8222583532333374, + "learning_rate": 4.875133755133483e-06, + "loss": 0.5727, + "step": 8335 + }, + { + "epoch": 0.6128510513159829, + "grad_norm": 0.7976926565170288, + "learning_rate": 4.8751036826293384e-06, + "loss": 0.527, + "step": 8336 + }, + { + "epoch": 0.6129245699161888, + "grad_norm": 0.8482201099395752, + "learning_rate": 4.875073606597106e-06, + "loss": 0.5554, + "step": 8337 + }, + { + "epoch": 0.6129980885163947, + "grad_norm": 0.898810625076294, + "learning_rate": 4.875043527036832e-06, + "loss": 0.5433, + "step": 8338 + }, + { + "epoch": 0.6130716071166005, + "grad_norm": 0.8307412266731262, + "learning_rate": 4.875013443948562e-06, + "loss": 0.5471, + "step": 8339 + }, + { + "epoch": 0.6131451257168063, + "grad_norm": 0.8476461172103882, + "learning_rate": 4.874983357332339e-06, + "loss": 0.5451, + "step": 8340 + }, + { + "epoch": 0.6132186443170122, + "grad_norm": 0.8601096272468567, + "learning_rate": 4.874953267188208e-06, + "loss": 0.6272, + "step": 8341 + }, + { + "epoch": 0.6132921629172181, + "grad_norm": 0.8080236911773682, + "learning_rate": 4.874923173516214e-06, + "loss": 0.537, + "step": 8342 + }, + { + "epoch": 0.6133656815174239, + "grad_norm": 0.8475775718688965, + "learning_rate": 4.874893076316401e-06, + "loss": 0.5523, + "step": 8343 + }, + { + "epoch": 0.6134392001176298, + "grad_norm": 0.8671261072158813, + "learning_rate": 4.8748629755888155e-06, + "loss": 0.5595, + "step": 8344 + }, + { + "epoch": 0.6135127187178356, + "grad_norm": 0.8181453347206116, + "learning_rate": 4.8748328713335e-06, + "loss": 0.5208, + "step": 8345 + }, + { + "epoch": 0.6135862373180415, + "grad_norm": 0.8342750072479248, + "learning_rate": 4.874802763550501e-06, + "loss": 0.5516, + "step": 8346 + }, + { + "epoch": 0.6136597559182473, + "grad_norm": 0.8748145699501038, + "learning_rate": 4.8747726522398634e-06, + "loss": 0.5609, + "step": 8347 + }, + { + "epoch": 0.6137332745184532, + "grad_norm": 0.8153367042541504, + "learning_rate": 4.87474253740163e-06, + "loss": 0.5605, + "step": 8348 + }, + { + "epoch": 0.613806793118659, + "grad_norm": 0.8553639054298401, + "learning_rate": 4.874712419035846e-06, + "loss": 0.4838, + "step": 8349 + }, + { + "epoch": 0.6138803117188649, + "grad_norm": 0.8220316171646118, + "learning_rate": 4.874682297142559e-06, + "loss": 0.5088, + "step": 8350 + }, + { + "epoch": 0.6139538303190707, + "grad_norm": 0.8567629456520081, + "learning_rate": 4.87465217172181e-06, + "loss": 0.5372, + "step": 8351 + }, + { + "epoch": 0.6140273489192766, + "grad_norm": 0.8156628608703613, + "learning_rate": 4.874622042773644e-06, + "loss": 0.5187, + "step": 8352 + }, + { + "epoch": 0.6141008675194825, + "grad_norm": 0.8880196809768677, + "learning_rate": 4.8745919102981086e-06, + "loss": 0.5866, + "step": 8353 + }, + { + "epoch": 0.6141743861196883, + "grad_norm": 0.8590477705001831, + "learning_rate": 4.874561774295246e-06, + "loss": 0.5591, + "step": 8354 + }, + { + "epoch": 0.6142479047198941, + "grad_norm": 0.8599662184715271, + "learning_rate": 4.874531634765103e-06, + "loss": 0.5867, + "step": 8355 + }, + { + "epoch": 0.6143214233201, + "grad_norm": 0.8032275438308716, + "learning_rate": 4.874501491707722e-06, + "loss": 0.5322, + "step": 8356 + }, + { + "epoch": 0.6143949419203059, + "grad_norm": 0.8941594362258911, + "learning_rate": 4.87447134512315e-06, + "loss": 0.6116, + "step": 8357 + }, + { + "epoch": 0.6144684605205117, + "grad_norm": 0.8559192419052124, + "learning_rate": 4.8744411950114315e-06, + "loss": 0.5327, + "step": 8358 + }, + { + "epoch": 0.6145419791207175, + "grad_norm": 0.8440840840339661, + "learning_rate": 4.87441104137261e-06, + "loss": 0.5066, + "step": 8359 + }, + { + "epoch": 0.6146154977209234, + "grad_norm": 0.7857178449630737, + "learning_rate": 4.874380884206731e-06, + "loss": 0.5183, + "step": 8360 + }, + { + "epoch": 0.6146890163211293, + "grad_norm": 0.7933362126350403, + "learning_rate": 4.8743507235138385e-06, + "loss": 0.4947, + "step": 8361 + }, + { + "epoch": 0.6147625349213351, + "grad_norm": 0.8143668174743652, + "learning_rate": 4.874320559293978e-06, + "loss": 0.5238, + "step": 8362 + }, + { + "epoch": 0.6148360535215409, + "grad_norm": 0.8724967241287231, + "learning_rate": 4.874290391547195e-06, + "loss": 0.5413, + "step": 8363 + }, + { + "epoch": 0.6149095721217468, + "grad_norm": 0.864202618598938, + "learning_rate": 4.874260220273533e-06, + "loss": 0.5132, + "step": 8364 + }, + { + "epoch": 0.6149830907219527, + "grad_norm": 0.8806681036949158, + "learning_rate": 4.8742300454730386e-06, + "loss": 0.5731, + "step": 8365 + }, + { + "epoch": 0.6150566093221586, + "grad_norm": 0.8795922994613647, + "learning_rate": 4.874199867145754e-06, + "loss": 0.5365, + "step": 8366 + }, + { + "epoch": 0.6151301279223643, + "grad_norm": 0.8108832240104675, + "learning_rate": 4.8741696852917265e-06, + "loss": 0.4877, + "step": 8367 + }, + { + "epoch": 0.6152036465225702, + "grad_norm": 0.9107581377029419, + "learning_rate": 4.874139499911e-06, + "loss": 0.5759, + "step": 8368 + }, + { + "epoch": 0.6152771651227761, + "grad_norm": 0.8746891617774963, + "learning_rate": 4.874109311003619e-06, + "loss": 0.5183, + "step": 8369 + }, + { + "epoch": 0.615350683722982, + "grad_norm": 0.8520327806472778, + "learning_rate": 4.874079118569628e-06, + "loss": 0.5425, + "step": 8370 + }, + { + "epoch": 0.6154242023231877, + "grad_norm": 0.7968313097953796, + "learning_rate": 4.874048922609074e-06, + "loss": 0.5038, + "step": 8371 + }, + { + "epoch": 0.6154977209233936, + "grad_norm": 0.9246711134910583, + "learning_rate": 4.874018723121999e-06, + "loss": 0.5507, + "step": 8372 + }, + { + "epoch": 0.6155712395235995, + "grad_norm": 0.8156843781471252, + "learning_rate": 4.873988520108449e-06, + "loss": 0.5306, + "step": 8373 + }, + { + "epoch": 0.6156447581238054, + "grad_norm": 0.8096582293510437, + "learning_rate": 4.87395831356847e-06, + "loss": 0.4637, + "step": 8374 + }, + { + "epoch": 0.6157182767240111, + "grad_norm": 0.8121567964553833, + "learning_rate": 4.873928103502106e-06, + "loss": 0.5097, + "step": 8375 + }, + { + "epoch": 0.615791795324217, + "grad_norm": 0.8263013958930969, + "learning_rate": 4.873897889909401e-06, + "loss": 0.5376, + "step": 8376 + }, + { + "epoch": 0.6158653139244229, + "grad_norm": 0.9046602845191956, + "learning_rate": 4.873867672790401e-06, + "loss": 0.5532, + "step": 8377 + }, + { + "epoch": 0.6159388325246288, + "grad_norm": 0.8733171224594116, + "learning_rate": 4.873837452145151e-06, + "loss": 0.5329, + "step": 8378 + }, + { + "epoch": 0.6160123511248345, + "grad_norm": 0.8195789456367493, + "learning_rate": 4.873807227973695e-06, + "loss": 0.5653, + "step": 8379 + }, + { + "epoch": 0.6160858697250404, + "grad_norm": 0.8421615362167358, + "learning_rate": 4.873777000276078e-06, + "loss": 0.5708, + "step": 8380 + }, + { + "epoch": 0.6161593883252463, + "grad_norm": 0.9079780578613281, + "learning_rate": 4.873746769052345e-06, + "loss": 0.5612, + "step": 8381 + }, + { + "epoch": 0.6162329069254522, + "grad_norm": 0.7982900738716125, + "learning_rate": 4.873716534302542e-06, + "loss": 0.5282, + "step": 8382 + }, + { + "epoch": 0.616306425525658, + "grad_norm": 0.8592390418052673, + "learning_rate": 4.873686296026713e-06, + "loss": 0.5271, + "step": 8383 + }, + { + "epoch": 0.6163799441258638, + "grad_norm": 0.8163626194000244, + "learning_rate": 4.873656054224903e-06, + "loss": 0.471, + "step": 8384 + }, + { + "epoch": 0.6164534627260697, + "grad_norm": 0.8470133543014526, + "learning_rate": 4.873625808897157e-06, + "loss": 0.5364, + "step": 8385 + }, + { + "epoch": 0.6165269813262756, + "grad_norm": 0.8465301394462585, + "learning_rate": 4.873595560043519e-06, + "loss": 0.5685, + "step": 8386 + }, + { + "epoch": 0.6166004999264814, + "grad_norm": 0.8667528629302979, + "learning_rate": 4.8735653076640356e-06, + "loss": 0.5604, + "step": 8387 + }, + { + "epoch": 0.6166740185266872, + "grad_norm": 0.8392096161842346, + "learning_rate": 4.87353505175875e-06, + "loss": 0.5299, + "step": 8388 + }, + { + "epoch": 0.6167475371268931, + "grad_norm": 0.8143882155418396, + "learning_rate": 4.873504792327709e-06, + "loss": 0.4858, + "step": 8389 + }, + { + "epoch": 0.616821055727099, + "grad_norm": 0.8338658213615417, + "learning_rate": 4.873474529370956e-06, + "loss": 0.5495, + "step": 8390 + }, + { + "epoch": 0.6168945743273048, + "grad_norm": 0.8523336052894592, + "learning_rate": 4.873444262888537e-06, + "loss": 0.5717, + "step": 8391 + }, + { + "epoch": 0.6169680929275106, + "grad_norm": 0.8256091475486755, + "learning_rate": 4.873413992880497e-06, + "loss": 0.5351, + "step": 8392 + }, + { + "epoch": 0.6170416115277165, + "grad_norm": 0.8852195739746094, + "learning_rate": 4.873383719346879e-06, + "loss": 0.5691, + "step": 8393 + }, + { + "epoch": 0.6171151301279224, + "grad_norm": 0.8974267244338989, + "learning_rate": 4.87335344228773e-06, + "loss": 0.5476, + "step": 8394 + }, + { + "epoch": 0.6171886487281282, + "grad_norm": 0.8095411062240601, + "learning_rate": 4.873323161703095e-06, + "loss": 0.519, + "step": 8395 + }, + { + "epoch": 0.617262167328334, + "grad_norm": 0.8637257218360901, + "learning_rate": 4.873292877593019e-06, + "loss": 0.5352, + "step": 8396 + }, + { + "epoch": 0.6173356859285399, + "grad_norm": 0.8659559488296509, + "learning_rate": 4.873262589957545e-06, + "loss": 0.5618, + "step": 8397 + }, + { + "epoch": 0.6174092045287458, + "grad_norm": 0.8721010088920593, + "learning_rate": 4.87323229879672e-06, + "loss": 0.5544, + "step": 8398 + }, + { + "epoch": 0.6174827231289516, + "grad_norm": 0.8374906182289124, + "learning_rate": 4.873202004110588e-06, + "loss": 0.5274, + "step": 8399 + }, + { + "epoch": 0.6175562417291575, + "grad_norm": 0.8135314583778381, + "learning_rate": 4.8731717058991945e-06, + "loss": 0.5343, + "step": 8400 + }, + { + "epoch": 0.6176297603293633, + "grad_norm": 0.808512806892395, + "learning_rate": 4.873141404162584e-06, + "loss": 0.502, + "step": 8401 + }, + { + "epoch": 0.6177032789295692, + "grad_norm": 0.8884798884391785, + "learning_rate": 4.873111098900804e-06, + "loss": 0.5538, + "step": 8402 + }, + { + "epoch": 0.617776797529775, + "grad_norm": 0.7998476624488831, + "learning_rate": 4.873080790113895e-06, + "loss": 0.5266, + "step": 8403 + }, + { + "epoch": 0.6178503161299809, + "grad_norm": 0.8314153552055359, + "learning_rate": 4.873050477801905e-06, + "loss": 0.5475, + "step": 8404 + }, + { + "epoch": 0.6179238347301867, + "grad_norm": 0.8366242051124573, + "learning_rate": 4.873020161964879e-06, + "loss": 0.545, + "step": 8405 + }, + { + "epoch": 0.6179973533303926, + "grad_norm": 0.8538297414779663, + "learning_rate": 4.872989842602861e-06, + "loss": 0.5333, + "step": 8406 + }, + { + "epoch": 0.6180708719305984, + "grad_norm": 0.877297580242157, + "learning_rate": 4.872959519715898e-06, + "loss": 0.5797, + "step": 8407 + }, + { + "epoch": 0.6181443905308043, + "grad_norm": 0.8317930698394775, + "learning_rate": 4.872929193304031e-06, + "loss": 0.5239, + "step": 8408 + }, + { + "epoch": 0.6182179091310102, + "grad_norm": 0.8342764377593994, + "learning_rate": 4.872898863367309e-06, + "loss": 0.5263, + "step": 8409 + }, + { + "epoch": 0.618291427731216, + "grad_norm": 0.7986536622047424, + "learning_rate": 4.872868529905776e-06, + "loss": 0.5139, + "step": 8410 + }, + { + "epoch": 0.6183649463314218, + "grad_norm": 0.9298335313796997, + "learning_rate": 4.872838192919476e-06, + "loss": 0.5907, + "step": 8411 + }, + { + "epoch": 0.6184384649316277, + "grad_norm": 0.9148679971694946, + "learning_rate": 4.872807852408456e-06, + "loss": 0.5904, + "step": 8412 + }, + { + "epoch": 0.6185119835318336, + "grad_norm": 0.8409736752510071, + "learning_rate": 4.872777508372758e-06, + "loss": 0.5213, + "step": 8413 + }, + { + "epoch": 0.6185855021320394, + "grad_norm": 0.8501464128494263, + "learning_rate": 4.872747160812431e-06, + "loss": 0.5496, + "step": 8414 + }, + { + "epoch": 0.6186590207322452, + "grad_norm": 0.8502265810966492, + "learning_rate": 4.872716809727516e-06, + "loss": 0.545, + "step": 8415 + }, + { + "epoch": 0.6187325393324511, + "grad_norm": 0.806853711605072, + "learning_rate": 4.872686455118062e-06, + "loss": 0.5727, + "step": 8416 + }, + { + "epoch": 0.618806057932657, + "grad_norm": 0.8602796196937561, + "learning_rate": 4.872656096984111e-06, + "loss": 0.5455, + "step": 8417 + }, + { + "epoch": 0.6188795765328629, + "grad_norm": 0.8359206914901733, + "learning_rate": 4.872625735325709e-06, + "loss": 0.5564, + "step": 8418 + }, + { + "epoch": 0.6189530951330686, + "grad_norm": 0.803878903388977, + "learning_rate": 4.872595370142902e-06, + "loss": 0.5234, + "step": 8419 + }, + { + "epoch": 0.6190266137332745, + "grad_norm": 0.844837486743927, + "learning_rate": 4.872565001435735e-06, + "loss": 0.5331, + "step": 8420 + }, + { + "epoch": 0.6191001323334804, + "grad_norm": 0.8151933550834656, + "learning_rate": 4.8725346292042515e-06, + "loss": 0.5603, + "step": 8421 + }, + { + "epoch": 0.6191736509336863, + "grad_norm": 0.879963755607605, + "learning_rate": 4.8725042534484985e-06, + "loss": 0.5572, + "step": 8422 + }, + { + "epoch": 0.619247169533892, + "grad_norm": 0.8753535151481628, + "learning_rate": 4.87247387416852e-06, + "loss": 0.5424, + "step": 8423 + }, + { + "epoch": 0.6193206881340979, + "grad_norm": 0.8723174333572388, + "learning_rate": 4.872443491364362e-06, + "loss": 0.6013, + "step": 8424 + }, + { + "epoch": 0.6193942067343038, + "grad_norm": 0.8104709982872009, + "learning_rate": 4.872413105036068e-06, + "loss": 0.5084, + "step": 8425 + }, + { + "epoch": 0.6194677253345097, + "grad_norm": 0.8653547167778015, + "learning_rate": 4.872382715183685e-06, + "loss": 0.5552, + "step": 8426 + }, + { + "epoch": 0.6195412439347154, + "grad_norm": 0.8154970407485962, + "learning_rate": 4.8723523218072574e-06, + "loss": 0.5153, + "step": 8427 + }, + { + "epoch": 0.6196147625349213, + "grad_norm": 0.8646687269210815, + "learning_rate": 4.872321924906831e-06, + "loss": 0.5065, + "step": 8428 + }, + { + "epoch": 0.6196882811351272, + "grad_norm": 0.7965273261070251, + "learning_rate": 4.872291524482449e-06, + "loss": 0.5253, + "step": 8429 + }, + { + "epoch": 0.6197617997353331, + "grad_norm": 0.8301038146018982, + "learning_rate": 4.872261120534159e-06, + "loss": 0.5351, + "step": 8430 + }, + { + "epoch": 0.6198353183355388, + "grad_norm": 0.815445601940155, + "learning_rate": 4.872230713062004e-06, + "loss": 0.5032, + "step": 8431 + }, + { + "epoch": 0.6199088369357447, + "grad_norm": 0.8395422697067261, + "learning_rate": 4.872200302066031e-06, + "loss": 0.5515, + "step": 8432 + }, + { + "epoch": 0.6199823555359506, + "grad_norm": 0.8717793822288513, + "learning_rate": 4.872169887546284e-06, + "loss": 0.5555, + "step": 8433 + }, + { + "epoch": 0.6200558741361565, + "grad_norm": 0.8257295489311218, + "learning_rate": 4.872139469502808e-06, + "loss": 0.5054, + "step": 8434 + }, + { + "epoch": 0.6201293927363623, + "grad_norm": 0.8802221417427063, + "learning_rate": 4.872109047935649e-06, + "loss": 0.5402, + "step": 8435 + }, + { + "epoch": 0.6202029113365681, + "grad_norm": 0.8358039855957031, + "learning_rate": 4.872078622844853e-06, + "loss": 0.5527, + "step": 8436 + }, + { + "epoch": 0.620276429936774, + "grad_norm": 0.8068553805351257, + "learning_rate": 4.872048194230462e-06, + "loss": 0.5116, + "step": 8437 + }, + { + "epoch": 0.6203499485369799, + "grad_norm": 0.8739265203475952, + "learning_rate": 4.872017762092525e-06, + "loss": 0.5511, + "step": 8438 + }, + { + "epoch": 0.6204234671371857, + "grad_norm": 0.7989152073860168, + "learning_rate": 4.871987326431085e-06, + "loss": 0.5386, + "step": 8439 + }, + { + "epoch": 0.6204969857373915, + "grad_norm": 0.8195461630821228, + "learning_rate": 4.871956887246188e-06, + "loss": 0.5102, + "step": 8440 + }, + { + "epoch": 0.6205705043375974, + "grad_norm": 0.8564819693565369, + "learning_rate": 4.8719264445378775e-06, + "loss": 0.5872, + "step": 8441 + }, + { + "epoch": 0.6206440229378033, + "grad_norm": 0.7989869117736816, + "learning_rate": 4.8718959983062015e-06, + "loss": 0.5426, + "step": 8442 + }, + { + "epoch": 0.6207175415380091, + "grad_norm": 0.9044651389122009, + "learning_rate": 4.871865548551204e-06, + "loss": 0.5509, + "step": 8443 + }, + { + "epoch": 0.620791060138215, + "grad_norm": 0.8495839238166809, + "learning_rate": 4.87183509527293e-06, + "loss": 0.5413, + "step": 8444 + }, + { + "epoch": 0.6208645787384208, + "grad_norm": 0.8685294985771179, + "learning_rate": 4.8718046384714244e-06, + "loss": 0.5608, + "step": 8445 + }, + { + "epoch": 0.6209380973386267, + "grad_norm": 0.856541097164154, + "learning_rate": 4.871774178146733e-06, + "loss": 0.5455, + "step": 8446 + }, + { + "epoch": 0.6210116159388325, + "grad_norm": 0.8053032755851746, + "learning_rate": 4.871743714298901e-06, + "loss": 0.5083, + "step": 8447 + }, + { + "epoch": 0.6210851345390384, + "grad_norm": 0.8087243437767029, + "learning_rate": 4.871713246927974e-06, + "loss": 0.4925, + "step": 8448 + }, + { + "epoch": 0.6211586531392442, + "grad_norm": 0.7934030890464783, + "learning_rate": 4.871682776033996e-06, + "loss": 0.5346, + "step": 8449 + }, + { + "epoch": 0.6212321717394501, + "grad_norm": 0.8113446235656738, + "learning_rate": 4.8716523016170135e-06, + "loss": 0.5527, + "step": 8450 + }, + { + "epoch": 0.6213056903396559, + "grad_norm": 0.8264328837394714, + "learning_rate": 4.871621823677072e-06, + "loss": 0.5702, + "step": 8451 + }, + { + "epoch": 0.6213792089398618, + "grad_norm": 0.8338186740875244, + "learning_rate": 4.871591342214215e-06, + "loss": 0.5352, + "step": 8452 + }, + { + "epoch": 0.6214527275400676, + "grad_norm": 0.8360042572021484, + "learning_rate": 4.8715608572284895e-06, + "loss": 0.5541, + "step": 8453 + }, + { + "epoch": 0.6215262461402735, + "grad_norm": 0.8686249256134033, + "learning_rate": 4.87153036871994e-06, + "loss": 0.5716, + "step": 8454 + }, + { + "epoch": 0.6215997647404793, + "grad_norm": 0.8171567916870117, + "learning_rate": 4.871499876688612e-06, + "loss": 0.5078, + "step": 8455 + }, + { + "epoch": 0.6216732833406852, + "grad_norm": 0.863274872303009, + "learning_rate": 4.871469381134552e-06, + "loss": 0.566, + "step": 8456 + }, + { + "epoch": 0.621746801940891, + "grad_norm": 0.8323422074317932, + "learning_rate": 4.871438882057803e-06, + "loss": 0.568, + "step": 8457 + }, + { + "epoch": 0.6218203205410969, + "grad_norm": 0.8170669078826904, + "learning_rate": 4.871408379458411e-06, + "loss": 0.4943, + "step": 8458 + }, + { + "epoch": 0.6218938391413027, + "grad_norm": 0.8518962264060974, + "learning_rate": 4.871377873336422e-06, + "loss": 0.5275, + "step": 8459 + }, + { + "epoch": 0.6219673577415086, + "grad_norm": 0.8259677290916443, + "learning_rate": 4.871347363691882e-06, + "loss": 0.5429, + "step": 8460 + }, + { + "epoch": 0.6220408763417145, + "grad_norm": 0.8445767164230347, + "learning_rate": 4.8713168505248345e-06, + "loss": 0.559, + "step": 8461 + }, + { + "epoch": 0.6221143949419203, + "grad_norm": 0.8452045917510986, + "learning_rate": 4.871286333835326e-06, + "loss": 0.5496, + "step": 8462 + }, + { + "epoch": 0.6221879135421261, + "grad_norm": 0.8244921565055847, + "learning_rate": 4.871255813623401e-06, + "loss": 0.5504, + "step": 8463 + }, + { + "epoch": 0.622261432142332, + "grad_norm": 0.8638094067573547, + "learning_rate": 4.871225289889105e-06, + "loss": 0.5649, + "step": 8464 + }, + { + "epoch": 0.6223349507425379, + "grad_norm": 0.9336277842521667, + "learning_rate": 4.871194762632484e-06, + "loss": 0.6103, + "step": 8465 + }, + { + "epoch": 0.6224084693427437, + "grad_norm": 0.8374253511428833, + "learning_rate": 4.8711642318535835e-06, + "loss": 0.548, + "step": 8466 + }, + { + "epoch": 0.6224819879429496, + "grad_norm": 0.8838309049606323, + "learning_rate": 4.871133697552448e-06, + "loss": 0.4968, + "step": 8467 + }, + { + "epoch": 0.6225555065431554, + "grad_norm": 0.8033251762390137, + "learning_rate": 4.871103159729123e-06, + "loss": 0.5118, + "step": 8468 + }, + { + "epoch": 0.6226290251433613, + "grad_norm": 0.8295712471008301, + "learning_rate": 4.8710726183836545e-06, + "loss": 0.5112, + "step": 8469 + }, + { + "epoch": 0.6227025437435671, + "grad_norm": 0.9055920243263245, + "learning_rate": 4.871042073516088e-06, + "loss": 0.573, + "step": 8470 + }, + { + "epoch": 0.622776062343773, + "grad_norm": 0.8226386308670044, + "learning_rate": 4.871011525126468e-06, + "loss": 0.5156, + "step": 8471 + }, + { + "epoch": 0.6228495809439788, + "grad_norm": 0.7951532602310181, + "learning_rate": 4.87098097321484e-06, + "loss": 0.5198, + "step": 8472 + }, + { + "epoch": 0.6229230995441847, + "grad_norm": 0.8304076790809631, + "learning_rate": 4.8709504177812495e-06, + "loss": 0.5447, + "step": 8473 + }, + { + "epoch": 0.6229966181443906, + "grad_norm": 0.8955786228179932, + "learning_rate": 4.870919858825742e-06, + "loss": 0.5891, + "step": 8474 + }, + { + "epoch": 0.6230701367445964, + "grad_norm": 0.8501465320587158, + "learning_rate": 4.870889296348363e-06, + "loss": 0.5793, + "step": 8475 + }, + { + "epoch": 0.6231436553448022, + "grad_norm": 0.8351402282714844, + "learning_rate": 4.870858730349157e-06, + "loss": 0.5061, + "step": 8476 + }, + { + "epoch": 0.6232171739450081, + "grad_norm": 0.8346958160400391, + "learning_rate": 4.870828160828172e-06, + "loss": 0.5232, + "step": 8477 + }, + { + "epoch": 0.623290692545214, + "grad_norm": 0.8384886980056763, + "learning_rate": 4.870797587785449e-06, + "loss": 0.5415, + "step": 8478 + }, + { + "epoch": 0.6233642111454198, + "grad_norm": 0.7780659794807434, + "learning_rate": 4.870767011221038e-06, + "loss": 0.5178, + "step": 8479 + }, + { + "epoch": 0.6234377297456256, + "grad_norm": 0.8229281902313232, + "learning_rate": 4.870736431134983e-06, + "loss": 0.5302, + "step": 8480 + }, + { + "epoch": 0.6235112483458315, + "grad_norm": 0.8723054528236389, + "learning_rate": 4.8707058475273275e-06, + "loss": 0.5748, + "step": 8481 + }, + { + "epoch": 0.6235847669460374, + "grad_norm": 0.7993925213813782, + "learning_rate": 4.870675260398118e-06, + "loss": 0.5193, + "step": 8482 + }, + { + "epoch": 0.6236582855462433, + "grad_norm": 0.8582591414451599, + "learning_rate": 4.8706446697474004e-06, + "loss": 0.5637, + "step": 8483 + }, + { + "epoch": 0.623731804146449, + "grad_norm": 0.8694007992744446, + "learning_rate": 4.870614075575221e-06, + "loss": 0.5654, + "step": 8484 + }, + { + "epoch": 0.6238053227466549, + "grad_norm": 0.8131874799728394, + "learning_rate": 4.870583477881623e-06, + "loss": 0.5585, + "step": 8485 + }, + { + "epoch": 0.6238788413468608, + "grad_norm": 0.8563418388366699, + "learning_rate": 4.870552876666654e-06, + "loss": 0.5147, + "step": 8486 + }, + { + "epoch": 0.6239523599470667, + "grad_norm": 0.8347798585891724, + "learning_rate": 4.870522271930358e-06, + "loss": 0.5108, + "step": 8487 + }, + { + "epoch": 0.6240258785472724, + "grad_norm": 0.9507817625999451, + "learning_rate": 4.870491663672781e-06, + "loss": 0.6042, + "step": 8488 + }, + { + "epoch": 0.6240993971474783, + "grad_norm": 0.9479788541793823, + "learning_rate": 4.870461051893968e-06, + "loss": 0.6035, + "step": 8489 + }, + { + "epoch": 0.6241729157476842, + "grad_norm": 0.8545436263084412, + "learning_rate": 4.870430436593966e-06, + "loss": 0.5448, + "step": 8490 + }, + { + "epoch": 0.6242464343478901, + "grad_norm": 0.8674505352973938, + "learning_rate": 4.870399817772818e-06, + "loss": 0.5667, + "step": 8491 + }, + { + "epoch": 0.6243199529480958, + "grad_norm": 0.9437742829322815, + "learning_rate": 4.870369195430572e-06, + "loss": 0.5344, + "step": 8492 + }, + { + "epoch": 0.6243934715483017, + "grad_norm": 0.8045768737792969, + "learning_rate": 4.870338569567273e-06, + "loss": 0.5069, + "step": 8493 + }, + { + "epoch": 0.6244669901485076, + "grad_norm": 0.8272260427474976, + "learning_rate": 4.8703079401829635e-06, + "loss": 0.5421, + "step": 8494 + }, + { + "epoch": 0.6245405087487135, + "grad_norm": 0.8223649859428406, + "learning_rate": 4.8702773072776935e-06, + "loss": 0.55, + "step": 8495 + }, + { + "epoch": 0.6246140273489192, + "grad_norm": 0.8676326274871826, + "learning_rate": 4.870246670851505e-06, + "loss": 0.5615, + "step": 8496 + }, + { + "epoch": 0.6246875459491251, + "grad_norm": 0.873992383480072, + "learning_rate": 4.870216030904445e-06, + "loss": 0.5605, + "step": 8497 + }, + { + "epoch": 0.624761064549331, + "grad_norm": 0.8292526006698608, + "learning_rate": 4.87018538743656e-06, + "loss": 0.5573, + "step": 8498 + }, + { + "epoch": 0.6248345831495369, + "grad_norm": 0.88022381067276, + "learning_rate": 4.870154740447893e-06, + "loss": 0.5805, + "step": 8499 + }, + { + "epoch": 0.6249081017497427, + "grad_norm": 0.9137457013130188, + "learning_rate": 4.870124089938492e-06, + "loss": 0.5764, + "step": 8500 + }, + { + "epoch": 0.6249816203499485, + "grad_norm": 0.849327564239502, + "learning_rate": 4.8700934359084015e-06, + "loss": 0.5362, + "step": 8501 + }, + { + "epoch": 0.6250551389501544, + "grad_norm": 0.8024266958236694, + "learning_rate": 4.870062778357666e-06, + "loss": 0.5057, + "step": 8502 + }, + { + "epoch": 0.6251286575503603, + "grad_norm": 0.8451548218727112, + "learning_rate": 4.870032117286333e-06, + "loss": 0.5643, + "step": 8503 + }, + { + "epoch": 0.6252021761505661, + "grad_norm": 0.793315589427948, + "learning_rate": 4.8700014526944465e-06, + "loss": 0.5192, + "step": 8504 + }, + { + "epoch": 0.6252756947507719, + "grad_norm": 0.8280699253082275, + "learning_rate": 4.869970784582053e-06, + "loss": 0.4824, + "step": 8505 + }, + { + "epoch": 0.6253492133509778, + "grad_norm": 0.838122546672821, + "learning_rate": 4.869940112949197e-06, + "loss": 0.5672, + "step": 8506 + }, + { + "epoch": 0.6254227319511837, + "grad_norm": 0.8388272523880005, + "learning_rate": 4.8699094377959255e-06, + "loss": 0.5109, + "step": 8507 + }, + { + "epoch": 0.6254962505513895, + "grad_norm": 0.9740699529647827, + "learning_rate": 4.869878759122283e-06, + "loss": 0.5913, + "step": 8508 + }, + { + "epoch": 0.6255697691515953, + "grad_norm": 0.8490765690803528, + "learning_rate": 4.869848076928315e-06, + "loss": 0.5473, + "step": 8509 + }, + { + "epoch": 0.6256432877518012, + "grad_norm": 0.8717851638793945, + "learning_rate": 4.869817391214068e-06, + "loss": 0.5509, + "step": 8510 + }, + { + "epoch": 0.6257168063520071, + "grad_norm": 0.8601171374320984, + "learning_rate": 4.869786701979587e-06, + "loss": 0.5425, + "step": 8511 + }, + { + "epoch": 0.6257903249522129, + "grad_norm": 0.8728743195533752, + "learning_rate": 4.869756009224917e-06, + "loss": 0.5579, + "step": 8512 + }, + { + "epoch": 0.6258638435524188, + "grad_norm": 0.8506078124046326, + "learning_rate": 4.869725312950105e-06, + "loss": 0.5449, + "step": 8513 + }, + { + "epoch": 0.6259373621526246, + "grad_norm": 0.9373046159744263, + "learning_rate": 4.869694613155196e-06, + "loss": 0.5696, + "step": 8514 + }, + { + "epoch": 0.6260108807528305, + "grad_norm": 0.865349292755127, + "learning_rate": 4.869663909840234e-06, + "loss": 0.5341, + "step": 8515 + }, + { + "epoch": 0.6260843993530363, + "grad_norm": 0.7607935667037964, + "learning_rate": 4.869633203005267e-06, + "loss": 0.4982, + "step": 8516 + }, + { + "epoch": 0.6261579179532422, + "grad_norm": 0.873094916343689, + "learning_rate": 4.86960249265034e-06, + "loss": 0.5722, + "step": 8517 + }, + { + "epoch": 0.626231436553448, + "grad_norm": 0.841196596622467, + "learning_rate": 4.8695717787754975e-06, + "loss": 0.5784, + "step": 8518 + }, + { + "epoch": 0.6263049551536539, + "grad_norm": 0.8271140456199646, + "learning_rate": 4.8695410613807865e-06, + "loss": 0.5625, + "step": 8519 + }, + { + "epoch": 0.6263784737538597, + "grad_norm": 0.8365759253501892, + "learning_rate": 4.869510340466251e-06, + "loss": 0.5362, + "step": 8520 + }, + { + "epoch": 0.6264519923540656, + "grad_norm": 0.8445186018943787, + "learning_rate": 4.869479616031939e-06, + "loss": 0.5376, + "step": 8521 + }, + { + "epoch": 0.6265255109542714, + "grad_norm": 0.8295090198516846, + "learning_rate": 4.869448888077894e-06, + "loss": 0.5128, + "step": 8522 + }, + { + "epoch": 0.6265990295544773, + "grad_norm": 0.8792850971221924, + "learning_rate": 4.869418156604162e-06, + "loss": 0.5817, + "step": 8523 + }, + { + "epoch": 0.6266725481546831, + "grad_norm": 0.8074317574501038, + "learning_rate": 4.86938742161079e-06, + "loss": 0.5305, + "step": 8524 + }, + { + "epoch": 0.626746066754889, + "grad_norm": 0.8490687012672424, + "learning_rate": 4.869356683097822e-06, + "loss": 0.5061, + "step": 8525 + }, + { + "epoch": 0.6268195853550949, + "grad_norm": 0.8525062203407288, + "learning_rate": 4.869325941065305e-06, + "loss": 0.5744, + "step": 8526 + }, + { + "epoch": 0.6268931039553007, + "grad_norm": 0.8152050971984863, + "learning_rate": 4.869295195513284e-06, + "loss": 0.5745, + "step": 8527 + }, + { + "epoch": 0.6269666225555065, + "grad_norm": 0.8548049926757812, + "learning_rate": 4.869264446441804e-06, + "loss": 0.5802, + "step": 8528 + }, + { + "epoch": 0.6270401411557124, + "grad_norm": 0.8573013544082642, + "learning_rate": 4.869233693850912e-06, + "loss": 0.5313, + "step": 8529 + }, + { + "epoch": 0.6271136597559183, + "grad_norm": 0.8392548561096191, + "learning_rate": 4.869202937740654e-06, + "loss": 0.5644, + "step": 8530 + }, + { + "epoch": 0.6271871783561241, + "grad_norm": 0.8855056762695312, + "learning_rate": 4.869172178111073e-06, + "loss": 0.471, + "step": 8531 + }, + { + "epoch": 0.6272606969563299, + "grad_norm": 0.7973986268043518, + "learning_rate": 4.869141414962218e-06, + "loss": 0.4767, + "step": 8532 + }, + { + "epoch": 0.6273342155565358, + "grad_norm": 0.8079444169998169, + "learning_rate": 4.8691106482941315e-06, + "loss": 0.5218, + "step": 8533 + }, + { + "epoch": 0.6274077341567417, + "grad_norm": 0.8433271050453186, + "learning_rate": 4.869079878106861e-06, + "loss": 0.5418, + "step": 8534 + }, + { + "epoch": 0.6274812527569475, + "grad_norm": 0.802934467792511, + "learning_rate": 4.869049104400454e-06, + "loss": 0.5526, + "step": 8535 + }, + { + "epoch": 0.6275547713571533, + "grad_norm": 0.8801777958869934, + "learning_rate": 4.8690183271749525e-06, + "loss": 0.533, + "step": 8536 + }, + { + "epoch": 0.6276282899573592, + "grad_norm": 0.8306606411933899, + "learning_rate": 4.868987546430405e-06, + "loss": 0.5557, + "step": 8537 + }, + { + "epoch": 0.6277018085575651, + "grad_norm": 0.8456841111183167, + "learning_rate": 4.868956762166855e-06, + "loss": 0.5758, + "step": 8538 + }, + { + "epoch": 0.627775327157771, + "grad_norm": 0.8961557745933533, + "learning_rate": 4.868925974384351e-06, + "loss": 0.5866, + "step": 8539 + }, + { + "epoch": 0.6278488457579767, + "grad_norm": 0.8611022233963013, + "learning_rate": 4.8688951830829365e-06, + "loss": 0.5196, + "step": 8540 + }, + { + "epoch": 0.6279223643581826, + "grad_norm": 0.8716205954551697, + "learning_rate": 4.868864388262657e-06, + "loss": 0.5309, + "step": 8541 + }, + { + "epoch": 0.6279958829583885, + "grad_norm": 0.8714290857315063, + "learning_rate": 4.86883358992356e-06, + "loss": 0.5665, + "step": 8542 + }, + { + "epoch": 0.6280694015585944, + "grad_norm": 0.8597114682197571, + "learning_rate": 4.868802788065691e-06, + "loss": 0.5279, + "step": 8543 + }, + { + "epoch": 0.6281429201588001, + "grad_norm": 0.8221266865730286, + "learning_rate": 4.8687719826890944e-06, + "loss": 0.4983, + "step": 8544 + }, + { + "epoch": 0.628216438759006, + "grad_norm": 0.8402964472770691, + "learning_rate": 4.868741173793817e-06, + "loss": 0.5668, + "step": 8545 + }, + { + "epoch": 0.6282899573592119, + "grad_norm": 0.853693962097168, + "learning_rate": 4.868710361379905e-06, + "loss": 0.5078, + "step": 8546 + }, + { + "epoch": 0.6283634759594178, + "grad_norm": 0.8776395916938782, + "learning_rate": 4.868679545447402e-06, + "loss": 0.5314, + "step": 8547 + }, + { + "epoch": 0.6284369945596235, + "grad_norm": 0.8053328394889832, + "learning_rate": 4.868648725996356e-06, + "loss": 0.5133, + "step": 8548 + }, + { + "epoch": 0.6285105131598294, + "grad_norm": 0.812279999256134, + "learning_rate": 4.8686179030268125e-06, + "loss": 0.5492, + "step": 8549 + }, + { + "epoch": 0.6285840317600353, + "grad_norm": 0.7989631295204163, + "learning_rate": 4.868587076538816e-06, + "loss": 0.5661, + "step": 8550 + }, + { + "epoch": 0.6286575503602412, + "grad_norm": 0.8459657430648804, + "learning_rate": 4.868556246532413e-06, + "loss": 0.5757, + "step": 8551 + }, + { + "epoch": 0.628731068960447, + "grad_norm": 0.8766946196556091, + "learning_rate": 4.86852541300765e-06, + "loss": 0.5639, + "step": 8552 + }, + { + "epoch": 0.6288045875606528, + "grad_norm": 0.8401038646697998, + "learning_rate": 4.868494575964572e-06, + "loss": 0.5255, + "step": 8553 + }, + { + "epoch": 0.6288781061608587, + "grad_norm": 0.8397991061210632, + "learning_rate": 4.868463735403225e-06, + "loss": 0.5515, + "step": 8554 + }, + { + "epoch": 0.6289516247610646, + "grad_norm": 0.8777183294296265, + "learning_rate": 4.8684328913236545e-06, + "loss": 0.518, + "step": 8555 + }, + { + "epoch": 0.6290251433612704, + "grad_norm": 0.8751824498176575, + "learning_rate": 4.868402043725907e-06, + "loss": 0.6006, + "step": 8556 + }, + { + "epoch": 0.6290986619614762, + "grad_norm": 0.8178186416625977, + "learning_rate": 4.868371192610028e-06, + "loss": 0.5134, + "step": 8557 + }, + { + "epoch": 0.6291721805616821, + "grad_norm": 0.830837607383728, + "learning_rate": 4.868340337976063e-06, + "loss": 0.5717, + "step": 8558 + }, + { + "epoch": 0.629245699161888, + "grad_norm": 0.8163244128227234, + "learning_rate": 4.868309479824058e-06, + "loss": 0.5569, + "step": 8559 + }, + { + "epoch": 0.6293192177620938, + "grad_norm": 0.8186240196228027, + "learning_rate": 4.86827861815406e-06, + "loss": 0.5025, + "step": 8560 + }, + { + "epoch": 0.6293927363622996, + "grad_norm": 0.8709843754768372, + "learning_rate": 4.868247752966113e-06, + "loss": 0.5879, + "step": 8561 + }, + { + "epoch": 0.6294662549625055, + "grad_norm": 0.8648738265037537, + "learning_rate": 4.868216884260264e-06, + "loss": 0.563, + "step": 8562 + }, + { + "epoch": 0.6295397735627114, + "grad_norm": 0.8246496319770813, + "learning_rate": 4.868186012036558e-06, + "loss": 0.5192, + "step": 8563 + }, + { + "epoch": 0.6296132921629172, + "grad_norm": 0.8711349368095398, + "learning_rate": 4.868155136295041e-06, + "loss": 0.528, + "step": 8564 + }, + { + "epoch": 0.629686810763123, + "grad_norm": 0.8683552742004395, + "learning_rate": 4.86812425703576e-06, + "loss": 0.5579, + "step": 8565 + }, + { + "epoch": 0.6297603293633289, + "grad_norm": 0.9214417338371277, + "learning_rate": 4.86809337425876e-06, + "loss": 0.5309, + "step": 8566 + }, + { + "epoch": 0.6298338479635348, + "grad_norm": 0.8521795868873596, + "learning_rate": 4.868062487964086e-06, + "loss": 0.5402, + "step": 8567 + }, + { + "epoch": 0.6299073665637406, + "grad_norm": 0.8346957564353943, + "learning_rate": 4.868031598151786e-06, + "loss": 0.5594, + "step": 8568 + }, + { + "epoch": 0.6299808851639465, + "grad_norm": 0.858773410320282, + "learning_rate": 4.868000704821905e-06, + "loss": 0.5761, + "step": 8569 + }, + { + "epoch": 0.6300544037641523, + "grad_norm": 0.8438048958778381, + "learning_rate": 4.867969807974488e-06, + "loss": 0.527, + "step": 8570 + }, + { + "epoch": 0.6301279223643582, + "grad_norm": 0.8429308533668518, + "learning_rate": 4.867938907609581e-06, + "loss": 0.5485, + "step": 8571 + }, + { + "epoch": 0.630201440964564, + "grad_norm": 0.816355288028717, + "learning_rate": 4.867908003727231e-06, + "loss": 0.5499, + "step": 8572 + }, + { + "epoch": 0.6302749595647699, + "grad_norm": 0.8989488482475281, + "learning_rate": 4.867877096327484e-06, + "loss": 0.583, + "step": 8573 + }, + { + "epoch": 0.6303484781649757, + "grad_norm": 0.8472263813018799, + "learning_rate": 4.867846185410384e-06, + "loss": 0.5225, + "step": 8574 + }, + { + "epoch": 0.6304219967651816, + "grad_norm": 0.8762685656547546, + "learning_rate": 4.867815270975978e-06, + "loss": 0.5315, + "step": 8575 + }, + { + "epoch": 0.6304955153653874, + "grad_norm": 0.8675025701522827, + "learning_rate": 4.867784353024313e-06, + "loss": 0.551, + "step": 8576 + }, + { + "epoch": 0.6305690339655933, + "grad_norm": 0.834098756313324, + "learning_rate": 4.867753431555434e-06, + "loss": 0.5398, + "step": 8577 + }, + { + "epoch": 0.6306425525657992, + "grad_norm": 0.8103854060173035, + "learning_rate": 4.867722506569387e-06, + "loss": 0.5528, + "step": 8578 + }, + { + "epoch": 0.630716071166005, + "grad_norm": 0.8584461212158203, + "learning_rate": 4.867691578066217e-06, + "loss": 0.5756, + "step": 8579 + }, + { + "epoch": 0.6307895897662108, + "grad_norm": 0.8528128266334534, + "learning_rate": 4.867660646045972e-06, + "loss": 0.572, + "step": 8580 + }, + { + "epoch": 0.6308631083664167, + "grad_norm": 0.8328444957733154, + "learning_rate": 4.867629710508696e-06, + "loss": 0.5507, + "step": 8581 + }, + { + "epoch": 0.6309366269666226, + "grad_norm": 0.8120518922805786, + "learning_rate": 4.8675987714544345e-06, + "loss": 0.5449, + "step": 8582 + }, + { + "epoch": 0.6310101455668284, + "grad_norm": 0.8702962398529053, + "learning_rate": 4.867567828883236e-06, + "loss": 0.5955, + "step": 8583 + }, + { + "epoch": 0.6310836641670342, + "grad_norm": 0.8130154609680176, + "learning_rate": 4.8675368827951456e-06, + "loss": 0.4879, + "step": 8584 + }, + { + "epoch": 0.6311571827672401, + "grad_norm": 0.9326270222663879, + "learning_rate": 4.867505933190209e-06, + "loss": 0.5717, + "step": 8585 + }, + { + "epoch": 0.631230701367446, + "grad_norm": 0.8429806232452393, + "learning_rate": 4.867474980068471e-06, + "loss": 0.5321, + "step": 8586 + }, + { + "epoch": 0.6313042199676518, + "grad_norm": 0.8099591732025146, + "learning_rate": 4.867444023429979e-06, + "loss": 0.5195, + "step": 8587 + }, + { + "epoch": 0.6313777385678576, + "grad_norm": 0.8593201637268066, + "learning_rate": 4.867413063274778e-06, + "loss": 0.5644, + "step": 8588 + }, + { + "epoch": 0.6314512571680635, + "grad_norm": 0.8009642362594604, + "learning_rate": 4.867382099602914e-06, + "loss": 0.5379, + "step": 8589 + }, + { + "epoch": 0.6315247757682694, + "grad_norm": 0.7995576858520508, + "learning_rate": 4.867351132414435e-06, + "loss": 0.5305, + "step": 8590 + }, + { + "epoch": 0.6315982943684753, + "grad_norm": 0.8097450137138367, + "learning_rate": 4.867320161709385e-06, + "loss": 0.5296, + "step": 8591 + }, + { + "epoch": 0.631671812968681, + "grad_norm": 0.8529378175735474, + "learning_rate": 4.867289187487811e-06, + "loss": 0.5342, + "step": 8592 + }, + { + "epoch": 0.6317453315688869, + "grad_norm": 0.8339019417762756, + "learning_rate": 4.8672582097497575e-06, + "loss": 0.5727, + "step": 8593 + }, + { + "epoch": 0.6318188501690928, + "grad_norm": 0.8325361609458923, + "learning_rate": 4.867227228495273e-06, + "loss": 0.5224, + "step": 8594 + }, + { + "epoch": 0.6318923687692987, + "grad_norm": 0.82744961977005, + "learning_rate": 4.8671962437244e-06, + "loss": 0.5404, + "step": 8595 + }, + { + "epoch": 0.6319658873695044, + "grad_norm": 0.8570987582206726, + "learning_rate": 4.867165255437188e-06, + "loss": 0.5281, + "step": 8596 + }, + { + "epoch": 0.6320394059697103, + "grad_norm": 0.8278299570083618, + "learning_rate": 4.867134263633681e-06, + "loss": 0.5064, + "step": 8597 + }, + { + "epoch": 0.6321129245699162, + "grad_norm": 0.8429377675056458, + "learning_rate": 4.8671032683139265e-06, + "loss": 0.5338, + "step": 8598 + }, + { + "epoch": 0.6321864431701221, + "grad_norm": 0.8372032642364502, + "learning_rate": 4.867072269477969e-06, + "loss": 0.5209, + "step": 8599 + }, + { + "epoch": 0.6322599617703278, + "grad_norm": 0.8008707165718079, + "learning_rate": 4.8670412671258564e-06, + "loss": 0.4791, + "step": 8600 + }, + { + "epoch": 0.6323334803705337, + "grad_norm": 0.8412744998931885, + "learning_rate": 4.867010261257633e-06, + "loss": 0.5434, + "step": 8601 + }, + { + "epoch": 0.6324069989707396, + "grad_norm": 0.8233038783073425, + "learning_rate": 4.866979251873345e-06, + "loss": 0.5409, + "step": 8602 + }, + { + "epoch": 0.6324805175709455, + "grad_norm": 0.8239816427230835, + "learning_rate": 4.866948238973039e-06, + "loss": 0.535, + "step": 8603 + }, + { + "epoch": 0.6325540361711512, + "grad_norm": 0.848208487033844, + "learning_rate": 4.8669172225567615e-06, + "loss": 0.5627, + "step": 8604 + }, + { + "epoch": 0.6326275547713571, + "grad_norm": 0.813382089138031, + "learning_rate": 4.866886202624557e-06, + "loss": 0.5479, + "step": 8605 + }, + { + "epoch": 0.632701073371563, + "grad_norm": 0.79816734790802, + "learning_rate": 4.866855179176474e-06, + "loss": 0.5121, + "step": 8606 + }, + { + "epoch": 0.6327745919717689, + "grad_norm": 0.8330629467964172, + "learning_rate": 4.866824152212556e-06, + "loss": 0.5296, + "step": 8607 + }, + { + "epoch": 0.6328481105719748, + "grad_norm": 0.8294219374656677, + "learning_rate": 4.866793121732851e-06, + "loss": 0.551, + "step": 8608 + }, + { + "epoch": 0.6329216291721805, + "grad_norm": 0.8371239304542542, + "learning_rate": 4.866762087737405e-06, + "loss": 0.5064, + "step": 8609 + }, + { + "epoch": 0.6329951477723864, + "grad_norm": 0.83109050989151, + "learning_rate": 4.866731050226263e-06, + "loss": 0.4975, + "step": 8610 + }, + { + "epoch": 0.6330686663725923, + "grad_norm": 0.8892025947570801, + "learning_rate": 4.866700009199471e-06, + "loss": 0.5687, + "step": 8611 + }, + { + "epoch": 0.6331421849727982, + "grad_norm": 0.8359061479568481, + "learning_rate": 4.8666689646570765e-06, + "loss": 0.5275, + "step": 8612 + }, + { + "epoch": 0.6332157035730039, + "grad_norm": 0.87063068151474, + "learning_rate": 4.866637916599124e-06, + "loss": 0.5471, + "step": 8613 + }, + { + "epoch": 0.6332892221732098, + "grad_norm": 0.8593562841415405, + "learning_rate": 4.866606865025662e-06, + "loss": 0.5357, + "step": 8614 + }, + { + "epoch": 0.6333627407734157, + "grad_norm": 0.8476616144180298, + "learning_rate": 4.866575809936734e-06, + "loss": 0.54, + "step": 8615 + }, + { + "epoch": 0.6334362593736216, + "grad_norm": 0.8521478176116943, + "learning_rate": 4.866544751332387e-06, + "loss": 0.5396, + "step": 8616 + }, + { + "epoch": 0.6335097779738273, + "grad_norm": 0.8748135566711426, + "learning_rate": 4.866513689212668e-06, + "loss": 0.5739, + "step": 8617 + }, + { + "epoch": 0.6335832965740332, + "grad_norm": 0.8452302813529968, + "learning_rate": 4.8664826235776215e-06, + "loss": 0.5243, + "step": 8618 + }, + { + "epoch": 0.6336568151742391, + "grad_norm": 0.8494300246238708, + "learning_rate": 4.866451554427296e-06, + "loss": 0.5509, + "step": 8619 + }, + { + "epoch": 0.633730333774445, + "grad_norm": 0.8324556946754456, + "learning_rate": 4.866420481761736e-06, + "loss": 0.5361, + "step": 8620 + }, + { + "epoch": 0.6338038523746508, + "grad_norm": 0.8002433180809021, + "learning_rate": 4.866389405580988e-06, + "loss": 0.5258, + "step": 8621 + }, + { + "epoch": 0.6338773709748566, + "grad_norm": 0.8931912183761597, + "learning_rate": 4.866358325885098e-06, + "loss": 0.5919, + "step": 8622 + }, + { + "epoch": 0.6339508895750625, + "grad_norm": 0.8220117688179016, + "learning_rate": 4.866327242674112e-06, + "loss": 0.5297, + "step": 8623 + }, + { + "epoch": 0.6340244081752684, + "grad_norm": 0.842706561088562, + "learning_rate": 4.8662961559480755e-06, + "loss": 0.5227, + "step": 8624 + }, + { + "epoch": 0.6340979267754742, + "grad_norm": 0.8570156097412109, + "learning_rate": 4.866265065707037e-06, + "loss": 0.566, + "step": 8625 + }, + { + "epoch": 0.63417144537568, + "grad_norm": 0.8409728407859802, + "learning_rate": 4.866233971951041e-06, + "loss": 0.5141, + "step": 8626 + }, + { + "epoch": 0.6342449639758859, + "grad_norm": 0.7986928820610046, + "learning_rate": 4.866202874680134e-06, + "loss": 0.5193, + "step": 8627 + }, + { + "epoch": 0.6343184825760918, + "grad_norm": 0.8122549653053284, + "learning_rate": 4.866171773894363e-06, + "loss": 0.5094, + "step": 8628 + }, + { + "epoch": 0.6343920011762976, + "grad_norm": 0.7739130258560181, + "learning_rate": 4.866140669593772e-06, + "loss": 0.5205, + "step": 8629 + }, + { + "epoch": 0.6344655197765035, + "grad_norm": 0.8358680009841919, + "learning_rate": 4.866109561778409e-06, + "loss": 0.4886, + "step": 8630 + }, + { + "epoch": 0.6345390383767093, + "grad_norm": 0.8075963854789734, + "learning_rate": 4.8660784504483196e-06, + "loss": 0.5279, + "step": 8631 + }, + { + "epoch": 0.6346125569769152, + "grad_norm": 0.8739473223686218, + "learning_rate": 4.866047335603551e-06, + "loss": 0.5802, + "step": 8632 + }, + { + "epoch": 0.634686075577121, + "grad_norm": 0.8614269495010376, + "learning_rate": 4.866016217244148e-06, + "loss": 0.5416, + "step": 8633 + }, + { + "epoch": 0.6347595941773269, + "grad_norm": 0.8722098469734192, + "learning_rate": 4.865985095370158e-06, + "loss": 0.5487, + "step": 8634 + }, + { + "epoch": 0.6348331127775327, + "grad_norm": 0.856907308101654, + "learning_rate": 4.865953969981626e-06, + "loss": 0.5635, + "step": 8635 + }, + { + "epoch": 0.6349066313777386, + "grad_norm": 0.8259537220001221, + "learning_rate": 4.8659228410786e-06, + "loss": 0.5357, + "step": 8636 + }, + { + "epoch": 0.6349801499779444, + "grad_norm": 0.8011850118637085, + "learning_rate": 4.865891708661125e-06, + "loss": 0.504, + "step": 8637 + }, + { + "epoch": 0.6350536685781503, + "grad_norm": 0.8231635093688965, + "learning_rate": 4.865860572729246e-06, + "loss": 0.5283, + "step": 8638 + }, + { + "epoch": 0.6351271871783561, + "grad_norm": 0.8402938842773438, + "learning_rate": 4.865829433283012e-06, + "loss": 0.5592, + "step": 8639 + }, + { + "epoch": 0.635200705778562, + "grad_norm": 0.9082561135292053, + "learning_rate": 4.8657982903224676e-06, + "loss": 0.5536, + "step": 8640 + }, + { + "epoch": 0.6352742243787678, + "grad_norm": 0.8695353865623474, + "learning_rate": 4.86576714384766e-06, + "loss": 0.5274, + "step": 8641 + }, + { + "epoch": 0.6353477429789737, + "grad_norm": 0.7876008152961731, + "learning_rate": 4.865735993858633e-06, + "loss": 0.5296, + "step": 8642 + }, + { + "epoch": 0.6354212615791796, + "grad_norm": 0.7965974807739258, + "learning_rate": 4.865704840355437e-06, + "loss": 0.5449, + "step": 8643 + }, + { + "epoch": 0.6354947801793854, + "grad_norm": 0.843116283416748, + "learning_rate": 4.865673683338115e-06, + "loss": 0.5653, + "step": 8644 + }, + { + "epoch": 0.6355682987795912, + "grad_norm": 0.8488279581069946, + "learning_rate": 4.865642522806714e-06, + "loss": 0.5882, + "step": 8645 + }, + { + "epoch": 0.6356418173797971, + "grad_norm": 0.8781108856201172, + "learning_rate": 4.8656113587612806e-06, + "loss": 0.5648, + "step": 8646 + }, + { + "epoch": 0.635715335980003, + "grad_norm": 0.9218968749046326, + "learning_rate": 4.865580191201861e-06, + "loss": 0.588, + "step": 8647 + }, + { + "epoch": 0.6357888545802088, + "grad_norm": 0.8464252352714539, + "learning_rate": 4.8655490201285015e-06, + "loss": 0.5233, + "step": 8648 + }, + { + "epoch": 0.6358623731804146, + "grad_norm": 0.8500334620475769, + "learning_rate": 4.865517845541249e-06, + "loss": 0.587, + "step": 8649 + }, + { + "epoch": 0.6359358917806205, + "grad_norm": 0.8040811419487, + "learning_rate": 4.865486667440149e-06, + "loss": 0.5542, + "step": 8650 + }, + { + "epoch": 0.6360094103808264, + "grad_norm": 0.8741489052772522, + "learning_rate": 4.865455485825248e-06, + "loss": 0.5903, + "step": 8651 + }, + { + "epoch": 0.6360829289810322, + "grad_norm": 0.8318846225738525, + "learning_rate": 4.865424300696593e-06, + "loss": 0.5355, + "step": 8652 + }, + { + "epoch": 0.636156447581238, + "grad_norm": 0.8257637619972229, + "learning_rate": 4.865393112054229e-06, + "loss": 0.519, + "step": 8653 + }, + { + "epoch": 0.6362299661814439, + "grad_norm": 0.9039561152458191, + "learning_rate": 4.8653619198982035e-06, + "loss": 0.5612, + "step": 8654 + }, + { + "epoch": 0.6363034847816498, + "grad_norm": 0.8675969243049622, + "learning_rate": 4.8653307242285616e-06, + "loss": 0.5509, + "step": 8655 + }, + { + "epoch": 0.6363770033818557, + "grad_norm": 0.8569402098655701, + "learning_rate": 4.8652995250453515e-06, + "loss": 0.5346, + "step": 8656 + }, + { + "epoch": 0.6364505219820614, + "grad_norm": 0.834670901298523, + "learning_rate": 4.865268322348618e-06, + "loss": 0.5544, + "step": 8657 + }, + { + "epoch": 0.6365240405822673, + "grad_norm": 0.8318483829498291, + "learning_rate": 4.8652371161384086e-06, + "loss": 0.5126, + "step": 8658 + }, + { + "epoch": 0.6365975591824732, + "grad_norm": 0.8300985097885132, + "learning_rate": 4.865205906414768e-06, + "loss": 0.5365, + "step": 8659 + }, + { + "epoch": 0.6366710777826791, + "grad_norm": 0.8762117028236389, + "learning_rate": 4.865174693177743e-06, + "loss": 0.5973, + "step": 8660 + }, + { + "epoch": 0.6367445963828848, + "grad_norm": 0.8913345336914062, + "learning_rate": 4.865143476427382e-06, + "loss": 0.5219, + "step": 8661 + }, + { + "epoch": 0.6368181149830907, + "grad_norm": 0.8508892059326172, + "learning_rate": 4.8651122561637296e-06, + "loss": 0.5486, + "step": 8662 + }, + { + "epoch": 0.6368916335832966, + "grad_norm": 0.8786349892616272, + "learning_rate": 4.8650810323868315e-06, + "loss": 0.5833, + "step": 8663 + }, + { + "epoch": 0.6369651521835025, + "grad_norm": 0.8285648226737976, + "learning_rate": 4.8650498050967355e-06, + "loss": 0.4983, + "step": 8664 + }, + { + "epoch": 0.6370386707837082, + "grad_norm": 0.8152305483818054, + "learning_rate": 4.865018574293488e-06, + "loss": 0.5135, + "step": 8665 + }, + { + "epoch": 0.6371121893839141, + "grad_norm": 0.8216953277587891, + "learning_rate": 4.864987339977135e-06, + "loss": 0.5171, + "step": 8666 + }, + { + "epoch": 0.63718570798412, + "grad_norm": 0.8257951736450195, + "learning_rate": 4.864956102147722e-06, + "loss": 0.5136, + "step": 8667 + }, + { + "epoch": 0.6372592265843259, + "grad_norm": 0.8449907302856445, + "learning_rate": 4.864924860805297e-06, + "loss": 0.574, + "step": 8668 + }, + { + "epoch": 0.6373327451845316, + "grad_norm": 0.9191902279853821, + "learning_rate": 4.864893615949905e-06, + "loss": 0.5616, + "step": 8669 + }, + { + "epoch": 0.6374062637847375, + "grad_norm": 0.8485276699066162, + "learning_rate": 4.864862367581593e-06, + "loss": 0.5212, + "step": 8670 + }, + { + "epoch": 0.6374797823849434, + "grad_norm": 0.8291760683059692, + "learning_rate": 4.864831115700408e-06, + "loss": 0.5517, + "step": 8671 + }, + { + "epoch": 0.6375533009851493, + "grad_norm": 0.8278504610061646, + "learning_rate": 4.864799860306395e-06, + "loss": 0.5547, + "step": 8672 + }, + { + "epoch": 0.637626819585355, + "grad_norm": 0.8148646950721741, + "learning_rate": 4.864768601399602e-06, + "loss": 0.546, + "step": 8673 + }, + { + "epoch": 0.6377003381855609, + "grad_norm": 0.7779823541641235, + "learning_rate": 4.864737338980075e-06, + "loss": 0.478, + "step": 8674 + }, + { + "epoch": 0.6377738567857668, + "grad_norm": 0.81084144115448, + "learning_rate": 4.86470607304786e-06, + "loss": 0.5294, + "step": 8675 + }, + { + "epoch": 0.6378473753859727, + "grad_norm": 0.8675541281700134, + "learning_rate": 4.864674803603003e-06, + "loss": 0.5242, + "step": 8676 + }, + { + "epoch": 0.6379208939861785, + "grad_norm": 0.8142791390419006, + "learning_rate": 4.864643530645551e-06, + "loss": 0.5324, + "step": 8677 + }, + { + "epoch": 0.6379944125863843, + "grad_norm": 0.7875639796257019, + "learning_rate": 4.8646122541755516e-06, + "loss": 0.5641, + "step": 8678 + }, + { + "epoch": 0.6380679311865902, + "grad_norm": 0.8235896229743958, + "learning_rate": 4.86458097419305e-06, + "loss": 0.5139, + "step": 8679 + }, + { + "epoch": 0.6381414497867961, + "grad_norm": 0.8301727175712585, + "learning_rate": 4.864549690698092e-06, + "loss": 0.5567, + "step": 8680 + }, + { + "epoch": 0.6382149683870019, + "grad_norm": 0.8593655824661255, + "learning_rate": 4.8645184036907254e-06, + "loss": 0.5744, + "step": 8681 + }, + { + "epoch": 0.6382884869872077, + "grad_norm": 0.772968590259552, + "learning_rate": 4.8644871131709955e-06, + "loss": 0.5338, + "step": 8682 + }, + { + "epoch": 0.6383620055874136, + "grad_norm": 0.849967360496521, + "learning_rate": 4.86445581913895e-06, + "loss": 0.5554, + "step": 8683 + }, + { + "epoch": 0.6384355241876195, + "grad_norm": 0.8071728944778442, + "learning_rate": 4.864424521594635e-06, + "loss": 0.5452, + "step": 8684 + }, + { + "epoch": 0.6385090427878253, + "grad_norm": 0.835408091545105, + "learning_rate": 4.864393220538097e-06, + "loss": 0.5212, + "step": 8685 + }, + { + "epoch": 0.6385825613880312, + "grad_norm": 0.8113066554069519, + "learning_rate": 4.864361915969382e-06, + "loss": 0.537, + "step": 8686 + }, + { + "epoch": 0.638656079988237, + "grad_norm": 0.8728747963905334, + "learning_rate": 4.8643306078885375e-06, + "loss": 0.5486, + "step": 8687 + }, + { + "epoch": 0.6387295985884429, + "grad_norm": 0.885542094707489, + "learning_rate": 4.864299296295608e-06, + "loss": 0.572, + "step": 8688 + }, + { + "epoch": 0.6388031171886487, + "grad_norm": 0.7901771068572998, + "learning_rate": 4.8642679811906425e-06, + "loss": 0.524, + "step": 8689 + }, + { + "epoch": 0.6388766357888546, + "grad_norm": 0.865656852722168, + "learning_rate": 4.864236662573686e-06, + "loss": 0.5559, + "step": 8690 + }, + { + "epoch": 0.6389501543890604, + "grad_norm": 0.8523019552230835, + "learning_rate": 4.864205340444786e-06, + "loss": 0.5623, + "step": 8691 + }, + { + "epoch": 0.6390236729892663, + "grad_norm": 0.8428596258163452, + "learning_rate": 4.8641740148039875e-06, + "loss": 0.5704, + "step": 8692 + }, + { + "epoch": 0.6390971915894721, + "grad_norm": 0.8607492446899414, + "learning_rate": 4.864142685651338e-06, + "loss": 0.5614, + "step": 8693 + }, + { + "epoch": 0.639170710189678, + "grad_norm": 0.8217012882232666, + "learning_rate": 4.864111352986884e-06, + "loss": 0.545, + "step": 8694 + }, + { + "epoch": 0.6392442287898839, + "grad_norm": 0.8404312133789062, + "learning_rate": 4.8640800168106724e-06, + "loss": 0.5632, + "step": 8695 + }, + { + "epoch": 0.6393177473900897, + "grad_norm": 0.8212052583694458, + "learning_rate": 4.86404867712275e-06, + "loss": 0.5406, + "step": 8696 + }, + { + "epoch": 0.6393912659902955, + "grad_norm": 0.8531274199485779, + "learning_rate": 4.864017333923162e-06, + "loss": 0.549, + "step": 8697 + }, + { + "epoch": 0.6394647845905014, + "grad_norm": 0.8120288848876953, + "learning_rate": 4.863985987211955e-06, + "loss": 0.4995, + "step": 8698 + }, + { + "epoch": 0.6395383031907073, + "grad_norm": 0.8664137721061707, + "learning_rate": 4.8639546369891775e-06, + "loss": 0.5769, + "step": 8699 + }, + { + "epoch": 0.6396118217909131, + "grad_norm": 0.8233568072319031, + "learning_rate": 4.863923283254875e-06, + "loss": 0.5623, + "step": 8700 + }, + { + "epoch": 0.6396853403911189, + "grad_norm": 0.8438644409179688, + "learning_rate": 4.863891926009094e-06, + "loss": 0.5411, + "step": 8701 + }, + { + "epoch": 0.6397588589913248, + "grad_norm": 0.931367039680481, + "learning_rate": 4.86386056525188e-06, + "loss": 0.5641, + "step": 8702 + }, + { + "epoch": 0.6398323775915307, + "grad_norm": 0.7899908423423767, + "learning_rate": 4.86382920098328e-06, + "loss": 0.5348, + "step": 8703 + }, + { + "epoch": 0.6399058961917365, + "grad_norm": 0.8310309648513794, + "learning_rate": 4.863797833203342e-06, + "loss": 0.515, + "step": 8704 + }, + { + "epoch": 0.6399794147919423, + "grad_norm": 0.8648161292076111, + "learning_rate": 4.8637664619121115e-06, + "loss": 0.5609, + "step": 8705 + }, + { + "epoch": 0.6400529333921482, + "grad_norm": 0.7829199433326721, + "learning_rate": 4.863735087109636e-06, + "loss": 0.4809, + "step": 8706 + }, + { + "epoch": 0.6401264519923541, + "grad_norm": 0.8432795405387878, + "learning_rate": 4.863703708795962e-06, + "loss": 0.5182, + "step": 8707 + }, + { + "epoch": 0.64019997059256, + "grad_norm": 0.8037398457527161, + "learning_rate": 4.863672326971134e-06, + "loss": 0.5244, + "step": 8708 + }, + { + "epoch": 0.6402734891927657, + "grad_norm": 0.8058597445487976, + "learning_rate": 4.863640941635201e-06, + "loss": 0.55, + "step": 8709 + }, + { + "epoch": 0.6403470077929716, + "grad_norm": 0.7973549962043762, + "learning_rate": 4.8636095527882085e-06, + "loss": 0.5425, + "step": 8710 + }, + { + "epoch": 0.6404205263931775, + "grad_norm": 0.8434470295906067, + "learning_rate": 4.863578160430204e-06, + "loss": 0.5895, + "step": 8711 + }, + { + "epoch": 0.6404940449933834, + "grad_norm": 0.8171049356460571, + "learning_rate": 4.863546764561233e-06, + "loss": 0.5595, + "step": 8712 + }, + { + "epoch": 0.6405675635935891, + "grad_norm": 0.8284887075424194, + "learning_rate": 4.863515365181343e-06, + "loss": 0.4638, + "step": 8713 + }, + { + "epoch": 0.640641082193795, + "grad_norm": 0.8420380353927612, + "learning_rate": 4.86348396229058e-06, + "loss": 0.5477, + "step": 8714 + }, + { + "epoch": 0.6407146007940009, + "grad_norm": 0.8165541291236877, + "learning_rate": 4.863452555888991e-06, + "loss": 0.493, + "step": 8715 + }, + { + "epoch": 0.6407881193942068, + "grad_norm": 0.8390320539474487, + "learning_rate": 4.863421145976624e-06, + "loss": 0.5695, + "step": 8716 + }, + { + "epoch": 0.6408616379944125, + "grad_norm": 0.8432472348213196, + "learning_rate": 4.863389732553522e-06, + "loss": 0.5992, + "step": 8717 + }, + { + "epoch": 0.6409351565946184, + "grad_norm": 0.8504102230072021, + "learning_rate": 4.8633583156197354e-06, + "loss": 0.546, + "step": 8718 + }, + { + "epoch": 0.6410086751948243, + "grad_norm": 0.7923473715782166, + "learning_rate": 4.863326895175309e-06, + "loss": 0.5483, + "step": 8719 + }, + { + "epoch": 0.6410821937950302, + "grad_norm": 0.8006241321563721, + "learning_rate": 4.86329547122029e-06, + "loss": 0.5198, + "step": 8720 + }, + { + "epoch": 0.6411557123952359, + "grad_norm": 0.8798035979270935, + "learning_rate": 4.863264043754725e-06, + "loss": 0.5564, + "step": 8721 + }, + { + "epoch": 0.6412292309954418, + "grad_norm": 0.8663942813873291, + "learning_rate": 4.863232612778661e-06, + "loss": 0.519, + "step": 8722 + }, + { + "epoch": 0.6413027495956477, + "grad_norm": 0.8487386107444763, + "learning_rate": 4.863201178292144e-06, + "loss": 0.5473, + "step": 8723 + }, + { + "epoch": 0.6413762681958536, + "grad_norm": 0.784481942653656, + "learning_rate": 4.863169740295222e-06, + "loss": 0.5187, + "step": 8724 + }, + { + "epoch": 0.6414497867960594, + "grad_norm": 0.828808069229126, + "learning_rate": 4.8631382987879396e-06, + "loss": 0.5258, + "step": 8725 + }, + { + "epoch": 0.6415233053962652, + "grad_norm": 0.8479640483856201, + "learning_rate": 4.8631068537703445e-06, + "loss": 0.5704, + "step": 8726 + }, + { + "epoch": 0.6415968239964711, + "grad_norm": 0.8238703012466431, + "learning_rate": 4.863075405242484e-06, + "loss": 0.5456, + "step": 8727 + }, + { + "epoch": 0.641670342596677, + "grad_norm": 0.8361607789993286, + "learning_rate": 4.863043953204405e-06, + "loss": 0.5392, + "step": 8728 + }, + { + "epoch": 0.6417438611968828, + "grad_norm": 0.8839862942695618, + "learning_rate": 4.863012497656153e-06, + "loss": 0.566, + "step": 8729 + }, + { + "epoch": 0.6418173797970886, + "grad_norm": 0.8045602440834045, + "learning_rate": 4.862981038597775e-06, + "loss": 0.513, + "step": 8730 + }, + { + "epoch": 0.6418908983972945, + "grad_norm": 1.010493516921997, + "learning_rate": 4.862949576029319e-06, + "loss": 0.623, + "step": 8731 + }, + { + "epoch": 0.6419644169975004, + "grad_norm": 0.8203245401382446, + "learning_rate": 4.86291810995083e-06, + "loss": 0.5321, + "step": 8732 + }, + { + "epoch": 0.6420379355977062, + "grad_norm": 0.8041195869445801, + "learning_rate": 4.862886640362355e-06, + "loss": 0.4987, + "step": 8733 + }, + { + "epoch": 0.642111454197912, + "grad_norm": 0.8775700330734253, + "learning_rate": 4.862855167263942e-06, + "loss": 0.5888, + "step": 8734 + }, + { + "epoch": 0.6421849727981179, + "grad_norm": 0.7974523901939392, + "learning_rate": 4.862823690655638e-06, + "loss": 0.5207, + "step": 8735 + }, + { + "epoch": 0.6422584913983238, + "grad_norm": 0.8166987299919128, + "learning_rate": 4.862792210537488e-06, + "loss": 0.5428, + "step": 8736 + }, + { + "epoch": 0.6423320099985296, + "grad_norm": 0.8183264136314392, + "learning_rate": 4.862760726909539e-06, + "loss": 0.504, + "step": 8737 + }, + { + "epoch": 0.6424055285987355, + "grad_norm": 0.8698344230651855, + "learning_rate": 4.862729239771838e-06, + "loss": 0.5782, + "step": 8738 + }, + { + "epoch": 0.6424790471989413, + "grad_norm": 0.8169498443603516, + "learning_rate": 4.862697749124434e-06, + "loss": 0.5349, + "step": 8739 + }, + { + "epoch": 0.6425525657991472, + "grad_norm": 0.8421838283538818, + "learning_rate": 4.862666254967371e-06, + "loss": 0.5203, + "step": 8740 + }, + { + "epoch": 0.642626084399353, + "grad_norm": 0.8518403172492981, + "learning_rate": 4.862634757300696e-06, + "loss": 0.5552, + "step": 8741 + }, + { + "epoch": 0.6426996029995589, + "grad_norm": 0.8479164242744446, + "learning_rate": 4.862603256124456e-06, + "loss": 0.5367, + "step": 8742 + }, + { + "epoch": 0.6427731215997647, + "grad_norm": 0.8207896947860718, + "learning_rate": 4.862571751438699e-06, + "loss": 0.5513, + "step": 8743 + }, + { + "epoch": 0.6428466401999706, + "grad_norm": 0.8918747901916504, + "learning_rate": 4.862540243243472e-06, + "loss": 0.545, + "step": 8744 + }, + { + "epoch": 0.6429201588001765, + "grad_norm": 0.8566156029701233, + "learning_rate": 4.86250873153882e-06, + "loss": 0.578, + "step": 8745 + }, + { + "epoch": 0.6429936774003823, + "grad_norm": 0.8299323916435242, + "learning_rate": 4.8624772163247905e-06, + "loss": 0.5198, + "step": 8746 + }, + { + "epoch": 0.6430671960005881, + "grad_norm": 0.832610011100769, + "learning_rate": 4.86244569760143e-06, + "loss": 0.5621, + "step": 8747 + }, + { + "epoch": 0.643140714600794, + "grad_norm": 0.853678822517395, + "learning_rate": 4.862414175368787e-06, + "loss": 0.5378, + "step": 8748 + }, + { + "epoch": 0.6432142332009999, + "grad_norm": 0.8605043888092041, + "learning_rate": 4.862382649626906e-06, + "loss": 0.5544, + "step": 8749 + }, + { + "epoch": 0.6432877518012057, + "grad_norm": 0.8841429948806763, + "learning_rate": 4.862351120375835e-06, + "loss": 0.5734, + "step": 8750 + }, + { + "epoch": 0.6433612704014116, + "grad_norm": 0.8338509202003479, + "learning_rate": 4.862319587615622e-06, + "loss": 0.5088, + "step": 8751 + }, + { + "epoch": 0.6434347890016174, + "grad_norm": 0.8328068852424622, + "learning_rate": 4.862288051346311e-06, + "loss": 0.5231, + "step": 8752 + }, + { + "epoch": 0.6435083076018233, + "grad_norm": 0.8684440851211548, + "learning_rate": 4.862256511567951e-06, + "loss": 0.5364, + "step": 8753 + }, + { + "epoch": 0.6435818262020291, + "grad_norm": 0.7710339426994324, + "learning_rate": 4.862224968280589e-06, + "loss": 0.5075, + "step": 8754 + }, + { + "epoch": 0.643655344802235, + "grad_norm": 0.830814778804779, + "learning_rate": 4.86219342148427e-06, + "loss": 0.5352, + "step": 8755 + }, + { + "epoch": 0.6437288634024408, + "grad_norm": 0.8283531069755554, + "learning_rate": 4.862161871179043e-06, + "loss": 0.5379, + "step": 8756 + }, + { + "epoch": 0.6438023820026467, + "grad_norm": 0.792066752910614, + "learning_rate": 4.862130317364954e-06, + "loss": 0.5338, + "step": 8757 + }, + { + "epoch": 0.6438759006028525, + "grad_norm": 0.8377960920333862, + "learning_rate": 4.8620987600420486e-06, + "loss": 0.5175, + "step": 8758 + }, + { + "epoch": 0.6439494192030584, + "grad_norm": 0.8976256847381592, + "learning_rate": 4.8620671992103764e-06, + "loss": 0.5817, + "step": 8759 + }, + { + "epoch": 0.6440229378032643, + "grad_norm": 0.8453944325447083, + "learning_rate": 4.862035634869981e-06, + "loss": 0.5287, + "step": 8760 + }, + { + "epoch": 0.6440964564034701, + "grad_norm": 0.8305445909500122, + "learning_rate": 4.862004067020912e-06, + "loss": 0.5402, + "step": 8761 + }, + { + "epoch": 0.6441699750036759, + "grad_norm": 0.8827418088912964, + "learning_rate": 4.861972495663215e-06, + "loss": 0.5343, + "step": 8762 + }, + { + "epoch": 0.6442434936038818, + "grad_norm": 0.8381370306015015, + "learning_rate": 4.861940920796938e-06, + "loss": 0.5663, + "step": 8763 + }, + { + "epoch": 0.6443170122040877, + "grad_norm": 0.8224287629127502, + "learning_rate": 4.861909342422127e-06, + "loss": 0.5008, + "step": 8764 + }, + { + "epoch": 0.6443905308042935, + "grad_norm": 0.8595030903816223, + "learning_rate": 4.861877760538828e-06, + "loss": 0.5637, + "step": 8765 + }, + { + "epoch": 0.6444640494044993, + "grad_norm": 0.865017831325531, + "learning_rate": 4.861846175147089e-06, + "loss": 0.5393, + "step": 8766 + }, + { + "epoch": 0.6445375680047052, + "grad_norm": 0.8649906516075134, + "learning_rate": 4.8618145862469575e-06, + "loss": 0.6011, + "step": 8767 + }, + { + "epoch": 0.6446110866049111, + "grad_norm": 0.8064751029014587, + "learning_rate": 4.86178299383848e-06, + "loss": 0.5387, + "step": 8768 + }, + { + "epoch": 0.644684605205117, + "grad_norm": 0.8292807340621948, + "learning_rate": 4.861751397921703e-06, + "loss": 0.584, + "step": 8769 + }, + { + "epoch": 0.6447581238053227, + "grad_norm": 0.8352841734886169, + "learning_rate": 4.861719798496673e-06, + "loss": 0.5348, + "step": 8770 + }, + { + "epoch": 0.6448316424055286, + "grad_norm": 0.8092901110649109, + "learning_rate": 4.861688195563438e-06, + "loss": 0.5275, + "step": 8771 + }, + { + "epoch": 0.6449051610057345, + "grad_norm": 0.8513374924659729, + "learning_rate": 4.861656589122044e-06, + "loss": 0.5581, + "step": 8772 + }, + { + "epoch": 0.6449786796059404, + "grad_norm": 0.7952576279640198, + "learning_rate": 4.861624979172539e-06, + "loss": 0.5373, + "step": 8773 + }, + { + "epoch": 0.6450521982061461, + "grad_norm": 0.8269073367118835, + "learning_rate": 4.861593365714969e-06, + "loss": 0.5383, + "step": 8774 + }, + { + "epoch": 0.645125716806352, + "grad_norm": 0.847573459148407, + "learning_rate": 4.861561748749382e-06, + "loss": 0.5747, + "step": 8775 + }, + { + "epoch": 0.6451992354065579, + "grad_norm": 0.8515150547027588, + "learning_rate": 4.8615301282758236e-06, + "loss": 0.584, + "step": 8776 + }, + { + "epoch": 0.6452727540067638, + "grad_norm": 0.841151773929596, + "learning_rate": 4.861498504294342e-06, + "loss": 0.5332, + "step": 8777 + }, + { + "epoch": 0.6453462726069695, + "grad_norm": 0.9240732192993164, + "learning_rate": 4.861466876804983e-06, + "loss": 0.5563, + "step": 8778 + }, + { + "epoch": 0.6454197912071754, + "grad_norm": 0.8570741415023804, + "learning_rate": 4.861435245807795e-06, + "loss": 0.5469, + "step": 8779 + }, + { + "epoch": 0.6454933098073813, + "grad_norm": 0.8471795916557312, + "learning_rate": 4.861403611302825e-06, + "loss": 0.4818, + "step": 8780 + }, + { + "epoch": 0.6455668284075872, + "grad_norm": 0.771280825138092, + "learning_rate": 4.861371973290118e-06, + "loss": 0.4955, + "step": 8781 + }, + { + "epoch": 0.6456403470077929, + "grad_norm": 0.8021380305290222, + "learning_rate": 4.861340331769723e-06, + "loss": 0.52, + "step": 8782 + }, + { + "epoch": 0.6457138656079988, + "grad_norm": 0.820187509059906, + "learning_rate": 4.861308686741686e-06, + "loss": 0.5286, + "step": 8783 + }, + { + "epoch": 0.6457873842082047, + "grad_norm": 0.8702619075775146, + "learning_rate": 4.861277038206053e-06, + "loss": 0.5864, + "step": 8784 + }, + { + "epoch": 0.6458609028084106, + "grad_norm": 0.8732390403747559, + "learning_rate": 4.861245386162874e-06, + "loss": 0.55, + "step": 8785 + }, + { + "epoch": 0.6459344214086163, + "grad_norm": 0.7995569109916687, + "learning_rate": 4.861213730612193e-06, + "loss": 0.5446, + "step": 8786 + }, + { + "epoch": 0.6460079400088222, + "grad_norm": 0.8504220247268677, + "learning_rate": 4.8611820715540595e-06, + "loss": 0.547, + "step": 8787 + }, + { + "epoch": 0.6460814586090281, + "grad_norm": 0.8372090458869934, + "learning_rate": 4.861150408988519e-06, + "loss": 0.5378, + "step": 8788 + }, + { + "epoch": 0.646154977209234, + "grad_norm": 0.8954833745956421, + "learning_rate": 4.861118742915618e-06, + "loss": 0.554, + "step": 8789 + }, + { + "epoch": 0.6462284958094398, + "grad_norm": 0.8721951842308044, + "learning_rate": 4.861087073335405e-06, + "loss": 0.5307, + "step": 8790 + }, + { + "epoch": 0.6463020144096456, + "grad_norm": 0.8393587470054626, + "learning_rate": 4.861055400247926e-06, + "loss": 0.5636, + "step": 8791 + }, + { + "epoch": 0.6463755330098515, + "grad_norm": 0.8322648406028748, + "learning_rate": 4.86102372365323e-06, + "loss": 0.5445, + "step": 8792 + }, + { + "epoch": 0.6464490516100574, + "grad_norm": 0.8369734287261963, + "learning_rate": 4.860992043551361e-06, + "loss": 0.5083, + "step": 8793 + }, + { + "epoch": 0.6465225702102632, + "grad_norm": 0.790701687335968, + "learning_rate": 4.860960359942368e-06, + "loss": 0.501, + "step": 8794 + }, + { + "epoch": 0.646596088810469, + "grad_norm": 0.833699643611908, + "learning_rate": 4.860928672826297e-06, + "loss": 0.5359, + "step": 8795 + }, + { + "epoch": 0.6466696074106749, + "grad_norm": 0.8236267566680908, + "learning_rate": 4.860896982203197e-06, + "loss": 0.571, + "step": 8796 + }, + { + "epoch": 0.6467431260108808, + "grad_norm": 0.8557314872741699, + "learning_rate": 4.860865288073113e-06, + "loss": 0.5568, + "step": 8797 + }, + { + "epoch": 0.6468166446110866, + "grad_norm": 0.8426553606987, + "learning_rate": 4.860833590436093e-06, + "loss": 0.5401, + "step": 8798 + }, + { + "epoch": 0.6468901632112924, + "grad_norm": 0.8519309163093567, + "learning_rate": 4.8608018892921845e-06, + "loss": 0.5691, + "step": 8799 + }, + { + "epoch": 0.6469636818114983, + "grad_norm": 0.8709218502044678, + "learning_rate": 4.860770184641434e-06, + "loss": 0.5222, + "step": 8800 + }, + { + "epoch": 0.6470372004117042, + "grad_norm": 0.7981944680213928, + "learning_rate": 4.860738476483888e-06, + "loss": 0.5133, + "step": 8801 + }, + { + "epoch": 0.64711071901191, + "grad_norm": 0.8543785810470581, + "learning_rate": 4.860706764819595e-06, + "loss": 0.5322, + "step": 8802 + }, + { + "epoch": 0.6471842376121159, + "grad_norm": 0.8662289381027222, + "learning_rate": 4.860675049648601e-06, + "loss": 0.5847, + "step": 8803 + }, + { + "epoch": 0.6472577562123217, + "grad_norm": 0.8517324924468994, + "learning_rate": 4.860643330970953e-06, + "loss": 0.5222, + "step": 8804 + }, + { + "epoch": 0.6473312748125276, + "grad_norm": 0.8534497618675232, + "learning_rate": 4.860611608786699e-06, + "loss": 0.522, + "step": 8805 + }, + { + "epoch": 0.6474047934127334, + "grad_norm": 0.8464447855949402, + "learning_rate": 4.860579883095886e-06, + "loss": 0.5629, + "step": 8806 + }, + { + "epoch": 0.6474783120129393, + "grad_norm": 0.8117625713348389, + "learning_rate": 4.860548153898561e-06, + "loss": 0.5023, + "step": 8807 + }, + { + "epoch": 0.6475518306131451, + "grad_norm": 0.8090767860412598, + "learning_rate": 4.860516421194771e-06, + "loss": 0.4713, + "step": 8808 + }, + { + "epoch": 0.647625349213351, + "grad_norm": 0.773814857006073, + "learning_rate": 4.8604846849845625e-06, + "loss": 0.5169, + "step": 8809 + }, + { + "epoch": 0.6476988678135568, + "grad_norm": 0.8617609143257141, + "learning_rate": 4.860452945267984e-06, + "loss": 0.5581, + "step": 8810 + }, + { + "epoch": 0.6477723864137627, + "grad_norm": 0.7907865643501282, + "learning_rate": 4.8604212020450805e-06, + "loss": 0.4962, + "step": 8811 + }, + { + "epoch": 0.6478459050139685, + "grad_norm": 0.8289374113082886, + "learning_rate": 4.860389455315901e-06, + "loss": 0.5855, + "step": 8812 + }, + { + "epoch": 0.6479194236141744, + "grad_norm": 0.814569354057312, + "learning_rate": 4.860357705080493e-06, + "loss": 0.5018, + "step": 8813 + }, + { + "epoch": 0.6479929422143802, + "grad_norm": 0.8485792279243469, + "learning_rate": 4.860325951338903e-06, + "loss": 0.5792, + "step": 8814 + }, + { + "epoch": 0.6480664608145861, + "grad_norm": 0.8178325891494751, + "learning_rate": 4.860294194091178e-06, + "loss": 0.5448, + "step": 8815 + }, + { + "epoch": 0.648139979414792, + "grad_norm": 0.8635504245758057, + "learning_rate": 4.860262433337364e-06, + "loss": 0.5735, + "step": 8816 + }, + { + "epoch": 0.6482134980149978, + "grad_norm": 0.8489909172058105, + "learning_rate": 4.86023066907751e-06, + "loss": 0.5506, + "step": 8817 + }, + { + "epoch": 0.6482870166152036, + "grad_norm": 0.7875059247016907, + "learning_rate": 4.860198901311664e-06, + "loss": 0.5401, + "step": 8818 + }, + { + "epoch": 0.6483605352154095, + "grad_norm": 0.8282469511032104, + "learning_rate": 4.86016713003987e-06, + "loss": 0.5596, + "step": 8819 + }, + { + "epoch": 0.6484340538156154, + "grad_norm": 0.8481526970863342, + "learning_rate": 4.860135355262178e-06, + "loss": 0.5687, + "step": 8820 + }, + { + "epoch": 0.6485075724158212, + "grad_norm": 0.7957704663276672, + "learning_rate": 4.860103576978633e-06, + "loss": 0.5046, + "step": 8821 + }, + { + "epoch": 0.648581091016027, + "grad_norm": 0.8255179524421692, + "learning_rate": 4.860071795189284e-06, + "loss": 0.5496, + "step": 8822 + }, + { + "epoch": 0.6486546096162329, + "grad_norm": 0.8907938003540039, + "learning_rate": 4.860040009894178e-06, + "loss": 0.5687, + "step": 8823 + }, + { + "epoch": 0.6487281282164388, + "grad_norm": 0.8099623322486877, + "learning_rate": 4.860008221093361e-06, + "loss": 0.5103, + "step": 8824 + }, + { + "epoch": 0.6488016468166447, + "grad_norm": 0.8558263182640076, + "learning_rate": 4.859976428786882e-06, + "loss": 0.5463, + "step": 8825 + }, + { + "epoch": 0.6488751654168504, + "grad_norm": 0.844153881072998, + "learning_rate": 4.859944632974787e-06, + "loss": 0.5569, + "step": 8826 + }, + { + "epoch": 0.6489486840170563, + "grad_norm": 0.7663241624832153, + "learning_rate": 4.859912833657122e-06, + "loss": 0.5132, + "step": 8827 + }, + { + "epoch": 0.6490222026172622, + "grad_norm": 0.8912845849990845, + "learning_rate": 4.859881030833938e-06, + "loss": 0.5333, + "step": 8828 + }, + { + "epoch": 0.6490957212174681, + "grad_norm": 0.8152199983596802, + "learning_rate": 4.859849224505278e-06, + "loss": 0.5496, + "step": 8829 + }, + { + "epoch": 0.6491692398176738, + "grad_norm": 0.8226979970932007, + "learning_rate": 4.859817414671193e-06, + "loss": 0.4813, + "step": 8830 + }, + { + "epoch": 0.6492427584178797, + "grad_norm": 0.8622573614120483, + "learning_rate": 4.859785601331727e-06, + "loss": 0.6, + "step": 8831 + }, + { + "epoch": 0.6493162770180856, + "grad_norm": 0.9039278626441956, + "learning_rate": 4.859753784486929e-06, + "loss": 0.588, + "step": 8832 + }, + { + "epoch": 0.6493897956182915, + "grad_norm": 0.8524268269538879, + "learning_rate": 4.8597219641368465e-06, + "loss": 0.5155, + "step": 8833 + }, + { + "epoch": 0.6494633142184972, + "grad_norm": 0.786889910697937, + "learning_rate": 4.859690140281526e-06, + "loss": 0.4843, + "step": 8834 + }, + { + "epoch": 0.6495368328187031, + "grad_norm": 0.832396388053894, + "learning_rate": 4.859658312921015e-06, + "loss": 0.5278, + "step": 8835 + }, + { + "epoch": 0.649610351418909, + "grad_norm": 0.868106484413147, + "learning_rate": 4.8596264820553605e-06, + "loss": 0.5768, + "step": 8836 + }, + { + "epoch": 0.6496838700191149, + "grad_norm": 0.814359188079834, + "learning_rate": 4.859594647684611e-06, + "loss": 0.5594, + "step": 8837 + }, + { + "epoch": 0.6497573886193206, + "grad_norm": 0.8808349370956421, + "learning_rate": 4.859562809808812e-06, + "loss": 0.5659, + "step": 8838 + }, + { + "epoch": 0.6498309072195265, + "grad_norm": 0.7993026971817017, + "learning_rate": 4.859530968428011e-06, + "loss": 0.4983, + "step": 8839 + }, + { + "epoch": 0.6499044258197324, + "grad_norm": 0.8325051665306091, + "learning_rate": 4.859499123542257e-06, + "loss": 0.5628, + "step": 8840 + }, + { + "epoch": 0.6499779444199383, + "grad_norm": 0.8061482310295105, + "learning_rate": 4.859467275151597e-06, + "loss": 0.5015, + "step": 8841 + }, + { + "epoch": 0.650051463020144, + "grad_norm": 0.8274059891700745, + "learning_rate": 4.859435423256076e-06, + "loss": 0.4977, + "step": 8842 + }, + { + "epoch": 0.6501249816203499, + "grad_norm": 0.8540709614753723, + "learning_rate": 4.859403567855743e-06, + "loss": 0.5504, + "step": 8843 + }, + { + "epoch": 0.6501985002205558, + "grad_norm": 0.8378251194953918, + "learning_rate": 4.859371708950646e-06, + "loss": 0.533, + "step": 8844 + }, + { + "epoch": 0.6502720188207617, + "grad_norm": 0.8855370283126831, + "learning_rate": 4.8593398465408315e-06, + "loss": 0.5515, + "step": 8845 + }, + { + "epoch": 0.6503455374209675, + "grad_norm": 0.823586106300354, + "learning_rate": 4.859307980626347e-06, + "loss": 0.4843, + "step": 8846 + }, + { + "epoch": 0.6504190560211733, + "grad_norm": 0.8086034655570984, + "learning_rate": 4.859276111207238e-06, + "loss": 0.5288, + "step": 8847 + }, + { + "epoch": 0.6504925746213792, + "grad_norm": 0.808409571647644, + "learning_rate": 4.859244238283555e-06, + "loss": 0.5082, + "step": 8848 + }, + { + "epoch": 0.6505660932215851, + "grad_norm": 0.8080130219459534, + "learning_rate": 4.8592123618553434e-06, + "loss": 0.5203, + "step": 8849 + }, + { + "epoch": 0.6506396118217909, + "grad_norm": 0.8890860676765442, + "learning_rate": 4.859180481922652e-06, + "loss": 0.5481, + "step": 8850 + }, + { + "epoch": 0.6507131304219967, + "grad_norm": 0.8015138506889343, + "learning_rate": 4.859148598485526e-06, + "loss": 0.5204, + "step": 8851 + }, + { + "epoch": 0.6507866490222026, + "grad_norm": 0.8043867945671082, + "learning_rate": 4.8591167115440145e-06, + "loss": 0.5715, + "step": 8852 + }, + { + "epoch": 0.6508601676224085, + "grad_norm": 0.8577085733413696, + "learning_rate": 4.859084821098165e-06, + "loss": 0.5487, + "step": 8853 + }, + { + "epoch": 0.6509336862226143, + "grad_norm": 0.8062837719917297, + "learning_rate": 4.859052927148022e-06, + "loss": 0.4981, + "step": 8854 + }, + { + "epoch": 0.6510072048228202, + "grad_norm": 0.8132888674736023, + "learning_rate": 4.8590210296936365e-06, + "loss": 0.5647, + "step": 8855 + }, + { + "epoch": 0.651080723423026, + "grad_norm": 0.804273784160614, + "learning_rate": 4.858989128735055e-06, + "loss": 0.4979, + "step": 8856 + }, + { + "epoch": 0.6511542420232319, + "grad_norm": 0.863614559173584, + "learning_rate": 4.858957224272324e-06, + "loss": 0.5535, + "step": 8857 + }, + { + "epoch": 0.6512277606234377, + "grad_norm": 0.8272914886474609, + "learning_rate": 4.85892531630549e-06, + "loss": 0.5344, + "step": 8858 + }, + { + "epoch": 0.6513012792236436, + "grad_norm": 0.8805528283119202, + "learning_rate": 4.858893404834602e-06, + "loss": 0.5776, + "step": 8859 + }, + { + "epoch": 0.6513747978238494, + "grad_norm": 0.9236440062522888, + "learning_rate": 4.8588614898597085e-06, + "loss": 0.5738, + "step": 8860 + }, + { + "epoch": 0.6514483164240553, + "grad_norm": 0.799949586391449, + "learning_rate": 4.858829571380855e-06, + "loss": 0.5193, + "step": 8861 + }, + { + "epoch": 0.6515218350242611, + "grad_norm": 0.8696754574775696, + "learning_rate": 4.858797649398088e-06, + "loss": 0.5398, + "step": 8862 + }, + { + "epoch": 0.651595353624467, + "grad_norm": 0.8000283241271973, + "learning_rate": 4.858765723911457e-06, + "loss": 0.5018, + "step": 8863 + }, + { + "epoch": 0.6516688722246728, + "grad_norm": 0.8057802319526672, + "learning_rate": 4.858733794921009e-06, + "loss": 0.4875, + "step": 8864 + }, + { + "epoch": 0.6517423908248787, + "grad_norm": 0.8293889760971069, + "learning_rate": 4.858701862426791e-06, + "loss": 0.6002, + "step": 8865 + }, + { + "epoch": 0.6518159094250845, + "grad_norm": 0.8086451888084412, + "learning_rate": 4.85866992642885e-06, + "loss": 0.542, + "step": 8866 + }, + { + "epoch": 0.6518894280252904, + "grad_norm": 0.8355501890182495, + "learning_rate": 4.858637986927234e-06, + "loss": 0.5158, + "step": 8867 + }, + { + "epoch": 0.6519629466254963, + "grad_norm": 0.8447239995002747, + "learning_rate": 4.8586060439219915e-06, + "loss": 0.5476, + "step": 8868 + }, + { + "epoch": 0.6520364652257021, + "grad_norm": 0.8745369911193848, + "learning_rate": 4.858574097413168e-06, + "loss": 0.5551, + "step": 8869 + }, + { + "epoch": 0.6521099838259079, + "grad_norm": 0.8184143304824829, + "learning_rate": 4.8585421474008124e-06, + "loss": 0.5512, + "step": 8870 + }, + { + "epoch": 0.6521835024261138, + "grad_norm": 0.8578673005104065, + "learning_rate": 4.858510193884971e-06, + "loss": 0.554, + "step": 8871 + }, + { + "epoch": 0.6522570210263197, + "grad_norm": 0.8081568479537964, + "learning_rate": 4.858478236865691e-06, + "loss": 0.527, + "step": 8872 + }, + { + "epoch": 0.6523305396265255, + "grad_norm": 0.8479720950126648, + "learning_rate": 4.858446276343023e-06, + "loss": 0.5092, + "step": 8873 + }, + { + "epoch": 0.6524040582267313, + "grad_norm": 0.7786447405815125, + "learning_rate": 4.85841431231701e-06, + "loss": 0.5206, + "step": 8874 + }, + { + "epoch": 0.6524775768269372, + "grad_norm": 0.8684438467025757, + "learning_rate": 4.858382344787703e-06, + "loss": 0.5831, + "step": 8875 + }, + { + "epoch": 0.6525510954271431, + "grad_norm": 0.8946706056594849, + "learning_rate": 4.858350373755147e-06, + "loss": 0.5672, + "step": 8876 + }, + { + "epoch": 0.652624614027349, + "grad_norm": 0.8700800538063049, + "learning_rate": 4.858318399219392e-06, + "loss": 0.5683, + "step": 8877 + }, + { + "epoch": 0.6526981326275547, + "grad_norm": 0.866772472858429, + "learning_rate": 4.8582864211804836e-06, + "loss": 0.5528, + "step": 8878 + }, + { + "epoch": 0.6527716512277606, + "grad_norm": 0.8278399705886841, + "learning_rate": 4.85825443963847e-06, + "loss": 0.5023, + "step": 8879 + }, + { + "epoch": 0.6528451698279665, + "grad_norm": 0.8107278347015381, + "learning_rate": 4.858222454593399e-06, + "loss": 0.556, + "step": 8880 + }, + { + "epoch": 0.6529186884281724, + "grad_norm": 0.784641444683075, + "learning_rate": 4.8581904660453165e-06, + "loss": 0.5232, + "step": 8881 + }, + { + "epoch": 0.6529922070283781, + "grad_norm": 0.793213427066803, + "learning_rate": 4.858158473994272e-06, + "loss": 0.5069, + "step": 8882 + }, + { + "epoch": 0.653065725628584, + "grad_norm": 0.8136516213417053, + "learning_rate": 4.858126478440312e-06, + "loss": 0.5516, + "step": 8883 + }, + { + "epoch": 0.6531392442287899, + "grad_norm": 0.8281722664833069, + "learning_rate": 4.858094479383484e-06, + "loss": 0.5347, + "step": 8884 + }, + { + "epoch": 0.6532127628289958, + "grad_norm": 0.7908387184143066, + "learning_rate": 4.858062476823836e-06, + "loss": 0.5235, + "step": 8885 + }, + { + "epoch": 0.6532862814292016, + "grad_norm": 0.8601198792457581, + "learning_rate": 4.858030470761415e-06, + "loss": 0.5646, + "step": 8886 + }, + { + "epoch": 0.6533598000294074, + "grad_norm": 0.8340513706207275, + "learning_rate": 4.8579984611962695e-06, + "loss": 0.5665, + "step": 8887 + }, + { + "epoch": 0.6534333186296133, + "grad_norm": 0.8713018894195557, + "learning_rate": 4.857966448128446e-06, + "loss": 0.547, + "step": 8888 + }, + { + "epoch": 0.6535068372298192, + "grad_norm": 0.8431299328804016, + "learning_rate": 4.857934431557993e-06, + "loss": 0.524, + "step": 8889 + }, + { + "epoch": 0.653580355830025, + "grad_norm": 0.8129270076751709, + "learning_rate": 4.857902411484957e-06, + "loss": 0.5445, + "step": 8890 + }, + { + "epoch": 0.6536538744302308, + "grad_norm": 0.8621707558631897, + "learning_rate": 4.857870387909386e-06, + "loss": 0.5698, + "step": 8891 + }, + { + "epoch": 0.6537273930304367, + "grad_norm": 0.8138759732246399, + "learning_rate": 4.857838360831329e-06, + "loss": 0.5121, + "step": 8892 + }, + { + "epoch": 0.6538009116306426, + "grad_norm": 0.8328111171722412, + "learning_rate": 4.85780633025083e-06, + "loss": 0.5083, + "step": 8893 + }, + { + "epoch": 0.6538744302308485, + "grad_norm": 0.8448997735977173, + "learning_rate": 4.85777429616794e-06, + "loss": 0.5835, + "step": 8894 + }, + { + "epoch": 0.6539479488310542, + "grad_norm": 0.8585377931594849, + "learning_rate": 4.857742258582705e-06, + "loss": 0.5597, + "step": 8895 + }, + { + "epoch": 0.6540214674312601, + "grad_norm": 0.8675339818000793, + "learning_rate": 4.857710217495174e-06, + "loss": 0.5489, + "step": 8896 + }, + { + "epoch": 0.654094986031466, + "grad_norm": 0.8384175300598145, + "learning_rate": 4.857678172905392e-06, + "loss": 0.5436, + "step": 8897 + }, + { + "epoch": 0.6541685046316719, + "grad_norm": 0.832992672920227, + "learning_rate": 4.857646124813409e-06, + "loss": 0.5347, + "step": 8898 + }, + { + "epoch": 0.6542420232318776, + "grad_norm": 0.8506892323493958, + "learning_rate": 4.857614073219272e-06, + "loss": 0.5674, + "step": 8899 + }, + { + "epoch": 0.6543155418320835, + "grad_norm": 0.8309851288795471, + "learning_rate": 4.857582018123028e-06, + "loss": 0.5114, + "step": 8900 + }, + { + "epoch": 0.6543890604322894, + "grad_norm": 0.8015992045402527, + "learning_rate": 4.857549959524725e-06, + "loss": 0.4827, + "step": 8901 + }, + { + "epoch": 0.6544625790324953, + "grad_norm": 0.8422988653182983, + "learning_rate": 4.8575178974244115e-06, + "loss": 0.6001, + "step": 8902 + }, + { + "epoch": 0.654536097632701, + "grad_norm": 0.7974645495414734, + "learning_rate": 4.857485831822133e-06, + "loss": 0.5568, + "step": 8903 + }, + { + "epoch": 0.6546096162329069, + "grad_norm": 0.8986638784408569, + "learning_rate": 4.8574537627179395e-06, + "loss": 0.5735, + "step": 8904 + }, + { + "epoch": 0.6546831348331128, + "grad_norm": 0.8703692555427551, + "learning_rate": 4.857421690111877e-06, + "loss": 0.5343, + "step": 8905 + }, + { + "epoch": 0.6547566534333187, + "grad_norm": 0.8095341324806213, + "learning_rate": 4.857389614003993e-06, + "loss": 0.5181, + "step": 8906 + }, + { + "epoch": 0.6548301720335244, + "grad_norm": 0.8468934297561646, + "learning_rate": 4.857357534394337e-06, + "loss": 0.5655, + "step": 8907 + }, + { + "epoch": 0.6549036906337303, + "grad_norm": 0.839154064655304, + "learning_rate": 4.857325451282955e-06, + "loss": 0.5485, + "step": 8908 + }, + { + "epoch": 0.6549772092339362, + "grad_norm": 0.8193274736404419, + "learning_rate": 4.857293364669895e-06, + "loss": 0.5365, + "step": 8909 + }, + { + "epoch": 0.6550507278341421, + "grad_norm": 0.8465780019760132, + "learning_rate": 4.857261274555205e-06, + "loss": 0.531, + "step": 8910 + }, + { + "epoch": 0.6551242464343479, + "grad_norm": 0.876737654209137, + "learning_rate": 4.857229180938933e-06, + "loss": 0.5259, + "step": 8911 + }, + { + "epoch": 0.6551977650345537, + "grad_norm": 0.8714327812194824, + "learning_rate": 4.857197083821124e-06, + "loss": 0.5489, + "step": 8912 + }, + { + "epoch": 0.6552712836347596, + "grad_norm": 0.8755587339401245, + "learning_rate": 4.85716498320183e-06, + "loss": 0.5396, + "step": 8913 + }, + { + "epoch": 0.6553448022349655, + "grad_norm": 0.8377395272254944, + "learning_rate": 4.857132879081096e-06, + "loss": 0.5415, + "step": 8914 + }, + { + "epoch": 0.6554183208351713, + "grad_norm": 0.8019043207168579, + "learning_rate": 4.85710077145897e-06, + "loss": 0.5352, + "step": 8915 + }, + { + "epoch": 0.6554918394353771, + "grad_norm": 0.928570032119751, + "learning_rate": 4.8570686603354995e-06, + "loss": 0.5978, + "step": 8916 + }, + { + "epoch": 0.655565358035583, + "grad_norm": 0.831556499004364, + "learning_rate": 4.857036545710733e-06, + "loss": 0.5, + "step": 8917 + }, + { + "epoch": 0.6556388766357889, + "grad_norm": 0.8480947613716125, + "learning_rate": 4.857004427584717e-06, + "loss": 0.5556, + "step": 8918 + }, + { + "epoch": 0.6557123952359947, + "grad_norm": 0.8342413902282715, + "learning_rate": 4.856972305957501e-06, + "loss": 0.5353, + "step": 8919 + }, + { + "epoch": 0.6557859138362006, + "grad_norm": 0.8114414811134338, + "learning_rate": 4.8569401808291314e-06, + "loss": 0.5275, + "step": 8920 + }, + { + "epoch": 0.6558594324364064, + "grad_norm": 0.7745878100395203, + "learning_rate": 4.856908052199656e-06, + "loss": 0.4858, + "step": 8921 + }, + { + "epoch": 0.6559329510366123, + "grad_norm": 0.8510180115699768, + "learning_rate": 4.856875920069123e-06, + "loss": 0.5351, + "step": 8922 + }, + { + "epoch": 0.6560064696368181, + "grad_norm": 0.8638266921043396, + "learning_rate": 4.85684378443758e-06, + "loss": 0.5443, + "step": 8923 + }, + { + "epoch": 0.656079988237024, + "grad_norm": 0.8310449123382568, + "learning_rate": 4.856811645305075e-06, + "loss": 0.5446, + "step": 8924 + }, + { + "epoch": 0.6561535068372298, + "grad_norm": 0.8211981058120728, + "learning_rate": 4.856779502671654e-06, + "loss": 0.538, + "step": 8925 + }, + { + "epoch": 0.6562270254374357, + "grad_norm": 0.8467662334442139, + "learning_rate": 4.856747356537367e-06, + "loss": 0.5577, + "step": 8926 + }, + { + "epoch": 0.6563005440376415, + "grad_norm": 0.8264253735542297, + "learning_rate": 4.85671520690226e-06, + "loss": 0.5195, + "step": 8927 + }, + { + "epoch": 0.6563740626378474, + "grad_norm": 0.8571513295173645, + "learning_rate": 4.856683053766383e-06, + "loss": 0.5131, + "step": 8928 + }, + { + "epoch": 0.6564475812380532, + "grad_norm": 0.8279253244400024, + "learning_rate": 4.856650897129782e-06, + "loss": 0.5678, + "step": 8929 + }, + { + "epoch": 0.6565210998382591, + "grad_norm": 0.8541346192359924, + "learning_rate": 4.8566187369925045e-06, + "loss": 0.5692, + "step": 8930 + }, + { + "epoch": 0.6565946184384649, + "grad_norm": 0.8488591909408569, + "learning_rate": 4.856586573354599e-06, + "loss": 0.5695, + "step": 8931 + }, + { + "epoch": 0.6566681370386708, + "grad_norm": 0.8245639204978943, + "learning_rate": 4.856554406216114e-06, + "loss": 0.5529, + "step": 8932 + }, + { + "epoch": 0.6567416556388767, + "grad_norm": 0.8140820860862732, + "learning_rate": 4.856522235577096e-06, + "loss": 0.5569, + "step": 8933 + }, + { + "epoch": 0.6568151742390825, + "grad_norm": 0.8556779623031616, + "learning_rate": 4.856490061437594e-06, + "loss": 0.5628, + "step": 8934 + }, + { + "epoch": 0.6568886928392883, + "grad_norm": 0.8703430891036987, + "learning_rate": 4.856457883797653e-06, + "loss": 0.544, + "step": 8935 + }, + { + "epoch": 0.6569622114394942, + "grad_norm": 0.8561381697654724, + "learning_rate": 4.856425702657325e-06, + "loss": 0.576, + "step": 8936 + }, + { + "epoch": 0.6570357300397001, + "grad_norm": 0.8403781056404114, + "learning_rate": 4.8563935180166546e-06, + "loss": 0.5224, + "step": 8937 + }, + { + "epoch": 0.6571092486399059, + "grad_norm": 0.8369473814964294, + "learning_rate": 4.856361329875691e-06, + "loss": 0.526, + "step": 8938 + }, + { + "epoch": 0.6571827672401117, + "grad_norm": 0.841766357421875, + "learning_rate": 4.856329138234481e-06, + "loss": 0.5306, + "step": 8939 + }, + { + "epoch": 0.6572562858403176, + "grad_norm": 0.8212500214576721, + "learning_rate": 4.856296943093074e-06, + "loss": 0.5148, + "step": 8940 + }, + { + "epoch": 0.6573298044405235, + "grad_norm": 0.8175801634788513, + "learning_rate": 4.8562647444515156e-06, + "loss": 0.5267, + "step": 8941 + }, + { + "epoch": 0.6574033230407293, + "grad_norm": 0.8222749829292297, + "learning_rate": 4.856232542309856e-06, + "loss": 0.5608, + "step": 8942 + }, + { + "epoch": 0.6574768416409351, + "grad_norm": 0.8315678834915161, + "learning_rate": 4.856200336668142e-06, + "loss": 0.5222, + "step": 8943 + }, + { + "epoch": 0.657550360241141, + "grad_norm": 0.7956432700157166, + "learning_rate": 4.856168127526421e-06, + "loss": 0.5411, + "step": 8944 + }, + { + "epoch": 0.6576238788413469, + "grad_norm": 0.8198517560958862, + "learning_rate": 4.856135914884742e-06, + "loss": 0.5363, + "step": 8945 + }, + { + "epoch": 0.6576973974415528, + "grad_norm": 0.8368695378303528, + "learning_rate": 4.856103698743151e-06, + "loss": 0.5429, + "step": 8946 + }, + { + "epoch": 0.6577709160417585, + "grad_norm": 0.7812391519546509, + "learning_rate": 4.856071479101698e-06, + "loss": 0.5222, + "step": 8947 + }, + { + "epoch": 0.6578444346419644, + "grad_norm": 0.8209939002990723, + "learning_rate": 4.856039255960429e-06, + "loss": 0.5392, + "step": 8948 + }, + { + "epoch": 0.6579179532421703, + "grad_norm": 0.8143665790557861, + "learning_rate": 4.856007029319393e-06, + "loss": 0.5607, + "step": 8949 + }, + { + "epoch": 0.6579914718423762, + "grad_norm": 0.7912455797195435, + "learning_rate": 4.855974799178638e-06, + "loss": 0.5036, + "step": 8950 + }, + { + "epoch": 0.6580649904425819, + "grad_norm": 0.8084872961044312, + "learning_rate": 4.855942565538211e-06, + "loss": 0.5409, + "step": 8951 + }, + { + "epoch": 0.6581385090427878, + "grad_norm": 0.8297713994979858, + "learning_rate": 4.85591032839816e-06, + "loss": 0.5471, + "step": 8952 + }, + { + "epoch": 0.6582120276429937, + "grad_norm": 0.7937334775924683, + "learning_rate": 4.855878087758534e-06, + "loss": 0.5229, + "step": 8953 + }, + { + "epoch": 0.6582855462431996, + "grad_norm": 0.8610386252403259, + "learning_rate": 4.85584584361938e-06, + "loss": 0.5509, + "step": 8954 + }, + { + "epoch": 0.6583590648434053, + "grad_norm": 0.7798750400543213, + "learning_rate": 4.855813595980746e-06, + "loss": 0.5161, + "step": 8955 + }, + { + "epoch": 0.6584325834436112, + "grad_norm": 0.8667789101600647, + "learning_rate": 4.8557813448426794e-06, + "loss": 0.5345, + "step": 8956 + }, + { + "epoch": 0.6585061020438171, + "grad_norm": 0.8450369834899902, + "learning_rate": 4.855749090205229e-06, + "loss": 0.5748, + "step": 8957 + }, + { + "epoch": 0.658579620644023, + "grad_norm": 0.7622597217559814, + "learning_rate": 4.855716832068442e-06, + "loss": 0.4679, + "step": 8958 + }, + { + "epoch": 0.6586531392442287, + "grad_norm": 0.8543587327003479, + "learning_rate": 4.855684570432368e-06, + "loss": 0.555, + "step": 8959 + }, + { + "epoch": 0.6587266578444346, + "grad_norm": 0.8272247910499573, + "learning_rate": 4.855652305297052e-06, + "loss": 0.5619, + "step": 8960 + }, + { + "epoch": 0.6588001764446405, + "grad_norm": 0.8596062660217285, + "learning_rate": 4.855620036662544e-06, + "loss": 0.5195, + "step": 8961 + }, + { + "epoch": 0.6588736950448464, + "grad_norm": 0.8071039319038391, + "learning_rate": 4.8555877645288906e-06, + "loss": 0.5237, + "step": 8962 + }, + { + "epoch": 0.6589472136450522, + "grad_norm": 0.8101856112480164, + "learning_rate": 4.855555488896142e-06, + "loss": 0.5376, + "step": 8963 + }, + { + "epoch": 0.659020732245258, + "grad_norm": 0.9086818695068359, + "learning_rate": 4.855523209764343e-06, + "loss": 0.574, + "step": 8964 + }, + { + "epoch": 0.6590942508454639, + "grad_norm": 0.8349906802177429, + "learning_rate": 4.855490927133545e-06, + "loss": 0.5387, + "step": 8965 + }, + { + "epoch": 0.6591677694456698, + "grad_norm": 0.8071803450584412, + "learning_rate": 4.855458641003794e-06, + "loss": 0.5274, + "step": 8966 + }, + { + "epoch": 0.6592412880458756, + "grad_norm": 0.818449854850769, + "learning_rate": 4.855426351375137e-06, + "loss": 0.5595, + "step": 8967 + }, + { + "epoch": 0.6593148066460814, + "grad_norm": 0.8348501324653625, + "learning_rate": 4.855394058247624e-06, + "loss": 0.5427, + "step": 8968 + }, + { + "epoch": 0.6593883252462873, + "grad_norm": 0.8247905373573303, + "learning_rate": 4.855361761621302e-06, + "loss": 0.5211, + "step": 8969 + }, + { + "epoch": 0.6594618438464932, + "grad_norm": 0.8211542367935181, + "learning_rate": 4.855329461496219e-06, + "loss": 0.5472, + "step": 8970 + }, + { + "epoch": 0.659535362446699, + "grad_norm": 0.8276337385177612, + "learning_rate": 4.855297157872422e-06, + "loss": 0.5474, + "step": 8971 + }, + { + "epoch": 0.6596088810469048, + "grad_norm": 0.8412843346595764, + "learning_rate": 4.855264850749962e-06, + "loss": 0.5582, + "step": 8972 + }, + { + "epoch": 0.6596823996471107, + "grad_norm": 0.8351640701293945, + "learning_rate": 4.855232540128884e-06, + "loss": 0.5349, + "step": 8973 + }, + { + "epoch": 0.6597559182473166, + "grad_norm": 0.8559253811836243, + "learning_rate": 4.855200226009237e-06, + "loss": 0.5569, + "step": 8974 + }, + { + "epoch": 0.6598294368475224, + "grad_norm": 0.8403255343437195, + "learning_rate": 4.85516790839107e-06, + "loss": 0.5266, + "step": 8975 + }, + { + "epoch": 0.6599029554477283, + "grad_norm": 0.8537321090698242, + "learning_rate": 4.8551355872744284e-06, + "loss": 0.5699, + "step": 8976 + }, + { + "epoch": 0.6599764740479341, + "grad_norm": 0.8131974935531616, + "learning_rate": 4.855103262659363e-06, + "loss": 0.5483, + "step": 8977 + }, + { + "epoch": 0.66004999264814, + "grad_norm": 0.7694261074066162, + "learning_rate": 4.8550709345459205e-06, + "loss": 0.5135, + "step": 8978 + }, + { + "epoch": 0.6601235112483458, + "grad_norm": 0.8555014729499817, + "learning_rate": 4.855038602934149e-06, + "loss": 0.5379, + "step": 8979 + }, + { + "epoch": 0.6601970298485517, + "grad_norm": 0.8655683994293213, + "learning_rate": 4.855006267824096e-06, + "loss": 0.5558, + "step": 8980 + }, + { + "epoch": 0.6602705484487575, + "grad_norm": 0.8311758637428284, + "learning_rate": 4.85497392921581e-06, + "loss": 0.5339, + "step": 8981 + }, + { + "epoch": 0.6603440670489634, + "grad_norm": 0.821744441986084, + "learning_rate": 4.854941587109341e-06, + "loss": 0.5206, + "step": 8982 + }, + { + "epoch": 0.6604175856491692, + "grad_norm": 0.9271090626716614, + "learning_rate": 4.854909241504734e-06, + "loss": 0.5768, + "step": 8983 + }, + { + "epoch": 0.6604911042493751, + "grad_norm": 0.836298406124115, + "learning_rate": 4.854876892402039e-06, + "loss": 0.5736, + "step": 8984 + }, + { + "epoch": 0.660564622849581, + "grad_norm": 0.8032099008560181, + "learning_rate": 4.854844539801302e-06, + "loss": 0.5788, + "step": 8985 + }, + { + "epoch": 0.6606381414497868, + "grad_norm": 0.801318347454071, + "learning_rate": 4.854812183702573e-06, + "loss": 0.5377, + "step": 8986 + }, + { + "epoch": 0.6607116600499926, + "grad_norm": 0.8293899893760681, + "learning_rate": 4.8547798241059e-06, + "loss": 0.5167, + "step": 8987 + }, + { + "epoch": 0.6607851786501985, + "grad_norm": 0.8379459381103516, + "learning_rate": 4.8547474610113295e-06, + "loss": 0.5601, + "step": 8988 + }, + { + "epoch": 0.6608586972504044, + "grad_norm": 0.9258455634117126, + "learning_rate": 4.854715094418911e-06, + "loss": 0.6025, + "step": 8989 + }, + { + "epoch": 0.6609322158506102, + "grad_norm": 0.8897093534469604, + "learning_rate": 4.854682724328692e-06, + "loss": 0.5493, + "step": 8990 + }, + { + "epoch": 0.661005734450816, + "grad_norm": 0.8167009949684143, + "learning_rate": 4.854650350740722e-06, + "loss": 0.5332, + "step": 8991 + }, + { + "epoch": 0.6610792530510219, + "grad_norm": 0.8330564498901367, + "learning_rate": 4.854617973655046e-06, + "loss": 0.5424, + "step": 8992 + }, + { + "epoch": 0.6611527716512278, + "grad_norm": 0.8407062888145447, + "learning_rate": 4.854585593071715e-06, + "loss": 0.5628, + "step": 8993 + }, + { + "epoch": 0.6612262902514336, + "grad_norm": 0.8854224681854248, + "learning_rate": 4.854553208990776e-06, + "loss": 0.5371, + "step": 8994 + }, + { + "epoch": 0.6612998088516394, + "grad_norm": 0.8614640235900879, + "learning_rate": 4.854520821412277e-06, + "loss": 0.5416, + "step": 8995 + }, + { + "epoch": 0.6613733274518453, + "grad_norm": 0.8108211755752563, + "learning_rate": 4.854488430336266e-06, + "loss": 0.5501, + "step": 8996 + }, + { + "epoch": 0.6614468460520512, + "grad_norm": 0.8333083391189575, + "learning_rate": 4.854456035762792e-06, + "loss": 0.5325, + "step": 8997 + }, + { + "epoch": 0.661520364652257, + "grad_norm": 0.8428142070770264, + "learning_rate": 4.854423637691902e-06, + "loss": 0.561, + "step": 8998 + }, + { + "epoch": 0.6615938832524628, + "grad_norm": 0.804810643196106, + "learning_rate": 4.854391236123644e-06, + "loss": 0.5266, + "step": 8999 + }, + { + "epoch": 0.6616674018526687, + "grad_norm": 0.8324419260025024, + "learning_rate": 4.854358831058068e-06, + "loss": 0.4718, + "step": 9000 + }, + { + "epoch": 0.6617409204528746, + "grad_norm": 0.8645460605621338, + "learning_rate": 4.85432642249522e-06, + "loss": 0.5228, + "step": 9001 + }, + { + "epoch": 0.6618144390530805, + "grad_norm": 0.8766934275627136, + "learning_rate": 4.85429401043515e-06, + "loss": 0.5386, + "step": 9002 + }, + { + "epoch": 0.6618879576532862, + "grad_norm": 0.8051814436912537, + "learning_rate": 4.854261594877904e-06, + "loss": 0.483, + "step": 9003 + }, + { + "epoch": 0.6619614762534921, + "grad_norm": 0.833672046661377, + "learning_rate": 4.8542291758235325e-06, + "loss": 0.5388, + "step": 9004 + }, + { + "epoch": 0.662034994853698, + "grad_norm": 0.8052542209625244, + "learning_rate": 4.854196753272081e-06, + "loss": 0.5217, + "step": 9005 + }, + { + "epoch": 0.6621085134539039, + "grad_norm": 0.811485230922699, + "learning_rate": 4.8541643272236005e-06, + "loss": 0.554, + "step": 9006 + }, + { + "epoch": 0.6621820320541096, + "grad_norm": 0.7956570982933044, + "learning_rate": 4.854131897678137e-06, + "loss": 0.4823, + "step": 9007 + }, + { + "epoch": 0.6622555506543155, + "grad_norm": 0.8687226176261902, + "learning_rate": 4.85409946463574e-06, + "loss": 0.5447, + "step": 9008 + }, + { + "epoch": 0.6623290692545214, + "grad_norm": 0.8099907040596008, + "learning_rate": 4.854067028096456e-06, + "loss": 0.5246, + "step": 9009 + }, + { + "epoch": 0.6624025878547273, + "grad_norm": 0.8517974615097046, + "learning_rate": 4.854034588060336e-06, + "loss": 0.5181, + "step": 9010 + }, + { + "epoch": 0.662476106454933, + "grad_norm": 0.8375890254974365, + "learning_rate": 4.854002144527425e-06, + "loss": 0.5704, + "step": 9011 + }, + { + "epoch": 0.6625496250551389, + "grad_norm": 0.8909435868263245, + "learning_rate": 4.853969697497774e-06, + "loss": 0.6044, + "step": 9012 + }, + { + "epoch": 0.6626231436553448, + "grad_norm": 0.8141803741455078, + "learning_rate": 4.85393724697143e-06, + "loss": 0.4812, + "step": 9013 + }, + { + "epoch": 0.6626966622555507, + "grad_norm": 0.8398885130882263, + "learning_rate": 4.8539047929484396e-06, + "loss": 0.5739, + "step": 9014 + }, + { + "epoch": 0.6627701808557565, + "grad_norm": 0.8474835157394409, + "learning_rate": 4.853872335428854e-06, + "loss": 0.566, + "step": 9015 + }, + { + "epoch": 0.6628436994559623, + "grad_norm": 0.7968475818634033, + "learning_rate": 4.853839874412719e-06, + "loss": 0.5309, + "step": 9016 + }, + { + "epoch": 0.6629172180561682, + "grad_norm": 0.850845217704773, + "learning_rate": 4.853807409900084e-06, + "loss": 0.5483, + "step": 9017 + }, + { + "epoch": 0.6629907366563741, + "grad_norm": 0.8599121570587158, + "learning_rate": 4.853774941890997e-06, + "loss": 0.5736, + "step": 9018 + }, + { + "epoch": 0.6630642552565799, + "grad_norm": 0.9102243781089783, + "learning_rate": 4.853742470385507e-06, + "loss": 0.5403, + "step": 9019 + }, + { + "epoch": 0.6631377738567857, + "grad_norm": 0.8310854434967041, + "learning_rate": 4.85370999538366e-06, + "loss": 0.5481, + "step": 9020 + }, + { + "epoch": 0.6632112924569916, + "grad_norm": 0.8096431493759155, + "learning_rate": 4.853677516885507e-06, + "loss": 0.4986, + "step": 9021 + }, + { + "epoch": 0.6632848110571975, + "grad_norm": 0.8725870251655579, + "learning_rate": 4.853645034891094e-06, + "loss": 0.5652, + "step": 9022 + }, + { + "epoch": 0.6633583296574034, + "grad_norm": 0.8750386834144592, + "learning_rate": 4.8536125494004705e-06, + "loss": 0.6046, + "step": 9023 + }, + { + "epoch": 0.6634318482576091, + "grad_norm": 0.8177233338356018, + "learning_rate": 4.853580060413685e-06, + "loss": 0.5022, + "step": 9024 + }, + { + "epoch": 0.663505366857815, + "grad_norm": 0.7951782941818237, + "learning_rate": 4.853547567930784e-06, + "loss": 0.5343, + "step": 9025 + }, + { + "epoch": 0.6635788854580209, + "grad_norm": 0.8327028751373291, + "learning_rate": 4.853515071951817e-06, + "loss": 0.5153, + "step": 9026 + }, + { + "epoch": 0.6636524040582268, + "grad_norm": 0.8537576794624329, + "learning_rate": 4.853482572476834e-06, + "loss": 0.564, + "step": 9027 + }, + { + "epoch": 0.6637259226584326, + "grad_norm": 0.8446356654167175, + "learning_rate": 4.85345006950588e-06, + "loss": 0.5747, + "step": 9028 + }, + { + "epoch": 0.6637994412586384, + "grad_norm": 0.8611159324645996, + "learning_rate": 4.853417563039005e-06, + "loss": 0.5726, + "step": 9029 + }, + { + "epoch": 0.6638729598588443, + "grad_norm": 0.8770732879638672, + "learning_rate": 4.853385053076257e-06, + "loss": 0.5683, + "step": 9030 + }, + { + "epoch": 0.6639464784590502, + "grad_norm": 0.8390340209007263, + "learning_rate": 4.853352539617685e-06, + "loss": 0.513, + "step": 9031 + }, + { + "epoch": 0.664019997059256, + "grad_norm": 0.8628026843070984, + "learning_rate": 4.853320022663337e-06, + "loss": 0.5836, + "step": 9032 + }, + { + "epoch": 0.6640935156594618, + "grad_norm": 0.8058002591133118, + "learning_rate": 4.8532875022132596e-06, + "loss": 0.5192, + "step": 9033 + }, + { + "epoch": 0.6641670342596677, + "grad_norm": 0.8584407567977905, + "learning_rate": 4.853254978267503e-06, + "loss": 0.5361, + "step": 9034 + }, + { + "epoch": 0.6642405528598736, + "grad_norm": 0.8758600354194641, + "learning_rate": 4.853222450826116e-06, + "loss": 0.5797, + "step": 9035 + }, + { + "epoch": 0.6643140714600794, + "grad_norm": 0.8641175031661987, + "learning_rate": 4.853189919889145e-06, + "loss": 0.5443, + "step": 9036 + }, + { + "epoch": 0.6643875900602852, + "grad_norm": 0.8404555916786194, + "learning_rate": 4.853157385456639e-06, + "loss": 0.5679, + "step": 9037 + }, + { + "epoch": 0.6644611086604911, + "grad_norm": 0.8182904720306396, + "learning_rate": 4.853124847528647e-06, + "loss": 0.5207, + "step": 9038 + }, + { + "epoch": 0.664534627260697, + "grad_norm": 0.8310748934745789, + "learning_rate": 4.853092306105217e-06, + "loss": 0.4805, + "step": 9039 + }, + { + "epoch": 0.6646081458609028, + "grad_norm": 0.8498830199241638, + "learning_rate": 4.853059761186396e-06, + "loss": 0.5632, + "step": 9040 + }, + { + "epoch": 0.6646816644611087, + "grad_norm": 0.7929077744483948, + "learning_rate": 4.853027212772235e-06, + "loss": 0.5351, + "step": 9041 + }, + { + "epoch": 0.6647551830613145, + "grad_norm": 0.8341896533966064, + "learning_rate": 4.852994660862781e-06, + "loss": 0.5478, + "step": 9042 + }, + { + "epoch": 0.6648287016615204, + "grad_norm": 0.8099652528762817, + "learning_rate": 4.852962105458081e-06, + "loss": 0.5458, + "step": 9043 + }, + { + "epoch": 0.6649022202617262, + "grad_norm": 0.8708862066268921, + "learning_rate": 4.852929546558186e-06, + "loss": 0.5187, + "step": 9044 + }, + { + "epoch": 0.6649757388619321, + "grad_norm": 0.8260290026664734, + "learning_rate": 4.852896984163143e-06, + "loss": 0.5439, + "step": 9045 + }, + { + "epoch": 0.6650492574621379, + "grad_norm": 0.8463752865791321, + "learning_rate": 4.852864418272999e-06, + "loss": 0.492, + "step": 9046 + }, + { + "epoch": 0.6651227760623438, + "grad_norm": 0.8619441390037537, + "learning_rate": 4.852831848887805e-06, + "loss": 0.5575, + "step": 9047 + }, + { + "epoch": 0.6651962946625496, + "grad_norm": 0.8180845379829407, + "learning_rate": 4.852799276007607e-06, + "loss": 0.5468, + "step": 9048 + }, + { + "epoch": 0.6652698132627555, + "grad_norm": 0.8227505087852478, + "learning_rate": 4.852766699632455e-06, + "loss": 0.5248, + "step": 9049 + }, + { + "epoch": 0.6653433318629614, + "grad_norm": 0.849367082118988, + "learning_rate": 4.8527341197623965e-06, + "loss": 0.584, + "step": 9050 + }, + { + "epoch": 0.6654168504631672, + "grad_norm": 0.7675198912620544, + "learning_rate": 4.852701536397482e-06, + "loss": 0.5115, + "step": 9051 + }, + { + "epoch": 0.665490369063373, + "grad_norm": 0.8339236378669739, + "learning_rate": 4.852668949537757e-06, + "loss": 0.5284, + "step": 9052 + }, + { + "epoch": 0.6655638876635789, + "grad_norm": 0.839558482170105, + "learning_rate": 4.85263635918327e-06, + "loss": 0.5468, + "step": 9053 + }, + { + "epoch": 0.6656374062637848, + "grad_norm": 0.8139083981513977, + "learning_rate": 4.852603765334072e-06, + "loss": 0.4995, + "step": 9054 + }, + { + "epoch": 0.6657109248639906, + "grad_norm": 0.7993353009223938, + "learning_rate": 4.8525711679902095e-06, + "loss": 0.5325, + "step": 9055 + }, + { + "epoch": 0.6657844434641964, + "grad_norm": 0.8024827837944031, + "learning_rate": 4.852538567151732e-06, + "loss": 0.5031, + "step": 9056 + }, + { + "epoch": 0.6658579620644023, + "grad_norm": 0.854992151260376, + "learning_rate": 4.852505962818687e-06, + "loss": 0.5672, + "step": 9057 + }, + { + "epoch": 0.6659314806646082, + "grad_norm": 0.8206561207771301, + "learning_rate": 4.852473354991122e-06, + "loss": 0.5002, + "step": 9058 + }, + { + "epoch": 0.666004999264814, + "grad_norm": 0.8092232346534729, + "learning_rate": 4.852440743669088e-06, + "loss": 0.518, + "step": 9059 + }, + { + "epoch": 0.6660785178650198, + "grad_norm": 0.83565354347229, + "learning_rate": 4.852408128852631e-06, + "loss": 0.5325, + "step": 9060 + }, + { + "epoch": 0.6661520364652257, + "grad_norm": 0.8547849059104919, + "learning_rate": 4.852375510541802e-06, + "loss": 0.5888, + "step": 9061 + }, + { + "epoch": 0.6662255550654316, + "grad_norm": 0.7752193212509155, + "learning_rate": 4.8523428887366474e-06, + "loss": 0.4739, + "step": 9062 + }, + { + "epoch": 0.6662990736656375, + "grad_norm": 0.9147217869758606, + "learning_rate": 4.852310263437215e-06, + "loss": 0.6051, + "step": 9063 + }, + { + "epoch": 0.6663725922658432, + "grad_norm": 0.8530852794647217, + "learning_rate": 4.852277634643556e-06, + "loss": 0.58, + "step": 9064 + }, + { + "epoch": 0.6664461108660491, + "grad_norm": 0.8151810765266418, + "learning_rate": 4.852245002355718e-06, + "loss": 0.5578, + "step": 9065 + }, + { + "epoch": 0.666519629466255, + "grad_norm": 0.8050547242164612, + "learning_rate": 4.852212366573747e-06, + "loss": 0.5298, + "step": 9066 + }, + { + "epoch": 0.6665931480664609, + "grad_norm": 0.8385896682739258, + "learning_rate": 4.8521797272976945e-06, + "loss": 0.5274, + "step": 9067 + }, + { + "epoch": 0.6666666666666666, + "grad_norm": 0.8030223846435547, + "learning_rate": 4.852147084527607e-06, + "loss": 0.5422, + "step": 9068 + }, + { + "epoch": 0.6667401852668725, + "grad_norm": 0.8809860348701477, + "learning_rate": 4.8521144382635346e-06, + "loss": 0.5386, + "step": 9069 + }, + { + "epoch": 0.6668137038670784, + "grad_norm": 0.8129428029060364, + "learning_rate": 4.852081788505525e-06, + "loss": 0.5429, + "step": 9070 + }, + { + "epoch": 0.6668872224672843, + "grad_norm": 0.8155189752578735, + "learning_rate": 4.852049135253626e-06, + "loss": 0.5212, + "step": 9071 + }, + { + "epoch": 0.66696074106749, + "grad_norm": 0.8141470551490784, + "learning_rate": 4.852016478507887e-06, + "loss": 0.5294, + "step": 9072 + }, + { + "epoch": 0.6670342596676959, + "grad_norm": 0.8631333708763123, + "learning_rate": 4.851983818268357e-06, + "loss": 0.5295, + "step": 9073 + }, + { + "epoch": 0.6671077782679018, + "grad_norm": 0.8573809862136841, + "learning_rate": 4.8519511545350825e-06, + "loss": 0.5466, + "step": 9074 + }, + { + "epoch": 0.6671812968681077, + "grad_norm": 0.8154825568199158, + "learning_rate": 4.851918487308115e-06, + "loss": 0.4634, + "step": 9075 + }, + { + "epoch": 0.6672548154683134, + "grad_norm": 0.8370974063873291, + "learning_rate": 4.851885816587501e-06, + "loss": 0.5143, + "step": 9076 + }, + { + "epoch": 0.6673283340685193, + "grad_norm": 0.8263599276542664, + "learning_rate": 4.851853142373289e-06, + "loss": 0.53, + "step": 9077 + }, + { + "epoch": 0.6674018526687252, + "grad_norm": 0.8398852348327637, + "learning_rate": 4.851820464665527e-06, + "loss": 0.5444, + "step": 9078 + }, + { + "epoch": 0.6674753712689311, + "grad_norm": 0.8420519232749939, + "learning_rate": 4.851787783464265e-06, + "loss": 0.5652, + "step": 9079 + }, + { + "epoch": 0.6675488898691369, + "grad_norm": 0.837102472782135, + "learning_rate": 4.851755098769552e-06, + "loss": 0.5789, + "step": 9080 + }, + { + "epoch": 0.6676224084693427, + "grad_norm": 0.8960004448890686, + "learning_rate": 4.851722410581434e-06, + "loss": 0.5652, + "step": 9081 + }, + { + "epoch": 0.6676959270695486, + "grad_norm": 0.7965373396873474, + "learning_rate": 4.851689718899962e-06, + "loss": 0.5335, + "step": 9082 + }, + { + "epoch": 0.6677694456697545, + "grad_norm": 0.8256140947341919, + "learning_rate": 4.851657023725185e-06, + "loss": 0.5375, + "step": 9083 + }, + { + "epoch": 0.6678429642699603, + "grad_norm": 0.8465526103973389, + "learning_rate": 4.851624325057148e-06, + "loss": 0.5272, + "step": 9084 + }, + { + "epoch": 0.6679164828701661, + "grad_norm": 0.84050053358078, + "learning_rate": 4.851591622895904e-06, + "loss": 0.5805, + "step": 9085 + }, + { + "epoch": 0.667990001470372, + "grad_norm": 0.8876664042472839, + "learning_rate": 4.851558917241498e-06, + "loss": 0.5392, + "step": 9086 + }, + { + "epoch": 0.6680635200705779, + "grad_norm": 0.9069114923477173, + "learning_rate": 4.85152620809398e-06, + "loss": 0.5526, + "step": 9087 + }, + { + "epoch": 0.6681370386707837, + "grad_norm": 0.8820304870605469, + "learning_rate": 4.851493495453399e-06, + "loss": 0.534, + "step": 9088 + }, + { + "epoch": 0.6682105572709895, + "grad_norm": 0.8482901453971863, + "learning_rate": 4.851460779319803e-06, + "loss": 0.56, + "step": 9089 + }, + { + "epoch": 0.6682840758711954, + "grad_norm": 0.8349204659461975, + "learning_rate": 4.851428059693241e-06, + "loss": 0.5187, + "step": 9090 + }, + { + "epoch": 0.6683575944714013, + "grad_norm": 0.8278661966323853, + "learning_rate": 4.851395336573761e-06, + "loss": 0.4943, + "step": 9091 + }, + { + "epoch": 0.6684311130716071, + "grad_norm": 0.875688374042511, + "learning_rate": 4.851362609961412e-06, + "loss": 0.5773, + "step": 9092 + }, + { + "epoch": 0.668504631671813, + "grad_norm": 0.8705455660820007, + "learning_rate": 4.851329879856244e-06, + "loss": 0.5218, + "step": 9093 + }, + { + "epoch": 0.6685781502720188, + "grad_norm": 0.8555766940116882, + "learning_rate": 4.851297146258303e-06, + "loss": 0.5594, + "step": 9094 + }, + { + "epoch": 0.6686516688722247, + "grad_norm": 0.8314375281333923, + "learning_rate": 4.851264409167638e-06, + "loss": 0.5483, + "step": 9095 + }, + { + "epoch": 0.6687251874724305, + "grad_norm": 0.8231747150421143, + "learning_rate": 4.8512316685843e-06, + "loss": 0.486, + "step": 9096 + }, + { + "epoch": 0.6687987060726364, + "grad_norm": 0.8333579897880554, + "learning_rate": 4.851198924508334e-06, + "loss": 0.5339, + "step": 9097 + }, + { + "epoch": 0.6688722246728422, + "grad_norm": 0.8135605454444885, + "learning_rate": 4.851166176939794e-06, + "loss": 0.5485, + "step": 9098 + }, + { + "epoch": 0.6689457432730481, + "grad_norm": 0.857556164264679, + "learning_rate": 4.851133425878723e-06, + "loss": 0.5275, + "step": 9099 + }, + { + "epoch": 0.6690192618732539, + "grad_norm": 0.8586294651031494, + "learning_rate": 4.851100671325172e-06, + "loss": 0.5924, + "step": 9100 + }, + { + "epoch": 0.6690927804734598, + "grad_norm": 0.8556596040725708, + "learning_rate": 4.8510679132791905e-06, + "loss": 0.5384, + "step": 9101 + }, + { + "epoch": 0.6691662990736656, + "grad_norm": 0.8493860363960266, + "learning_rate": 4.851035151740826e-06, + "loss": 0.4948, + "step": 9102 + }, + { + "epoch": 0.6692398176738715, + "grad_norm": 0.8609496355056763, + "learning_rate": 4.851002386710128e-06, + "loss": 0.5544, + "step": 9103 + }, + { + "epoch": 0.6693133362740773, + "grad_norm": 0.8930354118347168, + "learning_rate": 4.850969618187145e-06, + "loss": 0.5557, + "step": 9104 + }, + { + "epoch": 0.6693868548742832, + "grad_norm": 0.8199371695518494, + "learning_rate": 4.8509368461719244e-06, + "loss": 0.5528, + "step": 9105 + }, + { + "epoch": 0.6694603734744891, + "grad_norm": 0.8209179639816284, + "learning_rate": 4.850904070664516e-06, + "loss": 0.5454, + "step": 9106 + }, + { + "epoch": 0.6695338920746949, + "grad_norm": 0.865646243095398, + "learning_rate": 4.850871291664969e-06, + "loss": 0.604, + "step": 9107 + }, + { + "epoch": 0.6696074106749007, + "grad_norm": 0.8696723580360413, + "learning_rate": 4.850838509173331e-06, + "loss": 0.5442, + "step": 9108 + }, + { + "epoch": 0.6696809292751066, + "grad_norm": 0.9047670364379883, + "learning_rate": 4.8508057231896514e-06, + "loss": 0.5943, + "step": 9109 + }, + { + "epoch": 0.6697544478753125, + "grad_norm": 0.8122002482414246, + "learning_rate": 4.850772933713978e-06, + "loss": 0.5054, + "step": 9110 + }, + { + "epoch": 0.6698279664755183, + "grad_norm": 0.8470021486282349, + "learning_rate": 4.85074014074636e-06, + "loss": 0.5355, + "step": 9111 + }, + { + "epoch": 0.6699014850757241, + "grad_norm": 0.894482433795929, + "learning_rate": 4.850707344286847e-06, + "loss": 0.5617, + "step": 9112 + }, + { + "epoch": 0.66997500367593, + "grad_norm": 0.8248872756958008, + "learning_rate": 4.850674544335486e-06, + "loss": 0.5267, + "step": 9113 + }, + { + "epoch": 0.6700485222761359, + "grad_norm": 0.897673487663269, + "learning_rate": 4.850641740892328e-06, + "loss": 0.5587, + "step": 9114 + }, + { + "epoch": 0.6701220408763418, + "grad_norm": 0.9307486414909363, + "learning_rate": 4.850608933957419e-06, + "loss": 0.6235, + "step": 9115 + }, + { + "epoch": 0.6701955594765475, + "grad_norm": 0.8178768754005432, + "learning_rate": 4.85057612353081e-06, + "loss": 0.4914, + "step": 9116 + }, + { + "epoch": 0.6702690780767534, + "grad_norm": 0.897017776966095, + "learning_rate": 4.8505433096125495e-06, + "loss": 0.5437, + "step": 9117 + }, + { + "epoch": 0.6703425966769593, + "grad_norm": 0.8540234565734863, + "learning_rate": 4.850510492202685e-06, + "loss": 0.5538, + "step": 9118 + }, + { + "epoch": 0.6704161152771652, + "grad_norm": 0.8174393177032471, + "learning_rate": 4.8504776713012655e-06, + "loss": 0.5289, + "step": 9119 + }, + { + "epoch": 0.6704896338773709, + "grad_norm": 0.8099195957183838, + "learning_rate": 4.85044484690834e-06, + "loss": 0.4942, + "step": 9120 + }, + { + "epoch": 0.6705631524775768, + "grad_norm": 0.8042497038841248, + "learning_rate": 4.850412019023958e-06, + "loss": 0.5281, + "step": 9121 + }, + { + "epoch": 0.6706366710777827, + "grad_norm": 0.8809049725532532, + "learning_rate": 4.8503791876481676e-06, + "loss": 0.5763, + "step": 9122 + }, + { + "epoch": 0.6707101896779886, + "grad_norm": 0.8411110639572144, + "learning_rate": 4.850346352781017e-06, + "loss": 0.5174, + "step": 9123 + }, + { + "epoch": 0.6707837082781943, + "grad_norm": 0.8236972093582153, + "learning_rate": 4.8503135144225564e-06, + "loss": 0.5652, + "step": 9124 + }, + { + "epoch": 0.6708572268784002, + "grad_norm": 0.9499660730361938, + "learning_rate": 4.850280672572833e-06, + "loss": 0.5985, + "step": 9125 + }, + { + "epoch": 0.6709307454786061, + "grad_norm": 0.8741812109947205, + "learning_rate": 4.850247827231898e-06, + "loss": 0.5249, + "step": 9126 + }, + { + "epoch": 0.671004264078812, + "grad_norm": 0.7902818918228149, + "learning_rate": 4.850214978399797e-06, + "loss": 0.4985, + "step": 9127 + }, + { + "epoch": 0.6710777826790177, + "grad_norm": 0.8680961728096008, + "learning_rate": 4.850182126076581e-06, + "loss": 0.5266, + "step": 9128 + }, + { + "epoch": 0.6711513012792236, + "grad_norm": 0.8626134395599365, + "learning_rate": 4.850149270262297e-06, + "loss": 0.5812, + "step": 9129 + }, + { + "epoch": 0.6712248198794295, + "grad_norm": 0.9133071899414062, + "learning_rate": 4.850116410956996e-06, + "loss": 0.5526, + "step": 9130 + }, + { + "epoch": 0.6712983384796354, + "grad_norm": 0.854257345199585, + "learning_rate": 4.850083548160726e-06, + "loss": 0.5833, + "step": 9131 + }, + { + "epoch": 0.6713718570798412, + "grad_norm": 0.7948493957519531, + "learning_rate": 4.8500506818735346e-06, + "loss": 0.5332, + "step": 9132 + }, + { + "epoch": 0.671445375680047, + "grad_norm": 0.8740541338920593, + "learning_rate": 4.8500178120954724e-06, + "loss": 0.5952, + "step": 9133 + }, + { + "epoch": 0.6715188942802529, + "grad_norm": 0.8392583131790161, + "learning_rate": 4.849984938826587e-06, + "loss": 0.5546, + "step": 9134 + }, + { + "epoch": 0.6715924128804588, + "grad_norm": 0.8301974534988403, + "learning_rate": 4.8499520620669276e-06, + "loss": 0.5112, + "step": 9135 + }, + { + "epoch": 0.6716659314806646, + "grad_norm": 0.781586766242981, + "learning_rate": 4.849919181816544e-06, + "loss": 0.5429, + "step": 9136 + }, + { + "epoch": 0.6717394500808704, + "grad_norm": 0.8199139833450317, + "learning_rate": 4.8498862980754825e-06, + "loss": 0.5271, + "step": 9137 + }, + { + "epoch": 0.6718129686810763, + "grad_norm": 0.8715881705284119, + "learning_rate": 4.849853410843795e-06, + "loss": 0.5651, + "step": 9138 + }, + { + "epoch": 0.6718864872812822, + "grad_norm": 0.820678174495697, + "learning_rate": 4.8498205201215285e-06, + "loss": 0.499, + "step": 9139 + }, + { + "epoch": 0.671960005881488, + "grad_norm": 0.8278282284736633, + "learning_rate": 4.849787625908732e-06, + "loss": 0.5242, + "step": 9140 + }, + { + "epoch": 0.6720335244816938, + "grad_norm": 0.9092677235603333, + "learning_rate": 4.8497547282054544e-06, + "loss": 0.5507, + "step": 9141 + }, + { + "epoch": 0.6721070430818997, + "grad_norm": 0.7895093560218811, + "learning_rate": 4.849721827011746e-06, + "loss": 0.5139, + "step": 9142 + }, + { + "epoch": 0.6721805616821056, + "grad_norm": 0.8310298323631287, + "learning_rate": 4.849688922327653e-06, + "loss": 0.5635, + "step": 9143 + }, + { + "epoch": 0.6722540802823114, + "grad_norm": 0.8380215764045715, + "learning_rate": 4.849656014153227e-06, + "loss": 0.5141, + "step": 9144 + }, + { + "epoch": 0.6723275988825173, + "grad_norm": 0.8791319727897644, + "learning_rate": 4.8496231024885155e-06, + "loss": 0.5652, + "step": 9145 + }, + { + "epoch": 0.6724011174827231, + "grad_norm": 0.8756476044654846, + "learning_rate": 4.849590187333567e-06, + "loss": 0.557, + "step": 9146 + }, + { + "epoch": 0.672474636082929, + "grad_norm": 0.8485491871833801, + "learning_rate": 4.849557268688432e-06, + "loss": 0.5121, + "step": 9147 + }, + { + "epoch": 0.6725481546831348, + "grad_norm": 0.8754923939704895, + "learning_rate": 4.8495243465531565e-06, + "loss": 0.5159, + "step": 9148 + }, + { + "epoch": 0.6726216732833407, + "grad_norm": 0.7825106382369995, + "learning_rate": 4.849491420927793e-06, + "loss": 0.5103, + "step": 9149 + }, + { + "epoch": 0.6726951918835465, + "grad_norm": 0.8605152368545532, + "learning_rate": 4.849458491812388e-06, + "loss": 0.5319, + "step": 9150 + }, + { + "epoch": 0.6727687104837524, + "grad_norm": 0.8913842439651489, + "learning_rate": 4.849425559206991e-06, + "loss": 0.5416, + "step": 9151 + }, + { + "epoch": 0.6728422290839582, + "grad_norm": 0.8362066745758057, + "learning_rate": 4.8493926231116505e-06, + "loss": 0.5571, + "step": 9152 + }, + { + "epoch": 0.6729157476841641, + "grad_norm": 0.7978482246398926, + "learning_rate": 4.849359683526416e-06, + "loss": 0.5211, + "step": 9153 + }, + { + "epoch": 0.67298926628437, + "grad_norm": 0.8417165875434875, + "learning_rate": 4.849326740451337e-06, + "loss": 0.5477, + "step": 9154 + }, + { + "epoch": 0.6730627848845758, + "grad_norm": 0.7625582218170166, + "learning_rate": 4.849293793886462e-06, + "loss": 0.4805, + "step": 9155 + }, + { + "epoch": 0.6731363034847816, + "grad_norm": 0.8568168878555298, + "learning_rate": 4.849260843831839e-06, + "loss": 0.5658, + "step": 9156 + }, + { + "epoch": 0.6732098220849875, + "grad_norm": 0.8421937823295593, + "learning_rate": 4.849227890287518e-06, + "loss": 0.5397, + "step": 9157 + }, + { + "epoch": 0.6732833406851934, + "grad_norm": 0.8519315123558044, + "learning_rate": 4.849194933253547e-06, + "loss": 0.5675, + "step": 9158 + }, + { + "epoch": 0.6733568592853992, + "grad_norm": 0.8427937626838684, + "learning_rate": 4.849161972729976e-06, + "loss": 0.5411, + "step": 9159 + }, + { + "epoch": 0.673430377885605, + "grad_norm": 0.849276602268219, + "learning_rate": 4.849129008716853e-06, + "loss": 0.5946, + "step": 9160 + }, + { + "epoch": 0.6735038964858109, + "grad_norm": 0.8245733976364136, + "learning_rate": 4.849096041214228e-06, + "loss": 0.5361, + "step": 9161 + }, + { + "epoch": 0.6735774150860168, + "grad_norm": 0.8083016872406006, + "learning_rate": 4.8490630702221504e-06, + "loss": 0.5264, + "step": 9162 + }, + { + "epoch": 0.6736509336862226, + "grad_norm": 0.816003143787384, + "learning_rate": 4.849030095740667e-06, + "loss": 0.5419, + "step": 9163 + }, + { + "epoch": 0.6737244522864285, + "grad_norm": 0.840110182762146, + "learning_rate": 4.848997117769828e-06, + "loss": 0.5979, + "step": 9164 + }, + { + "epoch": 0.6737979708866343, + "grad_norm": 0.8366692662239075, + "learning_rate": 4.848964136309684e-06, + "loss": 0.5427, + "step": 9165 + }, + { + "epoch": 0.6738714894868402, + "grad_norm": 0.8301313519477844, + "learning_rate": 4.848931151360281e-06, + "loss": 0.5123, + "step": 9166 + }, + { + "epoch": 0.673945008087046, + "grad_norm": 0.8266049027442932, + "learning_rate": 4.8488981629216694e-06, + "loss": 0.5337, + "step": 9167 + }, + { + "epoch": 0.6740185266872519, + "grad_norm": 0.8428906798362732, + "learning_rate": 4.848865170993898e-06, + "loss": 0.5463, + "step": 9168 + }, + { + "epoch": 0.6740920452874577, + "grad_norm": 0.8110292553901672, + "learning_rate": 4.848832175577016e-06, + "loss": 0.5603, + "step": 9169 + }, + { + "epoch": 0.6741655638876636, + "grad_norm": 0.8491308689117432, + "learning_rate": 4.848799176671073e-06, + "loss": 0.5263, + "step": 9170 + }, + { + "epoch": 0.6742390824878695, + "grad_norm": 0.8186396360397339, + "learning_rate": 4.848766174276117e-06, + "loss": 0.5151, + "step": 9171 + }, + { + "epoch": 0.6743126010880753, + "grad_norm": 0.8031337261199951, + "learning_rate": 4.848733168392198e-06, + "loss": 0.5281, + "step": 9172 + }, + { + "epoch": 0.6743861196882811, + "grad_norm": 0.8022136688232422, + "learning_rate": 4.848700159019364e-06, + "loss": 0.5419, + "step": 9173 + }, + { + "epoch": 0.674459638288487, + "grad_norm": 0.8046041131019592, + "learning_rate": 4.848667146157665e-06, + "loss": 0.5019, + "step": 9174 + }, + { + "epoch": 0.6745331568886929, + "grad_norm": 0.7994704842567444, + "learning_rate": 4.84863412980715e-06, + "loss": 0.5055, + "step": 9175 + }, + { + "epoch": 0.6746066754888987, + "grad_norm": 0.8562270998954773, + "learning_rate": 4.848601109967867e-06, + "loss": 0.5745, + "step": 9176 + }, + { + "epoch": 0.6746801940891045, + "grad_norm": 0.8880748152732849, + "learning_rate": 4.848568086639865e-06, + "loss": 0.567, + "step": 9177 + }, + { + "epoch": 0.6747537126893104, + "grad_norm": 0.8587815165519714, + "learning_rate": 4.848535059823194e-06, + "loss": 0.4973, + "step": 9178 + }, + { + "epoch": 0.6748272312895163, + "grad_norm": 0.8373125791549683, + "learning_rate": 4.848502029517903e-06, + "loss": 0.5624, + "step": 9179 + }, + { + "epoch": 0.6749007498897222, + "grad_norm": 0.8969876170158386, + "learning_rate": 4.848468995724041e-06, + "loss": 0.5709, + "step": 9180 + }, + { + "epoch": 0.6749742684899279, + "grad_norm": 0.8173502087593079, + "learning_rate": 4.848435958441658e-06, + "loss": 0.5167, + "step": 9181 + }, + { + "epoch": 0.6750477870901338, + "grad_norm": 0.8602463603019714, + "learning_rate": 4.8484029176708e-06, + "loss": 0.5785, + "step": 9182 + }, + { + "epoch": 0.6751213056903397, + "grad_norm": 0.7973255515098572, + "learning_rate": 4.848369873411519e-06, + "loss": 0.5343, + "step": 9183 + }, + { + "epoch": 0.6751948242905456, + "grad_norm": 0.7849963307380676, + "learning_rate": 4.848336825663863e-06, + "loss": 0.5229, + "step": 9184 + }, + { + "epoch": 0.6752683428907513, + "grad_norm": 0.8787675499916077, + "learning_rate": 4.8483037744278805e-06, + "loss": 0.5671, + "step": 9185 + }, + { + "epoch": 0.6753418614909572, + "grad_norm": 0.8199341893196106, + "learning_rate": 4.848270719703623e-06, + "loss": 0.5254, + "step": 9186 + }, + { + "epoch": 0.6754153800911631, + "grad_norm": 0.8245421648025513, + "learning_rate": 4.8482376614911365e-06, + "loss": 0.5049, + "step": 9187 + }, + { + "epoch": 0.675488898691369, + "grad_norm": 0.8037405610084534, + "learning_rate": 4.848204599790471e-06, + "loss": 0.5311, + "step": 9188 + }, + { + "epoch": 0.6755624172915747, + "grad_norm": 0.8016397356987, + "learning_rate": 4.848171534601678e-06, + "loss": 0.52, + "step": 9189 + }, + { + "epoch": 0.6756359358917806, + "grad_norm": 0.8101853132247925, + "learning_rate": 4.8481384659248044e-06, + "loss": 0.5609, + "step": 9190 + }, + { + "epoch": 0.6757094544919865, + "grad_norm": 0.8639752268791199, + "learning_rate": 4.8481053937598984e-06, + "loss": 0.5044, + "step": 9191 + }, + { + "epoch": 0.6757829730921924, + "grad_norm": 0.8474699258804321, + "learning_rate": 4.8480723181070116e-06, + "loss": 0.5552, + "step": 9192 + }, + { + "epoch": 0.6758564916923981, + "grad_norm": 0.8334693312644958, + "learning_rate": 4.848039238966192e-06, + "loss": 0.5485, + "step": 9193 + }, + { + "epoch": 0.675930010292604, + "grad_norm": 0.7941808104515076, + "learning_rate": 4.848006156337488e-06, + "loss": 0.5181, + "step": 9194 + }, + { + "epoch": 0.6760035288928099, + "grad_norm": 0.8743615746498108, + "learning_rate": 4.847973070220949e-06, + "loss": 0.5131, + "step": 9195 + }, + { + "epoch": 0.6760770474930158, + "grad_norm": 0.8198467493057251, + "learning_rate": 4.847939980616626e-06, + "loss": 0.5609, + "step": 9196 + }, + { + "epoch": 0.6761505660932216, + "grad_norm": 0.789756178855896, + "learning_rate": 4.847906887524566e-06, + "loss": 0.5558, + "step": 9197 + }, + { + "epoch": 0.6762240846934274, + "grad_norm": 0.8534948825836182, + "learning_rate": 4.847873790944819e-06, + "loss": 0.5606, + "step": 9198 + }, + { + "epoch": 0.6762976032936333, + "grad_norm": 0.8447510600090027, + "learning_rate": 4.847840690877433e-06, + "loss": 0.5247, + "step": 9199 + }, + { + "epoch": 0.6763711218938392, + "grad_norm": 0.8115454912185669, + "learning_rate": 4.8478075873224604e-06, + "loss": 0.5261, + "step": 9200 + }, + { + "epoch": 0.676444640494045, + "grad_norm": 0.7992275357246399, + "learning_rate": 4.847774480279946e-06, + "loss": 0.5415, + "step": 9201 + }, + { + "epoch": 0.6765181590942508, + "grad_norm": 0.8512187004089355, + "learning_rate": 4.8477413697499424e-06, + "loss": 0.5982, + "step": 9202 + }, + { + "epoch": 0.6765916776944567, + "grad_norm": 0.8511441946029663, + "learning_rate": 4.847708255732497e-06, + "loss": 0.5436, + "step": 9203 + }, + { + "epoch": 0.6766651962946626, + "grad_norm": 0.8361104726791382, + "learning_rate": 4.847675138227661e-06, + "loss": 0.5258, + "step": 9204 + }, + { + "epoch": 0.6767387148948684, + "grad_norm": 0.8218809962272644, + "learning_rate": 4.84764201723548e-06, + "loss": 0.5289, + "step": 9205 + }, + { + "epoch": 0.6768122334950742, + "grad_norm": 0.8454537987709045, + "learning_rate": 4.847608892756007e-06, + "loss": 0.5763, + "step": 9206 + }, + { + "epoch": 0.6768857520952801, + "grad_norm": 0.8509688973426819, + "learning_rate": 4.847575764789289e-06, + "loss": 0.5244, + "step": 9207 + }, + { + "epoch": 0.676959270695486, + "grad_norm": 0.7855849862098694, + "learning_rate": 4.847542633335375e-06, + "loss": 0.5253, + "step": 9208 + }, + { + "epoch": 0.6770327892956918, + "grad_norm": 0.833773136138916, + "learning_rate": 4.847509498394315e-06, + "loss": 0.5608, + "step": 9209 + }, + { + "epoch": 0.6771063078958977, + "grad_norm": 0.9091933965682983, + "learning_rate": 4.847476359966159e-06, + "loss": 0.5954, + "step": 9210 + }, + { + "epoch": 0.6771798264961035, + "grad_norm": 0.8116957545280457, + "learning_rate": 4.847443218050956e-06, + "loss": 0.492, + "step": 9211 + }, + { + "epoch": 0.6772533450963094, + "grad_norm": 0.7940312027931213, + "learning_rate": 4.847410072648753e-06, + "loss": 0.5271, + "step": 9212 + }, + { + "epoch": 0.6773268636965152, + "grad_norm": 0.8327898383140564, + "learning_rate": 4.847376923759602e-06, + "loss": 0.5488, + "step": 9213 + }, + { + "epoch": 0.6774003822967211, + "grad_norm": 0.7942079901695251, + "learning_rate": 4.847343771383551e-06, + "loss": 0.499, + "step": 9214 + }, + { + "epoch": 0.6774739008969269, + "grad_norm": 0.8565673828125, + "learning_rate": 4.847310615520649e-06, + "loss": 0.5518, + "step": 9215 + }, + { + "epoch": 0.6775474194971328, + "grad_norm": 0.9040310382843018, + "learning_rate": 4.847277456170945e-06, + "loss": 0.5433, + "step": 9216 + }, + { + "epoch": 0.6776209380973386, + "grad_norm": 0.8568299412727356, + "learning_rate": 4.84724429333449e-06, + "loss": 0.5953, + "step": 9217 + }, + { + "epoch": 0.6776944566975445, + "grad_norm": 0.8839949369430542, + "learning_rate": 4.847211127011332e-06, + "loss": 0.5646, + "step": 9218 + }, + { + "epoch": 0.6777679752977503, + "grad_norm": 0.8202196359634399, + "learning_rate": 4.8471779572015205e-06, + "loss": 0.5012, + "step": 9219 + }, + { + "epoch": 0.6778414938979562, + "grad_norm": 0.868452250957489, + "learning_rate": 4.847144783905105e-06, + "loss": 0.5606, + "step": 9220 + }, + { + "epoch": 0.677915012498162, + "grad_norm": 0.8206037282943726, + "learning_rate": 4.847111607122133e-06, + "loss": 0.5133, + "step": 9221 + }, + { + "epoch": 0.6779885310983679, + "grad_norm": 0.8111454844474792, + "learning_rate": 4.847078426852656e-06, + "loss": 0.5022, + "step": 9222 + }, + { + "epoch": 0.6780620496985738, + "grad_norm": 0.8434358239173889, + "learning_rate": 4.847045243096723e-06, + "loss": 0.5449, + "step": 9223 + }, + { + "epoch": 0.6781355682987796, + "grad_norm": 0.8275930285453796, + "learning_rate": 4.847012055854382e-06, + "loss": 0.5768, + "step": 9224 + }, + { + "epoch": 0.6782090868989854, + "grad_norm": 0.8849714994430542, + "learning_rate": 4.846978865125683e-06, + "loss": 0.5459, + "step": 9225 + }, + { + "epoch": 0.6782826054991913, + "grad_norm": 0.8263468742370605, + "learning_rate": 4.8469456709106765e-06, + "loss": 0.5466, + "step": 9226 + }, + { + "epoch": 0.6783561240993972, + "grad_norm": 0.790968656539917, + "learning_rate": 4.846912473209409e-06, + "loss": 0.4946, + "step": 9227 + }, + { + "epoch": 0.678429642699603, + "grad_norm": 0.8293725252151489, + "learning_rate": 4.846879272021933e-06, + "loss": 0.5476, + "step": 9228 + }, + { + "epoch": 0.6785031612998088, + "grad_norm": 0.8065884709358215, + "learning_rate": 4.846846067348297e-06, + "loss": 0.5092, + "step": 9229 + }, + { + "epoch": 0.6785766799000147, + "grad_norm": 0.7894744873046875, + "learning_rate": 4.846812859188548e-06, + "loss": 0.5099, + "step": 9230 + }, + { + "epoch": 0.6786501985002206, + "grad_norm": 0.7784474492073059, + "learning_rate": 4.8467796475427375e-06, + "loss": 0.5282, + "step": 9231 + }, + { + "epoch": 0.6787237171004264, + "grad_norm": 0.8193115592002869, + "learning_rate": 4.846746432410915e-06, + "loss": 0.541, + "step": 9232 + }, + { + "epoch": 0.6787972357006322, + "grad_norm": 0.8490724563598633, + "learning_rate": 4.846713213793129e-06, + "loss": 0.5516, + "step": 9233 + }, + { + "epoch": 0.6788707543008381, + "grad_norm": 0.8162129521369934, + "learning_rate": 4.846679991689428e-06, + "loss": 0.5175, + "step": 9234 + }, + { + "epoch": 0.678944272901044, + "grad_norm": 0.8490527272224426, + "learning_rate": 4.846646766099864e-06, + "loss": 0.5179, + "step": 9235 + }, + { + "epoch": 0.6790177915012499, + "grad_norm": 0.8190892338752747, + "learning_rate": 4.846613537024483e-06, + "loss": 0.509, + "step": 9236 + }, + { + "epoch": 0.6790913101014556, + "grad_norm": 0.9008859992027283, + "learning_rate": 4.846580304463337e-06, + "loss": 0.5869, + "step": 9237 + }, + { + "epoch": 0.6791648287016615, + "grad_norm": 0.8121741414070129, + "learning_rate": 4.846547068416475e-06, + "loss": 0.4917, + "step": 9238 + }, + { + "epoch": 0.6792383473018674, + "grad_norm": 0.8122638463973999, + "learning_rate": 4.846513828883946e-06, + "loss": 0.5417, + "step": 9239 + }, + { + "epoch": 0.6793118659020733, + "grad_norm": 0.8623680472373962, + "learning_rate": 4.846480585865797e-06, + "loss": 0.5381, + "step": 9240 + }, + { + "epoch": 0.679385384502279, + "grad_norm": 0.7971478700637817, + "learning_rate": 4.846447339362082e-06, + "loss": 0.4997, + "step": 9241 + }, + { + "epoch": 0.6794589031024849, + "grad_norm": 0.8712685704231262, + "learning_rate": 4.846414089372847e-06, + "loss": 0.551, + "step": 9242 + }, + { + "epoch": 0.6795324217026908, + "grad_norm": 0.8046347498893738, + "learning_rate": 4.846380835898141e-06, + "loss": 0.5399, + "step": 9243 + }, + { + "epoch": 0.6796059403028967, + "grad_norm": 0.8735122680664062, + "learning_rate": 4.846347578938016e-06, + "loss": 0.5143, + "step": 9244 + }, + { + "epoch": 0.6796794589031024, + "grad_norm": 0.8168932199478149, + "learning_rate": 4.846314318492521e-06, + "loss": 0.5199, + "step": 9245 + }, + { + "epoch": 0.6797529775033083, + "grad_norm": 0.855695366859436, + "learning_rate": 4.846281054561704e-06, + "loss": 0.5604, + "step": 9246 + }, + { + "epoch": 0.6798264961035142, + "grad_norm": 0.8146414160728455, + "learning_rate": 4.846247787145614e-06, + "loss": 0.5265, + "step": 9247 + }, + { + "epoch": 0.6799000147037201, + "grad_norm": 0.7787647247314453, + "learning_rate": 4.846214516244302e-06, + "loss": 0.5501, + "step": 9248 + }, + { + "epoch": 0.6799735333039258, + "grad_norm": 0.7918274998664856, + "learning_rate": 4.846181241857817e-06, + "loss": 0.5299, + "step": 9249 + }, + { + "epoch": 0.6800470519041317, + "grad_norm": 0.8387899398803711, + "learning_rate": 4.846147963986209e-06, + "loss": 0.5763, + "step": 9250 + }, + { + "epoch": 0.6801205705043376, + "grad_norm": 0.8395480513572693, + "learning_rate": 4.846114682629525e-06, + "loss": 0.5424, + "step": 9251 + }, + { + "epoch": 0.6801940891045435, + "grad_norm": 0.8325395584106445, + "learning_rate": 4.846081397787817e-06, + "loss": 0.5467, + "step": 9252 + }, + { + "epoch": 0.6802676077047493, + "grad_norm": 0.8404865860939026, + "learning_rate": 4.846048109461133e-06, + "loss": 0.5162, + "step": 9253 + }, + { + "epoch": 0.6803411263049551, + "grad_norm": 0.7951205372810364, + "learning_rate": 4.846014817649524e-06, + "loss": 0.5032, + "step": 9254 + }, + { + "epoch": 0.680414644905161, + "grad_norm": 0.8159460425376892, + "learning_rate": 4.845981522353039e-06, + "loss": 0.5187, + "step": 9255 + }, + { + "epoch": 0.6804881635053669, + "grad_norm": 0.8653773665428162, + "learning_rate": 4.845948223571725e-06, + "loss": 0.5827, + "step": 9256 + }, + { + "epoch": 0.6805616821055727, + "grad_norm": 0.8376092910766602, + "learning_rate": 4.845914921305635e-06, + "loss": 0.5335, + "step": 9257 + }, + { + "epoch": 0.6806352007057785, + "grad_norm": 0.8373125195503235, + "learning_rate": 4.8458816155548154e-06, + "loss": 0.6053, + "step": 9258 + }, + { + "epoch": 0.6807087193059844, + "grad_norm": 0.8133729696273804, + "learning_rate": 4.845848306319318e-06, + "loss": 0.5379, + "step": 9259 + }, + { + "epoch": 0.6807822379061903, + "grad_norm": 0.8513334393501282, + "learning_rate": 4.845814993599191e-06, + "loss": 0.5414, + "step": 9260 + }, + { + "epoch": 0.6808557565063961, + "grad_norm": 0.8940184712409973, + "learning_rate": 4.845781677394484e-06, + "loss": 0.535, + "step": 9261 + }, + { + "epoch": 0.680929275106602, + "grad_norm": 0.9296596050262451, + "learning_rate": 4.845748357705248e-06, + "loss": 0.5509, + "step": 9262 + }, + { + "epoch": 0.6810027937068078, + "grad_norm": 0.8852627873420715, + "learning_rate": 4.84571503453153e-06, + "loss": 0.5592, + "step": 9263 + }, + { + "epoch": 0.6810763123070137, + "grad_norm": 0.8650156855583191, + "learning_rate": 4.8456817078733815e-06, + "loss": 0.5583, + "step": 9264 + }, + { + "epoch": 0.6811498309072195, + "grad_norm": 0.8263682126998901, + "learning_rate": 4.845648377730851e-06, + "loss": 0.5269, + "step": 9265 + }, + { + "epoch": 0.6812233495074254, + "grad_norm": 0.8959724307060242, + "learning_rate": 4.845615044103988e-06, + "loss": 0.576, + "step": 9266 + }, + { + "epoch": 0.6812968681076312, + "grad_norm": 0.9338435530662537, + "learning_rate": 4.845581706992843e-06, + "loss": 0.5409, + "step": 9267 + }, + { + "epoch": 0.6813703867078371, + "grad_norm": 0.8681348562240601, + "learning_rate": 4.845548366397464e-06, + "loss": 0.5488, + "step": 9268 + }, + { + "epoch": 0.6814439053080429, + "grad_norm": 0.8318992257118225, + "learning_rate": 4.845515022317902e-06, + "loss": 0.5348, + "step": 9269 + }, + { + "epoch": 0.6815174239082488, + "grad_norm": 0.8518905639648438, + "learning_rate": 4.845481674754206e-06, + "loss": 0.5356, + "step": 9270 + }, + { + "epoch": 0.6815909425084546, + "grad_norm": 0.8140025734901428, + "learning_rate": 4.845448323706425e-06, + "loss": 0.5315, + "step": 9271 + }, + { + "epoch": 0.6816644611086605, + "grad_norm": 0.8075400590896606, + "learning_rate": 4.845414969174609e-06, + "loss": 0.5444, + "step": 9272 + }, + { + "epoch": 0.6817379797088663, + "grad_norm": 0.845491886138916, + "learning_rate": 4.845381611158807e-06, + "loss": 0.5156, + "step": 9273 + }, + { + "epoch": 0.6818114983090722, + "grad_norm": 0.8187010288238525, + "learning_rate": 4.8453482496590696e-06, + "loss": 0.5109, + "step": 9274 + }, + { + "epoch": 0.681885016909278, + "grad_norm": 0.8701493740081787, + "learning_rate": 4.845314884675446e-06, + "loss": 0.524, + "step": 9275 + }, + { + "epoch": 0.6819585355094839, + "grad_norm": 0.8167015314102173, + "learning_rate": 4.845281516207986e-06, + "loss": 0.5654, + "step": 9276 + }, + { + "epoch": 0.6820320541096897, + "grad_norm": 0.8323710560798645, + "learning_rate": 4.845248144256738e-06, + "loss": 0.5677, + "step": 9277 + }, + { + "epoch": 0.6821055727098956, + "grad_norm": 0.8362093567848206, + "learning_rate": 4.845214768821752e-06, + "loss": 0.523, + "step": 9278 + }, + { + "epoch": 0.6821790913101015, + "grad_norm": 0.7930877804756165, + "learning_rate": 4.845181389903078e-06, + "loss": 0.5128, + "step": 9279 + }, + { + "epoch": 0.6822526099103073, + "grad_norm": 0.8776602149009705, + "learning_rate": 4.845148007500766e-06, + "loss": 0.5607, + "step": 9280 + }, + { + "epoch": 0.6823261285105131, + "grad_norm": 0.819279134273529, + "learning_rate": 4.845114621614865e-06, + "loss": 0.5419, + "step": 9281 + }, + { + "epoch": 0.682399647110719, + "grad_norm": 0.8342873454093933, + "learning_rate": 4.845081232245424e-06, + "loss": 0.55, + "step": 9282 + }, + { + "epoch": 0.6824731657109249, + "grad_norm": 0.8678035140037537, + "learning_rate": 4.845047839392494e-06, + "loss": 0.5361, + "step": 9283 + }, + { + "epoch": 0.6825466843111307, + "grad_norm": 0.8251497149467468, + "learning_rate": 4.845014443056123e-06, + "loss": 0.5315, + "step": 9284 + }, + { + "epoch": 0.6826202029113365, + "grad_norm": 0.823606550693512, + "learning_rate": 4.844981043236362e-06, + "loss": 0.5077, + "step": 9285 + }, + { + "epoch": 0.6826937215115424, + "grad_norm": 0.8254640698432922, + "learning_rate": 4.84494763993326e-06, + "loss": 0.5398, + "step": 9286 + }, + { + "epoch": 0.6827672401117483, + "grad_norm": 0.886535108089447, + "learning_rate": 4.8449142331468665e-06, + "loss": 0.5668, + "step": 9287 + }, + { + "epoch": 0.6828407587119542, + "grad_norm": 0.8590834140777588, + "learning_rate": 4.8448808228772314e-06, + "loss": 0.5602, + "step": 9288 + }, + { + "epoch": 0.6829142773121599, + "grad_norm": 0.8420501351356506, + "learning_rate": 4.844847409124405e-06, + "loss": 0.5067, + "step": 9289 + }, + { + "epoch": 0.6829877959123658, + "grad_norm": 0.8173956871032715, + "learning_rate": 4.844813991888435e-06, + "loss": 0.521, + "step": 9290 + }, + { + "epoch": 0.6830613145125717, + "grad_norm": 0.8335960507392883, + "learning_rate": 4.844780571169373e-06, + "loss": 0.5962, + "step": 9291 + }, + { + "epoch": 0.6831348331127776, + "grad_norm": 0.8666524291038513, + "learning_rate": 4.844747146967268e-06, + "loss": 0.5553, + "step": 9292 + }, + { + "epoch": 0.6832083517129833, + "grad_norm": 0.8286288380622864, + "learning_rate": 4.844713719282168e-06, + "loss": 0.5481, + "step": 9293 + }, + { + "epoch": 0.6832818703131892, + "grad_norm": 0.811303973197937, + "learning_rate": 4.844680288114126e-06, + "loss": 0.5397, + "step": 9294 + }, + { + "epoch": 0.6833553889133951, + "grad_norm": 0.8446623086929321, + "learning_rate": 4.844646853463189e-06, + "loss": 0.5221, + "step": 9295 + }, + { + "epoch": 0.683428907513601, + "grad_norm": 0.8195566534996033, + "learning_rate": 4.844613415329408e-06, + "loss": 0.5433, + "step": 9296 + }, + { + "epoch": 0.6835024261138067, + "grad_norm": 0.8184093236923218, + "learning_rate": 4.844579973712831e-06, + "loss": 0.5173, + "step": 9297 + }, + { + "epoch": 0.6835759447140126, + "grad_norm": 0.8343856930732727, + "learning_rate": 4.8445465286135095e-06, + "loss": 0.5619, + "step": 9298 + }, + { + "epoch": 0.6836494633142185, + "grad_norm": 0.7924566864967346, + "learning_rate": 4.844513080031493e-06, + "loss": 0.4925, + "step": 9299 + }, + { + "epoch": 0.6837229819144244, + "grad_norm": 0.8437932729721069, + "learning_rate": 4.8444796279668295e-06, + "loss": 0.5499, + "step": 9300 + }, + { + "epoch": 0.6837965005146303, + "grad_norm": 0.8268933892250061, + "learning_rate": 4.844446172419571e-06, + "loss": 0.4995, + "step": 9301 + }, + { + "epoch": 0.683870019114836, + "grad_norm": 0.8746518492698669, + "learning_rate": 4.844412713389765e-06, + "loss": 0.5946, + "step": 9302 + }, + { + "epoch": 0.6839435377150419, + "grad_norm": 0.7903155088424683, + "learning_rate": 4.844379250877464e-06, + "loss": 0.5298, + "step": 9303 + }, + { + "epoch": 0.6840170563152478, + "grad_norm": 0.8448834419250488, + "learning_rate": 4.844345784882715e-06, + "loss": 0.5434, + "step": 9304 + }, + { + "epoch": 0.6840905749154537, + "grad_norm": 0.79775071144104, + "learning_rate": 4.844312315405569e-06, + "loss": 0.513, + "step": 9305 + }, + { + "epoch": 0.6841640935156594, + "grad_norm": 0.8269309997558594, + "learning_rate": 4.844278842446075e-06, + "loss": 0.5409, + "step": 9306 + }, + { + "epoch": 0.6842376121158653, + "grad_norm": 0.8224620223045349, + "learning_rate": 4.844245366004283e-06, + "loss": 0.5417, + "step": 9307 + }, + { + "epoch": 0.6843111307160712, + "grad_norm": 0.7986931204795837, + "learning_rate": 4.844211886080243e-06, + "loss": 0.5441, + "step": 9308 + }, + { + "epoch": 0.6843846493162771, + "grad_norm": 0.8757932782173157, + "learning_rate": 4.844178402674005e-06, + "loss": 0.586, + "step": 9309 + }, + { + "epoch": 0.6844581679164828, + "grad_norm": 0.8491127490997314, + "learning_rate": 4.844144915785618e-06, + "loss": 0.5884, + "step": 9310 + }, + { + "epoch": 0.6845316865166887, + "grad_norm": 0.8000475764274597, + "learning_rate": 4.844111425415133e-06, + "loss": 0.5239, + "step": 9311 + }, + { + "epoch": 0.6846052051168946, + "grad_norm": 0.8818028569221497, + "learning_rate": 4.844077931562597e-06, + "loss": 0.5211, + "step": 9312 + }, + { + "epoch": 0.6846787237171005, + "grad_norm": 0.8471153378486633, + "learning_rate": 4.844044434228063e-06, + "loss": 0.4968, + "step": 9313 + }, + { + "epoch": 0.6847522423173062, + "grad_norm": 0.8674636483192444, + "learning_rate": 4.8440109334115795e-06, + "loss": 0.566, + "step": 9314 + }, + { + "epoch": 0.6848257609175121, + "grad_norm": 0.8565221428871155, + "learning_rate": 4.843977429113196e-06, + "loss": 0.5779, + "step": 9315 + }, + { + "epoch": 0.684899279517718, + "grad_norm": 0.82789546251297, + "learning_rate": 4.843943921332962e-06, + "loss": 0.54, + "step": 9316 + }, + { + "epoch": 0.6849727981179239, + "grad_norm": 0.886228621006012, + "learning_rate": 4.843910410070928e-06, + "loss": 0.5516, + "step": 9317 + }, + { + "epoch": 0.6850463167181297, + "grad_norm": 0.8709627389907837, + "learning_rate": 4.843876895327143e-06, + "loss": 0.5102, + "step": 9318 + }, + { + "epoch": 0.6851198353183355, + "grad_norm": 0.8733469843864441, + "learning_rate": 4.843843377101657e-06, + "loss": 0.5601, + "step": 9319 + }, + { + "epoch": 0.6851933539185414, + "grad_norm": 0.8096408247947693, + "learning_rate": 4.84380985539452e-06, + "loss": 0.5527, + "step": 9320 + }, + { + "epoch": 0.6852668725187473, + "grad_norm": 0.8849220275878906, + "learning_rate": 4.843776330205783e-06, + "loss": 0.565, + "step": 9321 + }, + { + "epoch": 0.6853403911189531, + "grad_norm": 0.8052298426628113, + "learning_rate": 4.843742801535493e-06, + "loss": 0.5218, + "step": 9322 + }, + { + "epoch": 0.6854139097191589, + "grad_norm": 0.7995292544364929, + "learning_rate": 4.843709269383702e-06, + "loss": 0.5647, + "step": 9323 + }, + { + "epoch": 0.6854874283193648, + "grad_norm": 0.8263292908668518, + "learning_rate": 4.84367573375046e-06, + "loss": 0.5547, + "step": 9324 + }, + { + "epoch": 0.6855609469195707, + "grad_norm": 0.8422336578369141, + "learning_rate": 4.843642194635815e-06, + "loss": 0.5129, + "step": 9325 + }, + { + "epoch": 0.6856344655197765, + "grad_norm": 0.842430055141449, + "learning_rate": 4.843608652039818e-06, + "loss": 0.5497, + "step": 9326 + }, + { + "epoch": 0.6857079841199824, + "grad_norm": 0.8429458737373352, + "learning_rate": 4.8435751059625194e-06, + "loss": 0.5439, + "step": 9327 + }, + { + "epoch": 0.6857815027201882, + "grad_norm": 0.8071109056472778, + "learning_rate": 4.843541556403968e-06, + "loss": 0.5171, + "step": 9328 + }, + { + "epoch": 0.6858550213203941, + "grad_norm": 0.8951935768127441, + "learning_rate": 4.843508003364214e-06, + "loss": 0.5683, + "step": 9329 + }, + { + "epoch": 0.6859285399205999, + "grad_norm": 0.8518374562263489, + "learning_rate": 4.843474446843306e-06, + "loss": 0.5723, + "step": 9330 + }, + { + "epoch": 0.6860020585208058, + "grad_norm": 0.813610851764679, + "learning_rate": 4.843440886841296e-06, + "loss": 0.5031, + "step": 9331 + }, + { + "epoch": 0.6860755771210116, + "grad_norm": 0.8753699064254761, + "learning_rate": 4.843407323358234e-06, + "loss": 0.5542, + "step": 9332 + }, + { + "epoch": 0.6861490957212175, + "grad_norm": 0.8186306357383728, + "learning_rate": 4.843373756394168e-06, + "loss": 0.5294, + "step": 9333 + }, + { + "epoch": 0.6862226143214233, + "grad_norm": 0.8236554861068726, + "learning_rate": 4.843340185949147e-06, + "loss": 0.5451, + "step": 9334 + }, + { + "epoch": 0.6862961329216292, + "grad_norm": 0.8590090870857239, + "learning_rate": 4.8433066120232245e-06, + "loss": 0.5173, + "step": 9335 + }, + { + "epoch": 0.686369651521835, + "grad_norm": 0.8769657015800476, + "learning_rate": 4.843273034616447e-06, + "loss": 0.5706, + "step": 9336 + }, + { + "epoch": 0.6864431701220409, + "grad_norm": 0.916365385055542, + "learning_rate": 4.843239453728866e-06, + "loss": 0.5774, + "step": 9337 + }, + { + "epoch": 0.6865166887222467, + "grad_norm": 0.8621924519538879, + "learning_rate": 4.8432058693605324e-06, + "loss": 0.5679, + "step": 9338 + }, + { + "epoch": 0.6865902073224526, + "grad_norm": 0.8449414372444153, + "learning_rate": 4.843172281511494e-06, + "loss": 0.5421, + "step": 9339 + }, + { + "epoch": 0.6866637259226585, + "grad_norm": 0.861100971698761, + "learning_rate": 4.843138690181801e-06, + "loss": 0.4912, + "step": 9340 + }, + { + "epoch": 0.6867372445228643, + "grad_norm": 0.8297886252403259, + "learning_rate": 4.843105095371503e-06, + "loss": 0.5242, + "step": 9341 + }, + { + "epoch": 0.6868107631230701, + "grad_norm": 0.8314257264137268, + "learning_rate": 4.8430714970806525e-06, + "loss": 0.5592, + "step": 9342 + }, + { + "epoch": 0.686884281723276, + "grad_norm": 0.879919171333313, + "learning_rate": 4.8430378953092974e-06, + "loss": 0.5135, + "step": 9343 + }, + { + "epoch": 0.6869578003234819, + "grad_norm": 0.8297314047813416, + "learning_rate": 4.843004290057487e-06, + "loss": 0.5348, + "step": 9344 + }, + { + "epoch": 0.6870313189236877, + "grad_norm": 0.7695740461349487, + "learning_rate": 4.8429706813252716e-06, + "loss": 0.4732, + "step": 9345 + }, + { + "epoch": 0.6871048375238935, + "grad_norm": 0.878410279750824, + "learning_rate": 4.842937069112702e-06, + "loss": 0.566, + "step": 9346 + }, + { + "epoch": 0.6871783561240994, + "grad_norm": 0.7960689663887024, + "learning_rate": 4.842903453419828e-06, + "loss": 0.4964, + "step": 9347 + }, + { + "epoch": 0.6872518747243053, + "grad_norm": 0.8239715099334717, + "learning_rate": 4.8428698342466995e-06, + "loss": 0.5527, + "step": 9348 + }, + { + "epoch": 0.6873253933245111, + "grad_norm": 0.847010612487793, + "learning_rate": 4.842836211593366e-06, + "loss": 0.5783, + "step": 9349 + }, + { + "epoch": 0.6873989119247169, + "grad_norm": 0.8771141767501831, + "learning_rate": 4.842802585459877e-06, + "loss": 0.5528, + "step": 9350 + }, + { + "epoch": 0.6874724305249228, + "grad_norm": 0.8626658916473389, + "learning_rate": 4.842768955846284e-06, + "loss": 0.5395, + "step": 9351 + }, + { + "epoch": 0.6875459491251287, + "grad_norm": 0.8824483752250671, + "learning_rate": 4.842735322752636e-06, + "loss": 0.5672, + "step": 9352 + }, + { + "epoch": 0.6876194677253346, + "grad_norm": 0.8526688814163208, + "learning_rate": 4.842701686178981e-06, + "loss": 0.5525, + "step": 9353 + }, + { + "epoch": 0.6876929863255403, + "grad_norm": 0.8265148401260376, + "learning_rate": 4.8426680461253725e-06, + "loss": 0.5781, + "step": 9354 + }, + { + "epoch": 0.6877665049257462, + "grad_norm": 0.8016793131828308, + "learning_rate": 4.842634402591859e-06, + "loss": 0.5245, + "step": 9355 + }, + { + "epoch": 0.6878400235259521, + "grad_norm": 0.8342020511627197, + "learning_rate": 4.842600755578491e-06, + "loss": 0.5539, + "step": 9356 + }, + { + "epoch": 0.687913542126158, + "grad_norm": 0.8569339513778687, + "learning_rate": 4.842567105085316e-06, + "loss": 0.478, + "step": 9357 + }, + { + "epoch": 0.6879870607263637, + "grad_norm": 0.8061361908912659, + "learning_rate": 4.842533451112387e-06, + "loss": 0.5296, + "step": 9358 + }, + { + "epoch": 0.6880605793265696, + "grad_norm": 0.7712494730949402, + "learning_rate": 4.8424997936597525e-06, + "loss": 0.4959, + "step": 9359 + }, + { + "epoch": 0.6881340979267755, + "grad_norm": 0.8271931409835815, + "learning_rate": 4.8424661327274626e-06, + "loss": 0.584, + "step": 9360 + }, + { + "epoch": 0.6882076165269814, + "grad_norm": 0.856031060218811, + "learning_rate": 4.842432468315568e-06, + "loss": 0.5511, + "step": 9361 + }, + { + "epoch": 0.6882811351271871, + "grad_norm": 0.8061796426773071, + "learning_rate": 4.842398800424118e-06, + "loss": 0.5437, + "step": 9362 + }, + { + "epoch": 0.688354653727393, + "grad_norm": 0.8430172801017761, + "learning_rate": 4.842365129053163e-06, + "loss": 0.5479, + "step": 9363 + }, + { + "epoch": 0.6884281723275989, + "grad_norm": 0.8583685159683228, + "learning_rate": 4.842331454202754e-06, + "loss": 0.5586, + "step": 9364 + }, + { + "epoch": 0.6885016909278048, + "grad_norm": 0.8268895745277405, + "learning_rate": 4.842297775872938e-06, + "loss": 0.5331, + "step": 9365 + }, + { + "epoch": 0.6885752095280105, + "grad_norm": 0.8804203271865845, + "learning_rate": 4.842264094063767e-06, + "loss": 0.564, + "step": 9366 + }, + { + "epoch": 0.6886487281282164, + "grad_norm": 0.8264340162277222, + "learning_rate": 4.842230408775292e-06, + "loss": 0.5144, + "step": 9367 + }, + { + "epoch": 0.6887222467284223, + "grad_norm": 0.7827036380767822, + "learning_rate": 4.842196720007561e-06, + "loss": 0.562, + "step": 9368 + }, + { + "epoch": 0.6887957653286282, + "grad_norm": 0.8145617246627808, + "learning_rate": 4.842163027760626e-06, + "loss": 0.5371, + "step": 9369 + }, + { + "epoch": 0.688869283928834, + "grad_norm": 0.8524771928787231, + "learning_rate": 4.842129332034535e-06, + "loss": 0.603, + "step": 9370 + }, + { + "epoch": 0.6889428025290398, + "grad_norm": 0.8072348833084106, + "learning_rate": 4.842095632829341e-06, + "loss": 0.5067, + "step": 9371 + }, + { + "epoch": 0.6890163211292457, + "grad_norm": 0.8237748146057129, + "learning_rate": 4.84206193014509e-06, + "loss": 0.5687, + "step": 9372 + }, + { + "epoch": 0.6890898397294516, + "grad_norm": 0.8131492733955383, + "learning_rate": 4.842028223981836e-06, + "loss": 0.5253, + "step": 9373 + }, + { + "epoch": 0.6891633583296574, + "grad_norm": 0.8434028625488281, + "learning_rate": 4.841994514339626e-06, + "loss": 0.5772, + "step": 9374 + }, + { + "epoch": 0.6892368769298632, + "grad_norm": 0.8413892388343811, + "learning_rate": 4.8419608012185125e-06, + "loss": 0.4998, + "step": 9375 + }, + { + "epoch": 0.6893103955300691, + "grad_norm": 0.8626723289489746, + "learning_rate": 4.841927084618544e-06, + "loss": 0.5546, + "step": 9376 + }, + { + "epoch": 0.689383914130275, + "grad_norm": 0.7954138517379761, + "learning_rate": 4.84189336453977e-06, + "loss": 0.5206, + "step": 9377 + }, + { + "epoch": 0.6894574327304808, + "grad_norm": 0.8614278435707092, + "learning_rate": 4.8418596409822434e-06, + "loss": 0.5473, + "step": 9378 + }, + { + "epoch": 0.6895309513306866, + "grad_norm": 0.8257823586463928, + "learning_rate": 4.8418259139460115e-06, + "loss": 0.5192, + "step": 9379 + }, + { + "epoch": 0.6896044699308925, + "grad_norm": 0.8597123622894287, + "learning_rate": 4.8417921834311255e-06, + "loss": 0.4934, + "step": 9380 + }, + { + "epoch": 0.6896779885310984, + "grad_norm": 0.8221179246902466, + "learning_rate": 4.841758449437636e-06, + "loss": 0.5057, + "step": 9381 + }, + { + "epoch": 0.6897515071313042, + "grad_norm": 0.8312053680419922, + "learning_rate": 4.841724711965592e-06, + "loss": 0.5582, + "step": 9382 + }, + { + "epoch": 0.68982502573151, + "grad_norm": 0.8469133377075195, + "learning_rate": 4.841690971015044e-06, + "loss": 0.5332, + "step": 9383 + }, + { + "epoch": 0.6898985443317159, + "grad_norm": 0.8246049284934998, + "learning_rate": 4.841657226586043e-06, + "loss": 0.5125, + "step": 9384 + }, + { + "epoch": 0.6899720629319218, + "grad_norm": 0.7767740488052368, + "learning_rate": 4.841623478678638e-06, + "loss": 0.4958, + "step": 9385 + }, + { + "epoch": 0.6900455815321276, + "grad_norm": 0.851387083530426, + "learning_rate": 4.8415897272928795e-06, + "loss": 0.546, + "step": 9386 + }, + { + "epoch": 0.6901191001323335, + "grad_norm": 0.8489605188369751, + "learning_rate": 4.841555972428818e-06, + "loss": 0.553, + "step": 9387 + }, + { + "epoch": 0.6901926187325393, + "grad_norm": 0.8678834438323975, + "learning_rate": 4.841522214086503e-06, + "loss": 0.5514, + "step": 9388 + }, + { + "epoch": 0.6902661373327452, + "grad_norm": 0.7875951528549194, + "learning_rate": 4.8414884522659855e-06, + "loss": 0.4752, + "step": 9389 + }, + { + "epoch": 0.690339655932951, + "grad_norm": 0.8077500462532043, + "learning_rate": 4.8414546869673145e-06, + "loss": 0.5327, + "step": 9390 + }, + { + "epoch": 0.6904131745331569, + "grad_norm": 0.8318175673484802, + "learning_rate": 4.841420918190541e-06, + "loss": 0.5685, + "step": 9391 + }, + { + "epoch": 0.6904866931333627, + "grad_norm": 0.8850287199020386, + "learning_rate": 4.841387145935715e-06, + "loss": 0.5533, + "step": 9392 + }, + { + "epoch": 0.6905602117335686, + "grad_norm": 0.8428428769111633, + "learning_rate": 4.841353370202886e-06, + "loss": 0.5257, + "step": 9393 + }, + { + "epoch": 0.6906337303337744, + "grad_norm": 0.796561598777771, + "learning_rate": 4.841319590992106e-06, + "loss": 0.5312, + "step": 9394 + }, + { + "epoch": 0.6907072489339803, + "grad_norm": 0.8703632950782776, + "learning_rate": 4.841285808303423e-06, + "loss": 0.5843, + "step": 9395 + }, + { + "epoch": 0.6907807675341862, + "grad_norm": 0.8096339106559753, + "learning_rate": 4.8412520221368885e-06, + "loss": 0.5157, + "step": 9396 + }, + { + "epoch": 0.690854286134392, + "grad_norm": 0.8548423647880554, + "learning_rate": 4.841218232492553e-06, + "loss": 0.515, + "step": 9397 + }, + { + "epoch": 0.6909278047345978, + "grad_norm": 0.8621024489402771, + "learning_rate": 4.841184439370465e-06, + "loss": 0.5776, + "step": 9398 + }, + { + "epoch": 0.6910013233348037, + "grad_norm": 0.8327527046203613, + "learning_rate": 4.841150642770675e-06, + "loss": 0.577, + "step": 9399 + }, + { + "epoch": 0.6910748419350096, + "grad_norm": 0.8084779381752014, + "learning_rate": 4.8411168426932355e-06, + "loss": 0.5231, + "step": 9400 + }, + { + "epoch": 0.6911483605352154, + "grad_norm": 0.8404009342193604, + "learning_rate": 4.841083039138194e-06, + "loss": 0.5583, + "step": 9401 + }, + { + "epoch": 0.6912218791354212, + "grad_norm": 0.8518198132514954, + "learning_rate": 4.841049232105603e-06, + "loss": 0.5732, + "step": 9402 + }, + { + "epoch": 0.6912953977356271, + "grad_norm": 0.8088056445121765, + "learning_rate": 4.841015421595511e-06, + "loss": 0.5396, + "step": 9403 + }, + { + "epoch": 0.691368916335833, + "grad_norm": 0.8397547602653503, + "learning_rate": 4.840981607607968e-06, + "loss": 0.5296, + "step": 9404 + }, + { + "epoch": 0.6914424349360389, + "grad_norm": 0.8205531239509583, + "learning_rate": 4.840947790143025e-06, + "loss": 0.5491, + "step": 9405 + }, + { + "epoch": 0.6915159535362446, + "grad_norm": 0.8421556353569031, + "learning_rate": 4.840913969200733e-06, + "loss": 0.5487, + "step": 9406 + }, + { + "epoch": 0.6915894721364505, + "grad_norm": 0.8345940709114075, + "learning_rate": 4.840880144781142e-06, + "loss": 0.5299, + "step": 9407 + }, + { + "epoch": 0.6916629907366564, + "grad_norm": 0.8019880056381226, + "learning_rate": 4.840846316884301e-06, + "loss": 0.5536, + "step": 9408 + }, + { + "epoch": 0.6917365093368623, + "grad_norm": 0.8202253580093384, + "learning_rate": 4.840812485510261e-06, + "loss": 0.5093, + "step": 9409 + }, + { + "epoch": 0.691810027937068, + "grad_norm": 0.8134739995002747, + "learning_rate": 4.840778650659073e-06, + "loss": 0.5258, + "step": 9410 + }, + { + "epoch": 0.6918835465372739, + "grad_norm": 0.8429462313652039, + "learning_rate": 4.8407448123307854e-06, + "loss": 0.5728, + "step": 9411 + }, + { + "epoch": 0.6919570651374798, + "grad_norm": 0.810142993927002, + "learning_rate": 4.840710970525451e-06, + "loss": 0.4773, + "step": 9412 + }, + { + "epoch": 0.6920305837376857, + "grad_norm": 0.8320975303649902, + "learning_rate": 4.840677125243117e-06, + "loss": 0.5252, + "step": 9413 + }, + { + "epoch": 0.6921041023378914, + "grad_norm": 0.8110191822052002, + "learning_rate": 4.840643276483836e-06, + "loss": 0.5646, + "step": 9414 + }, + { + "epoch": 0.6921776209380973, + "grad_norm": 0.7967197895050049, + "learning_rate": 4.840609424247657e-06, + "loss": 0.5012, + "step": 9415 + }, + { + "epoch": 0.6922511395383032, + "grad_norm": 0.8271967172622681, + "learning_rate": 4.840575568534632e-06, + "loss": 0.5199, + "step": 9416 + }, + { + "epoch": 0.6923246581385091, + "grad_norm": 0.8309230208396912, + "learning_rate": 4.840541709344809e-06, + "loss": 0.476, + "step": 9417 + }, + { + "epoch": 0.6923981767387148, + "grad_norm": 0.8154858946800232, + "learning_rate": 4.840507846678241e-06, + "loss": 0.5318, + "step": 9418 + }, + { + "epoch": 0.6924716953389207, + "grad_norm": 0.785130500793457, + "learning_rate": 4.8404739805349756e-06, + "loss": 0.4975, + "step": 9419 + }, + { + "epoch": 0.6925452139391266, + "grad_norm": 0.8800254464149475, + "learning_rate": 4.8404401109150635e-06, + "loss": 0.5576, + "step": 9420 + }, + { + "epoch": 0.6926187325393325, + "grad_norm": 0.7789275050163269, + "learning_rate": 4.8404062378185565e-06, + "loss": 0.5101, + "step": 9421 + }, + { + "epoch": 0.6926922511395383, + "grad_norm": 0.853266179561615, + "learning_rate": 4.840372361245505e-06, + "loss": 0.5481, + "step": 9422 + }, + { + "epoch": 0.6927657697397441, + "grad_norm": 0.9395782351493835, + "learning_rate": 4.840338481195958e-06, + "loss": 0.6031, + "step": 9423 + }, + { + "epoch": 0.69283928833995, + "grad_norm": 0.8737625479698181, + "learning_rate": 4.8403045976699655e-06, + "loss": 0.5495, + "step": 9424 + }, + { + "epoch": 0.6929128069401559, + "grad_norm": 0.8424541354179382, + "learning_rate": 4.8402707106675804e-06, + "loss": 0.5493, + "step": 9425 + }, + { + "epoch": 0.6929863255403617, + "grad_norm": 0.8457373976707458, + "learning_rate": 4.84023682018885e-06, + "loss": 0.5488, + "step": 9426 + }, + { + "epoch": 0.6930598441405675, + "grad_norm": 0.814069926738739, + "learning_rate": 4.8402029262338265e-06, + "loss": 0.5606, + "step": 9427 + }, + { + "epoch": 0.6931333627407734, + "grad_norm": 0.8696130514144897, + "learning_rate": 4.840169028802559e-06, + "loss": 0.525, + "step": 9428 + }, + { + "epoch": 0.6932068813409793, + "grad_norm": 0.8323102593421936, + "learning_rate": 4.840135127895099e-06, + "loss": 0.5638, + "step": 9429 + }, + { + "epoch": 0.6932803999411851, + "grad_norm": 0.7929182052612305, + "learning_rate": 4.8401012235114964e-06, + "loss": 0.5281, + "step": 9430 + }, + { + "epoch": 0.693353918541391, + "grad_norm": 0.8543181419372559, + "learning_rate": 4.840067315651802e-06, + "loss": 0.5261, + "step": 9431 + }, + { + "epoch": 0.6934274371415968, + "grad_norm": 0.8289944529533386, + "learning_rate": 4.840033404316065e-06, + "loss": 0.5561, + "step": 9432 + }, + { + "epoch": 0.6935009557418027, + "grad_norm": 0.8269586563110352, + "learning_rate": 4.8399994895043365e-06, + "loss": 0.5134, + "step": 9433 + }, + { + "epoch": 0.6935744743420085, + "grad_norm": 0.8333262801170349, + "learning_rate": 4.8399655712166685e-06, + "loss": 0.6156, + "step": 9434 + }, + { + "epoch": 0.6936479929422144, + "grad_norm": 0.8267166018486023, + "learning_rate": 4.8399316494531075e-06, + "loss": 0.5791, + "step": 9435 + }, + { + "epoch": 0.6937215115424202, + "grad_norm": 0.8030021786689758, + "learning_rate": 4.839897724213708e-06, + "loss": 0.5292, + "step": 9436 + }, + { + "epoch": 0.6937950301426261, + "grad_norm": 0.7784038782119751, + "learning_rate": 4.839863795498517e-06, + "loss": 0.5047, + "step": 9437 + }, + { + "epoch": 0.6938685487428319, + "grad_norm": 0.8604013919830322, + "learning_rate": 4.8398298633075876e-06, + "loss": 0.5399, + "step": 9438 + }, + { + "epoch": 0.6939420673430378, + "grad_norm": 0.8749263882637024, + "learning_rate": 4.839795927640969e-06, + "loss": 0.5553, + "step": 9439 + }, + { + "epoch": 0.6940155859432436, + "grad_norm": 0.8124722242355347, + "learning_rate": 4.839761988498711e-06, + "loss": 0.5188, + "step": 9440 + }, + { + "epoch": 0.6940891045434495, + "grad_norm": 0.7884820103645325, + "learning_rate": 4.839728045880865e-06, + "loss": 0.5007, + "step": 9441 + }, + { + "epoch": 0.6941626231436554, + "grad_norm": 0.8538601994514465, + "learning_rate": 4.839694099787481e-06, + "loss": 0.5335, + "step": 9442 + }, + { + "epoch": 0.6942361417438612, + "grad_norm": 0.8295578956604004, + "learning_rate": 4.83966015021861e-06, + "loss": 0.5159, + "step": 9443 + }, + { + "epoch": 0.694309660344067, + "grad_norm": 0.826704740524292, + "learning_rate": 4.839626197174302e-06, + "loss": 0.5274, + "step": 9444 + }, + { + "epoch": 0.6943831789442729, + "grad_norm": 0.8605096936225891, + "learning_rate": 4.8395922406546066e-06, + "loss": 0.5226, + "step": 9445 + }, + { + "epoch": 0.6944566975444788, + "grad_norm": 0.8357852101325989, + "learning_rate": 4.839558280659576e-06, + "loss": 0.5431, + "step": 9446 + }, + { + "epoch": 0.6945302161446846, + "grad_norm": 0.8367773294448853, + "learning_rate": 4.8395243171892585e-06, + "loss": 0.5559, + "step": 9447 + }, + { + "epoch": 0.6946037347448905, + "grad_norm": 0.8107157349586487, + "learning_rate": 4.8394903502437066e-06, + "loss": 0.4839, + "step": 9448 + }, + { + "epoch": 0.6946772533450963, + "grad_norm": 0.8345455527305603, + "learning_rate": 4.83945637982297e-06, + "loss": 0.5344, + "step": 9449 + }, + { + "epoch": 0.6947507719453022, + "grad_norm": 0.8589421510696411, + "learning_rate": 4.839422405927098e-06, + "loss": 0.5675, + "step": 9450 + }, + { + "epoch": 0.694824290545508, + "grad_norm": 0.7837258577346802, + "learning_rate": 4.839388428556143e-06, + "loss": 0.5451, + "step": 9451 + }, + { + "epoch": 0.6948978091457139, + "grad_norm": 0.8179518580436707, + "learning_rate": 4.839354447710155e-06, + "loss": 0.5269, + "step": 9452 + }, + { + "epoch": 0.6949713277459197, + "grad_norm": 0.8082245588302612, + "learning_rate": 4.8393204633891835e-06, + "loss": 0.5426, + "step": 9453 + }, + { + "epoch": 0.6950448463461256, + "grad_norm": 0.8380956053733826, + "learning_rate": 4.8392864755932785e-06, + "loss": 0.5548, + "step": 9454 + }, + { + "epoch": 0.6951183649463314, + "grad_norm": 0.8357593417167664, + "learning_rate": 4.839252484322493e-06, + "loss": 0.5442, + "step": 9455 + }, + { + "epoch": 0.6951918835465373, + "grad_norm": 0.8433772921562195, + "learning_rate": 4.839218489576876e-06, + "loss": 0.529, + "step": 9456 + }, + { + "epoch": 0.6952654021467431, + "grad_norm": 0.8041382431983948, + "learning_rate": 4.839184491356478e-06, + "loss": 0.4983, + "step": 9457 + }, + { + "epoch": 0.695338920746949, + "grad_norm": 0.8003457188606262, + "learning_rate": 4.8391504896613486e-06, + "loss": 0.5288, + "step": 9458 + }, + { + "epoch": 0.6954124393471548, + "grad_norm": 0.8511238694190979, + "learning_rate": 4.839116484491541e-06, + "loss": 0.5646, + "step": 9459 + }, + { + "epoch": 0.6954859579473607, + "grad_norm": 0.8340010643005371, + "learning_rate": 4.8390824758471024e-06, + "loss": 0.5644, + "step": 9460 + }, + { + "epoch": 0.6955594765475666, + "grad_norm": 0.8570244908332825, + "learning_rate": 4.839048463728085e-06, + "loss": 0.5049, + "step": 9461 + }, + { + "epoch": 0.6956329951477724, + "grad_norm": 0.8302440047264099, + "learning_rate": 4.839014448134539e-06, + "loss": 0.5456, + "step": 9462 + }, + { + "epoch": 0.6957065137479782, + "grad_norm": 0.8268259763717651, + "learning_rate": 4.838980429066516e-06, + "loss": 0.53, + "step": 9463 + }, + { + "epoch": 0.6957800323481841, + "grad_norm": 0.8028160929679871, + "learning_rate": 4.838946406524066e-06, + "loss": 0.5437, + "step": 9464 + }, + { + "epoch": 0.69585355094839, + "grad_norm": 0.8311717510223389, + "learning_rate": 4.838912380507237e-06, + "loss": 0.5205, + "step": 9465 + }, + { + "epoch": 0.6959270695485958, + "grad_norm": 0.8558070063591003, + "learning_rate": 4.838878351016084e-06, + "loss": 0.5767, + "step": 9466 + }, + { + "epoch": 0.6960005881488016, + "grad_norm": 0.8338574171066284, + "learning_rate": 4.838844318050654e-06, + "loss": 0.5179, + "step": 9467 + }, + { + "epoch": 0.6960741067490075, + "grad_norm": 0.8257429003715515, + "learning_rate": 4.838810281610999e-06, + "loss": 0.5691, + "step": 9468 + }, + { + "epoch": 0.6961476253492134, + "grad_norm": 0.833864152431488, + "learning_rate": 4.83877624169717e-06, + "loss": 0.5205, + "step": 9469 + }, + { + "epoch": 0.6962211439494193, + "grad_norm": 0.8072954416275024, + "learning_rate": 4.838742198309216e-06, + "loss": 0.5251, + "step": 9470 + }, + { + "epoch": 0.696294662549625, + "grad_norm": 0.8296784162521362, + "learning_rate": 4.83870815144719e-06, + "loss": 0.5559, + "step": 9471 + }, + { + "epoch": 0.6963681811498309, + "grad_norm": 0.8200187683105469, + "learning_rate": 4.83867410111114e-06, + "loss": 0.5164, + "step": 9472 + }, + { + "epoch": 0.6964416997500368, + "grad_norm": 0.8521422147750854, + "learning_rate": 4.838640047301118e-06, + "loss": 0.5867, + "step": 9473 + }, + { + "epoch": 0.6965152183502427, + "grad_norm": 0.8158926367759705, + "learning_rate": 4.838605990017174e-06, + "loss": 0.5146, + "step": 9474 + }, + { + "epoch": 0.6965887369504484, + "grad_norm": 0.8309433460235596, + "learning_rate": 4.838571929259359e-06, + "loss": 0.5629, + "step": 9475 + }, + { + "epoch": 0.6966622555506543, + "grad_norm": 0.8087444305419922, + "learning_rate": 4.838537865027723e-06, + "loss": 0.5178, + "step": 9476 + }, + { + "epoch": 0.6967357741508602, + "grad_norm": 0.864047646522522, + "learning_rate": 4.8385037973223185e-06, + "loss": 0.537, + "step": 9477 + }, + { + "epoch": 0.6968092927510661, + "grad_norm": 0.8216484189033508, + "learning_rate": 4.8384697261431935e-06, + "loss": 0.5232, + "step": 9478 + }, + { + "epoch": 0.6968828113512718, + "grad_norm": 0.8441246747970581, + "learning_rate": 4.8384356514904e-06, + "loss": 0.5443, + "step": 9479 + }, + { + "epoch": 0.6969563299514777, + "grad_norm": 0.8176340460777283, + "learning_rate": 4.8384015733639885e-06, + "loss": 0.5347, + "step": 9480 + }, + { + "epoch": 0.6970298485516836, + "grad_norm": 0.8226016759872437, + "learning_rate": 4.838367491764009e-06, + "loss": 0.5435, + "step": 9481 + }, + { + "epoch": 0.6971033671518895, + "grad_norm": 0.8605604767799377, + "learning_rate": 4.838333406690513e-06, + "loss": 0.5007, + "step": 9482 + }, + { + "epoch": 0.6971768857520952, + "grad_norm": 0.7987176179885864, + "learning_rate": 4.838299318143551e-06, + "loss": 0.4921, + "step": 9483 + }, + { + "epoch": 0.6972504043523011, + "grad_norm": 0.8161829710006714, + "learning_rate": 4.838265226123174e-06, + "loss": 0.5588, + "step": 9484 + }, + { + "epoch": 0.697323922952507, + "grad_norm": 0.8489097952842712, + "learning_rate": 4.838231130629431e-06, + "loss": 0.521, + "step": 9485 + }, + { + "epoch": 0.6973974415527129, + "grad_norm": 0.971222996711731, + "learning_rate": 4.8381970316623734e-06, + "loss": 0.6027, + "step": 9486 + }, + { + "epoch": 0.6974709601529187, + "grad_norm": 0.804072380065918, + "learning_rate": 4.8381629292220524e-06, + "loss": 0.5538, + "step": 9487 + }, + { + "epoch": 0.6975444787531245, + "grad_norm": 0.797117292881012, + "learning_rate": 4.838128823308519e-06, + "loss": 0.5077, + "step": 9488 + }, + { + "epoch": 0.6976179973533304, + "grad_norm": 0.8355290293693542, + "learning_rate": 4.8380947139218235e-06, + "loss": 0.5516, + "step": 9489 + }, + { + "epoch": 0.6976915159535363, + "grad_norm": 0.7981356978416443, + "learning_rate": 4.8380606010620155e-06, + "loss": 0.4924, + "step": 9490 + }, + { + "epoch": 0.6977650345537421, + "grad_norm": 0.8183567523956299, + "learning_rate": 4.838026484729147e-06, + "loss": 0.5469, + "step": 9491 + }, + { + "epoch": 0.6978385531539479, + "grad_norm": 0.8105341792106628, + "learning_rate": 4.837992364923268e-06, + "loss": 0.5215, + "step": 9492 + }, + { + "epoch": 0.6979120717541538, + "grad_norm": 0.8326417207717896, + "learning_rate": 4.837958241644429e-06, + "loss": 0.5087, + "step": 9493 + }, + { + "epoch": 0.6979855903543597, + "grad_norm": 0.8281422853469849, + "learning_rate": 4.837924114892681e-06, + "loss": 0.5664, + "step": 9494 + }, + { + "epoch": 0.6980591089545655, + "grad_norm": 0.7979258298873901, + "learning_rate": 4.837889984668076e-06, + "loss": 0.4999, + "step": 9495 + }, + { + "epoch": 0.6981326275547713, + "grad_norm": 0.8072546124458313, + "learning_rate": 4.837855850970662e-06, + "loss": 0.5253, + "step": 9496 + }, + { + "epoch": 0.6982061461549772, + "grad_norm": 0.8280038237571716, + "learning_rate": 4.837821713800493e-06, + "loss": 0.5056, + "step": 9497 + }, + { + "epoch": 0.6982796647551831, + "grad_norm": 0.8273754119873047, + "learning_rate": 4.837787573157616e-06, + "loss": 0.5107, + "step": 9498 + }, + { + "epoch": 0.6983531833553889, + "grad_norm": 0.7936444878578186, + "learning_rate": 4.837753429042084e-06, + "loss": 0.5186, + "step": 9499 + }, + { + "epoch": 0.6984267019555948, + "grad_norm": 0.8486437201499939, + "learning_rate": 4.837719281453947e-06, + "loss": 0.5831, + "step": 9500 + }, + { + "epoch": 0.6985002205558006, + "grad_norm": 0.767902672290802, + "learning_rate": 4.837685130393257e-06, + "loss": 0.4838, + "step": 9501 + }, + { + "epoch": 0.6985737391560065, + "grad_norm": 0.8042466044425964, + "learning_rate": 4.837650975860063e-06, + "loss": 0.5316, + "step": 9502 + }, + { + "epoch": 0.6986472577562123, + "grad_norm": 0.8983471393585205, + "learning_rate": 4.837616817854417e-06, + "loss": 0.5129, + "step": 9503 + }, + { + "epoch": 0.6987207763564182, + "grad_norm": 0.8811100125312805, + "learning_rate": 4.837582656376369e-06, + "loss": 0.5708, + "step": 9504 + }, + { + "epoch": 0.698794294956624, + "grad_norm": 0.87603759765625, + "learning_rate": 4.83754849142597e-06, + "loss": 0.5875, + "step": 9505 + }, + { + "epoch": 0.6988678135568299, + "grad_norm": 0.820332944393158, + "learning_rate": 4.837514323003271e-06, + "loss": 0.5283, + "step": 9506 + }, + { + "epoch": 0.6989413321570357, + "grad_norm": 0.8131139874458313, + "learning_rate": 4.8374801511083215e-06, + "loss": 0.5147, + "step": 9507 + }, + { + "epoch": 0.6990148507572416, + "grad_norm": 0.7657233476638794, + "learning_rate": 4.837445975741174e-06, + "loss": 0.5125, + "step": 9508 + }, + { + "epoch": 0.6990883693574474, + "grad_norm": 0.7885100245475769, + "learning_rate": 4.837411796901879e-06, + "loss": 0.5253, + "step": 9509 + }, + { + "epoch": 0.6991618879576533, + "grad_norm": 0.8588979244232178, + "learning_rate": 4.837377614590485e-06, + "loss": 0.5365, + "step": 9510 + }, + { + "epoch": 0.6992354065578591, + "grad_norm": 0.8055219650268555, + "learning_rate": 4.837343428807046e-06, + "loss": 0.5559, + "step": 9511 + }, + { + "epoch": 0.699308925158065, + "grad_norm": 0.9481772184371948, + "learning_rate": 4.837309239551611e-06, + "loss": 0.5201, + "step": 9512 + }, + { + "epoch": 0.6993824437582709, + "grad_norm": 0.8355988264083862, + "learning_rate": 4.837275046824231e-06, + "loss": 0.4946, + "step": 9513 + }, + { + "epoch": 0.6994559623584767, + "grad_norm": 0.8696788549423218, + "learning_rate": 4.837240850624957e-06, + "loss": 0.5815, + "step": 9514 + }, + { + "epoch": 0.6995294809586825, + "grad_norm": 0.8161396980285645, + "learning_rate": 4.83720665095384e-06, + "loss": 0.5079, + "step": 9515 + }, + { + "epoch": 0.6996029995588884, + "grad_norm": 0.7601771950721741, + "learning_rate": 4.8371724478109304e-06, + "loss": 0.4825, + "step": 9516 + }, + { + "epoch": 0.6996765181590943, + "grad_norm": 0.8329543471336365, + "learning_rate": 4.837138241196279e-06, + "loss": 0.548, + "step": 9517 + }, + { + "epoch": 0.6997500367593001, + "grad_norm": 0.8161126375198364, + "learning_rate": 4.837104031109936e-06, + "loss": 0.5333, + "step": 9518 + }, + { + "epoch": 0.6998235553595059, + "grad_norm": 0.8551704287528992, + "learning_rate": 4.837069817551954e-06, + "loss": 0.524, + "step": 9519 + }, + { + "epoch": 0.6998970739597118, + "grad_norm": 0.867491602897644, + "learning_rate": 4.837035600522382e-06, + "loss": 0.5124, + "step": 9520 + }, + { + "epoch": 0.6999705925599177, + "grad_norm": 0.8271973133087158, + "learning_rate": 4.837001380021272e-06, + "loss": 0.5519, + "step": 9521 + }, + { + "epoch": 0.7000441111601235, + "grad_norm": 0.8167027235031128, + "learning_rate": 4.836967156048674e-06, + "loss": 0.5532, + "step": 9522 + }, + { + "epoch": 0.7001176297603293, + "grad_norm": 0.8094309568405151, + "learning_rate": 4.83693292860464e-06, + "loss": 0.4948, + "step": 9523 + }, + { + "epoch": 0.7001911483605352, + "grad_norm": 0.8025787472724915, + "learning_rate": 4.83689869768922e-06, + "loss": 0.5201, + "step": 9524 + }, + { + "epoch": 0.7002646669607411, + "grad_norm": 0.8530917167663574, + "learning_rate": 4.836864463302464e-06, + "loss": 0.5337, + "step": 9525 + }, + { + "epoch": 0.700338185560947, + "grad_norm": 0.807580828666687, + "learning_rate": 4.836830225444425e-06, + "loss": 0.5491, + "step": 9526 + }, + { + "epoch": 0.7004117041611527, + "grad_norm": 0.8974081873893738, + "learning_rate": 4.836795984115152e-06, + "loss": 0.5868, + "step": 9527 + }, + { + "epoch": 0.7004852227613586, + "grad_norm": 0.8091118335723877, + "learning_rate": 4.836761739314697e-06, + "loss": 0.5264, + "step": 9528 + }, + { + "epoch": 0.7005587413615645, + "grad_norm": 0.8435501456260681, + "learning_rate": 4.836727491043109e-06, + "loss": 0.5741, + "step": 9529 + }, + { + "epoch": 0.7006322599617704, + "grad_norm": 0.8215237259864807, + "learning_rate": 4.836693239300442e-06, + "loss": 0.5403, + "step": 9530 + }, + { + "epoch": 0.7007057785619761, + "grad_norm": 0.8496243357658386, + "learning_rate": 4.836658984086745e-06, + "loss": 0.5666, + "step": 9531 + }, + { + "epoch": 0.700779297162182, + "grad_norm": 0.8479152917861938, + "learning_rate": 4.836624725402068e-06, + "loss": 0.558, + "step": 9532 + }, + { + "epoch": 0.7008528157623879, + "grad_norm": 0.8516320586204529, + "learning_rate": 4.836590463246463e-06, + "loss": 0.5914, + "step": 9533 + }, + { + "epoch": 0.7009263343625938, + "grad_norm": 0.8275769352912903, + "learning_rate": 4.836556197619981e-06, + "loss": 0.5354, + "step": 9534 + }, + { + "epoch": 0.7009998529627995, + "grad_norm": 0.8109186887741089, + "learning_rate": 4.836521928522674e-06, + "loss": 0.4784, + "step": 9535 + }, + { + "epoch": 0.7010733715630054, + "grad_norm": 0.8208656907081604, + "learning_rate": 4.83648765595459e-06, + "loss": 0.4944, + "step": 9536 + }, + { + "epoch": 0.7011468901632113, + "grad_norm": 0.8695296049118042, + "learning_rate": 4.836453379915782e-06, + "loss": 0.5386, + "step": 9537 + }, + { + "epoch": 0.7012204087634172, + "grad_norm": 0.8631294965744019, + "learning_rate": 4.836419100406301e-06, + "loss": 0.5848, + "step": 9538 + }, + { + "epoch": 0.701293927363623, + "grad_norm": 0.843903660774231, + "learning_rate": 4.836384817426196e-06, + "loss": 0.5926, + "step": 9539 + }, + { + "epoch": 0.7013674459638288, + "grad_norm": 0.7990731000900269, + "learning_rate": 4.836350530975521e-06, + "loss": 0.5374, + "step": 9540 + }, + { + "epoch": 0.7014409645640347, + "grad_norm": 0.8397380113601685, + "learning_rate": 4.836316241054324e-06, + "loss": 0.557, + "step": 9541 + }, + { + "epoch": 0.7015144831642406, + "grad_norm": 0.7814088463783264, + "learning_rate": 4.836281947662658e-06, + "loss": 0.4991, + "step": 9542 + }, + { + "epoch": 0.7015880017644464, + "grad_norm": 0.7844279408454895, + "learning_rate": 4.8362476508005726e-06, + "loss": 0.4929, + "step": 9543 + }, + { + "epoch": 0.7016615203646522, + "grad_norm": 0.8311430215835571, + "learning_rate": 4.8362133504681194e-06, + "loss": 0.5239, + "step": 9544 + }, + { + "epoch": 0.7017350389648581, + "grad_norm": 0.8030425906181335, + "learning_rate": 4.8361790466653494e-06, + "loss": 0.5324, + "step": 9545 + }, + { + "epoch": 0.701808557565064, + "grad_norm": 0.8617026209831238, + "learning_rate": 4.8361447393923126e-06, + "loss": 0.568, + "step": 9546 + }, + { + "epoch": 0.7018820761652698, + "grad_norm": 0.833361804485321, + "learning_rate": 4.836110428649061e-06, + "loss": 0.5647, + "step": 9547 + }, + { + "epoch": 0.7019555947654756, + "grad_norm": 0.9205982685089111, + "learning_rate": 4.836076114435646e-06, + "loss": 0.6055, + "step": 9548 + }, + { + "epoch": 0.7020291133656815, + "grad_norm": 0.9000717997550964, + "learning_rate": 4.8360417967521165e-06, + "loss": 0.529, + "step": 9549 + }, + { + "epoch": 0.7021026319658874, + "grad_norm": 0.8913305997848511, + "learning_rate": 4.836007475598526e-06, + "loss": 0.5033, + "step": 9550 + }, + { + "epoch": 0.7021761505660932, + "grad_norm": 0.7833141088485718, + "learning_rate": 4.835973150974924e-06, + "loss": 0.5107, + "step": 9551 + }, + { + "epoch": 0.702249669166299, + "grad_norm": 0.8568514585494995, + "learning_rate": 4.835938822881362e-06, + "loss": 0.5711, + "step": 9552 + }, + { + "epoch": 0.7023231877665049, + "grad_norm": 0.7922803163528442, + "learning_rate": 4.83590449131789e-06, + "loss": 0.5149, + "step": 9553 + }, + { + "epoch": 0.7023967063667108, + "grad_norm": 0.7716691493988037, + "learning_rate": 4.83587015628456e-06, + "loss": 0.5109, + "step": 9554 + }, + { + "epoch": 0.7024702249669166, + "grad_norm": 0.8655028343200684, + "learning_rate": 4.835835817781424e-06, + "loss": 0.5681, + "step": 9555 + }, + { + "epoch": 0.7025437435671225, + "grad_norm": 0.8062220811843872, + "learning_rate": 4.83580147580853e-06, + "loss": 0.5807, + "step": 9556 + }, + { + "epoch": 0.7026172621673283, + "grad_norm": 0.8493865132331848, + "learning_rate": 4.835767130365932e-06, + "loss": 0.5619, + "step": 9557 + }, + { + "epoch": 0.7026907807675342, + "grad_norm": 0.8193820714950562, + "learning_rate": 4.83573278145368e-06, + "loss": 0.5312, + "step": 9558 + }, + { + "epoch": 0.70276429936774, + "grad_norm": 0.8001705408096313, + "learning_rate": 4.835698429071824e-06, + "loss": 0.5066, + "step": 9559 + }, + { + "epoch": 0.7028378179679459, + "grad_norm": 0.876579999923706, + "learning_rate": 4.835664073220416e-06, + "loss": 0.5785, + "step": 9560 + }, + { + "epoch": 0.7029113365681517, + "grad_norm": 0.8394936323165894, + "learning_rate": 4.835629713899508e-06, + "loss": 0.5947, + "step": 9561 + }, + { + "epoch": 0.7029848551683576, + "grad_norm": 0.8125177025794983, + "learning_rate": 4.835595351109149e-06, + "loss": 0.5123, + "step": 9562 + }, + { + "epoch": 0.7030583737685634, + "grad_norm": 0.8002403974533081, + "learning_rate": 4.835560984849391e-06, + "loss": 0.4778, + "step": 9563 + }, + { + "epoch": 0.7031318923687693, + "grad_norm": 0.821810245513916, + "learning_rate": 4.835526615120285e-06, + "loss": 0.5542, + "step": 9564 + }, + { + "epoch": 0.7032054109689752, + "grad_norm": 0.7906498312950134, + "learning_rate": 4.8354922419218825e-06, + "loss": 0.5043, + "step": 9565 + }, + { + "epoch": 0.703278929569181, + "grad_norm": 0.8292668461799622, + "learning_rate": 4.835457865254234e-06, + "loss": 0.5384, + "step": 9566 + }, + { + "epoch": 0.7033524481693868, + "grad_norm": 0.8238008618354797, + "learning_rate": 4.835423485117391e-06, + "loss": 0.497, + "step": 9567 + }, + { + "epoch": 0.7034259667695927, + "grad_norm": 0.8533749580383301, + "learning_rate": 4.835389101511404e-06, + "loss": 0.5161, + "step": 9568 + }, + { + "epoch": 0.7034994853697986, + "grad_norm": 0.8293826580047607, + "learning_rate": 4.835354714436325e-06, + "loss": 0.5708, + "step": 9569 + }, + { + "epoch": 0.7035730039700044, + "grad_norm": 0.7862104773521423, + "learning_rate": 4.835320323892203e-06, + "loss": 0.4865, + "step": 9570 + }, + { + "epoch": 0.7036465225702102, + "grad_norm": 0.858702540397644, + "learning_rate": 4.835285929879091e-06, + "loss": 0.4847, + "step": 9571 + }, + { + "epoch": 0.7037200411704161, + "grad_norm": 0.7739377617835999, + "learning_rate": 4.835251532397041e-06, + "loss": 0.503, + "step": 9572 + }, + { + "epoch": 0.703793559770622, + "grad_norm": 0.85347580909729, + "learning_rate": 4.835217131446101e-06, + "loss": 0.5529, + "step": 9573 + }, + { + "epoch": 0.7038670783708278, + "grad_norm": 0.8566752076148987, + "learning_rate": 4.835182727026324e-06, + "loss": 0.523, + "step": 9574 + }, + { + "epoch": 0.7039405969710336, + "grad_norm": 0.7834494113922119, + "learning_rate": 4.8351483191377616e-06, + "loss": 0.5067, + "step": 9575 + }, + { + "epoch": 0.7040141155712395, + "grad_norm": 0.8683642745018005, + "learning_rate": 4.835113907780464e-06, + "loss": 0.5745, + "step": 9576 + }, + { + "epoch": 0.7040876341714454, + "grad_norm": 0.8486848473548889, + "learning_rate": 4.835079492954483e-06, + "loss": 0.5983, + "step": 9577 + }, + { + "epoch": 0.7041611527716513, + "grad_norm": 0.8431848287582397, + "learning_rate": 4.835045074659868e-06, + "loss": 0.5122, + "step": 9578 + }, + { + "epoch": 0.7042346713718571, + "grad_norm": 0.8156660199165344, + "learning_rate": 4.835010652896674e-06, + "loss": 0.5071, + "step": 9579 + }, + { + "epoch": 0.7043081899720629, + "grad_norm": 0.8357359170913696, + "learning_rate": 4.834976227664947e-06, + "loss": 0.532, + "step": 9580 + }, + { + "epoch": 0.7043817085722688, + "grad_norm": 0.8508239388465881, + "learning_rate": 4.834941798964742e-06, + "loss": 0.5709, + "step": 9581 + }, + { + "epoch": 0.7044552271724747, + "grad_norm": 0.8220207095146179, + "learning_rate": 4.834907366796108e-06, + "loss": 0.5521, + "step": 9582 + }, + { + "epoch": 0.7045287457726805, + "grad_norm": 0.8394476175308228, + "learning_rate": 4.834872931159098e-06, + "loss": 0.526, + "step": 9583 + }, + { + "epoch": 0.7046022643728863, + "grad_norm": 0.8674471974372864, + "learning_rate": 4.834838492053761e-06, + "loss": 0.5282, + "step": 9584 + }, + { + "epoch": 0.7046757829730922, + "grad_norm": 0.840364396572113, + "learning_rate": 4.8348040494801496e-06, + "loss": 0.5547, + "step": 9585 + }, + { + "epoch": 0.7047493015732981, + "grad_norm": 0.8102839589118958, + "learning_rate": 4.834769603438315e-06, + "loss": 0.4924, + "step": 9586 + }, + { + "epoch": 0.704822820173504, + "grad_norm": 0.8606665134429932, + "learning_rate": 4.834735153928308e-06, + "loss": 0.5513, + "step": 9587 + }, + { + "epoch": 0.7048963387737097, + "grad_norm": 0.8336743116378784, + "learning_rate": 4.834700700950179e-06, + "loss": 0.526, + "step": 9588 + }, + { + "epoch": 0.7049698573739156, + "grad_norm": 0.8839137554168701, + "learning_rate": 4.834666244503981e-06, + "loss": 0.5427, + "step": 9589 + }, + { + "epoch": 0.7050433759741215, + "grad_norm": 0.8271543383598328, + "learning_rate": 4.834631784589763e-06, + "loss": 0.5153, + "step": 9590 + }, + { + "epoch": 0.7051168945743274, + "grad_norm": 0.7920228838920593, + "learning_rate": 4.834597321207578e-06, + "loss": 0.5074, + "step": 9591 + }, + { + "epoch": 0.7051904131745331, + "grad_norm": 0.791559100151062, + "learning_rate": 4.834562854357477e-06, + "loss": 0.5118, + "step": 9592 + }, + { + "epoch": 0.705263931774739, + "grad_norm": 0.8054267168045044, + "learning_rate": 4.83452838403951e-06, + "loss": 0.5328, + "step": 9593 + }, + { + "epoch": 0.7053374503749449, + "grad_norm": 0.8500621914863586, + "learning_rate": 4.834493910253729e-06, + "loss": 0.5666, + "step": 9594 + }, + { + "epoch": 0.7054109689751508, + "grad_norm": 0.7944871783256531, + "learning_rate": 4.834459433000185e-06, + "loss": 0.534, + "step": 9595 + }, + { + "epoch": 0.7054844875753565, + "grad_norm": 0.8574583530426025, + "learning_rate": 4.83442495227893e-06, + "loss": 0.5197, + "step": 9596 + }, + { + "epoch": 0.7055580061755624, + "grad_norm": 0.7979824542999268, + "learning_rate": 4.834390468090015e-06, + "loss": 0.5391, + "step": 9597 + }, + { + "epoch": 0.7056315247757683, + "grad_norm": 0.8280622363090515, + "learning_rate": 4.834355980433489e-06, + "loss": 0.5077, + "step": 9598 + }, + { + "epoch": 0.7057050433759742, + "grad_norm": 0.90082186460495, + "learning_rate": 4.834321489309406e-06, + "loss": 0.6154, + "step": 9599 + }, + { + "epoch": 0.7057785619761799, + "grad_norm": 0.7747737169265747, + "learning_rate": 4.834286994717816e-06, + "loss": 0.5686, + "step": 9600 + }, + { + "epoch": 0.7058520805763858, + "grad_norm": 0.8690893054008484, + "learning_rate": 4.8342524966587715e-06, + "loss": 0.5285, + "step": 9601 + }, + { + "epoch": 0.7059255991765917, + "grad_norm": 0.8350905179977417, + "learning_rate": 4.834217995132321e-06, + "loss": 0.5027, + "step": 9602 + }, + { + "epoch": 0.7059991177767976, + "grad_norm": 0.8482285737991333, + "learning_rate": 4.834183490138519e-06, + "loss": 0.5564, + "step": 9603 + }, + { + "epoch": 0.7060726363770033, + "grad_norm": 0.8021520972251892, + "learning_rate": 4.834148981677414e-06, + "loss": 0.5414, + "step": 9604 + }, + { + "epoch": 0.7061461549772092, + "grad_norm": 0.8288962841033936, + "learning_rate": 4.834114469749059e-06, + "loss": 0.5778, + "step": 9605 + }, + { + "epoch": 0.7062196735774151, + "grad_norm": 0.8395583629608154, + "learning_rate": 4.834079954353506e-06, + "loss": 0.5095, + "step": 9606 + }, + { + "epoch": 0.706293192177621, + "grad_norm": 0.8333902359008789, + "learning_rate": 4.834045435490804e-06, + "loss": 0.5578, + "step": 9607 + }, + { + "epoch": 0.7063667107778268, + "grad_norm": 0.8063961267471313, + "learning_rate": 4.834010913161005e-06, + "loss": 0.5391, + "step": 9608 + }, + { + "epoch": 0.7064402293780326, + "grad_norm": 0.8268665075302124, + "learning_rate": 4.833976387364161e-06, + "loss": 0.5339, + "step": 9609 + }, + { + "epoch": 0.7065137479782385, + "grad_norm": 0.8396983742713928, + "learning_rate": 4.833941858100323e-06, + "loss": 0.5262, + "step": 9610 + }, + { + "epoch": 0.7065872665784444, + "grad_norm": 0.8206280469894409, + "learning_rate": 4.833907325369542e-06, + "loss": 0.5186, + "step": 9611 + }, + { + "epoch": 0.7066607851786502, + "grad_norm": 0.8038507699966431, + "learning_rate": 4.833872789171869e-06, + "loss": 0.5147, + "step": 9612 + }, + { + "epoch": 0.706734303778856, + "grad_norm": 0.8608254194259644, + "learning_rate": 4.833838249507356e-06, + "loss": 0.5228, + "step": 9613 + }, + { + "epoch": 0.7068078223790619, + "grad_norm": 0.8429824113845825, + "learning_rate": 4.8338037063760546e-06, + "loss": 0.5592, + "step": 9614 + }, + { + "epoch": 0.7068813409792678, + "grad_norm": 0.8602674007415771, + "learning_rate": 4.833769159778015e-06, + "loss": 0.587, + "step": 9615 + }, + { + "epoch": 0.7069548595794736, + "grad_norm": 0.831386923789978, + "learning_rate": 4.8337346097132895e-06, + "loss": 0.4945, + "step": 9616 + }, + { + "epoch": 0.7070283781796795, + "grad_norm": 0.8705587983131409, + "learning_rate": 4.8337000561819285e-06, + "loss": 0.5253, + "step": 9617 + }, + { + "epoch": 0.7071018967798853, + "grad_norm": 0.8576956987380981, + "learning_rate": 4.833665499183984e-06, + "loss": 0.5326, + "step": 9618 + }, + { + "epoch": 0.7071754153800912, + "grad_norm": 0.8109095096588135, + "learning_rate": 4.833630938719508e-06, + "loss": 0.4963, + "step": 9619 + }, + { + "epoch": 0.707248933980297, + "grad_norm": 0.8563196063041687, + "learning_rate": 4.833596374788551e-06, + "loss": 0.5811, + "step": 9620 + }, + { + "epoch": 0.7073224525805029, + "grad_norm": 0.8091835379600525, + "learning_rate": 4.833561807391163e-06, + "loss": 0.5375, + "step": 9621 + }, + { + "epoch": 0.7073959711807087, + "grad_norm": 0.9267717003822327, + "learning_rate": 4.833527236527398e-06, + "loss": 0.5411, + "step": 9622 + }, + { + "epoch": 0.7074694897809146, + "grad_norm": 0.8507131338119507, + "learning_rate": 4.8334926621973056e-06, + "loss": 0.5019, + "step": 9623 + }, + { + "epoch": 0.7075430083811204, + "grad_norm": 0.7793010473251343, + "learning_rate": 4.8334580844009375e-06, + "loss": 0.5113, + "step": 9624 + }, + { + "epoch": 0.7076165269813263, + "grad_norm": 0.7767878770828247, + "learning_rate": 4.8334235031383455e-06, + "loss": 0.5314, + "step": 9625 + }, + { + "epoch": 0.7076900455815321, + "grad_norm": 0.7992138862609863, + "learning_rate": 4.83338891840958e-06, + "loss": 0.5544, + "step": 9626 + }, + { + "epoch": 0.707763564181738, + "grad_norm": 0.8213288187980652, + "learning_rate": 4.8333543302146935e-06, + "loss": 0.5244, + "step": 9627 + }, + { + "epoch": 0.7078370827819438, + "grad_norm": 0.8690918684005737, + "learning_rate": 4.833319738553737e-06, + "loss": 0.5574, + "step": 9628 + }, + { + "epoch": 0.7079106013821497, + "grad_norm": 0.8173255920410156, + "learning_rate": 4.833285143426762e-06, + "loss": 0.5295, + "step": 9629 + }, + { + "epoch": 0.7079841199823556, + "grad_norm": 0.8490659594535828, + "learning_rate": 4.833250544833819e-06, + "loss": 0.5698, + "step": 9630 + }, + { + "epoch": 0.7080576385825614, + "grad_norm": 0.833594024181366, + "learning_rate": 4.833215942774961e-06, + "loss": 0.5116, + "step": 9631 + }, + { + "epoch": 0.7081311571827672, + "grad_norm": 0.8773328065872192, + "learning_rate": 4.833181337250238e-06, + "loss": 0.5435, + "step": 9632 + }, + { + "epoch": 0.7082046757829731, + "grad_norm": 0.8168200254440308, + "learning_rate": 4.833146728259702e-06, + "loss": 0.5262, + "step": 9633 + }, + { + "epoch": 0.708278194383179, + "grad_norm": 0.7964973449707031, + "learning_rate": 4.8331121158034035e-06, + "loss": 0.5577, + "step": 9634 + }, + { + "epoch": 0.7083517129833848, + "grad_norm": 0.8687986135482788, + "learning_rate": 4.8330774998813955e-06, + "loss": 0.603, + "step": 9635 + }, + { + "epoch": 0.7084252315835906, + "grad_norm": 0.8382397890090942, + "learning_rate": 4.833042880493729e-06, + "loss": 0.5808, + "step": 9636 + }, + { + "epoch": 0.7084987501837965, + "grad_norm": 0.8193957805633545, + "learning_rate": 4.833008257640455e-06, + "loss": 0.5077, + "step": 9637 + }, + { + "epoch": 0.7085722687840024, + "grad_norm": 0.7963986992835999, + "learning_rate": 4.832973631321623e-06, + "loss": 0.5423, + "step": 9638 + }, + { + "epoch": 0.7086457873842082, + "grad_norm": 0.860223650932312, + "learning_rate": 4.832939001537289e-06, + "loss": 0.5467, + "step": 9639 + }, + { + "epoch": 0.708719305984414, + "grad_norm": 0.8747392892837524, + "learning_rate": 4.832904368287501e-06, + "loss": 0.5384, + "step": 9640 + }, + { + "epoch": 0.7087928245846199, + "grad_norm": 0.8175116777420044, + "learning_rate": 4.832869731572311e-06, + "loss": 0.5457, + "step": 9641 + }, + { + "epoch": 0.7088663431848258, + "grad_norm": 0.7886648178100586, + "learning_rate": 4.8328350913917706e-06, + "loss": 0.5118, + "step": 9642 + }, + { + "epoch": 0.7089398617850317, + "grad_norm": 0.8057457804679871, + "learning_rate": 4.832800447745932e-06, + "loss": 0.5109, + "step": 9643 + }, + { + "epoch": 0.7090133803852374, + "grad_norm": 0.7927117943763733, + "learning_rate": 4.8327658006348454e-06, + "loss": 0.5207, + "step": 9644 + }, + { + "epoch": 0.7090868989854433, + "grad_norm": 0.8112571239471436, + "learning_rate": 4.832731150058564e-06, + "loss": 0.5344, + "step": 9645 + }, + { + "epoch": 0.7091604175856492, + "grad_norm": 0.7689523696899414, + "learning_rate": 4.8326964960171375e-06, + "loss": 0.48, + "step": 9646 + }, + { + "epoch": 0.7092339361858551, + "grad_norm": 0.8528713583946228, + "learning_rate": 4.832661838510618e-06, + "loss": 0.5455, + "step": 9647 + }, + { + "epoch": 0.7093074547860608, + "grad_norm": 0.8225540518760681, + "learning_rate": 4.832627177539057e-06, + "loss": 0.5487, + "step": 9648 + }, + { + "epoch": 0.7093809733862667, + "grad_norm": 0.8331884145736694, + "learning_rate": 4.832592513102507e-06, + "loss": 0.5865, + "step": 9649 + }, + { + "epoch": 0.7094544919864726, + "grad_norm": 0.8432352542877197, + "learning_rate": 4.8325578452010166e-06, + "loss": 0.5709, + "step": 9650 + }, + { + "epoch": 0.7095280105866785, + "grad_norm": 0.817959725856781, + "learning_rate": 4.832523173834641e-06, + "loss": 0.5072, + "step": 9651 + }, + { + "epoch": 0.7096015291868842, + "grad_norm": 0.8166900873184204, + "learning_rate": 4.832488499003429e-06, + "loss": 0.517, + "step": 9652 + }, + { + "epoch": 0.7096750477870901, + "grad_norm": 0.8441641330718994, + "learning_rate": 4.832453820707433e-06, + "loss": 0.567, + "step": 9653 + }, + { + "epoch": 0.709748566387296, + "grad_norm": 0.7899482846260071, + "learning_rate": 4.832419138946705e-06, + "loss": 0.5399, + "step": 9654 + }, + { + "epoch": 0.7098220849875019, + "grad_norm": 0.9232410192489624, + "learning_rate": 4.832384453721297e-06, + "loss": 0.506, + "step": 9655 + }, + { + "epoch": 0.7098956035877076, + "grad_norm": 0.8358385562896729, + "learning_rate": 4.832349765031259e-06, + "loss": 0.5324, + "step": 9656 + }, + { + "epoch": 0.7099691221879135, + "grad_norm": 0.8883395195007324, + "learning_rate": 4.832315072876642e-06, + "loss": 0.6235, + "step": 9657 + }, + { + "epoch": 0.7100426407881194, + "grad_norm": 0.789175808429718, + "learning_rate": 4.8322803772575e-06, + "loss": 0.5178, + "step": 9658 + }, + { + "epoch": 0.7101161593883253, + "grad_norm": 0.8162773847579956, + "learning_rate": 4.832245678173882e-06, + "loss": 0.5323, + "step": 9659 + }, + { + "epoch": 0.710189677988531, + "grad_norm": 0.8037261366844177, + "learning_rate": 4.832210975625842e-06, + "loss": 0.5629, + "step": 9660 + }, + { + "epoch": 0.7102631965887369, + "grad_norm": 0.8244509100914001, + "learning_rate": 4.83217626961343e-06, + "loss": 0.5451, + "step": 9661 + }, + { + "epoch": 0.7103367151889428, + "grad_norm": 0.8495389223098755, + "learning_rate": 4.832141560136698e-06, + "loss": 0.5266, + "step": 9662 + }, + { + "epoch": 0.7104102337891487, + "grad_norm": 0.8144846558570862, + "learning_rate": 4.832106847195697e-06, + "loss": 0.5525, + "step": 9663 + }, + { + "epoch": 0.7104837523893545, + "grad_norm": 0.8327447772026062, + "learning_rate": 4.832072130790479e-06, + "loss": 0.5303, + "step": 9664 + }, + { + "epoch": 0.7105572709895603, + "grad_norm": 0.7904263734817505, + "learning_rate": 4.8320374109210954e-06, + "loss": 0.5562, + "step": 9665 + }, + { + "epoch": 0.7106307895897662, + "grad_norm": 0.8873912692070007, + "learning_rate": 4.832002687587598e-06, + "loss": 0.5801, + "step": 9666 + }, + { + "epoch": 0.7107043081899721, + "grad_norm": 0.8386473655700684, + "learning_rate": 4.83196796079004e-06, + "loss": 0.5389, + "step": 9667 + }, + { + "epoch": 0.7107778267901779, + "grad_norm": 0.8687477707862854, + "learning_rate": 4.831933230528468e-06, + "loss": 0.5189, + "step": 9668 + }, + { + "epoch": 0.7108513453903837, + "grad_norm": 0.8493657112121582, + "learning_rate": 4.831898496802939e-06, + "loss": 0.557, + "step": 9669 + }, + { + "epoch": 0.7109248639905896, + "grad_norm": 0.8181754946708679, + "learning_rate": 4.831863759613502e-06, + "loss": 0.4939, + "step": 9670 + }, + { + "epoch": 0.7109983825907955, + "grad_norm": 0.8568223118782043, + "learning_rate": 4.831829018960209e-06, + "loss": 0.5366, + "step": 9671 + }, + { + "epoch": 0.7110719011910013, + "grad_norm": 0.8254125118255615, + "learning_rate": 4.831794274843112e-06, + "loss": 0.5498, + "step": 9672 + }, + { + "epoch": 0.7111454197912072, + "grad_norm": 0.8832051157951355, + "learning_rate": 4.831759527262263e-06, + "loss": 0.5673, + "step": 9673 + }, + { + "epoch": 0.711218938391413, + "grad_norm": 0.8426378965377808, + "learning_rate": 4.831724776217712e-06, + "loss": 0.5824, + "step": 9674 + }, + { + "epoch": 0.7112924569916189, + "grad_norm": 0.8268963098526001, + "learning_rate": 4.831690021709511e-06, + "loss": 0.553, + "step": 9675 + }, + { + "epoch": 0.7113659755918247, + "grad_norm": 0.8441260457038879, + "learning_rate": 4.831655263737713e-06, + "loss": 0.4875, + "step": 9676 + }, + { + "epoch": 0.7114394941920306, + "grad_norm": 0.7688295841217041, + "learning_rate": 4.831620502302368e-06, + "loss": 0.5026, + "step": 9677 + }, + { + "epoch": 0.7115130127922364, + "grad_norm": 0.8167409300804138, + "learning_rate": 4.83158573740353e-06, + "loss": 0.5272, + "step": 9678 + }, + { + "epoch": 0.7115865313924423, + "grad_norm": 0.8061999678611755, + "learning_rate": 4.831550969041248e-06, + "loss": 0.5638, + "step": 9679 + }, + { + "epoch": 0.7116600499926481, + "grad_norm": 0.8123975396156311, + "learning_rate": 4.8315161972155746e-06, + "loss": 0.5128, + "step": 9680 + }, + { + "epoch": 0.711733568592854, + "grad_norm": 0.8601416945457458, + "learning_rate": 4.831481421926561e-06, + "loss": 0.6004, + "step": 9681 + }, + { + "epoch": 0.7118070871930599, + "grad_norm": 0.7829545140266418, + "learning_rate": 4.83144664317426e-06, + "loss": 0.5173, + "step": 9682 + }, + { + "epoch": 0.7118806057932657, + "grad_norm": 0.8027050495147705, + "learning_rate": 4.831411860958724e-06, + "loss": 0.5041, + "step": 9683 + }, + { + "epoch": 0.7119541243934715, + "grad_norm": 0.8858834505081177, + "learning_rate": 4.831377075280002e-06, + "loss": 0.5618, + "step": 9684 + }, + { + "epoch": 0.7120276429936774, + "grad_norm": 0.8573254346847534, + "learning_rate": 4.831342286138146e-06, + "loss": 0.5841, + "step": 9685 + }, + { + "epoch": 0.7121011615938833, + "grad_norm": 0.826788604259491, + "learning_rate": 4.83130749353321e-06, + "loss": 0.5554, + "step": 9686 + }, + { + "epoch": 0.7121746801940891, + "grad_norm": 0.8661655187606812, + "learning_rate": 4.831272697465245e-06, + "loss": 0.5862, + "step": 9687 + }, + { + "epoch": 0.7122481987942949, + "grad_norm": 0.7933598756790161, + "learning_rate": 4.831237897934301e-06, + "loss": 0.5206, + "step": 9688 + }, + { + "epoch": 0.7123217173945008, + "grad_norm": 0.8243324160575867, + "learning_rate": 4.831203094940431e-06, + "loss": 0.5127, + "step": 9689 + }, + { + "epoch": 0.7123952359947067, + "grad_norm": 0.790665864944458, + "learning_rate": 4.831168288483686e-06, + "loss": 0.5104, + "step": 9690 + }, + { + "epoch": 0.7124687545949125, + "grad_norm": 0.8179237842559814, + "learning_rate": 4.831133478564119e-06, + "loss": 0.5384, + "step": 9691 + }, + { + "epoch": 0.7125422731951183, + "grad_norm": 0.8635038137435913, + "learning_rate": 4.83109866518178e-06, + "loss": 0.5779, + "step": 9692 + }, + { + "epoch": 0.7126157917953242, + "grad_norm": 0.8411722779273987, + "learning_rate": 4.831063848336722e-06, + "loss": 0.5178, + "step": 9693 + }, + { + "epoch": 0.7126893103955301, + "grad_norm": 0.8075034022331238, + "learning_rate": 4.831029028028997e-06, + "loss": 0.5486, + "step": 9694 + }, + { + "epoch": 0.712762828995736, + "grad_norm": 0.866158664226532, + "learning_rate": 4.830994204258656e-06, + "loss": 0.5569, + "step": 9695 + }, + { + "epoch": 0.7128363475959417, + "grad_norm": 0.809913158416748, + "learning_rate": 4.830959377025749e-06, + "loss": 0.505, + "step": 9696 + }, + { + "epoch": 0.7129098661961476, + "grad_norm": 0.8224928379058838, + "learning_rate": 4.8309245463303315e-06, + "loss": 0.502, + "step": 9697 + }, + { + "epoch": 0.7129833847963535, + "grad_norm": 0.794950008392334, + "learning_rate": 4.830889712172452e-06, + "loss": 0.5731, + "step": 9698 + }, + { + "epoch": 0.7130569033965594, + "grad_norm": 0.8408983945846558, + "learning_rate": 4.830854874552164e-06, + "loss": 0.5376, + "step": 9699 + }, + { + "epoch": 0.7131304219967651, + "grad_norm": 0.7958944439888, + "learning_rate": 4.8308200334695185e-06, + "loss": 0.4798, + "step": 9700 + }, + { + "epoch": 0.713203940596971, + "grad_norm": 0.7940548062324524, + "learning_rate": 4.830785188924567e-06, + "loss": 0.5191, + "step": 9701 + }, + { + "epoch": 0.7132774591971769, + "grad_norm": 0.8574374914169312, + "learning_rate": 4.830750340917363e-06, + "loss": 0.5501, + "step": 9702 + }, + { + "epoch": 0.7133509777973828, + "grad_norm": 0.8798186182975769, + "learning_rate": 4.830715489447956e-06, + "loss": 0.56, + "step": 9703 + }, + { + "epoch": 0.7134244963975885, + "grad_norm": 0.9193743467330933, + "learning_rate": 4.8306806345164e-06, + "loss": 0.5574, + "step": 9704 + }, + { + "epoch": 0.7134980149977944, + "grad_norm": 0.8173635005950928, + "learning_rate": 4.8306457761227435e-06, + "loss": 0.5561, + "step": 9705 + }, + { + "epoch": 0.7135715335980003, + "grad_norm": 0.8928841948509216, + "learning_rate": 4.830610914267041e-06, + "loss": 0.569, + "step": 9706 + }, + { + "epoch": 0.7136450521982062, + "grad_norm": 0.8608110547065735, + "learning_rate": 4.830576048949345e-06, + "loss": 0.5687, + "step": 9707 + }, + { + "epoch": 0.7137185707984119, + "grad_norm": 0.8028918504714966, + "learning_rate": 4.830541180169704e-06, + "loss": 0.533, + "step": 9708 + }, + { + "epoch": 0.7137920893986178, + "grad_norm": 0.878013551235199, + "learning_rate": 4.830506307928173e-06, + "loss": 0.5572, + "step": 9709 + }, + { + "epoch": 0.7138656079988237, + "grad_norm": 0.8620849251747131, + "learning_rate": 4.830471432224802e-06, + "loss": 0.5821, + "step": 9710 + }, + { + "epoch": 0.7139391265990296, + "grad_norm": 0.8260915875434875, + "learning_rate": 4.830436553059643e-06, + "loss": 0.4891, + "step": 9711 + }, + { + "epoch": 0.7140126451992354, + "grad_norm": 0.8022705912590027, + "learning_rate": 4.830401670432749e-06, + "loss": 0.4949, + "step": 9712 + }, + { + "epoch": 0.7140861637994412, + "grad_norm": 0.8339937925338745, + "learning_rate": 4.83036678434417e-06, + "loss": 0.5481, + "step": 9713 + }, + { + "epoch": 0.7141596823996471, + "grad_norm": 0.816699206829071, + "learning_rate": 4.8303318947939596e-06, + "loss": 0.5102, + "step": 9714 + }, + { + "epoch": 0.714233200999853, + "grad_norm": 0.8500043153762817, + "learning_rate": 4.830297001782169e-06, + "loss": 0.5302, + "step": 9715 + }, + { + "epoch": 0.7143067196000589, + "grad_norm": 0.8243101239204407, + "learning_rate": 4.830262105308848e-06, + "loss": 0.5334, + "step": 9716 + }, + { + "epoch": 0.7143802382002646, + "grad_norm": 0.7780969142913818, + "learning_rate": 4.830227205374052e-06, + "loss": 0.5385, + "step": 9717 + }, + { + "epoch": 0.7144537568004705, + "grad_norm": 0.8495100736618042, + "learning_rate": 4.83019230197783e-06, + "loss": 0.5648, + "step": 9718 + }, + { + "epoch": 0.7145272754006764, + "grad_norm": 0.8531444668769836, + "learning_rate": 4.830157395120236e-06, + "loss": 0.5348, + "step": 9719 + }, + { + "epoch": 0.7146007940008823, + "grad_norm": 0.8482696413993835, + "learning_rate": 4.83012248480132e-06, + "loss": 0.5496, + "step": 9720 + }, + { + "epoch": 0.714674312601088, + "grad_norm": 0.8016955256462097, + "learning_rate": 4.830087571021135e-06, + "loss": 0.4807, + "step": 9721 + }, + { + "epoch": 0.7147478312012939, + "grad_norm": 0.7941135168075562, + "learning_rate": 4.830052653779732e-06, + "loss": 0.52, + "step": 9722 + }, + { + "epoch": 0.7148213498014998, + "grad_norm": 0.8396117091178894, + "learning_rate": 4.830017733077163e-06, + "loss": 0.5398, + "step": 9723 + }, + { + "epoch": 0.7148948684017057, + "grad_norm": 0.8291444182395935, + "learning_rate": 4.829982808913482e-06, + "loss": 0.5572, + "step": 9724 + }, + { + "epoch": 0.7149683870019115, + "grad_norm": 0.8287760615348816, + "learning_rate": 4.829947881288738e-06, + "loss": 0.5174, + "step": 9725 + }, + { + "epoch": 0.7150419056021173, + "grad_norm": 0.854559063911438, + "learning_rate": 4.829912950202984e-06, + "loss": 0.5781, + "step": 9726 + }, + { + "epoch": 0.7151154242023232, + "grad_norm": 0.855105459690094, + "learning_rate": 4.8298780156562715e-06, + "loss": 0.5568, + "step": 9727 + }, + { + "epoch": 0.7151889428025291, + "grad_norm": 0.7507979273796082, + "learning_rate": 4.829843077648654e-06, + "loss": 0.468, + "step": 9728 + }, + { + "epoch": 0.7152624614027349, + "grad_norm": 0.80118328332901, + "learning_rate": 4.8298081361801815e-06, + "loss": 0.4827, + "step": 9729 + }, + { + "epoch": 0.7153359800029407, + "grad_norm": 0.8244906067848206, + "learning_rate": 4.829773191250906e-06, + "loss": 0.5325, + "step": 9730 + }, + { + "epoch": 0.7154094986031466, + "grad_norm": 0.7894810438156128, + "learning_rate": 4.82973824286088e-06, + "loss": 0.5591, + "step": 9731 + }, + { + "epoch": 0.7154830172033525, + "grad_norm": 0.7875646352767944, + "learning_rate": 4.8297032910101575e-06, + "loss": 0.5209, + "step": 9732 + }, + { + "epoch": 0.7155565358035583, + "grad_norm": 0.824031412601471, + "learning_rate": 4.829668335698786e-06, + "loss": 0.5725, + "step": 9733 + }, + { + "epoch": 0.7156300544037641, + "grad_norm": 0.8570422530174255, + "learning_rate": 4.829633376926821e-06, + "loss": 0.5345, + "step": 9734 + }, + { + "epoch": 0.71570357300397, + "grad_norm": 0.7895466685295105, + "learning_rate": 4.829598414694313e-06, + "loss": 0.5425, + "step": 9735 + }, + { + "epoch": 0.7157770916041759, + "grad_norm": 0.8369321823120117, + "learning_rate": 4.829563449001313e-06, + "loss": 0.5389, + "step": 9736 + }, + { + "epoch": 0.7158506102043817, + "grad_norm": 0.7847894430160522, + "learning_rate": 4.8295284798478755e-06, + "loss": 0.5022, + "step": 9737 + }, + { + "epoch": 0.7159241288045876, + "grad_norm": 0.811890721321106, + "learning_rate": 4.82949350723405e-06, + "loss": 0.5514, + "step": 9738 + }, + { + "epoch": 0.7159976474047934, + "grad_norm": 0.847497284412384, + "learning_rate": 4.82945853115989e-06, + "loss": 0.5105, + "step": 9739 + }, + { + "epoch": 0.7160711660049993, + "grad_norm": 0.7534061074256897, + "learning_rate": 4.829423551625447e-06, + "loss": 0.4769, + "step": 9740 + }, + { + "epoch": 0.7161446846052051, + "grad_norm": 0.8219832181930542, + "learning_rate": 4.829388568630773e-06, + "loss": 0.547, + "step": 9741 + }, + { + "epoch": 0.716218203205411, + "grad_norm": 0.8846962451934814, + "learning_rate": 4.829353582175919e-06, + "loss": 0.5476, + "step": 9742 + }, + { + "epoch": 0.7162917218056168, + "grad_norm": 0.8783580660820007, + "learning_rate": 4.829318592260939e-06, + "loss": 0.5494, + "step": 9743 + }, + { + "epoch": 0.7163652404058227, + "grad_norm": 0.8545105457305908, + "learning_rate": 4.829283598885882e-06, + "loss": 0.5282, + "step": 9744 + }, + { + "epoch": 0.7164387590060285, + "grad_norm": 0.8623099327087402, + "learning_rate": 4.8292486020508035e-06, + "loss": 0.5304, + "step": 9745 + }, + { + "epoch": 0.7165122776062344, + "grad_norm": 0.8315303325653076, + "learning_rate": 4.829213601755753e-06, + "loss": 0.5015, + "step": 9746 + }, + { + "epoch": 0.7165857962064403, + "grad_norm": 0.9087173342704773, + "learning_rate": 4.8291785980007834e-06, + "loss": 0.5863, + "step": 9747 + }, + { + "epoch": 0.7166593148066461, + "grad_norm": 0.7914242148399353, + "learning_rate": 4.829143590785947e-06, + "loss": 0.5117, + "step": 9748 + }, + { + "epoch": 0.7167328334068519, + "grad_norm": 0.8220587968826294, + "learning_rate": 4.8291085801112945e-06, + "loss": 0.5229, + "step": 9749 + }, + { + "epoch": 0.7168063520070578, + "grad_norm": 0.8477752804756165, + "learning_rate": 4.829073565976879e-06, + "loss": 0.5117, + "step": 9750 + }, + { + "epoch": 0.7168798706072637, + "grad_norm": 0.8001118302345276, + "learning_rate": 4.829038548382752e-06, + "loss": 0.4746, + "step": 9751 + }, + { + "epoch": 0.7169533892074695, + "grad_norm": 0.850363552570343, + "learning_rate": 4.829003527328966e-06, + "loss": 0.556, + "step": 9752 + }, + { + "epoch": 0.7170269078076753, + "grad_norm": 0.8398752808570862, + "learning_rate": 4.8289685028155734e-06, + "loss": 0.5276, + "step": 9753 + }, + { + "epoch": 0.7171004264078812, + "grad_norm": 0.8558363914489746, + "learning_rate": 4.828933474842625e-06, + "loss": 0.5478, + "step": 9754 + }, + { + "epoch": 0.7171739450080871, + "grad_norm": 0.8790417313575745, + "learning_rate": 4.828898443410173e-06, + "loss": 0.5692, + "step": 9755 + }, + { + "epoch": 0.717247463608293, + "grad_norm": 0.8442932367324829, + "learning_rate": 4.8288634085182705e-06, + "loss": 0.5216, + "step": 9756 + }, + { + "epoch": 0.7173209822084987, + "grad_norm": 0.9183232188224792, + "learning_rate": 4.828828370166969e-06, + "loss": 0.5408, + "step": 9757 + }, + { + "epoch": 0.7173945008087046, + "grad_norm": 0.8609585762023926, + "learning_rate": 4.82879332835632e-06, + "loss": 0.548, + "step": 9758 + }, + { + "epoch": 0.7174680194089105, + "grad_norm": 0.8355951905250549, + "learning_rate": 4.828758283086377e-06, + "loss": 0.5775, + "step": 9759 + }, + { + "epoch": 0.7175415380091164, + "grad_norm": 0.8200667500495911, + "learning_rate": 4.82872323435719e-06, + "loss": 0.5207, + "step": 9760 + }, + { + "epoch": 0.7176150566093221, + "grad_norm": 0.79792320728302, + "learning_rate": 4.828688182168813e-06, + "loss": 0.5255, + "step": 9761 + }, + { + "epoch": 0.717688575209528, + "grad_norm": 0.8614084720611572, + "learning_rate": 4.828653126521297e-06, + "loss": 0.5564, + "step": 9762 + }, + { + "epoch": 0.7177620938097339, + "grad_norm": 0.8147640824317932, + "learning_rate": 4.828618067414694e-06, + "loss": 0.5556, + "step": 9763 + }, + { + "epoch": 0.7178356124099398, + "grad_norm": 0.8486039638519287, + "learning_rate": 4.828583004849057e-06, + "loss": 0.5351, + "step": 9764 + }, + { + "epoch": 0.7179091310101455, + "grad_norm": 0.8406957387924194, + "learning_rate": 4.828547938824436e-06, + "loss": 0.5531, + "step": 9765 + }, + { + "epoch": 0.7179826496103514, + "grad_norm": 0.7953202724456787, + "learning_rate": 4.8285128693408855e-06, + "loss": 0.4938, + "step": 9766 + }, + { + "epoch": 0.7180561682105573, + "grad_norm": 0.7939335107803345, + "learning_rate": 4.828477796398458e-06, + "loss": 0.4985, + "step": 9767 + }, + { + "epoch": 0.7181296868107632, + "grad_norm": 0.8465196490287781, + "learning_rate": 4.828442719997202e-06, + "loss": 0.5609, + "step": 9768 + }, + { + "epoch": 0.7182032054109689, + "grad_norm": 0.8438934087753296, + "learning_rate": 4.828407640137173e-06, + "loss": 0.5465, + "step": 9769 + }, + { + "epoch": 0.7182767240111748, + "grad_norm": 0.8229069709777832, + "learning_rate": 4.828372556818421e-06, + "loss": 0.5467, + "step": 9770 + }, + { + "epoch": 0.7183502426113807, + "grad_norm": 0.8691814541816711, + "learning_rate": 4.828337470041e-06, + "loss": 0.4997, + "step": 9771 + }, + { + "epoch": 0.7184237612115866, + "grad_norm": 0.8436464667320251, + "learning_rate": 4.828302379804961e-06, + "loss": 0.5145, + "step": 9772 + }, + { + "epoch": 0.7184972798117923, + "grad_norm": 0.8739370107650757, + "learning_rate": 4.828267286110357e-06, + "loss": 0.5334, + "step": 9773 + }, + { + "epoch": 0.7185707984119982, + "grad_norm": 0.8303817510604858, + "learning_rate": 4.828232188957238e-06, + "loss": 0.5131, + "step": 9774 + }, + { + "epoch": 0.7186443170122041, + "grad_norm": 0.9195980429649353, + "learning_rate": 4.828197088345658e-06, + "loss": 0.6117, + "step": 9775 + }, + { + "epoch": 0.71871783561241, + "grad_norm": 0.8529105186462402, + "learning_rate": 4.8281619842756685e-06, + "loss": 0.5612, + "step": 9776 + }, + { + "epoch": 0.7187913542126158, + "grad_norm": 0.7997511625289917, + "learning_rate": 4.828126876747324e-06, + "loss": 0.5566, + "step": 9777 + }, + { + "epoch": 0.7188648728128216, + "grad_norm": 0.8020551204681396, + "learning_rate": 4.828091765760672e-06, + "loss": 0.5273, + "step": 9778 + }, + { + "epoch": 0.7189383914130275, + "grad_norm": 0.8576171398162842, + "learning_rate": 4.828056651315768e-06, + "loss": 0.5246, + "step": 9779 + }, + { + "epoch": 0.7190119100132334, + "grad_norm": 0.8597192764282227, + "learning_rate": 4.828021533412664e-06, + "loss": 0.5256, + "step": 9780 + }, + { + "epoch": 0.7190854286134392, + "grad_norm": 0.8310818672180176, + "learning_rate": 4.8279864120514094e-06, + "loss": 0.5724, + "step": 9781 + }, + { + "epoch": 0.719158947213645, + "grad_norm": 0.8459697961807251, + "learning_rate": 4.82795128723206e-06, + "loss": 0.5616, + "step": 9782 + }, + { + "epoch": 0.7192324658138509, + "grad_norm": 0.8487840294837952, + "learning_rate": 4.827916158954667e-06, + "loss": 0.4809, + "step": 9783 + }, + { + "epoch": 0.7193059844140568, + "grad_norm": 0.916728138923645, + "learning_rate": 4.8278810272192814e-06, + "loss": 0.5511, + "step": 9784 + }, + { + "epoch": 0.7193795030142626, + "grad_norm": 0.8507381081581116, + "learning_rate": 4.827845892025955e-06, + "loss": 0.5448, + "step": 9785 + }, + { + "epoch": 0.7194530216144684, + "grad_norm": 0.8226279020309448, + "learning_rate": 4.827810753374743e-06, + "loss": 0.5085, + "step": 9786 + }, + { + "epoch": 0.7195265402146743, + "grad_norm": 0.8654252886772156, + "learning_rate": 4.827775611265694e-06, + "loss": 0.5219, + "step": 9787 + }, + { + "epoch": 0.7196000588148802, + "grad_norm": 0.8231862783432007, + "learning_rate": 4.827740465698862e-06, + "loss": 0.5262, + "step": 9788 + }, + { + "epoch": 0.719673577415086, + "grad_norm": 0.8923018574714661, + "learning_rate": 4.8277053166743e-06, + "loss": 0.5269, + "step": 9789 + }, + { + "epoch": 0.7197470960152919, + "grad_norm": 0.8341784477233887, + "learning_rate": 4.827670164192059e-06, + "loss": 0.4919, + "step": 9790 + }, + { + "epoch": 0.7198206146154977, + "grad_norm": 0.8012596964836121, + "learning_rate": 4.827635008252191e-06, + "loss": 0.4905, + "step": 9791 + }, + { + "epoch": 0.7198941332157036, + "grad_norm": 0.8163051009178162, + "learning_rate": 4.827599848854748e-06, + "loss": 0.5336, + "step": 9792 + }, + { + "epoch": 0.7199676518159094, + "grad_norm": 0.804246723651886, + "learning_rate": 4.827564685999784e-06, + "loss": 0.5529, + "step": 9793 + }, + { + "epoch": 0.7200411704161153, + "grad_norm": 0.8321088552474976, + "learning_rate": 4.8275295196873494e-06, + "loss": 0.5029, + "step": 9794 + }, + { + "epoch": 0.7201146890163211, + "grad_norm": 0.8440963625907898, + "learning_rate": 4.827494349917497e-06, + "loss": 0.5428, + "step": 9795 + }, + { + "epoch": 0.720188207616527, + "grad_norm": 0.9232602715492249, + "learning_rate": 4.827459176690279e-06, + "loss": 0.5998, + "step": 9796 + }, + { + "epoch": 0.7202617262167328, + "grad_norm": 0.8762557506561279, + "learning_rate": 4.827424000005749e-06, + "loss": 0.5884, + "step": 9797 + }, + { + "epoch": 0.7203352448169387, + "grad_norm": 0.8250618577003479, + "learning_rate": 4.8273888198639574e-06, + "loss": 0.5371, + "step": 9798 + }, + { + "epoch": 0.7204087634171445, + "grad_norm": 0.8467187285423279, + "learning_rate": 4.8273536362649575e-06, + "loss": 0.5664, + "step": 9799 + }, + { + "epoch": 0.7204822820173504, + "grad_norm": 0.8213855028152466, + "learning_rate": 4.827318449208801e-06, + "loss": 0.5516, + "step": 9800 + }, + { + "epoch": 0.7205558006175562, + "grad_norm": 0.8814006447792053, + "learning_rate": 4.827283258695541e-06, + "loss": 0.5711, + "step": 9801 + }, + { + "epoch": 0.7206293192177621, + "grad_norm": 0.8008385300636292, + "learning_rate": 4.827248064725227e-06, + "loss": 0.4941, + "step": 9802 + }, + { + "epoch": 0.720702837817968, + "grad_norm": 0.8531302809715271, + "learning_rate": 4.827212867297915e-06, + "loss": 0.5094, + "step": 9803 + }, + { + "epoch": 0.7207763564181738, + "grad_norm": 0.8252785801887512, + "learning_rate": 4.827177666413656e-06, + "loss": 0.5182, + "step": 9804 + }, + { + "epoch": 0.7208498750183796, + "grad_norm": 0.8142712712287903, + "learning_rate": 4.827142462072502e-06, + "loss": 0.5509, + "step": 9805 + }, + { + "epoch": 0.7209233936185855, + "grad_norm": 0.7904267311096191, + "learning_rate": 4.827107254274505e-06, + "loss": 0.4979, + "step": 9806 + }, + { + "epoch": 0.7209969122187914, + "grad_norm": 0.8608935475349426, + "learning_rate": 4.8270720430197175e-06, + "loss": 0.5155, + "step": 9807 + }, + { + "epoch": 0.7210704308189972, + "grad_norm": 0.8737178444862366, + "learning_rate": 4.827036828308192e-06, + "loss": 0.5236, + "step": 9808 + }, + { + "epoch": 0.721143949419203, + "grad_norm": 0.8672618269920349, + "learning_rate": 4.827001610139981e-06, + "loss": 0.5492, + "step": 9809 + }, + { + "epoch": 0.7212174680194089, + "grad_norm": 0.7969850301742554, + "learning_rate": 4.826966388515136e-06, + "loss": 0.5271, + "step": 9810 + }, + { + "epoch": 0.7212909866196148, + "grad_norm": 0.8419750332832336, + "learning_rate": 4.8269311634337105e-06, + "loss": 0.5896, + "step": 9811 + }, + { + "epoch": 0.7213645052198207, + "grad_norm": 0.8168864846229553, + "learning_rate": 4.826895934895755e-06, + "loss": 0.5619, + "step": 9812 + }, + { + "epoch": 0.7214380238200264, + "grad_norm": 0.7877843976020813, + "learning_rate": 4.826860702901325e-06, + "loss": 0.5002, + "step": 9813 + }, + { + "epoch": 0.7215115424202323, + "grad_norm": 0.8131833672523499, + "learning_rate": 4.8268254674504685e-06, + "loss": 0.5349, + "step": 9814 + }, + { + "epoch": 0.7215850610204382, + "grad_norm": 0.8393976092338562, + "learning_rate": 4.826790228543242e-06, + "loss": 0.5459, + "step": 9815 + }, + { + "epoch": 0.7216585796206441, + "grad_norm": 0.8843039274215698, + "learning_rate": 4.826754986179695e-06, + "loss": 0.5617, + "step": 9816 + }, + { + "epoch": 0.7217320982208498, + "grad_norm": 0.7994672656059265, + "learning_rate": 4.8267197403598805e-06, + "loss": 0.5548, + "step": 9817 + }, + { + "epoch": 0.7218056168210557, + "grad_norm": 0.8340211510658264, + "learning_rate": 4.8266844910838525e-06, + "loss": 0.4969, + "step": 9818 + }, + { + "epoch": 0.7218791354212616, + "grad_norm": 0.8004149794578552, + "learning_rate": 4.826649238351662e-06, + "loss": 0.5261, + "step": 9819 + }, + { + "epoch": 0.7219526540214675, + "grad_norm": 0.8185774683952332, + "learning_rate": 4.82661398216336e-06, + "loss": 0.5161, + "step": 9820 + }, + { + "epoch": 0.7220261726216732, + "grad_norm": 0.8005030155181885, + "learning_rate": 4.826578722519002e-06, + "loss": 0.504, + "step": 9821 + }, + { + "epoch": 0.7220996912218791, + "grad_norm": 0.8379709124565125, + "learning_rate": 4.826543459418638e-06, + "loss": 0.5355, + "step": 9822 + }, + { + "epoch": 0.722173209822085, + "grad_norm": 0.870300829410553, + "learning_rate": 4.826508192862321e-06, + "loss": 0.5493, + "step": 9823 + }, + { + "epoch": 0.7222467284222909, + "grad_norm": 0.8316874504089355, + "learning_rate": 4.8264729228501036e-06, + "loss": 0.5287, + "step": 9824 + }, + { + "epoch": 0.7223202470224966, + "grad_norm": 0.8424426317214966, + "learning_rate": 4.826437649382038e-06, + "loss": 0.5237, + "step": 9825 + }, + { + "epoch": 0.7223937656227025, + "grad_norm": 0.8000940084457397, + "learning_rate": 4.826402372458177e-06, + "loss": 0.5401, + "step": 9826 + }, + { + "epoch": 0.7224672842229084, + "grad_norm": 0.7858731150627136, + "learning_rate": 4.826367092078572e-06, + "loss": 0.4834, + "step": 9827 + }, + { + "epoch": 0.7225408028231143, + "grad_norm": 0.8350973725318909, + "learning_rate": 4.826331808243276e-06, + "loss": 0.5515, + "step": 9828 + }, + { + "epoch": 0.72261432142332, + "grad_norm": 0.8613260984420776, + "learning_rate": 4.826296520952343e-06, + "loss": 0.5106, + "step": 9829 + }, + { + "epoch": 0.7226878400235259, + "grad_norm": 0.8415156602859497, + "learning_rate": 4.826261230205822e-06, + "loss": 0.5121, + "step": 9830 + }, + { + "epoch": 0.7227613586237318, + "grad_norm": 0.8417580127716064, + "learning_rate": 4.826225936003769e-06, + "loss": 0.5564, + "step": 9831 + }, + { + "epoch": 0.7228348772239377, + "grad_norm": 0.8708354830741882, + "learning_rate": 4.826190638346233e-06, + "loss": 0.6116, + "step": 9832 + }, + { + "epoch": 0.7229083958241435, + "grad_norm": 0.8667568564414978, + "learning_rate": 4.82615533723327e-06, + "loss": 0.5055, + "step": 9833 + }, + { + "epoch": 0.7229819144243493, + "grad_norm": 0.7877839803695679, + "learning_rate": 4.826120032664929e-06, + "loss": 0.5065, + "step": 9834 + }, + { + "epoch": 0.7230554330245552, + "grad_norm": 0.8217963576316833, + "learning_rate": 4.826084724641265e-06, + "loss": 0.5369, + "step": 9835 + }, + { + "epoch": 0.7231289516247611, + "grad_norm": 0.854278564453125, + "learning_rate": 4.826049413162329e-06, + "loss": 0.5553, + "step": 9836 + }, + { + "epoch": 0.7232024702249669, + "grad_norm": 0.854922890663147, + "learning_rate": 4.826014098228175e-06, + "loss": 0.5325, + "step": 9837 + }, + { + "epoch": 0.7232759888251727, + "grad_norm": 0.8458940386772156, + "learning_rate": 4.825978779838853e-06, + "loss": 0.561, + "step": 9838 + }, + { + "epoch": 0.7233495074253786, + "grad_norm": 0.8273646831512451, + "learning_rate": 4.825943457994418e-06, + "loss": 0.5502, + "step": 9839 + }, + { + "epoch": 0.7234230260255845, + "grad_norm": 0.8586956858634949, + "learning_rate": 4.825908132694921e-06, + "loss": 0.5267, + "step": 9840 + }, + { + "epoch": 0.7234965446257903, + "grad_norm": 0.7804674506187439, + "learning_rate": 4.8258728039404145e-06, + "loss": 0.463, + "step": 9841 + }, + { + "epoch": 0.7235700632259962, + "grad_norm": 0.8376879692077637, + "learning_rate": 4.825837471730952e-06, + "loss": 0.5911, + "step": 9842 + }, + { + "epoch": 0.723643581826202, + "grad_norm": 0.774604856967926, + "learning_rate": 4.825802136066585e-06, + "loss": 0.521, + "step": 9843 + }, + { + "epoch": 0.7237171004264079, + "grad_norm": 0.8157171010971069, + "learning_rate": 4.825766796947366e-06, + "loss": 0.4806, + "step": 9844 + }, + { + "epoch": 0.7237906190266137, + "grad_norm": 0.8550700545310974, + "learning_rate": 4.825731454373348e-06, + "loss": 0.5666, + "step": 9845 + }, + { + "epoch": 0.7238641376268196, + "grad_norm": 0.8240312337875366, + "learning_rate": 4.825696108344583e-06, + "loss": 0.5224, + "step": 9846 + }, + { + "epoch": 0.7239376562270254, + "grad_norm": 0.7887492179870605, + "learning_rate": 4.825660758861125e-06, + "loss": 0.5268, + "step": 9847 + }, + { + "epoch": 0.7240111748272313, + "grad_norm": 0.8174529671669006, + "learning_rate": 4.825625405923023e-06, + "loss": 0.4847, + "step": 9848 + }, + { + "epoch": 0.7240846934274371, + "grad_norm": 0.8195132613182068, + "learning_rate": 4.825590049530334e-06, + "loss": 0.5131, + "step": 9849 + }, + { + "epoch": 0.724158212027643, + "grad_norm": 0.8228925466537476, + "learning_rate": 4.825554689683107e-06, + "loss": 0.5178, + "step": 9850 + }, + { + "epoch": 0.7242317306278488, + "grad_norm": 0.884719967842102, + "learning_rate": 4.825519326381397e-06, + "loss": 0.5822, + "step": 9851 + }, + { + "epoch": 0.7243052492280547, + "grad_norm": 0.8194149136543274, + "learning_rate": 4.825483959625255e-06, + "loss": 0.5098, + "step": 9852 + }, + { + "epoch": 0.7243787678282605, + "grad_norm": 0.861384928226471, + "learning_rate": 4.825448589414733e-06, + "loss": 0.5772, + "step": 9853 + }, + { + "epoch": 0.7244522864284664, + "grad_norm": 0.7938869595527649, + "learning_rate": 4.825413215749885e-06, + "loss": 0.5149, + "step": 9854 + }, + { + "epoch": 0.7245258050286723, + "grad_norm": 0.8683503866195679, + "learning_rate": 4.8253778386307624e-06, + "loss": 0.5311, + "step": 9855 + }, + { + "epoch": 0.7245993236288781, + "grad_norm": 0.8010714650154114, + "learning_rate": 4.825342458057419e-06, + "loss": 0.5121, + "step": 9856 + }, + { + "epoch": 0.724672842229084, + "grad_norm": 0.7811000943183899, + "learning_rate": 4.825307074029907e-06, + "loss": 0.5224, + "step": 9857 + }, + { + "epoch": 0.7247463608292898, + "grad_norm": 0.793412446975708, + "learning_rate": 4.825271686548279e-06, + "loss": 0.5327, + "step": 9858 + }, + { + "epoch": 0.7248198794294957, + "grad_norm": 0.8917046189308167, + "learning_rate": 4.825236295612586e-06, + "loss": 0.5281, + "step": 9859 + }, + { + "epoch": 0.7248933980297015, + "grad_norm": 0.8310853838920593, + "learning_rate": 4.825200901222883e-06, + "loss": 0.5219, + "step": 9860 + }, + { + "epoch": 0.7249669166299074, + "grad_norm": 0.8216418027877808, + "learning_rate": 4.825165503379221e-06, + "loss": 0.5183, + "step": 9861 + }, + { + "epoch": 0.7250404352301132, + "grad_norm": 0.8442345261573792, + "learning_rate": 4.825130102081652e-06, + "loss": 0.539, + "step": 9862 + }, + { + "epoch": 0.7251139538303191, + "grad_norm": 0.8702029585838318, + "learning_rate": 4.8250946973302305e-06, + "loss": 0.5273, + "step": 9863 + }, + { + "epoch": 0.725187472430525, + "grad_norm": 0.8472489714622498, + "learning_rate": 4.825059289125008e-06, + "loss": 0.5388, + "step": 9864 + }, + { + "epoch": 0.7252609910307308, + "grad_norm": 0.8752679228782654, + "learning_rate": 4.825023877466037e-06, + "loss": 0.6139, + "step": 9865 + }, + { + "epoch": 0.7253345096309366, + "grad_norm": 0.8277351260185242, + "learning_rate": 4.824988462353371e-06, + "loss": 0.5456, + "step": 9866 + }, + { + "epoch": 0.7254080282311425, + "grad_norm": 0.7799929976463318, + "learning_rate": 4.824953043787062e-06, + "loss": 0.5151, + "step": 9867 + }, + { + "epoch": 0.7254815468313484, + "grad_norm": 0.7989616990089417, + "learning_rate": 4.824917621767162e-06, + "loss": 0.5149, + "step": 9868 + }, + { + "epoch": 0.7255550654315542, + "grad_norm": 0.8502606153488159, + "learning_rate": 4.824882196293724e-06, + "loss": 0.5668, + "step": 9869 + }, + { + "epoch": 0.72562858403176, + "grad_norm": 0.833846390247345, + "learning_rate": 4.824846767366801e-06, + "loss": 0.5462, + "step": 9870 + }, + { + "epoch": 0.7257021026319659, + "grad_norm": 0.8336753845214844, + "learning_rate": 4.824811334986446e-06, + "loss": 0.5312, + "step": 9871 + }, + { + "epoch": 0.7257756212321718, + "grad_norm": 0.8037008047103882, + "learning_rate": 4.824775899152711e-06, + "loss": 0.5047, + "step": 9872 + }, + { + "epoch": 0.7258491398323776, + "grad_norm": 0.8391119241714478, + "learning_rate": 4.824740459865647e-06, + "loss": 0.5506, + "step": 9873 + }, + { + "epoch": 0.7259226584325834, + "grad_norm": 0.8258721828460693, + "learning_rate": 4.8247050171253105e-06, + "loss": 0.5162, + "step": 9874 + }, + { + "epoch": 0.7259961770327893, + "grad_norm": 0.7798078656196594, + "learning_rate": 4.824669570931751e-06, + "loss": 0.5175, + "step": 9875 + }, + { + "epoch": 0.7260696956329952, + "grad_norm": 0.8823304176330566, + "learning_rate": 4.824634121285022e-06, + "loss": 0.6069, + "step": 9876 + }, + { + "epoch": 0.726143214233201, + "grad_norm": 0.8090327978134155, + "learning_rate": 4.824598668185177e-06, + "loss": 0.5066, + "step": 9877 + }, + { + "epoch": 0.7262167328334068, + "grad_norm": 0.7964698076248169, + "learning_rate": 4.824563211632267e-06, + "loss": 0.5286, + "step": 9878 + }, + { + "epoch": 0.7262902514336127, + "grad_norm": 0.8350325226783752, + "learning_rate": 4.824527751626347e-06, + "loss": 0.5344, + "step": 9879 + }, + { + "epoch": 0.7263637700338186, + "grad_norm": 0.8115673065185547, + "learning_rate": 4.824492288167467e-06, + "loss": 0.5161, + "step": 9880 + }, + { + "epoch": 0.7264372886340245, + "grad_norm": 0.8444183468818665, + "learning_rate": 4.824456821255682e-06, + "loss": 0.5265, + "step": 9881 + }, + { + "epoch": 0.7265108072342302, + "grad_norm": 0.8853393197059631, + "learning_rate": 4.824421350891043e-06, + "loss": 0.5193, + "step": 9882 + }, + { + "epoch": 0.7265843258344361, + "grad_norm": 0.8038644194602966, + "learning_rate": 4.824385877073603e-06, + "loss": 0.4894, + "step": 9883 + }, + { + "epoch": 0.726657844434642, + "grad_norm": 0.8594773411750793, + "learning_rate": 4.824350399803416e-06, + "loss": 0.5518, + "step": 9884 + }, + { + "epoch": 0.7267313630348479, + "grad_norm": 0.8249757289886475, + "learning_rate": 4.8243149190805335e-06, + "loss": 0.5799, + "step": 9885 + }, + { + "epoch": 0.7268048816350536, + "grad_norm": 0.8400641083717346, + "learning_rate": 4.824279434905008e-06, + "loss": 0.6094, + "step": 9886 + }, + { + "epoch": 0.7268784002352595, + "grad_norm": 0.8146474957466125, + "learning_rate": 4.824243947276893e-06, + "loss": 0.5444, + "step": 9887 + }, + { + "epoch": 0.7269519188354654, + "grad_norm": 0.8015690445899963, + "learning_rate": 4.824208456196241e-06, + "loss": 0.5128, + "step": 9888 + }, + { + "epoch": 0.7270254374356713, + "grad_norm": 0.8519808650016785, + "learning_rate": 4.824172961663105e-06, + "loss": 0.5411, + "step": 9889 + }, + { + "epoch": 0.727098956035877, + "grad_norm": 0.8300852179527283, + "learning_rate": 4.824137463677536e-06, + "loss": 0.5146, + "step": 9890 + }, + { + "epoch": 0.7271724746360829, + "grad_norm": 0.8478034734725952, + "learning_rate": 4.8241019622395894e-06, + "loss": 0.5112, + "step": 9891 + }, + { + "epoch": 0.7272459932362888, + "grad_norm": 0.8583953976631165, + "learning_rate": 4.824066457349315e-06, + "loss": 0.5305, + "step": 9892 + }, + { + "epoch": 0.7273195118364947, + "grad_norm": 0.7502707839012146, + "learning_rate": 4.82403094900677e-06, + "loss": 0.4849, + "step": 9893 + }, + { + "epoch": 0.7273930304367004, + "grad_norm": 0.8224354386329651, + "learning_rate": 4.8239954372120015e-06, + "loss": 0.5413, + "step": 9894 + }, + { + "epoch": 0.7274665490369063, + "grad_norm": 0.8899579644203186, + "learning_rate": 4.823959921965067e-06, + "loss": 0.5622, + "step": 9895 + }, + { + "epoch": 0.7275400676371122, + "grad_norm": 0.8279266953468323, + "learning_rate": 4.823924403266016e-06, + "loss": 0.5489, + "step": 9896 + }, + { + "epoch": 0.7276135862373181, + "grad_norm": 0.7883322238922119, + "learning_rate": 4.823888881114903e-06, + "loss": 0.5452, + "step": 9897 + }, + { + "epoch": 0.7276871048375239, + "grad_norm": 0.8807593584060669, + "learning_rate": 4.82385335551178e-06, + "loss": 0.6145, + "step": 9898 + }, + { + "epoch": 0.7277606234377297, + "grad_norm": 0.8149518370628357, + "learning_rate": 4.8238178264567e-06, + "loss": 0.5058, + "step": 9899 + }, + { + "epoch": 0.7278341420379356, + "grad_norm": 0.8741662502288818, + "learning_rate": 4.823782293949716e-06, + "loss": 0.5677, + "step": 9900 + }, + { + "epoch": 0.7279076606381415, + "grad_norm": 0.8058080077171326, + "learning_rate": 4.823746757990881e-06, + "loss": 0.5012, + "step": 9901 + }, + { + "epoch": 0.7279811792383473, + "grad_norm": 0.852523922920227, + "learning_rate": 4.823711218580247e-06, + "loss": 0.4921, + "step": 9902 + }, + { + "epoch": 0.7280546978385531, + "grad_norm": 0.7905141115188599, + "learning_rate": 4.823675675717868e-06, + "loss": 0.5125, + "step": 9903 + }, + { + "epoch": 0.728128216438759, + "grad_norm": 0.8345409631729126, + "learning_rate": 4.823640129403795e-06, + "loss": 0.5457, + "step": 9904 + }, + { + "epoch": 0.7282017350389649, + "grad_norm": 0.8063996434211731, + "learning_rate": 4.823604579638082e-06, + "loss": 0.5214, + "step": 9905 + }, + { + "epoch": 0.7282752536391707, + "grad_norm": 0.8172658085823059, + "learning_rate": 4.823569026420782e-06, + "loss": 0.5212, + "step": 9906 + }, + { + "epoch": 0.7283487722393766, + "grad_norm": 0.8259575963020325, + "learning_rate": 4.823533469751948e-06, + "loss": 0.5695, + "step": 9907 + }, + { + "epoch": 0.7284222908395824, + "grad_norm": 0.8473582863807678, + "learning_rate": 4.823497909631631e-06, + "loss": 0.5103, + "step": 9908 + }, + { + "epoch": 0.7284958094397883, + "grad_norm": 0.8006930947303772, + "learning_rate": 4.823462346059887e-06, + "loss": 0.5259, + "step": 9909 + }, + { + "epoch": 0.7285693280399941, + "grad_norm": 0.8181999325752258, + "learning_rate": 4.823426779036765e-06, + "loss": 0.5488, + "step": 9910 + }, + { + "epoch": 0.7286428466402, + "grad_norm": 0.8134820461273193, + "learning_rate": 4.823391208562321e-06, + "loss": 0.5051, + "step": 9911 + }, + { + "epoch": 0.7287163652404058, + "grad_norm": 0.8176169395446777, + "learning_rate": 4.823355634636606e-06, + "loss": 0.534, + "step": 9912 + }, + { + "epoch": 0.7287898838406117, + "grad_norm": 0.7871251702308655, + "learning_rate": 4.8233200572596735e-06, + "loss": 0.5052, + "step": 9913 + }, + { + "epoch": 0.7288634024408175, + "grad_norm": 0.8148416876792908, + "learning_rate": 4.823284476431575e-06, + "loss": 0.5329, + "step": 9914 + }, + { + "epoch": 0.7289369210410234, + "grad_norm": 0.8109570145606995, + "learning_rate": 4.8232488921523666e-06, + "loss": 0.5323, + "step": 9915 + }, + { + "epoch": 0.7290104396412292, + "grad_norm": 0.8318511843681335, + "learning_rate": 4.823213304422098e-06, + "loss": 0.5186, + "step": 9916 + }, + { + "epoch": 0.7290839582414351, + "grad_norm": 0.802986204624176, + "learning_rate": 4.823177713240824e-06, + "loss": 0.5335, + "step": 9917 + }, + { + "epoch": 0.7291574768416409, + "grad_norm": 0.8607616424560547, + "learning_rate": 4.823142118608597e-06, + "loss": 0.5515, + "step": 9918 + }, + { + "epoch": 0.7292309954418468, + "grad_norm": 0.8415362238883972, + "learning_rate": 4.823106520525469e-06, + "loss": 0.5498, + "step": 9919 + }, + { + "epoch": 0.7293045140420527, + "grad_norm": 0.865853488445282, + "learning_rate": 4.823070918991494e-06, + "loss": 0.5811, + "step": 9920 + }, + { + "epoch": 0.7293780326422585, + "grad_norm": 0.8553633689880371, + "learning_rate": 4.823035314006723e-06, + "loss": 0.5443, + "step": 9921 + }, + { + "epoch": 0.7294515512424643, + "grad_norm": 0.8623491525650024, + "learning_rate": 4.822999705571211e-06, + "loss": 0.5553, + "step": 9922 + }, + { + "epoch": 0.7295250698426702, + "grad_norm": 0.8241715431213379, + "learning_rate": 4.822964093685011e-06, + "loss": 0.4913, + "step": 9923 + }, + { + "epoch": 0.7295985884428761, + "grad_norm": 0.8484125137329102, + "learning_rate": 4.822928478348174e-06, + "loss": 0.5116, + "step": 9924 + }, + { + "epoch": 0.7296721070430819, + "grad_norm": 0.8451088666915894, + "learning_rate": 4.822892859560754e-06, + "loss": 0.5478, + "step": 9925 + }, + { + "epoch": 0.7297456256432877, + "grad_norm": 0.7993364334106445, + "learning_rate": 4.822857237322804e-06, + "loss": 0.5557, + "step": 9926 + }, + { + "epoch": 0.7298191442434936, + "grad_norm": 0.8023985028266907, + "learning_rate": 4.822821611634377e-06, + "loss": 0.5225, + "step": 9927 + }, + { + "epoch": 0.7298926628436995, + "grad_norm": 0.8283798694610596, + "learning_rate": 4.822785982495526e-06, + "loss": 0.5486, + "step": 9928 + }, + { + "epoch": 0.7299661814439053, + "grad_norm": 0.8319517374038696, + "learning_rate": 4.822750349906302e-06, + "loss": 0.5291, + "step": 9929 + }, + { + "epoch": 0.7300397000441111, + "grad_norm": 0.8568607568740845, + "learning_rate": 4.822714713866761e-06, + "loss": 0.5676, + "step": 9930 + }, + { + "epoch": 0.730113218644317, + "grad_norm": 0.8042932152748108, + "learning_rate": 4.822679074376955e-06, + "loss": 0.5326, + "step": 9931 + }, + { + "epoch": 0.7301867372445229, + "grad_norm": 0.7944716811180115, + "learning_rate": 4.822643431436935e-06, + "loss": 0.5418, + "step": 9932 + }, + { + "epoch": 0.7302602558447288, + "grad_norm": 0.8651081919670105, + "learning_rate": 4.822607785046756e-06, + "loss": 0.5174, + "step": 9933 + }, + { + "epoch": 0.7303337744449345, + "grad_norm": 0.8392298817634583, + "learning_rate": 4.822572135206471e-06, + "loss": 0.5318, + "step": 9934 + }, + { + "epoch": 0.7304072930451404, + "grad_norm": 0.821379542350769, + "learning_rate": 4.822536481916131e-06, + "loss": 0.5926, + "step": 9935 + }, + { + "epoch": 0.7304808116453463, + "grad_norm": 0.9000861048698425, + "learning_rate": 4.822500825175791e-06, + "loss": 0.57, + "step": 9936 + }, + { + "epoch": 0.7305543302455522, + "grad_norm": 0.7855068445205688, + "learning_rate": 4.822465164985503e-06, + "loss": 0.5021, + "step": 9937 + }, + { + "epoch": 0.7306278488457579, + "grad_norm": 0.8400035500526428, + "learning_rate": 4.82242950134532e-06, + "loss": 0.551, + "step": 9938 + }, + { + "epoch": 0.7307013674459638, + "grad_norm": 0.8100321292877197, + "learning_rate": 4.822393834255296e-06, + "loss": 0.5654, + "step": 9939 + }, + { + "epoch": 0.7307748860461697, + "grad_norm": 0.8176552653312683, + "learning_rate": 4.822358163715482e-06, + "loss": 0.5383, + "step": 9940 + }, + { + "epoch": 0.7308484046463756, + "grad_norm": 0.8087986707687378, + "learning_rate": 4.822322489725933e-06, + "loss": 0.53, + "step": 9941 + }, + { + "epoch": 0.7309219232465813, + "grad_norm": 0.8309212923049927, + "learning_rate": 4.8222868122867e-06, + "loss": 0.563, + "step": 9942 + }, + { + "epoch": 0.7309954418467872, + "grad_norm": 0.8186923861503601, + "learning_rate": 4.822251131397838e-06, + "loss": 0.4825, + "step": 9943 + }, + { + "epoch": 0.7310689604469931, + "grad_norm": 0.8319522738456726, + "learning_rate": 4.822215447059398e-06, + "loss": 0.4793, + "step": 9944 + }, + { + "epoch": 0.731142479047199, + "grad_norm": 0.8217481970787048, + "learning_rate": 4.822179759271436e-06, + "loss": 0.5683, + "step": 9945 + }, + { + "epoch": 0.7312159976474047, + "grad_norm": 0.8259706497192383, + "learning_rate": 4.822144068034002e-06, + "loss": 0.5383, + "step": 9946 + }, + { + "epoch": 0.7312895162476106, + "grad_norm": 0.7893728613853455, + "learning_rate": 4.82210837334715e-06, + "loss": 0.5346, + "step": 9947 + }, + { + "epoch": 0.7313630348478165, + "grad_norm": 0.8684837818145752, + "learning_rate": 4.822072675210933e-06, + "loss": 0.5558, + "step": 9948 + }, + { + "epoch": 0.7314365534480224, + "grad_norm": 0.8202981948852539, + "learning_rate": 4.822036973625405e-06, + "loss": 0.5122, + "step": 9949 + }, + { + "epoch": 0.7315100720482282, + "grad_norm": 0.8590604662895203, + "learning_rate": 4.822001268590618e-06, + "loss": 0.5498, + "step": 9950 + }, + { + "epoch": 0.731583590648434, + "grad_norm": 0.8614048361778259, + "learning_rate": 4.821965560106625e-06, + "loss": 0.5335, + "step": 9951 + }, + { + "epoch": 0.7316571092486399, + "grad_norm": 0.8409903049468994, + "learning_rate": 4.82192984817348e-06, + "loss": 0.593, + "step": 9952 + }, + { + "epoch": 0.7317306278488458, + "grad_norm": 0.8416309356689453, + "learning_rate": 4.8218941327912345e-06, + "loss": 0.5347, + "step": 9953 + }, + { + "epoch": 0.7318041464490516, + "grad_norm": 0.8004562258720398, + "learning_rate": 4.821858413959943e-06, + "loss": 0.533, + "step": 9954 + }, + { + "epoch": 0.7318776650492574, + "grad_norm": 0.8508260846138, + "learning_rate": 4.821822691679657e-06, + "loss": 0.5545, + "step": 9955 + }, + { + "epoch": 0.7319511836494633, + "grad_norm": 0.7978852391242981, + "learning_rate": 4.821786965950431e-06, + "loss": 0.5198, + "step": 9956 + }, + { + "epoch": 0.7320247022496692, + "grad_norm": 0.8675968050956726, + "learning_rate": 4.821751236772319e-06, + "loss": 0.5797, + "step": 9957 + }, + { + "epoch": 0.732098220849875, + "grad_norm": 0.8666282296180725, + "learning_rate": 4.821715504145371e-06, + "loss": 0.5791, + "step": 9958 + }, + { + "epoch": 0.7321717394500808, + "grad_norm": 0.8710692524909973, + "learning_rate": 4.821679768069642e-06, + "loss": 0.5439, + "step": 9959 + }, + { + "epoch": 0.7322452580502867, + "grad_norm": 0.8203332424163818, + "learning_rate": 4.821644028545185e-06, + "loss": 0.5657, + "step": 9960 + }, + { + "epoch": 0.7323187766504926, + "grad_norm": 0.817995548248291, + "learning_rate": 4.821608285572053e-06, + "loss": 0.5212, + "step": 9961 + }, + { + "epoch": 0.7323922952506984, + "grad_norm": 0.8189329504966736, + "learning_rate": 4.8215725391502985e-06, + "loss": 0.537, + "step": 9962 + }, + { + "epoch": 0.7324658138509043, + "grad_norm": 0.8599981069564819, + "learning_rate": 4.821536789279976e-06, + "loss": 0.5526, + "step": 9963 + }, + { + "epoch": 0.7325393324511101, + "grad_norm": 0.8685230016708374, + "learning_rate": 4.8215010359611376e-06, + "loss": 0.5766, + "step": 9964 + }, + { + "epoch": 0.732612851051316, + "grad_norm": 0.793003261089325, + "learning_rate": 4.821465279193836e-06, + "loss": 0.4929, + "step": 9965 + }, + { + "epoch": 0.7326863696515218, + "grad_norm": 0.8694113492965698, + "learning_rate": 4.8214295189781255e-06, + "loss": 0.6282, + "step": 9966 + }, + { + "epoch": 0.7327598882517277, + "grad_norm": 0.8463427424430847, + "learning_rate": 4.821393755314057e-06, + "loss": 0.5473, + "step": 9967 + }, + { + "epoch": 0.7328334068519335, + "grad_norm": 0.8843891024589539, + "learning_rate": 4.821357988201687e-06, + "loss": 0.5391, + "step": 9968 + }, + { + "epoch": 0.7329069254521394, + "grad_norm": 0.8025878071784973, + "learning_rate": 4.821322217641067e-06, + "loss": 0.5086, + "step": 9969 + }, + { + "epoch": 0.7329804440523452, + "grad_norm": 0.8442977666854858, + "learning_rate": 4.821286443632249e-06, + "loss": 0.5006, + "step": 9970 + }, + { + "epoch": 0.7330539626525511, + "grad_norm": 0.8116162419319153, + "learning_rate": 4.821250666175287e-06, + "loss": 0.5431, + "step": 9971 + }, + { + "epoch": 0.733127481252757, + "grad_norm": 0.8419055938720703, + "learning_rate": 4.821214885270234e-06, + "loss": 0.5268, + "step": 9972 + }, + { + "epoch": 0.7332009998529628, + "grad_norm": 0.8499184846878052, + "learning_rate": 4.821179100917144e-06, + "loss": 0.5549, + "step": 9973 + }, + { + "epoch": 0.7332745184531686, + "grad_norm": 0.8187799453735352, + "learning_rate": 4.82114331311607e-06, + "loss": 0.5533, + "step": 9974 + }, + { + "epoch": 0.7333480370533745, + "grad_norm": 0.8247771263122559, + "learning_rate": 4.821107521867064e-06, + "loss": 0.5617, + "step": 9975 + }, + { + "epoch": 0.7334215556535804, + "grad_norm": 0.8713053464889526, + "learning_rate": 4.82107172717018e-06, + "loss": 0.5414, + "step": 9976 + }, + { + "epoch": 0.7334950742537862, + "grad_norm": 0.8069146871566772, + "learning_rate": 4.8210359290254714e-06, + "loss": 0.4824, + "step": 9977 + }, + { + "epoch": 0.733568592853992, + "grad_norm": 0.8194196820259094, + "learning_rate": 4.821000127432991e-06, + "loss": 0.5255, + "step": 9978 + }, + { + "epoch": 0.7336421114541979, + "grad_norm": 0.8418537974357605, + "learning_rate": 4.820964322392791e-06, + "loss": 0.5609, + "step": 9979 + }, + { + "epoch": 0.7337156300544038, + "grad_norm": 0.7923794388771057, + "learning_rate": 4.820928513904927e-06, + "loss": 0.5107, + "step": 9980 + }, + { + "epoch": 0.7337891486546096, + "grad_norm": 0.872105062007904, + "learning_rate": 4.82089270196945e-06, + "loss": 0.6069, + "step": 9981 + }, + { + "epoch": 0.7338626672548154, + "grad_norm": 0.8696110248565674, + "learning_rate": 4.820856886586414e-06, + "loss": 0.5297, + "step": 9982 + }, + { + "epoch": 0.7339361858550213, + "grad_norm": 0.8349952697753906, + "learning_rate": 4.820821067755872e-06, + "loss": 0.4841, + "step": 9983 + }, + { + "epoch": 0.7340097044552272, + "grad_norm": 0.8524528741836548, + "learning_rate": 4.820785245477878e-06, + "loss": 0.541, + "step": 9984 + }, + { + "epoch": 0.734083223055433, + "grad_norm": 0.8284185528755188, + "learning_rate": 4.820749419752484e-06, + "loss": 0.5355, + "step": 9985 + }, + { + "epoch": 0.7341567416556388, + "grad_norm": 0.8087604641914368, + "learning_rate": 4.820713590579744e-06, + "loss": 0.53, + "step": 9986 + }, + { + "epoch": 0.7342302602558447, + "grad_norm": 0.7877983450889587, + "learning_rate": 4.820677757959711e-06, + "loss": 0.5347, + "step": 9987 + }, + { + "epoch": 0.7343037788560506, + "grad_norm": 0.807647705078125, + "learning_rate": 4.820641921892438e-06, + "loss": 0.5298, + "step": 9988 + }, + { + "epoch": 0.7343772974562565, + "grad_norm": 0.8360969424247742, + "learning_rate": 4.820606082377979e-06, + "loss": 0.5282, + "step": 9989 + }, + { + "epoch": 0.7344508160564622, + "grad_norm": 0.833760142326355, + "learning_rate": 4.820570239416386e-06, + "loss": 0.5194, + "step": 9990 + }, + { + "epoch": 0.7345243346566681, + "grad_norm": 0.8232802152633667, + "learning_rate": 4.820534393007714e-06, + "loss": 0.5357, + "step": 9991 + }, + { + "epoch": 0.734597853256874, + "grad_norm": 0.8566551804542542, + "learning_rate": 4.8204985431520155e-06, + "loss": 0.5777, + "step": 9992 + }, + { + "epoch": 0.7346713718570799, + "grad_norm": 0.7937160134315491, + "learning_rate": 4.820462689849342e-06, + "loss": 0.4777, + "step": 9993 + }, + { + "epoch": 0.7347448904572857, + "grad_norm": 0.8360270261764526, + "learning_rate": 4.82042683309975e-06, + "loss": 0.5376, + "step": 9994 + }, + { + "epoch": 0.7348184090574915, + "grad_norm": 0.8333276510238647, + "learning_rate": 4.820390972903289e-06, + "loss": 0.5643, + "step": 9995 + }, + { + "epoch": 0.7348919276576974, + "grad_norm": 0.8433962464332581, + "learning_rate": 4.8203551092600155e-06, + "loss": 0.5391, + "step": 9996 + }, + { + "epoch": 0.7349654462579033, + "grad_norm": 0.8186495900154114, + "learning_rate": 4.820319242169982e-06, + "loss": 0.5618, + "step": 9997 + }, + { + "epoch": 0.7350389648581092, + "grad_norm": 0.775876522064209, + "learning_rate": 4.820283371633241e-06, + "loss": 0.4746, + "step": 9998 + }, + { + "epoch": 0.7351124834583149, + "grad_norm": 0.8374110460281372, + "learning_rate": 4.820247497649845e-06, + "loss": 0.5307, + "step": 9999 + }, + { + "epoch": 0.7351860020585208, + "grad_norm": 0.8417997360229492, + "learning_rate": 4.820211620219849e-06, + "loss": 0.5337, + "step": 10000 + }, + { + "epoch": 0.7352595206587267, + "grad_norm": 0.8521609306335449, + "learning_rate": 4.820175739343306e-06, + "loss": 0.5399, + "step": 10001 + }, + { + "epoch": 0.7353330392589326, + "grad_norm": 0.77995365858078, + "learning_rate": 4.8201398550202696e-06, + "loss": 0.5509, + "step": 10002 + }, + { + "epoch": 0.7354065578591383, + "grad_norm": 0.841816246509552, + "learning_rate": 4.820103967250791e-06, + "loss": 0.5532, + "step": 10003 + }, + { + "epoch": 0.7354800764593442, + "grad_norm": 0.788155198097229, + "learning_rate": 4.820068076034926e-06, + "loss": 0.4762, + "step": 10004 + }, + { + "epoch": 0.7355535950595501, + "grad_norm": 0.7851571440696716, + "learning_rate": 4.820032181372727e-06, + "loss": 0.4882, + "step": 10005 + }, + { + "epoch": 0.735627113659756, + "grad_norm": 0.8342891335487366, + "learning_rate": 4.819996283264247e-06, + "loss": 0.5191, + "step": 10006 + }, + { + "epoch": 0.7357006322599617, + "grad_norm": 0.8094378709793091, + "learning_rate": 4.819960381709539e-06, + "loss": 0.5749, + "step": 10007 + }, + { + "epoch": 0.7357741508601676, + "grad_norm": 0.8293774724006653, + "learning_rate": 4.819924476708657e-06, + "loss": 0.5788, + "step": 10008 + }, + { + "epoch": 0.7358476694603735, + "grad_norm": 0.7848393321037292, + "learning_rate": 4.8198885682616555e-06, + "loss": 0.5157, + "step": 10009 + }, + { + "epoch": 0.7359211880605794, + "grad_norm": 0.8622516989707947, + "learning_rate": 4.819852656368585e-06, + "loss": 0.555, + "step": 10010 + }, + { + "epoch": 0.7359947066607851, + "grad_norm": 0.8102110028266907, + "learning_rate": 4.819816741029502e-06, + "loss": 0.5163, + "step": 10011 + }, + { + "epoch": 0.736068225260991, + "grad_norm": 0.8160567283630371, + "learning_rate": 4.819780822244456e-06, + "loss": 0.5326, + "step": 10012 + }, + { + "epoch": 0.7361417438611969, + "grad_norm": 0.8429984450340271, + "learning_rate": 4.819744900013505e-06, + "loss": 0.5441, + "step": 10013 + }, + { + "epoch": 0.7362152624614028, + "grad_norm": 0.8530141115188599, + "learning_rate": 4.8197089743366985e-06, + "loss": 0.5063, + "step": 10014 + }, + { + "epoch": 0.7362887810616086, + "grad_norm": 0.7815214991569519, + "learning_rate": 4.819673045214092e-06, + "loss": 0.5092, + "step": 10015 + }, + { + "epoch": 0.7363622996618144, + "grad_norm": 0.8024402856826782, + "learning_rate": 4.819637112645738e-06, + "loss": 0.554, + "step": 10016 + }, + { + "epoch": 0.7364358182620203, + "grad_norm": 0.8416693210601807, + "learning_rate": 4.81960117663169e-06, + "loss": 0.5099, + "step": 10017 + }, + { + "epoch": 0.7365093368622262, + "grad_norm": 0.817072331905365, + "learning_rate": 4.8195652371720015e-06, + "loss": 0.5006, + "step": 10018 + }, + { + "epoch": 0.736582855462432, + "grad_norm": 0.8077968955039978, + "learning_rate": 4.819529294266726e-06, + "loss": 0.4857, + "step": 10019 + }, + { + "epoch": 0.7366563740626378, + "grad_norm": 0.7835842967033386, + "learning_rate": 4.819493347915917e-06, + "loss": 0.5318, + "step": 10020 + }, + { + "epoch": 0.7367298926628437, + "grad_norm": 0.8369719386100769, + "learning_rate": 4.8194573981196265e-06, + "loss": 0.563, + "step": 10021 + }, + { + "epoch": 0.7368034112630496, + "grad_norm": 0.8006182909011841, + "learning_rate": 4.81942144487791e-06, + "loss": 0.4749, + "step": 10022 + }, + { + "epoch": 0.7368769298632554, + "grad_norm": 0.8523887395858765, + "learning_rate": 4.8193854881908194e-06, + "loss": 0.5748, + "step": 10023 + }, + { + "epoch": 0.7369504484634612, + "grad_norm": 0.8247465491294861, + "learning_rate": 4.819349528058409e-06, + "loss": 0.5199, + "step": 10024 + }, + { + "epoch": 0.7370239670636671, + "grad_norm": 0.8075994849205017, + "learning_rate": 4.819313564480732e-06, + "loss": 0.549, + "step": 10025 + }, + { + "epoch": 0.737097485663873, + "grad_norm": 0.8265957832336426, + "learning_rate": 4.819277597457842e-06, + "loss": 0.5507, + "step": 10026 + }, + { + "epoch": 0.7371710042640788, + "grad_norm": 0.8405113220214844, + "learning_rate": 4.8192416269897914e-06, + "loss": 0.5957, + "step": 10027 + }, + { + "epoch": 0.7372445228642847, + "grad_norm": 0.8631158471107483, + "learning_rate": 4.819205653076635e-06, + "loss": 0.5609, + "step": 10028 + }, + { + "epoch": 0.7373180414644905, + "grad_norm": 0.808578610420227, + "learning_rate": 4.819169675718425e-06, + "loss": 0.5093, + "step": 10029 + }, + { + "epoch": 0.7373915600646964, + "grad_norm": 0.7877328991889954, + "learning_rate": 4.819133694915216e-06, + "loss": 0.4802, + "step": 10030 + }, + { + "epoch": 0.7374650786649022, + "grad_norm": 0.849100649356842, + "learning_rate": 4.819097710667061e-06, + "loss": 0.5774, + "step": 10031 + }, + { + "epoch": 0.7375385972651081, + "grad_norm": 0.798375129699707, + "learning_rate": 4.8190617229740125e-06, + "loss": 0.542, + "step": 10032 + }, + { + "epoch": 0.7376121158653139, + "grad_norm": 0.8148141503334045, + "learning_rate": 4.819025731836126e-06, + "loss": 0.5359, + "step": 10033 + }, + { + "epoch": 0.7376856344655198, + "grad_norm": 0.8427116870880127, + "learning_rate": 4.818989737253452e-06, + "loss": 0.5279, + "step": 10034 + }, + { + "epoch": 0.7377591530657256, + "grad_norm": 0.8334965705871582, + "learning_rate": 4.818953739226048e-06, + "loss": 0.5297, + "step": 10035 + }, + { + "epoch": 0.7378326716659315, + "grad_norm": 0.8715918660163879, + "learning_rate": 4.8189177377539635e-06, + "loss": 0.597, + "step": 10036 + }, + { + "epoch": 0.7379061902661374, + "grad_norm": 0.8306391835212708, + "learning_rate": 4.818881732837254e-06, + "loss": 0.5415, + "step": 10037 + }, + { + "epoch": 0.7379797088663432, + "grad_norm": 0.7866026163101196, + "learning_rate": 4.818845724475973e-06, + "loss": 0.5285, + "step": 10038 + }, + { + "epoch": 0.738053227466549, + "grad_norm": 0.8186887502670288, + "learning_rate": 4.818809712670173e-06, + "loss": 0.5553, + "step": 10039 + }, + { + "epoch": 0.7381267460667549, + "grad_norm": 0.8383251428604126, + "learning_rate": 4.818773697419909e-06, + "loss": 0.5859, + "step": 10040 + }, + { + "epoch": 0.7382002646669608, + "grad_norm": 0.8462571501731873, + "learning_rate": 4.818737678725234e-06, + "loss": 0.5385, + "step": 10041 + }, + { + "epoch": 0.7382737832671666, + "grad_norm": 0.8569655418395996, + "learning_rate": 4.8187016565861995e-06, + "loss": 0.5351, + "step": 10042 + }, + { + "epoch": 0.7383473018673724, + "grad_norm": 0.8112154603004456, + "learning_rate": 4.818665631002861e-06, + "loss": 0.521, + "step": 10043 + }, + { + "epoch": 0.7384208204675783, + "grad_norm": 0.7953896522521973, + "learning_rate": 4.818629601975273e-06, + "loss": 0.546, + "step": 10044 + }, + { + "epoch": 0.7384943390677842, + "grad_norm": 0.8587042689323425, + "learning_rate": 4.818593569503487e-06, + "loss": 0.5008, + "step": 10045 + }, + { + "epoch": 0.73856785766799, + "grad_norm": 0.8886381983757019, + "learning_rate": 4.818557533587557e-06, + "loss": 0.5391, + "step": 10046 + }, + { + "epoch": 0.7386413762681958, + "grad_norm": 0.8497517704963684, + "learning_rate": 4.818521494227537e-06, + "loss": 0.5519, + "step": 10047 + }, + { + "epoch": 0.7387148948684017, + "grad_norm": 0.9025774002075195, + "learning_rate": 4.8184854514234804e-06, + "loss": 0.5567, + "step": 10048 + }, + { + "epoch": 0.7387884134686076, + "grad_norm": 0.889303982257843, + "learning_rate": 4.81844940517544e-06, + "loss": 0.5689, + "step": 10049 + }, + { + "epoch": 0.7388619320688135, + "grad_norm": 0.8040785193443298, + "learning_rate": 4.81841335548347e-06, + "loss": 0.5298, + "step": 10050 + }, + { + "epoch": 0.7389354506690192, + "grad_norm": 0.8271570801734924, + "learning_rate": 4.818377302347624e-06, + "loss": 0.511, + "step": 10051 + }, + { + "epoch": 0.7390089692692251, + "grad_norm": 0.8410338163375854, + "learning_rate": 4.818341245767956e-06, + "loss": 0.5667, + "step": 10052 + }, + { + "epoch": 0.739082487869431, + "grad_norm": 0.8341442942619324, + "learning_rate": 4.81830518574452e-06, + "loss": 0.5666, + "step": 10053 + }, + { + "epoch": 0.7391560064696369, + "grad_norm": 0.8447108864784241, + "learning_rate": 4.8182691222773666e-06, + "loss": 0.5549, + "step": 10054 + }, + { + "epoch": 0.7392295250698426, + "grad_norm": 0.8254664540290833, + "learning_rate": 4.818233055366553e-06, + "loss": 0.5349, + "step": 10055 + }, + { + "epoch": 0.7393030436700485, + "grad_norm": 0.8152405023574829, + "learning_rate": 4.818196985012129e-06, + "loss": 0.5289, + "step": 10056 + }, + { + "epoch": 0.7393765622702544, + "grad_norm": 0.8135454654693604, + "learning_rate": 4.818160911214152e-06, + "loss": 0.5556, + "step": 10057 + }, + { + "epoch": 0.7394500808704603, + "grad_norm": 0.839767575263977, + "learning_rate": 4.818124833972674e-06, + "loss": 0.4973, + "step": 10058 + }, + { + "epoch": 0.739523599470666, + "grad_norm": 0.7776958346366882, + "learning_rate": 4.818088753287747e-06, + "loss": 0.4995, + "step": 10059 + }, + { + "epoch": 0.7395971180708719, + "grad_norm": 0.8531126976013184, + "learning_rate": 4.8180526691594275e-06, + "loss": 0.5742, + "step": 10060 + }, + { + "epoch": 0.7396706366710778, + "grad_norm": 0.8513871431350708, + "learning_rate": 4.818016581587767e-06, + "loss": 0.5199, + "step": 10061 + }, + { + "epoch": 0.7397441552712837, + "grad_norm": 0.8164681792259216, + "learning_rate": 4.81798049057282e-06, + "loss": 0.5543, + "step": 10062 + }, + { + "epoch": 0.7398176738714894, + "grad_norm": 0.8366082906723022, + "learning_rate": 4.81794439611464e-06, + "loss": 0.5615, + "step": 10063 + }, + { + "epoch": 0.7398911924716953, + "grad_norm": 0.8829578757286072, + "learning_rate": 4.817908298213281e-06, + "loss": 0.5427, + "step": 10064 + }, + { + "epoch": 0.7399647110719012, + "grad_norm": 0.8980188369750977, + "learning_rate": 4.817872196868796e-06, + "loss": 0.5704, + "step": 10065 + }, + { + "epoch": 0.7400382296721071, + "grad_norm": 0.7989926934242249, + "learning_rate": 4.817836092081238e-06, + "loss": 0.4864, + "step": 10066 + }, + { + "epoch": 0.7401117482723129, + "grad_norm": 0.8392601013183594, + "learning_rate": 4.817799983850662e-06, + "loss": 0.533, + "step": 10067 + }, + { + "epoch": 0.7401852668725187, + "grad_norm": 0.8288995027542114, + "learning_rate": 4.817763872177121e-06, + "loss": 0.5187, + "step": 10068 + }, + { + "epoch": 0.7402587854727246, + "grad_norm": 0.9326995015144348, + "learning_rate": 4.817727757060668e-06, + "loss": 0.5596, + "step": 10069 + }, + { + "epoch": 0.7403323040729305, + "grad_norm": 0.8172112107276917, + "learning_rate": 4.817691638501358e-06, + "loss": 0.5175, + "step": 10070 + }, + { + "epoch": 0.7404058226731363, + "grad_norm": 0.7913234233856201, + "learning_rate": 4.817655516499244e-06, + "loss": 0.5161, + "step": 10071 + }, + { + "epoch": 0.7404793412733421, + "grad_norm": 0.8312079310417175, + "learning_rate": 4.8176193910543795e-06, + "loss": 0.5328, + "step": 10072 + }, + { + "epoch": 0.740552859873548, + "grad_norm": 0.8634917140007019, + "learning_rate": 4.817583262166819e-06, + "loss": 0.5715, + "step": 10073 + }, + { + "epoch": 0.7406263784737539, + "grad_norm": 0.7875246405601501, + "learning_rate": 4.817547129836614e-06, + "loss": 0.5008, + "step": 10074 + }, + { + "epoch": 0.7406998970739597, + "grad_norm": 0.8053138852119446, + "learning_rate": 4.8175109940638204e-06, + "loss": 0.5357, + "step": 10075 + }, + { + "epoch": 0.7407734156741655, + "grad_norm": 0.8284832239151001, + "learning_rate": 4.817474854848491e-06, + "loss": 0.5333, + "step": 10076 + }, + { + "epoch": 0.7408469342743714, + "grad_norm": 0.8768438100814819, + "learning_rate": 4.8174387121906795e-06, + "loss": 0.5365, + "step": 10077 + }, + { + "epoch": 0.7409204528745773, + "grad_norm": 0.804203450679779, + "learning_rate": 4.81740256609044e-06, + "loss": 0.5151, + "step": 10078 + }, + { + "epoch": 0.7409939714747831, + "grad_norm": 0.7994048595428467, + "learning_rate": 4.817366416547826e-06, + "loss": 0.4972, + "step": 10079 + }, + { + "epoch": 0.741067490074989, + "grad_norm": 0.7613925933837891, + "learning_rate": 4.817330263562891e-06, + "loss": 0.4738, + "step": 10080 + }, + { + "epoch": 0.7411410086751948, + "grad_norm": 0.873399555683136, + "learning_rate": 4.817294107135689e-06, + "loss": 0.5736, + "step": 10081 + }, + { + "epoch": 0.7412145272754007, + "grad_norm": 0.7918217182159424, + "learning_rate": 4.817257947266274e-06, + "loss": 0.4849, + "step": 10082 + }, + { + "epoch": 0.7412880458756065, + "grad_norm": 0.8303136825561523, + "learning_rate": 4.817221783954698e-06, + "loss": 0.5119, + "step": 10083 + }, + { + "epoch": 0.7413615644758124, + "grad_norm": 0.8280515670776367, + "learning_rate": 4.817185617201015e-06, + "loss": 0.4661, + "step": 10084 + }, + { + "epoch": 0.7414350830760182, + "grad_norm": 0.8298171758651733, + "learning_rate": 4.8171494470052824e-06, + "loss": 0.5584, + "step": 10085 + }, + { + "epoch": 0.7415086016762241, + "grad_norm": 0.8554608821868896, + "learning_rate": 4.81711327336755e-06, + "loss": 0.5348, + "step": 10086 + }, + { + "epoch": 0.7415821202764299, + "grad_norm": 0.8490903973579407, + "learning_rate": 4.817077096287872e-06, + "loss": 0.5755, + "step": 10087 + }, + { + "epoch": 0.7416556388766358, + "grad_norm": 0.8624181151390076, + "learning_rate": 4.817040915766303e-06, + "loss": 0.5313, + "step": 10088 + }, + { + "epoch": 0.7417291574768416, + "grad_norm": 0.8567653298377991, + "learning_rate": 4.8170047318028976e-06, + "loss": 0.5526, + "step": 10089 + }, + { + "epoch": 0.7418026760770475, + "grad_norm": 0.8564568758010864, + "learning_rate": 4.816968544397707e-06, + "loss": 0.5429, + "step": 10090 + }, + { + "epoch": 0.7418761946772533, + "grad_norm": 0.8597990870475769, + "learning_rate": 4.816932353550788e-06, + "loss": 0.5482, + "step": 10091 + }, + { + "epoch": 0.7419497132774592, + "grad_norm": 0.8308922052383423, + "learning_rate": 4.816896159262192e-06, + "loss": 0.4942, + "step": 10092 + }, + { + "epoch": 0.7420232318776651, + "grad_norm": 0.8371241092681885, + "learning_rate": 4.816859961531974e-06, + "loss": 0.5144, + "step": 10093 + }, + { + "epoch": 0.7420967504778709, + "grad_norm": 0.822175145149231, + "learning_rate": 4.816823760360187e-06, + "loss": 0.5259, + "step": 10094 + }, + { + "epoch": 0.7421702690780767, + "grad_norm": 0.8382798433303833, + "learning_rate": 4.816787555746886e-06, + "loss": 0.5666, + "step": 10095 + }, + { + "epoch": 0.7422437876782826, + "grad_norm": 0.8117102384567261, + "learning_rate": 4.816751347692123e-06, + "loss": 0.518, + "step": 10096 + }, + { + "epoch": 0.7423173062784885, + "grad_norm": 0.8531597852706909, + "learning_rate": 4.816715136195953e-06, + "loss": 0.5553, + "step": 10097 + }, + { + "epoch": 0.7423908248786943, + "grad_norm": 0.87682044506073, + "learning_rate": 4.81667892125843e-06, + "loss": 0.5865, + "step": 10098 + }, + { + "epoch": 0.7424643434789001, + "grad_norm": 0.8547384142875671, + "learning_rate": 4.816642702879607e-06, + "loss": 0.5328, + "step": 10099 + }, + { + "epoch": 0.742537862079106, + "grad_norm": 0.783983051776886, + "learning_rate": 4.816606481059538e-06, + "loss": 0.4942, + "step": 10100 + }, + { + "epoch": 0.7426113806793119, + "grad_norm": 0.8271907567977905, + "learning_rate": 4.816570255798277e-06, + "loss": 0.5508, + "step": 10101 + }, + { + "epoch": 0.7426848992795178, + "grad_norm": 0.7920087575912476, + "learning_rate": 4.816534027095878e-06, + "loss": 0.5317, + "step": 10102 + }, + { + "epoch": 0.7427584178797235, + "grad_norm": 0.8253958821296692, + "learning_rate": 4.8164977949523935e-06, + "loss": 0.552, + "step": 10103 + }, + { + "epoch": 0.7428319364799294, + "grad_norm": 0.8933981657028198, + "learning_rate": 4.8164615593678795e-06, + "loss": 0.5407, + "step": 10104 + }, + { + "epoch": 0.7429054550801353, + "grad_norm": 0.8565035462379456, + "learning_rate": 4.816425320342388e-06, + "loss": 0.5439, + "step": 10105 + }, + { + "epoch": 0.7429789736803412, + "grad_norm": 0.8325918912887573, + "learning_rate": 4.816389077875975e-06, + "loss": 0.4927, + "step": 10106 + }, + { + "epoch": 0.7430524922805469, + "grad_norm": 0.8032909035682678, + "learning_rate": 4.816352831968691e-06, + "loss": 0.4769, + "step": 10107 + }, + { + "epoch": 0.7431260108807528, + "grad_norm": 0.8349966406822205, + "learning_rate": 4.816316582620592e-06, + "loss": 0.5131, + "step": 10108 + }, + { + "epoch": 0.7431995294809587, + "grad_norm": 0.8244778513908386, + "learning_rate": 4.816280329831733e-06, + "loss": 0.5305, + "step": 10109 + }, + { + "epoch": 0.7432730480811646, + "grad_norm": 0.8228414058685303, + "learning_rate": 4.816244073602166e-06, + "loss": 0.5668, + "step": 10110 + }, + { + "epoch": 0.7433465666813703, + "grad_norm": 0.8392022848129272, + "learning_rate": 4.816207813931944e-06, + "loss": 0.5252, + "step": 10111 + }, + { + "epoch": 0.7434200852815762, + "grad_norm": 0.8053048849105835, + "learning_rate": 4.816171550821122e-06, + "loss": 0.5453, + "step": 10112 + }, + { + "epoch": 0.7434936038817821, + "grad_norm": 0.8664254546165466, + "learning_rate": 4.8161352842697555e-06, + "loss": 0.5359, + "step": 10113 + }, + { + "epoch": 0.743567122481988, + "grad_norm": 0.8502040505409241, + "learning_rate": 4.816099014277897e-06, + "loss": 0.5424, + "step": 10114 + }, + { + "epoch": 0.7436406410821937, + "grad_norm": 0.8653925061225891, + "learning_rate": 4.816062740845599e-06, + "loss": 0.5928, + "step": 10115 + }, + { + "epoch": 0.7437141596823996, + "grad_norm": 0.8441140651702881, + "learning_rate": 4.816026463972917e-06, + "loss": 0.4913, + "step": 10116 + }, + { + "epoch": 0.7437876782826055, + "grad_norm": 0.827397346496582, + "learning_rate": 4.8159901836599045e-06, + "loss": 0.5359, + "step": 10117 + }, + { + "epoch": 0.7438611968828114, + "grad_norm": 0.8260247111320496, + "learning_rate": 4.815953899906616e-06, + "loss": 0.5379, + "step": 10118 + }, + { + "epoch": 0.7439347154830172, + "grad_norm": 0.8164761066436768, + "learning_rate": 4.815917612713103e-06, + "loss": 0.5186, + "step": 10119 + }, + { + "epoch": 0.744008234083223, + "grad_norm": 0.8437654376029968, + "learning_rate": 4.8158813220794235e-06, + "loss": 0.5378, + "step": 10120 + }, + { + "epoch": 0.7440817526834289, + "grad_norm": 0.7920463681221008, + "learning_rate": 4.815845028005628e-06, + "loss": 0.5072, + "step": 10121 + }, + { + "epoch": 0.7441552712836348, + "grad_norm": 0.8030918836593628, + "learning_rate": 4.815808730491771e-06, + "loss": 0.5152, + "step": 10122 + }, + { + "epoch": 0.7442287898838406, + "grad_norm": 0.8313467502593994, + "learning_rate": 4.815772429537909e-06, + "loss": 0.4892, + "step": 10123 + }, + { + "epoch": 0.7443023084840464, + "grad_norm": 0.8490053415298462, + "learning_rate": 4.815736125144091e-06, + "loss": 0.5586, + "step": 10124 + }, + { + "epoch": 0.7443758270842523, + "grad_norm": 0.8777790069580078, + "learning_rate": 4.815699817310375e-06, + "loss": 0.5479, + "step": 10125 + }, + { + "epoch": 0.7444493456844582, + "grad_norm": 0.8223150968551636, + "learning_rate": 4.8156635060368145e-06, + "loss": 0.543, + "step": 10126 + }, + { + "epoch": 0.744522864284664, + "grad_norm": 0.8078228235244751, + "learning_rate": 4.815627191323462e-06, + "loss": 0.5047, + "step": 10127 + }, + { + "epoch": 0.7445963828848698, + "grad_norm": 0.8076865673065186, + "learning_rate": 4.8155908731703726e-06, + "loss": 0.5328, + "step": 10128 + }, + { + "epoch": 0.7446699014850757, + "grad_norm": 0.8382828235626221, + "learning_rate": 4.8155545515775995e-06, + "loss": 0.5254, + "step": 10129 + }, + { + "epoch": 0.7447434200852816, + "grad_norm": 0.8280465006828308, + "learning_rate": 4.815518226545196e-06, + "loss": 0.5349, + "step": 10130 + }, + { + "epoch": 0.7448169386854874, + "grad_norm": 0.8209459185600281, + "learning_rate": 4.8154818980732185e-06, + "loss": 0.5212, + "step": 10131 + }, + { + "epoch": 0.7448904572856933, + "grad_norm": 0.8156742453575134, + "learning_rate": 4.815445566161718e-06, + "loss": 0.5679, + "step": 10132 + }, + { + "epoch": 0.7449639758858991, + "grad_norm": 0.8037152290344238, + "learning_rate": 4.815409230810751e-06, + "loss": 0.5044, + "step": 10133 + }, + { + "epoch": 0.745037494486105, + "grad_norm": 0.8743916153907776, + "learning_rate": 4.81537289202037e-06, + "loss": 0.555, + "step": 10134 + }, + { + "epoch": 0.7451110130863109, + "grad_norm": 0.8256641030311584, + "learning_rate": 4.815336549790629e-06, + "loss": 0.522, + "step": 10135 + }, + { + "epoch": 0.7451845316865167, + "grad_norm": 0.9340819120407104, + "learning_rate": 4.815300204121582e-06, + "loss": 0.5761, + "step": 10136 + }, + { + "epoch": 0.7452580502867225, + "grad_norm": 0.8405176997184753, + "learning_rate": 4.815263855013285e-06, + "loss": 0.53, + "step": 10137 + }, + { + "epoch": 0.7453315688869284, + "grad_norm": 0.7963041067123413, + "learning_rate": 4.8152275024657894e-06, + "loss": 0.5018, + "step": 10138 + }, + { + "epoch": 0.7454050874871343, + "grad_norm": 0.8225973844528198, + "learning_rate": 4.8151911464791495e-06, + "loss": 0.5483, + "step": 10139 + }, + { + "epoch": 0.7454786060873401, + "grad_norm": 0.8397067189216614, + "learning_rate": 4.81515478705342e-06, + "loss": 0.5552, + "step": 10140 + }, + { + "epoch": 0.745552124687546, + "grad_norm": 0.8150939345359802, + "learning_rate": 4.815118424188656e-06, + "loss": 0.5401, + "step": 10141 + }, + { + "epoch": 0.7456256432877518, + "grad_norm": 0.8361151218414307, + "learning_rate": 4.815082057884909e-06, + "loss": 0.5694, + "step": 10142 + }, + { + "epoch": 0.7456991618879577, + "grad_norm": 0.8545119762420654, + "learning_rate": 4.815045688142236e-06, + "loss": 0.5506, + "step": 10143 + }, + { + "epoch": 0.7457726804881635, + "grad_norm": 0.8630272746086121, + "learning_rate": 4.815009314960688e-06, + "loss": 0.5211, + "step": 10144 + }, + { + "epoch": 0.7458461990883694, + "grad_norm": 0.9150027632713318, + "learning_rate": 4.814972938340321e-06, + "loss": 0.5774, + "step": 10145 + }, + { + "epoch": 0.7459197176885752, + "grad_norm": 0.8209151029586792, + "learning_rate": 4.814936558281188e-06, + "loss": 0.5228, + "step": 10146 + }, + { + "epoch": 0.7459932362887811, + "grad_norm": 0.8391227126121521, + "learning_rate": 4.814900174783344e-06, + "loss": 0.5253, + "step": 10147 + }, + { + "epoch": 0.7460667548889869, + "grad_norm": 0.8235474228858948, + "learning_rate": 4.814863787846843e-06, + "loss": 0.5089, + "step": 10148 + }, + { + "epoch": 0.7461402734891928, + "grad_norm": 0.8342710733413696, + "learning_rate": 4.8148273974717374e-06, + "loss": 0.5188, + "step": 10149 + }, + { + "epoch": 0.7462137920893986, + "grad_norm": 0.8172957301139832, + "learning_rate": 4.8147910036580825e-06, + "loss": 0.4809, + "step": 10150 + }, + { + "epoch": 0.7462873106896045, + "grad_norm": 0.8584080338478088, + "learning_rate": 4.814754606405933e-06, + "loss": 0.525, + "step": 10151 + }, + { + "epoch": 0.7463608292898103, + "grad_norm": 0.843845784664154, + "learning_rate": 4.814718205715343e-06, + "loss": 0.5331, + "step": 10152 + }, + { + "epoch": 0.7464343478900162, + "grad_norm": 0.8141693472862244, + "learning_rate": 4.814681801586364e-06, + "loss": 0.5159, + "step": 10153 + }, + { + "epoch": 0.746507866490222, + "grad_norm": 0.9443715214729309, + "learning_rate": 4.814645394019053e-06, + "loss": 0.5594, + "step": 10154 + }, + { + "epoch": 0.7465813850904279, + "grad_norm": 0.8416028618812561, + "learning_rate": 4.814608983013463e-06, + "loss": 0.5595, + "step": 10155 + }, + { + "epoch": 0.7466549036906337, + "grad_norm": 0.8878920078277588, + "learning_rate": 4.814572568569648e-06, + "loss": 0.6042, + "step": 10156 + }, + { + "epoch": 0.7467284222908396, + "grad_norm": 0.8306199908256531, + "learning_rate": 4.814536150687662e-06, + "loss": 0.5708, + "step": 10157 + }, + { + "epoch": 0.7468019408910455, + "grad_norm": 0.7986963391304016, + "learning_rate": 4.81449972936756e-06, + "loss": 0.4775, + "step": 10158 + }, + { + "epoch": 0.7468754594912513, + "grad_norm": 0.7943423390388489, + "learning_rate": 4.8144633046093946e-06, + "loss": 0.5232, + "step": 10159 + }, + { + "epoch": 0.7469489780914571, + "grad_norm": 0.8397079110145569, + "learning_rate": 4.814426876413221e-06, + "loss": 0.5244, + "step": 10160 + }, + { + "epoch": 0.747022496691663, + "grad_norm": 0.8462828993797302, + "learning_rate": 4.814390444779094e-06, + "loss": 0.5188, + "step": 10161 + }, + { + "epoch": 0.7470960152918689, + "grad_norm": 0.8442651629447937, + "learning_rate": 4.814354009707065e-06, + "loss": 0.528, + "step": 10162 + }, + { + "epoch": 0.7471695338920747, + "grad_norm": 0.8692923188209534, + "learning_rate": 4.814317571197191e-06, + "loss": 0.5289, + "step": 10163 + }, + { + "epoch": 0.7472430524922805, + "grad_norm": 0.8851388692855835, + "learning_rate": 4.814281129249524e-06, + "loss": 0.4932, + "step": 10164 + }, + { + "epoch": 0.7473165710924864, + "grad_norm": 0.8360166549682617, + "learning_rate": 4.81424468386412e-06, + "loss": 0.5481, + "step": 10165 + }, + { + "epoch": 0.7473900896926923, + "grad_norm": 0.8549612760543823, + "learning_rate": 4.814208235041031e-06, + "loss": 0.525, + "step": 10166 + }, + { + "epoch": 0.7474636082928982, + "grad_norm": 0.8155908584594727, + "learning_rate": 4.814171782780314e-06, + "loss": 0.5263, + "step": 10167 + }, + { + "epoch": 0.7475371268931039, + "grad_norm": 0.8505427837371826, + "learning_rate": 4.814135327082021e-06, + "loss": 0.5639, + "step": 10168 + }, + { + "epoch": 0.7476106454933098, + "grad_norm": 0.8623645305633545, + "learning_rate": 4.8140988679462065e-06, + "loss": 0.5592, + "step": 10169 + }, + { + "epoch": 0.7476841640935157, + "grad_norm": 0.8269482254981995, + "learning_rate": 4.814062405372925e-06, + "loss": 0.5734, + "step": 10170 + }, + { + "epoch": 0.7477576826937216, + "grad_norm": 0.8158605098724365, + "learning_rate": 4.81402593936223e-06, + "loss": 0.5378, + "step": 10171 + }, + { + "epoch": 0.7478312012939273, + "grad_norm": 0.8617229461669922, + "learning_rate": 4.813989469914176e-06, + "loss": 0.5657, + "step": 10172 + }, + { + "epoch": 0.7479047198941332, + "grad_norm": 0.8488666415214539, + "learning_rate": 4.813952997028819e-06, + "loss": 0.4761, + "step": 10173 + }, + { + "epoch": 0.7479782384943391, + "grad_norm": 0.7992455363273621, + "learning_rate": 4.81391652070621e-06, + "loss": 0.5519, + "step": 10174 + }, + { + "epoch": 0.748051757094545, + "grad_norm": 0.8022369146347046, + "learning_rate": 4.813880040946405e-06, + "loss": 0.5582, + "step": 10175 + }, + { + "epoch": 0.7481252756947507, + "grad_norm": 0.8373310565948486, + "learning_rate": 4.813843557749458e-06, + "loss": 0.5841, + "step": 10176 + }, + { + "epoch": 0.7481987942949566, + "grad_norm": 0.7904412746429443, + "learning_rate": 4.813807071115424e-06, + "loss": 0.4963, + "step": 10177 + }, + { + "epoch": 0.7482723128951625, + "grad_norm": 0.8207305669784546, + "learning_rate": 4.813770581044354e-06, + "loss": 0.5229, + "step": 10178 + }, + { + "epoch": 0.7483458314953684, + "grad_norm": 0.8266283273696899, + "learning_rate": 4.813734087536307e-06, + "loss": 0.5422, + "step": 10179 + }, + { + "epoch": 0.7484193500955741, + "grad_norm": 0.8620912432670593, + "learning_rate": 4.813697590591333e-06, + "loss": 0.5572, + "step": 10180 + }, + { + "epoch": 0.74849286869578, + "grad_norm": 0.8511542081832886, + "learning_rate": 4.813661090209489e-06, + "loss": 0.4981, + "step": 10181 + }, + { + "epoch": 0.7485663872959859, + "grad_norm": 0.8198342323303223, + "learning_rate": 4.813624586390828e-06, + "loss": 0.548, + "step": 10182 + }, + { + "epoch": 0.7486399058961918, + "grad_norm": 0.7769510746002197, + "learning_rate": 4.813588079135404e-06, + "loss": 0.5498, + "step": 10183 + }, + { + "epoch": 0.7487134244963976, + "grad_norm": 0.8315170407295227, + "learning_rate": 4.813551568443272e-06, + "loss": 0.5365, + "step": 10184 + }, + { + "epoch": 0.7487869430966034, + "grad_norm": 0.7986992597579956, + "learning_rate": 4.813515054314485e-06, + "loss": 0.4969, + "step": 10185 + }, + { + "epoch": 0.7488604616968093, + "grad_norm": 0.8365127444267273, + "learning_rate": 4.813478536749099e-06, + "loss": 0.5305, + "step": 10186 + }, + { + "epoch": 0.7489339802970152, + "grad_norm": 0.848400890827179, + "learning_rate": 4.813442015747167e-06, + "loss": 0.5538, + "step": 10187 + }, + { + "epoch": 0.749007498897221, + "grad_norm": 0.8080681562423706, + "learning_rate": 4.8134054913087436e-06, + "loss": 0.5323, + "step": 10188 + }, + { + "epoch": 0.7490810174974268, + "grad_norm": 0.8561474084854126, + "learning_rate": 4.813368963433883e-06, + "loss": 0.5417, + "step": 10189 + }, + { + "epoch": 0.7491545360976327, + "grad_norm": 0.8371536731719971, + "learning_rate": 4.813332432122639e-06, + "loss": 0.5164, + "step": 10190 + }, + { + "epoch": 0.7492280546978386, + "grad_norm": 0.8348843455314636, + "learning_rate": 4.813295897375066e-06, + "loss": 0.5532, + "step": 10191 + }, + { + "epoch": 0.7493015732980444, + "grad_norm": 0.7976164817810059, + "learning_rate": 4.81325935919122e-06, + "loss": 0.5284, + "step": 10192 + }, + { + "epoch": 0.7493750918982502, + "grad_norm": 0.8691771030426025, + "learning_rate": 4.8132228175711535e-06, + "loss": 0.5664, + "step": 10193 + }, + { + "epoch": 0.7494486104984561, + "grad_norm": 0.7632983326911926, + "learning_rate": 4.813186272514921e-06, + "loss": 0.4727, + "step": 10194 + }, + { + "epoch": 0.749522129098662, + "grad_norm": 0.9634777903556824, + "learning_rate": 4.813149724022577e-06, + "loss": 0.5552, + "step": 10195 + }, + { + "epoch": 0.7495956476988678, + "grad_norm": 0.8046792149543762, + "learning_rate": 4.813113172094175e-06, + "loss": 0.5773, + "step": 10196 + }, + { + "epoch": 0.7496691662990737, + "grad_norm": 0.865375280380249, + "learning_rate": 4.813076616729771e-06, + "loss": 0.5686, + "step": 10197 + }, + { + "epoch": 0.7497426848992795, + "grad_norm": 0.8046164512634277, + "learning_rate": 4.813040057929418e-06, + "loss": 0.5189, + "step": 10198 + }, + { + "epoch": 0.7498162034994854, + "grad_norm": 0.8124054670333862, + "learning_rate": 4.813003495693171e-06, + "loss": 0.5225, + "step": 10199 + }, + { + "epoch": 0.7498897220996912, + "grad_norm": 0.8024898767471313, + "learning_rate": 4.812966930021083e-06, + "loss": 0.528, + "step": 10200 + }, + { + "epoch": 0.7499632406998971, + "grad_norm": 0.8379092812538147, + "learning_rate": 4.812930360913211e-06, + "loss": 0.4967, + "step": 10201 + }, + { + "epoch": 0.7500367593001029, + "grad_norm": 0.844000518321991, + "learning_rate": 4.812893788369606e-06, + "loss": 0.5163, + "step": 10202 + }, + { + "epoch": 0.7501102779003088, + "grad_norm": 0.8272560834884644, + "learning_rate": 4.812857212390324e-06, + "loss": 0.5578, + "step": 10203 + }, + { + "epoch": 0.7501837965005146, + "grad_norm": 0.8625856041908264, + "learning_rate": 4.8128206329754205e-06, + "loss": 0.5792, + "step": 10204 + }, + { + "epoch": 0.7502573151007205, + "grad_norm": 0.8634403944015503, + "learning_rate": 4.812784050124948e-06, + "loss": 0.5852, + "step": 10205 + }, + { + "epoch": 0.7503308337009263, + "grad_norm": 0.8180461525917053, + "learning_rate": 4.81274746383896e-06, + "loss": 0.5478, + "step": 10206 + }, + { + "epoch": 0.7504043523011322, + "grad_norm": 0.9309381246566772, + "learning_rate": 4.812710874117515e-06, + "loss": 0.57, + "step": 10207 + }, + { + "epoch": 0.750477870901338, + "grad_norm": 0.8212215900421143, + "learning_rate": 4.812674280960663e-06, + "loss": 0.5553, + "step": 10208 + }, + { + "epoch": 0.7505513895015439, + "grad_norm": 0.7973989844322205, + "learning_rate": 4.8126376843684595e-06, + "loss": 0.4971, + "step": 10209 + }, + { + "epoch": 0.7506249081017498, + "grad_norm": 0.8416316509246826, + "learning_rate": 4.81260108434096e-06, + "loss": 0.5137, + "step": 10210 + }, + { + "epoch": 0.7506984267019556, + "grad_norm": 0.7953634262084961, + "learning_rate": 4.812564480878218e-06, + "loss": 0.5367, + "step": 10211 + }, + { + "epoch": 0.7507719453021614, + "grad_norm": 0.8489105701446533, + "learning_rate": 4.812527873980288e-06, + "loss": 0.4892, + "step": 10212 + }, + { + "epoch": 0.7508454639023673, + "grad_norm": 0.831511914730072, + "learning_rate": 4.812491263647226e-06, + "loss": 0.5103, + "step": 10213 + }, + { + "epoch": 0.7509189825025732, + "grad_norm": 0.8352476954460144, + "learning_rate": 4.812454649879083e-06, + "loss": 0.5562, + "step": 10214 + }, + { + "epoch": 0.750992501102779, + "grad_norm": 0.8771212100982666, + "learning_rate": 4.812418032675916e-06, + "loss": 0.5242, + "step": 10215 + }, + { + "epoch": 0.7510660197029848, + "grad_norm": 0.8242813944816589, + "learning_rate": 4.812381412037779e-06, + "loss": 0.5252, + "step": 10216 + }, + { + "epoch": 0.7511395383031907, + "grad_norm": 0.8453404903411865, + "learning_rate": 4.812344787964725e-06, + "loss": 0.5291, + "step": 10217 + }, + { + "epoch": 0.7512130569033966, + "grad_norm": 0.8234066367149353, + "learning_rate": 4.81230816045681e-06, + "loss": 0.5005, + "step": 10218 + }, + { + "epoch": 0.7512865755036024, + "grad_norm": 0.8030783534049988, + "learning_rate": 4.812271529514088e-06, + "loss": 0.5115, + "step": 10219 + }, + { + "epoch": 0.7513600941038082, + "grad_norm": 0.8115867376327515, + "learning_rate": 4.812234895136613e-06, + "loss": 0.5192, + "step": 10220 + }, + { + "epoch": 0.7514336127040141, + "grad_norm": 0.834136426448822, + "learning_rate": 4.81219825732444e-06, + "loss": 0.5197, + "step": 10221 + }, + { + "epoch": 0.75150713130422, + "grad_norm": 0.8192460536956787, + "learning_rate": 4.812161616077623e-06, + "loss": 0.5479, + "step": 10222 + }, + { + "epoch": 0.7515806499044259, + "grad_norm": 0.8068962693214417, + "learning_rate": 4.812124971396217e-06, + "loss": 0.5159, + "step": 10223 + }, + { + "epoch": 0.7516541685046316, + "grad_norm": 0.7740306854248047, + "learning_rate": 4.812088323280275e-06, + "loss": 0.5008, + "step": 10224 + }, + { + "epoch": 0.7517276871048375, + "grad_norm": 0.8528626561164856, + "learning_rate": 4.8120516717298525e-06, + "loss": 0.5571, + "step": 10225 + }, + { + "epoch": 0.7518012057050434, + "grad_norm": 0.8580121397972107, + "learning_rate": 4.8120150167450045e-06, + "loss": 0.5393, + "step": 10226 + }, + { + "epoch": 0.7518747243052493, + "grad_norm": 0.8074237704277039, + "learning_rate": 4.811978358325784e-06, + "loss": 0.4902, + "step": 10227 + }, + { + "epoch": 0.751948242905455, + "grad_norm": 0.8238807916641235, + "learning_rate": 4.811941696472247e-06, + "loss": 0.5505, + "step": 10228 + }, + { + "epoch": 0.7520217615056609, + "grad_norm": 0.7904471158981323, + "learning_rate": 4.811905031184447e-06, + "loss": 0.5065, + "step": 10229 + }, + { + "epoch": 0.7520952801058668, + "grad_norm": 0.7997993230819702, + "learning_rate": 4.811868362462438e-06, + "loss": 0.4957, + "step": 10230 + }, + { + "epoch": 0.7521687987060727, + "grad_norm": 0.8886420130729675, + "learning_rate": 4.811831690306276e-06, + "loss": 0.5667, + "step": 10231 + }, + { + "epoch": 0.7522423173062784, + "grad_norm": 0.8253592848777771, + "learning_rate": 4.811795014716015e-06, + "loss": 0.5442, + "step": 10232 + }, + { + "epoch": 0.7523158359064843, + "grad_norm": 0.8507479429244995, + "learning_rate": 4.811758335691708e-06, + "loss": 0.5422, + "step": 10233 + }, + { + "epoch": 0.7523893545066902, + "grad_norm": 0.8575054407119751, + "learning_rate": 4.811721653233411e-06, + "loss": 0.5267, + "step": 10234 + }, + { + "epoch": 0.7524628731068961, + "grad_norm": 0.9500731825828552, + "learning_rate": 4.811684967341178e-06, + "loss": 0.5673, + "step": 10235 + }, + { + "epoch": 0.7525363917071018, + "grad_norm": 0.8807106018066406, + "learning_rate": 4.811648278015064e-06, + "loss": 0.5725, + "step": 10236 + }, + { + "epoch": 0.7526099103073077, + "grad_norm": 0.7787485718727112, + "learning_rate": 4.811611585255123e-06, + "loss": 0.4512, + "step": 10237 + }, + { + "epoch": 0.7526834289075136, + "grad_norm": 0.8275383114814758, + "learning_rate": 4.811574889061409e-06, + "loss": 0.5391, + "step": 10238 + }, + { + "epoch": 0.7527569475077195, + "grad_norm": 0.8541898727416992, + "learning_rate": 4.811538189433977e-06, + "loss": 0.5478, + "step": 10239 + }, + { + "epoch": 0.7528304661079253, + "grad_norm": 0.8427082896232605, + "learning_rate": 4.811501486372882e-06, + "loss": 0.5102, + "step": 10240 + }, + { + "epoch": 0.7529039847081311, + "grad_norm": 0.8384830355644226, + "learning_rate": 4.811464779878178e-06, + "loss": 0.5192, + "step": 10241 + }, + { + "epoch": 0.752977503308337, + "grad_norm": 0.8534827828407288, + "learning_rate": 4.81142806994992e-06, + "loss": 0.5435, + "step": 10242 + }, + { + "epoch": 0.7530510219085429, + "grad_norm": 0.8112414479255676, + "learning_rate": 4.811391356588162e-06, + "loss": 0.53, + "step": 10243 + }, + { + "epoch": 0.7531245405087487, + "grad_norm": 0.8305468559265137, + "learning_rate": 4.811354639792959e-06, + "loss": 0.5378, + "step": 10244 + }, + { + "epoch": 0.7531980591089545, + "grad_norm": 0.8912906050682068, + "learning_rate": 4.811317919564365e-06, + "loss": 0.5306, + "step": 10245 + }, + { + "epoch": 0.7532715777091604, + "grad_norm": 0.8473263382911682, + "learning_rate": 4.8112811959024345e-06, + "loss": 0.5466, + "step": 10246 + }, + { + "epoch": 0.7533450963093663, + "grad_norm": 0.851737916469574, + "learning_rate": 4.811244468807222e-06, + "loss": 0.5663, + "step": 10247 + }, + { + "epoch": 0.7534186149095721, + "grad_norm": 0.7956842184066772, + "learning_rate": 4.811207738278783e-06, + "loss": 0.5265, + "step": 10248 + }, + { + "epoch": 0.753492133509778, + "grad_norm": 0.8306012749671936, + "learning_rate": 4.811171004317171e-06, + "loss": 0.5407, + "step": 10249 + }, + { + "epoch": 0.7535656521099838, + "grad_norm": 0.8538923859596252, + "learning_rate": 4.811134266922442e-06, + "loss": 0.5503, + "step": 10250 + }, + { + "epoch": 0.7536391707101897, + "grad_norm": 0.8314139246940613, + "learning_rate": 4.811097526094648e-06, + "loss": 0.5363, + "step": 10251 + }, + { + "epoch": 0.7537126893103955, + "grad_norm": 0.8595540523529053, + "learning_rate": 4.811060781833847e-06, + "loss": 0.5175, + "step": 10252 + }, + { + "epoch": 0.7537862079106014, + "grad_norm": 0.821700394153595, + "learning_rate": 4.811024034140091e-06, + "loss": 0.5114, + "step": 10253 + }, + { + "epoch": 0.7538597265108072, + "grad_norm": 0.8440825343132019, + "learning_rate": 4.8109872830134355e-06, + "loss": 0.592, + "step": 10254 + }, + { + "epoch": 0.7539332451110131, + "grad_norm": 0.8099267482757568, + "learning_rate": 4.8109505284539346e-06, + "loss": 0.4726, + "step": 10255 + }, + { + "epoch": 0.7540067637112189, + "grad_norm": 0.836168110370636, + "learning_rate": 4.810913770461643e-06, + "loss": 0.5447, + "step": 10256 + }, + { + "epoch": 0.7540802823114248, + "grad_norm": 0.8354970216751099, + "learning_rate": 4.8108770090366154e-06, + "loss": 0.5776, + "step": 10257 + }, + { + "epoch": 0.7541538009116306, + "grad_norm": 0.8143665194511414, + "learning_rate": 4.810840244178907e-06, + "loss": 0.5403, + "step": 10258 + }, + { + "epoch": 0.7542273195118365, + "grad_norm": 0.8524022698402405, + "learning_rate": 4.810803475888572e-06, + "loss": 0.5411, + "step": 10259 + }, + { + "epoch": 0.7543008381120423, + "grad_norm": 0.8139626979827881, + "learning_rate": 4.810766704165666e-06, + "loss": 0.5176, + "step": 10260 + }, + { + "epoch": 0.7543743567122482, + "grad_norm": 0.7893404960632324, + "learning_rate": 4.8107299290102415e-06, + "loss": 0.4656, + "step": 10261 + }, + { + "epoch": 0.754447875312454, + "grad_norm": 0.818498432636261, + "learning_rate": 4.810693150422354e-06, + "loss": 0.4774, + "step": 10262 + }, + { + "epoch": 0.7545213939126599, + "grad_norm": 0.8049955368041992, + "learning_rate": 4.810656368402058e-06, + "loss": 0.5156, + "step": 10263 + }, + { + "epoch": 0.7545949125128657, + "grad_norm": 0.8231286406517029, + "learning_rate": 4.81061958294941e-06, + "loss": 0.5524, + "step": 10264 + }, + { + "epoch": 0.7546684311130716, + "grad_norm": 0.7899958491325378, + "learning_rate": 4.810582794064462e-06, + "loss": 0.453, + "step": 10265 + }, + { + "epoch": 0.7547419497132775, + "grad_norm": 0.8588510751724243, + "learning_rate": 4.81054600174727e-06, + "loss": 0.5811, + "step": 10266 + }, + { + "epoch": 0.7548154683134833, + "grad_norm": 0.8275521397590637, + "learning_rate": 4.810509205997889e-06, + "loss": 0.5222, + "step": 10267 + }, + { + "epoch": 0.7548889869136891, + "grad_norm": 0.8360216617584229, + "learning_rate": 4.810472406816372e-06, + "loss": 0.5684, + "step": 10268 + }, + { + "epoch": 0.754962505513895, + "grad_norm": 0.7797479033470154, + "learning_rate": 4.810435604202775e-06, + "loss": 0.4606, + "step": 10269 + }, + { + "epoch": 0.7550360241141009, + "grad_norm": 0.7916223406791687, + "learning_rate": 4.810398798157153e-06, + "loss": 0.4988, + "step": 10270 + }, + { + "epoch": 0.7551095427143067, + "grad_norm": 0.7776148915290833, + "learning_rate": 4.81036198867956e-06, + "loss": 0.5262, + "step": 10271 + }, + { + "epoch": 0.7551830613145126, + "grad_norm": 0.7917611598968506, + "learning_rate": 4.81032517577005e-06, + "loss": 0.5399, + "step": 10272 + }, + { + "epoch": 0.7552565799147184, + "grad_norm": 0.8198309540748596, + "learning_rate": 4.81028835942868e-06, + "loss": 0.5599, + "step": 10273 + }, + { + "epoch": 0.7553300985149243, + "grad_norm": 0.8610906004905701, + "learning_rate": 4.810251539655502e-06, + "loss": 0.5444, + "step": 10274 + }, + { + "epoch": 0.7554036171151302, + "grad_norm": 0.8118707537651062, + "learning_rate": 4.810214716450572e-06, + "loss": 0.5366, + "step": 10275 + }, + { + "epoch": 0.755477135715336, + "grad_norm": 0.8583564162254333, + "learning_rate": 4.810177889813944e-06, + "loss": 0.54, + "step": 10276 + }, + { + "epoch": 0.7555506543155418, + "grad_norm": 0.8487796187400818, + "learning_rate": 4.810141059745674e-06, + "loss": 0.5787, + "step": 10277 + }, + { + "epoch": 0.7556241729157477, + "grad_norm": 0.8174240589141846, + "learning_rate": 4.810104226245816e-06, + "loss": 0.523, + "step": 10278 + }, + { + "epoch": 0.7556976915159536, + "grad_norm": 0.846435010433197, + "learning_rate": 4.810067389314425e-06, + "loss": 0.5536, + "step": 10279 + }, + { + "epoch": 0.7557712101161594, + "grad_norm": 0.833888053894043, + "learning_rate": 4.810030548951554e-06, + "loss": 0.5355, + "step": 10280 + }, + { + "epoch": 0.7558447287163652, + "grad_norm": 0.7954726219177246, + "learning_rate": 4.809993705157261e-06, + "loss": 0.4788, + "step": 10281 + }, + { + "epoch": 0.7559182473165711, + "grad_norm": 0.8203378915786743, + "learning_rate": 4.8099568579315975e-06, + "loss": 0.5567, + "step": 10282 + }, + { + "epoch": 0.755991765916777, + "grad_norm": 0.8103793263435364, + "learning_rate": 4.80992000727462e-06, + "loss": 0.5445, + "step": 10283 + }, + { + "epoch": 0.7560652845169828, + "grad_norm": 0.7932429909706116, + "learning_rate": 4.8098831531863825e-06, + "loss": 0.503, + "step": 10284 + }, + { + "epoch": 0.7561388031171886, + "grad_norm": 0.8937979340553284, + "learning_rate": 4.80984629566694e-06, + "loss": 0.5444, + "step": 10285 + }, + { + "epoch": 0.7562123217173945, + "grad_norm": 0.8314617276191711, + "learning_rate": 4.809809434716348e-06, + "loss": 0.5199, + "step": 10286 + }, + { + "epoch": 0.7562858403176004, + "grad_norm": 0.8100855350494385, + "learning_rate": 4.80977257033466e-06, + "loss": 0.5475, + "step": 10287 + }, + { + "epoch": 0.7563593589178063, + "grad_norm": 0.8594852685928345, + "learning_rate": 4.8097357025219315e-06, + "loss": 0.5169, + "step": 10288 + }, + { + "epoch": 0.756432877518012, + "grad_norm": 0.8494696617126465, + "learning_rate": 4.809698831278217e-06, + "loss": 0.5062, + "step": 10289 + }, + { + "epoch": 0.7565063961182179, + "grad_norm": 0.7749537825584412, + "learning_rate": 4.8096619566035715e-06, + "loss": 0.5208, + "step": 10290 + }, + { + "epoch": 0.7565799147184238, + "grad_norm": 0.7924597263336182, + "learning_rate": 4.80962507849805e-06, + "loss": 0.5294, + "step": 10291 + }, + { + "epoch": 0.7566534333186297, + "grad_norm": 0.8159472942352295, + "learning_rate": 4.809588196961706e-06, + "loss": 0.5583, + "step": 10292 + }, + { + "epoch": 0.7567269519188354, + "grad_norm": 0.8155662417411804, + "learning_rate": 4.809551311994596e-06, + "loss": 0.4965, + "step": 10293 + }, + { + "epoch": 0.7568004705190413, + "grad_norm": 0.8324490785598755, + "learning_rate": 4.809514423596774e-06, + "loss": 0.5449, + "step": 10294 + }, + { + "epoch": 0.7568739891192472, + "grad_norm": 0.8247500658035278, + "learning_rate": 4.809477531768294e-06, + "loss": 0.5236, + "step": 10295 + }, + { + "epoch": 0.7569475077194531, + "grad_norm": 0.8442362546920776, + "learning_rate": 4.809440636509213e-06, + "loss": 0.4893, + "step": 10296 + }, + { + "epoch": 0.7570210263196588, + "grad_norm": 0.8644261956214905, + "learning_rate": 4.809403737819583e-06, + "loss": 0.5182, + "step": 10297 + }, + { + "epoch": 0.7570945449198647, + "grad_norm": 0.8122292757034302, + "learning_rate": 4.8093668356994605e-06, + "loss": 0.5261, + "step": 10298 + }, + { + "epoch": 0.7571680635200706, + "grad_norm": 0.8710063099861145, + "learning_rate": 4.8093299301489006e-06, + "loss": 0.5961, + "step": 10299 + }, + { + "epoch": 0.7572415821202765, + "grad_norm": 0.8154159784317017, + "learning_rate": 4.809293021167956e-06, + "loss": 0.5133, + "step": 10300 + }, + { + "epoch": 0.7573151007204822, + "grad_norm": 0.8694455623626709, + "learning_rate": 4.8092561087566845e-06, + "loss": 0.5211, + "step": 10301 + }, + { + "epoch": 0.7573886193206881, + "grad_norm": 0.8251890540122986, + "learning_rate": 4.80921919291514e-06, + "loss": 0.5062, + "step": 10302 + }, + { + "epoch": 0.757462137920894, + "grad_norm": 0.8373616933822632, + "learning_rate": 4.809182273643376e-06, + "loss": 0.5086, + "step": 10303 + }, + { + "epoch": 0.7575356565210999, + "grad_norm": 0.8450796604156494, + "learning_rate": 4.809145350941448e-06, + "loss": 0.532, + "step": 10304 + }, + { + "epoch": 0.7576091751213057, + "grad_norm": 0.8073848485946655, + "learning_rate": 4.8091084248094115e-06, + "loss": 0.5028, + "step": 10305 + }, + { + "epoch": 0.7576826937215115, + "grad_norm": 0.8351794481277466, + "learning_rate": 4.809071495247321e-06, + "loss": 0.5541, + "step": 10306 + }, + { + "epoch": 0.7577562123217174, + "grad_norm": 0.8472034931182861, + "learning_rate": 4.80903456225523e-06, + "loss": 0.5233, + "step": 10307 + }, + { + "epoch": 0.7578297309219233, + "grad_norm": 0.8515232801437378, + "learning_rate": 4.808997625833196e-06, + "loss": 0.5525, + "step": 10308 + }, + { + "epoch": 0.7579032495221291, + "grad_norm": 0.8485268354415894, + "learning_rate": 4.808960685981272e-06, + "loss": 0.5369, + "step": 10309 + }, + { + "epoch": 0.7579767681223349, + "grad_norm": 0.837741494178772, + "learning_rate": 4.8089237426995134e-06, + "loss": 0.5397, + "step": 10310 + }, + { + "epoch": 0.7580502867225408, + "grad_norm": 0.8714830279350281, + "learning_rate": 4.8088867959879744e-06, + "loss": 0.5522, + "step": 10311 + }, + { + "epoch": 0.7581238053227467, + "grad_norm": 0.8444538712501526, + "learning_rate": 4.808849845846712e-06, + "loss": 0.5422, + "step": 10312 + }, + { + "epoch": 0.7581973239229525, + "grad_norm": 0.8638488054275513, + "learning_rate": 4.808812892275778e-06, + "loss": 0.5757, + "step": 10313 + }, + { + "epoch": 0.7582708425231584, + "grad_norm": 0.8633882403373718, + "learning_rate": 4.80877593527523e-06, + "loss": 0.5917, + "step": 10314 + }, + { + "epoch": 0.7583443611233642, + "grad_norm": 0.8091937303543091, + "learning_rate": 4.808738974845121e-06, + "loss": 0.5595, + "step": 10315 + }, + { + "epoch": 0.7584178797235701, + "grad_norm": 0.8457619547843933, + "learning_rate": 4.808702010985506e-06, + "loss": 0.5217, + "step": 10316 + }, + { + "epoch": 0.7584913983237759, + "grad_norm": 0.8195582032203674, + "learning_rate": 4.808665043696442e-06, + "loss": 0.5569, + "step": 10317 + }, + { + "epoch": 0.7585649169239818, + "grad_norm": 0.824089527130127, + "learning_rate": 4.808628072977982e-06, + "loss": 0.5373, + "step": 10318 + }, + { + "epoch": 0.7586384355241876, + "grad_norm": 0.8755716681480408, + "learning_rate": 4.808591098830182e-06, + "loss": 0.6003, + "step": 10319 + }, + { + "epoch": 0.7587119541243935, + "grad_norm": 0.8295056223869324, + "learning_rate": 4.808554121253095e-06, + "loss": 0.5022, + "step": 10320 + }, + { + "epoch": 0.7587854727245993, + "grad_norm": 0.8252952694892883, + "learning_rate": 4.808517140246778e-06, + "loss": 0.5344, + "step": 10321 + }, + { + "epoch": 0.7588589913248052, + "grad_norm": 0.8951216340065002, + "learning_rate": 4.8084801558112855e-06, + "loss": 0.5825, + "step": 10322 + }, + { + "epoch": 0.758932509925011, + "grad_norm": 0.8366326093673706, + "learning_rate": 4.808443167946671e-06, + "loss": 0.5053, + "step": 10323 + }, + { + "epoch": 0.7590060285252169, + "grad_norm": 0.7988861203193665, + "learning_rate": 4.808406176652992e-06, + "loss": 0.5315, + "step": 10324 + }, + { + "epoch": 0.7590795471254227, + "grad_norm": 0.8463971018791199, + "learning_rate": 4.808369181930302e-06, + "loss": 0.5295, + "step": 10325 + }, + { + "epoch": 0.7591530657256286, + "grad_norm": 0.8225906491279602, + "learning_rate": 4.808332183778655e-06, + "loss": 0.5053, + "step": 10326 + }, + { + "epoch": 0.7592265843258345, + "grad_norm": 0.8753818869590759, + "learning_rate": 4.808295182198107e-06, + "loss": 0.5442, + "step": 10327 + }, + { + "epoch": 0.7593001029260403, + "grad_norm": 0.8072828650474548, + "learning_rate": 4.8082581771887134e-06, + "loss": 0.5153, + "step": 10328 + }, + { + "epoch": 0.7593736215262461, + "grad_norm": 0.8136546015739441, + "learning_rate": 4.8082211687505285e-06, + "loss": 0.5387, + "step": 10329 + }, + { + "epoch": 0.759447140126452, + "grad_norm": 0.8243510127067566, + "learning_rate": 4.808184156883608e-06, + "loss": 0.5789, + "step": 10330 + }, + { + "epoch": 0.7595206587266579, + "grad_norm": 0.8526034951210022, + "learning_rate": 4.808147141588006e-06, + "loss": 0.5514, + "step": 10331 + }, + { + "epoch": 0.7595941773268637, + "grad_norm": 0.8101229667663574, + "learning_rate": 4.808110122863777e-06, + "loss": 0.5169, + "step": 10332 + }, + { + "epoch": 0.7596676959270695, + "grad_norm": 0.8062682151794434, + "learning_rate": 4.808073100710978e-06, + "loss": 0.4986, + "step": 10333 + }, + { + "epoch": 0.7597412145272754, + "grad_norm": 0.8394662737846375, + "learning_rate": 4.808036075129662e-06, + "loss": 0.5529, + "step": 10334 + }, + { + "epoch": 0.7598147331274813, + "grad_norm": 0.8090305924415588, + "learning_rate": 4.807999046119885e-06, + "loss": 0.5131, + "step": 10335 + }, + { + "epoch": 0.7598882517276871, + "grad_norm": 0.8480478525161743, + "learning_rate": 4.807962013681702e-06, + "loss": 0.5408, + "step": 10336 + }, + { + "epoch": 0.7599617703278929, + "grad_norm": 0.816391110420227, + "learning_rate": 4.807924977815168e-06, + "loss": 0.5022, + "step": 10337 + }, + { + "epoch": 0.7600352889280988, + "grad_norm": 0.8568453192710876, + "learning_rate": 4.807887938520337e-06, + "loss": 0.538, + "step": 10338 + }, + { + "epoch": 0.7601088075283047, + "grad_norm": 0.8474832773208618, + "learning_rate": 4.807850895797265e-06, + "loss": 0.5738, + "step": 10339 + }, + { + "epoch": 0.7601823261285106, + "grad_norm": 0.8589595556259155, + "learning_rate": 4.807813849646007e-06, + "loss": 0.5386, + "step": 10340 + }, + { + "epoch": 0.7602558447287163, + "grad_norm": 0.814878523349762, + "learning_rate": 4.807776800066619e-06, + "loss": 0.543, + "step": 10341 + }, + { + "epoch": 0.7603293633289222, + "grad_norm": 0.8520500659942627, + "learning_rate": 4.807739747059153e-06, + "loss": 0.5391, + "step": 10342 + }, + { + "epoch": 0.7604028819291281, + "grad_norm": 0.8316921591758728, + "learning_rate": 4.807702690623668e-06, + "loss": 0.5512, + "step": 10343 + }, + { + "epoch": 0.760476400529334, + "grad_norm": 0.8480863571166992, + "learning_rate": 4.807665630760215e-06, + "loss": 0.5519, + "step": 10344 + }, + { + "epoch": 0.7605499191295397, + "grad_norm": 0.8486985564231873, + "learning_rate": 4.8076285674688525e-06, + "loss": 0.593, + "step": 10345 + }, + { + "epoch": 0.7606234377297456, + "grad_norm": 0.8295016288757324, + "learning_rate": 4.807591500749634e-06, + "loss": 0.5691, + "step": 10346 + }, + { + "epoch": 0.7606969563299515, + "grad_norm": 0.9015218019485474, + "learning_rate": 4.807554430602614e-06, + "loss": 0.5613, + "step": 10347 + }, + { + "epoch": 0.7607704749301574, + "grad_norm": 0.8332432508468628, + "learning_rate": 4.807517357027849e-06, + "loss": 0.5588, + "step": 10348 + }, + { + "epoch": 0.7608439935303631, + "grad_norm": 0.8224297761917114, + "learning_rate": 4.807480280025393e-06, + "loss": 0.5313, + "step": 10349 + }, + { + "epoch": 0.760917512130569, + "grad_norm": 0.815075695514679, + "learning_rate": 4.807443199595301e-06, + "loss": 0.5362, + "step": 10350 + }, + { + "epoch": 0.7609910307307749, + "grad_norm": 0.8545467853546143, + "learning_rate": 4.8074061157376295e-06, + "loss": 0.5504, + "step": 10351 + }, + { + "epoch": 0.7610645493309808, + "grad_norm": 0.8031228184700012, + "learning_rate": 4.8073690284524314e-06, + "loss": 0.5161, + "step": 10352 + }, + { + "epoch": 0.7611380679311865, + "grad_norm": 0.8537706732749939, + "learning_rate": 4.807331937739764e-06, + "loss": 0.5347, + "step": 10353 + }, + { + "epoch": 0.7612115865313924, + "grad_norm": 0.7883549332618713, + "learning_rate": 4.80729484359968e-06, + "loss": 0.5116, + "step": 10354 + }, + { + "epoch": 0.7612851051315983, + "grad_norm": 0.845377504825592, + "learning_rate": 4.807257746032238e-06, + "loss": 0.5214, + "step": 10355 + }, + { + "epoch": 0.7613586237318042, + "grad_norm": 0.866531252861023, + "learning_rate": 4.807220645037489e-06, + "loss": 0.5298, + "step": 10356 + }, + { + "epoch": 0.76143214233201, + "grad_norm": 0.8627543449401855, + "learning_rate": 4.80718354061549e-06, + "loss": 0.6069, + "step": 10357 + }, + { + "epoch": 0.7615056609322158, + "grad_norm": 0.7973942160606384, + "learning_rate": 4.807146432766297e-06, + "loss": 0.5206, + "step": 10358 + }, + { + "epoch": 0.7615791795324217, + "grad_norm": 0.8145812153816223, + "learning_rate": 4.807109321489965e-06, + "loss": 0.524, + "step": 10359 + }, + { + "epoch": 0.7616526981326276, + "grad_norm": 0.8112028241157532, + "learning_rate": 4.8070722067865475e-06, + "loss": 0.5771, + "step": 10360 + }, + { + "epoch": 0.7617262167328334, + "grad_norm": 0.8178080320358276, + "learning_rate": 4.8070350886561e-06, + "loss": 0.5495, + "step": 10361 + }, + { + "epoch": 0.7617997353330392, + "grad_norm": 0.8405719995498657, + "learning_rate": 4.806997967098679e-06, + "loss": 0.5431, + "step": 10362 + }, + { + "epoch": 0.7618732539332451, + "grad_norm": 0.8717169761657715, + "learning_rate": 4.806960842114338e-06, + "loss": 0.5304, + "step": 10363 + }, + { + "epoch": 0.761946772533451, + "grad_norm": 0.8584299683570862, + "learning_rate": 4.806923713703135e-06, + "loss": 0.553, + "step": 10364 + }, + { + "epoch": 0.7620202911336568, + "grad_norm": 0.8287584185600281, + "learning_rate": 4.806886581865121e-06, + "loss": 0.5372, + "step": 10365 + }, + { + "epoch": 0.7620938097338626, + "grad_norm": 0.8618944883346558, + "learning_rate": 4.806849446600355e-06, + "loss": 0.5865, + "step": 10366 + }, + { + "epoch": 0.7621673283340685, + "grad_norm": 0.8552265167236328, + "learning_rate": 4.80681230790889e-06, + "loss": 0.548, + "step": 10367 + }, + { + "epoch": 0.7622408469342744, + "grad_norm": 0.8397519588470459, + "learning_rate": 4.806775165790781e-06, + "loss": 0.5799, + "step": 10368 + }, + { + "epoch": 0.7623143655344802, + "grad_norm": 0.865967869758606, + "learning_rate": 4.806738020246085e-06, + "loss": 0.546, + "step": 10369 + }, + { + "epoch": 0.7623878841346861, + "grad_norm": 0.8293899893760681, + "learning_rate": 4.806700871274854e-06, + "loss": 0.509, + "step": 10370 + }, + { + "epoch": 0.7624614027348919, + "grad_norm": 0.8583315014839172, + "learning_rate": 4.806663718877147e-06, + "loss": 0.4921, + "step": 10371 + }, + { + "epoch": 0.7625349213350978, + "grad_norm": 0.8731511831283569, + "learning_rate": 4.806626563053016e-06, + "loss": 0.5046, + "step": 10372 + }, + { + "epoch": 0.7626084399353036, + "grad_norm": 0.8329875469207764, + "learning_rate": 4.8065894038025185e-06, + "loss": 0.5229, + "step": 10373 + }, + { + "epoch": 0.7626819585355095, + "grad_norm": 0.8238754868507385, + "learning_rate": 4.806552241125708e-06, + "loss": 0.5449, + "step": 10374 + }, + { + "epoch": 0.7627554771357153, + "grad_norm": 0.9130180478096008, + "learning_rate": 4.8065150750226406e-06, + "loss": 0.5624, + "step": 10375 + }, + { + "epoch": 0.7628289957359212, + "grad_norm": 0.8491313457489014, + "learning_rate": 4.8064779054933716e-06, + "loss": 0.5498, + "step": 10376 + }, + { + "epoch": 0.762902514336127, + "grad_norm": 0.8071407079696655, + "learning_rate": 4.806440732537957e-06, + "loss": 0.5189, + "step": 10377 + }, + { + "epoch": 0.7629760329363329, + "grad_norm": 0.8258479833602905, + "learning_rate": 4.8064035561564496e-06, + "loss": 0.5279, + "step": 10378 + }, + { + "epoch": 0.7630495515365388, + "grad_norm": 0.814190149307251, + "learning_rate": 4.806366376348907e-06, + "loss": 0.5526, + "step": 10379 + }, + { + "epoch": 0.7631230701367446, + "grad_norm": 0.8791201114654541, + "learning_rate": 4.806329193115382e-06, + "loss": 0.558, + "step": 10380 + }, + { + "epoch": 0.7631965887369504, + "grad_norm": 0.8284847140312195, + "learning_rate": 4.8062920064559325e-06, + "loss": 0.5182, + "step": 10381 + }, + { + "epoch": 0.7632701073371563, + "grad_norm": 0.768090546131134, + "learning_rate": 4.806254816370612e-06, + "loss": 0.5139, + "step": 10382 + }, + { + "epoch": 0.7633436259373622, + "grad_norm": 0.8125748038291931, + "learning_rate": 4.806217622859476e-06, + "loss": 0.5478, + "step": 10383 + }, + { + "epoch": 0.763417144537568, + "grad_norm": 0.7952128052711487, + "learning_rate": 4.806180425922581e-06, + "loss": 0.5066, + "step": 10384 + }, + { + "epoch": 0.7634906631377738, + "grad_norm": 0.7842061519622803, + "learning_rate": 4.80614322555998e-06, + "loss": 0.5222, + "step": 10385 + }, + { + "epoch": 0.7635641817379797, + "grad_norm": 0.8243536353111267, + "learning_rate": 4.80610602177173e-06, + "loss": 0.5075, + "step": 10386 + }, + { + "epoch": 0.7636377003381856, + "grad_norm": 0.8706895709037781, + "learning_rate": 4.806068814557887e-06, + "loss": 0.5085, + "step": 10387 + }, + { + "epoch": 0.7637112189383914, + "grad_norm": 0.8190063238143921, + "learning_rate": 4.806031603918504e-06, + "loss": 0.5127, + "step": 10388 + }, + { + "epoch": 0.7637847375385972, + "grad_norm": 0.7878653407096863, + "learning_rate": 4.8059943898536365e-06, + "loss": 0.5227, + "step": 10389 + }, + { + "epoch": 0.7638582561388031, + "grad_norm": 0.8120626211166382, + "learning_rate": 4.805957172363342e-06, + "loss": 0.5737, + "step": 10390 + }, + { + "epoch": 0.763931774739009, + "grad_norm": 0.9146125912666321, + "learning_rate": 4.805919951447674e-06, + "loss": 0.5632, + "step": 10391 + }, + { + "epoch": 0.7640052933392149, + "grad_norm": 0.8023003935813904, + "learning_rate": 4.805882727106688e-06, + "loss": 0.4978, + "step": 10392 + }, + { + "epoch": 0.7640788119394206, + "grad_norm": 0.8023028373718262, + "learning_rate": 4.805845499340439e-06, + "loss": 0.469, + "step": 10393 + }, + { + "epoch": 0.7641523305396265, + "grad_norm": 0.8741618394851685, + "learning_rate": 4.8058082681489835e-06, + "loss": 0.5314, + "step": 10394 + }, + { + "epoch": 0.7642258491398324, + "grad_norm": 0.8602863550186157, + "learning_rate": 4.805771033532376e-06, + "loss": 0.5624, + "step": 10395 + }, + { + "epoch": 0.7642993677400383, + "grad_norm": 0.8030120134353638, + "learning_rate": 4.805733795490671e-06, + "loss": 0.523, + "step": 10396 + }, + { + "epoch": 0.764372886340244, + "grad_norm": 0.8606557250022888, + "learning_rate": 4.805696554023926e-06, + "loss": 0.5336, + "step": 10397 + }, + { + "epoch": 0.7644464049404499, + "grad_norm": 0.7968544363975525, + "learning_rate": 4.805659309132195e-06, + "loss": 0.4898, + "step": 10398 + }, + { + "epoch": 0.7645199235406558, + "grad_norm": 0.8480812907218933, + "learning_rate": 4.805622060815533e-06, + "loss": 0.5737, + "step": 10399 + }, + { + "epoch": 0.7645934421408617, + "grad_norm": 0.8478822112083435, + "learning_rate": 4.805584809073995e-06, + "loss": 0.5602, + "step": 10400 + }, + { + "epoch": 0.7646669607410674, + "grad_norm": 0.8654072284698486, + "learning_rate": 4.805547553907637e-06, + "loss": 0.516, + "step": 10401 + }, + { + "epoch": 0.7647404793412733, + "grad_norm": 0.8011120557785034, + "learning_rate": 4.805510295316515e-06, + "loss": 0.5027, + "step": 10402 + }, + { + "epoch": 0.7648139979414792, + "grad_norm": 0.7773462533950806, + "learning_rate": 4.805473033300684e-06, + "loss": 0.5477, + "step": 10403 + }, + { + "epoch": 0.7648875165416851, + "grad_norm": 0.8459109663963318, + "learning_rate": 4.805435767860198e-06, + "loss": 0.5122, + "step": 10404 + }, + { + "epoch": 0.7649610351418908, + "grad_norm": 0.7890443205833435, + "learning_rate": 4.805398498995114e-06, + "loss": 0.5362, + "step": 10405 + }, + { + "epoch": 0.7650345537420967, + "grad_norm": 0.832197368144989, + "learning_rate": 4.805361226705487e-06, + "loss": 0.5144, + "step": 10406 + }, + { + "epoch": 0.7651080723423026, + "grad_norm": 0.8340194821357727, + "learning_rate": 4.805323950991372e-06, + "loss": 0.5133, + "step": 10407 + }, + { + "epoch": 0.7651815909425085, + "grad_norm": 0.8385947942733765, + "learning_rate": 4.805286671852825e-06, + "loss": 0.5632, + "step": 10408 + }, + { + "epoch": 0.7652551095427143, + "grad_norm": 0.8116714358329773, + "learning_rate": 4.8052493892899e-06, + "loss": 0.5279, + "step": 10409 + }, + { + "epoch": 0.7653286281429201, + "grad_norm": 0.8198890686035156, + "learning_rate": 4.805212103302653e-06, + "loss": 0.4908, + "step": 10410 + }, + { + "epoch": 0.765402146743126, + "grad_norm": 0.8392840027809143, + "learning_rate": 4.805174813891141e-06, + "loss": 0.5107, + "step": 10411 + }, + { + "epoch": 0.7654756653433319, + "grad_norm": 0.7989683151245117, + "learning_rate": 4.805137521055417e-06, + "loss": 0.4816, + "step": 10412 + }, + { + "epoch": 0.7655491839435378, + "grad_norm": 0.805112898349762, + "learning_rate": 4.805100224795538e-06, + "loss": 0.5249, + "step": 10413 + }, + { + "epoch": 0.7656227025437435, + "grad_norm": 0.8347674608230591, + "learning_rate": 4.8050629251115585e-06, + "loss": 0.5487, + "step": 10414 + }, + { + "epoch": 0.7656962211439494, + "grad_norm": 0.837818443775177, + "learning_rate": 4.805025622003534e-06, + "loss": 0.5368, + "step": 10415 + }, + { + "epoch": 0.7657697397441553, + "grad_norm": 0.8219664692878723, + "learning_rate": 4.80498831547152e-06, + "loss": 0.5267, + "step": 10416 + }, + { + "epoch": 0.7658432583443612, + "grad_norm": 0.8507187366485596, + "learning_rate": 4.8049510055155735e-06, + "loss": 0.5216, + "step": 10417 + }, + { + "epoch": 0.765916776944567, + "grad_norm": 0.8125010132789612, + "learning_rate": 4.804913692135748e-06, + "loss": 0.5558, + "step": 10418 + }, + { + "epoch": 0.7659902955447728, + "grad_norm": 0.814423143863678, + "learning_rate": 4.804876375332098e-06, + "loss": 0.5247, + "step": 10419 + }, + { + "epoch": 0.7660638141449787, + "grad_norm": 0.8180586099624634, + "learning_rate": 4.804839055104682e-06, + "loss": 0.5098, + "step": 10420 + }, + { + "epoch": 0.7661373327451846, + "grad_norm": 0.8170748949050903, + "learning_rate": 4.804801731453553e-06, + "loss": 0.5085, + "step": 10421 + }, + { + "epoch": 0.7662108513453904, + "grad_norm": 0.8018540143966675, + "learning_rate": 4.804764404378767e-06, + "loss": 0.5258, + "step": 10422 + }, + { + "epoch": 0.7662843699455962, + "grad_norm": 0.7920952439308167, + "learning_rate": 4.80472707388038e-06, + "loss": 0.5275, + "step": 10423 + }, + { + "epoch": 0.7663578885458021, + "grad_norm": 0.7908985614776611, + "learning_rate": 4.804689739958447e-06, + "loss": 0.4951, + "step": 10424 + }, + { + "epoch": 0.766431407146008, + "grad_norm": 0.8026199340820312, + "learning_rate": 4.804652402613024e-06, + "loss": 0.5127, + "step": 10425 + }, + { + "epoch": 0.7665049257462138, + "grad_norm": 0.761799156665802, + "learning_rate": 4.804615061844166e-06, + "loss": 0.4918, + "step": 10426 + }, + { + "epoch": 0.7665784443464196, + "grad_norm": 0.8179019093513489, + "learning_rate": 4.804577717651928e-06, + "loss": 0.548, + "step": 10427 + }, + { + "epoch": 0.7666519629466255, + "grad_norm": 0.8499442934989929, + "learning_rate": 4.8045403700363665e-06, + "loss": 0.523, + "step": 10428 + }, + { + "epoch": 0.7667254815468314, + "grad_norm": 0.7743000388145447, + "learning_rate": 4.804503018997536e-06, + "loss": 0.4811, + "step": 10429 + }, + { + "epoch": 0.7667990001470372, + "grad_norm": 0.8301342129707336, + "learning_rate": 4.804465664535493e-06, + "loss": 0.5535, + "step": 10430 + }, + { + "epoch": 0.766872518747243, + "grad_norm": 0.8182762265205383, + "learning_rate": 4.804428306650293e-06, + "loss": 0.5768, + "step": 10431 + }, + { + "epoch": 0.7669460373474489, + "grad_norm": 0.795863926410675, + "learning_rate": 4.804390945341989e-06, + "loss": 0.5397, + "step": 10432 + }, + { + "epoch": 0.7670195559476548, + "grad_norm": 0.8435518145561218, + "learning_rate": 4.80435358061064e-06, + "loss": 0.5363, + "step": 10433 + }, + { + "epoch": 0.7670930745478606, + "grad_norm": 0.8061639070510864, + "learning_rate": 4.8043162124563e-06, + "loss": 0.5147, + "step": 10434 + }, + { + "epoch": 0.7671665931480665, + "grad_norm": 0.7771467566490173, + "learning_rate": 4.804278840879024e-06, + "loss": 0.5538, + "step": 10435 + }, + { + "epoch": 0.7672401117482723, + "grad_norm": 0.8182812333106995, + "learning_rate": 4.8042414658788675e-06, + "loss": 0.5574, + "step": 10436 + }, + { + "epoch": 0.7673136303484782, + "grad_norm": 0.7983241081237793, + "learning_rate": 4.804204087455887e-06, + "loss": 0.5273, + "step": 10437 + }, + { + "epoch": 0.767387148948684, + "grad_norm": 0.8776808381080627, + "learning_rate": 4.804166705610137e-06, + "loss": 0.5637, + "step": 10438 + }, + { + "epoch": 0.7674606675488899, + "grad_norm": 0.8498031497001648, + "learning_rate": 4.804129320341674e-06, + "loss": 0.5495, + "step": 10439 + }, + { + "epoch": 0.7675341861490957, + "grad_norm": 0.8238917589187622, + "learning_rate": 4.804091931650553e-06, + "loss": 0.5431, + "step": 10440 + }, + { + "epoch": 0.7676077047493016, + "grad_norm": 0.7802296280860901, + "learning_rate": 4.80405453953683e-06, + "loss": 0.5404, + "step": 10441 + }, + { + "epoch": 0.7676812233495074, + "grad_norm": 0.8241801261901855, + "learning_rate": 4.80401714400056e-06, + "loss": 0.5252, + "step": 10442 + }, + { + "epoch": 0.7677547419497133, + "grad_norm": 0.8934826254844666, + "learning_rate": 4.803979745041798e-06, + "loss": 0.5547, + "step": 10443 + }, + { + "epoch": 0.7678282605499192, + "grad_norm": 0.8188368678092957, + "learning_rate": 4.8039423426606005e-06, + "loss": 0.5424, + "step": 10444 + }, + { + "epoch": 0.767901779150125, + "grad_norm": 0.8271037340164185, + "learning_rate": 4.803904936857024e-06, + "loss": 0.5078, + "step": 10445 + }, + { + "epoch": 0.7679752977503308, + "grad_norm": 0.8462382555007935, + "learning_rate": 4.803867527631121e-06, + "loss": 0.5226, + "step": 10446 + }, + { + "epoch": 0.7680488163505367, + "grad_norm": 0.8334097266197205, + "learning_rate": 4.803830114982951e-06, + "loss": 0.5358, + "step": 10447 + }, + { + "epoch": 0.7681223349507426, + "grad_norm": 0.8815063238143921, + "learning_rate": 4.803792698912565e-06, + "loss": 0.5249, + "step": 10448 + }, + { + "epoch": 0.7681958535509484, + "grad_norm": 0.791286289691925, + "learning_rate": 4.803755279420023e-06, + "loss": 0.5392, + "step": 10449 + }, + { + "epoch": 0.7682693721511542, + "grad_norm": 0.8429998755455017, + "learning_rate": 4.803717856505379e-06, + "loss": 0.5392, + "step": 10450 + }, + { + "epoch": 0.7683428907513601, + "grad_norm": 0.8495466709136963, + "learning_rate": 4.803680430168686e-06, + "loss": 0.5289, + "step": 10451 + }, + { + "epoch": 0.768416409351566, + "grad_norm": 0.8241242170333862, + "learning_rate": 4.803643000410004e-06, + "loss": 0.5081, + "step": 10452 + }, + { + "epoch": 0.7684899279517718, + "grad_norm": 0.8681772351264954, + "learning_rate": 4.8036055672293854e-06, + "loss": 0.5552, + "step": 10453 + }, + { + "epoch": 0.7685634465519776, + "grad_norm": 0.8949360251426697, + "learning_rate": 4.803568130626888e-06, + "loss": 0.5516, + "step": 10454 + }, + { + "epoch": 0.7686369651521835, + "grad_norm": 0.8650417923927307, + "learning_rate": 4.803530690602565e-06, + "loss": 0.507, + "step": 10455 + }, + { + "epoch": 0.7687104837523894, + "grad_norm": 0.7864852547645569, + "learning_rate": 4.803493247156474e-06, + "loss": 0.4988, + "step": 10456 + }, + { + "epoch": 0.7687840023525953, + "grad_norm": 0.8314998149871826, + "learning_rate": 4.803455800288669e-06, + "loss": 0.5404, + "step": 10457 + }, + { + "epoch": 0.768857520952801, + "grad_norm": 0.8799079656600952, + "learning_rate": 4.803418349999208e-06, + "loss": 0.5333, + "step": 10458 + }, + { + "epoch": 0.7689310395530069, + "grad_norm": 0.8054483532905579, + "learning_rate": 4.803380896288144e-06, + "loss": 0.5497, + "step": 10459 + }, + { + "epoch": 0.7690045581532128, + "grad_norm": 0.8727361559867859, + "learning_rate": 4.803343439155535e-06, + "loss": 0.5191, + "step": 10460 + }, + { + "epoch": 0.7690780767534187, + "grad_norm": 0.8369923830032349, + "learning_rate": 4.803305978601434e-06, + "loss": 0.554, + "step": 10461 + }, + { + "epoch": 0.7691515953536244, + "grad_norm": 0.8269214630126953, + "learning_rate": 4.803268514625899e-06, + "loss": 0.5486, + "step": 10462 + }, + { + "epoch": 0.7692251139538303, + "grad_norm": 0.8175886273384094, + "learning_rate": 4.803231047228985e-06, + "loss": 0.4965, + "step": 10463 + }, + { + "epoch": 0.7692986325540362, + "grad_norm": 0.8826624751091003, + "learning_rate": 4.803193576410746e-06, + "loss": 0.5544, + "step": 10464 + }, + { + "epoch": 0.7693721511542421, + "grad_norm": 0.8413821458816528, + "learning_rate": 4.803156102171241e-06, + "loss": 0.4866, + "step": 10465 + }, + { + "epoch": 0.7694456697544478, + "grad_norm": 0.8354073166847229, + "learning_rate": 4.8031186245105225e-06, + "loss": 0.5596, + "step": 10466 + }, + { + "epoch": 0.7695191883546537, + "grad_norm": 0.826618492603302, + "learning_rate": 4.8030811434286475e-06, + "loss": 0.5289, + "step": 10467 + }, + { + "epoch": 0.7695927069548596, + "grad_norm": 0.8037545680999756, + "learning_rate": 4.803043658925672e-06, + "loss": 0.5157, + "step": 10468 + }, + { + "epoch": 0.7696662255550655, + "grad_norm": 0.8373697996139526, + "learning_rate": 4.803006171001651e-06, + "loss": 0.528, + "step": 10469 + }, + { + "epoch": 0.7697397441552712, + "grad_norm": 0.805964469909668, + "learning_rate": 4.80296867965664e-06, + "loss": 0.5409, + "step": 10470 + }, + { + "epoch": 0.7698132627554771, + "grad_norm": 0.7919174432754517, + "learning_rate": 4.802931184890696e-06, + "loss": 0.5131, + "step": 10471 + }, + { + "epoch": 0.769886781355683, + "grad_norm": 0.8267989158630371, + "learning_rate": 4.802893686703873e-06, + "loss": 0.5797, + "step": 10472 + }, + { + "epoch": 0.7699602999558889, + "grad_norm": 0.8432163596153259, + "learning_rate": 4.802856185096227e-06, + "loss": 0.5896, + "step": 10473 + }, + { + "epoch": 0.7700338185560947, + "grad_norm": 0.8150167465209961, + "learning_rate": 4.802818680067816e-06, + "loss": 0.5747, + "step": 10474 + }, + { + "epoch": 0.7701073371563005, + "grad_norm": 0.8420301079750061, + "learning_rate": 4.802781171618693e-06, + "loss": 0.5585, + "step": 10475 + }, + { + "epoch": 0.7701808557565064, + "grad_norm": 0.8560304641723633, + "learning_rate": 4.802743659748914e-06, + "loss": 0.5274, + "step": 10476 + }, + { + "epoch": 0.7702543743567123, + "grad_norm": 0.8412010669708252, + "learning_rate": 4.802706144458536e-06, + "loss": 0.5202, + "step": 10477 + }, + { + "epoch": 0.7703278929569181, + "grad_norm": 0.8018699288368225, + "learning_rate": 4.802668625747615e-06, + "loss": 0.5212, + "step": 10478 + }, + { + "epoch": 0.7704014115571239, + "grad_norm": 0.8615643978118896, + "learning_rate": 4.802631103616206e-06, + "loss": 0.5794, + "step": 10479 + }, + { + "epoch": 0.7704749301573298, + "grad_norm": 0.8483918905258179, + "learning_rate": 4.802593578064363e-06, + "loss": 0.5407, + "step": 10480 + }, + { + "epoch": 0.7705484487575357, + "grad_norm": 0.8278160095214844, + "learning_rate": 4.802556049092144e-06, + "loss": 0.4564, + "step": 10481 + }, + { + "epoch": 0.7706219673577415, + "grad_norm": 0.8402330875396729, + "learning_rate": 4.8025185166996035e-06, + "loss": 0.5387, + "step": 10482 + }, + { + "epoch": 0.7706954859579473, + "grad_norm": 0.8226373195648193, + "learning_rate": 4.802480980886799e-06, + "loss": 0.5505, + "step": 10483 + }, + { + "epoch": 0.7707690045581532, + "grad_norm": 0.7804208993911743, + "learning_rate": 4.802443441653784e-06, + "loss": 0.4645, + "step": 10484 + }, + { + "epoch": 0.7708425231583591, + "grad_norm": 0.8029263019561768, + "learning_rate": 4.802405899000616e-06, + "loss": 0.4975, + "step": 10485 + }, + { + "epoch": 0.7709160417585649, + "grad_norm": 0.842869222164154, + "learning_rate": 4.80236835292735e-06, + "loss": 0.5477, + "step": 10486 + }, + { + "epoch": 0.7709895603587708, + "grad_norm": 0.7893240451812744, + "learning_rate": 4.802330803434042e-06, + "loss": 0.5619, + "step": 10487 + }, + { + "epoch": 0.7710630789589766, + "grad_norm": 0.8682080507278442, + "learning_rate": 4.802293250520748e-06, + "loss": 0.5327, + "step": 10488 + }, + { + "epoch": 0.7711365975591825, + "grad_norm": 0.8350527882575989, + "learning_rate": 4.802255694187522e-06, + "loss": 0.5314, + "step": 10489 + }, + { + "epoch": 0.7712101161593883, + "grad_norm": 0.8577840924263, + "learning_rate": 4.802218134434422e-06, + "loss": 0.5731, + "step": 10490 + }, + { + "epoch": 0.7712836347595942, + "grad_norm": 0.8442997336387634, + "learning_rate": 4.802180571261503e-06, + "loss": 0.5325, + "step": 10491 + }, + { + "epoch": 0.7713571533598, + "grad_norm": 0.8586961627006531, + "learning_rate": 4.802143004668821e-06, + "loss": 0.5373, + "step": 10492 + }, + { + "epoch": 0.7714306719600059, + "grad_norm": 0.80288165807724, + "learning_rate": 4.802105434656432e-06, + "loss": 0.5151, + "step": 10493 + }, + { + "epoch": 0.7715041905602117, + "grad_norm": 0.8312978148460388, + "learning_rate": 4.802067861224391e-06, + "loss": 0.5219, + "step": 10494 + }, + { + "epoch": 0.7715777091604176, + "grad_norm": 0.8587441444396973, + "learning_rate": 4.802030284372755e-06, + "loss": 0.565, + "step": 10495 + }, + { + "epoch": 0.7716512277606234, + "grad_norm": 0.8253948092460632, + "learning_rate": 4.801992704101578e-06, + "loss": 0.5084, + "step": 10496 + }, + { + "epoch": 0.7717247463608293, + "grad_norm": 0.7926103472709656, + "learning_rate": 4.8019551204109175e-06, + "loss": 0.4942, + "step": 10497 + }, + { + "epoch": 0.7717982649610351, + "grad_norm": 0.7992749810218811, + "learning_rate": 4.8019175333008285e-06, + "loss": 0.5544, + "step": 10498 + }, + { + "epoch": 0.771871783561241, + "grad_norm": 0.8076448440551758, + "learning_rate": 4.801879942771367e-06, + "loss": 0.5448, + "step": 10499 + }, + { + "epoch": 0.7719453021614469, + "grad_norm": 0.889887809753418, + "learning_rate": 4.801842348822589e-06, + "loss": 0.5202, + "step": 10500 + }, + { + "epoch": 0.7720188207616527, + "grad_norm": 0.8800585269927979, + "learning_rate": 4.80180475145455e-06, + "loss": 0.5956, + "step": 10501 + }, + { + "epoch": 0.7720923393618585, + "grad_norm": 0.7973965406417847, + "learning_rate": 4.801767150667307e-06, + "loss": 0.5165, + "step": 10502 + }, + { + "epoch": 0.7721658579620644, + "grad_norm": 0.7979219555854797, + "learning_rate": 4.801729546460913e-06, + "loss": 0.523, + "step": 10503 + }, + { + "epoch": 0.7722393765622703, + "grad_norm": 0.807275116443634, + "learning_rate": 4.801691938835428e-06, + "loss": 0.5247, + "step": 10504 + }, + { + "epoch": 0.7723128951624761, + "grad_norm": 0.7943615913391113, + "learning_rate": 4.8016543277909045e-06, + "loss": 0.5391, + "step": 10505 + }, + { + "epoch": 0.7723864137626819, + "grad_norm": 0.8086714148521423, + "learning_rate": 4.801616713327399e-06, + "loss": 0.5482, + "step": 10506 + }, + { + "epoch": 0.7724599323628878, + "grad_norm": 0.8780484795570374, + "learning_rate": 4.801579095444969e-06, + "loss": 0.5791, + "step": 10507 + }, + { + "epoch": 0.7725334509630937, + "grad_norm": 0.8412054777145386, + "learning_rate": 4.8015414741436695e-06, + "loss": 0.5803, + "step": 10508 + }, + { + "epoch": 0.7726069695632996, + "grad_norm": 0.7813959121704102, + "learning_rate": 4.801503849423556e-06, + "loss": 0.4896, + "step": 10509 + }, + { + "epoch": 0.7726804881635053, + "grad_norm": 0.8083725571632385, + "learning_rate": 4.801466221284683e-06, + "loss": 0.5267, + "step": 10510 + }, + { + "epoch": 0.7727540067637112, + "grad_norm": 0.8437464237213135, + "learning_rate": 4.80142858972711e-06, + "loss": 0.5235, + "step": 10511 + }, + { + "epoch": 0.7728275253639171, + "grad_norm": 0.8477357625961304, + "learning_rate": 4.80139095475089e-06, + "loss": 0.53, + "step": 10512 + }, + { + "epoch": 0.772901043964123, + "grad_norm": 0.813996434211731, + "learning_rate": 4.801353316356079e-06, + "loss": 0.5747, + "step": 10513 + }, + { + "epoch": 0.7729745625643287, + "grad_norm": 0.8712180852890015, + "learning_rate": 4.801315674542735e-06, + "loss": 0.534, + "step": 10514 + }, + { + "epoch": 0.7730480811645346, + "grad_norm": 0.7903978228569031, + "learning_rate": 4.801278029310912e-06, + "loss": 0.5305, + "step": 10515 + }, + { + "epoch": 0.7731215997647405, + "grad_norm": 0.8261705636978149, + "learning_rate": 4.8012403806606665e-06, + "loss": 0.5331, + "step": 10516 + }, + { + "epoch": 0.7731951183649464, + "grad_norm": 0.8104559183120728, + "learning_rate": 4.801202728592054e-06, + "loss": 0.5185, + "step": 10517 + }, + { + "epoch": 0.7732686369651521, + "grad_norm": 0.8207666277885437, + "learning_rate": 4.801165073105131e-06, + "loss": 0.5648, + "step": 10518 + }, + { + "epoch": 0.773342155565358, + "grad_norm": 0.7879164218902588, + "learning_rate": 4.801127414199953e-06, + "loss": 0.5479, + "step": 10519 + }, + { + "epoch": 0.7734156741655639, + "grad_norm": 0.8206574320793152, + "learning_rate": 4.801089751876578e-06, + "loss": 0.5255, + "step": 10520 + }, + { + "epoch": 0.7734891927657698, + "grad_norm": 0.8442511558532715, + "learning_rate": 4.801052086135058e-06, + "loss": 0.5682, + "step": 10521 + }, + { + "epoch": 0.7735627113659755, + "grad_norm": 0.8110962510108948, + "learning_rate": 4.8010144169754525e-06, + "loss": 0.525, + "step": 10522 + }, + { + "epoch": 0.7736362299661814, + "grad_norm": 0.8485051989555359, + "learning_rate": 4.8009767443978164e-06, + "loss": 0.5192, + "step": 10523 + }, + { + "epoch": 0.7737097485663873, + "grad_norm": 0.8752320408821106, + "learning_rate": 4.800939068402204e-06, + "loss": 0.5669, + "step": 10524 + }, + { + "epoch": 0.7737832671665932, + "grad_norm": 0.8815372586250305, + "learning_rate": 4.800901388988673e-06, + "loss": 0.5652, + "step": 10525 + }, + { + "epoch": 0.773856785766799, + "grad_norm": 0.8714168667793274, + "learning_rate": 4.800863706157279e-06, + "loss": 0.5137, + "step": 10526 + }, + { + "epoch": 0.7739303043670048, + "grad_norm": 0.8010234832763672, + "learning_rate": 4.800826019908078e-06, + "loss": 0.5157, + "step": 10527 + }, + { + "epoch": 0.7740038229672107, + "grad_norm": 0.8426216244697571, + "learning_rate": 4.800788330241126e-06, + "loss": 0.6161, + "step": 10528 + }, + { + "epoch": 0.7740773415674166, + "grad_norm": 0.8710449934005737, + "learning_rate": 4.800750637156479e-06, + "loss": 0.548, + "step": 10529 + }, + { + "epoch": 0.7741508601676224, + "grad_norm": 0.8030955791473389, + "learning_rate": 4.800712940654194e-06, + "loss": 0.4985, + "step": 10530 + }, + { + "epoch": 0.7742243787678282, + "grad_norm": 0.8942474722862244, + "learning_rate": 4.800675240734325e-06, + "loss": 0.5426, + "step": 10531 + }, + { + "epoch": 0.7742978973680341, + "grad_norm": 0.8391851186752319, + "learning_rate": 4.8006375373969285e-06, + "loss": 0.5176, + "step": 10532 + }, + { + "epoch": 0.77437141596824, + "grad_norm": 0.7715606093406677, + "learning_rate": 4.800599830642061e-06, + "loss": 0.4775, + "step": 10533 + }, + { + "epoch": 0.7744449345684458, + "grad_norm": 0.8030086159706116, + "learning_rate": 4.800562120469778e-06, + "loss": 0.5005, + "step": 10534 + }, + { + "epoch": 0.7745184531686516, + "grad_norm": 0.9247521162033081, + "learning_rate": 4.800524406880137e-06, + "loss": 0.5831, + "step": 10535 + }, + { + "epoch": 0.7745919717688575, + "grad_norm": 0.8666000366210938, + "learning_rate": 4.800486689873193e-06, + "loss": 0.5433, + "step": 10536 + }, + { + "epoch": 0.7746654903690634, + "grad_norm": 0.8148276805877686, + "learning_rate": 4.800448969449001e-06, + "loss": 0.5417, + "step": 10537 + }, + { + "epoch": 0.7747390089692692, + "grad_norm": 0.8794915676116943, + "learning_rate": 4.800411245607619e-06, + "loss": 0.542, + "step": 10538 + }, + { + "epoch": 0.774812527569475, + "grad_norm": 0.8335277438163757, + "learning_rate": 4.800373518349101e-06, + "loss": 0.5178, + "step": 10539 + }, + { + "epoch": 0.7748860461696809, + "grad_norm": 0.7982462048530579, + "learning_rate": 4.800335787673505e-06, + "loss": 0.5398, + "step": 10540 + }, + { + "epoch": 0.7749595647698868, + "grad_norm": 0.8472418189048767, + "learning_rate": 4.800298053580885e-06, + "loss": 0.4997, + "step": 10541 + }, + { + "epoch": 0.7750330833700926, + "grad_norm": 0.85298752784729, + "learning_rate": 4.8002603160713e-06, + "loss": 0.4884, + "step": 10542 + }, + { + "epoch": 0.7751066019702985, + "grad_norm": 0.8672699332237244, + "learning_rate": 4.8002225751448025e-06, + "loss": 0.5856, + "step": 10543 + }, + { + "epoch": 0.7751801205705043, + "grad_norm": 0.8112961649894714, + "learning_rate": 4.800184830801451e-06, + "loss": 0.5202, + "step": 10544 + }, + { + "epoch": 0.7752536391707102, + "grad_norm": 0.8584012985229492, + "learning_rate": 4.800147083041302e-06, + "loss": 0.5865, + "step": 10545 + }, + { + "epoch": 0.775327157770916, + "grad_norm": 0.9053543210029602, + "learning_rate": 4.800109331864409e-06, + "loss": 0.5622, + "step": 10546 + }, + { + "epoch": 0.7754006763711219, + "grad_norm": 0.8307254314422607, + "learning_rate": 4.80007157727083e-06, + "loss": 0.531, + "step": 10547 + }, + { + "epoch": 0.7754741949713277, + "grad_norm": 0.7825506329536438, + "learning_rate": 4.800033819260621e-06, + "loss": 0.4962, + "step": 10548 + }, + { + "epoch": 0.7755477135715336, + "grad_norm": 0.8368204236030579, + "learning_rate": 4.799996057833836e-06, + "loss": 0.5676, + "step": 10549 + }, + { + "epoch": 0.7756212321717395, + "grad_norm": 0.8114379048347473, + "learning_rate": 4.799958292990535e-06, + "loss": 0.5094, + "step": 10550 + }, + { + "epoch": 0.7756947507719453, + "grad_norm": 0.8054068684577942, + "learning_rate": 4.799920524730771e-06, + "loss": 0.5085, + "step": 10551 + }, + { + "epoch": 0.7757682693721512, + "grad_norm": 0.8447481989860535, + "learning_rate": 4.7998827530546014e-06, + "loss": 0.5525, + "step": 10552 + }, + { + "epoch": 0.775841787972357, + "grad_norm": 0.7998230457305908, + "learning_rate": 4.799844977962081e-06, + "loss": 0.5399, + "step": 10553 + }, + { + "epoch": 0.7759153065725629, + "grad_norm": 0.8090458512306213, + "learning_rate": 4.799807199453268e-06, + "loss": 0.527, + "step": 10554 + }, + { + "epoch": 0.7759888251727687, + "grad_norm": 0.8645522594451904, + "learning_rate": 4.799769417528216e-06, + "loss": 0.5796, + "step": 10555 + }, + { + "epoch": 0.7760623437729746, + "grad_norm": 0.8571654558181763, + "learning_rate": 4.799731632186984e-06, + "loss": 0.5425, + "step": 10556 + }, + { + "epoch": 0.7761358623731804, + "grad_norm": 0.8022111654281616, + "learning_rate": 4.799693843429625e-06, + "loss": 0.5633, + "step": 10557 + }, + { + "epoch": 0.7762093809733863, + "grad_norm": 0.7878424525260925, + "learning_rate": 4.799656051256198e-06, + "loss": 0.4906, + "step": 10558 + }, + { + "epoch": 0.7762828995735921, + "grad_norm": 0.8027223944664001, + "learning_rate": 4.799618255666757e-06, + "loss": 0.5376, + "step": 10559 + }, + { + "epoch": 0.776356418173798, + "grad_norm": 0.8366406559944153, + "learning_rate": 4.79958045666136e-06, + "loss": 0.5101, + "step": 10560 + }, + { + "epoch": 0.7764299367740038, + "grad_norm": 0.7948976755142212, + "learning_rate": 4.799542654240061e-06, + "loss": 0.4986, + "step": 10561 + }, + { + "epoch": 0.7765034553742097, + "grad_norm": 0.8357480764389038, + "learning_rate": 4.7995048484029185e-06, + "loss": 0.5816, + "step": 10562 + }, + { + "epoch": 0.7765769739744155, + "grad_norm": 0.8610880970954895, + "learning_rate": 4.799467039149987e-06, + "loss": 0.5382, + "step": 10563 + }, + { + "epoch": 0.7766504925746214, + "grad_norm": 0.7927334308624268, + "learning_rate": 4.799429226481322e-06, + "loss": 0.5217, + "step": 10564 + }, + { + "epoch": 0.7767240111748273, + "grad_norm": 0.8688990473747253, + "learning_rate": 4.799391410396982e-06, + "loss": 0.5188, + "step": 10565 + }, + { + "epoch": 0.7767975297750331, + "grad_norm": 0.8211650848388672, + "learning_rate": 4.799353590897021e-06, + "loss": 0.5031, + "step": 10566 + }, + { + "epoch": 0.7768710483752389, + "grad_norm": 0.8375197649002075, + "learning_rate": 4.799315767981497e-06, + "loss": 0.5854, + "step": 10567 + }, + { + "epoch": 0.7769445669754448, + "grad_norm": 0.84108966588974, + "learning_rate": 4.799277941650466e-06, + "loss": 0.5116, + "step": 10568 + }, + { + "epoch": 0.7770180855756507, + "grad_norm": 0.8528844714164734, + "learning_rate": 4.7992401119039824e-06, + "loss": 0.5744, + "step": 10569 + }, + { + "epoch": 0.7770916041758565, + "grad_norm": 0.8483923077583313, + "learning_rate": 4.799202278742103e-06, + "loss": 0.5491, + "step": 10570 + }, + { + "epoch": 0.7771651227760623, + "grad_norm": 0.8448845148086548, + "learning_rate": 4.799164442164886e-06, + "loss": 0.5674, + "step": 10571 + }, + { + "epoch": 0.7772386413762682, + "grad_norm": 0.8410565853118896, + "learning_rate": 4.799126602172386e-06, + "loss": 0.5534, + "step": 10572 + }, + { + "epoch": 0.7773121599764741, + "grad_norm": 0.8922712802886963, + "learning_rate": 4.799088758764659e-06, + "loss": 0.5504, + "step": 10573 + }, + { + "epoch": 0.77738567857668, + "grad_norm": 0.815962553024292, + "learning_rate": 4.799050911941761e-06, + "loss": 0.5301, + "step": 10574 + }, + { + "epoch": 0.7774591971768857, + "grad_norm": 0.8057209849357605, + "learning_rate": 4.799013061703748e-06, + "loss": 0.5006, + "step": 10575 + }, + { + "epoch": 0.7775327157770916, + "grad_norm": 0.7987083196640015, + "learning_rate": 4.798975208050679e-06, + "loss": 0.5452, + "step": 10576 + }, + { + "epoch": 0.7776062343772975, + "grad_norm": 0.8073053956031799, + "learning_rate": 4.798937350982607e-06, + "loss": 0.5001, + "step": 10577 + }, + { + "epoch": 0.7776797529775034, + "grad_norm": 0.8131410479545593, + "learning_rate": 4.79889949049959e-06, + "loss": 0.5233, + "step": 10578 + }, + { + "epoch": 0.7777532715777091, + "grad_norm": 0.8179341554641724, + "learning_rate": 4.798861626601683e-06, + "loss": 0.5101, + "step": 10579 + }, + { + "epoch": 0.777826790177915, + "grad_norm": 0.8173497915267944, + "learning_rate": 4.798823759288943e-06, + "loss": 0.5149, + "step": 10580 + }, + { + "epoch": 0.7779003087781209, + "grad_norm": 0.7888452410697937, + "learning_rate": 4.798785888561427e-06, + "loss": 0.4829, + "step": 10581 + }, + { + "epoch": 0.7779738273783268, + "grad_norm": 0.819039523601532, + "learning_rate": 4.79874801441919e-06, + "loss": 0.49, + "step": 10582 + }, + { + "epoch": 0.7780473459785325, + "grad_norm": 0.8236765265464783, + "learning_rate": 4.798710136862289e-06, + "loss": 0.5025, + "step": 10583 + }, + { + "epoch": 0.7781208645787384, + "grad_norm": 0.8308722376823425, + "learning_rate": 4.79867225589078e-06, + "loss": 0.5651, + "step": 10584 + }, + { + "epoch": 0.7781943831789443, + "grad_norm": 0.8458476066589355, + "learning_rate": 4.798634371504718e-06, + "loss": 0.5446, + "step": 10585 + }, + { + "epoch": 0.7782679017791502, + "grad_norm": 0.8131529092788696, + "learning_rate": 4.7985964837041615e-06, + "loss": 0.5301, + "step": 10586 + }, + { + "epoch": 0.7783414203793559, + "grad_norm": 0.81990647315979, + "learning_rate": 4.798558592489166e-06, + "loss": 0.5223, + "step": 10587 + }, + { + "epoch": 0.7784149389795618, + "grad_norm": 0.8423801064491272, + "learning_rate": 4.798520697859787e-06, + "loss": 0.5701, + "step": 10588 + }, + { + "epoch": 0.7784884575797677, + "grad_norm": 0.8176277875900269, + "learning_rate": 4.798482799816081e-06, + "loss": 0.5104, + "step": 10589 + }, + { + "epoch": 0.7785619761799736, + "grad_norm": 0.8309590220451355, + "learning_rate": 4.798444898358106e-06, + "loss": 0.4936, + "step": 10590 + }, + { + "epoch": 0.7786354947801793, + "grad_norm": 0.8430067896842957, + "learning_rate": 4.798406993485916e-06, + "loss": 0.5342, + "step": 10591 + }, + { + "epoch": 0.7787090133803852, + "grad_norm": 0.8733850717544556, + "learning_rate": 4.798369085199569e-06, + "loss": 0.5557, + "step": 10592 + }, + { + "epoch": 0.7787825319805911, + "grad_norm": 0.7456334233283997, + "learning_rate": 4.79833117349912e-06, + "loss": 0.4993, + "step": 10593 + }, + { + "epoch": 0.778856050580797, + "grad_norm": 0.8086921572685242, + "learning_rate": 4.798293258384625e-06, + "loss": 0.5425, + "step": 10594 + }, + { + "epoch": 0.7789295691810028, + "grad_norm": 0.8345601558685303, + "learning_rate": 4.798255339856143e-06, + "loss": 0.5535, + "step": 10595 + }, + { + "epoch": 0.7790030877812086, + "grad_norm": 0.810375452041626, + "learning_rate": 4.7982174179137275e-06, + "loss": 0.5242, + "step": 10596 + }, + { + "epoch": 0.7790766063814145, + "grad_norm": 0.8740202188491821, + "learning_rate": 4.798179492557436e-06, + "loss": 0.5707, + "step": 10597 + }, + { + "epoch": 0.7791501249816204, + "grad_norm": 0.8743566274642944, + "learning_rate": 4.7981415637873256e-06, + "loss": 0.5974, + "step": 10598 + }, + { + "epoch": 0.7792236435818262, + "grad_norm": 0.8562939167022705, + "learning_rate": 4.79810363160345e-06, + "loss": 0.554, + "step": 10599 + }, + { + "epoch": 0.779297162182032, + "grad_norm": 0.765854001045227, + "learning_rate": 4.798065696005869e-06, + "loss": 0.5098, + "step": 10600 + }, + { + "epoch": 0.7793706807822379, + "grad_norm": 0.7940604090690613, + "learning_rate": 4.798027756994637e-06, + "loss": 0.5538, + "step": 10601 + }, + { + "epoch": 0.7794441993824438, + "grad_norm": 0.7712826132774353, + "learning_rate": 4.797989814569809e-06, + "loss": 0.5241, + "step": 10602 + }, + { + "epoch": 0.7795177179826496, + "grad_norm": 0.8458381295204163, + "learning_rate": 4.797951868731444e-06, + "loss": 0.5018, + "step": 10603 + }, + { + "epoch": 0.7795912365828555, + "grad_norm": 0.8523455858230591, + "learning_rate": 4.797913919479597e-06, + "loss": 0.5315, + "step": 10604 + }, + { + "epoch": 0.7796647551830613, + "grad_norm": 0.8504886031150818, + "learning_rate": 4.797875966814326e-06, + "loss": 0.5232, + "step": 10605 + }, + { + "epoch": 0.7797382737832672, + "grad_norm": 0.8411505222320557, + "learning_rate": 4.797838010735685e-06, + "loss": 0.5206, + "step": 10606 + }, + { + "epoch": 0.779811792383473, + "grad_norm": 0.8823432922363281, + "learning_rate": 4.7978000512437315e-06, + "loss": 0.524, + "step": 10607 + }, + { + "epoch": 0.7798853109836789, + "grad_norm": 0.7727447152137756, + "learning_rate": 4.797762088338522e-06, + "loss": 0.4802, + "step": 10608 + }, + { + "epoch": 0.7799588295838847, + "grad_norm": 0.8269922733306885, + "learning_rate": 4.7977241220201135e-06, + "loss": 0.5194, + "step": 10609 + }, + { + "epoch": 0.7800323481840906, + "grad_norm": 0.8440337777137756, + "learning_rate": 4.797686152288561e-06, + "loss": 0.5795, + "step": 10610 + }, + { + "epoch": 0.7801058667842964, + "grad_norm": 0.7976400256156921, + "learning_rate": 4.797648179143921e-06, + "loss": 0.5243, + "step": 10611 + }, + { + "epoch": 0.7801793853845023, + "grad_norm": 0.8135088086128235, + "learning_rate": 4.797610202586251e-06, + "loss": 0.5157, + "step": 10612 + }, + { + "epoch": 0.7802529039847081, + "grad_norm": 0.8018741607666016, + "learning_rate": 4.7975722226156065e-06, + "loss": 0.5193, + "step": 10613 + }, + { + "epoch": 0.780326422584914, + "grad_norm": 0.8294339776039124, + "learning_rate": 4.797534239232044e-06, + "loss": 0.5193, + "step": 10614 + }, + { + "epoch": 0.7803999411851198, + "grad_norm": 0.8466664552688599, + "learning_rate": 4.797496252435622e-06, + "loss": 0.5922, + "step": 10615 + }, + { + "epoch": 0.7804734597853257, + "grad_norm": 0.8745380640029907, + "learning_rate": 4.797458262226393e-06, + "loss": 0.5399, + "step": 10616 + }, + { + "epoch": 0.7805469783855316, + "grad_norm": 0.8822537660598755, + "learning_rate": 4.797420268604417e-06, + "loss": 0.5368, + "step": 10617 + }, + { + "epoch": 0.7806204969857374, + "grad_norm": 0.8234239816665649, + "learning_rate": 4.797382271569748e-06, + "loss": 0.5443, + "step": 10618 + }, + { + "epoch": 0.7806940155859432, + "grad_norm": 0.860011875629425, + "learning_rate": 4.797344271122444e-06, + "loss": 0.5658, + "step": 10619 + }, + { + "epoch": 0.7807675341861491, + "grad_norm": 0.8521602749824524, + "learning_rate": 4.7973062672625605e-06, + "loss": 0.5893, + "step": 10620 + }, + { + "epoch": 0.780841052786355, + "grad_norm": 0.8080328106880188, + "learning_rate": 4.797268259990154e-06, + "loss": 0.5404, + "step": 10621 + }, + { + "epoch": 0.7809145713865608, + "grad_norm": 0.8043069243431091, + "learning_rate": 4.797230249305283e-06, + "loss": 0.518, + "step": 10622 + }, + { + "epoch": 0.7809880899867666, + "grad_norm": 0.832950234413147, + "learning_rate": 4.797192235208e-06, + "loss": 0.446, + "step": 10623 + }, + { + "epoch": 0.7810616085869725, + "grad_norm": 0.842819094657898, + "learning_rate": 4.797154217698366e-06, + "loss": 0.5024, + "step": 10624 + }, + { + "epoch": 0.7811351271871784, + "grad_norm": 0.7921789288520813, + "learning_rate": 4.797116196776434e-06, + "loss": 0.52, + "step": 10625 + }, + { + "epoch": 0.7812086457873842, + "grad_norm": 0.7770872712135315, + "learning_rate": 4.797078172442261e-06, + "loss": 0.4886, + "step": 10626 + }, + { + "epoch": 0.78128216438759, + "grad_norm": 0.8496046662330627, + "learning_rate": 4.797040144695904e-06, + "loss": 0.5488, + "step": 10627 + }, + { + "epoch": 0.7813556829877959, + "grad_norm": 0.837003231048584, + "learning_rate": 4.7970021135374216e-06, + "loss": 0.5458, + "step": 10628 + }, + { + "epoch": 0.7814292015880018, + "grad_norm": 0.8290637731552124, + "learning_rate": 4.796964078966867e-06, + "loss": 0.4974, + "step": 10629 + }, + { + "epoch": 0.7815027201882077, + "grad_norm": 0.8631480932235718, + "learning_rate": 4.796926040984298e-06, + "loss": 0.5563, + "step": 10630 + }, + { + "epoch": 0.7815762387884134, + "grad_norm": 0.8200415372848511, + "learning_rate": 4.796887999589772e-06, + "loss": 0.5414, + "step": 10631 + }, + { + "epoch": 0.7816497573886193, + "grad_norm": 0.8814170956611633, + "learning_rate": 4.796849954783344e-06, + "loss": 0.5101, + "step": 10632 + }, + { + "epoch": 0.7817232759888252, + "grad_norm": 0.8392153382301331, + "learning_rate": 4.796811906565071e-06, + "loss": 0.5822, + "step": 10633 + }, + { + "epoch": 0.7817967945890311, + "grad_norm": 0.8027096390724182, + "learning_rate": 4.79677385493501e-06, + "loss": 0.4942, + "step": 10634 + }, + { + "epoch": 0.7818703131892368, + "grad_norm": 0.8233495950698853, + "learning_rate": 4.7967357998932175e-06, + "loss": 0.5032, + "step": 10635 + }, + { + "epoch": 0.7819438317894427, + "grad_norm": 0.8272022008895874, + "learning_rate": 4.796697741439749e-06, + "loss": 0.5553, + "step": 10636 + }, + { + "epoch": 0.7820173503896486, + "grad_norm": 0.85514897108078, + "learning_rate": 4.796659679574662e-06, + "loss": 0.5827, + "step": 10637 + }, + { + "epoch": 0.7820908689898545, + "grad_norm": 0.8228676915168762, + "learning_rate": 4.796621614298013e-06, + "loss": 0.4943, + "step": 10638 + }, + { + "epoch": 0.7821643875900602, + "grad_norm": 0.8302251696586609, + "learning_rate": 4.7965835456098585e-06, + "loss": 0.5093, + "step": 10639 + }, + { + "epoch": 0.7822379061902661, + "grad_norm": 0.8280417323112488, + "learning_rate": 4.796545473510254e-06, + "loss": 0.5411, + "step": 10640 + }, + { + "epoch": 0.782311424790472, + "grad_norm": 0.8334177732467651, + "learning_rate": 4.7965073979992585e-06, + "loss": 0.5164, + "step": 10641 + }, + { + "epoch": 0.7823849433906779, + "grad_norm": 0.7804111838340759, + "learning_rate": 4.796469319076927e-06, + "loss": 0.5323, + "step": 10642 + }, + { + "epoch": 0.7824584619908836, + "grad_norm": 0.8173143267631531, + "learning_rate": 4.796431236743314e-06, + "loss": 0.5055, + "step": 10643 + }, + { + "epoch": 0.7825319805910895, + "grad_norm": 0.7800828814506531, + "learning_rate": 4.796393150998479e-06, + "loss": 0.4913, + "step": 10644 + }, + { + "epoch": 0.7826054991912954, + "grad_norm": 0.8345896601676941, + "learning_rate": 4.796355061842478e-06, + "loss": 0.5612, + "step": 10645 + }, + { + "epoch": 0.7826790177915013, + "grad_norm": 0.7997344732284546, + "learning_rate": 4.796316969275368e-06, + "loss": 0.5344, + "step": 10646 + }, + { + "epoch": 0.782752536391707, + "grad_norm": 0.8488349318504333, + "learning_rate": 4.7962788732972044e-06, + "loss": 0.5597, + "step": 10647 + }, + { + "epoch": 0.7828260549919129, + "grad_norm": 0.8238749504089355, + "learning_rate": 4.796240773908044e-06, + "loss": 0.5184, + "step": 10648 + }, + { + "epoch": 0.7828995735921188, + "grad_norm": 0.8229656219482422, + "learning_rate": 4.796202671107944e-06, + "loss": 0.4918, + "step": 10649 + }, + { + "epoch": 0.7829730921923247, + "grad_norm": 0.7899409532546997, + "learning_rate": 4.796164564896961e-06, + "loss": 0.4915, + "step": 10650 + }, + { + "epoch": 0.7830466107925305, + "grad_norm": 0.876825749874115, + "learning_rate": 4.796126455275151e-06, + "loss": 0.541, + "step": 10651 + }, + { + "epoch": 0.7831201293927363, + "grad_norm": 0.8594217300415039, + "learning_rate": 4.79608834224257e-06, + "loss": 0.5734, + "step": 10652 + }, + { + "epoch": 0.7831936479929422, + "grad_norm": 0.8151822686195374, + "learning_rate": 4.796050225799275e-06, + "loss": 0.5011, + "step": 10653 + }, + { + "epoch": 0.7832671665931481, + "grad_norm": 0.7752025127410889, + "learning_rate": 4.796012105945326e-06, + "loss": 0.4765, + "step": 10654 + }, + { + "epoch": 0.7833406851933539, + "grad_norm": 0.8813450336456299, + "learning_rate": 4.795973982680774e-06, + "loss": 0.5418, + "step": 10655 + }, + { + "epoch": 0.7834142037935597, + "grad_norm": 0.8519026041030884, + "learning_rate": 4.79593585600568e-06, + "loss": 0.5446, + "step": 10656 + }, + { + "epoch": 0.7834877223937656, + "grad_norm": 0.7905334234237671, + "learning_rate": 4.795897725920098e-06, + "loss": 0.5407, + "step": 10657 + }, + { + "epoch": 0.7835612409939715, + "grad_norm": 0.8445658683776855, + "learning_rate": 4.795859592424087e-06, + "loss": 0.5695, + "step": 10658 + }, + { + "epoch": 0.7836347595941773, + "grad_norm": 0.9322822093963623, + "learning_rate": 4.795821455517701e-06, + "loss": 0.601, + "step": 10659 + }, + { + "epoch": 0.7837082781943832, + "grad_norm": 0.8018797636032104, + "learning_rate": 4.795783315200998e-06, + "loss": 0.5298, + "step": 10660 + }, + { + "epoch": 0.783781796794589, + "grad_norm": 0.8715519905090332, + "learning_rate": 4.795745171474035e-06, + "loss": 0.5654, + "step": 10661 + }, + { + "epoch": 0.7838553153947949, + "grad_norm": 0.8042186498641968, + "learning_rate": 4.795707024336868e-06, + "loss": 0.5043, + "step": 10662 + }, + { + "epoch": 0.7839288339950007, + "grad_norm": 0.8594738841056824, + "learning_rate": 4.795668873789554e-06, + "loss": 0.5468, + "step": 10663 + }, + { + "epoch": 0.7840023525952066, + "grad_norm": 0.8444423079490662, + "learning_rate": 4.795630719832149e-06, + "loss": 0.5796, + "step": 10664 + }, + { + "epoch": 0.7840758711954124, + "grad_norm": 0.8280789852142334, + "learning_rate": 4.795592562464711e-06, + "loss": 0.5324, + "step": 10665 + }, + { + "epoch": 0.7841493897956183, + "grad_norm": 0.8666853904724121, + "learning_rate": 4.7955544016872965e-06, + "loss": 0.5322, + "step": 10666 + }, + { + "epoch": 0.7842229083958241, + "grad_norm": 0.8014205694198608, + "learning_rate": 4.795516237499961e-06, + "loss": 0.5111, + "step": 10667 + }, + { + "epoch": 0.78429642699603, + "grad_norm": 0.7848010063171387, + "learning_rate": 4.795478069902762e-06, + "loss": 0.526, + "step": 10668 + }, + { + "epoch": 0.7843699455962359, + "grad_norm": 0.8383796215057373, + "learning_rate": 4.795439898895756e-06, + "loss": 0.5245, + "step": 10669 + }, + { + "epoch": 0.7844434641964417, + "grad_norm": 0.8113707304000854, + "learning_rate": 4.7954017244789994e-06, + "loss": 0.5379, + "step": 10670 + }, + { + "epoch": 0.7845169827966475, + "grad_norm": 0.8512637615203857, + "learning_rate": 4.7953635466525495e-06, + "loss": 0.5531, + "step": 10671 + }, + { + "epoch": 0.7845905013968534, + "grad_norm": 0.8664129972457886, + "learning_rate": 4.7953253654164625e-06, + "loss": 0.5636, + "step": 10672 + }, + { + "epoch": 0.7846640199970593, + "grad_norm": 0.831468403339386, + "learning_rate": 4.795287180770796e-06, + "loss": 0.524, + "step": 10673 + }, + { + "epoch": 0.7847375385972651, + "grad_norm": 0.8008344173431396, + "learning_rate": 4.795248992715606e-06, + "loss": 0.5559, + "step": 10674 + }, + { + "epoch": 0.7848110571974709, + "grad_norm": 0.8342307806015015, + "learning_rate": 4.795210801250948e-06, + "loss": 0.4891, + "step": 10675 + }, + { + "epoch": 0.7848845757976768, + "grad_norm": 0.8516437411308289, + "learning_rate": 4.795172606376881e-06, + "loss": 0.5466, + "step": 10676 + }, + { + "epoch": 0.7849580943978827, + "grad_norm": 0.8757006525993347, + "learning_rate": 4.795134408093461e-06, + "loss": 0.5104, + "step": 10677 + }, + { + "epoch": 0.7850316129980885, + "grad_norm": 0.8240055441856384, + "learning_rate": 4.795096206400745e-06, + "loss": 0.5317, + "step": 10678 + }, + { + "epoch": 0.7851051315982943, + "grad_norm": 0.8353098034858704, + "learning_rate": 4.795058001298788e-06, + "loss": 0.5246, + "step": 10679 + }, + { + "epoch": 0.7851786501985002, + "grad_norm": 0.8387826681137085, + "learning_rate": 4.795019792787649e-06, + "loss": 0.5395, + "step": 10680 + }, + { + "epoch": 0.7852521687987061, + "grad_norm": 0.833220899105072, + "learning_rate": 4.794981580867382e-06, + "loss": 0.5668, + "step": 10681 + }, + { + "epoch": 0.785325687398912, + "grad_norm": 0.8043558597564697, + "learning_rate": 4.794943365538047e-06, + "loss": 0.5067, + "step": 10682 + }, + { + "epoch": 0.7853992059991177, + "grad_norm": 0.8332311511039734, + "learning_rate": 4.7949051467997e-06, + "loss": 0.5338, + "step": 10683 + }, + { + "epoch": 0.7854727245993236, + "grad_norm": 0.8130710124969482, + "learning_rate": 4.794866924652396e-06, + "loss": 0.558, + "step": 10684 + }, + { + "epoch": 0.7855462431995295, + "grad_norm": 0.8162306547164917, + "learning_rate": 4.794828699096193e-06, + "loss": 0.494, + "step": 10685 + }, + { + "epoch": 0.7856197617997354, + "grad_norm": 0.8896776437759399, + "learning_rate": 4.7947904701311476e-06, + "loss": 0.5654, + "step": 10686 + }, + { + "epoch": 0.7856932803999411, + "grad_norm": 0.8147783279418945, + "learning_rate": 4.794752237757317e-06, + "loss": 0.5406, + "step": 10687 + }, + { + "epoch": 0.785766799000147, + "grad_norm": 0.8387112617492676, + "learning_rate": 4.7947140019747565e-06, + "loss": 0.5037, + "step": 10688 + }, + { + "epoch": 0.7858403176003529, + "grad_norm": 0.8067510724067688, + "learning_rate": 4.794675762783525e-06, + "loss": 0.5343, + "step": 10689 + }, + { + "epoch": 0.7859138362005588, + "grad_norm": 0.8512712121009827, + "learning_rate": 4.794637520183678e-06, + "loss": 0.5133, + "step": 10690 + }, + { + "epoch": 0.7859873548007646, + "grad_norm": 0.8275851607322693, + "learning_rate": 4.7945992741752735e-06, + "loss": 0.5451, + "step": 10691 + }, + { + "epoch": 0.7860608734009704, + "grad_norm": 0.8194851875305176, + "learning_rate": 4.794561024758366e-06, + "loss": 0.5301, + "step": 10692 + }, + { + "epoch": 0.7861343920011763, + "grad_norm": 0.8276898860931396, + "learning_rate": 4.7945227719330145e-06, + "loss": 0.4911, + "step": 10693 + }, + { + "epoch": 0.7862079106013822, + "grad_norm": 0.8280585408210754, + "learning_rate": 4.794484515699275e-06, + "loss": 0.4948, + "step": 10694 + }, + { + "epoch": 0.7862814292015881, + "grad_norm": 0.8197425007820129, + "learning_rate": 4.794446256057205e-06, + "loss": 0.5327, + "step": 10695 + }, + { + "epoch": 0.7863549478017938, + "grad_norm": 0.8081288933753967, + "learning_rate": 4.794407993006859e-06, + "loss": 0.5142, + "step": 10696 + }, + { + "epoch": 0.7864284664019997, + "grad_norm": 0.8204184174537659, + "learning_rate": 4.794369726548297e-06, + "loss": 0.5239, + "step": 10697 + }, + { + "epoch": 0.7865019850022056, + "grad_norm": 0.8422456383705139, + "learning_rate": 4.794331456681574e-06, + "loss": 0.5103, + "step": 10698 + }, + { + "epoch": 0.7865755036024115, + "grad_norm": 0.8276283740997314, + "learning_rate": 4.794293183406747e-06, + "loss": 0.5135, + "step": 10699 + }, + { + "epoch": 0.7866490222026172, + "grad_norm": 0.8401318192481995, + "learning_rate": 4.794254906723874e-06, + "loss": 0.5767, + "step": 10700 + }, + { + "epoch": 0.7867225408028231, + "grad_norm": 0.8056728839874268, + "learning_rate": 4.79421662663301e-06, + "loss": 0.5644, + "step": 10701 + }, + { + "epoch": 0.786796059403029, + "grad_norm": 0.8546997904777527, + "learning_rate": 4.794178343134214e-06, + "loss": 0.5775, + "step": 10702 + }, + { + "epoch": 0.7868695780032349, + "grad_norm": 0.8704463243484497, + "learning_rate": 4.79414005622754e-06, + "loss": 0.5568, + "step": 10703 + }, + { + "epoch": 0.7869430966034406, + "grad_norm": 0.8239634037017822, + "learning_rate": 4.794101765913048e-06, + "loss": 0.5213, + "step": 10704 + }, + { + "epoch": 0.7870166152036465, + "grad_norm": 0.7732020020484924, + "learning_rate": 4.794063472190793e-06, + "loss": 0.5301, + "step": 10705 + }, + { + "epoch": 0.7870901338038524, + "grad_norm": 0.8051524758338928, + "learning_rate": 4.794025175060833e-06, + "loss": 0.507, + "step": 10706 + }, + { + "epoch": 0.7871636524040583, + "grad_norm": 0.8124587535858154, + "learning_rate": 4.793986874523223e-06, + "loss": 0.5204, + "step": 10707 + }, + { + "epoch": 0.787237171004264, + "grad_norm": 0.8259574174880981, + "learning_rate": 4.793948570578021e-06, + "loss": 0.5237, + "step": 10708 + }, + { + "epoch": 0.7873106896044699, + "grad_norm": 0.7982532382011414, + "learning_rate": 4.793910263225285e-06, + "loss": 0.5333, + "step": 10709 + }, + { + "epoch": 0.7873842082046758, + "grad_norm": 0.8303859233856201, + "learning_rate": 4.79387195246507e-06, + "loss": 0.5225, + "step": 10710 + }, + { + "epoch": 0.7874577268048817, + "grad_norm": 0.8264607191085815, + "learning_rate": 4.793833638297435e-06, + "loss": 0.5252, + "step": 10711 + }, + { + "epoch": 0.7875312454050875, + "grad_norm": 0.8151007890701294, + "learning_rate": 4.793795320722435e-06, + "loss": 0.5457, + "step": 10712 + }, + { + "epoch": 0.7876047640052933, + "grad_norm": 0.8277100920677185, + "learning_rate": 4.793756999740128e-06, + "loss": 0.5256, + "step": 10713 + }, + { + "epoch": 0.7876782826054992, + "grad_norm": 0.8268104195594788, + "learning_rate": 4.79371867535057e-06, + "loss": 0.5061, + "step": 10714 + }, + { + "epoch": 0.7877518012057051, + "grad_norm": 0.8336225152015686, + "learning_rate": 4.7936803475538184e-06, + "loss": 0.5516, + "step": 10715 + }, + { + "epoch": 0.7878253198059109, + "grad_norm": 0.8348897695541382, + "learning_rate": 4.793642016349931e-06, + "loss": 0.5555, + "step": 10716 + }, + { + "epoch": 0.7878988384061167, + "grad_norm": 0.8078470826148987, + "learning_rate": 4.793603681738964e-06, + "loss": 0.4977, + "step": 10717 + }, + { + "epoch": 0.7879723570063226, + "grad_norm": 0.8164672255516052, + "learning_rate": 4.793565343720974e-06, + "loss": 0.5053, + "step": 10718 + }, + { + "epoch": 0.7880458756065285, + "grad_norm": 0.8330370187759399, + "learning_rate": 4.793527002296018e-06, + "loss": 0.5231, + "step": 10719 + }, + { + "epoch": 0.7881193942067343, + "grad_norm": 0.8632023930549622, + "learning_rate": 4.7934886574641535e-06, + "loss": 0.5163, + "step": 10720 + }, + { + "epoch": 0.7881929128069401, + "grad_norm": 0.821762204170227, + "learning_rate": 4.793450309225437e-06, + "loss": 0.5298, + "step": 10721 + }, + { + "epoch": 0.788266431407146, + "grad_norm": 0.801921010017395, + "learning_rate": 4.793411957579925e-06, + "loss": 0.4689, + "step": 10722 + }, + { + "epoch": 0.7883399500073519, + "grad_norm": 0.8408368825912476, + "learning_rate": 4.793373602527676e-06, + "loss": 0.553, + "step": 10723 + }, + { + "epoch": 0.7884134686075577, + "grad_norm": 0.8436383605003357, + "learning_rate": 4.793335244068747e-06, + "loss": 0.5735, + "step": 10724 + }, + { + "epoch": 0.7884869872077636, + "grad_norm": 0.826636552810669, + "learning_rate": 4.793296882203193e-06, + "loss": 0.5535, + "step": 10725 + }, + { + "epoch": 0.7885605058079694, + "grad_norm": 0.8023980259895325, + "learning_rate": 4.7932585169310715e-06, + "loss": 0.4958, + "step": 10726 + }, + { + "epoch": 0.7886340244081753, + "grad_norm": 0.8890313506126404, + "learning_rate": 4.793220148252441e-06, + "loss": 0.5578, + "step": 10727 + }, + { + "epoch": 0.7887075430083811, + "grad_norm": 0.8617933988571167, + "learning_rate": 4.793181776167357e-06, + "loss": 0.5574, + "step": 10728 + }, + { + "epoch": 0.788781061608587, + "grad_norm": 0.8621333241462708, + "learning_rate": 4.793143400675878e-06, + "loss": 0.5261, + "step": 10729 + }, + { + "epoch": 0.7888545802087928, + "grad_norm": 0.7919071316719055, + "learning_rate": 4.793105021778059e-06, + "loss": 0.5248, + "step": 10730 + }, + { + "epoch": 0.7889280988089987, + "grad_norm": 0.8125652074813843, + "learning_rate": 4.793066639473959e-06, + "loss": 0.4773, + "step": 10731 + }, + { + "epoch": 0.7890016174092045, + "grad_norm": 0.8604112267494202, + "learning_rate": 4.793028253763633e-06, + "loss": 0.5496, + "step": 10732 + }, + { + "epoch": 0.7890751360094104, + "grad_norm": 0.8149160742759705, + "learning_rate": 4.792989864647139e-06, + "loss": 0.5266, + "step": 10733 + }, + { + "epoch": 0.7891486546096163, + "grad_norm": 0.8292012214660645, + "learning_rate": 4.792951472124535e-06, + "loss": 0.5594, + "step": 10734 + }, + { + "epoch": 0.7892221732098221, + "grad_norm": 0.8804226517677307, + "learning_rate": 4.792913076195877e-06, + "loss": 0.5492, + "step": 10735 + }, + { + "epoch": 0.7892956918100279, + "grad_norm": 0.7883846163749695, + "learning_rate": 4.792874676861222e-06, + "loss": 0.4822, + "step": 10736 + }, + { + "epoch": 0.7893692104102338, + "grad_norm": 0.81449955701828, + "learning_rate": 4.792836274120627e-06, + "loss": 0.5336, + "step": 10737 + }, + { + "epoch": 0.7894427290104397, + "grad_norm": 0.8706387281417847, + "learning_rate": 4.79279786797415e-06, + "loss": 0.5279, + "step": 10738 + }, + { + "epoch": 0.7895162476106455, + "grad_norm": 0.8634327054023743, + "learning_rate": 4.792759458421847e-06, + "loss": 0.5379, + "step": 10739 + }, + { + "epoch": 0.7895897662108513, + "grad_norm": 0.7978482246398926, + "learning_rate": 4.792721045463775e-06, + "loss": 0.5364, + "step": 10740 + }, + { + "epoch": 0.7896632848110572, + "grad_norm": 0.8350933194160461, + "learning_rate": 4.792682629099992e-06, + "loss": 0.5006, + "step": 10741 + }, + { + "epoch": 0.7897368034112631, + "grad_norm": 0.8307157158851624, + "learning_rate": 4.792644209330554e-06, + "loss": 0.4852, + "step": 10742 + }, + { + "epoch": 0.789810322011469, + "grad_norm": 0.8354582190513611, + "learning_rate": 4.792605786155519e-06, + "loss": 0.5649, + "step": 10743 + }, + { + "epoch": 0.7898838406116747, + "grad_norm": 0.8144856095314026, + "learning_rate": 4.792567359574943e-06, + "loss": 0.5274, + "step": 10744 + }, + { + "epoch": 0.7899573592118806, + "grad_norm": 0.7663074731826782, + "learning_rate": 4.792528929588883e-06, + "loss": 0.4924, + "step": 10745 + }, + { + "epoch": 0.7900308778120865, + "grad_norm": 0.8429298400878906, + "learning_rate": 4.792490496197398e-06, + "loss": 0.5184, + "step": 10746 + }, + { + "epoch": 0.7901043964122924, + "grad_norm": 0.8071603178977966, + "learning_rate": 4.792452059400544e-06, + "loss": 0.5366, + "step": 10747 + }, + { + "epoch": 0.7901779150124981, + "grad_norm": 0.8050289154052734, + "learning_rate": 4.792413619198378e-06, + "loss": 0.5283, + "step": 10748 + }, + { + "epoch": 0.790251433612704, + "grad_norm": 0.8208066821098328, + "learning_rate": 4.7923751755909566e-06, + "loss": 0.5346, + "step": 10749 + }, + { + "epoch": 0.7903249522129099, + "grad_norm": 0.8380746841430664, + "learning_rate": 4.792336728578338e-06, + "loss": 0.5273, + "step": 10750 + }, + { + "epoch": 0.7903984708131158, + "grad_norm": 0.8299145102500916, + "learning_rate": 4.7922982781605785e-06, + "loss": 0.5164, + "step": 10751 + }, + { + "epoch": 0.7904719894133215, + "grad_norm": 0.8196365833282471, + "learning_rate": 4.792259824337735e-06, + "loss": 0.5153, + "step": 10752 + }, + { + "epoch": 0.7905455080135274, + "grad_norm": 0.8315192461013794, + "learning_rate": 4.792221367109866e-06, + "loss": 0.5311, + "step": 10753 + }, + { + "epoch": 0.7906190266137333, + "grad_norm": 0.8101179003715515, + "learning_rate": 4.792182906477026e-06, + "loss": 0.5317, + "step": 10754 + }, + { + "epoch": 0.7906925452139392, + "grad_norm": 0.8377714157104492, + "learning_rate": 4.792144442439276e-06, + "loss": 0.5388, + "step": 10755 + }, + { + "epoch": 0.7907660638141449, + "grad_norm": 0.841845691204071, + "learning_rate": 4.792105974996669e-06, + "loss": 0.5235, + "step": 10756 + }, + { + "epoch": 0.7908395824143508, + "grad_norm": 0.8086544871330261, + "learning_rate": 4.792067504149265e-06, + "loss": 0.4973, + "step": 10757 + }, + { + "epoch": 0.7909131010145567, + "grad_norm": 0.8085094690322876, + "learning_rate": 4.792029029897121e-06, + "loss": 0.5639, + "step": 10758 + }, + { + "epoch": 0.7909866196147626, + "grad_norm": 0.8442378044128418, + "learning_rate": 4.791990552240292e-06, + "loss": 0.5602, + "step": 10759 + }, + { + "epoch": 0.7910601382149683, + "grad_norm": 0.8699309229850769, + "learning_rate": 4.791952071178837e-06, + "loss": 0.5488, + "step": 10760 + }, + { + "epoch": 0.7911336568151742, + "grad_norm": 0.8564377427101135, + "learning_rate": 4.791913586712813e-06, + "loss": 0.5423, + "step": 10761 + }, + { + "epoch": 0.7912071754153801, + "grad_norm": 0.8056367039680481, + "learning_rate": 4.791875098842276e-06, + "loss": 0.513, + "step": 10762 + }, + { + "epoch": 0.791280694015586, + "grad_norm": 0.8318111300468445, + "learning_rate": 4.791836607567285e-06, + "loss": 0.5409, + "step": 10763 + }, + { + "epoch": 0.7913542126157918, + "grad_norm": 0.7997940182685852, + "learning_rate": 4.791798112887896e-06, + "loss": 0.5191, + "step": 10764 + }, + { + "epoch": 0.7914277312159976, + "grad_norm": 0.8063733577728271, + "learning_rate": 4.791759614804166e-06, + "loss": 0.5229, + "step": 10765 + }, + { + "epoch": 0.7915012498162035, + "grad_norm": 0.8390834331512451, + "learning_rate": 4.791721113316153e-06, + "loss": 0.5055, + "step": 10766 + }, + { + "epoch": 0.7915747684164094, + "grad_norm": 0.7662041187286377, + "learning_rate": 4.7916826084239135e-06, + "loss": 0.4928, + "step": 10767 + }, + { + "epoch": 0.7916482870166152, + "grad_norm": 0.8460866212844849, + "learning_rate": 4.791644100127505e-06, + "loss": 0.5844, + "step": 10768 + }, + { + "epoch": 0.791721805616821, + "grad_norm": 0.8100905418395996, + "learning_rate": 4.7916055884269854e-06, + "loss": 0.5118, + "step": 10769 + }, + { + "epoch": 0.7917953242170269, + "grad_norm": 0.8378884792327881, + "learning_rate": 4.791567073322411e-06, + "loss": 0.5396, + "step": 10770 + }, + { + "epoch": 0.7918688428172328, + "grad_norm": 0.8209216594696045, + "learning_rate": 4.7915285548138375e-06, + "loss": 0.5518, + "step": 10771 + }, + { + "epoch": 0.7919423614174386, + "grad_norm": 0.783011794090271, + "learning_rate": 4.791490032901326e-06, + "loss": 0.55, + "step": 10772 + }, + { + "epoch": 0.7920158800176444, + "grad_norm": 0.8013287782669067, + "learning_rate": 4.79145150758493e-06, + "loss": 0.4749, + "step": 10773 + }, + { + "epoch": 0.7920893986178503, + "grad_norm": 0.8545408844947815, + "learning_rate": 4.791412978864709e-06, + "loss": 0.5191, + "step": 10774 + }, + { + "epoch": 0.7921629172180562, + "grad_norm": 0.8711202144622803, + "learning_rate": 4.7913744467407195e-06, + "loss": 0.583, + "step": 10775 + }, + { + "epoch": 0.792236435818262, + "grad_norm": 0.8578009605407715, + "learning_rate": 4.791335911213018e-06, + "loss": 0.5452, + "step": 10776 + }, + { + "epoch": 0.7923099544184679, + "grad_norm": 0.8295096755027771, + "learning_rate": 4.791297372281663e-06, + "loss": 0.5425, + "step": 10777 + }, + { + "epoch": 0.7923834730186737, + "grad_norm": 0.8273155689239502, + "learning_rate": 4.7912588299467124e-06, + "loss": 0.5308, + "step": 10778 + }, + { + "epoch": 0.7924569916188796, + "grad_norm": 0.8212745785713196, + "learning_rate": 4.7912202842082205e-06, + "loss": 0.5132, + "step": 10779 + }, + { + "epoch": 0.7925305102190854, + "grad_norm": 0.8440702557563782, + "learning_rate": 4.791181735066247e-06, + "loss": 0.5059, + "step": 10780 + }, + { + "epoch": 0.7926040288192913, + "grad_norm": 0.8691514730453491, + "learning_rate": 4.791143182520849e-06, + "loss": 0.5658, + "step": 10781 + }, + { + "epoch": 0.7926775474194971, + "grad_norm": 0.8361057043075562, + "learning_rate": 4.791104626572083e-06, + "loss": 0.5476, + "step": 10782 + }, + { + "epoch": 0.792751066019703, + "grad_norm": 0.8233325481414795, + "learning_rate": 4.791066067220006e-06, + "loss": 0.5187, + "step": 10783 + }, + { + "epoch": 0.7928245846199088, + "grad_norm": 0.8114588260650635, + "learning_rate": 4.791027504464676e-06, + "loss": 0.5212, + "step": 10784 + }, + { + "epoch": 0.7928981032201147, + "grad_norm": 0.818454384803772, + "learning_rate": 4.790988938306151e-06, + "loss": 0.5408, + "step": 10785 + }, + { + "epoch": 0.7929716218203205, + "grad_norm": 0.8131927251815796, + "learning_rate": 4.790950368744486e-06, + "loss": 0.4959, + "step": 10786 + }, + { + "epoch": 0.7930451404205264, + "grad_norm": 0.8541097640991211, + "learning_rate": 4.79091179577974e-06, + "loss": 0.5337, + "step": 10787 + }, + { + "epoch": 0.7931186590207322, + "grad_norm": 0.8642334938049316, + "learning_rate": 4.79087321941197e-06, + "loss": 0.5673, + "step": 10788 + }, + { + "epoch": 0.7931921776209381, + "grad_norm": 0.813153862953186, + "learning_rate": 4.7908346396412335e-06, + "loss": 0.4954, + "step": 10789 + }, + { + "epoch": 0.793265696221144, + "grad_norm": 0.8008583188056946, + "learning_rate": 4.7907960564675885e-06, + "loss": 0.5036, + "step": 10790 + }, + { + "epoch": 0.7933392148213498, + "grad_norm": 0.8800985813140869, + "learning_rate": 4.790757469891089e-06, + "loss": 0.5639, + "step": 10791 + }, + { + "epoch": 0.7934127334215556, + "grad_norm": 0.7962590456008911, + "learning_rate": 4.790718879911797e-06, + "loss": 0.5312, + "step": 10792 + }, + { + "epoch": 0.7934862520217615, + "grad_norm": 0.8419889211654663, + "learning_rate": 4.790680286529766e-06, + "loss": 0.4654, + "step": 10793 + }, + { + "epoch": 0.7935597706219674, + "grad_norm": 0.8758903741836548, + "learning_rate": 4.790641689745056e-06, + "loss": 0.5472, + "step": 10794 + }, + { + "epoch": 0.7936332892221732, + "grad_norm": 0.8130672574043274, + "learning_rate": 4.790603089557723e-06, + "loss": 0.4811, + "step": 10795 + }, + { + "epoch": 0.793706807822379, + "grad_norm": 0.8202022910118103, + "learning_rate": 4.790564485967824e-06, + "loss": 0.5219, + "step": 10796 + }, + { + "epoch": 0.7937803264225849, + "grad_norm": 0.80646812915802, + "learning_rate": 4.790525878975418e-06, + "loss": 0.5074, + "step": 10797 + }, + { + "epoch": 0.7938538450227908, + "grad_norm": 0.832790195941925, + "learning_rate": 4.790487268580561e-06, + "loss": 0.5466, + "step": 10798 + }, + { + "epoch": 0.7939273636229967, + "grad_norm": 0.7894321084022522, + "learning_rate": 4.790448654783309e-06, + "loss": 0.501, + "step": 10799 + }, + { + "epoch": 0.7940008822232024, + "grad_norm": 0.8320439457893372, + "learning_rate": 4.790410037583722e-06, + "loss": 0.5454, + "step": 10800 + }, + { + "epoch": 0.7940744008234083, + "grad_norm": 0.8503484725952148, + "learning_rate": 4.790371416981857e-06, + "loss": 0.5711, + "step": 10801 + }, + { + "epoch": 0.7941479194236142, + "grad_norm": 0.8260546922683716, + "learning_rate": 4.79033279297777e-06, + "loss": 0.5202, + "step": 10802 + }, + { + "epoch": 0.7942214380238201, + "grad_norm": 0.8146265745162964, + "learning_rate": 4.790294165571518e-06, + "loss": 0.5263, + "step": 10803 + }, + { + "epoch": 0.7942949566240258, + "grad_norm": 0.8432303667068481, + "learning_rate": 4.790255534763161e-06, + "loss": 0.5352, + "step": 10804 + }, + { + "epoch": 0.7943684752242317, + "grad_norm": 0.8089739084243774, + "learning_rate": 4.7902169005527544e-06, + "loss": 0.5798, + "step": 10805 + }, + { + "epoch": 0.7944419938244376, + "grad_norm": 0.8096573948860168, + "learning_rate": 4.790178262940356e-06, + "loss": 0.532, + "step": 10806 + }, + { + "epoch": 0.7945155124246435, + "grad_norm": 0.7904171943664551, + "learning_rate": 4.7901396219260236e-06, + "loss": 0.5347, + "step": 10807 + }, + { + "epoch": 0.7945890310248492, + "grad_norm": 0.8137339949607849, + "learning_rate": 4.790100977509814e-06, + "loss": 0.5175, + "step": 10808 + }, + { + "epoch": 0.7946625496250551, + "grad_norm": 0.784348726272583, + "learning_rate": 4.790062329691784e-06, + "loss": 0.5009, + "step": 10809 + }, + { + "epoch": 0.794736068225261, + "grad_norm": 0.8509861826896667, + "learning_rate": 4.790023678471993e-06, + "loss": 0.5073, + "step": 10810 + }, + { + "epoch": 0.7948095868254669, + "grad_norm": 0.7965607643127441, + "learning_rate": 4.789985023850496e-06, + "loss": 0.5224, + "step": 10811 + }, + { + "epoch": 0.7948831054256726, + "grad_norm": 0.8034186959266663, + "learning_rate": 4.789946365827351e-06, + "loss": 0.5321, + "step": 10812 + }, + { + "epoch": 0.7949566240258785, + "grad_norm": 0.8420112133026123, + "learning_rate": 4.789907704402618e-06, + "loss": 0.5071, + "step": 10813 + }, + { + "epoch": 0.7950301426260844, + "grad_norm": 0.8114156723022461, + "learning_rate": 4.789869039576352e-06, + "loss": 0.5387, + "step": 10814 + }, + { + "epoch": 0.7951036612262903, + "grad_norm": 0.7980316281318665, + "learning_rate": 4.78983037134861e-06, + "loss": 0.5275, + "step": 10815 + }, + { + "epoch": 0.795177179826496, + "grad_norm": 0.8556327223777771, + "learning_rate": 4.789791699719452e-06, + "loss": 0.4691, + "step": 10816 + }, + { + "epoch": 0.7952506984267019, + "grad_norm": 0.8153330087661743, + "learning_rate": 4.789753024688933e-06, + "loss": 0.505, + "step": 10817 + }, + { + "epoch": 0.7953242170269078, + "grad_norm": 0.7804341912269592, + "learning_rate": 4.78971434625711e-06, + "loss": 0.5262, + "step": 10818 + }, + { + "epoch": 0.7953977356271137, + "grad_norm": 0.80417799949646, + "learning_rate": 4.789675664424043e-06, + "loss": 0.4928, + "step": 10819 + }, + { + "epoch": 0.7954712542273195, + "grad_norm": 0.8370447158813477, + "learning_rate": 4.789636979189788e-06, + "loss": 0.5069, + "step": 10820 + }, + { + "epoch": 0.7955447728275253, + "grad_norm": 0.888163149356842, + "learning_rate": 4.789598290554403e-06, + "loss": 0.5381, + "step": 10821 + }, + { + "epoch": 0.7956182914277312, + "grad_norm": 0.847560703754425, + "learning_rate": 4.789559598517945e-06, + "loss": 0.5216, + "step": 10822 + }, + { + "epoch": 0.7956918100279371, + "grad_norm": 0.8992093205451965, + "learning_rate": 4.789520903080471e-06, + "loss": 0.5517, + "step": 10823 + }, + { + "epoch": 0.7957653286281429, + "grad_norm": 0.8194587826728821, + "learning_rate": 4.7894822042420395e-06, + "loss": 0.5488, + "step": 10824 + }, + { + "epoch": 0.7958388472283487, + "grad_norm": 0.7777640223503113, + "learning_rate": 4.789443502002708e-06, + "loss": 0.5242, + "step": 10825 + }, + { + "epoch": 0.7959123658285546, + "grad_norm": 0.8273258805274963, + "learning_rate": 4.789404796362532e-06, + "loss": 0.549, + "step": 10826 + }, + { + "epoch": 0.7959858844287605, + "grad_norm": 0.8172245621681213, + "learning_rate": 4.7893660873215716e-06, + "loss": 0.5079, + "step": 10827 + }, + { + "epoch": 0.7960594030289664, + "grad_norm": 0.844150722026825, + "learning_rate": 4.789327374879884e-06, + "loss": 0.5395, + "step": 10828 + }, + { + "epoch": 0.7961329216291722, + "grad_norm": 0.8101589679718018, + "learning_rate": 4.789288659037526e-06, + "loss": 0.5055, + "step": 10829 + }, + { + "epoch": 0.796206440229378, + "grad_norm": 0.8391571044921875, + "learning_rate": 4.789249939794553e-06, + "loss": 0.4949, + "step": 10830 + }, + { + "epoch": 0.7962799588295839, + "grad_norm": 0.8015562295913696, + "learning_rate": 4.789211217151026e-06, + "loss": 0.4866, + "step": 10831 + }, + { + "epoch": 0.7963534774297898, + "grad_norm": 0.8107284903526306, + "learning_rate": 4.789172491107001e-06, + "loss": 0.5006, + "step": 10832 + }, + { + "epoch": 0.7964269960299956, + "grad_norm": 0.8026779890060425, + "learning_rate": 4.789133761662535e-06, + "loss": 0.5195, + "step": 10833 + }, + { + "epoch": 0.7965005146302014, + "grad_norm": 0.8070216178894043, + "learning_rate": 4.789095028817687e-06, + "loss": 0.5444, + "step": 10834 + }, + { + "epoch": 0.7965740332304073, + "grad_norm": 0.8150079250335693, + "learning_rate": 4.789056292572514e-06, + "loss": 0.5461, + "step": 10835 + }, + { + "epoch": 0.7966475518306132, + "grad_norm": 0.7979779839515686, + "learning_rate": 4.789017552927072e-06, + "loss": 0.5066, + "step": 10836 + }, + { + "epoch": 0.796721070430819, + "grad_norm": 0.8253905773162842, + "learning_rate": 4.788978809881421e-06, + "loss": 0.4929, + "step": 10837 + }, + { + "epoch": 0.7967945890310248, + "grad_norm": 0.7826922535896301, + "learning_rate": 4.788940063435616e-06, + "loss": 0.5057, + "step": 10838 + }, + { + "epoch": 0.7968681076312307, + "grad_norm": 0.8337271809577942, + "learning_rate": 4.788901313589717e-06, + "loss": 0.5794, + "step": 10839 + }, + { + "epoch": 0.7969416262314366, + "grad_norm": 0.8316510319709778, + "learning_rate": 4.788862560343779e-06, + "loss": 0.5937, + "step": 10840 + }, + { + "epoch": 0.7970151448316424, + "grad_norm": 0.8314968943595886, + "learning_rate": 4.788823803697862e-06, + "loss": 0.5582, + "step": 10841 + }, + { + "epoch": 0.7970886634318483, + "grad_norm": 0.8729121088981628, + "learning_rate": 4.788785043652023e-06, + "loss": 0.5197, + "step": 10842 + }, + { + "epoch": 0.7971621820320541, + "grad_norm": 0.8555662035942078, + "learning_rate": 4.788746280206318e-06, + "loss": 0.5701, + "step": 10843 + }, + { + "epoch": 0.79723570063226, + "grad_norm": 0.8649851679801941, + "learning_rate": 4.788707513360807e-06, + "loss": 0.5812, + "step": 10844 + }, + { + "epoch": 0.7973092192324658, + "grad_norm": 0.8733654022216797, + "learning_rate": 4.788668743115546e-06, + "loss": 0.545, + "step": 10845 + }, + { + "epoch": 0.7973827378326717, + "grad_norm": 0.8298196792602539, + "learning_rate": 4.788629969470592e-06, + "loss": 0.5124, + "step": 10846 + }, + { + "epoch": 0.7974562564328775, + "grad_norm": 0.8380185961723328, + "learning_rate": 4.788591192426005e-06, + "loss": 0.5332, + "step": 10847 + }, + { + "epoch": 0.7975297750330834, + "grad_norm": 0.808627188205719, + "learning_rate": 4.7885524119818396e-06, + "loss": 0.5491, + "step": 10848 + }, + { + "epoch": 0.7976032936332892, + "grad_norm": 0.8499715924263, + "learning_rate": 4.788513628138155e-06, + "loss": 0.5373, + "step": 10849 + }, + { + "epoch": 0.7976768122334951, + "grad_norm": 0.8630760312080383, + "learning_rate": 4.78847484089501e-06, + "loss": 0.494, + "step": 10850 + }, + { + "epoch": 0.797750330833701, + "grad_norm": 0.8340190649032593, + "learning_rate": 4.78843605025246e-06, + "loss": 0.5212, + "step": 10851 + }, + { + "epoch": 0.7978238494339068, + "grad_norm": 0.8968904614448547, + "learning_rate": 4.788397256210564e-06, + "loss": 0.5468, + "step": 10852 + }, + { + "epoch": 0.7978973680341126, + "grad_norm": 0.8396506905555725, + "learning_rate": 4.788358458769379e-06, + "loss": 0.4846, + "step": 10853 + }, + { + "epoch": 0.7979708866343185, + "grad_norm": 0.8357014656066895, + "learning_rate": 4.788319657928962e-06, + "loss": 0.5238, + "step": 10854 + }, + { + "epoch": 0.7980444052345244, + "grad_norm": 0.8128986954689026, + "learning_rate": 4.7882808536893724e-06, + "loss": 0.4943, + "step": 10855 + }, + { + "epoch": 0.7981179238347302, + "grad_norm": 0.8761110305786133, + "learning_rate": 4.7882420460506666e-06, + "loss": 0.5495, + "step": 10856 + }, + { + "epoch": 0.798191442434936, + "grad_norm": 0.853909432888031, + "learning_rate": 4.788203235012903e-06, + "loss": 0.5508, + "step": 10857 + }, + { + "epoch": 0.7982649610351419, + "grad_norm": 0.8088453412055969, + "learning_rate": 4.788164420576138e-06, + "loss": 0.5892, + "step": 10858 + }, + { + "epoch": 0.7983384796353478, + "grad_norm": 0.8019602298736572, + "learning_rate": 4.788125602740431e-06, + "loss": 0.5104, + "step": 10859 + }, + { + "epoch": 0.7984119982355536, + "grad_norm": 0.8428398370742798, + "learning_rate": 4.788086781505838e-06, + "loss": 0.54, + "step": 10860 + }, + { + "epoch": 0.7984855168357594, + "grad_norm": 0.8736385703086853, + "learning_rate": 4.788047956872417e-06, + "loss": 0.5256, + "step": 10861 + }, + { + "epoch": 0.7985590354359653, + "grad_norm": 0.8560945391654968, + "learning_rate": 4.788009128840228e-06, + "loss": 0.5123, + "step": 10862 + }, + { + "epoch": 0.7986325540361712, + "grad_norm": 0.7998468279838562, + "learning_rate": 4.787970297409324e-06, + "loss": 0.492, + "step": 10863 + }, + { + "epoch": 0.798706072636377, + "grad_norm": 0.803871214389801, + "learning_rate": 4.787931462579767e-06, + "loss": 0.4847, + "step": 10864 + }, + { + "epoch": 0.7987795912365828, + "grad_norm": 0.8256657719612122, + "learning_rate": 4.787892624351613e-06, + "loss": 0.541, + "step": 10865 + }, + { + "epoch": 0.7988531098367887, + "grad_norm": 0.7967311143875122, + "learning_rate": 4.787853782724919e-06, + "loss": 0.5217, + "step": 10866 + }, + { + "epoch": 0.7989266284369946, + "grad_norm": 0.7750201225280762, + "learning_rate": 4.787814937699745e-06, + "loss": 0.448, + "step": 10867 + }, + { + "epoch": 0.7990001470372005, + "grad_norm": 0.858256995677948, + "learning_rate": 4.787776089276146e-06, + "loss": 0.566, + "step": 10868 + }, + { + "epoch": 0.7990736656374062, + "grad_norm": 0.8304235935211182, + "learning_rate": 4.787737237454182e-06, + "loss": 0.5424, + "step": 10869 + }, + { + "epoch": 0.7991471842376121, + "grad_norm": 0.7762194871902466, + "learning_rate": 4.787698382233908e-06, + "loss": 0.4938, + "step": 10870 + }, + { + "epoch": 0.799220702837818, + "grad_norm": 0.8001773357391357, + "learning_rate": 4.787659523615384e-06, + "loss": 0.4943, + "step": 10871 + }, + { + "epoch": 0.7992942214380239, + "grad_norm": 0.8275223970413208, + "learning_rate": 4.787620661598668e-06, + "loss": 0.4682, + "step": 10872 + }, + { + "epoch": 0.7993677400382296, + "grad_norm": 0.8785827159881592, + "learning_rate": 4.787581796183816e-06, + "loss": 0.5264, + "step": 10873 + }, + { + "epoch": 0.7994412586384355, + "grad_norm": 0.8265512585639954, + "learning_rate": 4.787542927370886e-06, + "loss": 0.5596, + "step": 10874 + }, + { + "epoch": 0.7995147772386414, + "grad_norm": 0.8636699318885803, + "learning_rate": 4.787504055159937e-06, + "loss": 0.5448, + "step": 10875 + }, + { + "epoch": 0.7995882958388473, + "grad_norm": 0.8254286646842957, + "learning_rate": 4.787465179551025e-06, + "loss": 0.5338, + "step": 10876 + }, + { + "epoch": 0.799661814439053, + "grad_norm": 0.7828885316848755, + "learning_rate": 4.78742630054421e-06, + "loss": 0.5178, + "step": 10877 + }, + { + "epoch": 0.7997353330392589, + "grad_norm": 0.8622047901153564, + "learning_rate": 4.787387418139548e-06, + "loss": 0.5492, + "step": 10878 + }, + { + "epoch": 0.7998088516394648, + "grad_norm": 0.8241442441940308, + "learning_rate": 4.787348532337097e-06, + "loss": 0.5403, + "step": 10879 + }, + { + "epoch": 0.7998823702396707, + "grad_norm": 0.8535041809082031, + "learning_rate": 4.787309643136915e-06, + "loss": 0.5224, + "step": 10880 + }, + { + "epoch": 0.7999558888398765, + "grad_norm": 0.9047330021858215, + "learning_rate": 4.78727075053906e-06, + "loss": 0.535, + "step": 10881 + }, + { + "epoch": 0.8000294074400823, + "grad_norm": 0.8153062462806702, + "learning_rate": 4.7872318545435896e-06, + "loss": 0.5416, + "step": 10882 + }, + { + "epoch": 0.8001029260402882, + "grad_norm": 0.7925218939781189, + "learning_rate": 4.7871929551505614e-06, + "loss": 0.5203, + "step": 10883 + }, + { + "epoch": 0.8001764446404941, + "grad_norm": 0.8419613242149353, + "learning_rate": 4.787154052360033e-06, + "loss": 0.5398, + "step": 10884 + }, + { + "epoch": 0.8002499632406999, + "grad_norm": 0.8159751892089844, + "learning_rate": 4.787115146172064e-06, + "loss": 0.5279, + "step": 10885 + }, + { + "epoch": 0.8003234818409057, + "grad_norm": 0.7935442924499512, + "learning_rate": 4.7870762365867084e-06, + "loss": 0.5152, + "step": 10886 + }, + { + "epoch": 0.8003970004411116, + "grad_norm": 0.7939809560775757, + "learning_rate": 4.787037323604028e-06, + "loss": 0.5129, + "step": 10887 + }, + { + "epoch": 0.8004705190413175, + "grad_norm": 0.8611061573028564, + "learning_rate": 4.786998407224078e-06, + "loss": 0.5301, + "step": 10888 + }, + { + "epoch": 0.8005440376415233, + "grad_norm": 0.7720556259155273, + "learning_rate": 4.786959487446917e-06, + "loss": 0.4825, + "step": 10889 + }, + { + "epoch": 0.8006175562417291, + "grad_norm": 0.8138999342918396, + "learning_rate": 4.7869205642726035e-06, + "loss": 0.5032, + "step": 10890 + }, + { + "epoch": 0.800691074841935, + "grad_norm": 0.8305288553237915, + "learning_rate": 4.786881637701195e-06, + "loss": 0.5241, + "step": 10891 + }, + { + "epoch": 0.8007645934421409, + "grad_norm": 0.8492199778556824, + "learning_rate": 4.786842707732748e-06, + "loss": 0.5349, + "step": 10892 + }, + { + "epoch": 0.8008381120423467, + "grad_norm": 0.8056818842887878, + "learning_rate": 4.786803774367323e-06, + "loss": 0.5311, + "step": 10893 + }, + { + "epoch": 0.8009116306425526, + "grad_norm": 0.8387656211853027, + "learning_rate": 4.7867648376049745e-06, + "loss": 0.5558, + "step": 10894 + }, + { + "epoch": 0.8009851492427584, + "grad_norm": 0.8217490911483765, + "learning_rate": 4.7867258974457635e-06, + "loss": 0.5328, + "step": 10895 + }, + { + "epoch": 0.8010586678429643, + "grad_norm": 0.8199796676635742, + "learning_rate": 4.7866869538897444e-06, + "loss": 0.5223, + "step": 10896 + }, + { + "epoch": 0.8011321864431701, + "grad_norm": 0.7768239974975586, + "learning_rate": 4.7866480069369784e-06, + "loss": 0.5141, + "step": 10897 + }, + { + "epoch": 0.801205705043376, + "grad_norm": 0.7960096597671509, + "learning_rate": 4.786609056587522e-06, + "loss": 0.5317, + "step": 10898 + }, + { + "epoch": 0.8012792236435818, + "grad_norm": 0.7979761958122253, + "learning_rate": 4.7865701028414334e-06, + "loss": 0.5011, + "step": 10899 + }, + { + "epoch": 0.8013527422437877, + "grad_norm": 0.8158488869667053, + "learning_rate": 4.78653114569877e-06, + "loss": 0.5369, + "step": 10900 + }, + { + "epoch": 0.8014262608439935, + "grad_norm": 0.874465823173523, + "learning_rate": 4.786492185159589e-06, + "loss": 0.5815, + "step": 10901 + }, + { + "epoch": 0.8014997794441994, + "grad_norm": 0.8053731918334961, + "learning_rate": 4.78645322122395e-06, + "loss": 0.5269, + "step": 10902 + }, + { + "epoch": 0.8015732980444052, + "grad_norm": 0.829008162021637, + "learning_rate": 4.786414253891909e-06, + "loss": 0.5417, + "step": 10903 + }, + { + "epoch": 0.8016468166446111, + "grad_norm": 0.8291321396827698, + "learning_rate": 4.7863752831635255e-06, + "loss": 0.5318, + "step": 10904 + }, + { + "epoch": 0.8017203352448169, + "grad_norm": 0.835017204284668, + "learning_rate": 4.786336309038856e-06, + "loss": 0.4736, + "step": 10905 + }, + { + "epoch": 0.8017938538450228, + "grad_norm": 0.8373985290527344, + "learning_rate": 4.78629733151796e-06, + "loss": 0.5748, + "step": 10906 + }, + { + "epoch": 0.8018673724452287, + "grad_norm": 0.8171612024307251, + "learning_rate": 4.786258350600894e-06, + "loss": 0.543, + "step": 10907 + }, + { + "epoch": 0.8019408910454345, + "grad_norm": 0.850665271282196, + "learning_rate": 4.786219366287717e-06, + "loss": 0.5457, + "step": 10908 + }, + { + "epoch": 0.8020144096456403, + "grad_norm": 0.8459036946296692, + "learning_rate": 4.786180378578485e-06, + "loss": 0.5318, + "step": 10909 + }, + { + "epoch": 0.8020879282458462, + "grad_norm": 0.8003333806991577, + "learning_rate": 4.786141387473259e-06, + "loss": 0.5292, + "step": 10910 + }, + { + "epoch": 0.8021614468460521, + "grad_norm": 0.821549654006958, + "learning_rate": 4.786102392972093e-06, + "loss": 0.5554, + "step": 10911 + }, + { + "epoch": 0.8022349654462579, + "grad_norm": 0.7780027985572815, + "learning_rate": 4.786063395075049e-06, + "loss": 0.4792, + "step": 10912 + }, + { + "epoch": 0.8023084840464637, + "grad_norm": 0.8482469916343689, + "learning_rate": 4.786024393782182e-06, + "loss": 0.5579, + "step": 10913 + }, + { + "epoch": 0.8023820026466696, + "grad_norm": 0.8323980569839478, + "learning_rate": 4.785985389093551e-06, + "loss": 0.4771, + "step": 10914 + }, + { + "epoch": 0.8024555212468755, + "grad_norm": 0.8580916523933411, + "learning_rate": 4.785946381009215e-06, + "loss": 0.5624, + "step": 10915 + }, + { + "epoch": 0.8025290398470813, + "grad_norm": 0.8119022846221924, + "learning_rate": 4.78590736952923e-06, + "loss": 0.5614, + "step": 10916 + }, + { + "epoch": 0.8026025584472871, + "grad_norm": 0.8182693719863892, + "learning_rate": 4.785868354653654e-06, + "loss": 0.5538, + "step": 10917 + }, + { + "epoch": 0.802676077047493, + "grad_norm": 0.838167667388916, + "learning_rate": 4.785829336382547e-06, + "loss": 0.5423, + "step": 10918 + }, + { + "epoch": 0.8027495956476989, + "grad_norm": 0.8819785118103027, + "learning_rate": 4.785790314715965e-06, + "loss": 0.525, + "step": 10919 + }, + { + "epoch": 0.8028231142479048, + "grad_norm": 0.852965772151947, + "learning_rate": 4.785751289653967e-06, + "loss": 0.4996, + "step": 10920 + }, + { + "epoch": 0.8028966328481105, + "grad_norm": 0.8773462772369385, + "learning_rate": 4.78571226119661e-06, + "loss": 0.5603, + "step": 10921 + }, + { + "epoch": 0.8029701514483164, + "grad_norm": 0.8077142238616943, + "learning_rate": 4.785673229343954e-06, + "loss": 0.5625, + "step": 10922 + }, + { + "epoch": 0.8030436700485223, + "grad_norm": 0.880841076374054, + "learning_rate": 4.785634194096054e-06, + "loss": 0.5711, + "step": 10923 + }, + { + "epoch": 0.8031171886487282, + "grad_norm": 0.915691077709198, + "learning_rate": 4.78559515545297e-06, + "loss": 0.5853, + "step": 10924 + }, + { + "epoch": 0.8031907072489339, + "grad_norm": 0.8095833659172058, + "learning_rate": 4.78555611341476e-06, + "loss": 0.5033, + "step": 10925 + }, + { + "epoch": 0.8032642258491398, + "grad_norm": 0.8351472020149231, + "learning_rate": 4.785517067981481e-06, + "loss": 0.5382, + "step": 10926 + }, + { + "epoch": 0.8033377444493457, + "grad_norm": 0.8080655336380005, + "learning_rate": 4.785478019153193e-06, + "loss": 0.5337, + "step": 10927 + }, + { + "epoch": 0.8034112630495516, + "grad_norm": 0.8512127995491028, + "learning_rate": 4.785438966929951e-06, + "loss": 0.5344, + "step": 10928 + }, + { + "epoch": 0.8034847816497573, + "grad_norm": 0.8348212242126465, + "learning_rate": 4.785399911311814e-06, + "loss": 0.4706, + "step": 10929 + }, + { + "epoch": 0.8035583002499632, + "grad_norm": 0.8342378735542297, + "learning_rate": 4.7853608522988415e-06, + "loss": 0.5225, + "step": 10930 + }, + { + "epoch": 0.8036318188501691, + "grad_norm": 0.849014163017273, + "learning_rate": 4.785321789891091e-06, + "loss": 0.5406, + "step": 10931 + }, + { + "epoch": 0.803705337450375, + "grad_norm": 0.8265557885169983, + "learning_rate": 4.78528272408862e-06, + "loss": 0.5238, + "step": 10932 + }, + { + "epoch": 0.8037788560505807, + "grad_norm": 0.8258954882621765, + "learning_rate": 4.785243654891486e-06, + "loss": 0.541, + "step": 10933 + }, + { + "epoch": 0.8038523746507866, + "grad_norm": 0.8049579858779907, + "learning_rate": 4.785204582299748e-06, + "loss": 0.5106, + "step": 10934 + }, + { + "epoch": 0.8039258932509925, + "grad_norm": 0.8499417304992676, + "learning_rate": 4.7851655063134636e-06, + "loss": 0.5135, + "step": 10935 + }, + { + "epoch": 0.8039994118511984, + "grad_norm": 0.7940279841423035, + "learning_rate": 4.785126426932692e-06, + "loss": 0.527, + "step": 10936 + }, + { + "epoch": 0.8040729304514042, + "grad_norm": 0.7885071635246277, + "learning_rate": 4.785087344157489e-06, + "loss": 0.5812, + "step": 10937 + }, + { + "epoch": 0.80414644905161, + "grad_norm": 0.8598050475120544, + "learning_rate": 4.785048257987914e-06, + "loss": 0.5194, + "step": 10938 + }, + { + "epoch": 0.8042199676518159, + "grad_norm": 0.8958181142807007, + "learning_rate": 4.785009168424026e-06, + "loss": 0.5644, + "step": 10939 + }, + { + "epoch": 0.8042934862520218, + "grad_norm": 0.8588594198226929, + "learning_rate": 4.7849700754658816e-06, + "loss": 0.5623, + "step": 10940 + }, + { + "epoch": 0.8043670048522276, + "grad_norm": 0.772733747959137, + "learning_rate": 4.7849309791135375e-06, + "loss": 0.5037, + "step": 10941 + }, + { + "epoch": 0.8044405234524334, + "grad_norm": 0.8223463892936707, + "learning_rate": 4.784891879367055e-06, + "loss": 0.549, + "step": 10942 + }, + { + "epoch": 0.8045140420526393, + "grad_norm": 0.7958160638809204, + "learning_rate": 4.784852776226491e-06, + "loss": 0.5311, + "step": 10943 + }, + { + "epoch": 0.8045875606528452, + "grad_norm": 0.9344210028648376, + "learning_rate": 4.784813669691904e-06, + "loss": 0.588, + "step": 10944 + }, + { + "epoch": 0.804661079253051, + "grad_norm": 0.8867225646972656, + "learning_rate": 4.7847745597633495e-06, + "loss": 0.5355, + "step": 10945 + }, + { + "epoch": 0.8047345978532569, + "grad_norm": 0.8292431831359863, + "learning_rate": 4.7847354464408884e-06, + "loss": 0.5581, + "step": 10946 + }, + { + "epoch": 0.8048081164534627, + "grad_norm": 0.7944467067718506, + "learning_rate": 4.784696329724578e-06, + "loss": 0.4971, + "step": 10947 + }, + { + "epoch": 0.8048816350536686, + "grad_norm": 0.8269667029380798, + "learning_rate": 4.784657209614476e-06, + "loss": 0.534, + "step": 10948 + }, + { + "epoch": 0.8049551536538744, + "grad_norm": 0.7905490398406982, + "learning_rate": 4.784618086110642e-06, + "loss": 0.4925, + "step": 10949 + }, + { + "epoch": 0.8050286722540803, + "grad_norm": 0.8624086976051331, + "learning_rate": 4.784578959213132e-06, + "loss": 0.5859, + "step": 10950 + }, + { + "epoch": 0.8051021908542861, + "grad_norm": 0.8870965838432312, + "learning_rate": 4.784539828922004e-06, + "loss": 0.5574, + "step": 10951 + }, + { + "epoch": 0.805175709454492, + "grad_norm": 0.7868763208389282, + "learning_rate": 4.7845006952373185e-06, + "loss": 0.4921, + "step": 10952 + }, + { + "epoch": 0.8052492280546978, + "grad_norm": 0.852489173412323, + "learning_rate": 4.784461558159132e-06, + "loss": 0.504, + "step": 10953 + }, + { + "epoch": 0.8053227466549037, + "grad_norm": 0.8465309143066406, + "learning_rate": 4.784422417687503e-06, + "loss": 0.5422, + "step": 10954 + }, + { + "epoch": 0.8053962652551095, + "grad_norm": 0.7633745670318604, + "learning_rate": 4.784383273822489e-06, + "loss": 0.4786, + "step": 10955 + }, + { + "epoch": 0.8054697838553154, + "grad_norm": 0.789228618144989, + "learning_rate": 4.78434412656415e-06, + "loss": 0.514, + "step": 10956 + }, + { + "epoch": 0.8055433024555212, + "grad_norm": 0.8194633722305298, + "learning_rate": 4.7843049759125425e-06, + "loss": 0.5272, + "step": 10957 + }, + { + "epoch": 0.8056168210557271, + "grad_norm": 0.8436959981918335, + "learning_rate": 4.784265821867724e-06, + "loss": 0.5272, + "step": 10958 + }, + { + "epoch": 0.805690339655933, + "grad_norm": 0.8141857981681824, + "learning_rate": 4.784226664429755e-06, + "loss": 0.5159, + "step": 10959 + }, + { + "epoch": 0.8057638582561388, + "grad_norm": 0.8219137191772461, + "learning_rate": 4.784187503598692e-06, + "loss": 0.5424, + "step": 10960 + }, + { + "epoch": 0.8058373768563446, + "grad_norm": 0.8468931317329407, + "learning_rate": 4.784148339374593e-06, + "loss": 0.5446, + "step": 10961 + }, + { + "epoch": 0.8059108954565505, + "grad_norm": 0.8280074596405029, + "learning_rate": 4.784109171757517e-06, + "loss": 0.5294, + "step": 10962 + }, + { + "epoch": 0.8059844140567564, + "grad_norm": 0.8500959277153015, + "learning_rate": 4.784070000747522e-06, + "loss": 0.5729, + "step": 10963 + }, + { + "epoch": 0.8060579326569622, + "grad_norm": 0.8099027872085571, + "learning_rate": 4.784030826344666e-06, + "loss": 0.5201, + "step": 10964 + }, + { + "epoch": 0.8061314512571681, + "grad_norm": 0.8277158737182617, + "learning_rate": 4.783991648549007e-06, + "loss": 0.5686, + "step": 10965 + }, + { + "epoch": 0.8062049698573739, + "grad_norm": 0.859736442565918, + "learning_rate": 4.783952467360604e-06, + "loss": 0.5129, + "step": 10966 + }, + { + "epoch": 0.8062784884575798, + "grad_norm": 0.8101823925971985, + "learning_rate": 4.783913282779515e-06, + "loss": 0.518, + "step": 10967 + }, + { + "epoch": 0.8063520070577856, + "grad_norm": 0.8164732456207275, + "learning_rate": 4.783874094805797e-06, + "loss": 0.4518, + "step": 10968 + }, + { + "epoch": 0.8064255256579915, + "grad_norm": 0.8408162593841553, + "learning_rate": 4.783834903439509e-06, + "loss": 0.5299, + "step": 10969 + }, + { + "epoch": 0.8064990442581973, + "grad_norm": 0.8401206135749817, + "learning_rate": 4.78379570868071e-06, + "loss": 0.4963, + "step": 10970 + }, + { + "epoch": 0.8065725628584032, + "grad_norm": 0.8367481231689453, + "learning_rate": 4.783756510529457e-06, + "loss": 0.5291, + "step": 10971 + }, + { + "epoch": 0.806646081458609, + "grad_norm": 0.8391899466514587, + "learning_rate": 4.783717308985809e-06, + "loss": 0.545, + "step": 10972 + }, + { + "epoch": 0.8067196000588149, + "grad_norm": 0.842791736125946, + "learning_rate": 4.783678104049825e-06, + "loss": 0.56, + "step": 10973 + }, + { + "epoch": 0.8067931186590207, + "grad_norm": 0.8849760890007019, + "learning_rate": 4.783638895721561e-06, + "loss": 0.573, + "step": 10974 + }, + { + "epoch": 0.8068666372592266, + "grad_norm": 0.8060376644134521, + "learning_rate": 4.783599684001077e-06, + "loss": 0.4784, + "step": 10975 + }, + { + "epoch": 0.8069401558594325, + "grad_norm": 0.8725643754005432, + "learning_rate": 4.78356046888843e-06, + "loss": 0.5333, + "step": 10976 + }, + { + "epoch": 0.8070136744596383, + "grad_norm": 0.80571448802948, + "learning_rate": 4.7835212503836795e-06, + "loss": 0.5653, + "step": 10977 + }, + { + "epoch": 0.8070871930598441, + "grad_norm": 0.8135367631912231, + "learning_rate": 4.7834820284868835e-06, + "loss": 0.502, + "step": 10978 + }, + { + "epoch": 0.80716071166005, + "grad_norm": 0.8435800075531006, + "learning_rate": 4.783442803198099e-06, + "loss": 0.5176, + "step": 10979 + }, + { + "epoch": 0.8072342302602559, + "grad_norm": 0.8105892539024353, + "learning_rate": 4.783403574517387e-06, + "loss": 0.5208, + "step": 10980 + }, + { + "epoch": 0.8073077488604617, + "grad_norm": 0.8435215950012207, + "learning_rate": 4.783364342444803e-06, + "loss": 0.526, + "step": 10981 + }, + { + "epoch": 0.8073812674606675, + "grad_norm": 0.8538809418678284, + "learning_rate": 4.783325106980406e-06, + "loss": 0.576, + "step": 10982 + }, + { + "epoch": 0.8074547860608734, + "grad_norm": 0.8376870155334473, + "learning_rate": 4.783285868124254e-06, + "loss": 0.499, + "step": 10983 + }, + { + "epoch": 0.8075283046610793, + "grad_norm": 0.8124322295188904, + "learning_rate": 4.7832466258764075e-06, + "loss": 0.5155, + "step": 10984 + }, + { + "epoch": 0.8076018232612852, + "grad_norm": 0.8649541735649109, + "learning_rate": 4.783207380236921e-06, + "loss": 0.5738, + "step": 10985 + }, + { + "epoch": 0.8076753418614909, + "grad_norm": 0.8093895316123962, + "learning_rate": 4.783168131205858e-06, + "loss": 0.5304, + "step": 10986 + }, + { + "epoch": 0.8077488604616968, + "grad_norm": 0.8211585879325867, + "learning_rate": 4.783128878783272e-06, + "loss": 0.5136, + "step": 10987 + }, + { + "epoch": 0.8078223790619027, + "grad_norm": 0.9063660502433777, + "learning_rate": 4.783089622969222e-06, + "loss": 0.5337, + "step": 10988 + }, + { + "epoch": 0.8078958976621086, + "grad_norm": 0.8434269428253174, + "learning_rate": 4.783050363763769e-06, + "loss": 0.5232, + "step": 10989 + }, + { + "epoch": 0.8079694162623143, + "grad_norm": 0.8180649876594543, + "learning_rate": 4.783011101166969e-06, + "loss": 0.5187, + "step": 10990 + }, + { + "epoch": 0.8080429348625202, + "grad_norm": 0.877894401550293, + "learning_rate": 4.782971835178881e-06, + "loss": 0.5636, + "step": 10991 + }, + { + "epoch": 0.8081164534627261, + "grad_norm": 0.8473223447799683, + "learning_rate": 4.782932565799564e-06, + "loss": 0.5331, + "step": 10992 + }, + { + "epoch": 0.808189972062932, + "grad_norm": 0.7918576002120972, + "learning_rate": 4.782893293029075e-06, + "loss": 0.5121, + "step": 10993 + }, + { + "epoch": 0.8082634906631377, + "grad_norm": 0.8326922059059143, + "learning_rate": 4.782854016867474e-06, + "loss": 0.5415, + "step": 10994 + }, + { + "epoch": 0.8083370092633436, + "grad_norm": 0.8352206945419312, + "learning_rate": 4.782814737314817e-06, + "loss": 0.5832, + "step": 10995 + }, + { + "epoch": 0.8084105278635495, + "grad_norm": 0.8464149832725525, + "learning_rate": 4.782775454371165e-06, + "loss": 0.5346, + "step": 10996 + }, + { + "epoch": 0.8084840464637554, + "grad_norm": 0.8334778547286987, + "learning_rate": 4.782736168036574e-06, + "loss": 0.5154, + "step": 10997 + }, + { + "epoch": 0.8085575650639611, + "grad_norm": 0.817391574382782, + "learning_rate": 4.782696878311104e-06, + "loss": 0.5393, + "step": 10998 + }, + { + "epoch": 0.808631083664167, + "grad_norm": 0.8220919370651245, + "learning_rate": 4.782657585194812e-06, + "loss": 0.5416, + "step": 10999 + }, + { + "epoch": 0.8087046022643729, + "grad_norm": 0.843687891960144, + "learning_rate": 4.782618288687758e-06, + "loss": 0.5431, + "step": 11000 + }, + { + "epoch": 0.8087781208645788, + "grad_norm": 0.7982800006866455, + "learning_rate": 4.78257898879e-06, + "loss": 0.5192, + "step": 11001 + }, + { + "epoch": 0.8088516394647846, + "grad_norm": 0.7962113618850708, + "learning_rate": 4.7825396855015945e-06, + "loss": 0.524, + "step": 11002 + }, + { + "epoch": 0.8089251580649904, + "grad_norm": 0.8064985275268555, + "learning_rate": 4.782500378822603e-06, + "loss": 0.5424, + "step": 11003 + }, + { + "epoch": 0.8089986766651963, + "grad_norm": 0.8610924482345581, + "learning_rate": 4.782461068753081e-06, + "loss": 0.5522, + "step": 11004 + }, + { + "epoch": 0.8090721952654022, + "grad_norm": 0.8338280320167542, + "learning_rate": 4.782421755293088e-06, + "loss": 0.5004, + "step": 11005 + }, + { + "epoch": 0.809145713865608, + "grad_norm": 0.8414120078086853, + "learning_rate": 4.782382438442683e-06, + "loss": 0.547, + "step": 11006 + }, + { + "epoch": 0.8092192324658138, + "grad_norm": 0.8355515003204346, + "learning_rate": 4.782343118201923e-06, + "loss": 0.4825, + "step": 11007 + }, + { + "epoch": 0.8092927510660197, + "grad_norm": 0.8467540144920349, + "learning_rate": 4.782303794570868e-06, + "loss": 0.5434, + "step": 11008 + }, + { + "epoch": 0.8093662696662256, + "grad_norm": 0.8669694066047668, + "learning_rate": 4.782264467549575e-06, + "loss": 0.5592, + "step": 11009 + }, + { + "epoch": 0.8094397882664314, + "grad_norm": 0.8829163908958435, + "learning_rate": 4.782225137138104e-06, + "loss": 0.5816, + "step": 11010 + }, + { + "epoch": 0.8095133068666373, + "grad_norm": 0.7922269701957703, + "learning_rate": 4.782185803336513e-06, + "loss": 0.5145, + "step": 11011 + }, + { + "epoch": 0.8095868254668431, + "grad_norm": 0.8002742528915405, + "learning_rate": 4.782146466144859e-06, + "loss": 0.4749, + "step": 11012 + }, + { + "epoch": 0.809660344067049, + "grad_norm": 0.8428245782852173, + "learning_rate": 4.782107125563201e-06, + "loss": 0.5179, + "step": 11013 + }, + { + "epoch": 0.8097338626672548, + "grad_norm": 0.7977726459503174, + "learning_rate": 4.7820677815915986e-06, + "loss": 0.4963, + "step": 11014 + }, + { + "epoch": 0.8098073812674607, + "grad_norm": 0.8330321907997131, + "learning_rate": 4.782028434230109e-06, + "loss": 0.5524, + "step": 11015 + }, + { + "epoch": 0.8098808998676665, + "grad_norm": 0.7848511338233948, + "learning_rate": 4.781989083478792e-06, + "loss": 0.5062, + "step": 11016 + }, + { + "epoch": 0.8099544184678724, + "grad_norm": 0.8070018291473389, + "learning_rate": 4.781949729337705e-06, + "loss": 0.5417, + "step": 11017 + }, + { + "epoch": 0.8100279370680782, + "grad_norm": 0.8108657002449036, + "learning_rate": 4.781910371806905e-06, + "loss": 0.5046, + "step": 11018 + }, + { + "epoch": 0.8101014556682841, + "grad_norm": 0.8850300908088684, + "learning_rate": 4.781871010886454e-06, + "loss": 0.5389, + "step": 11019 + }, + { + "epoch": 0.81017497426849, + "grad_norm": 0.7955283522605896, + "learning_rate": 4.781831646576407e-06, + "loss": 0.5198, + "step": 11020 + }, + { + "epoch": 0.8102484928686958, + "grad_norm": 0.841864824295044, + "learning_rate": 4.7817922788768255e-06, + "loss": 0.4743, + "step": 11021 + }, + { + "epoch": 0.8103220114689016, + "grad_norm": 0.8345712423324585, + "learning_rate": 4.781752907787765e-06, + "loss": 0.5583, + "step": 11022 + }, + { + "epoch": 0.8103955300691075, + "grad_norm": 0.8292920589447021, + "learning_rate": 4.7817135333092875e-06, + "loss": 0.513, + "step": 11023 + }, + { + "epoch": 0.8104690486693134, + "grad_norm": 0.7856601476669312, + "learning_rate": 4.781674155441448e-06, + "loss": 0.5141, + "step": 11024 + }, + { + "epoch": 0.8105425672695192, + "grad_norm": 0.8477720022201538, + "learning_rate": 4.781634774184307e-06, + "loss": 0.5316, + "step": 11025 + }, + { + "epoch": 0.810616085869725, + "grad_norm": 0.8713408708572388, + "learning_rate": 4.781595389537923e-06, + "loss": 0.5519, + "step": 11026 + }, + { + "epoch": 0.8106896044699309, + "grad_norm": 0.7841761708259583, + "learning_rate": 4.781556001502352e-06, + "loss": 0.4897, + "step": 11027 + }, + { + "epoch": 0.8107631230701368, + "grad_norm": 0.8163352608680725, + "learning_rate": 4.781516610077657e-06, + "loss": 0.5251, + "step": 11028 + }, + { + "epoch": 0.8108366416703426, + "grad_norm": 0.7921731472015381, + "learning_rate": 4.781477215263892e-06, + "loss": 0.5317, + "step": 11029 + }, + { + "epoch": 0.8109101602705484, + "grad_norm": 0.850532591342926, + "learning_rate": 4.7814378170611184e-06, + "loss": 0.549, + "step": 11030 + }, + { + "epoch": 0.8109836788707543, + "grad_norm": 0.7945539951324463, + "learning_rate": 4.781398415469395e-06, + "loss": 0.537, + "step": 11031 + }, + { + "epoch": 0.8110571974709602, + "grad_norm": 0.8129040598869324, + "learning_rate": 4.781359010488778e-06, + "loss": 0.5128, + "step": 11032 + }, + { + "epoch": 0.811130716071166, + "grad_norm": 0.7973667979240417, + "learning_rate": 4.781319602119327e-06, + "loss": 0.5124, + "step": 11033 + }, + { + "epoch": 0.8112042346713718, + "grad_norm": 0.8304096460342407, + "learning_rate": 4.7812801903611e-06, + "loss": 0.538, + "step": 11034 + }, + { + "epoch": 0.8112777532715777, + "grad_norm": 0.7975058555603027, + "learning_rate": 4.781240775214157e-06, + "loss": 0.5743, + "step": 11035 + }, + { + "epoch": 0.8113512718717836, + "grad_norm": 0.8642358183860779, + "learning_rate": 4.781201356678556e-06, + "loss": 0.5583, + "step": 11036 + }, + { + "epoch": 0.8114247904719895, + "grad_norm": 0.8120304942131042, + "learning_rate": 4.781161934754355e-06, + "loss": 0.5229, + "step": 11037 + }, + { + "epoch": 0.8114983090721952, + "grad_norm": 0.8539409637451172, + "learning_rate": 4.7811225094416126e-06, + "loss": 0.4718, + "step": 11038 + }, + { + "epoch": 0.8115718276724011, + "grad_norm": 0.8916470408439636, + "learning_rate": 4.7810830807403876e-06, + "loss": 0.5184, + "step": 11039 + }, + { + "epoch": 0.811645346272607, + "grad_norm": 0.8364790678024292, + "learning_rate": 4.781043648650739e-06, + "loss": 0.5484, + "step": 11040 + }, + { + "epoch": 0.8117188648728129, + "grad_norm": 0.797244131565094, + "learning_rate": 4.781004213172725e-06, + "loss": 0.5017, + "step": 11041 + }, + { + "epoch": 0.8117923834730186, + "grad_norm": 0.8469781279563904, + "learning_rate": 4.780964774306404e-06, + "loss": 0.5233, + "step": 11042 + }, + { + "epoch": 0.8118659020732245, + "grad_norm": 0.8340592980384827, + "learning_rate": 4.780925332051834e-06, + "loss": 0.5218, + "step": 11043 + }, + { + "epoch": 0.8119394206734304, + "grad_norm": 0.8588736653327942, + "learning_rate": 4.7808858864090755e-06, + "loss": 0.542, + "step": 11044 + }, + { + "epoch": 0.8120129392736363, + "grad_norm": 0.8141918778419495, + "learning_rate": 4.780846437378186e-06, + "loss": 0.5546, + "step": 11045 + }, + { + "epoch": 0.812086457873842, + "grad_norm": 0.8348001837730408, + "learning_rate": 4.780806984959223e-06, + "loss": 0.504, + "step": 11046 + }, + { + "epoch": 0.8121599764740479, + "grad_norm": 0.828570544719696, + "learning_rate": 4.780767529152247e-06, + "loss": 0.5423, + "step": 11047 + }, + { + "epoch": 0.8122334950742538, + "grad_norm": 0.8595280051231384, + "learning_rate": 4.780728069957315e-06, + "loss": 0.5791, + "step": 11048 + }, + { + "epoch": 0.8123070136744597, + "grad_norm": 0.8059713244438171, + "learning_rate": 4.7806886073744865e-06, + "loss": 0.549, + "step": 11049 + }, + { + "epoch": 0.8123805322746654, + "grad_norm": 0.8749924302101135, + "learning_rate": 4.78064914140382e-06, + "loss": 0.5526, + "step": 11050 + }, + { + "epoch": 0.8124540508748713, + "grad_norm": 0.8289139270782471, + "learning_rate": 4.780609672045375e-06, + "loss": 0.5363, + "step": 11051 + }, + { + "epoch": 0.8125275694750772, + "grad_norm": 0.805067241191864, + "learning_rate": 4.780570199299208e-06, + "loss": 0.5177, + "step": 11052 + }, + { + "epoch": 0.8126010880752831, + "grad_norm": 0.831296980381012, + "learning_rate": 4.78053072316538e-06, + "loss": 0.5745, + "step": 11053 + }, + { + "epoch": 0.8126746066754889, + "grad_norm": 0.8051124811172485, + "learning_rate": 4.780491243643947e-06, + "loss": 0.4839, + "step": 11054 + }, + { + "epoch": 0.8127481252756947, + "grad_norm": 0.8557770848274231, + "learning_rate": 4.78045176073497e-06, + "loss": 0.5263, + "step": 11055 + }, + { + "epoch": 0.8128216438759006, + "grad_norm": 0.8531129956245422, + "learning_rate": 4.780412274438507e-06, + "loss": 0.5759, + "step": 11056 + }, + { + "epoch": 0.8128951624761065, + "grad_norm": 0.7945084571838379, + "learning_rate": 4.780372784754615e-06, + "loss": 0.5478, + "step": 11057 + }, + { + "epoch": 0.8129686810763123, + "grad_norm": 0.8186981678009033, + "learning_rate": 4.780333291683355e-06, + "loss": 0.519, + "step": 11058 + }, + { + "epoch": 0.8130421996765181, + "grad_norm": 0.8073182702064514, + "learning_rate": 4.780293795224787e-06, + "loss": 0.486, + "step": 11059 + }, + { + "epoch": 0.813115718276724, + "grad_norm": 0.7899954319000244, + "learning_rate": 4.780254295378964e-06, + "loss": 0.5137, + "step": 11060 + }, + { + "epoch": 0.8131892368769299, + "grad_norm": 0.857563316822052, + "learning_rate": 4.7802147921459496e-06, + "loss": 0.5577, + "step": 11061 + }, + { + "epoch": 0.8132627554771357, + "grad_norm": 0.7926645278930664, + "learning_rate": 4.780175285525801e-06, + "loss": 0.5037, + "step": 11062 + }, + { + "epoch": 0.8133362740773415, + "grad_norm": 0.8619323968887329, + "learning_rate": 4.7801357755185765e-06, + "loss": 0.5241, + "step": 11063 + }, + { + "epoch": 0.8134097926775474, + "grad_norm": 0.7700501084327698, + "learning_rate": 4.7800962621243355e-06, + "loss": 0.4744, + "step": 11064 + }, + { + "epoch": 0.8134833112777533, + "grad_norm": 0.7932068109512329, + "learning_rate": 4.780056745343137e-06, + "loss": 0.508, + "step": 11065 + }, + { + "epoch": 0.8135568298779591, + "grad_norm": 0.8477669954299927, + "learning_rate": 4.780017225175038e-06, + "loss": 0.5006, + "step": 11066 + }, + { + "epoch": 0.813630348478165, + "grad_norm": 0.8856947422027588, + "learning_rate": 4.779977701620099e-06, + "loss": 0.5967, + "step": 11067 + }, + { + "epoch": 0.8137038670783708, + "grad_norm": 0.8224878311157227, + "learning_rate": 4.779938174678377e-06, + "loss": 0.5216, + "step": 11068 + }, + { + "epoch": 0.8137773856785767, + "grad_norm": 0.7850083708763123, + "learning_rate": 4.7798986443499325e-06, + "loss": 0.499, + "step": 11069 + }, + { + "epoch": 0.8138509042787825, + "grad_norm": 0.7953516840934753, + "learning_rate": 4.779859110634824e-06, + "loss": 0.5204, + "step": 11070 + }, + { + "epoch": 0.8139244228789884, + "grad_norm": 0.8204354047775269, + "learning_rate": 4.779819573533107e-06, + "loss": 0.554, + "step": 11071 + }, + { + "epoch": 0.8139979414791942, + "grad_norm": 0.7606602311134338, + "learning_rate": 4.779780033044846e-06, + "loss": 0.5172, + "step": 11072 + }, + { + "epoch": 0.8140714600794001, + "grad_norm": 0.8356537222862244, + "learning_rate": 4.779740489170095e-06, + "loss": 0.4904, + "step": 11073 + }, + { + "epoch": 0.8141449786796059, + "grad_norm": 0.8938836455345154, + "learning_rate": 4.779700941908915e-06, + "loss": 0.5057, + "step": 11074 + }, + { + "epoch": 0.8142184972798118, + "grad_norm": 0.7873280644416809, + "learning_rate": 4.7796613912613634e-06, + "loss": 0.4704, + "step": 11075 + }, + { + "epoch": 0.8142920158800176, + "grad_norm": 0.789728581905365, + "learning_rate": 4.7796218372275e-06, + "loss": 0.4852, + "step": 11076 + }, + { + "epoch": 0.8143655344802235, + "grad_norm": 0.8183556199073792, + "learning_rate": 4.779582279807383e-06, + "loss": 0.539, + "step": 11077 + }, + { + "epoch": 0.8144390530804293, + "grad_norm": 0.809457004070282, + "learning_rate": 4.779542719001071e-06, + "loss": 0.527, + "step": 11078 + }, + { + "epoch": 0.8145125716806352, + "grad_norm": 0.8647887706756592, + "learning_rate": 4.779503154808624e-06, + "loss": 0.5498, + "step": 11079 + }, + { + "epoch": 0.8145860902808411, + "grad_norm": 0.8199924826622009, + "learning_rate": 4.7794635872301e-06, + "loss": 0.5189, + "step": 11080 + }, + { + "epoch": 0.8146596088810469, + "grad_norm": 0.8361583948135376, + "learning_rate": 4.779424016265556e-06, + "loss": 0.5395, + "step": 11081 + }, + { + "epoch": 0.8147331274812527, + "grad_norm": 0.820765495300293, + "learning_rate": 4.779384441915054e-06, + "loss": 0.5147, + "step": 11082 + }, + { + "epoch": 0.8148066460814586, + "grad_norm": 0.86850905418396, + "learning_rate": 4.779344864178651e-06, + "loss": 0.5346, + "step": 11083 + }, + { + "epoch": 0.8148801646816645, + "grad_norm": 0.8133804798126221, + "learning_rate": 4.7793052830564055e-06, + "loss": 0.4771, + "step": 11084 + }, + { + "epoch": 0.8149536832818703, + "grad_norm": 0.9254838824272156, + "learning_rate": 4.779265698548378e-06, + "loss": 0.5425, + "step": 11085 + }, + { + "epoch": 0.8150272018820761, + "grad_norm": 0.8412163853645325, + "learning_rate": 4.779226110654624e-06, + "loss": 0.5078, + "step": 11086 + }, + { + "epoch": 0.815100720482282, + "grad_norm": 0.8138225078582764, + "learning_rate": 4.779186519375207e-06, + "loss": 0.5249, + "step": 11087 + }, + { + "epoch": 0.8151742390824879, + "grad_norm": 0.8275130391120911, + "learning_rate": 4.779146924710181e-06, + "loss": 0.5248, + "step": 11088 + }, + { + "epoch": 0.8152477576826938, + "grad_norm": 0.8295080661773682, + "learning_rate": 4.7791073266596085e-06, + "loss": 0.5237, + "step": 11089 + }, + { + "epoch": 0.8153212762828995, + "grad_norm": 0.8299759030342102, + "learning_rate": 4.779067725223547e-06, + "loss": 0.5354, + "step": 11090 + }, + { + "epoch": 0.8153947948831054, + "grad_norm": 0.8757503628730774, + "learning_rate": 4.779028120402053e-06, + "loss": 0.5971, + "step": 11091 + }, + { + "epoch": 0.8154683134833113, + "grad_norm": 0.8578057885169983, + "learning_rate": 4.77898851219519e-06, + "loss": 0.5217, + "step": 11092 + }, + { + "epoch": 0.8155418320835172, + "grad_norm": 0.7767823934555054, + "learning_rate": 4.778948900603013e-06, + "loss": 0.4984, + "step": 11093 + }, + { + "epoch": 0.8156153506837229, + "grad_norm": 0.7703574895858765, + "learning_rate": 4.778909285625583e-06, + "loss": 0.4894, + "step": 11094 + }, + { + "epoch": 0.8156888692839288, + "grad_norm": 0.8533843159675598, + "learning_rate": 4.7788696672629574e-06, + "loss": 0.5087, + "step": 11095 + }, + { + "epoch": 0.8157623878841347, + "grad_norm": 0.811241865158081, + "learning_rate": 4.778830045515196e-06, + "loss": 0.5827, + "step": 11096 + }, + { + "epoch": 0.8158359064843406, + "grad_norm": 0.8188102841377258, + "learning_rate": 4.778790420382357e-06, + "loss": 0.5302, + "step": 11097 + }, + { + "epoch": 0.8159094250845463, + "grad_norm": 0.8573470115661621, + "learning_rate": 4.7787507918645e-06, + "loss": 0.5345, + "step": 11098 + }, + { + "epoch": 0.8159829436847522, + "grad_norm": 0.8104531168937683, + "learning_rate": 4.778711159961683e-06, + "loss": 0.4745, + "step": 11099 + }, + { + "epoch": 0.8160564622849581, + "grad_norm": 0.9021457433700562, + "learning_rate": 4.778671524673966e-06, + "loss": 0.5107, + "step": 11100 + }, + { + "epoch": 0.816129980885164, + "grad_norm": 0.860532283782959, + "learning_rate": 4.778631886001407e-06, + "loss": 0.55, + "step": 11101 + }, + { + "epoch": 0.8162034994853697, + "grad_norm": 0.8090296983718872, + "learning_rate": 4.778592243944065e-06, + "loss": 0.5471, + "step": 11102 + }, + { + "epoch": 0.8162770180855756, + "grad_norm": 0.8113638758659363, + "learning_rate": 4.778552598501999e-06, + "loss": 0.4976, + "step": 11103 + }, + { + "epoch": 0.8163505366857815, + "grad_norm": 0.8761773109436035, + "learning_rate": 4.778512949675268e-06, + "loss": 0.5494, + "step": 11104 + }, + { + "epoch": 0.8164240552859874, + "grad_norm": 0.7964498400688171, + "learning_rate": 4.77847329746393e-06, + "loss": 0.5089, + "step": 11105 + }, + { + "epoch": 0.8164975738861933, + "grad_norm": 0.7983787655830383, + "learning_rate": 4.778433641868046e-06, + "loss": 0.501, + "step": 11106 + }, + { + "epoch": 0.816571092486399, + "grad_norm": 0.8404422402381897, + "learning_rate": 4.778393982887673e-06, + "loss": 0.5573, + "step": 11107 + }, + { + "epoch": 0.8166446110866049, + "grad_norm": 0.8390858173370361, + "learning_rate": 4.77835432052287e-06, + "loss": 0.5202, + "step": 11108 + }, + { + "epoch": 0.8167181296868108, + "grad_norm": 0.8161986470222473, + "learning_rate": 4.778314654773697e-06, + "loss": 0.509, + "step": 11109 + }, + { + "epoch": 0.8167916482870167, + "grad_norm": 0.7942909598350525, + "learning_rate": 4.778274985640213e-06, + "loss": 0.5359, + "step": 11110 + }, + { + "epoch": 0.8168651668872224, + "grad_norm": 0.8347287774085999, + "learning_rate": 4.778235313122475e-06, + "loss": 0.5148, + "step": 11111 + }, + { + "epoch": 0.8169386854874283, + "grad_norm": 0.8169558048248291, + "learning_rate": 4.778195637220544e-06, + "loss": 0.5327, + "step": 11112 + }, + { + "epoch": 0.8170122040876342, + "grad_norm": 0.8453447222709656, + "learning_rate": 4.778155957934478e-06, + "loss": 0.5638, + "step": 11113 + }, + { + "epoch": 0.8170857226878401, + "grad_norm": 0.7889436483383179, + "learning_rate": 4.778116275264336e-06, + "loss": 0.4784, + "step": 11114 + }, + { + "epoch": 0.8171592412880458, + "grad_norm": 0.8775247931480408, + "learning_rate": 4.7780765892101765e-06, + "loss": 0.5818, + "step": 11115 + }, + { + "epoch": 0.8172327598882517, + "grad_norm": 0.7966326475143433, + "learning_rate": 4.7780368997720595e-06, + "loss": 0.5246, + "step": 11116 + }, + { + "epoch": 0.8173062784884576, + "grad_norm": 0.8268973231315613, + "learning_rate": 4.777997206950043e-06, + "loss": 0.5446, + "step": 11117 + }, + { + "epoch": 0.8173797970886635, + "grad_norm": 0.826259434223175, + "learning_rate": 4.777957510744187e-06, + "loss": 0.5382, + "step": 11118 + }, + { + "epoch": 0.8174533156888693, + "grad_norm": 0.8248881697654724, + "learning_rate": 4.77791781115455e-06, + "loss": 0.5324, + "step": 11119 + }, + { + "epoch": 0.8175268342890751, + "grad_norm": 0.7932005524635315, + "learning_rate": 4.77787810818119e-06, + "loss": 0.5501, + "step": 11120 + }, + { + "epoch": 0.817600352889281, + "grad_norm": 0.8062952756881714, + "learning_rate": 4.777838401824167e-06, + "loss": 0.5178, + "step": 11121 + }, + { + "epoch": 0.8176738714894869, + "grad_norm": 0.8251031041145325, + "learning_rate": 4.7777986920835405e-06, + "loss": 0.6107, + "step": 11122 + }, + { + "epoch": 0.8177473900896927, + "grad_norm": 0.8000215888023376, + "learning_rate": 4.7777589789593684e-06, + "loss": 0.5307, + "step": 11123 + }, + { + "epoch": 0.8178209086898985, + "grad_norm": 0.781064510345459, + "learning_rate": 4.77771926245171e-06, + "loss": 0.5226, + "step": 11124 + }, + { + "epoch": 0.8178944272901044, + "grad_norm": 0.8389056921005249, + "learning_rate": 4.777679542560624e-06, + "loss": 0.5521, + "step": 11125 + }, + { + "epoch": 0.8179679458903103, + "grad_norm": 0.7874335646629333, + "learning_rate": 4.7776398192861704e-06, + "loss": 0.4925, + "step": 11126 + }, + { + "epoch": 0.8180414644905161, + "grad_norm": 0.8321433067321777, + "learning_rate": 4.777600092628407e-06, + "loss": 0.4772, + "step": 11127 + }, + { + "epoch": 0.818114983090722, + "grad_norm": 0.8276160955429077, + "learning_rate": 4.777560362587393e-06, + "loss": 0.5205, + "step": 11128 + }, + { + "epoch": 0.8181885016909278, + "grad_norm": 0.8204348087310791, + "learning_rate": 4.777520629163189e-06, + "loss": 0.5288, + "step": 11129 + }, + { + "epoch": 0.8182620202911337, + "grad_norm": 0.81489497423172, + "learning_rate": 4.777480892355852e-06, + "loss": 0.5438, + "step": 11130 + }, + { + "epoch": 0.8183355388913395, + "grad_norm": 0.8192896246910095, + "learning_rate": 4.777441152165442e-06, + "loss": 0.4583, + "step": 11131 + }, + { + "epoch": 0.8184090574915454, + "grad_norm": 0.8422150015830994, + "learning_rate": 4.777401408592018e-06, + "loss": 0.5444, + "step": 11132 + }, + { + "epoch": 0.8184825760917512, + "grad_norm": 0.8414568901062012, + "learning_rate": 4.777361661635639e-06, + "loss": 0.5087, + "step": 11133 + }, + { + "epoch": 0.8185560946919571, + "grad_norm": 0.8177990913391113, + "learning_rate": 4.777321911296364e-06, + "loss": 0.5553, + "step": 11134 + }, + { + "epoch": 0.8186296132921629, + "grad_norm": 0.8127576112747192, + "learning_rate": 4.7772821575742514e-06, + "loss": 0.5338, + "step": 11135 + }, + { + "epoch": 0.8187031318923688, + "grad_norm": 0.8429848551750183, + "learning_rate": 4.7772424004693615e-06, + "loss": 0.541, + "step": 11136 + }, + { + "epoch": 0.8187766504925746, + "grad_norm": 0.8212087750434875, + "learning_rate": 4.777202639981752e-06, + "loss": 0.534, + "step": 11137 + }, + { + "epoch": 0.8188501690927805, + "grad_norm": 0.8561496734619141, + "learning_rate": 4.777162876111483e-06, + "loss": 0.5488, + "step": 11138 + }, + { + "epoch": 0.8189236876929863, + "grad_norm": 0.8068091869354248, + "learning_rate": 4.7771231088586135e-06, + "loss": 0.5275, + "step": 11139 + }, + { + "epoch": 0.8189972062931922, + "grad_norm": 0.7996342778205872, + "learning_rate": 4.777083338223202e-06, + "loss": 0.5493, + "step": 11140 + }, + { + "epoch": 0.819070724893398, + "grad_norm": 0.8133062124252319, + "learning_rate": 4.777043564205308e-06, + "loss": 0.5228, + "step": 11141 + }, + { + "epoch": 0.8191442434936039, + "grad_norm": 0.7883291244506836, + "learning_rate": 4.7770037868049915e-06, + "loss": 0.5324, + "step": 11142 + }, + { + "epoch": 0.8192177620938097, + "grad_norm": 0.8519462943077087, + "learning_rate": 4.776964006022309e-06, + "loss": 0.5954, + "step": 11143 + }, + { + "epoch": 0.8192912806940156, + "grad_norm": 0.8398228287696838, + "learning_rate": 4.776924221857322e-06, + "loss": 0.4695, + "step": 11144 + }, + { + "epoch": 0.8193647992942215, + "grad_norm": 0.7975549697875977, + "learning_rate": 4.776884434310089e-06, + "loss": 0.4941, + "step": 11145 + }, + { + "epoch": 0.8194383178944273, + "grad_norm": 0.7829383015632629, + "learning_rate": 4.7768446433806685e-06, + "loss": 0.5497, + "step": 11146 + }, + { + "epoch": 0.8195118364946331, + "grad_norm": 0.886823296546936, + "learning_rate": 4.7768048490691194e-06, + "loss": 0.558, + "step": 11147 + }, + { + "epoch": 0.819585355094839, + "grad_norm": 0.8655707836151123, + "learning_rate": 4.776765051375502e-06, + "loss": 0.5252, + "step": 11148 + }, + { + "epoch": 0.8196588736950449, + "grad_norm": 0.8576639890670776, + "learning_rate": 4.776725250299874e-06, + "loss": 0.5521, + "step": 11149 + }, + { + "epoch": 0.8197323922952507, + "grad_norm": 0.7865989804267883, + "learning_rate": 4.776685445842296e-06, + "loss": 0.5395, + "step": 11150 + }, + { + "epoch": 0.8198059108954565, + "grad_norm": 0.8028109073638916, + "learning_rate": 4.776645638002827e-06, + "loss": 0.515, + "step": 11151 + }, + { + "epoch": 0.8198794294956624, + "grad_norm": 0.8044483065605164, + "learning_rate": 4.776605826781525e-06, + "loss": 0.512, + "step": 11152 + }, + { + "epoch": 0.8199529480958683, + "grad_norm": 0.8348039984703064, + "learning_rate": 4.776566012178449e-06, + "loss": 0.5743, + "step": 11153 + }, + { + "epoch": 0.8200264666960742, + "grad_norm": 0.9141361713409424, + "learning_rate": 4.7765261941936595e-06, + "loss": 0.5295, + "step": 11154 + }, + { + "epoch": 0.8200999852962799, + "grad_norm": 0.8081245422363281, + "learning_rate": 4.776486372827215e-06, + "loss": 0.5418, + "step": 11155 + }, + { + "epoch": 0.8201735038964858, + "grad_norm": 0.7929980158805847, + "learning_rate": 4.776446548079174e-06, + "loss": 0.5093, + "step": 11156 + }, + { + "epoch": 0.8202470224966917, + "grad_norm": 0.8556016683578491, + "learning_rate": 4.776406719949598e-06, + "loss": 0.5313, + "step": 11157 + }, + { + "epoch": 0.8203205410968976, + "grad_norm": 0.8086403608322144, + "learning_rate": 4.776366888438543e-06, + "loss": 0.5525, + "step": 11158 + }, + { + "epoch": 0.8203940596971033, + "grad_norm": 0.8219059705734253, + "learning_rate": 4.77632705354607e-06, + "loss": 0.5397, + "step": 11159 + }, + { + "epoch": 0.8204675782973092, + "grad_norm": 0.8073656558990479, + "learning_rate": 4.776287215272238e-06, + "loss": 0.5372, + "step": 11160 + }, + { + "epoch": 0.8205410968975151, + "grad_norm": 0.794551432132721, + "learning_rate": 4.776247373617106e-06, + "loss": 0.5118, + "step": 11161 + }, + { + "epoch": 0.820614615497721, + "grad_norm": 0.8177917003631592, + "learning_rate": 4.776207528580733e-06, + "loss": 0.5393, + "step": 11162 + }, + { + "epoch": 0.8206881340979267, + "grad_norm": 0.803551435470581, + "learning_rate": 4.776167680163178e-06, + "loss": 0.5102, + "step": 11163 + }, + { + "epoch": 0.8207616526981326, + "grad_norm": 0.8433583974838257, + "learning_rate": 4.776127828364501e-06, + "loss": 0.549, + "step": 11164 + }, + { + "epoch": 0.8208351712983385, + "grad_norm": 0.8320105671882629, + "learning_rate": 4.776087973184761e-06, + "loss": 0.5281, + "step": 11165 + }, + { + "epoch": 0.8209086898985444, + "grad_norm": 0.7585362195968628, + "learning_rate": 4.776048114624017e-06, + "loss": 0.5132, + "step": 11166 + }, + { + "epoch": 0.8209822084987501, + "grad_norm": 0.8144948482513428, + "learning_rate": 4.776008252682327e-06, + "loss": 0.533, + "step": 11167 + }, + { + "epoch": 0.821055727098956, + "grad_norm": 0.8097867369651794, + "learning_rate": 4.775968387359752e-06, + "loss": 0.5035, + "step": 11168 + }, + { + "epoch": 0.8211292456991619, + "grad_norm": 0.8468430042266846, + "learning_rate": 4.775928518656351e-06, + "loss": 0.5091, + "step": 11169 + }, + { + "epoch": 0.8212027642993678, + "grad_norm": 0.7873150110244751, + "learning_rate": 4.775888646572183e-06, + "loss": 0.5257, + "step": 11170 + }, + { + "epoch": 0.8212762828995736, + "grad_norm": 0.837081253528595, + "learning_rate": 4.7758487711073064e-06, + "loss": 0.5189, + "step": 11171 + }, + { + "epoch": 0.8213498014997794, + "grad_norm": 0.8364818096160889, + "learning_rate": 4.775808892261781e-06, + "loss": 0.543, + "step": 11172 + }, + { + "epoch": 0.8214233200999853, + "grad_norm": 0.8601551055908203, + "learning_rate": 4.775769010035667e-06, + "loss": 0.5371, + "step": 11173 + }, + { + "epoch": 0.8214968387001912, + "grad_norm": 0.8391677141189575, + "learning_rate": 4.775729124429021e-06, + "loss": 0.5464, + "step": 11174 + }, + { + "epoch": 0.821570357300397, + "grad_norm": 0.8227339386940002, + "learning_rate": 4.775689235441906e-06, + "loss": 0.5108, + "step": 11175 + }, + { + "epoch": 0.8216438759006028, + "grad_norm": 0.8103352189064026, + "learning_rate": 4.775649343074378e-06, + "loss": 0.5103, + "step": 11176 + }, + { + "epoch": 0.8217173945008087, + "grad_norm": 0.802095353603363, + "learning_rate": 4.775609447326498e-06, + "loss": 0.5585, + "step": 11177 + }, + { + "epoch": 0.8217909131010146, + "grad_norm": 0.8406501412391663, + "learning_rate": 4.775569548198324e-06, + "loss": 0.5221, + "step": 11178 + }, + { + "epoch": 0.8218644317012204, + "grad_norm": 0.8744344115257263, + "learning_rate": 4.775529645689918e-06, + "loss": 0.5521, + "step": 11179 + }, + { + "epoch": 0.8219379503014262, + "grad_norm": 0.8388099074363708, + "learning_rate": 4.775489739801336e-06, + "loss": 0.5523, + "step": 11180 + }, + { + "epoch": 0.8220114689016321, + "grad_norm": 0.8333150148391724, + "learning_rate": 4.775449830532638e-06, + "loss": 0.5261, + "step": 11181 + }, + { + "epoch": 0.822084987501838, + "grad_norm": 0.8317480087280273, + "learning_rate": 4.775409917883885e-06, + "loss": 0.5862, + "step": 11182 + }, + { + "epoch": 0.8221585061020438, + "grad_norm": 0.8568970561027527, + "learning_rate": 4.775370001855134e-06, + "loss": 0.516, + "step": 11183 + }, + { + "epoch": 0.8222320247022497, + "grad_norm": 0.8026912212371826, + "learning_rate": 4.775330082446447e-06, + "loss": 0.5186, + "step": 11184 + }, + { + "epoch": 0.8223055433024555, + "grad_norm": 0.8135226368904114, + "learning_rate": 4.7752901596578806e-06, + "loss": 0.5623, + "step": 11185 + }, + { + "epoch": 0.8223790619026614, + "grad_norm": 0.86476069688797, + "learning_rate": 4.775250233489495e-06, + "loss": 0.5695, + "step": 11186 + }, + { + "epoch": 0.8224525805028672, + "grad_norm": 0.8343169093132019, + "learning_rate": 4.77521030394135e-06, + "loss": 0.5182, + "step": 11187 + }, + { + "epoch": 0.8225260991030731, + "grad_norm": 0.794138491153717, + "learning_rate": 4.775170371013506e-06, + "loss": 0.4882, + "step": 11188 + }, + { + "epoch": 0.8225996177032789, + "grad_norm": 0.8680347800254822, + "learning_rate": 4.775130434706019e-06, + "loss": 0.5305, + "step": 11189 + }, + { + "epoch": 0.8226731363034848, + "grad_norm": 0.7994892597198486, + "learning_rate": 4.775090495018952e-06, + "loss": 0.5432, + "step": 11190 + }, + { + "epoch": 0.8227466549036906, + "grad_norm": 0.8721225261688232, + "learning_rate": 4.775050551952362e-06, + "loss": 0.5513, + "step": 11191 + }, + { + "epoch": 0.8228201735038965, + "grad_norm": 0.8010334968566895, + "learning_rate": 4.775010605506309e-06, + "loss": 0.5169, + "step": 11192 + }, + { + "epoch": 0.8228936921041023, + "grad_norm": 0.7985556125640869, + "learning_rate": 4.774970655680852e-06, + "loss": 0.5321, + "step": 11193 + }, + { + "epoch": 0.8229672107043082, + "grad_norm": 0.8160461783409119, + "learning_rate": 4.774930702476051e-06, + "loss": 0.5296, + "step": 11194 + }, + { + "epoch": 0.823040729304514, + "grad_norm": 0.8279892802238464, + "learning_rate": 4.774890745891964e-06, + "loss": 0.5301, + "step": 11195 + }, + { + "epoch": 0.8231142479047199, + "grad_norm": 0.7958954572677612, + "learning_rate": 4.774850785928653e-06, + "loss": 0.4944, + "step": 11196 + }, + { + "epoch": 0.8231877665049258, + "grad_norm": 0.810646116733551, + "learning_rate": 4.774810822586176e-06, + "loss": 0.5244, + "step": 11197 + }, + { + "epoch": 0.8232612851051316, + "grad_norm": 0.764227569103241, + "learning_rate": 4.7747708558645905e-06, + "loss": 0.4783, + "step": 11198 + }, + { + "epoch": 0.8233348037053374, + "grad_norm": 0.799196720123291, + "learning_rate": 4.774730885763958e-06, + "loss": 0.4862, + "step": 11199 + }, + { + "epoch": 0.8234083223055433, + "grad_norm": 0.7657194137573242, + "learning_rate": 4.774690912284337e-06, + "loss": 0.515, + "step": 11200 + }, + { + "epoch": 0.8234818409057492, + "grad_norm": 0.8163392543792725, + "learning_rate": 4.774650935425788e-06, + "loss": 0.5217, + "step": 11201 + }, + { + "epoch": 0.823555359505955, + "grad_norm": 0.8077760934829712, + "learning_rate": 4.774610955188369e-06, + "loss": 0.5558, + "step": 11202 + }, + { + "epoch": 0.8236288781061608, + "grad_norm": 0.8167486190795898, + "learning_rate": 4.774570971572141e-06, + "loss": 0.5569, + "step": 11203 + }, + { + "epoch": 0.8237023967063667, + "grad_norm": 0.8519325256347656, + "learning_rate": 4.774530984577161e-06, + "loss": 0.507, + "step": 11204 + }, + { + "epoch": 0.8237759153065726, + "grad_norm": 0.8104497790336609, + "learning_rate": 4.77449099420349e-06, + "loss": 0.5035, + "step": 11205 + }, + { + "epoch": 0.8238494339067784, + "grad_norm": 0.7962576150894165, + "learning_rate": 4.774451000451188e-06, + "loss": 0.5101, + "step": 11206 + }, + { + "epoch": 0.8239229525069842, + "grad_norm": 0.8534595370292664, + "learning_rate": 4.774411003320313e-06, + "loss": 0.5504, + "step": 11207 + }, + { + "epoch": 0.8239964711071901, + "grad_norm": 0.8500521779060364, + "learning_rate": 4.774371002810925e-06, + "loss": 0.5296, + "step": 11208 + }, + { + "epoch": 0.824069989707396, + "grad_norm": 0.8393610715866089, + "learning_rate": 4.774330998923083e-06, + "loss": 0.5363, + "step": 11209 + }, + { + "epoch": 0.8241435083076019, + "grad_norm": 0.7904156446456909, + "learning_rate": 4.774290991656848e-06, + "loss": 0.4851, + "step": 11210 + }, + { + "epoch": 0.8242170269078076, + "grad_norm": 0.8077813982963562, + "learning_rate": 4.774250981012277e-06, + "loss": 0.5557, + "step": 11211 + }, + { + "epoch": 0.8242905455080135, + "grad_norm": 0.8330923318862915, + "learning_rate": 4.774210966989432e-06, + "loss": 0.5227, + "step": 11212 + }, + { + "epoch": 0.8243640641082194, + "grad_norm": 0.8257962465286255, + "learning_rate": 4.774170949588371e-06, + "loss": 0.5536, + "step": 11213 + }, + { + "epoch": 0.8244375827084253, + "grad_norm": 0.7893235683441162, + "learning_rate": 4.774130928809153e-06, + "loss": 0.5269, + "step": 11214 + }, + { + "epoch": 0.824511101308631, + "grad_norm": 0.8124738931655884, + "learning_rate": 4.774090904651837e-06, + "loss": 0.4871, + "step": 11215 + }, + { + "epoch": 0.8245846199088369, + "grad_norm": 0.8095811605453491, + "learning_rate": 4.774050877116485e-06, + "loss": 0.5265, + "step": 11216 + }, + { + "epoch": 0.8246581385090428, + "grad_norm": 0.8133836984634399, + "learning_rate": 4.774010846203154e-06, + "loss": 0.5435, + "step": 11217 + }, + { + "epoch": 0.8247316571092487, + "grad_norm": 0.8373463749885559, + "learning_rate": 4.773970811911905e-06, + "loss": 0.5554, + "step": 11218 + }, + { + "epoch": 0.8248051757094544, + "grad_norm": 0.8810011744499207, + "learning_rate": 4.773930774242798e-06, + "loss": 0.5278, + "step": 11219 + }, + { + "epoch": 0.8248786943096603, + "grad_norm": 0.8727593421936035, + "learning_rate": 4.773890733195889e-06, + "loss": 0.5392, + "step": 11220 + }, + { + "epoch": 0.8249522129098662, + "grad_norm": 0.8086453080177307, + "learning_rate": 4.773850688771241e-06, + "loss": 0.5005, + "step": 11221 + }, + { + "epoch": 0.8250257315100721, + "grad_norm": 0.8176688551902771, + "learning_rate": 4.773810640968913e-06, + "loss": 0.5539, + "step": 11222 + }, + { + "epoch": 0.8250992501102778, + "grad_norm": 0.8362888693809509, + "learning_rate": 4.773770589788963e-06, + "loss": 0.5124, + "step": 11223 + }, + { + "epoch": 0.8251727687104837, + "grad_norm": 0.8244991302490234, + "learning_rate": 4.773730535231451e-06, + "loss": 0.5531, + "step": 11224 + }, + { + "epoch": 0.8252462873106896, + "grad_norm": 0.8804242014884949, + "learning_rate": 4.773690477296438e-06, + "loss": 0.5558, + "step": 11225 + }, + { + "epoch": 0.8253198059108955, + "grad_norm": 0.8460858464241028, + "learning_rate": 4.773650415983981e-06, + "loss": 0.5376, + "step": 11226 + }, + { + "epoch": 0.8253933245111013, + "grad_norm": 0.8263475894927979, + "learning_rate": 4.773610351294141e-06, + "loss": 0.5134, + "step": 11227 + }, + { + "epoch": 0.8254668431113071, + "grad_norm": 0.7929509282112122, + "learning_rate": 4.773570283226978e-06, + "loss": 0.5437, + "step": 11228 + }, + { + "epoch": 0.825540361711513, + "grad_norm": 0.866435170173645, + "learning_rate": 4.7735302117825506e-06, + "loss": 0.5614, + "step": 11229 + }, + { + "epoch": 0.8256138803117189, + "grad_norm": 0.8007215857505798, + "learning_rate": 4.773490136960918e-06, + "loss": 0.5308, + "step": 11230 + }, + { + "epoch": 0.8256873989119247, + "grad_norm": 0.8171535730361938, + "learning_rate": 4.773450058762142e-06, + "loss": 0.489, + "step": 11231 + }, + { + "epoch": 0.8257609175121305, + "grad_norm": 0.8500158786773682, + "learning_rate": 4.773409977186279e-06, + "loss": 0.5312, + "step": 11232 + }, + { + "epoch": 0.8258344361123364, + "grad_norm": 0.8505780100822449, + "learning_rate": 4.773369892233389e-06, + "loss": 0.5232, + "step": 11233 + }, + { + "epoch": 0.8259079547125423, + "grad_norm": 0.8114443421363831, + "learning_rate": 4.773329803903534e-06, + "loss": 0.4983, + "step": 11234 + }, + { + "epoch": 0.8259814733127481, + "grad_norm": 0.7956454157829285, + "learning_rate": 4.773289712196771e-06, + "loss": 0.4971, + "step": 11235 + }, + { + "epoch": 0.826054991912954, + "grad_norm": 0.8194020390510559, + "learning_rate": 4.773249617113162e-06, + "loss": 0.541, + "step": 11236 + }, + { + "epoch": 0.8261285105131598, + "grad_norm": 0.8380955457687378, + "learning_rate": 4.7732095186527646e-06, + "loss": 0.534, + "step": 11237 + }, + { + "epoch": 0.8262020291133657, + "grad_norm": 0.8315753936767578, + "learning_rate": 4.773169416815638e-06, + "loss": 0.4776, + "step": 11238 + }, + { + "epoch": 0.8262755477135715, + "grad_norm": 0.8102899789810181, + "learning_rate": 4.773129311601844e-06, + "loss": 0.528, + "step": 11239 + }, + { + "epoch": 0.8263490663137774, + "grad_norm": 0.8561495542526245, + "learning_rate": 4.773089203011441e-06, + "loss": 0.5169, + "step": 11240 + }, + { + "epoch": 0.8264225849139832, + "grad_norm": 0.8125258088111877, + "learning_rate": 4.773049091044487e-06, + "loss": 0.5572, + "step": 11241 + }, + { + "epoch": 0.8264961035141891, + "grad_norm": 0.8537551760673523, + "learning_rate": 4.773008975701044e-06, + "loss": 0.5495, + "step": 11242 + }, + { + "epoch": 0.826569622114395, + "grad_norm": 0.8491482138633728, + "learning_rate": 4.772968856981171e-06, + "loss": 0.5756, + "step": 11243 + }, + { + "epoch": 0.8266431407146008, + "grad_norm": 0.8384000062942505, + "learning_rate": 4.7729287348849266e-06, + "loss": 0.5736, + "step": 11244 + }, + { + "epoch": 0.8267166593148066, + "grad_norm": 0.8536553382873535, + "learning_rate": 4.772888609412371e-06, + "loss": 0.5652, + "step": 11245 + }, + { + "epoch": 0.8267901779150125, + "grad_norm": 0.8068138360977173, + "learning_rate": 4.772848480563564e-06, + "loss": 0.4927, + "step": 11246 + }, + { + "epoch": 0.8268636965152184, + "grad_norm": 0.8129410147666931, + "learning_rate": 4.7728083483385655e-06, + "loss": 0.5358, + "step": 11247 + }, + { + "epoch": 0.8269372151154242, + "grad_norm": 0.7659311890602112, + "learning_rate": 4.772768212737434e-06, + "loss": 0.5075, + "step": 11248 + }, + { + "epoch": 0.82701073371563, + "grad_norm": 0.8414600491523743, + "learning_rate": 4.77272807376023e-06, + "loss": 0.5043, + "step": 11249 + }, + { + "epoch": 0.8270842523158359, + "grad_norm": 0.8288606405258179, + "learning_rate": 4.772687931407013e-06, + "loss": 0.5513, + "step": 11250 + }, + { + "epoch": 0.8271577709160418, + "grad_norm": 0.8238076567649841, + "learning_rate": 4.772647785677842e-06, + "loss": 0.5011, + "step": 11251 + }, + { + "epoch": 0.8272312895162476, + "grad_norm": 0.8435410857200623, + "learning_rate": 4.772607636572778e-06, + "loss": 0.5316, + "step": 11252 + }, + { + "epoch": 0.8273048081164535, + "grad_norm": 0.8286506533622742, + "learning_rate": 4.772567484091879e-06, + "loss": 0.4733, + "step": 11253 + }, + { + "epoch": 0.8273783267166593, + "grad_norm": 0.7839934229850769, + "learning_rate": 4.772527328235206e-06, + "loss": 0.4832, + "step": 11254 + }, + { + "epoch": 0.8274518453168652, + "grad_norm": 0.8276464939117432, + "learning_rate": 4.772487169002819e-06, + "loss": 0.5241, + "step": 11255 + }, + { + "epoch": 0.827525363917071, + "grad_norm": 0.8389026522636414, + "learning_rate": 4.772447006394775e-06, + "loss": 0.5516, + "step": 11256 + }, + { + "epoch": 0.8275988825172769, + "grad_norm": 0.8300449848175049, + "learning_rate": 4.772406840411136e-06, + "loss": 0.5289, + "step": 11257 + }, + { + "epoch": 0.8276724011174827, + "grad_norm": 0.8123378157615662, + "learning_rate": 4.772366671051961e-06, + "loss": 0.488, + "step": 11258 + }, + { + "epoch": 0.8277459197176886, + "grad_norm": 0.8558441400527954, + "learning_rate": 4.77232649831731e-06, + "loss": 0.5278, + "step": 11259 + }, + { + "epoch": 0.8278194383178944, + "grad_norm": 0.8193264007568359, + "learning_rate": 4.772286322207242e-06, + "loss": 0.542, + "step": 11260 + }, + { + "epoch": 0.8278929569181003, + "grad_norm": 0.8102686405181885, + "learning_rate": 4.772246142721818e-06, + "loss": 0.5625, + "step": 11261 + }, + { + "epoch": 0.8279664755183062, + "grad_norm": 0.8157998919487, + "learning_rate": 4.772205959861096e-06, + "loss": 0.5145, + "step": 11262 + }, + { + "epoch": 0.828039994118512, + "grad_norm": 0.8231305480003357, + "learning_rate": 4.772165773625137e-06, + "loss": 0.5091, + "step": 11263 + }, + { + "epoch": 0.8281135127187178, + "grad_norm": 0.8120936751365662, + "learning_rate": 4.772125584014e-06, + "loss": 0.5194, + "step": 11264 + }, + { + "epoch": 0.8281870313189237, + "grad_norm": 0.8087732791900635, + "learning_rate": 4.772085391027744e-06, + "loss": 0.4917, + "step": 11265 + }, + { + "epoch": 0.8282605499191296, + "grad_norm": 0.8348216414451599, + "learning_rate": 4.772045194666431e-06, + "loss": 0.5885, + "step": 11266 + }, + { + "epoch": 0.8283340685193354, + "grad_norm": 0.791107177734375, + "learning_rate": 4.772004994930119e-06, + "loss": 0.5106, + "step": 11267 + }, + { + "epoch": 0.8284075871195412, + "grad_norm": 0.8480244278907776, + "learning_rate": 4.771964791818867e-06, + "loss": 0.532, + "step": 11268 + }, + { + "epoch": 0.8284811057197471, + "grad_norm": 0.8221269249916077, + "learning_rate": 4.7719245853327354e-06, + "loss": 0.5266, + "step": 11269 + }, + { + "epoch": 0.828554624319953, + "grad_norm": 0.8506975173950195, + "learning_rate": 4.771884375471786e-06, + "loss": 0.5725, + "step": 11270 + }, + { + "epoch": 0.8286281429201588, + "grad_norm": 0.7920926809310913, + "learning_rate": 4.771844162236075e-06, + "loss": 0.4969, + "step": 11271 + }, + { + "epoch": 0.8287016615203646, + "grad_norm": 0.857045590877533, + "learning_rate": 4.771803945625665e-06, + "loss": 0.5531, + "step": 11272 + }, + { + "epoch": 0.8287751801205705, + "grad_norm": 0.8691778182983398, + "learning_rate": 4.771763725640615e-06, + "loss": 0.5343, + "step": 11273 + }, + { + "epoch": 0.8288486987207764, + "grad_norm": 0.8289052844047546, + "learning_rate": 4.771723502280983e-06, + "loss": 0.5463, + "step": 11274 + }, + { + "epoch": 0.8289222173209823, + "grad_norm": 0.8161314129829407, + "learning_rate": 4.771683275546831e-06, + "loss": 0.4952, + "step": 11275 + }, + { + "epoch": 0.828995735921188, + "grad_norm": 0.8111347556114197, + "learning_rate": 4.771643045438218e-06, + "loss": 0.5263, + "step": 11276 + }, + { + "epoch": 0.8290692545213939, + "grad_norm": 0.8407371044158936, + "learning_rate": 4.771602811955204e-06, + "loss": 0.5158, + "step": 11277 + }, + { + "epoch": 0.8291427731215998, + "grad_norm": 0.8448430895805359, + "learning_rate": 4.771562575097846e-06, + "loss": 0.5739, + "step": 11278 + }, + { + "epoch": 0.8292162917218057, + "grad_norm": 0.8333767652511597, + "learning_rate": 4.77152233486621e-06, + "loss": 0.5615, + "step": 11279 + }, + { + "epoch": 0.8292898103220114, + "grad_norm": 0.8185545206069946, + "learning_rate": 4.771482091260349e-06, + "loss": 0.5072, + "step": 11280 + }, + { + "epoch": 0.8293633289222173, + "grad_norm": 0.8235476016998291, + "learning_rate": 4.771441844280327e-06, + "loss": 0.5124, + "step": 11281 + }, + { + "epoch": 0.8294368475224232, + "grad_norm": 0.7995654940605164, + "learning_rate": 4.771401593926202e-06, + "loss": 0.5195, + "step": 11282 + }, + { + "epoch": 0.8295103661226291, + "grad_norm": 0.831336498260498, + "learning_rate": 4.771361340198034e-06, + "loss": 0.535, + "step": 11283 + }, + { + "epoch": 0.8295838847228348, + "grad_norm": 0.8021090626716614, + "learning_rate": 4.771321083095885e-06, + "loss": 0.5325, + "step": 11284 + }, + { + "epoch": 0.8296574033230407, + "grad_norm": 0.8264485597610474, + "learning_rate": 4.771280822619811e-06, + "loss": 0.5257, + "step": 11285 + }, + { + "epoch": 0.8297309219232466, + "grad_norm": 0.7990605235099792, + "learning_rate": 4.771240558769874e-06, + "loss": 0.4862, + "step": 11286 + }, + { + "epoch": 0.8298044405234525, + "grad_norm": 0.850988507270813, + "learning_rate": 4.771200291546134e-06, + "loss": 0.5577, + "step": 11287 + }, + { + "epoch": 0.8298779591236582, + "grad_norm": 0.8538208603858948, + "learning_rate": 4.771160020948649e-06, + "loss": 0.5805, + "step": 11288 + }, + { + "epoch": 0.8299514777238641, + "grad_norm": 0.8073508739471436, + "learning_rate": 4.771119746977482e-06, + "loss": 0.5317, + "step": 11289 + }, + { + "epoch": 0.83002499632407, + "grad_norm": 0.9100559949874878, + "learning_rate": 4.77107946963269e-06, + "loss": 0.5269, + "step": 11290 + }, + { + "epoch": 0.8300985149242759, + "grad_norm": 0.8290418982505798, + "learning_rate": 4.771039188914335e-06, + "loss": 0.5024, + "step": 11291 + }, + { + "epoch": 0.8301720335244817, + "grad_norm": 0.87714684009552, + "learning_rate": 4.770998904822475e-06, + "loss": 0.5579, + "step": 11292 + }, + { + "epoch": 0.8302455521246875, + "grad_norm": 0.8566208481788635, + "learning_rate": 4.77095861735717e-06, + "loss": 0.5221, + "step": 11293 + }, + { + "epoch": 0.8303190707248934, + "grad_norm": 0.8430255651473999, + "learning_rate": 4.77091832651848e-06, + "loss": 0.5082, + "step": 11294 + }, + { + "epoch": 0.8303925893250993, + "grad_norm": 0.8463939428329468, + "learning_rate": 4.770878032306466e-06, + "loss": 0.5549, + "step": 11295 + }, + { + "epoch": 0.8304661079253051, + "grad_norm": 0.8798572421073914, + "learning_rate": 4.770837734721187e-06, + "loss": 0.5519, + "step": 11296 + }, + { + "epoch": 0.8305396265255109, + "grad_norm": 0.8260458111763, + "learning_rate": 4.770797433762702e-06, + "loss": 0.4974, + "step": 11297 + }, + { + "epoch": 0.8306131451257168, + "grad_norm": 0.858985960483551, + "learning_rate": 4.770757129431073e-06, + "loss": 0.5574, + "step": 11298 + }, + { + "epoch": 0.8306866637259227, + "grad_norm": 0.818972647190094, + "learning_rate": 4.770716821726358e-06, + "loss": 0.5779, + "step": 11299 + }, + { + "epoch": 0.8307601823261285, + "grad_norm": 0.8230063319206238, + "learning_rate": 4.770676510648617e-06, + "loss": 0.5348, + "step": 11300 + }, + { + "epoch": 0.8308337009263344, + "grad_norm": 0.7931026816368103, + "learning_rate": 4.770636196197911e-06, + "loss": 0.5322, + "step": 11301 + }, + { + "epoch": 0.8309072195265402, + "grad_norm": 0.8191856741905212, + "learning_rate": 4.770595878374299e-06, + "loss": 0.515, + "step": 11302 + }, + { + "epoch": 0.8309807381267461, + "grad_norm": 0.8043980002403259, + "learning_rate": 4.7705555571778415e-06, + "loss": 0.5313, + "step": 11303 + }, + { + "epoch": 0.8310542567269519, + "grad_norm": 0.8341671228408813, + "learning_rate": 4.770515232608598e-06, + "loss": 0.5707, + "step": 11304 + }, + { + "epoch": 0.8311277753271578, + "grad_norm": 0.8496114015579224, + "learning_rate": 4.770474904666628e-06, + "loss": 0.5504, + "step": 11305 + }, + { + "epoch": 0.8312012939273636, + "grad_norm": 0.8118084669113159, + "learning_rate": 4.770434573351992e-06, + "loss": 0.5404, + "step": 11306 + }, + { + "epoch": 0.8312748125275695, + "grad_norm": 0.8298864364624023, + "learning_rate": 4.77039423866475e-06, + "loss": 0.5479, + "step": 11307 + }, + { + "epoch": 0.8313483311277753, + "grad_norm": 0.7995131611824036, + "learning_rate": 4.770353900604961e-06, + "loss": 0.4946, + "step": 11308 + }, + { + "epoch": 0.8314218497279812, + "grad_norm": 0.8680016994476318, + "learning_rate": 4.770313559172687e-06, + "loss": 0.5608, + "step": 11309 + }, + { + "epoch": 0.831495368328187, + "grad_norm": 0.8388053178787231, + "learning_rate": 4.770273214367986e-06, + "loss": 0.5846, + "step": 11310 + }, + { + "epoch": 0.8315688869283929, + "grad_norm": 0.8840752840042114, + "learning_rate": 4.770232866190918e-06, + "loss": 0.592, + "step": 11311 + }, + { + "epoch": 0.8316424055285987, + "grad_norm": 0.886031448841095, + "learning_rate": 4.770192514641544e-06, + "loss": 0.5799, + "step": 11312 + }, + { + "epoch": 0.8317159241288046, + "grad_norm": 0.8419873118400574, + "learning_rate": 4.770152159719923e-06, + "loss": 0.5397, + "step": 11313 + }, + { + "epoch": 0.8317894427290105, + "grad_norm": 0.8427174091339111, + "learning_rate": 4.7701118014261145e-06, + "loss": 0.5253, + "step": 11314 + }, + { + "epoch": 0.8318629613292163, + "grad_norm": 0.9774391651153564, + "learning_rate": 4.77007143976018e-06, + "loss": 0.6021, + "step": 11315 + }, + { + "epoch": 0.8319364799294221, + "grad_norm": 0.8636453747749329, + "learning_rate": 4.770031074722179e-06, + "loss": 0.5516, + "step": 11316 + }, + { + "epoch": 0.832009998529628, + "grad_norm": 0.7994987964630127, + "learning_rate": 4.769990706312171e-06, + "loss": 0.4834, + "step": 11317 + }, + { + "epoch": 0.8320835171298339, + "grad_norm": 0.8994420170783997, + "learning_rate": 4.769950334530215e-06, + "loss": 0.5143, + "step": 11318 + }, + { + "epoch": 0.8321570357300397, + "grad_norm": 0.7845983505249023, + "learning_rate": 4.769909959376374e-06, + "loss": 0.493, + "step": 11319 + }, + { + "epoch": 0.8322305543302455, + "grad_norm": 0.9085301756858826, + "learning_rate": 4.769869580850705e-06, + "loss": 0.5276, + "step": 11320 + }, + { + "epoch": 0.8323040729304514, + "grad_norm": 0.7906249165534973, + "learning_rate": 4.7698291989532685e-06, + "loss": 0.5086, + "step": 11321 + }, + { + "epoch": 0.8323775915306573, + "grad_norm": 0.8339211940765381, + "learning_rate": 4.769788813684126e-06, + "loss": 0.5381, + "step": 11322 + }, + { + "epoch": 0.8324511101308631, + "grad_norm": 0.8128288984298706, + "learning_rate": 4.769748425043336e-06, + "loss": 0.5282, + "step": 11323 + }, + { + "epoch": 0.8325246287310689, + "grad_norm": 0.8239064812660217, + "learning_rate": 4.769708033030958e-06, + "loss": 0.5298, + "step": 11324 + }, + { + "epoch": 0.8325981473312748, + "grad_norm": 0.8548926115036011, + "learning_rate": 4.769667637647055e-06, + "loss": 0.5566, + "step": 11325 + }, + { + "epoch": 0.8326716659314807, + "grad_norm": 0.8513265252113342, + "learning_rate": 4.769627238891684e-06, + "loss": 0.5248, + "step": 11326 + }, + { + "epoch": 0.8327451845316866, + "grad_norm": 0.8364790678024292, + "learning_rate": 4.7695868367649065e-06, + "loss": 0.578, + "step": 11327 + }, + { + "epoch": 0.8328187031318923, + "grad_norm": 0.8149252533912659, + "learning_rate": 4.769546431266782e-06, + "loss": 0.5268, + "step": 11328 + }, + { + "epoch": 0.8328922217320982, + "grad_norm": 0.8476575016975403, + "learning_rate": 4.769506022397369e-06, + "loss": 0.5722, + "step": 11329 + }, + { + "epoch": 0.8329657403323041, + "grad_norm": 0.8470839858055115, + "learning_rate": 4.76946561015673e-06, + "loss": 0.5079, + "step": 11330 + }, + { + "epoch": 0.83303925893251, + "grad_norm": 0.8002585768699646, + "learning_rate": 4.769425194544925e-06, + "loss": 0.5133, + "step": 11331 + }, + { + "epoch": 0.8331127775327157, + "grad_norm": 0.8485570549964905, + "learning_rate": 4.769384775562012e-06, + "loss": 0.5189, + "step": 11332 + }, + { + "epoch": 0.8331862961329216, + "grad_norm": 0.8323488235473633, + "learning_rate": 4.769344353208053e-06, + "loss": 0.5535, + "step": 11333 + }, + { + "epoch": 0.8332598147331275, + "grad_norm": 0.8153433799743652, + "learning_rate": 4.769303927483107e-06, + "loss": 0.5542, + "step": 11334 + }, + { + "epoch": 0.8333333333333334, + "grad_norm": 0.797680675983429, + "learning_rate": 4.769263498387234e-06, + "loss": 0.5043, + "step": 11335 + }, + { + "epoch": 0.8334068519335391, + "grad_norm": 0.8876558542251587, + "learning_rate": 4.769223065920495e-06, + "loss": 0.5498, + "step": 11336 + }, + { + "epoch": 0.833480370533745, + "grad_norm": 0.8686621785163879, + "learning_rate": 4.769182630082949e-06, + "loss": 0.5135, + "step": 11337 + }, + { + "epoch": 0.8335538891339509, + "grad_norm": 0.839314341545105, + "learning_rate": 4.769142190874656e-06, + "loss": 0.5665, + "step": 11338 + }, + { + "epoch": 0.8336274077341568, + "grad_norm": 0.8380415439605713, + "learning_rate": 4.769101748295677e-06, + "loss": 0.5452, + "step": 11339 + }, + { + "epoch": 0.8337009263343625, + "grad_norm": 0.8365569710731506, + "learning_rate": 4.769061302346072e-06, + "loss": 0.5737, + "step": 11340 + }, + { + "epoch": 0.8337744449345684, + "grad_norm": 0.8491060137748718, + "learning_rate": 4.7690208530258995e-06, + "loss": 0.5385, + "step": 11341 + }, + { + "epoch": 0.8338479635347743, + "grad_norm": 0.8156087398529053, + "learning_rate": 4.768980400335222e-06, + "loss": 0.5419, + "step": 11342 + }, + { + "epoch": 0.8339214821349802, + "grad_norm": 0.8368483781814575, + "learning_rate": 4.768939944274098e-06, + "loss": 0.5163, + "step": 11343 + }, + { + "epoch": 0.833995000735186, + "grad_norm": 0.8170251846313477, + "learning_rate": 4.768899484842587e-06, + "loss": 0.4851, + "step": 11344 + }, + { + "epoch": 0.8340685193353918, + "grad_norm": 0.79521244764328, + "learning_rate": 4.768859022040752e-06, + "loss": 0.5173, + "step": 11345 + }, + { + "epoch": 0.8341420379355977, + "grad_norm": 0.800362765789032, + "learning_rate": 4.76881855586865e-06, + "loss": 0.4986, + "step": 11346 + }, + { + "epoch": 0.8342155565358036, + "grad_norm": 0.8060577511787415, + "learning_rate": 4.768778086326342e-06, + "loss": 0.5118, + "step": 11347 + }, + { + "epoch": 0.8342890751360094, + "grad_norm": 0.7824937105178833, + "learning_rate": 4.768737613413889e-06, + "loss": 0.4641, + "step": 11348 + }, + { + "epoch": 0.8343625937362152, + "grad_norm": 0.8133432865142822, + "learning_rate": 4.768697137131349e-06, + "loss": 0.4772, + "step": 11349 + }, + { + "epoch": 0.8344361123364211, + "grad_norm": 0.8341020345687866, + "learning_rate": 4.768656657478785e-06, + "loss": 0.5739, + "step": 11350 + }, + { + "epoch": 0.834509630936627, + "grad_norm": 0.8275470733642578, + "learning_rate": 4.768616174456257e-06, + "loss": 0.5337, + "step": 11351 + }, + { + "epoch": 0.8345831495368328, + "grad_norm": 0.826103150844574, + "learning_rate": 4.768575688063821e-06, + "loss": 0.5315, + "step": 11352 + }, + { + "epoch": 0.8346566681370386, + "grad_norm": 0.8664436936378479, + "learning_rate": 4.768535198301542e-06, + "loss": 0.5258, + "step": 11353 + }, + { + "epoch": 0.8347301867372445, + "grad_norm": 0.7982541918754578, + "learning_rate": 4.768494705169478e-06, + "loss": 0.5007, + "step": 11354 + }, + { + "epoch": 0.8348037053374504, + "grad_norm": 0.8464998602867126, + "learning_rate": 4.768454208667689e-06, + "loss": 0.5587, + "step": 11355 + }, + { + "epoch": 0.8348772239376562, + "grad_norm": 0.8297791481018066, + "learning_rate": 4.768413708796236e-06, + "loss": 0.5747, + "step": 11356 + }, + { + "epoch": 0.8349507425378621, + "grad_norm": 0.7814157009124756, + "learning_rate": 4.768373205555178e-06, + "loss": 0.5289, + "step": 11357 + }, + { + "epoch": 0.8350242611380679, + "grad_norm": 0.8287478685379028, + "learning_rate": 4.768332698944575e-06, + "loss": 0.5416, + "step": 11358 + }, + { + "epoch": 0.8350977797382738, + "grad_norm": 0.8119676113128662, + "learning_rate": 4.76829218896449e-06, + "loss": 0.5278, + "step": 11359 + }, + { + "epoch": 0.8351712983384796, + "grad_norm": 0.8418540358543396, + "learning_rate": 4.76825167561498e-06, + "loss": 0.5027, + "step": 11360 + }, + { + "epoch": 0.8352448169386855, + "grad_norm": 0.8555669784545898, + "learning_rate": 4.768211158896107e-06, + "loss": 0.5845, + "step": 11361 + }, + { + "epoch": 0.8353183355388913, + "grad_norm": 0.8307886719703674, + "learning_rate": 4.76817063880793e-06, + "loss": 0.5322, + "step": 11362 + }, + { + "epoch": 0.8353918541390972, + "grad_norm": 0.8129547834396362, + "learning_rate": 4.768130115350509e-06, + "loss": 0.5466, + "step": 11363 + }, + { + "epoch": 0.835465372739303, + "grad_norm": 0.8349826335906982, + "learning_rate": 4.768089588523905e-06, + "loss": 0.5171, + "step": 11364 + }, + { + "epoch": 0.8355388913395089, + "grad_norm": 0.8321928381919861, + "learning_rate": 4.768049058328179e-06, + "loss": 0.573, + "step": 11365 + }, + { + "epoch": 0.8356124099397148, + "grad_norm": 0.7909669876098633, + "learning_rate": 4.768008524763391e-06, + "loss": 0.5223, + "step": 11366 + }, + { + "epoch": 0.8356859285399206, + "grad_norm": 0.8351568579673767, + "learning_rate": 4.767967987829599e-06, + "loss": 0.5449, + "step": 11367 + }, + { + "epoch": 0.8357594471401264, + "grad_norm": 0.8228239417076111, + "learning_rate": 4.767927447526865e-06, + "loss": 0.5557, + "step": 11368 + }, + { + "epoch": 0.8358329657403323, + "grad_norm": 0.8450327515602112, + "learning_rate": 4.76788690385525e-06, + "loss": 0.5276, + "step": 11369 + }, + { + "epoch": 0.8359064843405382, + "grad_norm": 0.825019896030426, + "learning_rate": 4.767846356814812e-06, + "loss": 0.5198, + "step": 11370 + }, + { + "epoch": 0.835980002940744, + "grad_norm": 0.8747271299362183, + "learning_rate": 4.767805806405613e-06, + "loss": 0.5801, + "step": 11371 + }, + { + "epoch": 0.8360535215409498, + "grad_norm": 0.8427603244781494, + "learning_rate": 4.767765252627713e-06, + "loss": 0.5477, + "step": 11372 + }, + { + "epoch": 0.8361270401411557, + "grad_norm": 0.8149084448814392, + "learning_rate": 4.767724695481171e-06, + "loss": 0.5043, + "step": 11373 + }, + { + "epoch": 0.8362005587413616, + "grad_norm": 0.8258174657821655, + "learning_rate": 4.767684134966048e-06, + "loss": 0.52, + "step": 11374 + }, + { + "epoch": 0.8362740773415674, + "grad_norm": 0.814288854598999, + "learning_rate": 4.767643571082406e-06, + "loss": 0.533, + "step": 11375 + }, + { + "epoch": 0.8363475959417732, + "grad_norm": 0.7949753403663635, + "learning_rate": 4.767603003830303e-06, + "loss": 0.4918, + "step": 11376 + }, + { + "epoch": 0.8364211145419791, + "grad_norm": 0.8193694949150085, + "learning_rate": 4.767562433209799e-06, + "loss": 0.5258, + "step": 11377 + }, + { + "epoch": 0.836494633142185, + "grad_norm": 0.8549246788024902, + "learning_rate": 4.767521859220956e-06, + "loss": 0.5267, + "step": 11378 + }, + { + "epoch": 0.8365681517423909, + "grad_norm": 0.8281671404838562, + "learning_rate": 4.767481281863834e-06, + "loss": 0.5104, + "step": 11379 + }, + { + "epoch": 0.8366416703425966, + "grad_norm": 0.7720338702201843, + "learning_rate": 4.767440701138491e-06, + "loss": 0.479, + "step": 11380 + }, + { + "epoch": 0.8367151889428025, + "grad_norm": 0.7497683167457581, + "learning_rate": 4.76740011704499e-06, + "loss": 0.4831, + "step": 11381 + }, + { + "epoch": 0.8367887075430084, + "grad_norm": 0.8389952778816223, + "learning_rate": 4.76735952958339e-06, + "loss": 0.5598, + "step": 11382 + }, + { + "epoch": 0.8368622261432143, + "grad_norm": 0.7765005230903625, + "learning_rate": 4.7673189387537526e-06, + "loss": 0.4801, + "step": 11383 + }, + { + "epoch": 0.8369357447434201, + "grad_norm": 0.8231204152107239, + "learning_rate": 4.767278344556136e-06, + "loss": 0.5226, + "step": 11384 + }, + { + "epoch": 0.8370092633436259, + "grad_norm": 0.8303301334381104, + "learning_rate": 4.767237746990602e-06, + "loss": 0.5293, + "step": 11385 + }, + { + "epoch": 0.8370827819438318, + "grad_norm": 0.8860382437705994, + "learning_rate": 4.76719714605721e-06, + "loss": 0.5743, + "step": 11386 + }, + { + "epoch": 0.8371563005440377, + "grad_norm": 0.870574414730072, + "learning_rate": 4.767156541756021e-06, + "loss": 0.5178, + "step": 11387 + }, + { + "epoch": 0.8372298191442435, + "grad_norm": 0.8244544863700867, + "learning_rate": 4.767115934087095e-06, + "loss": 0.5228, + "step": 11388 + }, + { + "epoch": 0.8373033377444493, + "grad_norm": 0.8134381771087646, + "learning_rate": 4.7670753230504925e-06, + "loss": 0.4971, + "step": 11389 + }, + { + "epoch": 0.8373768563446552, + "grad_norm": 0.8107829093933105, + "learning_rate": 4.767034708646275e-06, + "loss": 0.5284, + "step": 11390 + }, + { + "epoch": 0.8374503749448611, + "grad_norm": 0.8248894810676575, + "learning_rate": 4.7669940908744995e-06, + "loss": 0.5805, + "step": 11391 + }, + { + "epoch": 0.837523893545067, + "grad_norm": 0.8406099677085876, + "learning_rate": 4.76695346973523e-06, + "loss": 0.5271, + "step": 11392 + }, + { + "epoch": 0.8375974121452727, + "grad_norm": 0.8350443243980408, + "learning_rate": 4.766912845228524e-06, + "loss": 0.5022, + "step": 11393 + }, + { + "epoch": 0.8376709307454786, + "grad_norm": 0.8368911147117615, + "learning_rate": 4.766872217354444e-06, + "loss": 0.4954, + "step": 11394 + }, + { + "epoch": 0.8377444493456845, + "grad_norm": 0.8394556641578674, + "learning_rate": 4.766831586113049e-06, + "loss": 0.5814, + "step": 11395 + }, + { + "epoch": 0.8378179679458904, + "grad_norm": 0.8429223895072937, + "learning_rate": 4.766790951504399e-06, + "loss": 0.5133, + "step": 11396 + }, + { + "epoch": 0.8378914865460961, + "grad_norm": 0.8177450299263, + "learning_rate": 4.766750313528557e-06, + "loss": 0.5379, + "step": 11397 + }, + { + "epoch": 0.837965005146302, + "grad_norm": 0.8235857486724854, + "learning_rate": 4.766709672185581e-06, + "loss": 0.5246, + "step": 11398 + }, + { + "epoch": 0.8380385237465079, + "grad_norm": 0.8331131339073181, + "learning_rate": 4.766669027475531e-06, + "loss": 0.5179, + "step": 11399 + }, + { + "epoch": 0.8381120423467138, + "grad_norm": 0.8388303518295288, + "learning_rate": 4.766628379398469e-06, + "loss": 0.5162, + "step": 11400 + }, + { + "epoch": 0.8381855609469195, + "grad_norm": 0.827940046787262, + "learning_rate": 4.766587727954455e-06, + "loss": 0.5651, + "step": 11401 + }, + { + "epoch": 0.8382590795471254, + "grad_norm": 0.8117587566375732, + "learning_rate": 4.766547073143548e-06, + "loss": 0.5407, + "step": 11402 + }, + { + "epoch": 0.8383325981473313, + "grad_norm": 0.8094414472579956, + "learning_rate": 4.7665064149658105e-06, + "loss": 0.4946, + "step": 11403 + }, + { + "epoch": 0.8384061167475372, + "grad_norm": 0.8498899936676025, + "learning_rate": 4.766465753421301e-06, + "loss": 0.554, + "step": 11404 + }, + { + "epoch": 0.838479635347743, + "grad_norm": 0.8235398530960083, + "learning_rate": 4.766425088510082e-06, + "loss": 0.5634, + "step": 11405 + }, + { + "epoch": 0.8385531539479488, + "grad_norm": 0.8208286166191101, + "learning_rate": 4.76638442023221e-06, + "loss": 0.5297, + "step": 11406 + }, + { + "epoch": 0.8386266725481547, + "grad_norm": 0.8359679579734802, + "learning_rate": 4.7663437485877505e-06, + "loss": 0.5669, + "step": 11407 + }, + { + "epoch": 0.8387001911483606, + "grad_norm": 0.8285340070724487, + "learning_rate": 4.766303073576761e-06, + "loss": 0.5416, + "step": 11408 + }, + { + "epoch": 0.8387737097485664, + "grad_norm": 0.8249173164367676, + "learning_rate": 4.766262395199302e-06, + "loss": 0.5646, + "step": 11409 + }, + { + "epoch": 0.8388472283487722, + "grad_norm": 0.8337162733078003, + "learning_rate": 4.766221713455435e-06, + "loss": 0.5431, + "step": 11410 + }, + { + "epoch": 0.8389207469489781, + "grad_norm": 0.8110598921775818, + "learning_rate": 4.766181028345218e-06, + "loss": 0.5565, + "step": 11411 + }, + { + "epoch": 0.838994265549184, + "grad_norm": 0.8164321184158325, + "learning_rate": 4.766140339868714e-06, + "loss": 0.5661, + "step": 11412 + }, + { + "epoch": 0.8390677841493898, + "grad_norm": 0.8485509753227234, + "learning_rate": 4.766099648025984e-06, + "loss": 0.5569, + "step": 11413 + }, + { + "epoch": 0.8391413027495956, + "grad_norm": 0.8684659004211426, + "learning_rate": 4.766058952817086e-06, + "loss": 0.5494, + "step": 11414 + }, + { + "epoch": 0.8392148213498015, + "grad_norm": 0.8376753926277161, + "learning_rate": 4.766018254242081e-06, + "loss": 0.5607, + "step": 11415 + }, + { + "epoch": 0.8392883399500074, + "grad_norm": 0.7991100549697876, + "learning_rate": 4.765977552301031e-06, + "loss": 0.5122, + "step": 11416 + }, + { + "epoch": 0.8393618585502132, + "grad_norm": 0.8293222188949585, + "learning_rate": 4.7659368469939944e-06, + "loss": 0.5132, + "step": 11417 + }, + { + "epoch": 0.839435377150419, + "grad_norm": 0.860339343547821, + "learning_rate": 4.765896138321034e-06, + "loss": 0.5778, + "step": 11418 + }, + { + "epoch": 0.8395088957506249, + "grad_norm": 0.8168594241142273, + "learning_rate": 4.765855426282208e-06, + "loss": 0.529, + "step": 11419 + }, + { + "epoch": 0.8395824143508308, + "grad_norm": 0.8644717335700989, + "learning_rate": 4.765814710877577e-06, + "loss": 0.5813, + "step": 11420 + }, + { + "epoch": 0.8396559329510366, + "grad_norm": 0.9666632413864136, + "learning_rate": 4.765773992107204e-06, + "loss": 0.5663, + "step": 11421 + }, + { + "epoch": 0.8397294515512425, + "grad_norm": 0.8250070810317993, + "learning_rate": 4.765733269971146e-06, + "loss": 0.5296, + "step": 11422 + }, + { + "epoch": 0.8398029701514483, + "grad_norm": 0.8697986006736755, + "learning_rate": 4.765692544469467e-06, + "loss": 0.5365, + "step": 11423 + }, + { + "epoch": 0.8398764887516542, + "grad_norm": 0.8151691555976868, + "learning_rate": 4.765651815602225e-06, + "loss": 0.5034, + "step": 11424 + }, + { + "epoch": 0.83995000735186, + "grad_norm": 0.8717764019966125, + "learning_rate": 4.765611083369483e-06, + "loss": 0.5277, + "step": 11425 + }, + { + "epoch": 0.8400235259520659, + "grad_norm": 0.8895345330238342, + "learning_rate": 4.765570347771298e-06, + "loss": 0.5705, + "step": 11426 + }, + { + "epoch": 0.8400970445522717, + "grad_norm": 0.8398568630218506, + "learning_rate": 4.765529608807732e-06, + "loss": 0.5363, + "step": 11427 + }, + { + "epoch": 0.8401705631524776, + "grad_norm": 0.7690786123275757, + "learning_rate": 4.765488866478847e-06, + "loss": 0.4814, + "step": 11428 + }, + { + "epoch": 0.8402440817526834, + "grad_norm": 0.84194016456604, + "learning_rate": 4.765448120784702e-06, + "loss": 0.5354, + "step": 11429 + }, + { + "epoch": 0.8403176003528893, + "grad_norm": 0.837780237197876, + "learning_rate": 4.7654073717253566e-06, + "loss": 0.51, + "step": 11430 + }, + { + "epoch": 0.8403911189530952, + "grad_norm": 0.8470392227172852, + "learning_rate": 4.765366619300874e-06, + "loss": 0.5354, + "step": 11431 + }, + { + "epoch": 0.840464637553301, + "grad_norm": 0.8550173044204712, + "learning_rate": 4.765325863511313e-06, + "loss": 0.5603, + "step": 11432 + }, + { + "epoch": 0.8405381561535068, + "grad_norm": 0.8272726535797119, + "learning_rate": 4.765285104356735e-06, + "loss": 0.5496, + "step": 11433 + }, + { + "epoch": 0.8406116747537127, + "grad_norm": 0.8669889569282532, + "learning_rate": 4.765244341837199e-06, + "loss": 0.5512, + "step": 11434 + }, + { + "epoch": 0.8406851933539186, + "grad_norm": 0.8608967065811157, + "learning_rate": 4.765203575952767e-06, + "loss": 0.5317, + "step": 11435 + }, + { + "epoch": 0.8407587119541244, + "grad_norm": 0.8330521583557129, + "learning_rate": 4.7651628067034995e-06, + "loss": 0.5467, + "step": 11436 + }, + { + "epoch": 0.8408322305543302, + "grad_norm": 0.8419882655143738, + "learning_rate": 4.7651220340894564e-06, + "loss": 0.5324, + "step": 11437 + }, + { + "epoch": 0.8409057491545361, + "grad_norm": 0.8488715887069702, + "learning_rate": 4.765081258110698e-06, + "loss": 0.5111, + "step": 11438 + }, + { + "epoch": 0.840979267754742, + "grad_norm": 0.7942224740982056, + "learning_rate": 4.765040478767286e-06, + "loss": 0.5549, + "step": 11439 + }, + { + "epoch": 0.8410527863549478, + "grad_norm": 0.84645676612854, + "learning_rate": 4.76499969605928e-06, + "loss": 0.5066, + "step": 11440 + }, + { + "epoch": 0.8411263049551536, + "grad_norm": 0.803106427192688, + "learning_rate": 4.764958909986742e-06, + "loss": 0.5962, + "step": 11441 + }, + { + "epoch": 0.8411998235553595, + "grad_norm": 0.7799784541130066, + "learning_rate": 4.764918120549731e-06, + "loss": 0.5117, + "step": 11442 + }, + { + "epoch": 0.8412733421555654, + "grad_norm": 0.8684123158454895, + "learning_rate": 4.764877327748307e-06, + "loss": 0.5745, + "step": 11443 + }, + { + "epoch": 0.8413468607557713, + "grad_norm": 0.8282279372215271, + "learning_rate": 4.7648365315825335e-06, + "loss": 0.5402, + "step": 11444 + }, + { + "epoch": 0.841420379355977, + "grad_norm": 0.813048243522644, + "learning_rate": 4.7647957320524685e-06, + "loss": 0.5187, + "step": 11445 + }, + { + "epoch": 0.8414938979561829, + "grad_norm": 0.8351661562919617, + "learning_rate": 4.764754929158174e-06, + "loss": 0.547, + "step": 11446 + }, + { + "epoch": 0.8415674165563888, + "grad_norm": 0.8220629096031189, + "learning_rate": 4.76471412289971e-06, + "loss": 0.5312, + "step": 11447 + }, + { + "epoch": 0.8416409351565947, + "grad_norm": 0.8315290808677673, + "learning_rate": 4.764673313277136e-06, + "loss": 0.5748, + "step": 11448 + }, + { + "epoch": 0.8417144537568004, + "grad_norm": 0.7874208688735962, + "learning_rate": 4.764632500290514e-06, + "loss": 0.5304, + "step": 11449 + }, + { + "epoch": 0.8417879723570063, + "grad_norm": 0.8095319271087646, + "learning_rate": 4.7645916839399055e-06, + "loss": 0.5328, + "step": 11450 + }, + { + "epoch": 0.8418614909572122, + "grad_norm": 0.8475613594055176, + "learning_rate": 4.76455086422537e-06, + "loss": 0.5463, + "step": 11451 + }, + { + "epoch": 0.8419350095574181, + "grad_norm": 0.846244752407074, + "learning_rate": 4.764510041146968e-06, + "loss": 0.5101, + "step": 11452 + }, + { + "epoch": 0.8420085281576238, + "grad_norm": 0.7434936165809631, + "learning_rate": 4.76446921470476e-06, + "loss": 0.4818, + "step": 11453 + }, + { + "epoch": 0.8420820467578297, + "grad_norm": 0.8364201784133911, + "learning_rate": 4.764428384898807e-06, + "loss": 0.5309, + "step": 11454 + }, + { + "epoch": 0.8421555653580356, + "grad_norm": 0.7729449272155762, + "learning_rate": 4.76438755172917e-06, + "loss": 0.4825, + "step": 11455 + }, + { + "epoch": 0.8422290839582415, + "grad_norm": 0.8019609451293945, + "learning_rate": 4.764346715195909e-06, + "loss": 0.489, + "step": 11456 + }, + { + "epoch": 0.8423026025584472, + "grad_norm": 0.7932136654853821, + "learning_rate": 4.764305875299085e-06, + "loss": 0.5263, + "step": 11457 + }, + { + "epoch": 0.8423761211586531, + "grad_norm": 0.8146822452545166, + "learning_rate": 4.764265032038758e-06, + "loss": 0.5014, + "step": 11458 + }, + { + "epoch": 0.842449639758859, + "grad_norm": 0.8038166761398315, + "learning_rate": 4.76422418541499e-06, + "loss": 0.5424, + "step": 11459 + }, + { + "epoch": 0.8425231583590649, + "grad_norm": 0.8398517966270447, + "learning_rate": 4.764183335427842e-06, + "loss": 0.5296, + "step": 11460 + }, + { + "epoch": 0.8425966769592707, + "grad_norm": 0.8191812038421631, + "learning_rate": 4.764142482077372e-06, + "loss": 0.5477, + "step": 11461 + }, + { + "epoch": 0.8426701955594765, + "grad_norm": 0.8216798305511475, + "learning_rate": 4.764101625363643e-06, + "loss": 0.5169, + "step": 11462 + }, + { + "epoch": 0.8427437141596824, + "grad_norm": 0.9563413858413696, + "learning_rate": 4.7640607652867144e-06, + "loss": 0.5559, + "step": 11463 + }, + { + "epoch": 0.8428172327598883, + "grad_norm": 0.8165666460990906, + "learning_rate": 4.764019901846648e-06, + "loss": 0.4996, + "step": 11464 + }, + { + "epoch": 0.8428907513600941, + "grad_norm": 0.7904976010322571, + "learning_rate": 4.763979035043504e-06, + "loss": 0.5018, + "step": 11465 + }, + { + "epoch": 0.8429642699602999, + "grad_norm": 0.7798048853874207, + "learning_rate": 4.763938164877343e-06, + "loss": 0.5206, + "step": 11466 + }, + { + "epoch": 0.8430377885605058, + "grad_norm": 0.8195461630821228, + "learning_rate": 4.763897291348226e-06, + "loss": 0.5235, + "step": 11467 + }, + { + "epoch": 0.8431113071607117, + "grad_norm": 0.8475083112716675, + "learning_rate": 4.7638564144562135e-06, + "loss": 0.5076, + "step": 11468 + }, + { + "epoch": 0.8431848257609175, + "grad_norm": 0.830370306968689, + "learning_rate": 4.763815534201367e-06, + "loss": 0.5445, + "step": 11469 + }, + { + "epoch": 0.8432583443611233, + "grad_norm": 0.8071800470352173, + "learning_rate": 4.763774650583745e-06, + "loss": 0.5677, + "step": 11470 + }, + { + "epoch": 0.8433318629613292, + "grad_norm": 0.790060818195343, + "learning_rate": 4.763733763603411e-06, + "loss": 0.5251, + "step": 11471 + }, + { + "epoch": 0.8434053815615351, + "grad_norm": 0.805030107498169, + "learning_rate": 4.763692873260423e-06, + "loss": 0.5466, + "step": 11472 + }, + { + "epoch": 0.8434789001617409, + "grad_norm": 0.8480959534645081, + "learning_rate": 4.763651979554844e-06, + "loss": 0.5504, + "step": 11473 + }, + { + "epoch": 0.8435524187619468, + "grad_norm": 0.8770602941513062, + "learning_rate": 4.7636110824867345e-06, + "loss": 0.5501, + "step": 11474 + }, + { + "epoch": 0.8436259373621526, + "grad_norm": 0.8383925557136536, + "learning_rate": 4.763570182056154e-06, + "loss": 0.5278, + "step": 11475 + }, + { + "epoch": 0.8436994559623585, + "grad_norm": 0.822306752204895, + "learning_rate": 4.7635292782631646e-06, + "loss": 0.5621, + "step": 11476 + }, + { + "epoch": 0.8437729745625643, + "grad_norm": 0.7787200212478638, + "learning_rate": 4.763488371107825e-06, + "loss": 0.489, + "step": 11477 + }, + { + "epoch": 0.8438464931627702, + "grad_norm": 0.8171682357788086, + "learning_rate": 4.7634474605902e-06, + "loss": 0.516, + "step": 11478 + }, + { + "epoch": 0.843920011762976, + "grad_norm": 0.8250762224197388, + "learning_rate": 4.763406546710345e-06, + "loss": 0.5223, + "step": 11479 + }, + { + "epoch": 0.8439935303631819, + "grad_norm": 0.8205649256706238, + "learning_rate": 4.763365629468324e-06, + "loss": 0.542, + "step": 11480 + }, + { + "epoch": 0.8440670489633877, + "grad_norm": 0.8652176856994629, + "learning_rate": 4.763324708864199e-06, + "loss": 0.5541, + "step": 11481 + }, + { + "epoch": 0.8441405675635936, + "grad_norm": 0.7708829045295715, + "learning_rate": 4.763283784898027e-06, + "loss": 0.5199, + "step": 11482 + }, + { + "epoch": 0.8442140861637994, + "grad_norm": 0.8317080140113831, + "learning_rate": 4.763242857569872e-06, + "loss": 0.5774, + "step": 11483 + }, + { + "epoch": 0.8442876047640053, + "grad_norm": 0.8063217401504517, + "learning_rate": 4.763201926879794e-06, + "loss": 0.5191, + "step": 11484 + }, + { + "epoch": 0.8443611233642111, + "grad_norm": 0.808135986328125, + "learning_rate": 4.763160992827852e-06, + "loss": 0.497, + "step": 11485 + }, + { + "epoch": 0.844434641964417, + "grad_norm": 0.8457207679748535, + "learning_rate": 4.76312005541411e-06, + "loss": 0.5214, + "step": 11486 + }, + { + "epoch": 0.8445081605646229, + "grad_norm": 0.837847113609314, + "learning_rate": 4.763079114638626e-06, + "loss": 0.5401, + "step": 11487 + }, + { + "epoch": 0.8445816791648287, + "grad_norm": 0.8245266079902649, + "learning_rate": 4.763038170501462e-06, + "loss": 0.5247, + "step": 11488 + }, + { + "epoch": 0.8446551977650345, + "grad_norm": 0.8318895101547241, + "learning_rate": 4.762997223002679e-06, + "loss": 0.5411, + "step": 11489 + }, + { + "epoch": 0.8447287163652404, + "grad_norm": 0.8407130837440491, + "learning_rate": 4.762956272142338e-06, + "loss": 0.5106, + "step": 11490 + }, + { + "epoch": 0.8448022349654463, + "grad_norm": 0.8484858870506287, + "learning_rate": 4.762915317920499e-06, + "loss": 0.5354, + "step": 11491 + }, + { + "epoch": 0.8448757535656521, + "grad_norm": 0.8996989130973816, + "learning_rate": 4.762874360337223e-06, + "loss": 0.5757, + "step": 11492 + }, + { + "epoch": 0.8449492721658579, + "grad_norm": 0.8168551325798035, + "learning_rate": 4.762833399392571e-06, + "loss": 0.5099, + "step": 11493 + }, + { + "epoch": 0.8450227907660638, + "grad_norm": 0.8782025575637817, + "learning_rate": 4.762792435086604e-06, + "loss": 0.567, + "step": 11494 + }, + { + "epoch": 0.8450963093662697, + "grad_norm": 0.8261253833770752, + "learning_rate": 4.762751467419382e-06, + "loss": 0.491, + "step": 11495 + }, + { + "epoch": 0.8451698279664756, + "grad_norm": 0.8308890461921692, + "learning_rate": 4.762710496390968e-06, + "loss": 0.4954, + "step": 11496 + }, + { + "epoch": 0.8452433465666813, + "grad_norm": 0.8896363377571106, + "learning_rate": 4.762669522001421e-06, + "loss": 0.5551, + "step": 11497 + }, + { + "epoch": 0.8453168651668872, + "grad_norm": 0.8166688680648804, + "learning_rate": 4.7626285442508015e-06, + "loss": 0.5337, + "step": 11498 + }, + { + "epoch": 0.8453903837670931, + "grad_norm": 0.8196623921394348, + "learning_rate": 4.7625875631391716e-06, + "loss": 0.5196, + "step": 11499 + }, + { + "epoch": 0.845463902367299, + "grad_norm": 0.908662736415863, + "learning_rate": 4.762546578666593e-06, + "loss": 0.5394, + "step": 11500 + }, + { + "epoch": 0.8455374209675047, + "grad_norm": 0.8500300645828247, + "learning_rate": 4.762505590833124e-06, + "loss": 0.5301, + "step": 11501 + }, + { + "epoch": 0.8456109395677106, + "grad_norm": 0.8675325512886047, + "learning_rate": 4.762464599638827e-06, + "loss": 0.5325, + "step": 11502 + }, + { + "epoch": 0.8456844581679165, + "grad_norm": 0.8430542945861816, + "learning_rate": 4.7624236050837626e-06, + "loss": 0.5657, + "step": 11503 + }, + { + "epoch": 0.8457579767681224, + "grad_norm": 0.8597111701965332, + "learning_rate": 4.7623826071679925e-06, + "loss": 0.5607, + "step": 11504 + }, + { + "epoch": 0.8458314953683281, + "grad_norm": 0.8828076124191284, + "learning_rate": 4.762341605891576e-06, + "loss": 0.5444, + "step": 11505 + }, + { + "epoch": 0.845905013968534, + "grad_norm": 0.8531865477561951, + "learning_rate": 4.762300601254576e-06, + "loss": 0.5435, + "step": 11506 + }, + { + "epoch": 0.8459785325687399, + "grad_norm": 0.8133656978607178, + "learning_rate": 4.762259593257051e-06, + "loss": 0.5231, + "step": 11507 + }, + { + "epoch": 0.8460520511689458, + "grad_norm": 0.8572024703025818, + "learning_rate": 4.762218581899065e-06, + "loss": 0.5456, + "step": 11508 + }, + { + "epoch": 0.8461255697691515, + "grad_norm": 0.7867946624755859, + "learning_rate": 4.762177567180676e-06, + "loss": 0.529, + "step": 11509 + }, + { + "epoch": 0.8461990883693574, + "grad_norm": 0.8515685796737671, + "learning_rate": 4.762136549101946e-06, + "loss": 0.5169, + "step": 11510 + }, + { + "epoch": 0.8462726069695633, + "grad_norm": 0.801934003829956, + "learning_rate": 4.762095527662936e-06, + "loss": 0.5081, + "step": 11511 + }, + { + "epoch": 0.8463461255697692, + "grad_norm": 0.8472220301628113, + "learning_rate": 4.762054502863708e-06, + "loss": 0.5234, + "step": 11512 + }, + { + "epoch": 0.846419644169975, + "grad_norm": 0.8381593823432922, + "learning_rate": 4.762013474704321e-06, + "loss": 0.5473, + "step": 11513 + }, + { + "epoch": 0.8464931627701808, + "grad_norm": 0.8033559918403625, + "learning_rate": 4.7619724431848366e-06, + "loss": 0.4929, + "step": 11514 + }, + { + "epoch": 0.8465666813703867, + "grad_norm": 0.8589191436767578, + "learning_rate": 4.761931408305317e-06, + "loss": 0.5314, + "step": 11515 + }, + { + "epoch": 0.8466401999705926, + "grad_norm": 0.8247227072715759, + "learning_rate": 4.761890370065822e-06, + "loss": 0.5175, + "step": 11516 + }, + { + "epoch": 0.8467137185707984, + "grad_norm": 0.8559990525245667, + "learning_rate": 4.761849328466413e-06, + "loss": 0.5456, + "step": 11517 + }, + { + "epoch": 0.8467872371710042, + "grad_norm": 0.8593805432319641, + "learning_rate": 4.761808283507149e-06, + "loss": 0.5683, + "step": 11518 + }, + { + "epoch": 0.8468607557712101, + "grad_norm": 0.837479829788208, + "learning_rate": 4.7617672351880935e-06, + "loss": 0.5153, + "step": 11519 + }, + { + "epoch": 0.846934274371416, + "grad_norm": 0.8094531297683716, + "learning_rate": 4.761726183509307e-06, + "loss": 0.4987, + "step": 11520 + }, + { + "epoch": 0.8470077929716219, + "grad_norm": 0.7815853953361511, + "learning_rate": 4.7616851284708494e-06, + "loss": 0.5043, + "step": 11521 + }, + { + "epoch": 0.8470813115718276, + "grad_norm": 0.7871647477149963, + "learning_rate": 4.761644070072783e-06, + "loss": 0.5218, + "step": 11522 + }, + { + "epoch": 0.8471548301720335, + "grad_norm": 0.8564393520355225, + "learning_rate": 4.7616030083151684e-06, + "loss": 0.5406, + "step": 11523 + }, + { + "epoch": 0.8472283487722394, + "grad_norm": 0.8553086519241333, + "learning_rate": 4.761561943198067e-06, + "loss": 0.5481, + "step": 11524 + }, + { + "epoch": 0.8473018673724453, + "grad_norm": 0.7867028117179871, + "learning_rate": 4.761520874721537e-06, + "loss": 0.4766, + "step": 11525 + }, + { + "epoch": 0.847375385972651, + "grad_norm": 0.8471981883049011, + "learning_rate": 4.7614798028856435e-06, + "loss": 0.5406, + "step": 11526 + }, + { + "epoch": 0.8474489045728569, + "grad_norm": 0.7853859663009644, + "learning_rate": 4.761438727690445e-06, + "loss": 0.51, + "step": 11527 + }, + { + "epoch": 0.8475224231730628, + "grad_norm": 0.8930548429489136, + "learning_rate": 4.761397649136002e-06, + "loss": 0.532, + "step": 11528 + }, + { + "epoch": 0.8475959417732687, + "grad_norm": 0.7550943493843079, + "learning_rate": 4.761356567222378e-06, + "loss": 0.4668, + "step": 11529 + }, + { + "epoch": 0.8476694603734745, + "grad_norm": 0.8117631077766418, + "learning_rate": 4.761315481949632e-06, + "loss": 0.484, + "step": 11530 + }, + { + "epoch": 0.8477429789736803, + "grad_norm": 0.8549701571464539, + "learning_rate": 4.7612743933178265e-06, + "loss": 0.5732, + "step": 11531 + }, + { + "epoch": 0.8478164975738862, + "grad_norm": 0.8707537651062012, + "learning_rate": 4.761233301327021e-06, + "loss": 0.5643, + "step": 11532 + }, + { + "epoch": 0.8478900161740921, + "grad_norm": 0.8198784589767456, + "learning_rate": 4.761192205977278e-06, + "loss": 0.5464, + "step": 11533 + }, + { + "epoch": 0.8479635347742979, + "grad_norm": 0.8803454041481018, + "learning_rate": 4.761151107268657e-06, + "loss": 0.5176, + "step": 11534 + }, + { + "epoch": 0.8480370533745037, + "grad_norm": 0.8377848267555237, + "learning_rate": 4.761110005201219e-06, + "loss": 0.5355, + "step": 11535 + }, + { + "epoch": 0.8481105719747096, + "grad_norm": 0.8125724792480469, + "learning_rate": 4.7610688997750275e-06, + "loss": 0.5297, + "step": 11536 + }, + { + "epoch": 0.8481840905749155, + "grad_norm": 0.8534295558929443, + "learning_rate": 4.761027790990142e-06, + "loss": 0.5157, + "step": 11537 + }, + { + "epoch": 0.8482576091751213, + "grad_norm": 0.7933579683303833, + "learning_rate": 4.760986678846622e-06, + "loss": 0.5636, + "step": 11538 + }, + { + "epoch": 0.8483311277753272, + "grad_norm": 0.9080445170402527, + "learning_rate": 4.760945563344531e-06, + "loss": 0.5765, + "step": 11539 + }, + { + "epoch": 0.848404646375533, + "grad_norm": 0.8385955691337585, + "learning_rate": 4.7609044444839295e-06, + "loss": 0.5108, + "step": 11540 + }, + { + "epoch": 0.8484781649757389, + "grad_norm": 0.7983773350715637, + "learning_rate": 4.760863322264878e-06, + "loss": 0.5315, + "step": 11541 + }, + { + "epoch": 0.8485516835759447, + "grad_norm": 0.8615196347236633, + "learning_rate": 4.760822196687438e-06, + "loss": 0.4967, + "step": 11542 + }, + { + "epoch": 0.8486252021761506, + "grad_norm": 0.7976120710372925, + "learning_rate": 4.76078106775167e-06, + "loss": 0.5171, + "step": 11543 + }, + { + "epoch": 0.8486987207763564, + "grad_norm": 0.810457170009613, + "learning_rate": 4.7607399354576355e-06, + "loss": 0.5176, + "step": 11544 + }, + { + "epoch": 0.8487722393765623, + "grad_norm": 0.7915114164352417, + "learning_rate": 4.760698799805396e-06, + "loss": 0.5189, + "step": 11545 + }, + { + "epoch": 0.8488457579767681, + "grad_norm": 0.8115959167480469, + "learning_rate": 4.760657660795012e-06, + "loss": 0.4979, + "step": 11546 + }, + { + "epoch": 0.848919276576974, + "grad_norm": 0.8353862762451172, + "learning_rate": 4.760616518426545e-06, + "loss": 0.5286, + "step": 11547 + }, + { + "epoch": 0.8489927951771798, + "grad_norm": 0.8187763690948486, + "learning_rate": 4.760575372700056e-06, + "loss": 0.5207, + "step": 11548 + }, + { + "epoch": 0.8490663137773857, + "grad_norm": 0.8800750374794006, + "learning_rate": 4.760534223615606e-06, + "loss": 0.5689, + "step": 11549 + }, + { + "epoch": 0.8491398323775915, + "grad_norm": 0.8211764693260193, + "learning_rate": 4.760493071173256e-06, + "loss": 0.5268, + "step": 11550 + }, + { + "epoch": 0.8492133509777974, + "grad_norm": 0.8166378140449524, + "learning_rate": 4.760451915373067e-06, + "loss": 0.5339, + "step": 11551 + }, + { + "epoch": 0.8492868695780033, + "grad_norm": 0.8859792351722717, + "learning_rate": 4.760410756215101e-06, + "loss": 0.5376, + "step": 11552 + }, + { + "epoch": 0.8493603881782091, + "grad_norm": 0.8637241721153259, + "learning_rate": 4.760369593699419e-06, + "loss": 0.5395, + "step": 11553 + }, + { + "epoch": 0.8494339067784149, + "grad_norm": 0.8078875541687012, + "learning_rate": 4.760328427826081e-06, + "loss": 0.5101, + "step": 11554 + }, + { + "epoch": 0.8495074253786208, + "grad_norm": 0.8520817756652832, + "learning_rate": 4.760287258595148e-06, + "loss": 0.5245, + "step": 11555 + }, + { + "epoch": 0.8495809439788267, + "grad_norm": 0.8517326712608337, + "learning_rate": 4.760246086006684e-06, + "loss": 0.5587, + "step": 11556 + }, + { + "epoch": 0.8496544625790325, + "grad_norm": 0.807672381401062, + "learning_rate": 4.7602049100607475e-06, + "loss": 0.5504, + "step": 11557 + }, + { + "epoch": 0.8497279811792383, + "grad_norm": 0.842490553855896, + "learning_rate": 4.7601637307574e-06, + "loss": 0.5361, + "step": 11558 + }, + { + "epoch": 0.8498014997794442, + "grad_norm": 0.8439072370529175, + "learning_rate": 4.760122548096704e-06, + "loss": 0.5264, + "step": 11559 + }, + { + "epoch": 0.8498750183796501, + "grad_norm": 0.7951505780220032, + "learning_rate": 4.760081362078719e-06, + "loss": 0.4895, + "step": 11560 + }, + { + "epoch": 0.849948536979856, + "grad_norm": 0.8821302652359009, + "learning_rate": 4.760040172703506e-06, + "loss": 0.5299, + "step": 11561 + }, + { + "epoch": 0.8500220555800617, + "grad_norm": 0.8193894624710083, + "learning_rate": 4.759998979971129e-06, + "loss": 0.5331, + "step": 11562 + }, + { + "epoch": 0.8500955741802676, + "grad_norm": 0.8209232687950134, + "learning_rate": 4.759957783881646e-06, + "loss": 0.56, + "step": 11563 + }, + { + "epoch": 0.8501690927804735, + "grad_norm": 0.8760809302330017, + "learning_rate": 4.75991658443512e-06, + "loss": 0.5403, + "step": 11564 + }, + { + "epoch": 0.8502426113806794, + "grad_norm": 0.8332844972610474, + "learning_rate": 4.759875381631612e-06, + "loss": 0.5122, + "step": 11565 + }, + { + "epoch": 0.8503161299808851, + "grad_norm": 0.8202250003814697, + "learning_rate": 4.759834175471181e-06, + "loss": 0.5631, + "step": 11566 + }, + { + "epoch": 0.850389648581091, + "grad_norm": 0.8186206817626953, + "learning_rate": 4.759792965953892e-06, + "loss": 0.5176, + "step": 11567 + }, + { + "epoch": 0.8504631671812969, + "grad_norm": 0.7904266715049744, + "learning_rate": 4.7597517530798036e-06, + "loss": 0.5046, + "step": 11568 + }, + { + "epoch": 0.8505366857815028, + "grad_norm": 0.8115061521530151, + "learning_rate": 4.759710536848977e-06, + "loss": 0.5374, + "step": 11569 + }, + { + "epoch": 0.8506102043817085, + "grad_norm": 0.7892378568649292, + "learning_rate": 4.759669317261475e-06, + "loss": 0.4948, + "step": 11570 + }, + { + "epoch": 0.8506837229819144, + "grad_norm": 0.8133406043052673, + "learning_rate": 4.759628094317358e-06, + "loss": 0.5057, + "step": 11571 + }, + { + "epoch": 0.8507572415821203, + "grad_norm": 0.7750811576843262, + "learning_rate": 4.759586868016687e-06, + "loss": 0.4897, + "step": 11572 + }, + { + "epoch": 0.8508307601823262, + "grad_norm": 0.8397600054740906, + "learning_rate": 4.759545638359524e-06, + "loss": 0.5144, + "step": 11573 + }, + { + "epoch": 0.8509042787825319, + "grad_norm": 0.8082808256149292, + "learning_rate": 4.759504405345929e-06, + "loss": 0.5602, + "step": 11574 + }, + { + "epoch": 0.8509777973827378, + "grad_norm": 0.7936307191848755, + "learning_rate": 4.759463168975964e-06, + "loss": 0.5276, + "step": 11575 + }, + { + "epoch": 0.8510513159829437, + "grad_norm": 0.8188018202781677, + "learning_rate": 4.75942192924969e-06, + "loss": 0.5037, + "step": 11576 + }, + { + "epoch": 0.8511248345831496, + "grad_norm": 0.8425880074501038, + "learning_rate": 4.759380686167169e-06, + "loss": 0.5288, + "step": 11577 + }, + { + "epoch": 0.8511983531833553, + "grad_norm": 0.8474772572517395, + "learning_rate": 4.759339439728462e-06, + "loss": 0.5639, + "step": 11578 + }, + { + "epoch": 0.8512718717835612, + "grad_norm": 0.7883749604225159, + "learning_rate": 4.759298189933629e-06, + "loss": 0.5524, + "step": 11579 + }, + { + "epoch": 0.8513453903837671, + "grad_norm": 0.8492713570594788, + "learning_rate": 4.759256936782732e-06, + "loss": 0.561, + "step": 11580 + }, + { + "epoch": 0.851418908983973, + "grad_norm": 0.8344087600708008, + "learning_rate": 4.759215680275834e-06, + "loss": 0.4995, + "step": 11581 + }, + { + "epoch": 0.8514924275841788, + "grad_norm": 0.7748273015022278, + "learning_rate": 4.759174420412994e-06, + "loss": 0.5029, + "step": 11582 + }, + { + "epoch": 0.8515659461843846, + "grad_norm": 0.8155128359794617, + "learning_rate": 4.7591331571942736e-06, + "loss": 0.5443, + "step": 11583 + }, + { + "epoch": 0.8516394647845905, + "grad_norm": 0.8159598708152771, + "learning_rate": 4.759091890619735e-06, + "loss": 0.5239, + "step": 11584 + }, + { + "epoch": 0.8517129833847964, + "grad_norm": 0.8127286434173584, + "learning_rate": 4.759050620689439e-06, + "loss": 0.53, + "step": 11585 + }, + { + "epoch": 0.8517865019850022, + "grad_norm": 0.867901623249054, + "learning_rate": 4.759009347403448e-06, + "loss": 0.5416, + "step": 11586 + }, + { + "epoch": 0.851860020585208, + "grad_norm": 0.9039316773414612, + "learning_rate": 4.7589680707618215e-06, + "loss": 0.5849, + "step": 11587 + }, + { + "epoch": 0.8519335391854139, + "grad_norm": 0.8372844457626343, + "learning_rate": 4.758926790764622e-06, + "loss": 0.5628, + "step": 11588 + }, + { + "epoch": 0.8520070577856198, + "grad_norm": 0.888325035572052, + "learning_rate": 4.75888550741191e-06, + "loss": 0.5654, + "step": 11589 + }, + { + "epoch": 0.8520805763858256, + "grad_norm": 0.8289212584495544, + "learning_rate": 4.758844220703747e-06, + "loss": 0.5231, + "step": 11590 + }, + { + "epoch": 0.8521540949860315, + "grad_norm": 0.8724353909492493, + "learning_rate": 4.7588029306401955e-06, + "loss": 0.5515, + "step": 11591 + }, + { + "epoch": 0.8522276135862373, + "grad_norm": 0.8381733298301697, + "learning_rate": 4.758761637221315e-06, + "loss": 0.5507, + "step": 11592 + }, + { + "epoch": 0.8523011321864432, + "grad_norm": 0.8165490031242371, + "learning_rate": 4.7587203404471685e-06, + "loss": 0.4963, + "step": 11593 + }, + { + "epoch": 0.852374650786649, + "grad_norm": 0.8770736455917358, + "learning_rate": 4.758679040317816e-06, + "loss": 0.5189, + "step": 11594 + }, + { + "epoch": 0.8524481693868549, + "grad_norm": 0.8004727959632874, + "learning_rate": 4.7586377368333205e-06, + "loss": 0.5439, + "step": 11595 + }, + { + "epoch": 0.8525216879870607, + "grad_norm": 0.8213698863983154, + "learning_rate": 4.758596429993742e-06, + "loss": 0.5074, + "step": 11596 + }, + { + "epoch": 0.8525952065872666, + "grad_norm": 0.8520490527153015, + "learning_rate": 4.758555119799141e-06, + "loss": 0.5336, + "step": 11597 + }, + { + "epoch": 0.8526687251874724, + "grad_norm": 0.8304243683815002, + "learning_rate": 4.7585138062495805e-06, + "loss": 0.5464, + "step": 11598 + }, + { + "epoch": 0.8527422437876783, + "grad_norm": 0.8533538579940796, + "learning_rate": 4.758472489345122e-06, + "loss": 0.553, + "step": 11599 + }, + { + "epoch": 0.8528157623878841, + "grad_norm": 0.8713799118995667, + "learning_rate": 4.758431169085826e-06, + "loss": 0.5993, + "step": 11600 + }, + { + "epoch": 0.85288928098809, + "grad_norm": 0.7763060331344604, + "learning_rate": 4.7583898454717545e-06, + "loss": 0.4833, + "step": 11601 + }, + { + "epoch": 0.8529627995882958, + "grad_norm": 0.780712902545929, + "learning_rate": 4.758348518502968e-06, + "loss": 0.4751, + "step": 11602 + }, + { + "epoch": 0.8530363181885017, + "grad_norm": 0.826822817325592, + "learning_rate": 4.758307188179529e-06, + "loss": 0.5402, + "step": 11603 + }, + { + "epoch": 0.8531098367887076, + "grad_norm": 0.8181250691413879, + "learning_rate": 4.758265854501498e-06, + "loss": 0.5086, + "step": 11604 + }, + { + "epoch": 0.8531833553889134, + "grad_norm": 0.803973913192749, + "learning_rate": 4.758224517468936e-06, + "loss": 0.524, + "step": 11605 + }, + { + "epoch": 0.8532568739891192, + "grad_norm": 0.802065372467041, + "learning_rate": 4.758183177081906e-06, + "loss": 0.5084, + "step": 11606 + }, + { + "epoch": 0.8533303925893251, + "grad_norm": 0.8911341428756714, + "learning_rate": 4.75814183334047e-06, + "loss": 0.5413, + "step": 11607 + }, + { + "epoch": 0.853403911189531, + "grad_norm": 0.7810620069503784, + "learning_rate": 4.7581004862446855e-06, + "loss": 0.4898, + "step": 11608 + }, + { + "epoch": 0.8534774297897368, + "grad_norm": 0.817528486251831, + "learning_rate": 4.758059135794617e-06, + "loss": 0.5387, + "step": 11609 + }, + { + "epoch": 0.8535509483899426, + "grad_norm": 0.8492636680603027, + "learning_rate": 4.758017781990326e-06, + "loss": 0.5401, + "step": 11610 + }, + { + "epoch": 0.8536244669901485, + "grad_norm": 0.8185343146324158, + "learning_rate": 4.757976424831873e-06, + "loss": 0.5068, + "step": 11611 + }, + { + "epoch": 0.8536979855903544, + "grad_norm": 0.7992901802062988, + "learning_rate": 4.757935064319319e-06, + "loss": 0.4781, + "step": 11612 + }, + { + "epoch": 0.8537715041905602, + "grad_norm": 0.8080017566680908, + "learning_rate": 4.757893700452727e-06, + "loss": 0.495, + "step": 11613 + }, + { + "epoch": 0.853845022790766, + "grad_norm": 0.8285527229309082, + "learning_rate": 4.757852333232157e-06, + "loss": 0.5012, + "step": 11614 + }, + { + "epoch": 0.8539185413909719, + "grad_norm": 0.8152053952217102, + "learning_rate": 4.757810962657671e-06, + "loss": 0.5458, + "step": 11615 + }, + { + "epoch": 0.8539920599911778, + "grad_norm": 0.7829628586769104, + "learning_rate": 4.757769588729331e-06, + "loss": 0.5234, + "step": 11616 + }, + { + "epoch": 0.8540655785913837, + "grad_norm": 0.8388260006904602, + "learning_rate": 4.757728211447197e-06, + "loss": 0.5588, + "step": 11617 + }, + { + "epoch": 0.8541390971915894, + "grad_norm": 0.8131085634231567, + "learning_rate": 4.757686830811332e-06, + "loss": 0.4722, + "step": 11618 + }, + { + "epoch": 0.8542126157917953, + "grad_norm": 0.7994105815887451, + "learning_rate": 4.757645446821797e-06, + "loss": 0.5089, + "step": 11619 + }, + { + "epoch": 0.8542861343920012, + "grad_norm": 0.760814368724823, + "learning_rate": 4.757604059478653e-06, + "loss": 0.499, + "step": 11620 + }, + { + "epoch": 0.8543596529922071, + "grad_norm": 0.7922692894935608, + "learning_rate": 4.757562668781962e-06, + "loss": 0.5328, + "step": 11621 + }, + { + "epoch": 0.8544331715924128, + "grad_norm": 0.8229963183403015, + "learning_rate": 4.7575212747317844e-06, + "loss": 0.5564, + "step": 11622 + }, + { + "epoch": 0.8545066901926187, + "grad_norm": 0.7934334874153137, + "learning_rate": 4.757479877328184e-06, + "loss": 0.4964, + "step": 11623 + }, + { + "epoch": 0.8545802087928246, + "grad_norm": 0.8478909730911255, + "learning_rate": 4.75743847657122e-06, + "loss": 0.5464, + "step": 11624 + }, + { + "epoch": 0.8546537273930305, + "grad_norm": 0.8427479863166809, + "learning_rate": 4.757397072460954e-06, + "loss": 0.5412, + "step": 11625 + }, + { + "epoch": 0.8547272459932362, + "grad_norm": 0.8451244235038757, + "learning_rate": 4.7573556649974496e-06, + "loss": 0.5471, + "step": 11626 + }, + { + "epoch": 0.8548007645934421, + "grad_norm": 0.805824875831604, + "learning_rate": 4.757314254180767e-06, + "loss": 0.5068, + "step": 11627 + }, + { + "epoch": 0.854874283193648, + "grad_norm": 0.8395320177078247, + "learning_rate": 4.757272840010967e-06, + "loss": 0.5402, + "step": 11628 + }, + { + "epoch": 0.8549478017938539, + "grad_norm": 0.8430002331733704, + "learning_rate": 4.757231422488112e-06, + "loss": 0.5565, + "step": 11629 + }, + { + "epoch": 0.8550213203940596, + "grad_norm": 0.8050330281257629, + "learning_rate": 4.757190001612263e-06, + "loss": 0.4799, + "step": 11630 + }, + { + "epoch": 0.8550948389942655, + "grad_norm": 0.8611631393432617, + "learning_rate": 4.757148577383482e-06, + "loss": 0.5499, + "step": 11631 + }, + { + "epoch": 0.8551683575944714, + "grad_norm": 0.8369905948638916, + "learning_rate": 4.75710714980183e-06, + "loss": 0.539, + "step": 11632 + }, + { + "epoch": 0.8552418761946773, + "grad_norm": 0.7983229160308838, + "learning_rate": 4.757065718867371e-06, + "loss": 0.5333, + "step": 11633 + }, + { + "epoch": 0.855315394794883, + "grad_norm": 0.800137460231781, + "learning_rate": 4.7570242845801625e-06, + "loss": 0.5251, + "step": 11634 + }, + { + "epoch": 0.8553889133950889, + "grad_norm": 0.8586016297340393, + "learning_rate": 4.756982846940269e-06, + "loss": 0.5185, + "step": 11635 + }, + { + "epoch": 0.8554624319952948, + "grad_norm": 0.819237470626831, + "learning_rate": 4.75694140594775e-06, + "loss": 0.531, + "step": 11636 + }, + { + "epoch": 0.8555359505955007, + "grad_norm": 0.8189160823822021, + "learning_rate": 4.7568999616026685e-06, + "loss": 0.5366, + "step": 11637 + }, + { + "epoch": 0.8556094691957065, + "grad_norm": 0.8342434763908386, + "learning_rate": 4.756858513905086e-06, + "loss": 0.5362, + "step": 11638 + }, + { + "epoch": 0.8556829877959123, + "grad_norm": 0.8179375529289246, + "learning_rate": 4.756817062855064e-06, + "loss": 0.5388, + "step": 11639 + }, + { + "epoch": 0.8557565063961182, + "grad_norm": 0.8586395382881165, + "learning_rate": 4.756775608452663e-06, + "loss": 0.5429, + "step": 11640 + }, + { + "epoch": 0.8558300249963241, + "grad_norm": 0.8074289560317993, + "learning_rate": 4.756734150697946e-06, + "loss": 0.5168, + "step": 11641 + }, + { + "epoch": 0.8559035435965299, + "grad_norm": 0.8179011344909668, + "learning_rate": 4.756692689590974e-06, + "loss": 0.5492, + "step": 11642 + }, + { + "epoch": 0.8559770621967357, + "grad_norm": 0.8462977409362793, + "learning_rate": 4.756651225131808e-06, + "loss": 0.5366, + "step": 11643 + }, + { + "epoch": 0.8560505807969416, + "grad_norm": 0.8046279549598694, + "learning_rate": 4.7566097573205105e-06, + "loss": 0.4753, + "step": 11644 + }, + { + "epoch": 0.8561240993971475, + "grad_norm": 0.8558065295219421, + "learning_rate": 4.756568286157143e-06, + "loss": 0.5078, + "step": 11645 + }, + { + "epoch": 0.8561976179973533, + "grad_norm": 0.8333191275596619, + "learning_rate": 4.7565268116417664e-06, + "loss": 0.5391, + "step": 11646 + }, + { + "epoch": 0.8562711365975592, + "grad_norm": 0.7775573134422302, + "learning_rate": 4.756485333774443e-06, + "loss": 0.4733, + "step": 11647 + }, + { + "epoch": 0.856344655197765, + "grad_norm": 0.8089644908905029, + "learning_rate": 4.7564438525552345e-06, + "loss": 0.5352, + "step": 11648 + }, + { + "epoch": 0.8564181737979709, + "grad_norm": 0.8203295469284058, + "learning_rate": 4.756402367984203e-06, + "loss": 0.5164, + "step": 11649 + }, + { + "epoch": 0.8564916923981767, + "grad_norm": 0.8411695957183838, + "learning_rate": 4.756360880061408e-06, + "loss": 0.522, + "step": 11650 + }, + { + "epoch": 0.8565652109983826, + "grad_norm": 0.8837089538574219, + "learning_rate": 4.756319388786913e-06, + "loss": 0.5085, + "step": 11651 + }, + { + "epoch": 0.8566387295985884, + "grad_norm": 0.8603416681289673, + "learning_rate": 4.756277894160779e-06, + "loss": 0.5673, + "step": 11652 + }, + { + "epoch": 0.8567122481987943, + "grad_norm": 0.829249382019043, + "learning_rate": 4.756236396183067e-06, + "loss": 0.5311, + "step": 11653 + }, + { + "epoch": 0.8567857667990001, + "grad_norm": 0.8567826151847839, + "learning_rate": 4.75619489485384e-06, + "loss": 0.5159, + "step": 11654 + }, + { + "epoch": 0.856859285399206, + "grad_norm": 0.843338668346405, + "learning_rate": 4.756153390173159e-06, + "loss": 0.5579, + "step": 11655 + }, + { + "epoch": 0.8569328039994119, + "grad_norm": 0.8412454724311829, + "learning_rate": 4.756111882141087e-06, + "loss": 0.5052, + "step": 11656 + }, + { + "epoch": 0.8570063225996177, + "grad_norm": 0.7791553139686584, + "learning_rate": 4.756070370757683e-06, + "loss": 0.5268, + "step": 11657 + }, + { + "epoch": 0.8570798411998235, + "grad_norm": 0.859879732131958, + "learning_rate": 4.756028856023009e-06, + "loss": 0.5025, + "step": 11658 + }, + { + "epoch": 0.8571533598000294, + "grad_norm": 0.8016988635063171, + "learning_rate": 4.755987337937129e-06, + "loss": 0.5361, + "step": 11659 + }, + { + "epoch": 0.8572268784002353, + "grad_norm": 0.8395630717277527, + "learning_rate": 4.755945816500104e-06, + "loss": 0.4985, + "step": 11660 + }, + { + "epoch": 0.8573003970004411, + "grad_norm": 0.7681975364685059, + "learning_rate": 4.755904291711994e-06, + "loss": 0.4942, + "step": 11661 + }, + { + "epoch": 0.857373915600647, + "grad_norm": 0.9132920503616333, + "learning_rate": 4.755862763572861e-06, + "loss": 0.5554, + "step": 11662 + }, + { + "epoch": 0.8574474342008528, + "grad_norm": 0.8007957339286804, + "learning_rate": 4.755821232082769e-06, + "loss": 0.5277, + "step": 11663 + }, + { + "epoch": 0.8575209528010587, + "grad_norm": 0.8712668418884277, + "learning_rate": 4.755779697241778e-06, + "loss": 0.5396, + "step": 11664 + }, + { + "epoch": 0.8575944714012645, + "grad_norm": 0.8334965109825134, + "learning_rate": 4.755738159049949e-06, + "loss": 0.5132, + "step": 11665 + }, + { + "epoch": 0.8576679900014704, + "grad_norm": 0.8201695084571838, + "learning_rate": 4.755696617507345e-06, + "loss": 0.4853, + "step": 11666 + }, + { + "epoch": 0.8577415086016762, + "grad_norm": 0.8897238969802856, + "learning_rate": 4.755655072614026e-06, + "loss": 0.5744, + "step": 11667 + }, + { + "epoch": 0.8578150272018821, + "grad_norm": 0.837628960609436, + "learning_rate": 4.755613524370055e-06, + "loss": 0.4956, + "step": 11668 + }, + { + "epoch": 0.857888545802088, + "grad_norm": 0.7960749864578247, + "learning_rate": 4.755571972775495e-06, + "loss": 0.5482, + "step": 11669 + }, + { + "epoch": 0.8579620644022938, + "grad_norm": 0.829227864742279, + "learning_rate": 4.755530417830406e-06, + "loss": 0.5113, + "step": 11670 + }, + { + "epoch": 0.8580355830024996, + "grad_norm": 0.7969139218330383, + "learning_rate": 4.75548885953485e-06, + "loss": 0.4938, + "step": 11671 + }, + { + "epoch": 0.8581091016027055, + "grad_norm": 0.8440102934837341, + "learning_rate": 4.755447297888888e-06, + "loss": 0.5418, + "step": 11672 + }, + { + "epoch": 0.8581826202029114, + "grad_norm": 0.8486447930335999, + "learning_rate": 4.755405732892583e-06, + "loss": 0.5793, + "step": 11673 + }, + { + "epoch": 0.8582561388031172, + "grad_norm": 0.8641954660415649, + "learning_rate": 4.755364164545997e-06, + "loss": 0.5141, + "step": 11674 + }, + { + "epoch": 0.858329657403323, + "grad_norm": 0.7817215323448181, + "learning_rate": 4.75532259284919e-06, + "loss": 0.503, + "step": 11675 + }, + { + "epoch": 0.8584031760035289, + "grad_norm": 0.8044120669364929, + "learning_rate": 4.755281017802226e-06, + "loss": 0.5749, + "step": 11676 + }, + { + "epoch": 0.8584766946037348, + "grad_norm": 0.8213935494422913, + "learning_rate": 4.755239439405164e-06, + "loss": 0.4957, + "step": 11677 + }, + { + "epoch": 0.8585502132039406, + "grad_norm": 0.8373355269432068, + "learning_rate": 4.755197857658068e-06, + "loss": 0.5557, + "step": 11678 + }, + { + "epoch": 0.8586237318041464, + "grad_norm": 0.8518424034118652, + "learning_rate": 4.755156272561e-06, + "loss": 0.5035, + "step": 11679 + }, + { + "epoch": 0.8586972504043523, + "grad_norm": 0.8221222162246704, + "learning_rate": 4.75511468411402e-06, + "loss": 0.5254, + "step": 11680 + }, + { + "epoch": 0.8587707690045582, + "grad_norm": 0.8134974241256714, + "learning_rate": 4.75507309231719e-06, + "loss": 0.5413, + "step": 11681 + }, + { + "epoch": 0.8588442876047641, + "grad_norm": 0.8234747648239136, + "learning_rate": 4.755031497170573e-06, + "loss": 0.5605, + "step": 11682 + }, + { + "epoch": 0.8589178062049698, + "grad_norm": 0.8638074994087219, + "learning_rate": 4.75498989867423e-06, + "loss": 0.5508, + "step": 11683 + }, + { + "epoch": 0.8589913248051757, + "grad_norm": 0.8619284629821777, + "learning_rate": 4.754948296828224e-06, + "loss": 0.5979, + "step": 11684 + }, + { + "epoch": 0.8590648434053816, + "grad_norm": 0.7963874936103821, + "learning_rate": 4.754906691632615e-06, + "loss": 0.5193, + "step": 11685 + }, + { + "epoch": 0.8591383620055875, + "grad_norm": 0.8049269914627075, + "learning_rate": 4.754865083087465e-06, + "loss": 0.516, + "step": 11686 + }, + { + "epoch": 0.8592118806057932, + "grad_norm": 0.8146674633026123, + "learning_rate": 4.754823471192838e-06, + "loss": 0.5098, + "step": 11687 + }, + { + "epoch": 0.8592853992059991, + "grad_norm": 0.7908909916877747, + "learning_rate": 4.754781855948793e-06, + "loss": 0.5191, + "step": 11688 + }, + { + "epoch": 0.859358917806205, + "grad_norm": 0.7841481566429138, + "learning_rate": 4.754740237355393e-06, + "loss": 0.5082, + "step": 11689 + }, + { + "epoch": 0.8594324364064109, + "grad_norm": 0.8442352414131165, + "learning_rate": 4.754698615412699e-06, + "loss": 0.513, + "step": 11690 + }, + { + "epoch": 0.8595059550066166, + "grad_norm": 0.881379246711731, + "learning_rate": 4.7546569901207755e-06, + "loss": 0.5944, + "step": 11691 + }, + { + "epoch": 0.8595794736068225, + "grad_norm": 0.8117135763168335, + "learning_rate": 4.754615361479682e-06, + "loss": 0.4898, + "step": 11692 + }, + { + "epoch": 0.8596529922070284, + "grad_norm": 0.8328412771224976, + "learning_rate": 4.75457372948948e-06, + "loss": 0.5573, + "step": 11693 + }, + { + "epoch": 0.8597265108072343, + "grad_norm": 0.8395228981971741, + "learning_rate": 4.754532094150233e-06, + "loss": 0.5262, + "step": 11694 + }, + { + "epoch": 0.85980002940744, + "grad_norm": 0.8151751756668091, + "learning_rate": 4.754490455462002e-06, + "loss": 0.5104, + "step": 11695 + }, + { + "epoch": 0.8598735480076459, + "grad_norm": 0.8182353973388672, + "learning_rate": 4.7544488134248485e-06, + "loss": 0.555, + "step": 11696 + }, + { + "epoch": 0.8599470666078518, + "grad_norm": 0.8668904304504395, + "learning_rate": 4.754407168038835e-06, + "loss": 0.55, + "step": 11697 + }, + { + "epoch": 0.8600205852080577, + "grad_norm": 0.832783043384552, + "learning_rate": 4.754365519304023e-06, + "loss": 0.5908, + "step": 11698 + }, + { + "epoch": 0.8600941038082635, + "grad_norm": 0.7954687476158142, + "learning_rate": 4.754323867220474e-06, + "loss": 0.5076, + "step": 11699 + }, + { + "epoch": 0.8601676224084693, + "grad_norm": 0.8359332084655762, + "learning_rate": 4.754282211788251e-06, + "loss": 0.5501, + "step": 11700 + }, + { + "epoch": 0.8602411410086752, + "grad_norm": 0.7848337292671204, + "learning_rate": 4.754240553007415e-06, + "loss": 0.5011, + "step": 11701 + }, + { + "epoch": 0.8603146596088811, + "grad_norm": 0.8262189030647278, + "learning_rate": 4.754198890878028e-06, + "loss": 0.5445, + "step": 11702 + }, + { + "epoch": 0.8603881782090869, + "grad_norm": 0.8585407733917236, + "learning_rate": 4.754157225400152e-06, + "loss": 0.5482, + "step": 11703 + }, + { + "epoch": 0.8604616968092927, + "grad_norm": 0.7753709554672241, + "learning_rate": 4.75411555657385e-06, + "loss": 0.5323, + "step": 11704 + }, + { + "epoch": 0.8605352154094986, + "grad_norm": 0.7576660513877869, + "learning_rate": 4.754073884399181e-06, + "loss": 0.4969, + "step": 11705 + }, + { + "epoch": 0.8606087340097045, + "grad_norm": 0.8194600939750671, + "learning_rate": 4.7540322088762095e-06, + "loss": 0.554, + "step": 11706 + }, + { + "epoch": 0.8606822526099103, + "grad_norm": 0.7975332736968994, + "learning_rate": 4.753990530004996e-06, + "loss": 0.5427, + "step": 11707 + }, + { + "epoch": 0.8607557712101161, + "grad_norm": 0.7996323108673096, + "learning_rate": 4.753948847785604e-06, + "loss": 0.545, + "step": 11708 + }, + { + "epoch": 0.860829289810322, + "grad_norm": 0.781998336315155, + "learning_rate": 4.753907162218094e-06, + "loss": 0.474, + "step": 11709 + }, + { + "epoch": 0.8609028084105279, + "grad_norm": 0.8151198625564575, + "learning_rate": 4.753865473302528e-06, + "loss": 0.488, + "step": 11710 + }, + { + "epoch": 0.8609763270107337, + "grad_norm": 0.8031069040298462, + "learning_rate": 4.7538237810389685e-06, + "loss": 0.5024, + "step": 11711 + }, + { + "epoch": 0.8610498456109396, + "grad_norm": 0.8067945837974548, + "learning_rate": 4.753782085427477e-06, + "loss": 0.4814, + "step": 11712 + }, + { + "epoch": 0.8611233642111454, + "grad_norm": 0.8001220226287842, + "learning_rate": 4.753740386468116e-06, + "loss": 0.5213, + "step": 11713 + }, + { + "epoch": 0.8611968828113513, + "grad_norm": 0.8547079563140869, + "learning_rate": 4.753698684160947e-06, + "loss": 0.5313, + "step": 11714 + }, + { + "epoch": 0.8612704014115571, + "grad_norm": 0.8420602083206177, + "learning_rate": 4.753656978506031e-06, + "loss": 0.5597, + "step": 11715 + }, + { + "epoch": 0.861343920011763, + "grad_norm": 0.8100818991661072, + "learning_rate": 4.753615269503432e-06, + "loss": 0.503, + "step": 11716 + }, + { + "epoch": 0.8614174386119688, + "grad_norm": 0.8329730033874512, + "learning_rate": 4.7535735571532115e-06, + "loss": 0.5119, + "step": 11717 + }, + { + "epoch": 0.8614909572121747, + "grad_norm": 0.8289176225662231, + "learning_rate": 4.75353184145543e-06, + "loss": 0.5151, + "step": 11718 + }, + { + "epoch": 0.8615644758123805, + "grad_norm": 0.7910053133964539, + "learning_rate": 4.75349012241015e-06, + "loss": 0.5116, + "step": 11719 + }, + { + "epoch": 0.8616379944125864, + "grad_norm": 0.8262321352958679, + "learning_rate": 4.753448400017435e-06, + "loss": 0.5087, + "step": 11720 + }, + { + "epoch": 0.8617115130127923, + "grad_norm": 0.8368552327156067, + "learning_rate": 4.753406674277345e-06, + "loss": 0.4913, + "step": 11721 + }, + { + "epoch": 0.8617850316129981, + "grad_norm": 0.8212366700172424, + "learning_rate": 4.753364945189942e-06, + "loss": 0.5106, + "step": 11722 + }, + { + "epoch": 0.8618585502132039, + "grad_norm": 0.8288437128067017, + "learning_rate": 4.753323212755291e-06, + "loss": 0.5362, + "step": 11723 + }, + { + "epoch": 0.8619320688134098, + "grad_norm": 0.7862991094589233, + "learning_rate": 4.75328147697345e-06, + "loss": 0.4969, + "step": 11724 + }, + { + "epoch": 0.8620055874136157, + "grad_norm": 0.7935842275619507, + "learning_rate": 4.753239737844483e-06, + "loss": 0.5361, + "step": 11725 + }, + { + "epoch": 0.8620791060138215, + "grad_norm": 0.7573437094688416, + "learning_rate": 4.753197995368452e-06, + "loss": 0.5182, + "step": 11726 + }, + { + "epoch": 0.8621526246140273, + "grad_norm": 0.8045653104782104, + "learning_rate": 4.7531562495454186e-06, + "loss": 0.4955, + "step": 11727 + }, + { + "epoch": 0.8622261432142332, + "grad_norm": 0.8522000312805176, + "learning_rate": 4.753114500375446e-06, + "loss": 0.5229, + "step": 11728 + }, + { + "epoch": 0.8622996618144391, + "grad_norm": 0.8360479474067688, + "learning_rate": 4.753072747858594e-06, + "loss": 0.5069, + "step": 11729 + }, + { + "epoch": 0.862373180414645, + "grad_norm": 0.8047722578048706, + "learning_rate": 4.753030991994925e-06, + "loss": 0.5097, + "step": 11730 + }, + { + "epoch": 0.8624466990148507, + "grad_norm": 0.7910833358764648, + "learning_rate": 4.752989232784503e-06, + "loss": 0.4757, + "step": 11731 + }, + { + "epoch": 0.8625202176150566, + "grad_norm": 0.8527608513832092, + "learning_rate": 4.752947470227389e-06, + "loss": 0.5241, + "step": 11732 + }, + { + "epoch": 0.8625937362152625, + "grad_norm": 0.8423902988433838, + "learning_rate": 4.752905704323645e-06, + "loss": 0.5094, + "step": 11733 + }, + { + "epoch": 0.8626672548154684, + "grad_norm": 0.8579350113868713, + "learning_rate": 4.752863935073332e-06, + "loss": 0.5752, + "step": 11734 + }, + { + "epoch": 0.8627407734156741, + "grad_norm": 0.8621993660926819, + "learning_rate": 4.752822162476514e-06, + "loss": 0.5264, + "step": 11735 + }, + { + "epoch": 0.86281429201588, + "grad_norm": 0.7963270545005798, + "learning_rate": 4.7527803865332515e-06, + "loss": 0.5441, + "step": 11736 + }, + { + "epoch": 0.8628878106160859, + "grad_norm": 0.8400534987449646, + "learning_rate": 4.752738607243607e-06, + "loss": 0.539, + "step": 11737 + }, + { + "epoch": 0.8629613292162918, + "grad_norm": 0.8427748680114746, + "learning_rate": 4.752696824607643e-06, + "loss": 0.5748, + "step": 11738 + }, + { + "epoch": 0.8630348478164975, + "grad_norm": 0.7997176647186279, + "learning_rate": 4.752655038625421e-06, + "loss": 0.5086, + "step": 11739 + }, + { + "epoch": 0.8631083664167034, + "grad_norm": 0.8039833307266235, + "learning_rate": 4.752613249297004e-06, + "loss": 0.4713, + "step": 11740 + }, + { + "epoch": 0.8631818850169093, + "grad_norm": 0.8228729963302612, + "learning_rate": 4.752571456622452e-06, + "loss": 0.5174, + "step": 11741 + }, + { + "epoch": 0.8632554036171152, + "grad_norm": 0.8154787421226501, + "learning_rate": 4.75252966060183e-06, + "loss": 0.4784, + "step": 11742 + }, + { + "epoch": 0.8633289222173209, + "grad_norm": 0.8589992523193359, + "learning_rate": 4.752487861235197e-06, + "loss": 0.5243, + "step": 11743 + }, + { + "epoch": 0.8634024408175268, + "grad_norm": 0.8435271382331848, + "learning_rate": 4.752446058522617e-06, + "loss": 0.5365, + "step": 11744 + }, + { + "epoch": 0.8634759594177327, + "grad_norm": 0.8422863483428955, + "learning_rate": 4.752404252464152e-06, + "loss": 0.502, + "step": 11745 + }, + { + "epoch": 0.8635494780179386, + "grad_norm": 0.8537548780441284, + "learning_rate": 4.752362443059865e-06, + "loss": 0.5379, + "step": 11746 + }, + { + "epoch": 0.8636229966181443, + "grad_norm": 0.8318955302238464, + "learning_rate": 4.752320630309816e-06, + "loss": 0.5334, + "step": 11747 + }, + { + "epoch": 0.8636965152183502, + "grad_norm": 0.8478277921676636, + "learning_rate": 4.752278814214067e-06, + "loss": 0.516, + "step": 11748 + }, + { + "epoch": 0.8637700338185561, + "grad_norm": 0.814409077167511, + "learning_rate": 4.752236994772682e-06, + "loss": 0.5087, + "step": 11749 + }, + { + "epoch": 0.863843552418762, + "grad_norm": 0.8229157328605652, + "learning_rate": 4.752195171985723e-06, + "loss": 0.5367, + "step": 11750 + }, + { + "epoch": 0.8639170710189678, + "grad_norm": 0.8280758261680603, + "learning_rate": 4.752153345853251e-06, + "loss": 0.5635, + "step": 11751 + }, + { + "epoch": 0.8639905896191736, + "grad_norm": 0.8261830806732178, + "learning_rate": 4.7521115163753276e-06, + "loss": 0.5373, + "step": 11752 + }, + { + "epoch": 0.8640641082193795, + "grad_norm": 0.796381413936615, + "learning_rate": 4.7520696835520165e-06, + "loss": 0.5362, + "step": 11753 + }, + { + "epoch": 0.8641376268195854, + "grad_norm": 0.8252695798873901, + "learning_rate": 4.752027847383379e-06, + "loss": 0.5263, + "step": 11754 + }, + { + "epoch": 0.8642111454197912, + "grad_norm": 0.7929084897041321, + "learning_rate": 4.751986007869478e-06, + "loss": 0.4863, + "step": 11755 + }, + { + "epoch": 0.864284664019997, + "grad_norm": 0.821793794631958, + "learning_rate": 4.751944165010375e-06, + "loss": 0.5179, + "step": 11756 + }, + { + "epoch": 0.8643581826202029, + "grad_norm": 0.8406842350959778, + "learning_rate": 4.751902318806132e-06, + "loss": 0.5101, + "step": 11757 + }, + { + "epoch": 0.8644317012204088, + "grad_norm": 0.8399209380149841, + "learning_rate": 4.751860469256811e-06, + "loss": 0.5262, + "step": 11758 + }, + { + "epoch": 0.8645052198206146, + "grad_norm": 0.8611491322517395, + "learning_rate": 4.751818616362475e-06, + "loss": 0.5686, + "step": 11759 + }, + { + "epoch": 0.8645787384208204, + "grad_norm": 0.8087823390960693, + "learning_rate": 4.751776760123186e-06, + "loss": 0.5462, + "step": 11760 + }, + { + "epoch": 0.8646522570210263, + "grad_norm": 0.824794352054596, + "learning_rate": 4.751734900539006e-06, + "loss": 0.4999, + "step": 11761 + }, + { + "epoch": 0.8647257756212322, + "grad_norm": 0.8303135633468628, + "learning_rate": 4.7516930376099965e-06, + "loss": 0.4941, + "step": 11762 + }, + { + "epoch": 0.864799294221438, + "grad_norm": 0.8395006656646729, + "learning_rate": 4.75165117133622e-06, + "loss": 0.5249, + "step": 11763 + }, + { + "epoch": 0.8648728128216439, + "grad_norm": 0.8045507073402405, + "learning_rate": 4.75160930171774e-06, + "loss": 0.5294, + "step": 11764 + }, + { + "epoch": 0.8649463314218497, + "grad_norm": 0.8134841322898865, + "learning_rate": 4.751567428754617e-06, + "loss": 0.5167, + "step": 11765 + }, + { + "epoch": 0.8650198500220556, + "grad_norm": 0.8048226237297058, + "learning_rate": 4.751525552446914e-06, + "loss": 0.4912, + "step": 11766 + }, + { + "epoch": 0.8650933686222614, + "grad_norm": 0.841707706451416, + "learning_rate": 4.751483672794693e-06, + "loss": 0.5412, + "step": 11767 + }, + { + "epoch": 0.8651668872224673, + "grad_norm": 0.7707428932189941, + "learning_rate": 4.751441789798017e-06, + "loss": 0.4826, + "step": 11768 + }, + { + "epoch": 0.8652404058226731, + "grad_norm": 0.8081701397895813, + "learning_rate": 4.751399903456947e-06, + "loss": 0.5606, + "step": 11769 + }, + { + "epoch": 0.865313924422879, + "grad_norm": 0.8680357933044434, + "learning_rate": 4.751358013771545e-06, + "loss": 0.5551, + "step": 11770 + }, + { + "epoch": 0.8653874430230848, + "grad_norm": 0.7853225469589233, + "learning_rate": 4.751316120741875e-06, + "loss": 0.4858, + "step": 11771 + }, + { + "epoch": 0.8654609616232907, + "grad_norm": 0.8773239254951477, + "learning_rate": 4.751274224367998e-06, + "loss": 0.5532, + "step": 11772 + }, + { + "epoch": 0.8655344802234965, + "grad_norm": 0.7697968482971191, + "learning_rate": 4.751232324649976e-06, + "loss": 0.5401, + "step": 11773 + }, + { + "epoch": 0.8656079988237024, + "grad_norm": 0.8016461133956909, + "learning_rate": 4.7511904215878724e-06, + "loss": 0.5044, + "step": 11774 + }, + { + "epoch": 0.8656815174239082, + "grad_norm": 0.9040558934211731, + "learning_rate": 4.751148515181748e-06, + "loss": 0.5724, + "step": 11775 + }, + { + "epoch": 0.8657550360241141, + "grad_norm": 0.8639596700668335, + "learning_rate": 4.751106605431666e-06, + "loss": 0.5392, + "step": 11776 + }, + { + "epoch": 0.86582855462432, + "grad_norm": 0.8113521933555603, + "learning_rate": 4.751064692337689e-06, + "loss": 0.5104, + "step": 11777 + }, + { + "epoch": 0.8659020732245258, + "grad_norm": 0.83342045545578, + "learning_rate": 4.751022775899877e-06, + "loss": 0.4972, + "step": 11778 + }, + { + "epoch": 0.8659755918247316, + "grad_norm": 0.933581531047821, + "learning_rate": 4.750980856118296e-06, + "loss": 0.5456, + "step": 11779 + }, + { + "epoch": 0.8660491104249375, + "grad_norm": 0.8752812743186951, + "learning_rate": 4.7509389329930046e-06, + "loss": 0.5683, + "step": 11780 + }, + { + "epoch": 0.8661226290251434, + "grad_norm": 0.8393360376358032, + "learning_rate": 4.750897006524067e-06, + "loss": 0.5442, + "step": 11781 + }, + { + "epoch": 0.8661961476253492, + "grad_norm": 0.8792403340339661, + "learning_rate": 4.750855076711546e-06, + "loss": 0.5184, + "step": 11782 + }, + { + "epoch": 0.866269666225555, + "grad_norm": 0.8382928371429443, + "learning_rate": 4.750813143555502e-06, + "loss": 0.554, + "step": 11783 + }, + { + "epoch": 0.8663431848257609, + "grad_norm": 0.8342208862304688, + "learning_rate": 4.750771207055999e-06, + "loss": 0.5559, + "step": 11784 + }, + { + "epoch": 0.8664167034259668, + "grad_norm": 0.800672709941864, + "learning_rate": 4.750729267213099e-06, + "loss": 0.471, + "step": 11785 + }, + { + "epoch": 0.8664902220261727, + "grad_norm": 0.7925732731819153, + "learning_rate": 4.750687324026862e-06, + "loss": 0.4947, + "step": 11786 + }, + { + "epoch": 0.8665637406263784, + "grad_norm": 0.8628209233283997, + "learning_rate": 4.7506453774973545e-06, + "loss": 0.5719, + "step": 11787 + }, + { + "epoch": 0.8666372592265843, + "grad_norm": 0.8160484433174133, + "learning_rate": 4.750603427624635e-06, + "loss": 0.5264, + "step": 11788 + }, + { + "epoch": 0.8667107778267902, + "grad_norm": 0.8137771487236023, + "learning_rate": 4.750561474408768e-06, + "loss": 0.556, + "step": 11789 + }, + { + "epoch": 0.8667842964269961, + "grad_norm": 0.8174105882644653, + "learning_rate": 4.7505195178498155e-06, + "loss": 0.5203, + "step": 11790 + }, + { + "epoch": 0.8668578150272018, + "grad_norm": 0.8486235737800598, + "learning_rate": 4.750477557947839e-06, + "loss": 0.5292, + "step": 11791 + }, + { + "epoch": 0.8669313336274077, + "grad_norm": 0.8841566443443298, + "learning_rate": 4.750435594702901e-06, + "loss": 0.5647, + "step": 11792 + }, + { + "epoch": 0.8670048522276136, + "grad_norm": 0.813660204410553, + "learning_rate": 4.750393628115065e-06, + "loss": 0.5626, + "step": 11793 + }, + { + "epoch": 0.8670783708278195, + "grad_norm": 0.8098222613334656, + "learning_rate": 4.7503516581843915e-06, + "loss": 0.5178, + "step": 11794 + }, + { + "epoch": 0.8671518894280252, + "grad_norm": 0.8561425805091858, + "learning_rate": 4.750309684910945e-06, + "loss": 0.5248, + "step": 11795 + }, + { + "epoch": 0.8672254080282311, + "grad_norm": 0.8253902196884155, + "learning_rate": 4.7502677082947855e-06, + "loss": 0.5055, + "step": 11796 + }, + { + "epoch": 0.867298926628437, + "grad_norm": 0.8458991646766663, + "learning_rate": 4.750225728335977e-06, + "loss": 0.5567, + "step": 11797 + }, + { + "epoch": 0.8673724452286429, + "grad_norm": 0.8219136595726013, + "learning_rate": 4.750183745034582e-06, + "loss": 0.4944, + "step": 11798 + }, + { + "epoch": 0.8674459638288488, + "grad_norm": 0.79764723777771, + "learning_rate": 4.7501417583906615e-06, + "loss": 0.5301, + "step": 11799 + }, + { + "epoch": 0.8675194824290545, + "grad_norm": 0.8552611470222473, + "learning_rate": 4.750099768404279e-06, + "loss": 0.4862, + "step": 11800 + }, + { + "epoch": 0.8675930010292604, + "grad_norm": 0.8066477179527283, + "learning_rate": 4.750057775075496e-06, + "loss": 0.5031, + "step": 11801 + }, + { + "epoch": 0.8676665196294663, + "grad_norm": 0.8259584307670593, + "learning_rate": 4.750015778404376e-06, + "loss": 0.542, + "step": 11802 + }, + { + "epoch": 0.8677400382296722, + "grad_norm": 0.8015479445457458, + "learning_rate": 4.749973778390981e-06, + "loss": 0.5224, + "step": 11803 + }, + { + "epoch": 0.8678135568298779, + "grad_norm": 0.874380350112915, + "learning_rate": 4.749931775035373e-06, + "loss": 0.5263, + "step": 11804 + }, + { + "epoch": 0.8678870754300838, + "grad_norm": 0.8086399435997009, + "learning_rate": 4.749889768337614e-06, + "loss": 0.5255, + "step": 11805 + }, + { + "epoch": 0.8679605940302897, + "grad_norm": 0.8195869326591492, + "learning_rate": 4.749847758297767e-06, + "loss": 0.5355, + "step": 11806 + }, + { + "epoch": 0.8680341126304956, + "grad_norm": 0.8637982606887817, + "learning_rate": 4.7498057449158945e-06, + "loss": 0.5652, + "step": 11807 + }, + { + "epoch": 0.8681076312307013, + "grad_norm": 0.8180509805679321, + "learning_rate": 4.749763728192059e-06, + "loss": 0.5224, + "step": 11808 + }, + { + "epoch": 0.8681811498309072, + "grad_norm": 0.8310710191726685, + "learning_rate": 4.749721708126322e-06, + "loss": 0.5335, + "step": 11809 + }, + { + "epoch": 0.8682546684311131, + "grad_norm": 0.8871908187866211, + "learning_rate": 4.749679684718747e-06, + "loss": 0.5697, + "step": 11810 + }, + { + "epoch": 0.868328187031319, + "grad_norm": 0.8250532746315002, + "learning_rate": 4.749637657969397e-06, + "loss": 0.5052, + "step": 11811 + }, + { + "epoch": 0.8684017056315247, + "grad_norm": 0.7986353039741516, + "learning_rate": 4.749595627878332e-06, + "loss": 0.5014, + "step": 11812 + }, + { + "epoch": 0.8684752242317306, + "grad_norm": 0.8101218342781067, + "learning_rate": 4.749553594445617e-06, + "loss": 0.56, + "step": 11813 + }, + { + "epoch": 0.8685487428319365, + "grad_norm": 0.7788646817207336, + "learning_rate": 4.749511557671312e-06, + "loss": 0.4984, + "step": 11814 + }, + { + "epoch": 0.8686222614321424, + "grad_norm": 0.819403886795044, + "learning_rate": 4.749469517555482e-06, + "loss": 0.5123, + "step": 11815 + }, + { + "epoch": 0.8686957800323482, + "grad_norm": 0.7970436811447144, + "learning_rate": 4.749427474098187e-06, + "loss": 0.4617, + "step": 11816 + }, + { + "epoch": 0.868769298632554, + "grad_norm": 0.8766545653343201, + "learning_rate": 4.749385427299492e-06, + "loss": 0.5215, + "step": 11817 + }, + { + "epoch": 0.8688428172327599, + "grad_norm": 0.8020093441009521, + "learning_rate": 4.749343377159458e-06, + "loss": 0.5491, + "step": 11818 + }, + { + "epoch": 0.8689163358329658, + "grad_norm": 0.8055041432380676, + "learning_rate": 4.749301323678147e-06, + "loss": 0.5285, + "step": 11819 + }, + { + "epoch": 0.8689898544331716, + "grad_norm": 0.7889955639839172, + "learning_rate": 4.749259266855622e-06, + "loss": 0.4816, + "step": 11820 + }, + { + "epoch": 0.8690633730333774, + "grad_norm": 0.8438944220542908, + "learning_rate": 4.749217206691945e-06, + "loss": 0.5395, + "step": 11821 + }, + { + "epoch": 0.8691368916335833, + "grad_norm": 0.7981587052345276, + "learning_rate": 4.74917514318718e-06, + "loss": 0.4829, + "step": 11822 + }, + { + "epoch": 0.8692104102337892, + "grad_norm": 0.8905091881752014, + "learning_rate": 4.749133076341388e-06, + "loss": 0.5628, + "step": 11823 + }, + { + "epoch": 0.869283928833995, + "grad_norm": 0.7871623039245605, + "learning_rate": 4.749091006154632e-06, + "loss": 0.5321, + "step": 11824 + }, + { + "epoch": 0.8693574474342008, + "grad_norm": 0.8183894157409668, + "learning_rate": 4.749048932626975e-06, + "loss": 0.4879, + "step": 11825 + }, + { + "epoch": 0.8694309660344067, + "grad_norm": 0.8390949964523315, + "learning_rate": 4.749006855758479e-06, + "loss": 0.5241, + "step": 11826 + }, + { + "epoch": 0.8695044846346126, + "grad_norm": 0.8015508055686951, + "learning_rate": 4.7489647755492056e-06, + "loss": 0.5109, + "step": 11827 + }, + { + "epoch": 0.8695780032348184, + "grad_norm": 0.8576030135154724, + "learning_rate": 4.748922691999219e-06, + "loss": 0.5525, + "step": 11828 + }, + { + "epoch": 0.8696515218350243, + "grad_norm": 0.7828457951545715, + "learning_rate": 4.74888060510858e-06, + "loss": 0.5249, + "step": 11829 + }, + { + "epoch": 0.8697250404352301, + "grad_norm": 0.8354954123497009, + "learning_rate": 4.748838514877353e-06, + "loss": 0.5382, + "step": 11830 + }, + { + "epoch": 0.869798559035436, + "grad_norm": 0.8639366626739502, + "learning_rate": 4.748796421305599e-06, + "loss": 0.5565, + "step": 11831 + }, + { + "epoch": 0.8698720776356418, + "grad_norm": 0.8336323499679565, + "learning_rate": 4.748754324393381e-06, + "loss": 0.5632, + "step": 11832 + }, + { + "epoch": 0.8699455962358477, + "grad_norm": 0.8110392093658447, + "learning_rate": 4.748712224140761e-06, + "loss": 0.5273, + "step": 11833 + }, + { + "epoch": 0.8700191148360535, + "grad_norm": 0.8739390969276428, + "learning_rate": 4.748670120547803e-06, + "loss": 0.541, + "step": 11834 + }, + { + "epoch": 0.8700926334362594, + "grad_norm": 0.8340184092521667, + "learning_rate": 4.7486280136145694e-06, + "loss": 0.5337, + "step": 11835 + }, + { + "epoch": 0.8701661520364652, + "grad_norm": 0.8205413222312927, + "learning_rate": 4.7485859033411204e-06, + "loss": 0.5164, + "step": 11836 + }, + { + "epoch": 0.8702396706366711, + "grad_norm": 0.8317753672599792, + "learning_rate": 4.748543789727521e-06, + "loss": 0.5755, + "step": 11837 + }, + { + "epoch": 0.870313189236877, + "grad_norm": 0.8298981785774231, + "learning_rate": 4.748501672773833e-06, + "loss": 0.499, + "step": 11838 + }, + { + "epoch": 0.8703867078370828, + "grad_norm": 0.8394465446472168, + "learning_rate": 4.748459552480119e-06, + "loss": 0.5073, + "step": 11839 + }, + { + "epoch": 0.8704602264372886, + "grad_norm": 0.8243667483329773, + "learning_rate": 4.748417428846441e-06, + "loss": 0.5385, + "step": 11840 + }, + { + "epoch": 0.8705337450374945, + "grad_norm": 0.8618273735046387, + "learning_rate": 4.748375301872862e-06, + "loss": 0.5496, + "step": 11841 + }, + { + "epoch": 0.8706072636377004, + "grad_norm": 0.7885946035385132, + "learning_rate": 4.748333171559445e-06, + "loss": 0.4973, + "step": 11842 + }, + { + "epoch": 0.8706807822379062, + "grad_norm": 0.8646224141120911, + "learning_rate": 4.748291037906252e-06, + "loss": 0.4978, + "step": 11843 + }, + { + "epoch": 0.870754300838112, + "grad_norm": 0.7937051057815552, + "learning_rate": 4.748248900913345e-06, + "loss": 0.5406, + "step": 11844 + }, + { + "epoch": 0.8708278194383179, + "grad_norm": 0.8410546183586121, + "learning_rate": 4.748206760580789e-06, + "loss": 0.5449, + "step": 11845 + }, + { + "epoch": 0.8709013380385238, + "grad_norm": 0.7904650568962097, + "learning_rate": 4.748164616908644e-06, + "loss": 0.5092, + "step": 11846 + }, + { + "epoch": 0.8709748566387296, + "grad_norm": 0.7990905046463013, + "learning_rate": 4.748122469896973e-06, + "loss": 0.5018, + "step": 11847 + }, + { + "epoch": 0.8710483752389354, + "grad_norm": 0.8033142685890198, + "learning_rate": 4.7480803195458405e-06, + "loss": 0.5375, + "step": 11848 + }, + { + "epoch": 0.8711218938391413, + "grad_norm": 0.8665294051170349, + "learning_rate": 4.748038165855306e-06, + "loss": 0.5206, + "step": 11849 + }, + { + "epoch": 0.8711954124393472, + "grad_norm": 0.8340779542922974, + "learning_rate": 4.747996008825435e-06, + "loss": 0.5475, + "step": 11850 + }, + { + "epoch": 0.871268931039553, + "grad_norm": 0.8241826891899109, + "learning_rate": 4.7479538484562894e-06, + "loss": 0.4996, + "step": 11851 + }, + { + "epoch": 0.8713424496397588, + "grad_norm": 0.8160481452941895, + "learning_rate": 4.747911684747931e-06, + "loss": 0.5145, + "step": 11852 + }, + { + "epoch": 0.8714159682399647, + "grad_norm": 0.8277384638786316, + "learning_rate": 4.7478695177004236e-06, + "loss": 0.4805, + "step": 11853 + }, + { + "epoch": 0.8714894868401706, + "grad_norm": 0.8329829573631287, + "learning_rate": 4.747827347313828e-06, + "loss": 0.5342, + "step": 11854 + }, + { + "epoch": 0.8715630054403765, + "grad_norm": 0.8369163274765015, + "learning_rate": 4.7477851735882085e-06, + "loss": 0.5529, + "step": 11855 + }, + { + "epoch": 0.8716365240405822, + "grad_norm": 0.8352146148681641, + "learning_rate": 4.747742996523627e-06, + "loss": 0.5299, + "step": 11856 + }, + { + "epoch": 0.8717100426407881, + "grad_norm": 0.8208847641944885, + "learning_rate": 4.747700816120147e-06, + "loss": 0.5835, + "step": 11857 + }, + { + "epoch": 0.871783561240994, + "grad_norm": 0.872593879699707, + "learning_rate": 4.74765863237783e-06, + "loss": 0.5529, + "step": 11858 + }, + { + "epoch": 0.8718570798411999, + "grad_norm": 0.7896450161933899, + "learning_rate": 4.74761644529674e-06, + "loss": 0.5053, + "step": 11859 + }, + { + "epoch": 0.8719305984414056, + "grad_norm": 0.8238279819488525, + "learning_rate": 4.7475742548769375e-06, + "loss": 0.5017, + "step": 11860 + }, + { + "epoch": 0.8720041170416115, + "grad_norm": 0.8605353832244873, + "learning_rate": 4.747532061118487e-06, + "loss": 0.5419, + "step": 11861 + }, + { + "epoch": 0.8720776356418174, + "grad_norm": 0.837264358997345, + "learning_rate": 4.747489864021451e-06, + "loss": 0.5424, + "step": 11862 + }, + { + "epoch": 0.8721511542420233, + "grad_norm": 0.8403124809265137, + "learning_rate": 4.747447663585891e-06, + "loss": 0.5241, + "step": 11863 + }, + { + "epoch": 0.872224672842229, + "grad_norm": 0.8524872660636902, + "learning_rate": 4.747405459811872e-06, + "loss": 0.5334, + "step": 11864 + }, + { + "epoch": 0.8722981914424349, + "grad_norm": 0.8306849598884583, + "learning_rate": 4.747363252699454e-06, + "loss": 0.5159, + "step": 11865 + }, + { + "epoch": 0.8723717100426408, + "grad_norm": 0.8632964491844177, + "learning_rate": 4.747321042248702e-06, + "loss": 0.5357, + "step": 11866 + }, + { + "epoch": 0.8724452286428467, + "grad_norm": 0.8082647323608398, + "learning_rate": 4.747278828459677e-06, + "loss": 0.5201, + "step": 11867 + }, + { + "epoch": 0.8725187472430525, + "grad_norm": 0.8615816235542297, + "learning_rate": 4.747236611332442e-06, + "loss": 0.5306, + "step": 11868 + }, + { + "epoch": 0.8725922658432583, + "grad_norm": 0.8066397905349731, + "learning_rate": 4.7471943908670604e-06, + "loss": 0.5136, + "step": 11869 + }, + { + "epoch": 0.8726657844434642, + "grad_norm": 0.8395422697067261, + "learning_rate": 4.747152167063595e-06, + "loss": 0.5314, + "step": 11870 + }, + { + "epoch": 0.8727393030436701, + "grad_norm": 0.8811591267585754, + "learning_rate": 4.747109939922108e-06, + "loss": 0.5446, + "step": 11871 + }, + { + "epoch": 0.8728128216438759, + "grad_norm": 0.8157126903533936, + "learning_rate": 4.747067709442662e-06, + "loss": 0.4794, + "step": 11872 + }, + { + "epoch": 0.8728863402440817, + "grad_norm": 0.833890438079834, + "learning_rate": 4.74702547562532e-06, + "loss": 0.5206, + "step": 11873 + }, + { + "epoch": 0.8729598588442876, + "grad_norm": 0.882742702960968, + "learning_rate": 4.746983238470144e-06, + "loss": 0.539, + "step": 11874 + }, + { + "epoch": 0.8730333774444935, + "grad_norm": 0.8081914186477661, + "learning_rate": 4.746940997977199e-06, + "loss": 0.5349, + "step": 11875 + }, + { + "epoch": 0.8731068960446993, + "grad_norm": 0.8849081993103027, + "learning_rate": 4.746898754146545e-06, + "loss": 0.5655, + "step": 11876 + }, + { + "epoch": 0.8731804146449051, + "grad_norm": 0.8483217358589172, + "learning_rate": 4.746856506978247e-06, + "loss": 0.5252, + "step": 11877 + }, + { + "epoch": 0.873253933245111, + "grad_norm": 0.8381571173667908, + "learning_rate": 4.746814256472365e-06, + "loss": 0.4987, + "step": 11878 + }, + { + "epoch": 0.8733274518453169, + "grad_norm": 0.8124619722366333, + "learning_rate": 4.746772002628965e-06, + "loss": 0.5647, + "step": 11879 + }, + { + "epoch": 0.8734009704455227, + "grad_norm": 0.8243505954742432, + "learning_rate": 4.746729745448108e-06, + "loss": 0.5264, + "step": 11880 + }, + { + "epoch": 0.8734744890457286, + "grad_norm": 0.8262608051300049, + "learning_rate": 4.746687484929856e-06, + "loss": 0.5328, + "step": 11881 + }, + { + "epoch": 0.8735480076459344, + "grad_norm": 0.8195508122444153, + "learning_rate": 4.746645221074273e-06, + "loss": 0.5042, + "step": 11882 + }, + { + "epoch": 0.8736215262461403, + "grad_norm": 0.8027215003967285, + "learning_rate": 4.746602953881422e-06, + "loss": 0.4671, + "step": 11883 + }, + { + "epoch": 0.8736950448463461, + "grad_norm": 0.8944722414016724, + "learning_rate": 4.7465606833513656e-06, + "loss": 0.5523, + "step": 11884 + }, + { + "epoch": 0.873768563446552, + "grad_norm": 0.8231927156448364, + "learning_rate": 4.746518409484166e-06, + "loss": 0.5106, + "step": 11885 + }, + { + "epoch": 0.8738420820467578, + "grad_norm": 0.7970569133758545, + "learning_rate": 4.746476132279886e-06, + "loss": 0.4744, + "step": 11886 + }, + { + "epoch": 0.8739156006469637, + "grad_norm": 0.7934139966964722, + "learning_rate": 4.746433851738589e-06, + "loss": 0.52, + "step": 11887 + }, + { + "epoch": 0.8739891192471695, + "grad_norm": 0.8489076495170593, + "learning_rate": 4.746391567860338e-06, + "loss": 0.559, + "step": 11888 + }, + { + "epoch": 0.8740626378473754, + "grad_norm": 0.8423686623573303, + "learning_rate": 4.746349280645195e-06, + "loss": 0.5258, + "step": 11889 + }, + { + "epoch": 0.8741361564475812, + "grad_norm": 0.8066337704658508, + "learning_rate": 4.746306990093223e-06, + "loss": 0.5233, + "step": 11890 + }, + { + "epoch": 0.8742096750477871, + "grad_norm": 0.8884830474853516, + "learning_rate": 4.746264696204485e-06, + "loss": 0.55, + "step": 11891 + }, + { + "epoch": 0.8742831936479929, + "grad_norm": 0.8280713558197021, + "learning_rate": 4.746222398979044e-06, + "loss": 0.55, + "step": 11892 + }, + { + "epoch": 0.8743567122481988, + "grad_norm": 0.8880846500396729, + "learning_rate": 4.746180098416963e-06, + "loss": 0.5837, + "step": 11893 + }, + { + "epoch": 0.8744302308484047, + "grad_norm": 0.7972379922866821, + "learning_rate": 4.746137794518303e-06, + "loss": 0.4994, + "step": 11894 + }, + { + "epoch": 0.8745037494486105, + "grad_norm": 0.8117570877075195, + "learning_rate": 4.74609548728313e-06, + "loss": 0.5634, + "step": 11895 + }, + { + "epoch": 0.8745772680488163, + "grad_norm": 0.8371824026107788, + "learning_rate": 4.746053176711504e-06, + "loss": 0.4866, + "step": 11896 + }, + { + "epoch": 0.8746507866490222, + "grad_norm": 0.8589176535606384, + "learning_rate": 4.746010862803489e-06, + "loss": 0.5248, + "step": 11897 + }, + { + "epoch": 0.8747243052492281, + "grad_norm": 0.8106263279914856, + "learning_rate": 4.745968545559148e-06, + "loss": 0.5453, + "step": 11898 + }, + { + "epoch": 0.8747978238494339, + "grad_norm": 0.8016092777252197, + "learning_rate": 4.745926224978544e-06, + "loss": 0.5183, + "step": 11899 + }, + { + "epoch": 0.8748713424496397, + "grad_norm": 0.7483440637588501, + "learning_rate": 4.74588390106174e-06, + "loss": 0.4424, + "step": 11900 + }, + { + "epoch": 0.8749448610498456, + "grad_norm": 0.821792721748352, + "learning_rate": 4.745841573808797e-06, + "loss": 0.541, + "step": 11901 + }, + { + "epoch": 0.8750183796500515, + "grad_norm": 0.859119176864624, + "learning_rate": 4.745799243219781e-06, + "loss": 0.5479, + "step": 11902 + }, + { + "epoch": 0.8750918982502573, + "grad_norm": 0.7809334397315979, + "learning_rate": 4.745756909294752e-06, + "loss": 0.5249, + "step": 11903 + }, + { + "epoch": 0.8751654168504631, + "grad_norm": 0.8283652663230896, + "learning_rate": 4.745714572033775e-06, + "loss": 0.5336, + "step": 11904 + }, + { + "epoch": 0.875238935450669, + "grad_norm": 0.8319363594055176, + "learning_rate": 4.745672231436911e-06, + "loss": 0.5593, + "step": 11905 + }, + { + "epoch": 0.8753124540508749, + "grad_norm": 0.7797298431396484, + "learning_rate": 4.745629887504225e-06, + "loss": 0.4926, + "step": 11906 + }, + { + "epoch": 0.8753859726510808, + "grad_norm": 0.7877969145774841, + "learning_rate": 4.745587540235778e-06, + "loss": 0.5158, + "step": 11907 + }, + { + "epoch": 0.8754594912512865, + "grad_norm": 0.8315857648849487, + "learning_rate": 4.745545189631634e-06, + "loss": 0.5135, + "step": 11908 + }, + { + "epoch": 0.8755330098514924, + "grad_norm": 0.788312554359436, + "learning_rate": 4.745502835691856e-06, + "loss": 0.5487, + "step": 11909 + }, + { + "epoch": 0.8756065284516983, + "grad_norm": 0.8324661254882812, + "learning_rate": 4.7454604784165044e-06, + "loss": 0.4711, + "step": 11910 + }, + { + "epoch": 0.8756800470519042, + "grad_norm": 0.8202944993972778, + "learning_rate": 4.745418117805646e-06, + "loss": 0.5627, + "step": 11911 + }, + { + "epoch": 0.8757535656521099, + "grad_norm": 0.771819531917572, + "learning_rate": 4.745375753859342e-06, + "loss": 0.4431, + "step": 11912 + }, + { + "epoch": 0.8758270842523158, + "grad_norm": 0.8329254984855652, + "learning_rate": 4.745333386577655e-06, + "loss": 0.541, + "step": 11913 + }, + { + "epoch": 0.8759006028525217, + "grad_norm": 0.7963077425956726, + "learning_rate": 4.745291015960648e-06, + "loss": 0.5344, + "step": 11914 + }, + { + "epoch": 0.8759741214527276, + "grad_norm": 0.8148742318153381, + "learning_rate": 4.745248642008384e-06, + "loss": 0.5186, + "step": 11915 + }, + { + "epoch": 0.8760476400529333, + "grad_norm": 0.8589611053466797, + "learning_rate": 4.7452062647209264e-06, + "loss": 0.5214, + "step": 11916 + }, + { + "epoch": 0.8761211586531392, + "grad_norm": 0.8262477517127991, + "learning_rate": 4.745163884098339e-06, + "loss": 0.5702, + "step": 11917 + }, + { + "epoch": 0.8761946772533451, + "grad_norm": 0.7869393229484558, + "learning_rate": 4.745121500140681e-06, + "loss": 0.5444, + "step": 11918 + }, + { + "epoch": 0.876268195853551, + "grad_norm": 0.817826509475708, + "learning_rate": 4.74507911284802e-06, + "loss": 0.5363, + "step": 11919 + }, + { + "epoch": 0.8763417144537567, + "grad_norm": 0.8114684820175171, + "learning_rate": 4.745036722220416e-06, + "loss": 0.5577, + "step": 11920 + }, + { + "epoch": 0.8764152330539626, + "grad_norm": 0.855089545249939, + "learning_rate": 4.7449943282579335e-06, + "loss": 0.5303, + "step": 11921 + }, + { + "epoch": 0.8764887516541685, + "grad_norm": 0.771272599697113, + "learning_rate": 4.744951930960635e-06, + "loss": 0.5254, + "step": 11922 + }, + { + "epoch": 0.8765622702543744, + "grad_norm": 0.8389034867286682, + "learning_rate": 4.744909530328582e-06, + "loss": 0.5357, + "step": 11923 + }, + { + "epoch": 0.8766357888545802, + "grad_norm": 0.8416759967803955, + "learning_rate": 4.74486712636184e-06, + "loss": 0.4802, + "step": 11924 + }, + { + "epoch": 0.876709307454786, + "grad_norm": 0.7944741249084473, + "learning_rate": 4.744824719060471e-06, + "loss": 0.5252, + "step": 11925 + }, + { + "epoch": 0.8767828260549919, + "grad_norm": 0.8373250961303711, + "learning_rate": 4.744782308424537e-06, + "loss": 0.5227, + "step": 11926 + }, + { + "epoch": 0.8768563446551978, + "grad_norm": 0.8621875047683716, + "learning_rate": 4.7447398944541026e-06, + "loss": 0.5512, + "step": 11927 + }, + { + "epoch": 0.8769298632554036, + "grad_norm": 0.7943702936172485, + "learning_rate": 4.744697477149229e-06, + "loss": 0.5121, + "step": 11928 + }, + { + "epoch": 0.8770033818556094, + "grad_norm": 0.8175990581512451, + "learning_rate": 4.74465505650998e-06, + "loss": 0.5204, + "step": 11929 + }, + { + "epoch": 0.8770769004558153, + "grad_norm": 0.7681411504745483, + "learning_rate": 4.74461263253642e-06, + "loss": 0.4797, + "step": 11930 + }, + { + "epoch": 0.8771504190560212, + "grad_norm": 0.793774425983429, + "learning_rate": 4.7445702052286105e-06, + "loss": 0.5136, + "step": 11931 + }, + { + "epoch": 0.877223937656227, + "grad_norm": 0.8842911720275879, + "learning_rate": 4.744527774586615e-06, + "loss": 0.6, + "step": 11932 + }, + { + "epoch": 0.8772974562564329, + "grad_norm": 0.8591259121894836, + "learning_rate": 4.744485340610496e-06, + "loss": 0.5415, + "step": 11933 + }, + { + "epoch": 0.8773709748566387, + "grad_norm": 0.8106486201286316, + "learning_rate": 4.744442903300317e-06, + "loss": 0.5155, + "step": 11934 + }, + { + "epoch": 0.8774444934568446, + "grad_norm": 0.8172485828399658, + "learning_rate": 4.744400462656141e-06, + "loss": 0.5211, + "step": 11935 + }, + { + "epoch": 0.8775180120570504, + "grad_norm": 0.8204042911529541, + "learning_rate": 4.744358018678031e-06, + "loss": 0.5699, + "step": 11936 + }, + { + "epoch": 0.8775915306572563, + "grad_norm": 0.8208957314491272, + "learning_rate": 4.744315571366051e-06, + "loss": 0.5183, + "step": 11937 + }, + { + "epoch": 0.8776650492574621, + "grad_norm": 0.8795296549797058, + "learning_rate": 4.744273120720261e-06, + "loss": 0.552, + "step": 11938 + }, + { + "epoch": 0.877738567857668, + "grad_norm": 0.8095617294311523, + "learning_rate": 4.744230666740728e-06, + "loss": 0.552, + "step": 11939 + }, + { + "epoch": 0.8778120864578739, + "grad_norm": 0.8324663043022156, + "learning_rate": 4.744188209427513e-06, + "loss": 0.5783, + "step": 11940 + }, + { + "epoch": 0.8778856050580797, + "grad_norm": 0.7906986474990845, + "learning_rate": 4.7441457487806785e-06, + "loss": 0.5154, + "step": 11941 + }, + { + "epoch": 0.8779591236582855, + "grad_norm": 0.811707615852356, + "learning_rate": 4.744103284800289e-06, + "loss": 0.5229, + "step": 11942 + }, + { + "epoch": 0.8780326422584914, + "grad_norm": 0.8348013162612915, + "learning_rate": 4.744060817486407e-06, + "loss": 0.506, + "step": 11943 + }, + { + "epoch": 0.8781061608586973, + "grad_norm": 0.8091935515403748, + "learning_rate": 4.744018346839095e-06, + "loss": 0.5375, + "step": 11944 + }, + { + "epoch": 0.8781796794589031, + "grad_norm": 0.8018460273742676, + "learning_rate": 4.743975872858417e-06, + "loss": 0.5066, + "step": 11945 + }, + { + "epoch": 0.878253198059109, + "grad_norm": 0.9196169376373291, + "learning_rate": 4.743933395544435e-06, + "loss": 0.559, + "step": 11946 + }, + { + "epoch": 0.8783267166593148, + "grad_norm": 0.8278367519378662, + "learning_rate": 4.743890914897214e-06, + "loss": 0.5133, + "step": 11947 + }, + { + "epoch": 0.8784002352595207, + "grad_norm": 0.8123161196708679, + "learning_rate": 4.743848430916816e-06, + "loss": 0.5015, + "step": 11948 + }, + { + "epoch": 0.8784737538597265, + "grad_norm": 0.7826653122901917, + "learning_rate": 4.743805943603303e-06, + "loss": 0.5284, + "step": 11949 + }, + { + "epoch": 0.8785472724599324, + "grad_norm": 0.8256674408912659, + "learning_rate": 4.743763452956739e-06, + "loss": 0.54, + "step": 11950 + }, + { + "epoch": 0.8786207910601382, + "grad_norm": 0.8556531667709351, + "learning_rate": 4.743720958977188e-06, + "loss": 0.5398, + "step": 11951 + }, + { + "epoch": 0.8786943096603441, + "grad_norm": 0.8387752771377563, + "learning_rate": 4.743678461664712e-06, + "loss": 0.5253, + "step": 11952 + }, + { + "epoch": 0.8787678282605499, + "grad_norm": 0.8431305289268494, + "learning_rate": 4.743635961019375e-06, + "loss": 0.5539, + "step": 11953 + }, + { + "epoch": 0.8788413468607558, + "grad_norm": 0.9084832072257996, + "learning_rate": 4.7435934570412396e-06, + "loss": 0.5777, + "step": 11954 + }, + { + "epoch": 0.8789148654609616, + "grad_norm": 0.8767872452735901, + "learning_rate": 4.743550949730368e-06, + "loss": 0.6303, + "step": 11955 + }, + { + "epoch": 0.8789883840611675, + "grad_norm": 0.8323090672492981, + "learning_rate": 4.743508439086826e-06, + "loss": 0.5161, + "step": 11956 + }, + { + "epoch": 0.8790619026613733, + "grad_norm": 0.8660287857055664, + "learning_rate": 4.743465925110674e-06, + "loss": 0.5566, + "step": 11957 + }, + { + "epoch": 0.8791354212615792, + "grad_norm": 0.8588464856147766, + "learning_rate": 4.743423407801976e-06, + "loss": 0.5199, + "step": 11958 + }, + { + "epoch": 0.879208939861785, + "grad_norm": 0.828197181224823, + "learning_rate": 4.743380887160796e-06, + "loss": 0.5662, + "step": 11959 + }, + { + "epoch": 0.8792824584619909, + "grad_norm": 0.8048021197319031, + "learning_rate": 4.743338363187196e-06, + "loss": 0.5137, + "step": 11960 + }, + { + "epoch": 0.8793559770621967, + "grad_norm": 0.8218682408332825, + "learning_rate": 4.74329583588124e-06, + "loss": 0.5493, + "step": 11961 + }, + { + "epoch": 0.8794294956624026, + "grad_norm": 0.8331965804100037, + "learning_rate": 4.743253305242991e-06, + "loss": 0.5701, + "step": 11962 + }, + { + "epoch": 0.8795030142626085, + "grad_norm": 0.8005631566047668, + "learning_rate": 4.743210771272512e-06, + "loss": 0.5071, + "step": 11963 + }, + { + "epoch": 0.8795765328628143, + "grad_norm": 0.8418024778366089, + "learning_rate": 4.743168233969866e-06, + "loss": 0.5447, + "step": 11964 + }, + { + "epoch": 0.8796500514630201, + "grad_norm": 0.7995884418487549, + "learning_rate": 4.743125693335116e-06, + "loss": 0.4913, + "step": 11965 + }, + { + "epoch": 0.879723570063226, + "grad_norm": 0.8148699998855591, + "learning_rate": 4.743083149368326e-06, + "loss": 0.5178, + "step": 11966 + }, + { + "epoch": 0.8797970886634319, + "grad_norm": 0.8744255304336548, + "learning_rate": 4.743040602069559e-06, + "loss": 0.5612, + "step": 11967 + }, + { + "epoch": 0.8798706072636377, + "grad_norm": 0.814914882183075, + "learning_rate": 4.742998051438878e-06, + "loss": 0.512, + "step": 11968 + }, + { + "epoch": 0.8799441258638435, + "grad_norm": 0.808417797088623, + "learning_rate": 4.742955497476346e-06, + "loss": 0.5372, + "step": 11969 + }, + { + "epoch": 0.8800176444640494, + "grad_norm": 0.8433361649513245, + "learning_rate": 4.742912940182026e-06, + "loss": 0.524, + "step": 11970 + }, + { + "epoch": 0.8800911630642553, + "grad_norm": 0.8244028091430664, + "learning_rate": 4.7428703795559824e-06, + "loss": 0.5807, + "step": 11971 + }, + { + "epoch": 0.8801646816644612, + "grad_norm": 0.790644109249115, + "learning_rate": 4.742827815598278e-06, + "loss": 0.5012, + "step": 11972 + }, + { + "epoch": 0.8802382002646669, + "grad_norm": 0.8166815042495728, + "learning_rate": 4.742785248308975e-06, + "loss": 0.4825, + "step": 11973 + }, + { + "epoch": 0.8803117188648728, + "grad_norm": 0.821189820766449, + "learning_rate": 4.742742677688138e-06, + "loss": 0.5453, + "step": 11974 + }, + { + "epoch": 0.8803852374650787, + "grad_norm": 0.8029663562774658, + "learning_rate": 4.742700103735829e-06, + "loss": 0.4807, + "step": 11975 + }, + { + "epoch": 0.8804587560652846, + "grad_norm": 0.8350486755371094, + "learning_rate": 4.742657526452112e-06, + "loss": 0.5293, + "step": 11976 + }, + { + "epoch": 0.8805322746654903, + "grad_norm": 0.8365237712860107, + "learning_rate": 4.742614945837049e-06, + "loss": 0.5685, + "step": 11977 + }, + { + "epoch": 0.8806057932656962, + "grad_norm": 0.8316543698310852, + "learning_rate": 4.742572361890706e-06, + "loss": 0.5225, + "step": 11978 + }, + { + "epoch": 0.8806793118659021, + "grad_norm": 0.8188601732254028, + "learning_rate": 4.742529774613144e-06, + "loss": 0.5592, + "step": 11979 + }, + { + "epoch": 0.880752830466108, + "grad_norm": 0.8385722041130066, + "learning_rate": 4.742487184004427e-06, + "loss": 0.4914, + "step": 11980 + }, + { + "epoch": 0.8808263490663137, + "grad_norm": 0.8410801291465759, + "learning_rate": 4.7424445900646175e-06, + "loss": 0.5097, + "step": 11981 + }, + { + "epoch": 0.8808998676665196, + "grad_norm": 0.8233999013900757, + "learning_rate": 4.742401992793779e-06, + "loss": 0.4938, + "step": 11982 + }, + { + "epoch": 0.8809733862667255, + "grad_norm": 0.7984629273414612, + "learning_rate": 4.7423593921919756e-06, + "loss": 0.5285, + "step": 11983 + }, + { + "epoch": 0.8810469048669314, + "grad_norm": 0.8813051581382751, + "learning_rate": 4.74231678825927e-06, + "loss": 0.5233, + "step": 11984 + }, + { + "epoch": 0.8811204234671371, + "grad_norm": 0.8496869802474976, + "learning_rate": 4.7422741809957264e-06, + "loss": 0.5514, + "step": 11985 + }, + { + "epoch": 0.881193942067343, + "grad_norm": 0.8876810669898987, + "learning_rate": 4.7422315704014064e-06, + "loss": 0.5545, + "step": 11986 + }, + { + "epoch": 0.8812674606675489, + "grad_norm": 0.8433221578598022, + "learning_rate": 4.742188956476374e-06, + "loss": 0.5638, + "step": 11987 + }, + { + "epoch": 0.8813409792677548, + "grad_norm": 0.8434222340583801, + "learning_rate": 4.742146339220693e-06, + "loss": 0.5902, + "step": 11988 + }, + { + "epoch": 0.8814144978679606, + "grad_norm": 0.8718050122261047, + "learning_rate": 4.742103718634428e-06, + "loss": 0.5076, + "step": 11989 + }, + { + "epoch": 0.8814880164681664, + "grad_norm": 0.828983724117279, + "learning_rate": 4.742061094717638e-06, + "loss": 0.5267, + "step": 11990 + }, + { + "epoch": 0.8815615350683723, + "grad_norm": 0.8408948183059692, + "learning_rate": 4.74201846747039e-06, + "loss": 0.496, + "step": 11991 + }, + { + "epoch": 0.8816350536685782, + "grad_norm": 0.8714068531990051, + "learning_rate": 4.741975836892747e-06, + "loss": 0.5283, + "step": 11992 + }, + { + "epoch": 0.881708572268784, + "grad_norm": 0.843445360660553, + "learning_rate": 4.741933202984772e-06, + "loss": 0.4976, + "step": 11993 + }, + { + "epoch": 0.8817820908689898, + "grad_norm": 0.8178297877311707, + "learning_rate": 4.741890565746527e-06, + "loss": 0.5199, + "step": 11994 + }, + { + "epoch": 0.8818556094691957, + "grad_norm": 0.8236730694770813, + "learning_rate": 4.741847925178076e-06, + "loss": 0.4851, + "step": 11995 + }, + { + "epoch": 0.8819291280694016, + "grad_norm": 0.8909621238708496, + "learning_rate": 4.741805281279484e-06, + "loss": 0.5564, + "step": 11996 + }, + { + "epoch": 0.8820026466696074, + "grad_norm": 0.8121892809867859, + "learning_rate": 4.741762634050812e-06, + "loss": 0.5342, + "step": 11997 + }, + { + "epoch": 0.8820761652698133, + "grad_norm": 0.7998738884925842, + "learning_rate": 4.7417199834921245e-06, + "loss": 0.5053, + "step": 11998 + }, + { + "epoch": 0.8821496838700191, + "grad_norm": 0.830877423286438, + "learning_rate": 4.741677329603485e-06, + "loss": 0.5348, + "step": 11999 + }, + { + "epoch": 0.882223202470225, + "grad_norm": 0.810836911201477, + "learning_rate": 4.741634672384956e-06, + "loss": 0.5486, + "step": 12000 + }, + { + "epoch": 0.8822967210704308, + "grad_norm": 0.8244132995605469, + "learning_rate": 4.741592011836602e-06, + "loss": 0.5512, + "step": 12001 + }, + { + "epoch": 0.8823702396706367, + "grad_norm": 0.887921154499054, + "learning_rate": 4.741549347958486e-06, + "loss": 0.5315, + "step": 12002 + }, + { + "epoch": 0.8824437582708425, + "grad_norm": 0.8628934621810913, + "learning_rate": 4.74150668075067e-06, + "loss": 0.5483, + "step": 12003 + }, + { + "epoch": 0.8825172768710484, + "grad_norm": 0.7978149652481079, + "learning_rate": 4.74146401021322e-06, + "loss": 0.5587, + "step": 12004 + }, + { + "epoch": 0.8825907954712542, + "grad_norm": 0.8242053389549255, + "learning_rate": 4.741421336346197e-06, + "loss": 0.5381, + "step": 12005 + }, + { + "epoch": 0.8826643140714601, + "grad_norm": 0.7978452444076538, + "learning_rate": 4.7413786591496655e-06, + "loss": 0.4953, + "step": 12006 + }, + { + "epoch": 0.882737832671666, + "grad_norm": 0.7801178693771362, + "learning_rate": 4.7413359786236895e-06, + "loss": 0.4955, + "step": 12007 + }, + { + "epoch": 0.8828113512718718, + "grad_norm": 0.7972913980484009, + "learning_rate": 4.741293294768331e-06, + "loss": 0.5342, + "step": 12008 + }, + { + "epoch": 0.8828848698720776, + "grad_norm": 0.8266298174858093, + "learning_rate": 4.741250607583654e-06, + "loss": 0.5498, + "step": 12009 + }, + { + "epoch": 0.8829583884722835, + "grad_norm": 0.8423511385917664, + "learning_rate": 4.741207917069722e-06, + "loss": 0.5209, + "step": 12010 + }, + { + "epoch": 0.8830319070724894, + "grad_norm": 0.7955204248428345, + "learning_rate": 4.7411652232265984e-06, + "loss": 0.5303, + "step": 12011 + }, + { + "epoch": 0.8831054256726952, + "grad_norm": 0.8108898997306824, + "learning_rate": 4.741122526054347e-06, + "loss": 0.5217, + "step": 12012 + }, + { + "epoch": 0.883178944272901, + "grad_norm": 0.8169758915901184, + "learning_rate": 4.7410798255530306e-06, + "loss": 0.5344, + "step": 12013 + }, + { + "epoch": 0.8832524628731069, + "grad_norm": 0.7772241234779358, + "learning_rate": 4.7410371217227125e-06, + "loss": 0.5063, + "step": 12014 + }, + { + "epoch": 0.8833259814733128, + "grad_norm": 0.810430109500885, + "learning_rate": 4.740994414563457e-06, + "loss": 0.5121, + "step": 12015 + }, + { + "epoch": 0.8833995000735186, + "grad_norm": 0.7589415907859802, + "learning_rate": 4.740951704075326e-06, + "loss": 0.5074, + "step": 12016 + }, + { + "epoch": 0.8834730186737244, + "grad_norm": 0.8154648542404175, + "learning_rate": 4.740908990258385e-06, + "loss": 0.5294, + "step": 12017 + }, + { + "epoch": 0.8835465372739303, + "grad_norm": 0.8511207103729248, + "learning_rate": 4.740866273112696e-06, + "loss": 0.5378, + "step": 12018 + }, + { + "epoch": 0.8836200558741362, + "grad_norm": 0.8347947001457214, + "learning_rate": 4.740823552638323e-06, + "loss": 0.5228, + "step": 12019 + }, + { + "epoch": 0.883693574474342, + "grad_norm": 0.8980000019073486, + "learning_rate": 4.7407808288353286e-06, + "loss": 0.557, + "step": 12020 + }, + { + "epoch": 0.8837670930745478, + "grad_norm": 0.8088234663009644, + "learning_rate": 4.740738101703778e-06, + "loss": 0.4954, + "step": 12021 + }, + { + "epoch": 0.8838406116747537, + "grad_norm": 0.8362104296684265, + "learning_rate": 4.740695371243733e-06, + "loss": 0.5479, + "step": 12022 + }, + { + "epoch": 0.8839141302749596, + "grad_norm": 0.8295273184776306, + "learning_rate": 4.740652637455258e-06, + "loss": 0.5352, + "step": 12023 + }, + { + "epoch": 0.8839876488751655, + "grad_norm": 0.9257349967956543, + "learning_rate": 4.740609900338417e-06, + "loss": 0.6432, + "step": 12024 + }, + { + "epoch": 0.8840611674753712, + "grad_norm": 0.8857588171958923, + "learning_rate": 4.740567159893272e-06, + "loss": 0.4994, + "step": 12025 + }, + { + "epoch": 0.8841346860755771, + "grad_norm": 0.749029815196991, + "learning_rate": 4.740524416119886e-06, + "loss": 0.4935, + "step": 12026 + }, + { + "epoch": 0.884208204675783, + "grad_norm": 0.8001925945281982, + "learning_rate": 4.740481669018325e-06, + "loss": 0.5407, + "step": 12027 + }, + { + "epoch": 0.8842817232759889, + "grad_norm": 0.8821384310722351, + "learning_rate": 4.74043891858865e-06, + "loss": 0.5372, + "step": 12028 + }, + { + "epoch": 0.8843552418761946, + "grad_norm": 0.8588913679122925, + "learning_rate": 4.740396164830927e-06, + "loss": 0.4933, + "step": 12029 + }, + { + "epoch": 0.8844287604764005, + "grad_norm": 0.8586701154708862, + "learning_rate": 4.7403534077452175e-06, + "loss": 0.5816, + "step": 12030 + }, + { + "epoch": 0.8845022790766064, + "grad_norm": 0.898845374584198, + "learning_rate": 4.740310647331587e-06, + "loss": 0.5625, + "step": 12031 + }, + { + "epoch": 0.8845757976768123, + "grad_norm": 0.7942935228347778, + "learning_rate": 4.7402678835900955e-06, + "loss": 0.4844, + "step": 12032 + }, + { + "epoch": 0.884649316277018, + "grad_norm": 0.8021897673606873, + "learning_rate": 4.74022511652081e-06, + "loss": 0.4761, + "step": 12033 + }, + { + "epoch": 0.8847228348772239, + "grad_norm": 0.8192564845085144, + "learning_rate": 4.740182346123793e-06, + "loss": 0.5255, + "step": 12034 + }, + { + "epoch": 0.8847963534774298, + "grad_norm": 0.7940366864204407, + "learning_rate": 4.740139572399107e-06, + "loss": 0.5098, + "step": 12035 + }, + { + "epoch": 0.8848698720776357, + "grad_norm": 0.8546966910362244, + "learning_rate": 4.740096795346817e-06, + "loss": 0.5273, + "step": 12036 + }, + { + "epoch": 0.8849433906778414, + "grad_norm": 0.8688187003135681, + "learning_rate": 4.740054014966985e-06, + "loss": 0.6085, + "step": 12037 + }, + { + "epoch": 0.8850169092780473, + "grad_norm": 0.8866921067237854, + "learning_rate": 4.740011231259677e-06, + "loss": 0.5464, + "step": 12038 + }, + { + "epoch": 0.8850904278782532, + "grad_norm": 0.8628052473068237, + "learning_rate": 4.739968444224953e-06, + "loss": 0.5779, + "step": 12039 + }, + { + "epoch": 0.8851639464784591, + "grad_norm": 0.801515519618988, + "learning_rate": 4.73992565386288e-06, + "loss": 0.4876, + "step": 12040 + }, + { + "epoch": 0.8852374650786649, + "grad_norm": 0.8218737244606018, + "learning_rate": 4.739882860173519e-06, + "loss": 0.5062, + "step": 12041 + }, + { + "epoch": 0.8853109836788707, + "grad_norm": 0.8008253574371338, + "learning_rate": 4.739840063156935e-06, + "loss": 0.548, + "step": 12042 + }, + { + "epoch": 0.8853845022790766, + "grad_norm": 0.7857367992401123, + "learning_rate": 4.739797262813192e-06, + "loss": 0.5053, + "step": 12043 + }, + { + "epoch": 0.8854580208792825, + "grad_norm": 0.824010968208313, + "learning_rate": 4.739754459142352e-06, + "loss": 0.5258, + "step": 12044 + }, + { + "epoch": 0.8855315394794883, + "grad_norm": 0.8326711654663086, + "learning_rate": 4.739711652144479e-06, + "loss": 0.5301, + "step": 12045 + }, + { + "epoch": 0.8856050580796941, + "grad_norm": 0.7806745171546936, + "learning_rate": 4.739668841819639e-06, + "loss": 0.5036, + "step": 12046 + }, + { + "epoch": 0.8856785766799, + "grad_norm": 0.8208045363426208, + "learning_rate": 4.7396260281678915e-06, + "loss": 0.5567, + "step": 12047 + }, + { + "epoch": 0.8857520952801059, + "grad_norm": 0.8512855768203735, + "learning_rate": 4.739583211189304e-06, + "loss": 0.5645, + "step": 12048 + }, + { + "epoch": 0.8858256138803117, + "grad_norm": 0.7796458601951599, + "learning_rate": 4.739540390883937e-06, + "loss": 0.4929, + "step": 12049 + }, + { + "epoch": 0.8858991324805175, + "grad_norm": 0.7908848524093628, + "learning_rate": 4.739497567251855e-06, + "loss": 0.5084, + "step": 12050 + }, + { + "epoch": 0.8859726510807234, + "grad_norm": 0.8718591332435608, + "learning_rate": 4.739454740293123e-06, + "loss": 0.5004, + "step": 12051 + }, + { + "epoch": 0.8860461696809293, + "grad_norm": 0.8134229183197021, + "learning_rate": 4.739411910007803e-06, + "loss": 0.5658, + "step": 12052 + }, + { + "epoch": 0.8861196882811351, + "grad_norm": 0.8051195740699768, + "learning_rate": 4.73936907639596e-06, + "loss": 0.5213, + "step": 12053 + }, + { + "epoch": 0.886193206881341, + "grad_norm": 0.8085936903953552, + "learning_rate": 4.739326239457656e-06, + "loss": 0.5246, + "step": 12054 + }, + { + "epoch": 0.8862667254815468, + "grad_norm": 0.8090505003929138, + "learning_rate": 4.739283399192955e-06, + "loss": 0.5557, + "step": 12055 + }, + { + "epoch": 0.8863402440817527, + "grad_norm": 0.8632510900497437, + "learning_rate": 4.739240555601923e-06, + "loss": 0.5437, + "step": 12056 + }, + { + "epoch": 0.8864137626819585, + "grad_norm": 0.8420571684837341, + "learning_rate": 4.739197708684621e-06, + "loss": 0.5217, + "step": 12057 + }, + { + "epoch": 0.8864872812821644, + "grad_norm": 0.8541218638420105, + "learning_rate": 4.7391548584411125e-06, + "loss": 0.5486, + "step": 12058 + }, + { + "epoch": 0.8865607998823702, + "grad_norm": 0.8297610282897949, + "learning_rate": 4.739112004871463e-06, + "loss": 0.5375, + "step": 12059 + }, + { + "epoch": 0.8866343184825761, + "grad_norm": 0.8833162784576416, + "learning_rate": 4.739069147975735e-06, + "loss": 0.5569, + "step": 12060 + }, + { + "epoch": 0.8867078370827819, + "grad_norm": 0.851431131362915, + "learning_rate": 4.7390262877539915e-06, + "loss": 0.5145, + "step": 12061 + }, + { + "epoch": 0.8867813556829878, + "grad_norm": 0.8234438896179199, + "learning_rate": 4.738983424206298e-06, + "loss": 0.5342, + "step": 12062 + }, + { + "epoch": 0.8868548742831937, + "grad_norm": 0.8452456593513489, + "learning_rate": 4.738940557332717e-06, + "loss": 0.5339, + "step": 12063 + }, + { + "epoch": 0.8869283928833995, + "grad_norm": 0.8820220828056335, + "learning_rate": 4.738897687133313e-06, + "loss": 0.5599, + "step": 12064 + }, + { + "epoch": 0.8870019114836053, + "grad_norm": 0.7915318012237549, + "learning_rate": 4.738854813608148e-06, + "loss": 0.5584, + "step": 12065 + }, + { + "epoch": 0.8870754300838112, + "grad_norm": 0.8437394499778748, + "learning_rate": 4.7388119367572885e-06, + "loss": 0.554, + "step": 12066 + }, + { + "epoch": 0.8871489486840171, + "grad_norm": 0.8435460925102234, + "learning_rate": 4.738769056580795e-06, + "loss": 0.54, + "step": 12067 + }, + { + "epoch": 0.8872224672842229, + "grad_norm": 0.8077974915504456, + "learning_rate": 4.7387261730787326e-06, + "loss": 0.4641, + "step": 12068 + }, + { + "epoch": 0.8872959858844287, + "grad_norm": 0.7724378705024719, + "learning_rate": 4.738683286251165e-06, + "loss": 0.5277, + "step": 12069 + }, + { + "epoch": 0.8873695044846346, + "grad_norm": 0.824586033821106, + "learning_rate": 4.738640396098157e-06, + "loss": 0.5159, + "step": 12070 + }, + { + "epoch": 0.8874430230848405, + "grad_norm": 0.792141854763031, + "learning_rate": 4.738597502619771e-06, + "loss": 0.5012, + "step": 12071 + }, + { + "epoch": 0.8875165416850463, + "grad_norm": 0.8535276055335999, + "learning_rate": 4.73855460581607e-06, + "loss": 0.5147, + "step": 12072 + }, + { + "epoch": 0.8875900602852521, + "grad_norm": 0.8924418687820435, + "learning_rate": 4.73851170568712e-06, + "loss": 0.5825, + "step": 12073 + }, + { + "epoch": 0.887663578885458, + "grad_norm": 0.8947517275810242, + "learning_rate": 4.738468802232983e-06, + "loss": 0.5522, + "step": 12074 + }, + { + "epoch": 0.8877370974856639, + "grad_norm": 0.7890477180480957, + "learning_rate": 4.738425895453722e-06, + "loss": 0.514, + "step": 12075 + }, + { + "epoch": 0.8878106160858698, + "grad_norm": 0.8041014075279236, + "learning_rate": 4.7383829853494035e-06, + "loss": 0.4974, + "step": 12076 + }, + { + "epoch": 0.8878841346860756, + "grad_norm": 0.8868292570114136, + "learning_rate": 4.738340071920089e-06, + "loss": 0.5466, + "step": 12077 + }, + { + "epoch": 0.8879576532862814, + "grad_norm": 0.8323973417282104, + "learning_rate": 4.738297155165843e-06, + "loss": 0.5284, + "step": 12078 + }, + { + "epoch": 0.8880311718864873, + "grad_norm": 0.8163623809814453, + "learning_rate": 4.738254235086729e-06, + "loss": 0.5075, + "step": 12079 + }, + { + "epoch": 0.8881046904866932, + "grad_norm": 0.838019847869873, + "learning_rate": 4.738211311682811e-06, + "loss": 0.5341, + "step": 12080 + }, + { + "epoch": 0.888178209086899, + "grad_norm": 0.8196221590042114, + "learning_rate": 4.7381683849541524e-06, + "loss": 0.5435, + "step": 12081 + }, + { + "epoch": 0.8882517276871048, + "grad_norm": 0.7780373096466064, + "learning_rate": 4.738125454900818e-06, + "loss": 0.5554, + "step": 12082 + }, + { + "epoch": 0.8883252462873107, + "grad_norm": 0.822628915309906, + "learning_rate": 4.73808252152287e-06, + "loss": 0.55, + "step": 12083 + }, + { + "epoch": 0.8883987648875166, + "grad_norm": 0.8654093742370605, + "learning_rate": 4.738039584820373e-06, + "loss": 0.4959, + "step": 12084 + }, + { + "epoch": 0.8884722834877224, + "grad_norm": 0.8506792783737183, + "learning_rate": 4.7379966447933915e-06, + "loss": 0.546, + "step": 12085 + }, + { + "epoch": 0.8885458020879282, + "grad_norm": 0.7860848307609558, + "learning_rate": 4.737953701441988e-06, + "loss": 0.5201, + "step": 12086 + }, + { + "epoch": 0.8886193206881341, + "grad_norm": 0.8758854866027832, + "learning_rate": 4.737910754766226e-06, + "loss": 0.5154, + "step": 12087 + }, + { + "epoch": 0.88869283928834, + "grad_norm": 0.8064980506896973, + "learning_rate": 4.7378678047661715e-06, + "loss": 0.5577, + "step": 12088 + }, + { + "epoch": 0.8887663578885459, + "grad_norm": 0.8096937537193298, + "learning_rate": 4.737824851441886e-06, + "loss": 0.5393, + "step": 12089 + }, + { + "epoch": 0.8888398764887516, + "grad_norm": 0.8318315148353577, + "learning_rate": 4.737781894793435e-06, + "loss": 0.5096, + "step": 12090 + }, + { + "epoch": 0.8889133950889575, + "grad_norm": 0.8326083421707153, + "learning_rate": 4.73773893482088e-06, + "loss": 0.563, + "step": 12091 + }, + { + "epoch": 0.8889869136891634, + "grad_norm": 0.8370645046234131, + "learning_rate": 4.737695971524288e-06, + "loss": 0.5488, + "step": 12092 + }, + { + "epoch": 0.8890604322893693, + "grad_norm": 0.8583906292915344, + "learning_rate": 4.73765300490372e-06, + "loss": 0.549, + "step": 12093 + }, + { + "epoch": 0.889133950889575, + "grad_norm": 0.82692551612854, + "learning_rate": 4.737610034959243e-06, + "loss": 0.5293, + "step": 12094 + }, + { + "epoch": 0.8892074694897809, + "grad_norm": 0.8163405060768127, + "learning_rate": 4.737567061690917e-06, + "loss": 0.5319, + "step": 12095 + }, + { + "epoch": 0.8892809880899868, + "grad_norm": 0.8403610587120056, + "learning_rate": 4.737524085098808e-06, + "loss": 0.5367, + "step": 12096 + }, + { + "epoch": 0.8893545066901927, + "grad_norm": 0.7854315638542175, + "learning_rate": 4.73748110518298e-06, + "loss": 0.4877, + "step": 12097 + }, + { + "epoch": 0.8894280252903984, + "grad_norm": 0.8584886193275452, + "learning_rate": 4.737438121943496e-06, + "loss": 0.5525, + "step": 12098 + }, + { + "epoch": 0.8895015438906043, + "grad_norm": 0.8533989787101746, + "learning_rate": 4.737395135380419e-06, + "loss": 0.5385, + "step": 12099 + }, + { + "epoch": 0.8895750624908102, + "grad_norm": 0.8090364336967468, + "learning_rate": 4.737352145493816e-06, + "loss": 0.5335, + "step": 12100 + }, + { + "epoch": 0.8896485810910161, + "grad_norm": 0.8076899647712708, + "learning_rate": 4.737309152283748e-06, + "loss": 0.5214, + "step": 12101 + }, + { + "epoch": 0.8897220996912218, + "grad_norm": 0.8298669457435608, + "learning_rate": 4.7372661557502794e-06, + "loss": 0.5235, + "step": 12102 + }, + { + "epoch": 0.8897956182914277, + "grad_norm": 0.8533656597137451, + "learning_rate": 4.737223155893475e-06, + "loss": 0.5812, + "step": 12103 + }, + { + "epoch": 0.8898691368916336, + "grad_norm": 0.8208181858062744, + "learning_rate": 4.737180152713398e-06, + "loss": 0.5387, + "step": 12104 + }, + { + "epoch": 0.8899426554918395, + "grad_norm": 0.8099736571311951, + "learning_rate": 4.737137146210112e-06, + "loss": 0.5253, + "step": 12105 + }, + { + "epoch": 0.8900161740920453, + "grad_norm": 0.9097428321838379, + "learning_rate": 4.737094136383682e-06, + "loss": 0.571, + "step": 12106 + }, + { + "epoch": 0.8900896926922511, + "grad_norm": 0.7899516224861145, + "learning_rate": 4.737051123234171e-06, + "loss": 0.5375, + "step": 12107 + }, + { + "epoch": 0.890163211292457, + "grad_norm": 0.8165074586868286, + "learning_rate": 4.737008106761643e-06, + "loss": 0.5422, + "step": 12108 + }, + { + "epoch": 0.8902367298926629, + "grad_norm": 0.8007169961929321, + "learning_rate": 4.736965086966162e-06, + "loss": 0.5233, + "step": 12109 + }, + { + "epoch": 0.8903102484928687, + "grad_norm": 0.8458649516105652, + "learning_rate": 4.7369220638477915e-06, + "loss": 0.4897, + "step": 12110 + }, + { + "epoch": 0.8903837670930745, + "grad_norm": 0.8056740760803223, + "learning_rate": 4.736879037406596e-06, + "loss": 0.4748, + "step": 12111 + }, + { + "epoch": 0.8904572856932804, + "grad_norm": 0.8321734666824341, + "learning_rate": 4.736836007642639e-06, + "loss": 0.5537, + "step": 12112 + }, + { + "epoch": 0.8905308042934863, + "grad_norm": 0.7661308646202087, + "learning_rate": 4.736792974555985e-06, + "loss": 0.5161, + "step": 12113 + }, + { + "epoch": 0.8906043228936921, + "grad_norm": 0.8173421621322632, + "learning_rate": 4.736749938146696e-06, + "loss": 0.533, + "step": 12114 + }, + { + "epoch": 0.890677841493898, + "grad_norm": 0.8258808255195618, + "learning_rate": 4.73670689841484e-06, + "loss": 0.5354, + "step": 12115 + }, + { + "epoch": 0.8907513600941038, + "grad_norm": 0.8400785326957703, + "learning_rate": 4.736663855360477e-06, + "loss": 0.5843, + "step": 12116 + }, + { + "epoch": 0.8908248786943097, + "grad_norm": 0.8392313122749329, + "learning_rate": 4.736620808983672e-06, + "loss": 0.502, + "step": 12117 + }, + { + "epoch": 0.8908983972945155, + "grad_norm": 0.810056746006012, + "learning_rate": 4.7365777592844896e-06, + "loss": 0.4682, + "step": 12118 + }, + { + "epoch": 0.8909719158947214, + "grad_norm": 0.8756639957427979, + "learning_rate": 4.736534706262993e-06, + "loss": 0.5324, + "step": 12119 + }, + { + "epoch": 0.8910454344949272, + "grad_norm": 0.855967104434967, + "learning_rate": 4.736491649919247e-06, + "loss": 0.5159, + "step": 12120 + }, + { + "epoch": 0.8911189530951331, + "grad_norm": 0.8641577959060669, + "learning_rate": 4.736448590253315e-06, + "loss": 0.5636, + "step": 12121 + }, + { + "epoch": 0.8911924716953389, + "grad_norm": 0.842892587184906, + "learning_rate": 4.736405527265262e-06, + "loss": 0.537, + "step": 12122 + }, + { + "epoch": 0.8912659902955448, + "grad_norm": 0.8060908317565918, + "learning_rate": 4.73636246095515e-06, + "loss": 0.5244, + "step": 12123 + }, + { + "epoch": 0.8913395088957506, + "grad_norm": 0.8513226509094238, + "learning_rate": 4.736319391323044e-06, + "loss": 0.5296, + "step": 12124 + }, + { + "epoch": 0.8914130274959565, + "grad_norm": 0.8558107614517212, + "learning_rate": 4.736276318369009e-06, + "loss": 0.545, + "step": 12125 + }, + { + "epoch": 0.8914865460961623, + "grad_norm": 0.8058462738990784, + "learning_rate": 4.7362332420931065e-06, + "loss": 0.4874, + "step": 12126 + }, + { + "epoch": 0.8915600646963682, + "grad_norm": 0.8077173233032227, + "learning_rate": 4.7361901624954035e-06, + "loss": 0.5455, + "step": 12127 + }, + { + "epoch": 0.891633583296574, + "grad_norm": 0.7996972799301147, + "learning_rate": 4.736147079575961e-06, + "loss": 0.4899, + "step": 12128 + }, + { + "epoch": 0.8917071018967799, + "grad_norm": 0.8366263508796692, + "learning_rate": 4.7361039933348455e-06, + "loss": 0.5138, + "step": 12129 + }, + { + "epoch": 0.8917806204969857, + "grad_norm": 0.8014113903045654, + "learning_rate": 4.736060903772119e-06, + "loss": 0.4936, + "step": 12130 + }, + { + "epoch": 0.8918541390971916, + "grad_norm": 0.8204776644706726, + "learning_rate": 4.736017810887847e-06, + "loss": 0.5321, + "step": 12131 + }, + { + "epoch": 0.8919276576973975, + "grad_norm": 0.873039960861206, + "learning_rate": 4.735974714682093e-06, + "loss": 0.5657, + "step": 12132 + }, + { + "epoch": 0.8920011762976033, + "grad_norm": 0.8004175424575806, + "learning_rate": 4.735931615154921e-06, + "loss": 0.5484, + "step": 12133 + }, + { + "epoch": 0.8920746948978091, + "grad_norm": 0.8957428932189941, + "learning_rate": 4.735888512306396e-06, + "loss": 0.5748, + "step": 12134 + }, + { + "epoch": 0.892148213498015, + "grad_norm": 0.8004286289215088, + "learning_rate": 4.73584540613658e-06, + "loss": 0.5089, + "step": 12135 + }, + { + "epoch": 0.8922217320982209, + "grad_norm": 0.8354991674423218, + "learning_rate": 4.7358022966455385e-06, + "loss": 0.5524, + "step": 12136 + }, + { + "epoch": 0.8922952506984267, + "grad_norm": 0.8298018574714661, + "learning_rate": 4.735759183833335e-06, + "loss": 0.5294, + "step": 12137 + }, + { + "epoch": 0.8923687692986325, + "grad_norm": 0.8466982245445251, + "learning_rate": 4.735716067700033e-06, + "loss": 0.5713, + "step": 12138 + }, + { + "epoch": 0.8924422878988384, + "grad_norm": 0.8824595808982849, + "learning_rate": 4.735672948245698e-06, + "loss": 0.548, + "step": 12139 + }, + { + "epoch": 0.8925158064990443, + "grad_norm": 0.7999670505523682, + "learning_rate": 4.735629825470392e-06, + "loss": 0.5448, + "step": 12140 + }, + { + "epoch": 0.8925893250992502, + "grad_norm": 0.8249204754829407, + "learning_rate": 4.735586699374182e-06, + "loss": 0.5092, + "step": 12141 + }, + { + "epoch": 0.8926628436994559, + "grad_norm": 0.8200790286064148, + "learning_rate": 4.735543569957129e-06, + "loss": 0.5816, + "step": 12142 + }, + { + "epoch": 0.8927363622996618, + "grad_norm": 0.8792136311531067, + "learning_rate": 4.735500437219301e-06, + "loss": 0.5552, + "step": 12143 + }, + { + "epoch": 0.8928098808998677, + "grad_norm": 0.8093899488449097, + "learning_rate": 4.7354573011607565e-06, + "loss": 0.4956, + "step": 12144 + }, + { + "epoch": 0.8928833995000736, + "grad_norm": 0.8312203884124756, + "learning_rate": 4.735414161781564e-06, + "loss": 0.5288, + "step": 12145 + }, + { + "epoch": 0.8929569181002793, + "grad_norm": 0.8845125436782837, + "learning_rate": 4.735371019081786e-06, + "loss": 0.5711, + "step": 12146 + }, + { + "epoch": 0.8930304367004852, + "grad_norm": 0.806247889995575, + "learning_rate": 4.735327873061486e-06, + "loss": 0.5022, + "step": 12147 + }, + { + "epoch": 0.8931039553006911, + "grad_norm": 0.8376035690307617, + "learning_rate": 4.73528472372073e-06, + "loss": 0.5611, + "step": 12148 + }, + { + "epoch": 0.893177473900897, + "grad_norm": 0.828980028629303, + "learning_rate": 4.7352415710595804e-06, + "loss": 0.5198, + "step": 12149 + }, + { + "epoch": 0.8932509925011027, + "grad_norm": 0.842812716960907, + "learning_rate": 4.735198415078101e-06, + "loss": 0.5408, + "step": 12150 + }, + { + "epoch": 0.8933245111013086, + "grad_norm": 0.8540251851081848, + "learning_rate": 4.735155255776358e-06, + "loss": 0.572, + "step": 12151 + }, + { + "epoch": 0.8933980297015145, + "grad_norm": 0.7926031351089478, + "learning_rate": 4.7351120931544144e-06, + "loss": 0.4975, + "step": 12152 + }, + { + "epoch": 0.8934715483017204, + "grad_norm": 0.8197346329689026, + "learning_rate": 4.735068927212334e-06, + "loss": 0.515, + "step": 12153 + }, + { + "epoch": 0.8935450669019261, + "grad_norm": 0.861781656742096, + "learning_rate": 4.73502575795018e-06, + "loss": 0.5663, + "step": 12154 + }, + { + "epoch": 0.893618585502132, + "grad_norm": 0.8454930782318115, + "learning_rate": 4.734982585368019e-06, + "loss": 0.5503, + "step": 12155 + }, + { + "epoch": 0.8936921041023379, + "grad_norm": 0.8031777143478394, + "learning_rate": 4.734939409465913e-06, + "loss": 0.5247, + "step": 12156 + }, + { + "epoch": 0.8937656227025438, + "grad_norm": 0.8211241960525513, + "learning_rate": 4.734896230243926e-06, + "loss": 0.5185, + "step": 12157 + }, + { + "epoch": 0.8938391413027496, + "grad_norm": 0.8264644742012024, + "learning_rate": 4.734853047702125e-06, + "loss": 0.5239, + "step": 12158 + }, + { + "epoch": 0.8939126599029554, + "grad_norm": 0.802169144153595, + "learning_rate": 4.734809861840571e-06, + "loss": 0.5119, + "step": 12159 + }, + { + "epoch": 0.8939861785031613, + "grad_norm": 0.8147861957550049, + "learning_rate": 4.73476667265933e-06, + "loss": 0.5536, + "step": 12160 + }, + { + "epoch": 0.8940596971033672, + "grad_norm": 0.828306257724762, + "learning_rate": 4.734723480158465e-06, + "loss": 0.4764, + "step": 12161 + }, + { + "epoch": 0.894133215703573, + "grad_norm": 0.8303323984146118, + "learning_rate": 4.7346802843380405e-06, + "loss": 0.519, + "step": 12162 + }, + { + "epoch": 0.8942067343037788, + "grad_norm": 0.9019741415977478, + "learning_rate": 4.73463708519812e-06, + "loss": 0.5532, + "step": 12163 + }, + { + "epoch": 0.8942802529039847, + "grad_norm": 0.8124735951423645, + "learning_rate": 4.73459388273877e-06, + "loss": 0.5217, + "step": 12164 + }, + { + "epoch": 0.8943537715041906, + "grad_norm": 0.848119854927063, + "learning_rate": 4.734550676960052e-06, + "loss": 0.5296, + "step": 12165 + }, + { + "epoch": 0.8944272901043964, + "grad_norm": 0.810401201248169, + "learning_rate": 4.7345074678620324e-06, + "loss": 0.5118, + "step": 12166 + }, + { + "epoch": 0.8945008087046022, + "grad_norm": 0.801872730255127, + "learning_rate": 4.734464255444774e-06, + "loss": 0.5173, + "step": 12167 + }, + { + "epoch": 0.8945743273048081, + "grad_norm": 0.8164965510368347, + "learning_rate": 4.734421039708341e-06, + "loss": 0.5425, + "step": 12168 + }, + { + "epoch": 0.894647845905014, + "grad_norm": 0.8565632700920105, + "learning_rate": 4.7343778206527985e-06, + "loss": 0.508, + "step": 12169 + }, + { + "epoch": 0.8947213645052198, + "grad_norm": 0.8118507266044617, + "learning_rate": 4.73433459827821e-06, + "loss": 0.5064, + "step": 12170 + }, + { + "epoch": 0.8947948831054257, + "grad_norm": 0.8122100234031677, + "learning_rate": 4.734291372584639e-06, + "loss": 0.5353, + "step": 12171 + }, + { + "epoch": 0.8948684017056315, + "grad_norm": 0.8769838809967041, + "learning_rate": 4.734248143572151e-06, + "loss": 0.559, + "step": 12172 + }, + { + "epoch": 0.8949419203058374, + "grad_norm": 0.8192524909973145, + "learning_rate": 4.73420491124081e-06, + "loss": 0.4991, + "step": 12173 + }, + { + "epoch": 0.8950154389060432, + "grad_norm": 0.8584722280502319, + "learning_rate": 4.73416167559068e-06, + "loss": 0.5497, + "step": 12174 + }, + { + "epoch": 0.8950889575062491, + "grad_norm": 0.8454275131225586, + "learning_rate": 4.734118436621824e-06, + "loss": 0.5662, + "step": 12175 + }, + { + "epoch": 0.8951624761064549, + "grad_norm": 0.7844447493553162, + "learning_rate": 4.734075194334309e-06, + "loss": 0.5039, + "step": 12176 + }, + { + "epoch": 0.8952359947066608, + "grad_norm": 0.8152396082878113, + "learning_rate": 4.734031948728196e-06, + "loss": 0.5023, + "step": 12177 + }, + { + "epoch": 0.8953095133068666, + "grad_norm": 0.8715278506278992, + "learning_rate": 4.733988699803552e-06, + "loss": 0.5463, + "step": 12178 + }, + { + "epoch": 0.8953830319070725, + "grad_norm": 0.8253472447395325, + "learning_rate": 4.733945447560439e-06, + "loss": 0.5337, + "step": 12179 + }, + { + "epoch": 0.8954565505072783, + "grad_norm": 0.7900694012641907, + "learning_rate": 4.7339021919989234e-06, + "loss": 0.5152, + "step": 12180 + }, + { + "epoch": 0.8955300691074842, + "grad_norm": 0.8533461093902588, + "learning_rate": 4.733858933119069e-06, + "loss": 0.5761, + "step": 12181 + }, + { + "epoch": 0.89560358770769, + "grad_norm": 0.7712416052818298, + "learning_rate": 4.733815670920938e-06, + "loss": 0.5076, + "step": 12182 + }, + { + "epoch": 0.8956771063078959, + "grad_norm": 0.8040815591812134, + "learning_rate": 4.733772405404597e-06, + "loss": 0.519, + "step": 12183 + }, + { + "epoch": 0.8957506249081018, + "grad_norm": 0.8228302001953125, + "learning_rate": 4.73372913657011e-06, + "loss": 0.5226, + "step": 12184 + }, + { + "epoch": 0.8958241435083076, + "grad_norm": 0.8131595253944397, + "learning_rate": 4.7336858644175394e-06, + "loss": 0.5137, + "step": 12185 + }, + { + "epoch": 0.8958976621085134, + "grad_norm": 0.7936219573020935, + "learning_rate": 4.733642588946952e-06, + "loss": 0.5465, + "step": 12186 + }, + { + "epoch": 0.8959711807087193, + "grad_norm": 0.8220267295837402, + "learning_rate": 4.73359931015841e-06, + "loss": 0.5434, + "step": 12187 + }, + { + "epoch": 0.8960446993089252, + "grad_norm": 0.8446177840232849, + "learning_rate": 4.733556028051978e-06, + "loss": 0.5298, + "step": 12188 + }, + { + "epoch": 0.896118217909131, + "grad_norm": 0.8312393426895142, + "learning_rate": 4.733512742627722e-06, + "loss": 0.5398, + "step": 12189 + }, + { + "epoch": 0.8961917365093368, + "grad_norm": 0.8697283267974854, + "learning_rate": 4.733469453885705e-06, + "loss": 0.5359, + "step": 12190 + }, + { + "epoch": 0.8962652551095427, + "grad_norm": 0.7741820812225342, + "learning_rate": 4.73342616182599e-06, + "loss": 0.5372, + "step": 12191 + }, + { + "epoch": 0.8963387737097486, + "grad_norm": 0.8101000785827637, + "learning_rate": 4.733382866448644e-06, + "loss": 0.5509, + "step": 12192 + }, + { + "epoch": 0.8964122923099545, + "grad_norm": 0.9219827055931091, + "learning_rate": 4.73333956775373e-06, + "loss": 0.5962, + "step": 12193 + }, + { + "epoch": 0.8964858109101602, + "grad_norm": 0.8537561893463135, + "learning_rate": 4.733296265741313e-06, + "loss": 0.5594, + "step": 12194 + }, + { + "epoch": 0.8965593295103661, + "grad_norm": 0.7680385708808899, + "learning_rate": 4.733252960411455e-06, + "loss": 0.4696, + "step": 12195 + }, + { + "epoch": 0.896632848110572, + "grad_norm": 0.7796581983566284, + "learning_rate": 4.733209651764223e-06, + "loss": 0.4615, + "step": 12196 + }, + { + "epoch": 0.8967063667107779, + "grad_norm": 0.806761622428894, + "learning_rate": 4.73316633979968e-06, + "loss": 0.5276, + "step": 12197 + }, + { + "epoch": 0.8967798853109836, + "grad_norm": 0.8274409174919128, + "learning_rate": 4.733123024517891e-06, + "loss": 0.5208, + "step": 12198 + }, + { + "epoch": 0.8968534039111895, + "grad_norm": 0.8509496450424194, + "learning_rate": 4.733079705918921e-06, + "loss": 0.5006, + "step": 12199 + }, + { + "epoch": 0.8969269225113954, + "grad_norm": 0.8339163064956665, + "learning_rate": 4.733036384002832e-06, + "loss": 0.5703, + "step": 12200 + }, + { + "epoch": 0.8970004411116013, + "grad_norm": 0.8304598927497864, + "learning_rate": 4.73299305876969e-06, + "loss": 0.5082, + "step": 12201 + }, + { + "epoch": 0.897073959711807, + "grad_norm": 0.8039113879203796, + "learning_rate": 4.732949730219559e-06, + "loss": 0.5407, + "step": 12202 + }, + { + "epoch": 0.8971474783120129, + "grad_norm": 0.8007892370223999, + "learning_rate": 4.7329063983525035e-06, + "loss": 0.4876, + "step": 12203 + }, + { + "epoch": 0.8972209969122188, + "grad_norm": 0.7942519187927246, + "learning_rate": 4.732863063168588e-06, + "loss": 0.5405, + "step": 12204 + }, + { + "epoch": 0.8972945155124247, + "grad_norm": 0.8030465245246887, + "learning_rate": 4.732819724667877e-06, + "loss": 0.5183, + "step": 12205 + }, + { + "epoch": 0.8973680341126304, + "grad_norm": 0.8506141304969788, + "learning_rate": 4.732776382850434e-06, + "loss": 0.5478, + "step": 12206 + }, + { + "epoch": 0.8974415527128363, + "grad_norm": 0.8353987336158752, + "learning_rate": 4.732733037716324e-06, + "loss": 0.5014, + "step": 12207 + }, + { + "epoch": 0.8975150713130422, + "grad_norm": 0.8363416790962219, + "learning_rate": 4.7326896892656115e-06, + "loss": 0.5198, + "step": 12208 + }, + { + "epoch": 0.8975885899132481, + "grad_norm": 0.7886749505996704, + "learning_rate": 4.73264633749836e-06, + "loss": 0.5095, + "step": 12209 + }, + { + "epoch": 0.8976621085134538, + "grad_norm": 0.7984004616737366, + "learning_rate": 4.7326029824146356e-06, + "loss": 0.5384, + "step": 12210 + }, + { + "epoch": 0.8977356271136597, + "grad_norm": 0.8060006499290466, + "learning_rate": 4.7325596240145015e-06, + "loss": 0.5117, + "step": 12211 + }, + { + "epoch": 0.8978091457138656, + "grad_norm": 0.8584526181221008, + "learning_rate": 4.7325162622980225e-06, + "loss": 0.539, + "step": 12212 + }, + { + "epoch": 0.8978826643140715, + "grad_norm": 0.8317961096763611, + "learning_rate": 4.732472897265262e-06, + "loss": 0.5817, + "step": 12213 + }, + { + "epoch": 0.8979561829142773, + "grad_norm": 0.8244192004203796, + "learning_rate": 4.7324295289162865e-06, + "loss": 0.5285, + "step": 12214 + }, + { + "epoch": 0.8980297015144831, + "grad_norm": 0.8171551823616028, + "learning_rate": 4.732386157251159e-06, + "loss": 0.5331, + "step": 12215 + }, + { + "epoch": 0.898103220114689, + "grad_norm": 0.841054379940033, + "learning_rate": 4.7323427822699434e-06, + "loss": 0.5426, + "step": 12216 + }, + { + "epoch": 0.8981767387148949, + "grad_norm": 0.7813181281089783, + "learning_rate": 4.732299403972705e-06, + "loss": 0.5254, + "step": 12217 + }, + { + "epoch": 0.8982502573151008, + "grad_norm": 0.8436763286590576, + "learning_rate": 4.732256022359508e-06, + "loss": 0.5758, + "step": 12218 + }, + { + "epoch": 0.8983237759153065, + "grad_norm": 0.8551617860794067, + "learning_rate": 4.7322126374304174e-06, + "loss": 0.5389, + "step": 12219 + }, + { + "epoch": 0.8983972945155124, + "grad_norm": 0.8267289400100708, + "learning_rate": 4.732169249185497e-06, + "loss": 0.5457, + "step": 12220 + }, + { + "epoch": 0.8984708131157183, + "grad_norm": 0.7765684723854065, + "learning_rate": 4.7321258576248105e-06, + "loss": 0.5137, + "step": 12221 + }, + { + "epoch": 0.8985443317159242, + "grad_norm": 0.8133392333984375, + "learning_rate": 4.732082462748424e-06, + "loss": 0.5613, + "step": 12222 + }, + { + "epoch": 0.89861785031613, + "grad_norm": 0.8698603510856628, + "learning_rate": 4.7320390645564015e-06, + "loss": 0.5146, + "step": 12223 + }, + { + "epoch": 0.8986913689163358, + "grad_norm": 0.8541960120201111, + "learning_rate": 4.7319956630488075e-06, + "loss": 0.5921, + "step": 12224 + }, + { + "epoch": 0.8987648875165417, + "grad_norm": 0.8652960062026978, + "learning_rate": 4.731952258225705e-06, + "loss": 0.534, + "step": 12225 + }, + { + "epoch": 0.8988384061167476, + "grad_norm": 0.8177931904792786, + "learning_rate": 4.73190885008716e-06, + "loss": 0.5211, + "step": 12226 + }, + { + "epoch": 0.8989119247169534, + "grad_norm": 0.8278188109397888, + "learning_rate": 4.731865438633237e-06, + "loss": 0.5142, + "step": 12227 + }, + { + "epoch": 0.8989854433171592, + "grad_norm": 0.8266423940658569, + "learning_rate": 4.731822023864e-06, + "loss": 0.4934, + "step": 12228 + }, + { + "epoch": 0.8990589619173651, + "grad_norm": 0.8113187551498413, + "learning_rate": 4.731778605779514e-06, + "loss": 0.5157, + "step": 12229 + }, + { + "epoch": 0.899132480517571, + "grad_norm": 0.8274960517883301, + "learning_rate": 4.731735184379842e-06, + "loss": 0.5062, + "step": 12230 + }, + { + "epoch": 0.8992059991177768, + "grad_norm": 0.8814970254898071, + "learning_rate": 4.731691759665051e-06, + "loss": 0.5161, + "step": 12231 + }, + { + "epoch": 0.8992795177179826, + "grad_norm": 0.8171092867851257, + "learning_rate": 4.731648331635203e-06, + "loss": 0.5085, + "step": 12232 + }, + { + "epoch": 0.8993530363181885, + "grad_norm": 0.8227766156196594, + "learning_rate": 4.731604900290364e-06, + "loss": 0.526, + "step": 12233 + }, + { + "epoch": 0.8994265549183944, + "grad_norm": 0.8003937602043152, + "learning_rate": 4.731561465630598e-06, + "loss": 0.5047, + "step": 12234 + }, + { + "epoch": 0.8995000735186002, + "grad_norm": 0.8449622988700867, + "learning_rate": 4.73151802765597e-06, + "loss": 0.5236, + "step": 12235 + }, + { + "epoch": 0.899573592118806, + "grad_norm": 0.7702508568763733, + "learning_rate": 4.731474586366544e-06, + "loss": 0.5129, + "step": 12236 + }, + { + "epoch": 0.8996471107190119, + "grad_norm": 0.8475335240364075, + "learning_rate": 4.731431141762383e-06, + "loss": 0.5176, + "step": 12237 + }, + { + "epoch": 0.8997206293192178, + "grad_norm": 0.8082197308540344, + "learning_rate": 4.731387693843555e-06, + "loss": 0.5762, + "step": 12238 + }, + { + "epoch": 0.8997941479194236, + "grad_norm": 0.8593763113021851, + "learning_rate": 4.731344242610124e-06, + "loss": 0.5112, + "step": 12239 + }, + { + "epoch": 0.8998676665196295, + "grad_norm": 0.8249503970146179, + "learning_rate": 4.731300788062152e-06, + "loss": 0.5404, + "step": 12240 + }, + { + "epoch": 0.8999411851198353, + "grad_norm": 0.8490447402000427, + "learning_rate": 4.731257330199704e-06, + "loss": 0.5567, + "step": 12241 + }, + { + "epoch": 0.9000147037200412, + "grad_norm": 0.7830750942230225, + "learning_rate": 4.731213869022846e-06, + "loss": 0.5282, + "step": 12242 + }, + { + "epoch": 0.900088222320247, + "grad_norm": 0.8354717493057251, + "learning_rate": 4.731170404531642e-06, + "loss": 0.5595, + "step": 12243 + }, + { + "epoch": 0.9001617409204529, + "grad_norm": 0.8559369444847107, + "learning_rate": 4.731126936726157e-06, + "loss": 0.5299, + "step": 12244 + }, + { + "epoch": 0.9002352595206587, + "grad_norm": 0.8019455671310425, + "learning_rate": 4.731083465606454e-06, + "loss": 0.51, + "step": 12245 + }, + { + "epoch": 0.9003087781208646, + "grad_norm": 0.8628367781639099, + "learning_rate": 4.7310399911726e-06, + "loss": 0.5323, + "step": 12246 + }, + { + "epoch": 0.9003822967210704, + "grad_norm": 0.7985897064208984, + "learning_rate": 4.730996513424658e-06, + "loss": 0.5395, + "step": 12247 + }, + { + "epoch": 0.9004558153212763, + "grad_norm": 0.7927533984184265, + "learning_rate": 4.730953032362693e-06, + "loss": 0.5227, + "step": 12248 + }, + { + "epoch": 0.9005293339214822, + "grad_norm": 0.8325881361961365, + "learning_rate": 4.730909547986769e-06, + "loss": 0.5394, + "step": 12249 + }, + { + "epoch": 0.900602852521688, + "grad_norm": 0.7847926020622253, + "learning_rate": 4.73086606029695e-06, + "loss": 0.4494, + "step": 12250 + }, + { + "epoch": 0.9006763711218938, + "grad_norm": 0.7840732336044312, + "learning_rate": 4.730822569293303e-06, + "loss": 0.4785, + "step": 12251 + }, + { + "epoch": 0.9007498897220997, + "grad_norm": 0.8048475384712219, + "learning_rate": 4.730779074975891e-06, + "loss": 0.5112, + "step": 12252 + }, + { + "epoch": 0.9008234083223056, + "grad_norm": 0.8196940422058105, + "learning_rate": 4.730735577344778e-06, + "loss": 0.4815, + "step": 12253 + }, + { + "epoch": 0.9008969269225114, + "grad_norm": 0.8213188648223877, + "learning_rate": 4.730692076400031e-06, + "loss": 0.5033, + "step": 12254 + }, + { + "epoch": 0.9009704455227172, + "grad_norm": 0.8253061175346375, + "learning_rate": 4.7306485721417125e-06, + "loss": 0.539, + "step": 12255 + }, + { + "epoch": 0.9010439641229231, + "grad_norm": 0.8180424571037292, + "learning_rate": 4.730605064569887e-06, + "loss": 0.525, + "step": 12256 + }, + { + "epoch": 0.901117482723129, + "grad_norm": 0.7706122994422913, + "learning_rate": 4.73056155368462e-06, + "loss": 0.5044, + "step": 12257 + }, + { + "epoch": 0.9011910013233349, + "grad_norm": 0.7962748408317566, + "learning_rate": 4.7305180394859764e-06, + "loss": 0.5184, + "step": 12258 + }, + { + "epoch": 0.9012645199235406, + "grad_norm": 0.8062325716018677, + "learning_rate": 4.73047452197402e-06, + "loss": 0.5018, + "step": 12259 + }, + { + "epoch": 0.9013380385237465, + "grad_norm": 0.8802713751792908, + "learning_rate": 4.730431001148817e-06, + "loss": 0.5418, + "step": 12260 + }, + { + "epoch": 0.9014115571239524, + "grad_norm": 0.8568894863128662, + "learning_rate": 4.730387477010429e-06, + "loss": 0.5113, + "step": 12261 + }, + { + "epoch": 0.9014850757241583, + "grad_norm": 0.8143397569656372, + "learning_rate": 4.7303439495589235e-06, + "loss": 0.512, + "step": 12262 + }, + { + "epoch": 0.901558594324364, + "grad_norm": 0.7855808734893799, + "learning_rate": 4.730300418794364e-06, + "loss": 0.4937, + "step": 12263 + }, + { + "epoch": 0.9016321129245699, + "grad_norm": 0.8240112662315369, + "learning_rate": 4.730256884716816e-06, + "loss": 0.5394, + "step": 12264 + }, + { + "epoch": 0.9017056315247758, + "grad_norm": 0.7663348913192749, + "learning_rate": 4.730213347326343e-06, + "loss": 0.4965, + "step": 12265 + }, + { + "epoch": 0.9017791501249817, + "grad_norm": 0.8090226054191589, + "learning_rate": 4.73016980662301e-06, + "loss": 0.5785, + "step": 12266 + }, + { + "epoch": 0.9018526687251874, + "grad_norm": 0.7931985855102539, + "learning_rate": 4.730126262606883e-06, + "loss": 0.5043, + "step": 12267 + }, + { + "epoch": 0.9019261873253933, + "grad_norm": 0.8277098536491394, + "learning_rate": 4.730082715278024e-06, + "loss": 0.4899, + "step": 12268 + }, + { + "epoch": 0.9019997059255992, + "grad_norm": 0.7980617880821228, + "learning_rate": 4.730039164636499e-06, + "loss": 0.4972, + "step": 12269 + }, + { + "epoch": 0.9020732245258051, + "grad_norm": 0.8179551362991333, + "learning_rate": 4.729995610682374e-06, + "loss": 0.5651, + "step": 12270 + }, + { + "epoch": 0.9021467431260108, + "grad_norm": 0.8315860033035278, + "learning_rate": 4.729952053415713e-06, + "loss": 0.5294, + "step": 12271 + }, + { + "epoch": 0.9022202617262167, + "grad_norm": 0.8273695707321167, + "learning_rate": 4.72990849283658e-06, + "loss": 0.5214, + "step": 12272 + }, + { + "epoch": 0.9022937803264226, + "grad_norm": 0.7499702572822571, + "learning_rate": 4.729864928945039e-06, + "loss": 0.4913, + "step": 12273 + }, + { + "epoch": 0.9023672989266285, + "grad_norm": 0.8796113133430481, + "learning_rate": 4.729821361741157e-06, + "loss": 0.5636, + "step": 12274 + }, + { + "epoch": 0.9024408175268342, + "grad_norm": 0.798827588558197, + "learning_rate": 4.729777791224998e-06, + "loss": 0.5338, + "step": 12275 + }, + { + "epoch": 0.9025143361270401, + "grad_norm": 0.8380861282348633, + "learning_rate": 4.729734217396625e-06, + "loss": 0.4965, + "step": 12276 + }, + { + "epoch": 0.902587854727246, + "grad_norm": 0.8362091183662415, + "learning_rate": 4.7296906402561035e-06, + "loss": 0.5071, + "step": 12277 + }, + { + "epoch": 0.9026613733274519, + "grad_norm": 0.8095185160636902, + "learning_rate": 4.729647059803499e-06, + "loss": 0.5201, + "step": 12278 + }, + { + "epoch": 0.9027348919276577, + "grad_norm": 0.760608434677124, + "learning_rate": 4.729603476038876e-06, + "loss": 0.4779, + "step": 12279 + }, + { + "epoch": 0.9028084105278635, + "grad_norm": 0.8354253768920898, + "learning_rate": 4.7295598889623e-06, + "loss": 0.5549, + "step": 12280 + }, + { + "epoch": 0.9028819291280694, + "grad_norm": 0.832141101360321, + "learning_rate": 4.729516298573834e-06, + "loss": 0.5348, + "step": 12281 + }, + { + "epoch": 0.9029554477282753, + "grad_norm": 0.8193498253822327, + "learning_rate": 4.729472704873544e-06, + "loss": 0.5065, + "step": 12282 + }, + { + "epoch": 0.9030289663284811, + "grad_norm": 0.8359411954879761, + "learning_rate": 4.729429107861494e-06, + "loss": 0.526, + "step": 12283 + }, + { + "epoch": 0.9031024849286869, + "grad_norm": 0.8513715863227844, + "learning_rate": 4.729385507537749e-06, + "loss": 0.5047, + "step": 12284 + }, + { + "epoch": 0.9031760035288928, + "grad_norm": 0.8356185555458069, + "learning_rate": 4.7293419039023745e-06, + "loss": 0.5151, + "step": 12285 + }, + { + "epoch": 0.9032495221290987, + "grad_norm": 0.8784118890762329, + "learning_rate": 4.729298296955435e-06, + "loss": 0.4941, + "step": 12286 + }, + { + "epoch": 0.9033230407293045, + "grad_norm": 0.8830668926239014, + "learning_rate": 4.729254686696994e-06, + "loss": 0.5589, + "step": 12287 + }, + { + "epoch": 0.9033965593295104, + "grad_norm": 0.7852911949157715, + "learning_rate": 4.729211073127118e-06, + "loss": 0.5215, + "step": 12288 + }, + { + "epoch": 0.9034700779297162, + "grad_norm": 0.8203508257865906, + "learning_rate": 4.72916745624587e-06, + "loss": 0.5103, + "step": 12289 + }, + { + "epoch": 0.9035435965299221, + "grad_norm": 0.8186088800430298, + "learning_rate": 4.729123836053316e-06, + "loss": 0.5255, + "step": 12290 + }, + { + "epoch": 0.9036171151301279, + "grad_norm": 0.8526996970176697, + "learning_rate": 4.729080212549522e-06, + "loss": 0.5305, + "step": 12291 + }, + { + "epoch": 0.9036906337303338, + "grad_norm": 0.8379542231559753, + "learning_rate": 4.7290365857345495e-06, + "loss": 0.5591, + "step": 12292 + }, + { + "epoch": 0.9037641523305396, + "grad_norm": 0.8140172958374023, + "learning_rate": 4.728992955608467e-06, + "loss": 0.515, + "step": 12293 + }, + { + "epoch": 0.9038376709307455, + "grad_norm": 0.8742057681083679, + "learning_rate": 4.728949322171337e-06, + "loss": 0.5269, + "step": 12294 + }, + { + "epoch": 0.9039111895309513, + "grad_norm": 0.8866970539093018, + "learning_rate": 4.728905685423224e-06, + "loss": 0.5524, + "step": 12295 + }, + { + "epoch": 0.9039847081311572, + "grad_norm": 0.8127012848854065, + "learning_rate": 4.7288620453641945e-06, + "loss": 0.5596, + "step": 12296 + }, + { + "epoch": 0.904058226731363, + "grad_norm": 0.8456578254699707, + "learning_rate": 4.728818401994312e-06, + "loss": 0.5386, + "step": 12297 + }, + { + "epoch": 0.9041317453315689, + "grad_norm": 0.8245177865028381, + "learning_rate": 4.728774755313642e-06, + "loss": 0.5368, + "step": 12298 + }, + { + "epoch": 0.9042052639317747, + "grad_norm": 0.8474507331848145, + "learning_rate": 4.728731105322249e-06, + "loss": 0.5555, + "step": 12299 + }, + { + "epoch": 0.9042787825319806, + "grad_norm": 0.7950893640518188, + "learning_rate": 4.728687452020198e-06, + "loss": 0.5109, + "step": 12300 + }, + { + "epoch": 0.9043523011321865, + "grad_norm": 0.8098939657211304, + "learning_rate": 4.728643795407554e-06, + "loss": 0.4844, + "step": 12301 + }, + { + "epoch": 0.9044258197323923, + "grad_norm": 0.8381838798522949, + "learning_rate": 4.728600135484381e-06, + "loss": 0.573, + "step": 12302 + }, + { + "epoch": 0.9044993383325981, + "grad_norm": 0.8109122514724731, + "learning_rate": 4.728556472250746e-06, + "loss": 0.5383, + "step": 12303 + }, + { + "epoch": 0.904572856932804, + "grad_norm": 0.8342539072036743, + "learning_rate": 4.728512805706711e-06, + "loss": 0.5408, + "step": 12304 + }, + { + "epoch": 0.9046463755330099, + "grad_norm": 0.7903233170509338, + "learning_rate": 4.728469135852343e-06, + "loss": 0.4826, + "step": 12305 + }, + { + "epoch": 0.9047198941332157, + "grad_norm": 0.803068995475769, + "learning_rate": 4.728425462687706e-06, + "loss": 0.5468, + "step": 12306 + }, + { + "epoch": 0.9047934127334215, + "grad_norm": 0.8499330282211304, + "learning_rate": 4.728381786212865e-06, + "loss": 0.5019, + "step": 12307 + }, + { + "epoch": 0.9048669313336274, + "grad_norm": 0.792760968208313, + "learning_rate": 4.728338106427885e-06, + "loss": 0.507, + "step": 12308 + }, + { + "epoch": 0.9049404499338333, + "grad_norm": 0.804858386516571, + "learning_rate": 4.72829442333283e-06, + "loss": 0.4877, + "step": 12309 + }, + { + "epoch": 0.9050139685340391, + "grad_norm": 0.8480097055435181, + "learning_rate": 4.728250736927766e-06, + "loss": 0.5398, + "step": 12310 + }, + { + "epoch": 0.9050874871342449, + "grad_norm": 0.8195894360542297, + "learning_rate": 4.728207047212758e-06, + "loss": 0.5012, + "step": 12311 + }, + { + "epoch": 0.9051610057344508, + "grad_norm": 0.8253975510597229, + "learning_rate": 4.72816335418787e-06, + "loss": 0.4781, + "step": 12312 + }, + { + "epoch": 0.9052345243346567, + "grad_norm": 0.8270231485366821, + "learning_rate": 4.728119657853168e-06, + "loss": 0.5149, + "step": 12313 + }, + { + "epoch": 0.9053080429348626, + "grad_norm": 0.811749279499054, + "learning_rate": 4.728075958208716e-06, + "loss": 0.5256, + "step": 12314 + }, + { + "epoch": 0.9053815615350683, + "grad_norm": 0.8006261587142944, + "learning_rate": 4.728032255254578e-06, + "loss": 0.5522, + "step": 12315 + }, + { + "epoch": 0.9054550801352742, + "grad_norm": 0.8057201504707336, + "learning_rate": 4.727988548990822e-06, + "loss": 0.5176, + "step": 12316 + }, + { + "epoch": 0.9055285987354801, + "grad_norm": 0.8061075806617737, + "learning_rate": 4.72794483941751e-06, + "loss": 0.4999, + "step": 12317 + }, + { + "epoch": 0.905602117335686, + "grad_norm": 0.7850827574729919, + "learning_rate": 4.7279011265347075e-06, + "loss": 0.4898, + "step": 12318 + }, + { + "epoch": 0.9056756359358917, + "grad_norm": 0.8645355105400085, + "learning_rate": 4.72785741034248e-06, + "loss": 0.5429, + "step": 12319 + }, + { + "epoch": 0.9057491545360976, + "grad_norm": 0.8768169283866882, + "learning_rate": 4.727813690840893e-06, + "loss": 0.5301, + "step": 12320 + }, + { + "epoch": 0.9058226731363035, + "grad_norm": 0.8506595492362976, + "learning_rate": 4.7277699680300106e-06, + "loss": 0.5418, + "step": 12321 + }, + { + "epoch": 0.9058961917365094, + "grad_norm": 0.7952856421470642, + "learning_rate": 4.727726241909898e-06, + "loss": 0.4757, + "step": 12322 + }, + { + "epoch": 0.9059697103367151, + "grad_norm": 0.8512459993362427, + "learning_rate": 4.72768251248062e-06, + "loss": 0.5397, + "step": 12323 + }, + { + "epoch": 0.906043228936921, + "grad_norm": 0.8232445120811462, + "learning_rate": 4.727638779742241e-06, + "loss": 0.5095, + "step": 12324 + }, + { + "epoch": 0.9061167475371269, + "grad_norm": 0.8477522134780884, + "learning_rate": 4.727595043694827e-06, + "loss": 0.5227, + "step": 12325 + }, + { + "epoch": 0.9061902661373328, + "grad_norm": 0.8445379734039307, + "learning_rate": 4.727551304338443e-06, + "loss": 0.5429, + "step": 12326 + }, + { + "epoch": 0.9062637847375385, + "grad_norm": 0.7604362368583679, + "learning_rate": 4.727507561673152e-06, + "loss": 0.4781, + "step": 12327 + }, + { + "epoch": 0.9063373033377444, + "grad_norm": 0.8152221441268921, + "learning_rate": 4.727463815699023e-06, + "loss": 0.5493, + "step": 12328 + }, + { + "epoch": 0.9064108219379503, + "grad_norm": 0.8585119843482971, + "learning_rate": 4.727420066416116e-06, + "loss": 0.5353, + "step": 12329 + }, + { + "epoch": 0.9064843405381562, + "grad_norm": 0.8493886590003967, + "learning_rate": 4.7273763138245e-06, + "loss": 0.5317, + "step": 12330 + }, + { + "epoch": 0.906557859138362, + "grad_norm": 0.840339720249176, + "learning_rate": 4.727332557924238e-06, + "loss": 0.4964, + "step": 12331 + }, + { + "epoch": 0.9066313777385678, + "grad_norm": 0.798254668712616, + "learning_rate": 4.7272887987153946e-06, + "loss": 0.5122, + "step": 12332 + }, + { + "epoch": 0.9067048963387737, + "grad_norm": 0.8270586133003235, + "learning_rate": 4.727245036198037e-06, + "loss": 0.5548, + "step": 12333 + }, + { + "epoch": 0.9067784149389796, + "grad_norm": 0.845694899559021, + "learning_rate": 4.727201270372228e-06, + "loss": 0.504, + "step": 12334 + }, + { + "epoch": 0.9068519335391854, + "grad_norm": 0.8416576385498047, + "learning_rate": 4.727157501238034e-06, + "loss": 0.5243, + "step": 12335 + }, + { + "epoch": 0.9069254521393912, + "grad_norm": 0.8584108948707581, + "learning_rate": 4.72711372879552e-06, + "loss": 0.5097, + "step": 12336 + }, + { + "epoch": 0.9069989707395971, + "grad_norm": 0.8367537260055542, + "learning_rate": 4.727069953044749e-06, + "loss": 0.5258, + "step": 12337 + }, + { + "epoch": 0.907072489339803, + "grad_norm": 0.8068572282791138, + "learning_rate": 4.727026173985788e-06, + "loss": 0.4567, + "step": 12338 + }, + { + "epoch": 0.9071460079400088, + "grad_norm": 0.8320988416671753, + "learning_rate": 4.726982391618702e-06, + "loss": 0.5626, + "step": 12339 + }, + { + "epoch": 0.9072195265402146, + "grad_norm": 0.8060988187789917, + "learning_rate": 4.7269386059435555e-06, + "loss": 0.549, + "step": 12340 + }, + { + "epoch": 0.9072930451404205, + "grad_norm": 0.7957454919815063, + "learning_rate": 4.726894816960414e-06, + "loss": 0.4989, + "step": 12341 + }, + { + "epoch": 0.9073665637406264, + "grad_norm": 0.8381626605987549, + "learning_rate": 4.726851024669341e-06, + "loss": 0.5666, + "step": 12342 + }, + { + "epoch": 0.9074400823408322, + "grad_norm": 0.8382695317268372, + "learning_rate": 4.726807229070405e-06, + "loss": 0.5237, + "step": 12343 + }, + { + "epoch": 0.9075136009410381, + "grad_norm": 0.7931427955627441, + "learning_rate": 4.726763430163667e-06, + "loss": 0.5424, + "step": 12344 + }, + { + "epoch": 0.9075871195412439, + "grad_norm": 0.8501537442207336, + "learning_rate": 4.726719627949194e-06, + "loss": 0.5449, + "step": 12345 + }, + { + "epoch": 0.9076606381414498, + "grad_norm": 0.8380720615386963, + "learning_rate": 4.726675822427051e-06, + "loss": 0.5108, + "step": 12346 + }, + { + "epoch": 0.9077341567416556, + "grad_norm": 0.8436480164527893, + "learning_rate": 4.726632013597303e-06, + "loss": 0.5635, + "step": 12347 + }, + { + "epoch": 0.9078076753418615, + "grad_norm": 0.8082118630409241, + "learning_rate": 4.726588201460015e-06, + "loss": 0.5012, + "step": 12348 + }, + { + "epoch": 0.9078811939420673, + "grad_norm": 0.854632556438446, + "learning_rate": 4.726544386015253e-06, + "loss": 0.5263, + "step": 12349 + }, + { + "epoch": 0.9079547125422732, + "grad_norm": 0.7729374766349792, + "learning_rate": 4.72650056726308e-06, + "loss": 0.4987, + "step": 12350 + }, + { + "epoch": 0.908028231142479, + "grad_norm": 0.789901077747345, + "learning_rate": 4.726456745203563e-06, + "loss": 0.5366, + "step": 12351 + }, + { + "epoch": 0.9081017497426849, + "grad_norm": 0.8006756901741028, + "learning_rate": 4.726412919836766e-06, + "loss": 0.5301, + "step": 12352 + }, + { + "epoch": 0.9081752683428908, + "grad_norm": 0.7832468748092651, + "learning_rate": 4.726369091162755e-06, + "loss": 0.4833, + "step": 12353 + }, + { + "epoch": 0.9082487869430966, + "grad_norm": 0.885749340057373, + "learning_rate": 4.726325259181595e-06, + "loss": 0.5738, + "step": 12354 + }, + { + "epoch": 0.9083223055433025, + "grad_norm": 0.8185760974884033, + "learning_rate": 4.7262814238933496e-06, + "loss": 0.5099, + "step": 12355 + }, + { + "epoch": 0.9083958241435083, + "grad_norm": 0.8172842264175415, + "learning_rate": 4.726237585298085e-06, + "loss": 0.5507, + "step": 12356 + }, + { + "epoch": 0.9084693427437142, + "grad_norm": 0.8118313550949097, + "learning_rate": 4.726193743395868e-06, + "loss": 0.4891, + "step": 12357 + }, + { + "epoch": 0.90854286134392, + "grad_norm": 0.8048453330993652, + "learning_rate": 4.726149898186761e-06, + "loss": 0.5325, + "step": 12358 + }, + { + "epoch": 0.9086163799441259, + "grad_norm": 0.8852908611297607, + "learning_rate": 4.72610604967083e-06, + "loss": 0.5817, + "step": 12359 + }, + { + "epoch": 0.9086898985443317, + "grad_norm": 0.831838071346283, + "learning_rate": 4.726062197848141e-06, + "loss": 0.5039, + "step": 12360 + }, + { + "epoch": 0.9087634171445376, + "grad_norm": 0.8262007832527161, + "learning_rate": 4.726018342718759e-06, + "loss": 0.5361, + "step": 12361 + }, + { + "epoch": 0.9088369357447434, + "grad_norm": 0.8391527533531189, + "learning_rate": 4.725974484282747e-06, + "loss": 0.5293, + "step": 12362 + }, + { + "epoch": 0.9089104543449493, + "grad_norm": 0.8258872032165527, + "learning_rate": 4.725930622540173e-06, + "loss": 0.5477, + "step": 12363 + }, + { + "epoch": 0.9089839729451551, + "grad_norm": 0.8288187980651855, + "learning_rate": 4.7258867574911005e-06, + "loss": 0.5505, + "step": 12364 + }, + { + "epoch": 0.909057491545361, + "grad_norm": 0.8228837251663208, + "learning_rate": 4.725842889135595e-06, + "loss": 0.5355, + "step": 12365 + }, + { + "epoch": 0.9091310101455669, + "grad_norm": 0.8970023989677429, + "learning_rate": 4.7257990174737225e-06, + "loss": 0.5829, + "step": 12366 + }, + { + "epoch": 0.9092045287457727, + "grad_norm": 0.8333466053009033, + "learning_rate": 4.725755142505547e-06, + "loss": 0.5377, + "step": 12367 + }, + { + "epoch": 0.9092780473459785, + "grad_norm": 0.8124305009841919, + "learning_rate": 4.725711264231135e-06, + "loss": 0.5456, + "step": 12368 + }, + { + "epoch": 0.9093515659461844, + "grad_norm": 0.7864965796470642, + "learning_rate": 4.725667382650549e-06, + "loss": 0.494, + "step": 12369 + }, + { + "epoch": 0.9094250845463903, + "grad_norm": 0.8560629487037659, + "learning_rate": 4.725623497763856e-06, + "loss": 0.5345, + "step": 12370 + }, + { + "epoch": 0.9094986031465961, + "grad_norm": 0.8484708666801453, + "learning_rate": 4.725579609571122e-06, + "loss": 0.509, + "step": 12371 + }, + { + "epoch": 0.9095721217468019, + "grad_norm": 0.8220342993736267, + "learning_rate": 4.725535718072412e-06, + "loss": 0.502, + "step": 12372 + }, + { + "epoch": 0.9096456403470078, + "grad_norm": 0.818983793258667, + "learning_rate": 4.72549182326779e-06, + "loss": 0.5212, + "step": 12373 + }, + { + "epoch": 0.9097191589472137, + "grad_norm": 0.8290420174598694, + "learning_rate": 4.725447925157322e-06, + "loss": 0.5569, + "step": 12374 + }, + { + "epoch": 0.9097926775474195, + "grad_norm": 0.8310536742210388, + "learning_rate": 4.725404023741073e-06, + "loss": 0.4849, + "step": 12375 + }, + { + "epoch": 0.9098661961476253, + "grad_norm": 0.7656113505363464, + "learning_rate": 4.725360119019107e-06, + "loss": 0.4682, + "step": 12376 + }, + { + "epoch": 0.9099397147478312, + "grad_norm": 0.805352509021759, + "learning_rate": 4.7253162109914915e-06, + "loss": 0.5265, + "step": 12377 + }, + { + "epoch": 0.9100132333480371, + "grad_norm": 0.8116934895515442, + "learning_rate": 4.72527229965829e-06, + "loss": 0.5345, + "step": 12378 + }, + { + "epoch": 0.910086751948243, + "grad_norm": 0.865117609500885, + "learning_rate": 4.725228385019569e-06, + "loss": 0.544, + "step": 12379 + }, + { + "epoch": 0.9101602705484487, + "grad_norm": 0.8335793018341064, + "learning_rate": 4.7251844670753925e-06, + "loss": 0.522, + "step": 12380 + }, + { + "epoch": 0.9102337891486546, + "grad_norm": 0.7997403144836426, + "learning_rate": 4.725140545825827e-06, + "loss": 0.5157, + "step": 12381 + }, + { + "epoch": 0.9103073077488605, + "grad_norm": 0.7785705924034119, + "learning_rate": 4.725096621270936e-06, + "loss": 0.4917, + "step": 12382 + }, + { + "epoch": 0.9103808263490664, + "grad_norm": 0.871350884437561, + "learning_rate": 4.7250526934107865e-06, + "loss": 0.5468, + "step": 12383 + }, + { + "epoch": 0.9104543449492721, + "grad_norm": 0.7988447546958923, + "learning_rate": 4.725008762245443e-06, + "loss": 0.5241, + "step": 12384 + }, + { + "epoch": 0.910527863549478, + "grad_norm": 0.813126266002655, + "learning_rate": 4.724964827774972e-06, + "loss": 0.5072, + "step": 12385 + }, + { + "epoch": 0.9106013821496839, + "grad_norm": 0.9244722723960876, + "learning_rate": 4.7249208899994355e-06, + "loss": 0.5334, + "step": 12386 + }, + { + "epoch": 0.9106749007498898, + "grad_norm": 0.8003733158111572, + "learning_rate": 4.724876948918902e-06, + "loss": 0.5166, + "step": 12387 + }, + { + "epoch": 0.9107484193500955, + "grad_norm": 0.8424487709999084, + "learning_rate": 4.724833004533436e-06, + "loss": 0.5358, + "step": 12388 + }, + { + "epoch": 0.9108219379503014, + "grad_norm": 0.7864069938659668, + "learning_rate": 4.724789056843101e-06, + "loss": 0.4921, + "step": 12389 + }, + { + "epoch": 0.9108954565505073, + "grad_norm": 0.7806556820869446, + "learning_rate": 4.724745105847965e-06, + "loss": 0.5393, + "step": 12390 + }, + { + "epoch": 0.9109689751507132, + "grad_norm": 0.82209312915802, + "learning_rate": 4.724701151548091e-06, + "loss": 0.5469, + "step": 12391 + }, + { + "epoch": 0.911042493750919, + "grad_norm": 0.7831273674964905, + "learning_rate": 4.724657193943546e-06, + "loss": 0.5072, + "step": 12392 + }, + { + "epoch": 0.9111160123511248, + "grad_norm": 0.8684839010238647, + "learning_rate": 4.724613233034394e-06, + "loss": 0.5588, + "step": 12393 + }, + { + "epoch": 0.9111895309513307, + "grad_norm": 0.820296049118042, + "learning_rate": 4.724569268820702e-06, + "loss": 0.5049, + "step": 12394 + }, + { + "epoch": 0.9112630495515366, + "grad_norm": 0.8134139776229858, + "learning_rate": 4.724525301302533e-06, + "loss": 0.5067, + "step": 12395 + }, + { + "epoch": 0.9113365681517424, + "grad_norm": 0.8545206189155579, + "learning_rate": 4.7244813304799535e-06, + "loss": 0.4961, + "step": 12396 + }, + { + "epoch": 0.9114100867519482, + "grad_norm": 0.7821997404098511, + "learning_rate": 4.724437356353029e-06, + "loss": 0.5007, + "step": 12397 + }, + { + "epoch": 0.9114836053521541, + "grad_norm": 0.7877660989761353, + "learning_rate": 4.724393378921825e-06, + "loss": 0.479, + "step": 12398 + }, + { + "epoch": 0.91155712395236, + "grad_norm": 0.824164092540741, + "learning_rate": 4.724349398186406e-06, + "loss": 0.5782, + "step": 12399 + }, + { + "epoch": 0.9116306425525658, + "grad_norm": 0.8221728205680847, + "learning_rate": 4.7243054141468384e-06, + "loss": 0.4802, + "step": 12400 + }, + { + "epoch": 0.9117041611527716, + "grad_norm": 0.8155287504196167, + "learning_rate": 4.724261426803186e-06, + "loss": 0.5342, + "step": 12401 + }, + { + "epoch": 0.9117776797529775, + "grad_norm": 0.8439645767211914, + "learning_rate": 4.724217436155516e-06, + "loss": 0.5649, + "step": 12402 + }, + { + "epoch": 0.9118511983531834, + "grad_norm": 0.8098392486572266, + "learning_rate": 4.724173442203892e-06, + "loss": 0.5333, + "step": 12403 + }, + { + "epoch": 0.9119247169533892, + "grad_norm": 0.779233455657959, + "learning_rate": 4.7241294449483795e-06, + "loss": 0.4828, + "step": 12404 + }, + { + "epoch": 0.911998235553595, + "grad_norm": 0.8708856701850891, + "learning_rate": 4.724085444389046e-06, + "loss": 0.5283, + "step": 12405 + }, + { + "epoch": 0.9120717541538009, + "grad_norm": 0.871131956577301, + "learning_rate": 4.724041440525955e-06, + "loss": 0.5399, + "step": 12406 + }, + { + "epoch": 0.9121452727540068, + "grad_norm": 0.8176207542419434, + "learning_rate": 4.723997433359172e-06, + "loss": 0.5445, + "step": 12407 + }, + { + "epoch": 0.9122187913542126, + "grad_norm": 0.8295831084251404, + "learning_rate": 4.723953422888763e-06, + "loss": 0.5399, + "step": 12408 + }, + { + "epoch": 0.9122923099544185, + "grad_norm": 0.8257999420166016, + "learning_rate": 4.723909409114792e-06, + "loss": 0.5699, + "step": 12409 + }, + { + "epoch": 0.9123658285546243, + "grad_norm": 0.8375827670097351, + "learning_rate": 4.723865392037326e-06, + "loss": 0.5365, + "step": 12410 + }, + { + "epoch": 0.9124393471548302, + "grad_norm": 0.8188108801841736, + "learning_rate": 4.7238213716564295e-06, + "loss": 0.5409, + "step": 12411 + }, + { + "epoch": 0.912512865755036, + "grad_norm": 0.8225244879722595, + "learning_rate": 4.723777347972168e-06, + "loss": 0.5053, + "step": 12412 + }, + { + "epoch": 0.9125863843552419, + "grad_norm": 0.8240169286727905, + "learning_rate": 4.723733320984608e-06, + "loss": 0.5148, + "step": 12413 + }, + { + "epoch": 0.9126599029554477, + "grad_norm": 0.8023998737335205, + "learning_rate": 4.723689290693813e-06, + "loss": 0.5119, + "step": 12414 + }, + { + "epoch": 0.9127334215556536, + "grad_norm": 0.8158513307571411, + "learning_rate": 4.723645257099849e-06, + "loss": 0.5318, + "step": 12415 + }, + { + "epoch": 0.9128069401558594, + "grad_norm": 0.8682414889335632, + "learning_rate": 4.723601220202783e-06, + "loss": 0.5094, + "step": 12416 + }, + { + "epoch": 0.9128804587560653, + "grad_norm": 0.8123832941055298, + "learning_rate": 4.723557180002678e-06, + "loss": 0.5795, + "step": 12417 + }, + { + "epoch": 0.9129539773562712, + "grad_norm": 0.8273134827613831, + "learning_rate": 4.723513136499601e-06, + "loss": 0.5608, + "step": 12418 + }, + { + "epoch": 0.913027495956477, + "grad_norm": 0.8509067893028259, + "learning_rate": 4.723469089693617e-06, + "loss": 0.5653, + "step": 12419 + }, + { + "epoch": 0.9131010145566828, + "grad_norm": 0.8501867055892944, + "learning_rate": 4.723425039584791e-06, + "loss": 0.5454, + "step": 12420 + }, + { + "epoch": 0.9131745331568887, + "grad_norm": 0.8102939128875732, + "learning_rate": 4.72338098617319e-06, + "loss": 0.5659, + "step": 12421 + }, + { + "epoch": 0.9132480517570946, + "grad_norm": 0.8625596165657043, + "learning_rate": 4.723336929458877e-06, + "loss": 0.5697, + "step": 12422 + }, + { + "epoch": 0.9133215703573004, + "grad_norm": 0.802345871925354, + "learning_rate": 4.72329286944192e-06, + "loss": 0.5327, + "step": 12423 + }, + { + "epoch": 0.9133950889575062, + "grad_norm": 0.8272935152053833, + "learning_rate": 4.723248806122383e-06, + "loss": 0.5601, + "step": 12424 + }, + { + "epoch": 0.9134686075577121, + "grad_norm": 0.8055128455162048, + "learning_rate": 4.723204739500331e-06, + "loss": 0.529, + "step": 12425 + }, + { + "epoch": 0.913542126157918, + "grad_norm": 0.8916405439376831, + "learning_rate": 4.723160669575829e-06, + "loss": 0.6015, + "step": 12426 + }, + { + "epoch": 0.9136156447581238, + "grad_norm": 0.8187142014503479, + "learning_rate": 4.723116596348946e-06, + "loss": 0.509, + "step": 12427 + }, + { + "epoch": 0.9136891633583296, + "grad_norm": 0.8451641201972961, + "learning_rate": 4.723072519819743e-06, + "loss": 0.5393, + "step": 12428 + }, + { + "epoch": 0.9137626819585355, + "grad_norm": 0.804922878742218, + "learning_rate": 4.723028439988288e-06, + "loss": 0.492, + "step": 12429 + }, + { + "epoch": 0.9138362005587414, + "grad_norm": 0.8010591268539429, + "learning_rate": 4.722984356854647e-06, + "loss": 0.5622, + "step": 12430 + }, + { + "epoch": 0.9139097191589473, + "grad_norm": 0.8713906407356262, + "learning_rate": 4.722940270418883e-06, + "loss": 0.5275, + "step": 12431 + }, + { + "epoch": 0.913983237759153, + "grad_norm": 0.8358128070831299, + "learning_rate": 4.722896180681064e-06, + "loss": 0.5448, + "step": 12432 + }, + { + "epoch": 0.9140567563593589, + "grad_norm": 0.8632388114929199, + "learning_rate": 4.722852087641254e-06, + "loss": 0.5156, + "step": 12433 + }, + { + "epoch": 0.9141302749595648, + "grad_norm": 0.793736457824707, + "learning_rate": 4.722807991299519e-06, + "loss": 0.4898, + "step": 12434 + }, + { + "epoch": 0.9142037935597707, + "grad_norm": 0.8122971057891846, + "learning_rate": 4.722763891655925e-06, + "loss": 0.482, + "step": 12435 + }, + { + "epoch": 0.9142773121599764, + "grad_norm": 0.8386822938919067, + "learning_rate": 4.722719788710537e-06, + "loss": 0.5331, + "step": 12436 + }, + { + "epoch": 0.9143508307601823, + "grad_norm": 0.8350658416748047, + "learning_rate": 4.7226756824634196e-06, + "loss": 0.5036, + "step": 12437 + }, + { + "epoch": 0.9144243493603882, + "grad_norm": 0.8068216443061829, + "learning_rate": 4.72263157291464e-06, + "loss": 0.5355, + "step": 12438 + }, + { + "epoch": 0.9144978679605941, + "grad_norm": 0.8543376922607422, + "learning_rate": 4.722587460064262e-06, + "loss": 0.5626, + "step": 12439 + }, + { + "epoch": 0.9145713865607998, + "grad_norm": 0.8737711906433105, + "learning_rate": 4.722543343912352e-06, + "loss": 0.5627, + "step": 12440 + }, + { + "epoch": 0.9146449051610057, + "grad_norm": 0.7837775349617004, + "learning_rate": 4.7224992244589765e-06, + "loss": 0.5055, + "step": 12441 + }, + { + "epoch": 0.9147184237612116, + "grad_norm": 0.8971225023269653, + "learning_rate": 4.7224551017042e-06, + "loss": 0.5496, + "step": 12442 + }, + { + "epoch": 0.9147919423614175, + "grad_norm": 0.8364417552947998, + "learning_rate": 4.722410975648087e-06, + "loss": 0.5038, + "step": 12443 + }, + { + "epoch": 0.9148654609616232, + "grad_norm": 0.8091779947280884, + "learning_rate": 4.722366846290706e-06, + "loss": 0.5127, + "step": 12444 + }, + { + "epoch": 0.9149389795618291, + "grad_norm": 0.7851834893226624, + "learning_rate": 4.722322713632119e-06, + "loss": 0.5057, + "step": 12445 + }, + { + "epoch": 0.915012498162035, + "grad_norm": 0.8177030086517334, + "learning_rate": 4.7222785776723944e-06, + "loss": 0.4892, + "step": 12446 + }, + { + "epoch": 0.9150860167622409, + "grad_norm": 0.8208519220352173, + "learning_rate": 4.722234438411596e-06, + "loss": 0.5104, + "step": 12447 + }, + { + "epoch": 0.9151595353624467, + "grad_norm": 0.8398533463478088, + "learning_rate": 4.72219029584979e-06, + "loss": 0.5355, + "step": 12448 + }, + { + "epoch": 0.9152330539626525, + "grad_norm": 0.8404563665390015, + "learning_rate": 4.722146149987043e-06, + "loss": 0.5306, + "step": 12449 + }, + { + "epoch": 0.9153065725628584, + "grad_norm": 0.7999579906463623, + "learning_rate": 4.722102000823418e-06, + "loss": 0.5049, + "step": 12450 + }, + { + "epoch": 0.9153800911630643, + "grad_norm": 0.8147726058959961, + "learning_rate": 4.722057848358984e-06, + "loss": 0.5885, + "step": 12451 + }, + { + "epoch": 0.9154536097632701, + "grad_norm": 0.8250687122344971, + "learning_rate": 4.722013692593804e-06, + "loss": 0.5637, + "step": 12452 + }, + { + "epoch": 0.9155271283634759, + "grad_norm": 0.8297216892242432, + "learning_rate": 4.721969533527944e-06, + "loss": 0.5411, + "step": 12453 + }, + { + "epoch": 0.9156006469636818, + "grad_norm": 0.8283566236495972, + "learning_rate": 4.72192537116147e-06, + "loss": 0.5258, + "step": 12454 + }, + { + "epoch": 0.9156741655638877, + "grad_norm": 0.8133636116981506, + "learning_rate": 4.721881205494448e-06, + "loss": 0.5258, + "step": 12455 + }, + { + "epoch": 0.9157476841640935, + "grad_norm": 0.7956687211990356, + "learning_rate": 4.721837036526942e-06, + "loss": 0.5119, + "step": 12456 + }, + { + "epoch": 0.9158212027642993, + "grad_norm": 0.8201804161071777, + "learning_rate": 4.72179286425902e-06, + "loss": 0.5372, + "step": 12457 + }, + { + "epoch": 0.9158947213645052, + "grad_norm": 0.7815749049186707, + "learning_rate": 4.721748688690746e-06, + "loss": 0.4593, + "step": 12458 + }, + { + "epoch": 0.9159682399647111, + "grad_norm": 0.8228449821472168, + "learning_rate": 4.721704509822186e-06, + "loss": 0.5084, + "step": 12459 + }, + { + "epoch": 0.9160417585649169, + "grad_norm": 0.8656172156333923, + "learning_rate": 4.721660327653405e-06, + "loss": 0.5395, + "step": 12460 + }, + { + "epoch": 0.9161152771651228, + "grad_norm": 0.8150357604026794, + "learning_rate": 4.7216161421844695e-06, + "loss": 0.5072, + "step": 12461 + }, + { + "epoch": 0.9161887957653286, + "grad_norm": 0.7729746103286743, + "learning_rate": 4.721571953415445e-06, + "loss": 0.4609, + "step": 12462 + }, + { + "epoch": 0.9162623143655345, + "grad_norm": 0.7838029861450195, + "learning_rate": 4.721527761346397e-06, + "loss": 0.4779, + "step": 12463 + }, + { + "epoch": 0.9163358329657403, + "grad_norm": 0.8193151950836182, + "learning_rate": 4.721483565977391e-06, + "loss": 0.5383, + "step": 12464 + }, + { + "epoch": 0.9164093515659462, + "grad_norm": 0.8070891499519348, + "learning_rate": 4.721439367308493e-06, + "loss": 0.552, + "step": 12465 + }, + { + "epoch": 0.916482870166152, + "grad_norm": 0.8143934011459351, + "learning_rate": 4.721395165339769e-06, + "loss": 0.514, + "step": 12466 + }, + { + "epoch": 0.9165563887663579, + "grad_norm": 0.8594380021095276, + "learning_rate": 4.721350960071284e-06, + "loss": 0.5692, + "step": 12467 + }, + { + "epoch": 0.9166299073665637, + "grad_norm": 0.785399854183197, + "learning_rate": 4.721306751503103e-06, + "loss": 0.4774, + "step": 12468 + }, + { + "epoch": 0.9167034259667696, + "grad_norm": 0.8105850219726562, + "learning_rate": 4.721262539635293e-06, + "loss": 0.5124, + "step": 12469 + }, + { + "epoch": 0.9167769445669754, + "grad_norm": 0.8678642511367798, + "learning_rate": 4.7212183244679186e-06, + "loss": 0.5227, + "step": 12470 + }, + { + "epoch": 0.9168504631671813, + "grad_norm": 0.8215739130973816, + "learning_rate": 4.721174106001047e-06, + "loss": 0.536, + "step": 12471 + }, + { + "epoch": 0.9169239817673871, + "grad_norm": 0.7824904918670654, + "learning_rate": 4.721129884234742e-06, + "loss": 0.5217, + "step": 12472 + }, + { + "epoch": 0.916997500367593, + "grad_norm": 0.8295871019363403, + "learning_rate": 4.7210856591690705e-06, + "loss": 0.4998, + "step": 12473 + }, + { + "epoch": 0.9170710189677989, + "grad_norm": 0.7937814593315125, + "learning_rate": 4.721041430804098e-06, + "loss": 0.5046, + "step": 12474 + }, + { + "epoch": 0.9171445375680047, + "grad_norm": 0.8270875811576843, + "learning_rate": 4.72099719913989e-06, + "loss": 0.4862, + "step": 12475 + }, + { + "epoch": 0.9172180561682105, + "grad_norm": 0.8025266528129578, + "learning_rate": 4.720952964176512e-06, + "loss": 0.5482, + "step": 12476 + }, + { + "epoch": 0.9172915747684164, + "grad_norm": 0.9044243693351746, + "learning_rate": 4.72090872591403e-06, + "loss": 0.5336, + "step": 12477 + }, + { + "epoch": 0.9173650933686223, + "grad_norm": 0.7942496538162231, + "learning_rate": 4.72086448435251e-06, + "loss": 0.476, + "step": 12478 + }, + { + "epoch": 0.9174386119688281, + "grad_norm": 0.8552907705307007, + "learning_rate": 4.720820239492018e-06, + "loss": 0.528, + "step": 12479 + }, + { + "epoch": 0.9175121305690339, + "grad_norm": 0.8124240040779114, + "learning_rate": 4.7207759913326176e-06, + "loss": 0.4815, + "step": 12480 + }, + { + "epoch": 0.9175856491692398, + "grad_norm": 0.8069126605987549, + "learning_rate": 4.7207317398743764e-06, + "loss": 0.5408, + "step": 12481 + }, + { + "epoch": 0.9176591677694457, + "grad_norm": 0.8438368439674377, + "learning_rate": 4.720687485117361e-06, + "loss": 0.5553, + "step": 12482 + }, + { + "epoch": 0.9177326863696516, + "grad_norm": 0.8451912999153137, + "learning_rate": 4.720643227061636e-06, + "loss": 0.4963, + "step": 12483 + }, + { + "epoch": 0.9178062049698573, + "grad_norm": 0.866154670715332, + "learning_rate": 4.720598965707266e-06, + "loss": 0.5752, + "step": 12484 + }, + { + "epoch": 0.9178797235700632, + "grad_norm": 0.8020147681236267, + "learning_rate": 4.720554701054318e-06, + "loss": 0.5103, + "step": 12485 + }, + { + "epoch": 0.9179532421702691, + "grad_norm": 0.8247328400611877, + "learning_rate": 4.720510433102858e-06, + "loss": 0.5608, + "step": 12486 + }, + { + "epoch": 0.918026760770475, + "grad_norm": 0.8341869115829468, + "learning_rate": 4.7204661618529515e-06, + "loss": 0.5322, + "step": 12487 + }, + { + "epoch": 0.9181002793706807, + "grad_norm": 0.8289043307304382, + "learning_rate": 4.720421887304663e-06, + "loss": 0.4524, + "step": 12488 + }, + { + "epoch": 0.9181737979708866, + "grad_norm": 0.8343620300292969, + "learning_rate": 4.72037760945806e-06, + "loss": 0.521, + "step": 12489 + }, + { + "epoch": 0.9182473165710925, + "grad_norm": 0.8220709562301636, + "learning_rate": 4.720333328313208e-06, + "loss": 0.5405, + "step": 12490 + }, + { + "epoch": 0.9183208351712984, + "grad_norm": 0.8276365995407104, + "learning_rate": 4.720289043870172e-06, + "loss": 0.5342, + "step": 12491 + }, + { + "epoch": 0.9183943537715042, + "grad_norm": 0.828594982624054, + "learning_rate": 4.720244756129019e-06, + "loss": 0.5263, + "step": 12492 + }, + { + "epoch": 0.91846787237171, + "grad_norm": 0.846060037612915, + "learning_rate": 4.720200465089812e-06, + "loss": 0.5424, + "step": 12493 + }, + { + "epoch": 0.9185413909719159, + "grad_norm": 0.8333685994148254, + "learning_rate": 4.720156170752621e-06, + "loss": 0.5527, + "step": 12494 + }, + { + "epoch": 0.9186149095721218, + "grad_norm": 0.8488031625747681, + "learning_rate": 4.720111873117509e-06, + "loss": 0.5263, + "step": 12495 + }, + { + "epoch": 0.9186884281723277, + "grad_norm": 0.8128961324691772, + "learning_rate": 4.720067572184542e-06, + "loss": 0.5179, + "step": 12496 + }, + { + "epoch": 0.9187619467725334, + "grad_norm": 0.814037024974823, + "learning_rate": 4.720023267953786e-06, + "loss": 0.5289, + "step": 12497 + }, + { + "epoch": 0.9188354653727393, + "grad_norm": 0.8127397894859314, + "learning_rate": 4.7199789604253076e-06, + "loss": 0.512, + "step": 12498 + }, + { + "epoch": 0.9189089839729452, + "grad_norm": 0.7933748364448547, + "learning_rate": 4.719934649599172e-06, + "loss": 0.4771, + "step": 12499 + }, + { + "epoch": 0.9189825025731511, + "grad_norm": 0.8347010016441345, + "learning_rate": 4.719890335475444e-06, + "loss": 0.5131, + "step": 12500 + }, + { + "epoch": 0.9190560211733568, + "grad_norm": 0.8059952259063721, + "learning_rate": 4.7198460180541914e-06, + "loss": 0.4942, + "step": 12501 + }, + { + "epoch": 0.9191295397735627, + "grad_norm": 0.8947141170501709, + "learning_rate": 4.719801697335479e-06, + "loss": 0.5152, + "step": 12502 + }, + { + "epoch": 0.9192030583737686, + "grad_norm": 0.8276797533035278, + "learning_rate": 4.7197573733193725e-06, + "loss": 0.5152, + "step": 12503 + }, + { + "epoch": 0.9192765769739745, + "grad_norm": 0.8060464262962341, + "learning_rate": 4.7197130460059385e-06, + "loss": 0.5031, + "step": 12504 + }, + { + "epoch": 0.9193500955741802, + "grad_norm": 0.8438850045204163, + "learning_rate": 4.719668715395242e-06, + "loss": 0.5249, + "step": 12505 + }, + { + "epoch": 0.9194236141743861, + "grad_norm": 0.8821241855621338, + "learning_rate": 4.719624381487349e-06, + "loss": 0.5583, + "step": 12506 + }, + { + "epoch": 0.919497132774592, + "grad_norm": 0.8470426797866821, + "learning_rate": 4.719580044282326e-06, + "loss": 0.5029, + "step": 12507 + }, + { + "epoch": 0.9195706513747979, + "grad_norm": 0.8035007119178772, + "learning_rate": 4.719535703780238e-06, + "loss": 0.5094, + "step": 12508 + }, + { + "epoch": 0.9196441699750036, + "grad_norm": 0.8093074560165405, + "learning_rate": 4.719491359981151e-06, + "loss": 0.4811, + "step": 12509 + }, + { + "epoch": 0.9197176885752095, + "grad_norm": 0.820161759853363, + "learning_rate": 4.719447012885131e-06, + "loss": 0.5317, + "step": 12510 + }, + { + "epoch": 0.9197912071754154, + "grad_norm": 0.8256163001060486, + "learning_rate": 4.719402662492245e-06, + "loss": 0.4978, + "step": 12511 + }, + { + "epoch": 0.9198647257756213, + "grad_norm": 0.7840100526809692, + "learning_rate": 4.719358308802558e-06, + "loss": 0.4883, + "step": 12512 + }, + { + "epoch": 0.919938244375827, + "grad_norm": 0.8255428671836853, + "learning_rate": 4.719313951816134e-06, + "loss": 0.5427, + "step": 12513 + }, + { + "epoch": 0.9200117629760329, + "grad_norm": 0.8035992383956909, + "learning_rate": 4.719269591533042e-06, + "loss": 0.4852, + "step": 12514 + }, + { + "epoch": 0.9200852815762388, + "grad_norm": 0.8637033104896545, + "learning_rate": 4.7192252279533455e-06, + "loss": 0.5249, + "step": 12515 + }, + { + "epoch": 0.9201588001764447, + "grad_norm": 0.850702702999115, + "learning_rate": 4.719180861077113e-06, + "loss": 0.5492, + "step": 12516 + }, + { + "epoch": 0.9202323187766505, + "grad_norm": 0.777526319026947, + "learning_rate": 4.719136490904408e-06, + "loss": 0.5249, + "step": 12517 + }, + { + "epoch": 0.9203058373768563, + "grad_norm": 0.8173571228981018, + "learning_rate": 4.7190921174352965e-06, + "loss": 0.5476, + "step": 12518 + }, + { + "epoch": 0.9203793559770622, + "grad_norm": 0.9253337979316711, + "learning_rate": 4.719047740669846e-06, + "loss": 0.5358, + "step": 12519 + }, + { + "epoch": 0.9204528745772681, + "grad_norm": 0.8976876139640808, + "learning_rate": 4.719003360608122e-06, + "loss": 0.5603, + "step": 12520 + }, + { + "epoch": 0.9205263931774739, + "grad_norm": 0.7995920777320862, + "learning_rate": 4.71895897725019e-06, + "loss": 0.5219, + "step": 12521 + }, + { + "epoch": 0.9205999117776797, + "grad_norm": 0.8047534227371216, + "learning_rate": 4.718914590596115e-06, + "loss": 0.4762, + "step": 12522 + }, + { + "epoch": 0.9206734303778856, + "grad_norm": 0.8654239177703857, + "learning_rate": 4.718870200645964e-06, + "loss": 0.5221, + "step": 12523 + }, + { + "epoch": 0.9207469489780915, + "grad_norm": 0.8043009638786316, + "learning_rate": 4.7188258073998035e-06, + "loss": 0.5152, + "step": 12524 + }, + { + "epoch": 0.9208204675782973, + "grad_norm": 0.8268427848815918, + "learning_rate": 4.718781410857699e-06, + "loss": 0.544, + "step": 12525 + }, + { + "epoch": 0.9208939861785032, + "grad_norm": 0.7908338308334351, + "learning_rate": 4.7187370110197155e-06, + "loss": 0.5126, + "step": 12526 + }, + { + "epoch": 0.920967504778709, + "grad_norm": 0.8456102013587952, + "learning_rate": 4.71869260788592e-06, + "loss": 0.5757, + "step": 12527 + }, + { + "epoch": 0.9210410233789149, + "grad_norm": 0.8114858865737915, + "learning_rate": 4.718648201456378e-06, + "loss": 0.4837, + "step": 12528 + }, + { + "epoch": 0.9211145419791207, + "grad_norm": 0.7885743379592896, + "learning_rate": 4.718603791731155e-06, + "loss": 0.4849, + "step": 12529 + }, + { + "epoch": 0.9211880605793266, + "grad_norm": 0.8261392116546631, + "learning_rate": 4.718559378710318e-06, + "loss": 0.5315, + "step": 12530 + }, + { + "epoch": 0.9212615791795324, + "grad_norm": 0.7781260013580322, + "learning_rate": 4.718514962393933e-06, + "loss": 0.5018, + "step": 12531 + }, + { + "epoch": 0.9213350977797383, + "grad_norm": 0.8511605858802795, + "learning_rate": 4.718470542782065e-06, + "loss": 0.5507, + "step": 12532 + }, + { + "epoch": 0.9214086163799441, + "grad_norm": 0.9188262224197388, + "learning_rate": 4.718426119874781e-06, + "loss": 0.5567, + "step": 12533 + }, + { + "epoch": 0.92148213498015, + "grad_norm": 0.8330750465393066, + "learning_rate": 4.718381693672146e-06, + "loss": 0.5056, + "step": 12534 + }, + { + "epoch": 0.9215556535803558, + "grad_norm": 0.7906540632247925, + "learning_rate": 4.718337264174226e-06, + "loss": 0.5091, + "step": 12535 + }, + { + "epoch": 0.9216291721805617, + "grad_norm": 0.7890476584434509, + "learning_rate": 4.718292831381088e-06, + "loss": 0.5101, + "step": 12536 + }, + { + "epoch": 0.9217026907807675, + "grad_norm": 0.8076112866401672, + "learning_rate": 4.7182483952927984e-06, + "loss": 0.5021, + "step": 12537 + }, + { + "epoch": 0.9217762093809734, + "grad_norm": 0.8272475600242615, + "learning_rate": 4.718203955909421e-06, + "loss": 0.5229, + "step": 12538 + }, + { + "epoch": 0.9218497279811793, + "grad_norm": 0.811841607093811, + "learning_rate": 4.718159513231024e-06, + "loss": 0.5168, + "step": 12539 + }, + { + "epoch": 0.9219232465813851, + "grad_norm": 0.7848494648933411, + "learning_rate": 4.718115067257671e-06, + "loss": 0.4829, + "step": 12540 + }, + { + "epoch": 0.9219967651815909, + "grad_norm": 0.8100199103355408, + "learning_rate": 4.718070617989431e-06, + "loss": 0.5277, + "step": 12541 + }, + { + "epoch": 0.9220702837817968, + "grad_norm": 0.7707422971725464, + "learning_rate": 4.718026165426368e-06, + "loss": 0.4872, + "step": 12542 + }, + { + "epoch": 0.9221438023820027, + "grad_norm": 0.8547807931900024, + "learning_rate": 4.717981709568549e-06, + "loss": 0.5159, + "step": 12543 + }, + { + "epoch": 0.9222173209822085, + "grad_norm": 0.7954878211021423, + "learning_rate": 4.717937250416039e-06, + "loss": 0.526, + "step": 12544 + }, + { + "epoch": 0.9222908395824143, + "grad_norm": 0.7813257575035095, + "learning_rate": 4.717892787968905e-06, + "loss": 0.4445, + "step": 12545 + }, + { + "epoch": 0.9223643581826202, + "grad_norm": 0.8317292332649231, + "learning_rate": 4.717848322227213e-06, + "loss": 0.5207, + "step": 12546 + }, + { + "epoch": 0.9224378767828261, + "grad_norm": 0.8151893615722656, + "learning_rate": 4.717803853191028e-06, + "loss": 0.5177, + "step": 12547 + }, + { + "epoch": 0.922511395383032, + "grad_norm": 0.8274028301239014, + "learning_rate": 4.717759380860417e-06, + "loss": 0.4741, + "step": 12548 + }, + { + "epoch": 0.9225849139832377, + "grad_norm": 0.8267737627029419, + "learning_rate": 4.717714905235446e-06, + "loss": 0.5189, + "step": 12549 + }, + { + "epoch": 0.9226584325834436, + "grad_norm": 0.8541632890701294, + "learning_rate": 4.717670426316182e-06, + "loss": 0.512, + "step": 12550 + }, + { + "epoch": 0.9227319511836495, + "grad_norm": 0.8231522440910339, + "learning_rate": 4.717625944102689e-06, + "loss": 0.5195, + "step": 12551 + }, + { + "epoch": 0.9228054697838554, + "grad_norm": 0.7958799004554749, + "learning_rate": 4.7175814585950345e-06, + "loss": 0.481, + "step": 12552 + }, + { + "epoch": 0.9228789883840611, + "grad_norm": 0.8195514678955078, + "learning_rate": 4.7175369697932835e-06, + "loss": 0.4995, + "step": 12553 + }, + { + "epoch": 0.922952506984267, + "grad_norm": 0.7815265655517578, + "learning_rate": 4.717492477697503e-06, + "loss": 0.5381, + "step": 12554 + }, + { + "epoch": 0.9230260255844729, + "grad_norm": 0.8164342641830444, + "learning_rate": 4.717447982307759e-06, + "loss": 0.5055, + "step": 12555 + }, + { + "epoch": 0.9230995441846788, + "grad_norm": 0.8176435828208923, + "learning_rate": 4.7174034836241176e-06, + "loss": 0.4934, + "step": 12556 + }, + { + "epoch": 0.9231730627848845, + "grad_norm": 0.8668699264526367, + "learning_rate": 4.7173589816466445e-06, + "loss": 0.5085, + "step": 12557 + }, + { + "epoch": 0.9232465813850904, + "grad_norm": 0.8264632225036621, + "learning_rate": 4.717314476375407e-06, + "loss": 0.5124, + "step": 12558 + }, + { + "epoch": 0.9233200999852963, + "grad_norm": 0.8348678946495056, + "learning_rate": 4.717269967810468e-06, + "loss": 0.5453, + "step": 12559 + }, + { + "epoch": 0.9233936185855022, + "grad_norm": 0.7963692545890808, + "learning_rate": 4.717225455951897e-06, + "loss": 0.5147, + "step": 12560 + }, + { + "epoch": 0.9234671371857079, + "grad_norm": 0.835648775100708, + "learning_rate": 4.71718094079976e-06, + "loss": 0.531, + "step": 12561 + }, + { + "epoch": 0.9235406557859138, + "grad_norm": 0.8206196427345276, + "learning_rate": 4.717136422354122e-06, + "loss": 0.5326, + "step": 12562 + }, + { + "epoch": 0.9236141743861197, + "grad_norm": 0.8195794820785522, + "learning_rate": 4.7170919006150475e-06, + "loss": 0.5171, + "step": 12563 + }, + { + "epoch": 0.9236876929863256, + "grad_norm": 0.809111475944519, + "learning_rate": 4.7170473755826065e-06, + "loss": 0.511, + "step": 12564 + }, + { + "epoch": 0.9237612115865314, + "grad_norm": 0.7818899750709534, + "learning_rate": 4.717002847256862e-06, + "loss": 0.4902, + "step": 12565 + }, + { + "epoch": 0.9238347301867372, + "grad_norm": 0.8728354573249817, + "learning_rate": 4.716958315637881e-06, + "loss": 0.5669, + "step": 12566 + }, + { + "epoch": 0.9239082487869431, + "grad_norm": 0.8191679120063782, + "learning_rate": 4.7169137807257295e-06, + "loss": 0.5475, + "step": 12567 + }, + { + "epoch": 0.923981767387149, + "grad_norm": 0.8323441743850708, + "learning_rate": 4.716869242520475e-06, + "loss": 0.5261, + "step": 12568 + }, + { + "epoch": 0.9240552859873548, + "grad_norm": 0.8329950571060181, + "learning_rate": 4.716824701022182e-06, + "loss": 0.5169, + "step": 12569 + }, + { + "epoch": 0.9241288045875606, + "grad_norm": 0.8936725854873657, + "learning_rate": 4.716780156230917e-06, + "loss": 0.5734, + "step": 12570 + }, + { + "epoch": 0.9242023231877665, + "grad_norm": 0.8295260071754456, + "learning_rate": 4.716735608146747e-06, + "loss": 0.553, + "step": 12571 + }, + { + "epoch": 0.9242758417879724, + "grad_norm": 0.8480058312416077, + "learning_rate": 4.716691056769738e-06, + "loss": 0.5592, + "step": 12572 + }, + { + "epoch": 0.9243493603881782, + "grad_norm": 0.8497088551521301, + "learning_rate": 4.716646502099955e-06, + "loss": 0.5212, + "step": 12573 + }, + { + "epoch": 0.924422878988384, + "grad_norm": 0.8027347326278687, + "learning_rate": 4.716601944137465e-06, + "loss": 0.5088, + "step": 12574 + }, + { + "epoch": 0.9244963975885899, + "grad_norm": 0.8321629762649536, + "learning_rate": 4.716557382882335e-06, + "loss": 0.5074, + "step": 12575 + }, + { + "epoch": 0.9245699161887958, + "grad_norm": 0.7566220760345459, + "learning_rate": 4.71651281833463e-06, + "loss": 0.5041, + "step": 12576 + }, + { + "epoch": 0.9246434347890016, + "grad_norm": 0.820753812789917, + "learning_rate": 4.716468250494417e-06, + "loss": 0.5341, + "step": 12577 + }, + { + "epoch": 0.9247169533892075, + "grad_norm": 0.7989791631698608, + "learning_rate": 4.716423679361761e-06, + "loss": 0.5469, + "step": 12578 + }, + { + "epoch": 0.9247904719894133, + "grad_norm": 0.7533949613571167, + "learning_rate": 4.716379104936729e-06, + "loss": 0.511, + "step": 12579 + }, + { + "epoch": 0.9248639905896192, + "grad_norm": 0.8375959396362305, + "learning_rate": 4.716334527219388e-06, + "loss": 0.5493, + "step": 12580 + }, + { + "epoch": 0.924937509189825, + "grad_norm": 0.8188689351081848, + "learning_rate": 4.716289946209802e-06, + "loss": 0.5068, + "step": 12581 + }, + { + "epoch": 0.9250110277900309, + "grad_norm": 0.849807620048523, + "learning_rate": 4.71624536190804e-06, + "loss": 0.5305, + "step": 12582 + }, + { + "epoch": 0.9250845463902367, + "grad_norm": 0.8028153777122498, + "learning_rate": 4.7162007743141665e-06, + "loss": 0.5284, + "step": 12583 + }, + { + "epoch": 0.9251580649904426, + "grad_norm": 0.8437380194664001, + "learning_rate": 4.716156183428248e-06, + "loss": 0.5118, + "step": 12584 + }, + { + "epoch": 0.9252315835906484, + "grad_norm": 0.7953616976737976, + "learning_rate": 4.716111589250351e-06, + "loss": 0.5416, + "step": 12585 + }, + { + "epoch": 0.9253051021908543, + "grad_norm": 0.8008787631988525, + "learning_rate": 4.7160669917805415e-06, + "loss": 0.5362, + "step": 12586 + }, + { + "epoch": 0.9253786207910601, + "grad_norm": 0.80682373046875, + "learning_rate": 4.716022391018885e-06, + "loss": 0.5245, + "step": 12587 + }, + { + "epoch": 0.925452139391266, + "grad_norm": 0.7996590733528137, + "learning_rate": 4.71597778696545e-06, + "loss": 0.4673, + "step": 12588 + }, + { + "epoch": 0.9255256579914718, + "grad_norm": 0.8263216614723206, + "learning_rate": 4.715933179620301e-06, + "loss": 0.5312, + "step": 12589 + }, + { + "epoch": 0.9255991765916777, + "grad_norm": 0.8028913140296936, + "learning_rate": 4.715888568983504e-06, + "loss": 0.5482, + "step": 12590 + }, + { + "epoch": 0.9256726951918836, + "grad_norm": 0.8438177108764648, + "learning_rate": 4.715843955055127e-06, + "loss": 0.5009, + "step": 12591 + }, + { + "epoch": 0.9257462137920894, + "grad_norm": 0.8714125752449036, + "learning_rate": 4.715799337835234e-06, + "loss": 0.5461, + "step": 12592 + }, + { + "epoch": 0.9258197323922952, + "grad_norm": 0.8104299902915955, + "learning_rate": 4.7157547173238925e-06, + "loss": 0.4952, + "step": 12593 + }, + { + "epoch": 0.9258932509925011, + "grad_norm": 0.8398018479347229, + "learning_rate": 4.715710093521169e-06, + "loss": 0.5101, + "step": 12594 + }, + { + "epoch": 0.925966769592707, + "grad_norm": 0.8242349624633789, + "learning_rate": 4.715665466427129e-06, + "loss": 0.5302, + "step": 12595 + }, + { + "epoch": 0.9260402881929128, + "grad_norm": 0.8734395503997803, + "learning_rate": 4.71562083604184e-06, + "loss": 0.5292, + "step": 12596 + }, + { + "epoch": 0.9261138067931186, + "grad_norm": 0.8617703318595886, + "learning_rate": 4.715576202365367e-06, + "loss": 0.5625, + "step": 12597 + }, + { + "epoch": 0.9261873253933245, + "grad_norm": 0.8435696363449097, + "learning_rate": 4.715531565397778e-06, + "loss": 0.5393, + "step": 12598 + }, + { + "epoch": 0.9262608439935304, + "grad_norm": 0.79660964012146, + "learning_rate": 4.715486925139137e-06, + "loss": 0.5088, + "step": 12599 + }, + { + "epoch": 0.9263343625937362, + "grad_norm": 0.7912499904632568, + "learning_rate": 4.715442281589511e-06, + "loss": 0.489, + "step": 12600 + }, + { + "epoch": 0.926407881193942, + "grad_norm": 0.8761945366859436, + "learning_rate": 4.715397634748968e-06, + "loss": 0.5757, + "step": 12601 + }, + { + "epoch": 0.9264813997941479, + "grad_norm": 0.8712537884712219, + "learning_rate": 4.715352984617573e-06, + "loss": 0.4909, + "step": 12602 + }, + { + "epoch": 0.9265549183943538, + "grad_norm": 0.8042596578598022, + "learning_rate": 4.715308331195393e-06, + "loss": 0.4947, + "step": 12603 + }, + { + "epoch": 0.9266284369945597, + "grad_norm": 0.8105570077896118, + "learning_rate": 4.7152636744824926e-06, + "loss": 0.5089, + "step": 12604 + }, + { + "epoch": 0.9267019555947654, + "grad_norm": 0.78998863697052, + "learning_rate": 4.71521901447894e-06, + "loss": 0.5173, + "step": 12605 + }, + { + "epoch": 0.9267754741949713, + "grad_norm": 0.8109129071235657, + "learning_rate": 4.7151743511848006e-06, + "loss": 0.5374, + "step": 12606 + }, + { + "epoch": 0.9268489927951772, + "grad_norm": 0.8035292029380798, + "learning_rate": 4.71512968460014e-06, + "loss": 0.5153, + "step": 12607 + }, + { + "epoch": 0.9269225113953831, + "grad_norm": 0.7868620157241821, + "learning_rate": 4.715085014725027e-06, + "loss": 0.4794, + "step": 12608 + }, + { + "epoch": 0.9269960299955888, + "grad_norm": 0.8007035255432129, + "learning_rate": 4.715040341559526e-06, + "loss": 0.4989, + "step": 12609 + }, + { + "epoch": 0.9270695485957947, + "grad_norm": 0.8405644297599792, + "learning_rate": 4.714995665103704e-06, + "loss": 0.5382, + "step": 12610 + }, + { + "epoch": 0.9271430671960006, + "grad_norm": 0.8341719508171082, + "learning_rate": 4.714950985357627e-06, + "loss": 0.5385, + "step": 12611 + }, + { + "epoch": 0.9272165857962065, + "grad_norm": 0.7628992795944214, + "learning_rate": 4.714906302321362e-06, + "loss": 0.515, + "step": 12612 + }, + { + "epoch": 0.9272901043964122, + "grad_norm": 0.8592363595962524, + "learning_rate": 4.714861615994975e-06, + "loss": 0.5212, + "step": 12613 + }, + { + "epoch": 0.9273636229966181, + "grad_norm": 0.7943896651268005, + "learning_rate": 4.714816926378531e-06, + "loss": 0.5194, + "step": 12614 + }, + { + "epoch": 0.927437141596824, + "grad_norm": 0.817773699760437, + "learning_rate": 4.7147722334720994e-06, + "loss": 0.5424, + "step": 12615 + }, + { + "epoch": 0.9275106601970299, + "grad_norm": 0.7817407846450806, + "learning_rate": 4.714727537275744e-06, + "loss": 0.4924, + "step": 12616 + }, + { + "epoch": 0.9275841787972356, + "grad_norm": 0.8017929196357727, + "learning_rate": 4.714682837789532e-06, + "loss": 0.5225, + "step": 12617 + }, + { + "epoch": 0.9276576973974415, + "grad_norm": 0.8404014706611633, + "learning_rate": 4.714638135013531e-06, + "loss": 0.5001, + "step": 12618 + }, + { + "epoch": 0.9277312159976474, + "grad_norm": 0.7912546396255493, + "learning_rate": 4.714593428947805e-06, + "loss": 0.5218, + "step": 12619 + }, + { + "epoch": 0.9278047345978533, + "grad_norm": 0.7823320031166077, + "learning_rate": 4.714548719592423e-06, + "loss": 0.4922, + "step": 12620 + }, + { + "epoch": 0.927878253198059, + "grad_norm": 0.8335933685302734, + "learning_rate": 4.71450400694745e-06, + "loss": 0.5524, + "step": 12621 + }, + { + "epoch": 0.9279517717982649, + "grad_norm": 0.7984040975570679, + "learning_rate": 4.714459291012951e-06, + "loss": 0.4869, + "step": 12622 + }, + { + "epoch": 0.9280252903984708, + "grad_norm": 0.7732642889022827, + "learning_rate": 4.714414571788996e-06, + "loss": 0.5058, + "step": 12623 + }, + { + "epoch": 0.9280988089986767, + "grad_norm": 0.8860666751861572, + "learning_rate": 4.714369849275649e-06, + "loss": 0.5235, + "step": 12624 + }, + { + "epoch": 0.9281723275988825, + "grad_norm": 0.8770896196365356, + "learning_rate": 4.714325123472976e-06, + "loss": 0.554, + "step": 12625 + }, + { + "epoch": 0.9282458461990883, + "grad_norm": 0.7970790863037109, + "learning_rate": 4.714280394381044e-06, + "loss": 0.5427, + "step": 12626 + }, + { + "epoch": 0.9283193647992942, + "grad_norm": 0.81419438123703, + "learning_rate": 4.714235661999921e-06, + "loss": 0.4937, + "step": 12627 + }, + { + "epoch": 0.9283928833995001, + "grad_norm": 0.861254870891571, + "learning_rate": 4.714190926329671e-06, + "loss": 0.5748, + "step": 12628 + }, + { + "epoch": 0.9284664019997059, + "grad_norm": 0.8358956575393677, + "learning_rate": 4.714146187370362e-06, + "loss": 0.518, + "step": 12629 + }, + { + "epoch": 0.9285399205999118, + "grad_norm": 0.8131512999534607, + "learning_rate": 4.71410144512206e-06, + "loss": 0.5114, + "step": 12630 + }, + { + "epoch": 0.9286134392001176, + "grad_norm": 0.7945464849472046, + "learning_rate": 4.714056699584832e-06, + "loss": 0.4894, + "step": 12631 + }, + { + "epoch": 0.9286869578003235, + "grad_norm": 0.8546895980834961, + "learning_rate": 4.714011950758745e-06, + "loss": 0.5519, + "step": 12632 + }, + { + "epoch": 0.9287604764005294, + "grad_norm": 0.8686352968215942, + "learning_rate": 4.713967198643863e-06, + "loss": 0.5615, + "step": 12633 + }, + { + "epoch": 0.9288339950007352, + "grad_norm": 0.8513809442520142, + "learning_rate": 4.713922443240253e-06, + "loss": 0.5322, + "step": 12634 + }, + { + "epoch": 0.928907513600941, + "grad_norm": 0.7770766019821167, + "learning_rate": 4.713877684547984e-06, + "loss": 0.4918, + "step": 12635 + }, + { + "epoch": 0.9289810322011469, + "grad_norm": 0.75960773229599, + "learning_rate": 4.713832922567121e-06, + "loss": 0.4951, + "step": 12636 + }, + { + "epoch": 0.9290545508013528, + "grad_norm": 0.870685875415802, + "learning_rate": 4.71378815729773e-06, + "loss": 0.5774, + "step": 12637 + }, + { + "epoch": 0.9291280694015586, + "grad_norm": 0.8691454529762268, + "learning_rate": 4.713743388739878e-06, + "loss": 0.5477, + "step": 12638 + }, + { + "epoch": 0.9292015880017644, + "grad_norm": 0.8398176431655884, + "learning_rate": 4.713698616893632e-06, + "loss": 0.5225, + "step": 12639 + }, + { + "epoch": 0.9292751066019703, + "grad_norm": 0.8015018701553345, + "learning_rate": 4.713653841759057e-06, + "loss": 0.5089, + "step": 12640 + }, + { + "epoch": 0.9293486252021762, + "grad_norm": 0.8098991513252258, + "learning_rate": 4.71360906333622e-06, + "loss": 0.4868, + "step": 12641 + }, + { + "epoch": 0.929422143802382, + "grad_norm": 0.7930776476860046, + "learning_rate": 4.7135642816251895e-06, + "loss": 0.5503, + "step": 12642 + }, + { + "epoch": 0.9294956624025879, + "grad_norm": 0.8351119160652161, + "learning_rate": 4.713519496626029e-06, + "loss": 0.5139, + "step": 12643 + }, + { + "epoch": 0.9295691810027937, + "grad_norm": 0.8304259777069092, + "learning_rate": 4.713474708338807e-06, + "loss": 0.535, + "step": 12644 + }, + { + "epoch": 0.9296426996029996, + "grad_norm": 0.8083568215370178, + "learning_rate": 4.71342991676359e-06, + "loss": 0.5374, + "step": 12645 + }, + { + "epoch": 0.9297162182032054, + "grad_norm": 0.8069698810577393, + "learning_rate": 4.713385121900444e-06, + "loss": 0.4999, + "step": 12646 + }, + { + "epoch": 0.9297897368034113, + "grad_norm": 0.7960013747215271, + "learning_rate": 4.7133403237494345e-06, + "loss": 0.5663, + "step": 12647 + }, + { + "epoch": 0.9298632554036171, + "grad_norm": 0.8325176239013672, + "learning_rate": 4.713295522310631e-06, + "loss": 0.5502, + "step": 12648 + }, + { + "epoch": 0.929936774003823, + "grad_norm": 0.8343145847320557, + "learning_rate": 4.713250717584096e-06, + "loss": 0.5405, + "step": 12649 + }, + { + "epoch": 0.9300102926040288, + "grad_norm": 0.8082731366157532, + "learning_rate": 4.7132059095699e-06, + "loss": 0.4916, + "step": 12650 + }, + { + "epoch": 0.9300838112042347, + "grad_norm": 0.8170218467712402, + "learning_rate": 4.713161098268107e-06, + "loss": 0.5742, + "step": 12651 + }, + { + "epoch": 0.9301573298044405, + "grad_norm": 0.81476229429245, + "learning_rate": 4.713116283678785e-06, + "loss": 0.5528, + "step": 12652 + }, + { + "epoch": 0.9302308484046464, + "grad_norm": 0.8151546120643616, + "learning_rate": 4.7130714658019996e-06, + "loss": 0.5187, + "step": 12653 + }, + { + "epoch": 0.9303043670048522, + "grad_norm": 0.7964958548545837, + "learning_rate": 4.713026644637817e-06, + "loss": 0.5242, + "step": 12654 + }, + { + "epoch": 0.9303778856050581, + "grad_norm": 0.8247057199478149, + "learning_rate": 4.712981820186305e-06, + "loss": 0.5512, + "step": 12655 + }, + { + "epoch": 0.930451404205264, + "grad_norm": 0.845162570476532, + "learning_rate": 4.7129369924475304e-06, + "loss": 0.5493, + "step": 12656 + }, + { + "epoch": 0.9305249228054698, + "grad_norm": 0.8087677955627441, + "learning_rate": 4.712892161421558e-06, + "loss": 0.5073, + "step": 12657 + }, + { + "epoch": 0.9305984414056756, + "grad_norm": 0.8156158328056335, + "learning_rate": 4.712847327108456e-06, + "loss": 0.5479, + "step": 12658 + }, + { + "epoch": 0.9306719600058815, + "grad_norm": 0.9163749814033508, + "learning_rate": 4.712802489508291e-06, + "loss": 0.5474, + "step": 12659 + }, + { + "epoch": 0.9307454786060874, + "grad_norm": 0.8208238482475281, + "learning_rate": 4.712757648621128e-06, + "loss": 0.5708, + "step": 12660 + }, + { + "epoch": 0.9308189972062932, + "grad_norm": 0.7889565825462341, + "learning_rate": 4.712712804447036e-06, + "loss": 0.515, + "step": 12661 + }, + { + "epoch": 0.930892515806499, + "grad_norm": 0.8316226601600647, + "learning_rate": 4.712667956986079e-06, + "loss": 0.5615, + "step": 12662 + }, + { + "epoch": 0.9309660344067049, + "grad_norm": 0.7975957989692688, + "learning_rate": 4.712623106238326e-06, + "loss": 0.5032, + "step": 12663 + }, + { + "epoch": 0.9310395530069108, + "grad_norm": 0.7550929188728333, + "learning_rate": 4.712578252203842e-06, + "loss": 0.4609, + "step": 12664 + }, + { + "epoch": 0.9311130716071166, + "grad_norm": 0.8104507923126221, + "learning_rate": 4.712533394882694e-06, + "loss": 0.5138, + "step": 12665 + }, + { + "epoch": 0.9311865902073224, + "grad_norm": 0.7807633876800537, + "learning_rate": 4.712488534274949e-06, + "loss": 0.5388, + "step": 12666 + }, + { + "epoch": 0.9312601088075283, + "grad_norm": 0.7982831597328186, + "learning_rate": 4.712443670380673e-06, + "loss": 0.4843, + "step": 12667 + }, + { + "epoch": 0.9313336274077342, + "grad_norm": 0.801541268825531, + "learning_rate": 4.712398803199934e-06, + "loss": 0.5105, + "step": 12668 + }, + { + "epoch": 0.9314071460079401, + "grad_norm": 0.8308626413345337, + "learning_rate": 4.712353932732797e-06, + "loss": 0.5519, + "step": 12669 + }, + { + "epoch": 0.9314806646081458, + "grad_norm": 0.8156243562698364, + "learning_rate": 4.7123090589793295e-06, + "loss": 0.5406, + "step": 12670 + }, + { + "epoch": 0.9315541832083517, + "grad_norm": 0.824535608291626, + "learning_rate": 4.712264181939598e-06, + "loss": 0.5, + "step": 12671 + }, + { + "epoch": 0.9316277018085576, + "grad_norm": 0.8192705512046814, + "learning_rate": 4.7122193016136694e-06, + "loss": 0.5414, + "step": 12672 + }, + { + "epoch": 0.9317012204087635, + "grad_norm": 0.7921075820922852, + "learning_rate": 4.71217441800161e-06, + "loss": 0.5262, + "step": 12673 + }, + { + "epoch": 0.9317747390089692, + "grad_norm": 0.7991498112678528, + "learning_rate": 4.712129531103486e-06, + "loss": 0.5519, + "step": 12674 + }, + { + "epoch": 0.9318482576091751, + "grad_norm": 0.7837907671928406, + "learning_rate": 4.712084640919366e-06, + "loss": 0.5198, + "step": 12675 + }, + { + "epoch": 0.931921776209381, + "grad_norm": 0.8246380090713501, + "learning_rate": 4.712039747449315e-06, + "loss": 0.4851, + "step": 12676 + }, + { + "epoch": 0.9319952948095869, + "grad_norm": 0.83909672498703, + "learning_rate": 4.7119948506933995e-06, + "loss": 0.5096, + "step": 12677 + }, + { + "epoch": 0.9320688134097926, + "grad_norm": 0.8526402711868286, + "learning_rate": 4.7119499506516875e-06, + "loss": 0.5554, + "step": 12678 + }, + { + "epoch": 0.9321423320099985, + "grad_norm": 0.8517556190490723, + "learning_rate": 4.711905047324244e-06, + "loss": 0.5174, + "step": 12679 + }, + { + "epoch": 0.9322158506102044, + "grad_norm": 0.8048402070999146, + "learning_rate": 4.711860140711138e-06, + "loss": 0.5185, + "step": 12680 + }, + { + "epoch": 0.9322893692104103, + "grad_norm": 0.7671645879745483, + "learning_rate": 4.711815230812434e-06, + "loss": 0.4809, + "step": 12681 + }, + { + "epoch": 0.932362887810616, + "grad_norm": 0.8099327683448792, + "learning_rate": 4.7117703176282e-06, + "loss": 0.5479, + "step": 12682 + }, + { + "epoch": 0.9324364064108219, + "grad_norm": 0.8024306893348694, + "learning_rate": 4.711725401158502e-06, + "loss": 0.4867, + "step": 12683 + }, + { + "epoch": 0.9325099250110278, + "grad_norm": 0.8500090837478638, + "learning_rate": 4.711680481403407e-06, + "loss": 0.5189, + "step": 12684 + }, + { + "epoch": 0.9325834436112337, + "grad_norm": 0.786375880241394, + "learning_rate": 4.711635558362981e-06, + "loss": 0.4557, + "step": 12685 + }, + { + "epoch": 0.9326569622114395, + "grad_norm": 0.8439856767654419, + "learning_rate": 4.711590632037293e-06, + "loss": 0.543, + "step": 12686 + }, + { + "epoch": 0.9327304808116453, + "grad_norm": 0.807842493057251, + "learning_rate": 4.711545702426408e-06, + "loss": 0.5196, + "step": 12687 + }, + { + "epoch": 0.9328039994118512, + "grad_norm": 0.8715994954109192, + "learning_rate": 4.711500769530392e-06, + "loss": 0.5223, + "step": 12688 + }, + { + "epoch": 0.9328775180120571, + "grad_norm": 0.8240419030189514, + "learning_rate": 4.711455833349313e-06, + "loss": 0.5715, + "step": 12689 + }, + { + "epoch": 0.9329510366122629, + "grad_norm": 0.8367997407913208, + "learning_rate": 4.7114108938832384e-06, + "loss": 0.4978, + "step": 12690 + }, + { + "epoch": 0.9330245552124687, + "grad_norm": 0.8184006810188293, + "learning_rate": 4.711365951132233e-06, + "loss": 0.5327, + "step": 12691 + }, + { + "epoch": 0.9330980738126746, + "grad_norm": 0.8071537017822266, + "learning_rate": 4.711321005096365e-06, + "loss": 0.4989, + "step": 12692 + }, + { + "epoch": 0.9331715924128805, + "grad_norm": 0.8794370889663696, + "learning_rate": 4.7112760557757e-06, + "loss": 0.5902, + "step": 12693 + }, + { + "epoch": 0.9332451110130863, + "grad_norm": 0.8494759202003479, + "learning_rate": 4.711231103170306e-06, + "loss": 0.523, + "step": 12694 + }, + { + "epoch": 0.9333186296132922, + "grad_norm": 0.7941902875900269, + "learning_rate": 4.71118614728025e-06, + "loss": 0.5243, + "step": 12695 + }, + { + "epoch": 0.933392148213498, + "grad_norm": 0.8107139468193054, + "learning_rate": 4.7111411881055975e-06, + "loss": 0.5037, + "step": 12696 + }, + { + "epoch": 0.9334656668137039, + "grad_norm": 0.7635526657104492, + "learning_rate": 4.711096225646416e-06, + "loss": 0.5442, + "step": 12697 + }, + { + "epoch": 0.9335391854139097, + "grad_norm": 0.8038543462753296, + "learning_rate": 4.711051259902771e-06, + "loss": 0.5355, + "step": 12698 + }, + { + "epoch": 0.9336127040141156, + "grad_norm": 0.7872193455696106, + "learning_rate": 4.711006290874733e-06, + "loss": 0.536, + "step": 12699 + }, + { + "epoch": 0.9336862226143214, + "grad_norm": 0.7894639372825623, + "learning_rate": 4.710961318562364e-06, + "loss": 0.5354, + "step": 12700 + }, + { + "epoch": 0.9337597412145273, + "grad_norm": 0.8443173766136169, + "learning_rate": 4.710916342965733e-06, + "loss": 0.5439, + "step": 12701 + }, + { + "epoch": 0.9338332598147331, + "grad_norm": 0.8495767712593079, + "learning_rate": 4.710871364084908e-06, + "loss": 0.498, + "step": 12702 + }, + { + "epoch": 0.933906778414939, + "grad_norm": 0.8198673725128174, + "learning_rate": 4.710826381919954e-06, + "loss": 0.4894, + "step": 12703 + }, + { + "epoch": 0.9339802970151448, + "grad_norm": 0.8863058686256409, + "learning_rate": 4.710781396470938e-06, + "loss": 0.5597, + "step": 12704 + }, + { + "epoch": 0.9340538156153507, + "grad_norm": 0.7924801707267761, + "learning_rate": 4.7107364077379284e-06, + "loss": 0.5307, + "step": 12705 + }, + { + "epoch": 0.9341273342155565, + "grad_norm": 0.7548168897628784, + "learning_rate": 4.71069141572099e-06, + "loss": 0.4881, + "step": 12706 + }, + { + "epoch": 0.9342008528157624, + "grad_norm": 0.8000404238700867, + "learning_rate": 4.710646420420191e-06, + "loss": 0.5419, + "step": 12707 + }, + { + "epoch": 0.9342743714159683, + "grad_norm": 0.7977374196052551, + "learning_rate": 4.710601421835597e-06, + "loss": 0.5085, + "step": 12708 + }, + { + "epoch": 0.9343478900161741, + "grad_norm": 0.8308356404304504, + "learning_rate": 4.710556419967277e-06, + "loss": 0.5416, + "step": 12709 + }, + { + "epoch": 0.9344214086163799, + "grad_norm": 0.7635153532028198, + "learning_rate": 4.710511414815296e-06, + "loss": 0.4757, + "step": 12710 + }, + { + "epoch": 0.9344949272165858, + "grad_norm": 0.7692279815673828, + "learning_rate": 4.710466406379721e-06, + "loss": 0.5176, + "step": 12711 + }, + { + "epoch": 0.9345684458167917, + "grad_norm": 0.8253777623176575, + "learning_rate": 4.710421394660619e-06, + "loss": 0.516, + "step": 12712 + }, + { + "epoch": 0.9346419644169975, + "grad_norm": 0.8451455235481262, + "learning_rate": 4.710376379658058e-06, + "loss": 0.5515, + "step": 12713 + }, + { + "epoch": 0.9347154830172033, + "grad_norm": 0.9289820194244385, + "learning_rate": 4.7103313613721035e-06, + "loss": 0.5705, + "step": 12714 + }, + { + "epoch": 0.9347890016174092, + "grad_norm": 0.7933692336082458, + "learning_rate": 4.710286339802822e-06, + "loss": 0.4968, + "step": 12715 + }, + { + "epoch": 0.9348625202176151, + "grad_norm": 0.8065758347511292, + "learning_rate": 4.710241314950282e-06, + "loss": 0.4959, + "step": 12716 + }, + { + "epoch": 0.934936038817821, + "grad_norm": 0.7974640130996704, + "learning_rate": 4.71019628681455e-06, + "loss": 0.4998, + "step": 12717 + }, + { + "epoch": 0.9350095574180267, + "grad_norm": 0.876404345035553, + "learning_rate": 4.7101512553956916e-06, + "loss": 0.5592, + "step": 12718 + }, + { + "epoch": 0.9350830760182326, + "grad_norm": 0.8875867128372192, + "learning_rate": 4.710106220693775e-06, + "loss": 0.5048, + "step": 12719 + }, + { + "epoch": 0.9351565946184385, + "grad_norm": 0.7820202708244324, + "learning_rate": 4.710061182708866e-06, + "loss": 0.5305, + "step": 12720 + }, + { + "epoch": 0.9352301132186444, + "grad_norm": 0.841515064239502, + "learning_rate": 4.710016141441033e-06, + "loss": 0.5009, + "step": 12721 + }, + { + "epoch": 0.9353036318188501, + "grad_norm": 0.8352593183517456, + "learning_rate": 4.709971096890341e-06, + "loss": 0.5094, + "step": 12722 + }, + { + "epoch": 0.935377150419056, + "grad_norm": 0.8046164512634277, + "learning_rate": 4.709926049056859e-06, + "loss": 0.4624, + "step": 12723 + }, + { + "epoch": 0.9354506690192619, + "grad_norm": 0.8608724474906921, + "learning_rate": 4.709880997940652e-06, + "loss": 0.5425, + "step": 12724 + }, + { + "epoch": 0.9355241876194678, + "grad_norm": 0.8874301314353943, + "learning_rate": 4.709835943541788e-06, + "loss": 0.5364, + "step": 12725 + }, + { + "epoch": 0.9355977062196735, + "grad_norm": 0.8526413440704346, + "learning_rate": 4.709790885860335e-06, + "loss": 0.5563, + "step": 12726 + }, + { + "epoch": 0.9356712248198794, + "grad_norm": 0.8393363952636719, + "learning_rate": 4.709745824896357e-06, + "loss": 0.4934, + "step": 12727 + }, + { + "epoch": 0.9357447434200853, + "grad_norm": 0.8668023347854614, + "learning_rate": 4.7097007606499235e-06, + "loss": 0.6016, + "step": 12728 + }, + { + "epoch": 0.9358182620202912, + "grad_norm": 0.8257916569709778, + "learning_rate": 4.7096556931210995e-06, + "loss": 0.5695, + "step": 12729 + }, + { + "epoch": 0.9358917806204969, + "grad_norm": 0.8673436641693115, + "learning_rate": 4.7096106223099535e-06, + "loss": 0.5404, + "step": 12730 + }, + { + "epoch": 0.9359652992207028, + "grad_norm": 0.8344614505767822, + "learning_rate": 4.709565548216552e-06, + "loss": 0.539, + "step": 12731 + }, + { + "epoch": 0.9360388178209087, + "grad_norm": 0.8041743040084839, + "learning_rate": 4.709520470840963e-06, + "loss": 0.5759, + "step": 12732 + }, + { + "epoch": 0.9361123364211146, + "grad_norm": 0.8393919467926025, + "learning_rate": 4.70947539018325e-06, + "loss": 0.5403, + "step": 12733 + }, + { + "epoch": 0.9361858550213203, + "grad_norm": 0.8419398665428162, + "learning_rate": 4.7094303062434845e-06, + "loss": 0.5352, + "step": 12734 + }, + { + "epoch": 0.9362593736215262, + "grad_norm": 0.819783627986908, + "learning_rate": 4.70938521902173e-06, + "loss": 0.5283, + "step": 12735 + }, + { + "epoch": 0.9363328922217321, + "grad_norm": 0.8316270709037781, + "learning_rate": 4.709340128518055e-06, + "loss": 0.4688, + "step": 12736 + }, + { + "epoch": 0.936406410821938, + "grad_norm": 0.9137793183326721, + "learning_rate": 4.709295034732526e-06, + "loss": 0.5492, + "step": 12737 + }, + { + "epoch": 0.9364799294221438, + "grad_norm": 0.884826123714447, + "learning_rate": 4.7092499376652105e-06, + "loss": 0.5803, + "step": 12738 + }, + { + "epoch": 0.9365534480223496, + "grad_norm": 0.842541515827179, + "learning_rate": 4.709204837316175e-06, + "loss": 0.4974, + "step": 12739 + }, + { + "epoch": 0.9366269666225555, + "grad_norm": 0.8007906079292297, + "learning_rate": 4.709159733685487e-06, + "loss": 0.4999, + "step": 12740 + }, + { + "epoch": 0.9367004852227614, + "grad_norm": 0.8439920544624329, + "learning_rate": 4.709114626773213e-06, + "loss": 0.5418, + "step": 12741 + }, + { + "epoch": 0.9367740038229672, + "grad_norm": 0.840448260307312, + "learning_rate": 4.70906951657942e-06, + "loss": 0.5406, + "step": 12742 + }, + { + "epoch": 0.936847522423173, + "grad_norm": 0.8012194037437439, + "learning_rate": 4.7090244031041756e-06, + "loss": 0.5257, + "step": 12743 + }, + { + "epoch": 0.9369210410233789, + "grad_norm": 0.8336975574493408, + "learning_rate": 4.708979286347546e-06, + "loss": 0.5085, + "step": 12744 + }, + { + "epoch": 0.9369945596235848, + "grad_norm": 0.7822766900062561, + "learning_rate": 4.7089341663095986e-06, + "loss": 0.5182, + "step": 12745 + }, + { + "epoch": 0.9370680782237906, + "grad_norm": 0.9013201594352722, + "learning_rate": 4.708889042990401e-06, + "loss": 0.5829, + "step": 12746 + }, + { + "epoch": 0.9371415968239964, + "grad_norm": 0.8306474089622498, + "learning_rate": 4.70884391639002e-06, + "loss": 0.5122, + "step": 12747 + }, + { + "epoch": 0.9372151154242023, + "grad_norm": 0.8358700275421143, + "learning_rate": 4.7087987865085215e-06, + "loss": 0.5158, + "step": 12748 + }, + { + "epoch": 0.9372886340244082, + "grad_norm": 0.781217098236084, + "learning_rate": 4.708753653345973e-06, + "loss": 0.5128, + "step": 12749 + }, + { + "epoch": 0.937362152624614, + "grad_norm": 0.8504724502563477, + "learning_rate": 4.708708516902443e-06, + "loss": 0.5366, + "step": 12750 + }, + { + "epoch": 0.9374356712248199, + "grad_norm": 0.8077144622802734, + "learning_rate": 4.708663377177996e-06, + "loss": 0.5533, + "step": 12751 + }, + { + "epoch": 0.9375091898250257, + "grad_norm": 0.7988356351852417, + "learning_rate": 4.708618234172702e-06, + "loss": 0.438, + "step": 12752 + }, + { + "epoch": 0.9375827084252316, + "grad_norm": 0.8148082494735718, + "learning_rate": 4.708573087886626e-06, + "loss": 0.4922, + "step": 12753 + }, + { + "epoch": 0.9376562270254374, + "grad_norm": 0.8136352896690369, + "learning_rate": 4.708527938319836e-06, + "loss": 0.5226, + "step": 12754 + }, + { + "epoch": 0.9377297456256433, + "grad_norm": 0.8369523882865906, + "learning_rate": 4.708482785472398e-06, + "loss": 0.5339, + "step": 12755 + }, + { + "epoch": 0.9378032642258491, + "grad_norm": 0.833112359046936, + "learning_rate": 4.7084376293443804e-06, + "loss": 0.5458, + "step": 12756 + }, + { + "epoch": 0.937876782826055, + "grad_norm": 0.8306522965431213, + "learning_rate": 4.708392469935849e-06, + "loss": 0.5541, + "step": 12757 + }, + { + "epoch": 0.9379503014262608, + "grad_norm": 0.8045960664749146, + "learning_rate": 4.708347307246871e-06, + "loss": 0.4812, + "step": 12758 + }, + { + "epoch": 0.9380238200264667, + "grad_norm": 0.8656691908836365, + "learning_rate": 4.7083021412775154e-06, + "loss": 0.5148, + "step": 12759 + }, + { + "epoch": 0.9380973386266725, + "grad_norm": 0.8499042987823486, + "learning_rate": 4.708256972027847e-06, + "loss": 0.5106, + "step": 12760 + }, + { + "epoch": 0.9381708572268784, + "grad_norm": 0.89762282371521, + "learning_rate": 4.708211799497934e-06, + "loss": 0.5398, + "step": 12761 + }, + { + "epoch": 0.9382443758270842, + "grad_norm": 0.8174026012420654, + "learning_rate": 4.708166623687843e-06, + "loss": 0.5092, + "step": 12762 + }, + { + "epoch": 0.9383178944272901, + "grad_norm": 0.791602373123169, + "learning_rate": 4.708121444597642e-06, + "loss": 0.5368, + "step": 12763 + }, + { + "epoch": 0.938391413027496, + "grad_norm": 0.8132501244544983, + "learning_rate": 4.708076262227397e-06, + "loss": 0.5141, + "step": 12764 + }, + { + "epoch": 0.9384649316277018, + "grad_norm": 0.8208827376365662, + "learning_rate": 4.708031076577176e-06, + "loss": 0.5047, + "step": 12765 + }, + { + "epoch": 0.9385384502279076, + "grad_norm": 0.826894223690033, + "learning_rate": 4.707985887647046e-06, + "loss": 0.5302, + "step": 12766 + }, + { + "epoch": 0.9386119688281135, + "grad_norm": 0.8200744986534119, + "learning_rate": 4.7079406954370734e-06, + "loss": 0.527, + "step": 12767 + }, + { + "epoch": 0.9386854874283194, + "grad_norm": 0.8692695498466492, + "learning_rate": 4.707895499947326e-06, + "loss": 0.517, + "step": 12768 + }, + { + "epoch": 0.9387590060285252, + "grad_norm": 0.8748288750648499, + "learning_rate": 4.70785030117787e-06, + "loss": 0.6032, + "step": 12769 + }, + { + "epoch": 0.9388325246287311, + "grad_norm": 0.8270940780639648, + "learning_rate": 4.707805099128774e-06, + "loss": 0.5132, + "step": 12770 + }, + { + "epoch": 0.9389060432289369, + "grad_norm": 0.8105807304382324, + "learning_rate": 4.707759893800105e-06, + "loss": 0.5052, + "step": 12771 + }, + { + "epoch": 0.9389795618291428, + "grad_norm": 0.833502471446991, + "learning_rate": 4.707714685191929e-06, + "loss": 0.5185, + "step": 12772 + }, + { + "epoch": 0.9390530804293487, + "grad_norm": 0.8352954387664795, + "learning_rate": 4.707669473304313e-06, + "loss": 0.5485, + "step": 12773 + }, + { + "epoch": 0.9391265990295545, + "grad_norm": 0.7936894297599792, + "learning_rate": 4.707624258137325e-06, + "loss": 0.4825, + "step": 12774 + }, + { + "epoch": 0.9392001176297603, + "grad_norm": 0.8391838073730469, + "learning_rate": 4.707579039691033e-06, + "loss": 0.5158, + "step": 12775 + }, + { + "epoch": 0.9392736362299662, + "grad_norm": 0.817629337310791, + "learning_rate": 4.707533817965503e-06, + "loss": 0.5309, + "step": 12776 + }, + { + "epoch": 0.9393471548301721, + "grad_norm": 0.7643235325813293, + "learning_rate": 4.707488592960802e-06, + "loss": 0.4519, + "step": 12777 + }, + { + "epoch": 0.9394206734303779, + "grad_norm": 0.8310174942016602, + "learning_rate": 4.707443364676997e-06, + "loss": 0.5085, + "step": 12778 + }, + { + "epoch": 0.9394941920305837, + "grad_norm": 0.842117190361023, + "learning_rate": 4.707398133114157e-06, + "loss": 0.5149, + "step": 12779 + }, + { + "epoch": 0.9395677106307896, + "grad_norm": 0.804474949836731, + "learning_rate": 4.707352898272347e-06, + "loss": 0.5334, + "step": 12780 + }, + { + "epoch": 0.9396412292309955, + "grad_norm": 0.8106326460838318, + "learning_rate": 4.707307660151636e-06, + "loss": 0.5235, + "step": 12781 + }, + { + "epoch": 0.9397147478312013, + "grad_norm": 0.8075023293495178, + "learning_rate": 4.7072624187520895e-06, + "loss": 0.5027, + "step": 12782 + }, + { + "epoch": 0.9397882664314071, + "grad_norm": 0.8390026092529297, + "learning_rate": 4.707217174073776e-06, + "loss": 0.4872, + "step": 12783 + }, + { + "epoch": 0.939861785031613, + "grad_norm": 0.8229402303695679, + "learning_rate": 4.707171926116762e-06, + "loss": 0.5264, + "step": 12784 + }, + { + "epoch": 0.9399353036318189, + "grad_norm": 0.9393048882484436, + "learning_rate": 4.707126674881115e-06, + "loss": 0.5946, + "step": 12785 + }, + { + "epoch": 0.9400088222320248, + "grad_norm": 0.8246299028396606, + "learning_rate": 4.707081420366903e-06, + "loss": 0.5543, + "step": 12786 + }, + { + "epoch": 0.9400823408322305, + "grad_norm": 0.8538563847541809, + "learning_rate": 4.7070361625741914e-06, + "loss": 0.5239, + "step": 12787 + }, + { + "epoch": 0.9401558594324364, + "grad_norm": 0.8775039911270142, + "learning_rate": 4.706990901503049e-06, + "loss": 0.5501, + "step": 12788 + }, + { + "epoch": 0.9402293780326423, + "grad_norm": 0.8169376254081726, + "learning_rate": 4.7069456371535426e-06, + "loss": 0.5067, + "step": 12789 + }, + { + "epoch": 0.9403028966328482, + "grad_norm": 0.8439212441444397, + "learning_rate": 4.706900369525739e-06, + "loss": 0.5609, + "step": 12790 + }, + { + "epoch": 0.9403764152330539, + "grad_norm": 0.8057352304458618, + "learning_rate": 4.7068550986197056e-06, + "loss": 0.5397, + "step": 12791 + }, + { + "epoch": 0.9404499338332598, + "grad_norm": 0.8397768139839172, + "learning_rate": 4.70680982443551e-06, + "loss": 0.4703, + "step": 12792 + }, + { + "epoch": 0.9405234524334657, + "grad_norm": 0.8410151600837708, + "learning_rate": 4.706764546973218e-06, + "loss": 0.5273, + "step": 12793 + }, + { + "epoch": 0.9405969710336716, + "grad_norm": 0.7994846701622009, + "learning_rate": 4.7067192662329e-06, + "loss": 0.4892, + "step": 12794 + }, + { + "epoch": 0.9406704896338773, + "grad_norm": 0.7855179309844971, + "learning_rate": 4.70667398221462e-06, + "loss": 0.5433, + "step": 12795 + }, + { + "epoch": 0.9407440082340832, + "grad_norm": 0.7830292582511902, + "learning_rate": 4.706628694918448e-06, + "loss": 0.5434, + "step": 12796 + }, + { + "epoch": 0.9408175268342891, + "grad_norm": 0.8405627608299255, + "learning_rate": 4.706583404344449e-06, + "loss": 0.563, + "step": 12797 + }, + { + "epoch": 0.940891045434495, + "grad_norm": 0.8265547156333923, + "learning_rate": 4.706538110492691e-06, + "loss": 0.4966, + "step": 12798 + }, + { + "epoch": 0.9409645640347007, + "grad_norm": 0.8321351408958435, + "learning_rate": 4.706492813363242e-06, + "loss": 0.5324, + "step": 12799 + }, + { + "epoch": 0.9410380826349066, + "grad_norm": 0.7945255637168884, + "learning_rate": 4.706447512956169e-06, + "loss": 0.4895, + "step": 12800 + }, + { + "epoch": 0.9411116012351125, + "grad_norm": 0.7854182720184326, + "learning_rate": 4.706402209271539e-06, + "loss": 0.4867, + "step": 12801 + }, + { + "epoch": 0.9411851198353184, + "grad_norm": 0.8341281414031982, + "learning_rate": 4.706356902309418e-06, + "loss": 0.5485, + "step": 12802 + }, + { + "epoch": 0.9412586384355242, + "grad_norm": 0.7859062552452087, + "learning_rate": 4.706311592069876e-06, + "loss": 0.5129, + "step": 12803 + }, + { + "epoch": 0.94133215703573, + "grad_norm": 0.8068663477897644, + "learning_rate": 4.706266278552979e-06, + "loss": 0.499, + "step": 12804 + }, + { + "epoch": 0.9414056756359359, + "grad_norm": 0.7901623845100403, + "learning_rate": 4.706220961758793e-06, + "loss": 0.5502, + "step": 12805 + }, + { + "epoch": 0.9414791942361418, + "grad_norm": 0.8642349243164062, + "learning_rate": 4.7061756416873875e-06, + "loss": 0.5635, + "step": 12806 + }, + { + "epoch": 0.9415527128363476, + "grad_norm": 0.8397525548934937, + "learning_rate": 4.706130318338828e-06, + "loss": 0.529, + "step": 12807 + }, + { + "epoch": 0.9416262314365534, + "grad_norm": 0.8434532880783081, + "learning_rate": 4.706084991713185e-06, + "loss": 0.5282, + "step": 12808 + }, + { + "epoch": 0.9416997500367593, + "grad_norm": 0.7855566143989563, + "learning_rate": 4.706039661810522e-06, + "loss": 0.5218, + "step": 12809 + }, + { + "epoch": 0.9417732686369652, + "grad_norm": 0.7705658674240112, + "learning_rate": 4.7059943286309075e-06, + "loss": 0.5278, + "step": 12810 + }, + { + "epoch": 0.941846787237171, + "grad_norm": 0.8375200629234314, + "learning_rate": 4.705948992174409e-06, + "loss": 0.5132, + "step": 12811 + }, + { + "epoch": 0.9419203058373768, + "grad_norm": 0.8294750452041626, + "learning_rate": 4.705903652441095e-06, + "loss": 0.5357, + "step": 12812 + }, + { + "epoch": 0.9419938244375827, + "grad_norm": 0.8169001340866089, + "learning_rate": 4.705858309431032e-06, + "loss": 0.5498, + "step": 12813 + }, + { + "epoch": 0.9420673430377886, + "grad_norm": 0.809637725353241, + "learning_rate": 4.705812963144286e-06, + "loss": 0.5533, + "step": 12814 + }, + { + "epoch": 0.9421408616379944, + "grad_norm": 0.8555650115013123, + "learning_rate": 4.7057676135809274e-06, + "loss": 0.5281, + "step": 12815 + }, + { + "epoch": 0.9422143802382003, + "grad_norm": 0.8201001882553101, + "learning_rate": 4.70572226074102e-06, + "loss": 0.5327, + "step": 12816 + }, + { + "epoch": 0.9422878988384061, + "grad_norm": 0.8818779587745667, + "learning_rate": 4.705676904624634e-06, + "loss": 0.5529, + "step": 12817 + }, + { + "epoch": 0.942361417438612, + "grad_norm": 0.816500186920166, + "learning_rate": 4.705631545231835e-06, + "loss": 0.5107, + "step": 12818 + }, + { + "epoch": 0.9424349360388178, + "grad_norm": 0.8212431073188782, + "learning_rate": 4.705586182562693e-06, + "loss": 0.5279, + "step": 12819 + }, + { + "epoch": 0.9425084546390237, + "grad_norm": 0.8770092129707336, + "learning_rate": 4.705540816617272e-06, + "loss": 0.5416, + "step": 12820 + }, + { + "epoch": 0.9425819732392295, + "grad_norm": 0.8221542239189148, + "learning_rate": 4.705495447395641e-06, + "loss": 0.5505, + "step": 12821 + }, + { + "epoch": 0.9426554918394354, + "grad_norm": 0.8623083233833313, + "learning_rate": 4.705450074897867e-06, + "loss": 0.5329, + "step": 12822 + }, + { + "epoch": 0.9427290104396412, + "grad_norm": 0.8334737420082092, + "learning_rate": 4.705404699124018e-06, + "loss": 0.5524, + "step": 12823 + }, + { + "epoch": 0.9428025290398471, + "grad_norm": 0.8135871887207031, + "learning_rate": 4.705359320074161e-06, + "loss": 0.5436, + "step": 12824 + }, + { + "epoch": 0.942876047640053, + "grad_norm": 0.8319916129112244, + "learning_rate": 4.7053139377483635e-06, + "loss": 0.4699, + "step": 12825 + }, + { + "epoch": 0.9429495662402588, + "grad_norm": 0.8154650926589966, + "learning_rate": 4.705268552146693e-06, + "loss": 0.5052, + "step": 12826 + }, + { + "epoch": 0.9430230848404646, + "grad_norm": 0.8063068389892578, + "learning_rate": 4.705223163269217e-06, + "loss": 0.5586, + "step": 12827 + }, + { + "epoch": 0.9430966034406705, + "grad_norm": 0.7996635437011719, + "learning_rate": 4.705177771116003e-06, + "loss": 0.5111, + "step": 12828 + }, + { + "epoch": 0.9431701220408764, + "grad_norm": 0.8176705241203308, + "learning_rate": 4.7051323756871185e-06, + "loss": 0.5334, + "step": 12829 + }, + { + "epoch": 0.9432436406410822, + "grad_norm": 0.834838330745697, + "learning_rate": 4.705086976982629e-06, + "loss": 0.494, + "step": 12830 + }, + { + "epoch": 0.943317159241288, + "grad_norm": 0.804400622844696, + "learning_rate": 4.705041575002604e-06, + "loss": 0.4609, + "step": 12831 + }, + { + "epoch": 0.9433906778414939, + "grad_norm": 0.811482846736908, + "learning_rate": 4.704996169747112e-06, + "loss": 0.5029, + "step": 12832 + }, + { + "epoch": 0.9434641964416998, + "grad_norm": 0.8387359976768494, + "learning_rate": 4.704950761216217e-06, + "loss": 0.5293, + "step": 12833 + }, + { + "epoch": 0.9435377150419056, + "grad_norm": 0.8131037354469299, + "learning_rate": 4.70490534940999e-06, + "loss": 0.4989, + "step": 12834 + }, + { + "epoch": 0.9436112336421114, + "grad_norm": 0.8548509478569031, + "learning_rate": 4.704859934328496e-06, + "loss": 0.5125, + "step": 12835 + }, + { + "epoch": 0.9436847522423173, + "grad_norm": 0.8305383324623108, + "learning_rate": 4.704814515971803e-06, + "loss": 0.5595, + "step": 12836 + }, + { + "epoch": 0.9437582708425232, + "grad_norm": 0.8452845215797424, + "learning_rate": 4.70476909433998e-06, + "loss": 0.5033, + "step": 12837 + }, + { + "epoch": 0.943831789442729, + "grad_norm": 0.7723831534385681, + "learning_rate": 4.7047236694330916e-06, + "loss": 0.4922, + "step": 12838 + }, + { + "epoch": 0.9439053080429348, + "grad_norm": 0.8389518857002258, + "learning_rate": 4.704678241251207e-06, + "loss": 0.5453, + "step": 12839 + }, + { + "epoch": 0.9439788266431407, + "grad_norm": 0.849503755569458, + "learning_rate": 4.704632809794395e-06, + "loss": 0.5307, + "step": 12840 + }, + { + "epoch": 0.9440523452433466, + "grad_norm": 0.8739752769470215, + "learning_rate": 4.7045873750627205e-06, + "loss": 0.5575, + "step": 12841 + }, + { + "epoch": 0.9441258638435525, + "grad_norm": 0.8418553471565247, + "learning_rate": 4.704541937056253e-06, + "loss": 0.5551, + "step": 12842 + }, + { + "epoch": 0.9441993824437582, + "grad_norm": 0.828339159488678, + "learning_rate": 4.704496495775059e-06, + "loss": 0.5252, + "step": 12843 + }, + { + "epoch": 0.9442729010439641, + "grad_norm": 0.8231087327003479, + "learning_rate": 4.704451051219205e-06, + "loss": 0.5238, + "step": 12844 + }, + { + "epoch": 0.94434641964417, + "grad_norm": 0.8192477822303772, + "learning_rate": 4.704405603388762e-06, + "loss": 0.5107, + "step": 12845 + }, + { + "epoch": 0.9444199382443759, + "grad_norm": 0.8125112056732178, + "learning_rate": 4.704360152283793e-06, + "loss": 0.5539, + "step": 12846 + }, + { + "epoch": 0.9444934568445816, + "grad_norm": 0.7958342432975769, + "learning_rate": 4.704314697904369e-06, + "loss": 0.4747, + "step": 12847 + }, + { + "epoch": 0.9445669754447875, + "grad_norm": 0.7974357008934021, + "learning_rate": 4.704269240250556e-06, + "loss": 0.5062, + "step": 12848 + }, + { + "epoch": 0.9446404940449934, + "grad_norm": 0.8281763792037964, + "learning_rate": 4.704223779322421e-06, + "loss": 0.4879, + "step": 12849 + }, + { + "epoch": 0.9447140126451993, + "grad_norm": 0.8228180408477783, + "learning_rate": 4.704178315120033e-06, + "loss": 0.5146, + "step": 12850 + }, + { + "epoch": 0.944787531245405, + "grad_norm": 0.8090354204177856, + "learning_rate": 4.7041328476434586e-06, + "loss": 0.478, + "step": 12851 + }, + { + "epoch": 0.9448610498456109, + "grad_norm": 0.8444223403930664, + "learning_rate": 4.7040873768927655e-06, + "loss": 0.5471, + "step": 12852 + }, + { + "epoch": 0.9449345684458168, + "grad_norm": 0.825285792350769, + "learning_rate": 4.7040419028680215e-06, + "loss": 0.5594, + "step": 12853 + }, + { + "epoch": 0.9450080870460227, + "grad_norm": 0.807231605052948, + "learning_rate": 4.703996425569294e-06, + "loss": 0.5432, + "step": 12854 + }, + { + "epoch": 0.9450816056462285, + "grad_norm": 0.819275438785553, + "learning_rate": 4.7039509449966504e-06, + "loss": 0.4997, + "step": 12855 + }, + { + "epoch": 0.9451551242464343, + "grad_norm": 0.8570737242698669, + "learning_rate": 4.703905461150158e-06, + "loss": 0.5399, + "step": 12856 + }, + { + "epoch": 0.9452286428466402, + "grad_norm": 0.8206460475921631, + "learning_rate": 4.703859974029885e-06, + "loss": 0.5254, + "step": 12857 + }, + { + "epoch": 0.9453021614468461, + "grad_norm": 0.8445389270782471, + "learning_rate": 4.703814483635899e-06, + "loss": 0.5053, + "step": 12858 + }, + { + "epoch": 0.9453756800470519, + "grad_norm": 0.7996640801429749, + "learning_rate": 4.703768989968267e-06, + "loss": 0.4888, + "step": 12859 + }, + { + "epoch": 0.9454491986472577, + "grad_norm": 0.9276821613311768, + "learning_rate": 4.703723493027056e-06, + "loss": 0.5828, + "step": 12860 + }, + { + "epoch": 0.9455227172474636, + "grad_norm": 0.8277099132537842, + "learning_rate": 4.703677992812335e-06, + "loss": 0.5052, + "step": 12861 + }, + { + "epoch": 0.9455962358476695, + "grad_norm": 0.8503478169441223, + "learning_rate": 4.703632489324171e-06, + "loss": 0.5312, + "step": 12862 + }, + { + "epoch": 0.9456697544478753, + "grad_norm": 0.8601330518722534, + "learning_rate": 4.703586982562632e-06, + "loss": 0.5449, + "step": 12863 + }, + { + "epoch": 0.9457432730480811, + "grad_norm": 0.7840422987937927, + "learning_rate": 4.703541472527786e-06, + "loss": 0.5216, + "step": 12864 + }, + { + "epoch": 0.945816791648287, + "grad_norm": 0.8086277842521667, + "learning_rate": 4.7034959592196985e-06, + "loss": 0.5327, + "step": 12865 + }, + { + "epoch": 0.9458903102484929, + "grad_norm": 0.8376564979553223, + "learning_rate": 4.703450442638439e-06, + "loss": 0.4726, + "step": 12866 + }, + { + "epoch": 0.9459638288486987, + "grad_norm": 0.8234798908233643, + "learning_rate": 4.703404922784074e-06, + "loss": 0.5306, + "step": 12867 + }, + { + "epoch": 0.9460373474489046, + "grad_norm": 0.8474032878875732, + "learning_rate": 4.703359399656672e-06, + "loss": 0.5039, + "step": 12868 + }, + { + "epoch": 0.9461108660491104, + "grad_norm": 0.8159898519515991, + "learning_rate": 4.7033138732563e-06, + "loss": 0.5323, + "step": 12869 + }, + { + "epoch": 0.9461843846493163, + "grad_norm": 0.8698883056640625, + "learning_rate": 4.703268343583026e-06, + "loss": 0.5463, + "step": 12870 + }, + { + "epoch": 0.9462579032495221, + "grad_norm": 0.8051015138626099, + "learning_rate": 4.703222810636917e-06, + "loss": 0.4889, + "step": 12871 + }, + { + "epoch": 0.946331421849728, + "grad_norm": 0.8592368960380554, + "learning_rate": 4.703177274418042e-06, + "loss": 0.4965, + "step": 12872 + }, + { + "epoch": 0.9464049404499338, + "grad_norm": 0.822298526763916, + "learning_rate": 4.703131734926468e-06, + "loss": 0.5193, + "step": 12873 + }, + { + "epoch": 0.9464784590501397, + "grad_norm": 0.841953456401825, + "learning_rate": 4.703086192162262e-06, + "loss": 0.535, + "step": 12874 + }, + { + "epoch": 0.9465519776503455, + "grad_norm": 0.8084226846694946, + "learning_rate": 4.703040646125492e-06, + "loss": 0.4681, + "step": 12875 + }, + { + "epoch": 0.9466254962505514, + "grad_norm": 0.8355420231819153, + "learning_rate": 4.702995096816225e-06, + "loss": 0.5286, + "step": 12876 + }, + { + "epoch": 0.9466990148507572, + "grad_norm": 0.8493549823760986, + "learning_rate": 4.70294954423453e-06, + "loss": 0.5307, + "step": 12877 + }, + { + "epoch": 0.9467725334509631, + "grad_norm": 0.8432765007019043, + "learning_rate": 4.702903988380475e-06, + "loss": 0.5443, + "step": 12878 + }, + { + "epoch": 0.9468460520511689, + "grad_norm": 0.8840752243995667, + "learning_rate": 4.702858429254126e-06, + "loss": 0.5599, + "step": 12879 + }, + { + "epoch": 0.9469195706513748, + "grad_norm": 0.7784997224807739, + "learning_rate": 4.7028128668555515e-06, + "loss": 0.4697, + "step": 12880 + }, + { + "epoch": 0.9469930892515807, + "grad_norm": 0.7547805905342102, + "learning_rate": 4.70276730118482e-06, + "loss": 0.5021, + "step": 12881 + }, + { + "epoch": 0.9470666078517865, + "grad_norm": 0.8278003931045532, + "learning_rate": 4.702721732241997e-06, + "loss": 0.5678, + "step": 12882 + }, + { + "epoch": 0.9471401264519923, + "grad_norm": 0.8211076259613037, + "learning_rate": 4.702676160027152e-06, + "loss": 0.5427, + "step": 12883 + }, + { + "epoch": 0.9472136450521982, + "grad_norm": 0.8236597180366516, + "learning_rate": 4.702630584540352e-06, + "loss": 0.4999, + "step": 12884 + }, + { + "epoch": 0.9472871636524041, + "grad_norm": 0.8388932347297668, + "learning_rate": 4.702585005781664e-06, + "loss": 0.5703, + "step": 12885 + }, + { + "epoch": 0.9473606822526099, + "grad_norm": 0.8172115683555603, + "learning_rate": 4.702539423751159e-06, + "loss": 0.5005, + "step": 12886 + }, + { + "epoch": 0.9474342008528157, + "grad_norm": 0.7992186546325684, + "learning_rate": 4.7024938384489005e-06, + "loss": 0.5063, + "step": 12887 + }, + { + "epoch": 0.9475077194530216, + "grad_norm": 0.8002921342849731, + "learning_rate": 4.702448249874958e-06, + "loss": 0.4918, + "step": 12888 + }, + { + "epoch": 0.9475812380532275, + "grad_norm": 0.8195213079452515, + "learning_rate": 4.7024026580294e-06, + "loss": 0.5518, + "step": 12889 + }, + { + "epoch": 0.9476547566534333, + "grad_norm": 0.7422095537185669, + "learning_rate": 4.702357062912293e-06, + "loss": 0.4595, + "step": 12890 + }, + { + "epoch": 0.9477282752536391, + "grad_norm": 0.7921066284179688, + "learning_rate": 4.702311464523705e-06, + "loss": 0.5001, + "step": 12891 + }, + { + "epoch": 0.947801793853845, + "grad_norm": 0.822811484336853, + "learning_rate": 4.702265862863704e-06, + "loss": 0.5332, + "step": 12892 + }, + { + "epoch": 0.9478753124540509, + "grad_norm": 0.7973006963729858, + "learning_rate": 4.702220257932358e-06, + "loss": 0.5458, + "step": 12893 + }, + { + "epoch": 0.9479488310542568, + "grad_norm": 0.7973834872245789, + "learning_rate": 4.702174649729734e-06, + "loss": 0.5398, + "step": 12894 + }, + { + "epoch": 0.9480223496544625, + "grad_norm": 0.8558759093284607, + "learning_rate": 4.7021290382559004e-06, + "loss": 0.5211, + "step": 12895 + }, + { + "epoch": 0.9480958682546684, + "grad_norm": 0.8649523258209229, + "learning_rate": 4.702083423510925e-06, + "loss": 0.5682, + "step": 12896 + }, + { + "epoch": 0.9481693868548743, + "grad_norm": 0.7933817505836487, + "learning_rate": 4.702037805494875e-06, + "loss": 0.5307, + "step": 12897 + }, + { + "epoch": 0.9482429054550802, + "grad_norm": 0.8093383312225342, + "learning_rate": 4.7019921842078175e-06, + "loss": 0.5263, + "step": 12898 + }, + { + "epoch": 0.9483164240552859, + "grad_norm": 0.8038107752799988, + "learning_rate": 4.701946559649822e-06, + "loss": 0.5165, + "step": 12899 + }, + { + "epoch": 0.9483899426554918, + "grad_norm": 0.7696380019187927, + "learning_rate": 4.701900931820955e-06, + "loss": 0.4896, + "step": 12900 + }, + { + "epoch": 0.9484634612556977, + "grad_norm": 0.7640898823738098, + "learning_rate": 4.701855300721286e-06, + "loss": 0.5456, + "step": 12901 + }, + { + "epoch": 0.9485369798559036, + "grad_norm": 0.8241194486618042, + "learning_rate": 4.701809666350881e-06, + "loss": 0.5319, + "step": 12902 + }, + { + "epoch": 0.9486104984561093, + "grad_norm": 0.842100977897644, + "learning_rate": 4.701764028709808e-06, + "loss": 0.5225, + "step": 12903 + }, + { + "epoch": 0.9486840170563152, + "grad_norm": 0.8249097466468811, + "learning_rate": 4.701718387798134e-06, + "loss": 0.5496, + "step": 12904 + }, + { + "epoch": 0.9487575356565211, + "grad_norm": 0.8307448029518127, + "learning_rate": 4.7016727436159295e-06, + "loss": 0.5133, + "step": 12905 + }, + { + "epoch": 0.948831054256727, + "grad_norm": 0.804196834564209, + "learning_rate": 4.7016270961632606e-06, + "loss": 0.5133, + "step": 12906 + }, + { + "epoch": 0.9489045728569327, + "grad_norm": 0.7918329834938049, + "learning_rate": 4.701581445440194e-06, + "loss": 0.5065, + "step": 12907 + }, + { + "epoch": 0.9489780914571386, + "grad_norm": 0.8254845142364502, + "learning_rate": 4.7015357914468e-06, + "loss": 0.5507, + "step": 12908 + }, + { + "epoch": 0.9490516100573445, + "grad_norm": 0.8235844969749451, + "learning_rate": 4.701490134183144e-06, + "loss": 0.5562, + "step": 12909 + }, + { + "epoch": 0.9491251286575504, + "grad_norm": 0.7921002507209778, + "learning_rate": 4.701444473649297e-06, + "loss": 0.4887, + "step": 12910 + }, + { + "epoch": 0.9491986472577563, + "grad_norm": 0.8251567482948303, + "learning_rate": 4.701398809845323e-06, + "loss": 0.5162, + "step": 12911 + }, + { + "epoch": 0.949272165857962, + "grad_norm": 0.7983090877532959, + "learning_rate": 4.701353142771292e-06, + "loss": 0.4827, + "step": 12912 + }, + { + "epoch": 0.9493456844581679, + "grad_norm": 0.8058785796165466, + "learning_rate": 4.701307472427271e-06, + "loss": 0.5353, + "step": 12913 + }, + { + "epoch": 0.9494192030583738, + "grad_norm": 0.8344943523406982, + "learning_rate": 4.701261798813329e-06, + "loss": 0.5391, + "step": 12914 + }, + { + "epoch": 0.9494927216585797, + "grad_norm": 0.8452015519142151, + "learning_rate": 4.701216121929533e-06, + "loss": 0.5433, + "step": 12915 + }, + { + "epoch": 0.9495662402587854, + "grad_norm": 0.8134019374847412, + "learning_rate": 4.701170441775951e-06, + "loss": 0.5347, + "step": 12916 + }, + { + "epoch": 0.9496397588589913, + "grad_norm": 0.8017851114273071, + "learning_rate": 4.701124758352651e-06, + "loss": 0.492, + "step": 12917 + }, + { + "epoch": 0.9497132774591972, + "grad_norm": 0.7935941815376282, + "learning_rate": 4.7010790716596995e-06, + "loss": 0.5148, + "step": 12918 + }, + { + "epoch": 0.9497867960594031, + "grad_norm": 0.8271809816360474, + "learning_rate": 4.701033381697167e-06, + "loss": 0.5658, + "step": 12919 + }, + { + "epoch": 0.9498603146596089, + "grad_norm": 0.8986924290657043, + "learning_rate": 4.70098768846512e-06, + "loss": 0.6344, + "step": 12920 + }, + { + "epoch": 0.9499338332598147, + "grad_norm": 0.8317413330078125, + "learning_rate": 4.700941991963626e-06, + "loss": 0.5459, + "step": 12921 + }, + { + "epoch": 0.9500073518600206, + "grad_norm": 0.7990757822990417, + "learning_rate": 4.700896292192752e-06, + "loss": 0.509, + "step": 12922 + }, + { + "epoch": 0.9500808704602265, + "grad_norm": 0.7973297238349915, + "learning_rate": 4.700850589152568e-06, + "loss": 0.5009, + "step": 12923 + }, + { + "epoch": 0.9501543890604323, + "grad_norm": 0.7894018292427063, + "learning_rate": 4.7008048828431405e-06, + "loss": 0.5332, + "step": 12924 + }, + { + "epoch": 0.9502279076606381, + "grad_norm": 0.7837730646133423, + "learning_rate": 4.700759173264539e-06, + "loss": 0.5395, + "step": 12925 + }, + { + "epoch": 0.950301426260844, + "grad_norm": 0.8056895136833191, + "learning_rate": 4.70071346041683e-06, + "loss": 0.4639, + "step": 12926 + }, + { + "epoch": 0.9503749448610499, + "grad_norm": 0.7975019812583923, + "learning_rate": 4.7006677443000805e-06, + "loss": 0.5488, + "step": 12927 + }, + { + "epoch": 0.9504484634612557, + "grad_norm": 0.776835024356842, + "learning_rate": 4.700622024914361e-06, + "loss": 0.4994, + "step": 12928 + }, + { + "epoch": 0.9505219820614615, + "grad_norm": 0.8231103420257568, + "learning_rate": 4.700576302259737e-06, + "loss": 0.5763, + "step": 12929 + }, + { + "epoch": 0.9505955006616674, + "grad_norm": 0.8479498028755188, + "learning_rate": 4.700530576336277e-06, + "loss": 0.5756, + "step": 12930 + }, + { + "epoch": 0.9506690192618733, + "grad_norm": 0.8077409267425537, + "learning_rate": 4.700484847144051e-06, + "loss": 0.5356, + "step": 12931 + }, + { + "epoch": 0.9507425378620791, + "grad_norm": 0.791947603225708, + "learning_rate": 4.700439114683124e-06, + "loss": 0.4694, + "step": 12932 + }, + { + "epoch": 0.950816056462285, + "grad_norm": 0.8019623160362244, + "learning_rate": 4.700393378953565e-06, + "loss": 0.5708, + "step": 12933 + }, + { + "epoch": 0.9508895750624908, + "grad_norm": 0.8712536692619324, + "learning_rate": 4.700347639955443e-06, + "loss": 0.5972, + "step": 12934 + }, + { + "epoch": 0.9509630936626967, + "grad_norm": 0.8441437482833862, + "learning_rate": 4.700301897688825e-06, + "loss": 0.552, + "step": 12935 + }, + { + "epoch": 0.9510366122629025, + "grad_norm": 0.8122960925102234, + "learning_rate": 4.700256152153779e-06, + "loss": 0.5302, + "step": 12936 + }, + { + "epoch": 0.9511101308631084, + "grad_norm": 0.8643137812614441, + "learning_rate": 4.700210403350373e-06, + "loss": 0.5018, + "step": 12937 + }, + { + "epoch": 0.9511836494633142, + "grad_norm": 0.8090433478355408, + "learning_rate": 4.700164651278674e-06, + "loss": 0.4922, + "step": 12938 + }, + { + "epoch": 0.9512571680635201, + "grad_norm": 0.7875640988349915, + "learning_rate": 4.700118895938751e-06, + "loss": 0.4983, + "step": 12939 + }, + { + "epoch": 0.9513306866637259, + "grad_norm": 0.8091264367103577, + "learning_rate": 4.700073137330673e-06, + "loss": 0.5068, + "step": 12940 + }, + { + "epoch": 0.9514042052639318, + "grad_norm": 0.8521982431411743, + "learning_rate": 4.700027375454506e-06, + "loss": 0.5301, + "step": 12941 + }, + { + "epoch": 0.9514777238641376, + "grad_norm": 0.8292946815490723, + "learning_rate": 4.699981610310319e-06, + "loss": 0.573, + "step": 12942 + }, + { + "epoch": 0.9515512424643435, + "grad_norm": 0.7928541302680969, + "learning_rate": 4.69993584189818e-06, + "loss": 0.5119, + "step": 12943 + }, + { + "epoch": 0.9516247610645493, + "grad_norm": 0.776757001876831, + "learning_rate": 4.699890070218156e-06, + "loss": 0.4284, + "step": 12944 + }, + { + "epoch": 0.9516982796647552, + "grad_norm": 0.8286495804786682, + "learning_rate": 4.699844295270316e-06, + "loss": 0.525, + "step": 12945 + }, + { + "epoch": 0.951771798264961, + "grad_norm": 0.7783989310264587, + "learning_rate": 4.699798517054728e-06, + "loss": 0.4837, + "step": 12946 + }, + { + "epoch": 0.9518453168651669, + "grad_norm": 0.8024452924728394, + "learning_rate": 4.69975273557146e-06, + "loss": 0.4605, + "step": 12947 + }, + { + "epoch": 0.9519188354653727, + "grad_norm": 0.8550262451171875, + "learning_rate": 4.699706950820579e-06, + "loss": 0.5509, + "step": 12948 + }, + { + "epoch": 0.9519923540655786, + "grad_norm": 0.8012595176696777, + "learning_rate": 4.699661162802155e-06, + "loss": 0.4814, + "step": 12949 + }, + { + "epoch": 0.9520658726657845, + "grad_norm": 0.785831868648529, + "learning_rate": 4.699615371516254e-06, + "loss": 0.4945, + "step": 12950 + }, + { + "epoch": 0.9521393912659903, + "grad_norm": 0.7974480986595154, + "learning_rate": 4.699569576962946e-06, + "loss": 0.4916, + "step": 12951 + }, + { + "epoch": 0.9522129098661961, + "grad_norm": 0.8391967415809631, + "learning_rate": 4.699523779142296e-06, + "loss": 0.5559, + "step": 12952 + }, + { + "epoch": 0.952286428466402, + "grad_norm": 0.8405502438545227, + "learning_rate": 4.6994779780543744e-06, + "loss": 0.5565, + "step": 12953 + }, + { + "epoch": 0.9523599470666079, + "grad_norm": 0.7960054278373718, + "learning_rate": 4.699432173699249e-06, + "loss": 0.5195, + "step": 12954 + }, + { + "epoch": 0.9524334656668137, + "grad_norm": 0.8003990054130554, + "learning_rate": 4.699386366076988e-06, + "loss": 0.5146, + "step": 12955 + }, + { + "epoch": 0.9525069842670195, + "grad_norm": 0.8309160470962524, + "learning_rate": 4.699340555187658e-06, + "loss": 0.5445, + "step": 12956 + }, + { + "epoch": 0.9525805028672254, + "grad_norm": 0.8193902373313904, + "learning_rate": 4.699294741031329e-06, + "loss": 0.5202, + "step": 12957 + }, + { + "epoch": 0.9526540214674313, + "grad_norm": 0.7720134854316711, + "learning_rate": 4.6992489236080675e-06, + "loss": 0.4898, + "step": 12958 + }, + { + "epoch": 0.9527275400676372, + "grad_norm": 0.807982325553894, + "learning_rate": 4.699203102917943e-06, + "loss": 0.4814, + "step": 12959 + }, + { + "epoch": 0.9528010586678429, + "grad_norm": 0.7964876890182495, + "learning_rate": 4.699157278961022e-06, + "loss": 0.5158, + "step": 12960 + }, + { + "epoch": 0.9528745772680488, + "grad_norm": 0.8278550505638123, + "learning_rate": 4.6991114517373724e-06, + "loss": 0.5472, + "step": 12961 + }, + { + "epoch": 0.9529480958682547, + "grad_norm": 0.8023400902748108, + "learning_rate": 4.699065621247064e-06, + "loss": 0.508, + "step": 12962 + }, + { + "epoch": 0.9530216144684606, + "grad_norm": 0.8294438123703003, + "learning_rate": 4.6990197874901645e-06, + "loss": 0.4683, + "step": 12963 + }, + { + "epoch": 0.9530951330686663, + "grad_norm": 0.8035861253738403, + "learning_rate": 4.6989739504667405e-06, + "loss": 0.4897, + "step": 12964 + }, + { + "epoch": 0.9531686516688722, + "grad_norm": 0.8022502660751343, + "learning_rate": 4.698928110176863e-06, + "loss": 0.5255, + "step": 12965 + }, + { + "epoch": 0.9532421702690781, + "grad_norm": 0.8415276408195496, + "learning_rate": 4.6988822666205955e-06, + "loss": 0.5341, + "step": 12966 + }, + { + "epoch": 0.953315688869284, + "grad_norm": 0.8247674107551575, + "learning_rate": 4.698836419798011e-06, + "loss": 0.5114, + "step": 12967 + }, + { + "epoch": 0.9533892074694897, + "grad_norm": 0.8032832741737366, + "learning_rate": 4.698790569709174e-06, + "loss": 0.5187, + "step": 12968 + }, + { + "epoch": 0.9534627260696956, + "grad_norm": 0.8323307633399963, + "learning_rate": 4.698744716354155e-06, + "loss": 0.5492, + "step": 12969 + }, + { + "epoch": 0.9535362446699015, + "grad_norm": 0.7577111721038818, + "learning_rate": 4.69869885973302e-06, + "loss": 0.4615, + "step": 12970 + }, + { + "epoch": 0.9536097632701074, + "grad_norm": 0.8514518141746521, + "learning_rate": 4.698652999845839e-06, + "loss": 0.5128, + "step": 12971 + }, + { + "epoch": 0.9536832818703131, + "grad_norm": 0.8139685392379761, + "learning_rate": 4.69860713669268e-06, + "loss": 0.5259, + "step": 12972 + }, + { + "epoch": 0.953756800470519, + "grad_norm": 0.8655186891555786, + "learning_rate": 4.69856127027361e-06, + "loss": 0.5038, + "step": 12973 + }, + { + "epoch": 0.9538303190707249, + "grad_norm": 0.8441364169120789, + "learning_rate": 4.698515400588698e-06, + "loss": 0.5512, + "step": 12974 + }, + { + "epoch": 0.9539038376709308, + "grad_norm": 0.8866949677467346, + "learning_rate": 4.698469527638011e-06, + "loss": 0.6052, + "step": 12975 + }, + { + "epoch": 0.9539773562711366, + "grad_norm": 0.8205017447471619, + "learning_rate": 4.6984236514216175e-06, + "loss": 0.5403, + "step": 12976 + }, + { + "epoch": 0.9540508748713424, + "grad_norm": 0.8563524484634399, + "learning_rate": 4.698377771939587e-06, + "loss": 0.5621, + "step": 12977 + }, + { + "epoch": 0.9541243934715483, + "grad_norm": 0.7878749966621399, + "learning_rate": 4.698331889191986e-06, + "loss": 0.5011, + "step": 12978 + }, + { + "epoch": 0.9541979120717542, + "grad_norm": 0.8464922904968262, + "learning_rate": 4.698286003178884e-06, + "loss": 0.4935, + "step": 12979 + }, + { + "epoch": 0.95427143067196, + "grad_norm": 0.8451545238494873, + "learning_rate": 4.698240113900349e-06, + "loss": 0.5359, + "step": 12980 + }, + { + "epoch": 0.9543449492721658, + "grad_norm": 0.8195537328720093, + "learning_rate": 4.698194221356447e-06, + "loss": 0.5245, + "step": 12981 + }, + { + "epoch": 0.9544184678723717, + "grad_norm": 0.7863239645957947, + "learning_rate": 4.698148325547249e-06, + "loss": 0.4895, + "step": 12982 + }, + { + "epoch": 0.9544919864725776, + "grad_norm": 0.8399698138237, + "learning_rate": 4.698102426472822e-06, + "loss": 0.5571, + "step": 12983 + }, + { + "epoch": 0.9545655050727834, + "grad_norm": 0.8179692625999451, + "learning_rate": 4.698056524133234e-06, + "loss": 0.542, + "step": 12984 + }, + { + "epoch": 0.9546390236729893, + "grad_norm": 0.8537431359291077, + "learning_rate": 4.6980106185285536e-06, + "loss": 0.558, + "step": 12985 + }, + { + "epoch": 0.9547125422731951, + "grad_norm": 0.8439038395881653, + "learning_rate": 4.697964709658849e-06, + "loss": 0.528, + "step": 12986 + }, + { + "epoch": 0.954786060873401, + "grad_norm": 0.823293924331665, + "learning_rate": 4.697918797524187e-06, + "loss": 0.5141, + "step": 12987 + }, + { + "epoch": 0.9548595794736068, + "grad_norm": 0.7949049472808838, + "learning_rate": 4.6978728821246376e-06, + "loss": 0.48, + "step": 12988 + }, + { + "epoch": 0.9549330980738127, + "grad_norm": 0.8234531283378601, + "learning_rate": 4.697826963460269e-06, + "loss": 0.4977, + "step": 12989 + }, + { + "epoch": 0.9550066166740185, + "grad_norm": 0.7825401425361633, + "learning_rate": 4.697781041531148e-06, + "loss": 0.5068, + "step": 12990 + }, + { + "epoch": 0.9550801352742244, + "grad_norm": 0.8130341172218323, + "learning_rate": 4.697735116337344e-06, + "loss": 0.4854, + "step": 12991 + }, + { + "epoch": 0.9551536538744302, + "grad_norm": 0.8119472861289978, + "learning_rate": 4.697689187878925e-06, + "loss": 0.523, + "step": 12992 + }, + { + "epoch": 0.9552271724746361, + "grad_norm": 0.790073573589325, + "learning_rate": 4.697643256155959e-06, + "loss": 0.5311, + "step": 12993 + }, + { + "epoch": 0.955300691074842, + "grad_norm": 0.8082317113876343, + "learning_rate": 4.697597321168514e-06, + "loss": 0.4883, + "step": 12994 + }, + { + "epoch": 0.9553742096750478, + "grad_norm": 0.8000502586364746, + "learning_rate": 4.697551382916659e-06, + "loss": 0.5167, + "step": 12995 + }, + { + "epoch": 0.9554477282752536, + "grad_norm": 0.8166965246200562, + "learning_rate": 4.697505441400461e-06, + "loss": 0.555, + "step": 12996 + }, + { + "epoch": 0.9555212468754595, + "grad_norm": 0.8347498774528503, + "learning_rate": 4.697459496619989e-06, + "loss": 0.5376, + "step": 12997 + }, + { + "epoch": 0.9555947654756654, + "grad_norm": 0.7984060645103455, + "learning_rate": 4.697413548575312e-06, + "loss": 0.5354, + "step": 12998 + }, + { + "epoch": 0.9556682840758712, + "grad_norm": 0.8111883401870728, + "learning_rate": 4.697367597266498e-06, + "loss": 0.517, + "step": 12999 + }, + { + "epoch": 0.955741802676077, + "grad_norm": 0.8483935594558716, + "learning_rate": 4.697321642693613e-06, + "loss": 0.4964, + "step": 13000 + }, + { + "epoch": 0.9558153212762829, + "grad_norm": 0.8309717774391174, + "learning_rate": 4.697275684856728e-06, + "loss": 0.5303, + "step": 13001 + }, + { + "epoch": 0.9558888398764888, + "grad_norm": 0.7868986129760742, + "learning_rate": 4.697229723755911e-06, + "loss": 0.4814, + "step": 13002 + }, + { + "epoch": 0.9559623584766946, + "grad_norm": 0.842043936252594, + "learning_rate": 4.697183759391228e-06, + "loss": 0.53, + "step": 13003 + }, + { + "epoch": 0.9560358770769004, + "grad_norm": 0.8271235823631287, + "learning_rate": 4.69713779176275e-06, + "loss": 0.5314, + "step": 13004 + }, + { + "epoch": 0.9561093956771063, + "grad_norm": 0.7948480248451233, + "learning_rate": 4.697091820870544e-06, + "loss": 0.4842, + "step": 13005 + }, + { + "epoch": 0.9561829142773122, + "grad_norm": 0.8041320443153381, + "learning_rate": 4.697045846714678e-06, + "loss": 0.5446, + "step": 13006 + }, + { + "epoch": 0.956256432877518, + "grad_norm": 0.7997049689292908, + "learning_rate": 4.696999869295221e-06, + "loss": 0.4942, + "step": 13007 + }, + { + "epoch": 0.9563299514777238, + "grad_norm": 0.7808353304862976, + "learning_rate": 4.696953888612241e-06, + "loss": 0.5077, + "step": 13008 + }, + { + "epoch": 0.9564034700779297, + "grad_norm": 0.7998905777931213, + "learning_rate": 4.696907904665806e-06, + "loss": 0.4939, + "step": 13009 + }, + { + "epoch": 0.9564769886781356, + "grad_norm": 0.8178301453590393, + "learning_rate": 4.696861917455985e-06, + "loss": 0.5412, + "step": 13010 + }, + { + "epoch": 0.9565505072783415, + "grad_norm": 0.8202406764030457, + "learning_rate": 4.696815926982845e-06, + "loss": 0.5656, + "step": 13011 + }, + { + "epoch": 0.9566240258785472, + "grad_norm": 0.8147901892662048, + "learning_rate": 4.696769933246456e-06, + "loss": 0.551, + "step": 13012 + }, + { + "epoch": 0.9566975444787531, + "grad_norm": 0.8405466079711914, + "learning_rate": 4.696723936246885e-06, + "loss": 0.5663, + "step": 13013 + }, + { + "epoch": 0.956771063078959, + "grad_norm": 0.827465832233429, + "learning_rate": 4.696677935984202e-06, + "loss": 0.5012, + "step": 13014 + }, + { + "epoch": 0.9568445816791649, + "grad_norm": 0.8073240518569946, + "learning_rate": 4.696631932458474e-06, + "loss": 0.482, + "step": 13015 + }, + { + "epoch": 0.9569181002793706, + "grad_norm": 0.8202147483825684, + "learning_rate": 4.696585925669769e-06, + "loss": 0.5431, + "step": 13016 + }, + { + "epoch": 0.9569916188795765, + "grad_norm": 0.8143343925476074, + "learning_rate": 4.696539915618156e-06, + "loss": 0.5238, + "step": 13017 + }, + { + "epoch": 0.9570651374797824, + "grad_norm": 0.8175305128097534, + "learning_rate": 4.696493902303703e-06, + "loss": 0.495, + "step": 13018 + }, + { + "epoch": 0.9571386560799883, + "grad_norm": 0.822433352470398, + "learning_rate": 4.6964478857264786e-06, + "loss": 0.502, + "step": 13019 + }, + { + "epoch": 0.957212174680194, + "grad_norm": 0.7841078042984009, + "learning_rate": 4.696401865886552e-06, + "loss": 0.5288, + "step": 13020 + }, + { + "epoch": 0.9572856932803999, + "grad_norm": 0.8296602368354797, + "learning_rate": 4.696355842783989e-06, + "loss": 0.5578, + "step": 13021 + }, + { + "epoch": 0.9573592118806058, + "grad_norm": 0.7778880000114441, + "learning_rate": 4.6963098164188615e-06, + "loss": 0.479, + "step": 13022 + }, + { + "epoch": 0.9574327304808117, + "grad_norm": 0.8288112282752991, + "learning_rate": 4.6962637867912355e-06, + "loss": 0.5304, + "step": 13023 + }, + { + "epoch": 0.9575062490810174, + "grad_norm": 0.8084984421730042, + "learning_rate": 4.696217753901179e-06, + "loss": 0.5108, + "step": 13024 + }, + { + "epoch": 0.9575797676812233, + "grad_norm": 0.8141902089118958, + "learning_rate": 4.696171717748762e-06, + "loss": 0.5128, + "step": 13025 + }, + { + "epoch": 0.9576532862814292, + "grad_norm": 0.7621471285820007, + "learning_rate": 4.696125678334053e-06, + "loss": 0.5139, + "step": 13026 + }, + { + "epoch": 0.9577268048816351, + "grad_norm": 0.8336462378501892, + "learning_rate": 4.696079635657118e-06, + "loss": 0.5664, + "step": 13027 + }, + { + "epoch": 0.9578003234818409, + "grad_norm": 0.8082940578460693, + "learning_rate": 4.6960335897180275e-06, + "loss": 0.5162, + "step": 13028 + }, + { + "epoch": 0.9578738420820467, + "grad_norm": 0.7936598658561707, + "learning_rate": 4.69598754051685e-06, + "loss": 0.5089, + "step": 13029 + }, + { + "epoch": 0.9579473606822526, + "grad_norm": 0.8050732612609863, + "learning_rate": 4.695941488053653e-06, + "loss": 0.5277, + "step": 13030 + }, + { + "epoch": 0.9580208792824585, + "grad_norm": 0.8320176005363464, + "learning_rate": 4.6958954323285045e-06, + "loss": 0.5039, + "step": 13031 + }, + { + "epoch": 0.9580943978826643, + "grad_norm": 0.7657521963119507, + "learning_rate": 4.695849373341474e-06, + "loss": 0.4744, + "step": 13032 + }, + { + "epoch": 0.9581679164828701, + "grad_norm": 0.8280256986618042, + "learning_rate": 4.69580331109263e-06, + "loss": 0.538, + "step": 13033 + }, + { + "epoch": 0.958241435083076, + "grad_norm": 0.8220599293708801, + "learning_rate": 4.695757245582039e-06, + "loss": 0.4565, + "step": 13034 + }, + { + "epoch": 0.9583149536832819, + "grad_norm": 0.7839629054069519, + "learning_rate": 4.695711176809772e-06, + "loss": 0.4862, + "step": 13035 + }, + { + "epoch": 0.9583884722834877, + "grad_norm": 0.7672079205513, + "learning_rate": 4.695665104775896e-06, + "loss": 0.5244, + "step": 13036 + }, + { + "epoch": 0.9584619908836935, + "grad_norm": 0.8432726860046387, + "learning_rate": 4.69561902948048e-06, + "loss": 0.5454, + "step": 13037 + }, + { + "epoch": 0.9585355094838994, + "grad_norm": 0.8326320648193359, + "learning_rate": 4.695572950923591e-06, + "loss": 0.5308, + "step": 13038 + }, + { + "epoch": 0.9586090280841053, + "grad_norm": 0.8224483132362366, + "learning_rate": 4.6955268691053e-06, + "loss": 0.5183, + "step": 13039 + }, + { + "epoch": 0.9586825466843111, + "grad_norm": 0.840118408203125, + "learning_rate": 4.695480784025674e-06, + "loss": 0.5127, + "step": 13040 + }, + { + "epoch": 0.958756065284517, + "grad_norm": 0.8162356019020081, + "learning_rate": 4.695434695684781e-06, + "loss": 0.5059, + "step": 13041 + }, + { + "epoch": 0.9588295838847228, + "grad_norm": 0.8541580438613892, + "learning_rate": 4.6953886040826905e-06, + "loss": 0.5681, + "step": 13042 + }, + { + "epoch": 0.9589031024849287, + "grad_norm": 0.8292033076286316, + "learning_rate": 4.69534250921947e-06, + "loss": 0.5332, + "step": 13043 + }, + { + "epoch": 0.9589766210851345, + "grad_norm": 0.8319926857948303, + "learning_rate": 4.695296411095188e-06, + "loss": 0.5376, + "step": 13044 + }, + { + "epoch": 0.9590501396853404, + "grad_norm": 0.8248200416564941, + "learning_rate": 4.695250309709914e-06, + "loss": 0.5277, + "step": 13045 + }, + { + "epoch": 0.9591236582855462, + "grad_norm": 0.7876359224319458, + "learning_rate": 4.695204205063716e-06, + "loss": 0.4813, + "step": 13046 + }, + { + "epoch": 0.9591971768857521, + "grad_norm": 0.9019877910614014, + "learning_rate": 4.6951580971566624e-06, + "loss": 0.5466, + "step": 13047 + }, + { + "epoch": 0.959270695485958, + "grad_norm": 0.8131929039955139, + "learning_rate": 4.69511198598882e-06, + "loss": 0.5331, + "step": 13048 + }, + { + "epoch": 0.9593442140861638, + "grad_norm": 0.8821544647216797, + "learning_rate": 4.695065871560261e-06, + "loss": 0.5232, + "step": 13049 + }, + { + "epoch": 0.9594177326863697, + "grad_norm": 0.8216239213943481, + "learning_rate": 4.695019753871051e-06, + "loss": 0.5869, + "step": 13050 + }, + { + "epoch": 0.9594912512865755, + "grad_norm": 0.7981279492378235, + "learning_rate": 4.69497363292126e-06, + "loss": 0.5472, + "step": 13051 + }, + { + "epoch": 0.9595647698867814, + "grad_norm": 0.8010904788970947, + "learning_rate": 4.6949275087109545e-06, + "loss": 0.533, + "step": 13052 + }, + { + "epoch": 0.9596382884869872, + "grad_norm": 0.8209830522537231, + "learning_rate": 4.694881381240206e-06, + "loss": 0.5164, + "step": 13053 + }, + { + "epoch": 0.9597118070871931, + "grad_norm": 0.8234881162643433, + "learning_rate": 4.6948352505090805e-06, + "loss": 0.5179, + "step": 13054 + }, + { + "epoch": 0.9597853256873989, + "grad_norm": 0.8029109835624695, + "learning_rate": 4.6947891165176475e-06, + "loss": 0.5448, + "step": 13055 + }, + { + "epoch": 0.9598588442876048, + "grad_norm": 0.8693456053733826, + "learning_rate": 4.694742979265975e-06, + "loss": 0.4997, + "step": 13056 + }, + { + "epoch": 0.9599323628878106, + "grad_norm": 0.8547256588935852, + "learning_rate": 4.694696838754133e-06, + "loss": 0.522, + "step": 13057 + }, + { + "epoch": 0.9600058814880165, + "grad_norm": 0.7941811084747314, + "learning_rate": 4.694650694982188e-06, + "loss": 0.488, + "step": 13058 + }, + { + "epoch": 0.9600794000882223, + "grad_norm": 0.8071573972702026, + "learning_rate": 4.6946045479502104e-06, + "loss": 0.5275, + "step": 13059 + }, + { + "epoch": 0.9601529186884282, + "grad_norm": 0.8823153972625732, + "learning_rate": 4.694558397658268e-06, + "loss": 0.5412, + "step": 13060 + }, + { + "epoch": 0.960226437288634, + "grad_norm": 0.7925499677658081, + "learning_rate": 4.6945122441064285e-06, + "loss": 0.4837, + "step": 13061 + }, + { + "epoch": 0.9602999558888399, + "grad_norm": 0.8616592288017273, + "learning_rate": 4.6944660872947616e-06, + "loss": 0.5488, + "step": 13062 + }, + { + "epoch": 0.9603734744890458, + "grad_norm": 0.7854942083358765, + "learning_rate": 4.694419927223336e-06, + "loss": 0.5293, + "step": 13063 + }, + { + "epoch": 0.9604469930892516, + "grad_norm": 0.8658326864242554, + "learning_rate": 4.694373763892219e-06, + "loss": 0.5631, + "step": 13064 + }, + { + "epoch": 0.9605205116894574, + "grad_norm": 0.8066447377204895, + "learning_rate": 4.69432759730148e-06, + "loss": 0.5092, + "step": 13065 + }, + { + "epoch": 0.9605940302896633, + "grad_norm": 0.7906829714775085, + "learning_rate": 4.694281427451188e-06, + "loss": 0.5506, + "step": 13066 + }, + { + "epoch": 0.9606675488898692, + "grad_norm": 0.8371718525886536, + "learning_rate": 4.69423525434141e-06, + "loss": 0.5193, + "step": 13067 + }, + { + "epoch": 0.960741067490075, + "grad_norm": 0.7529963254928589, + "learning_rate": 4.694189077972217e-06, + "loss": 0.4803, + "step": 13068 + }, + { + "epoch": 0.9608145860902808, + "grad_norm": 0.843265175819397, + "learning_rate": 4.694142898343675e-06, + "loss": 0.5252, + "step": 13069 + }, + { + "epoch": 0.9608881046904867, + "grad_norm": 0.8066529631614685, + "learning_rate": 4.694096715455855e-06, + "loss": 0.54, + "step": 13070 + }, + { + "epoch": 0.9609616232906926, + "grad_norm": 0.8192151188850403, + "learning_rate": 4.694050529308825e-06, + "loss": 0.4942, + "step": 13071 + }, + { + "epoch": 0.9610351418908984, + "grad_norm": 0.8269169926643372, + "learning_rate": 4.694004339902652e-06, + "loss": 0.4219, + "step": 13072 + }, + { + "epoch": 0.9611086604911042, + "grad_norm": 0.8283172845840454, + "learning_rate": 4.693958147237405e-06, + "loss": 0.5238, + "step": 13073 + }, + { + "epoch": 0.9611821790913101, + "grad_norm": 0.8762542605400085, + "learning_rate": 4.693911951313155e-06, + "loss": 0.496, + "step": 13074 + }, + { + "epoch": 0.961255697691516, + "grad_norm": 0.7903136610984802, + "learning_rate": 4.693865752129968e-06, + "loss": 0.5232, + "step": 13075 + }, + { + "epoch": 0.9613292162917219, + "grad_norm": 0.829400897026062, + "learning_rate": 4.693819549687914e-06, + "loss": 0.4921, + "step": 13076 + }, + { + "epoch": 0.9614027348919276, + "grad_norm": 0.8150823712348938, + "learning_rate": 4.6937733439870604e-06, + "loss": 0.5066, + "step": 13077 + }, + { + "epoch": 0.9614762534921335, + "grad_norm": 0.796834409236908, + "learning_rate": 4.693727135027477e-06, + "loss": 0.527, + "step": 13078 + }, + { + "epoch": 0.9615497720923394, + "grad_norm": 0.8195757865905762, + "learning_rate": 4.693680922809232e-06, + "loss": 0.5616, + "step": 13079 + }, + { + "epoch": 0.9616232906925453, + "grad_norm": 0.7739953398704529, + "learning_rate": 4.693634707332394e-06, + "loss": 0.523, + "step": 13080 + }, + { + "epoch": 0.961696809292751, + "grad_norm": 0.8329055309295654, + "learning_rate": 4.693588488597032e-06, + "loss": 0.5158, + "step": 13081 + }, + { + "epoch": 0.9617703278929569, + "grad_norm": 0.8382688164710999, + "learning_rate": 4.693542266603215e-06, + "loss": 0.5089, + "step": 13082 + }, + { + "epoch": 0.9618438464931628, + "grad_norm": 0.8440569043159485, + "learning_rate": 4.69349604135101e-06, + "loss": 0.5216, + "step": 13083 + }, + { + "epoch": 0.9619173650933687, + "grad_norm": 0.7756239175796509, + "learning_rate": 4.693449812840487e-06, + "loss": 0.5275, + "step": 13084 + }, + { + "epoch": 0.9619908836935744, + "grad_norm": 0.8710290789604187, + "learning_rate": 4.693403581071715e-06, + "loss": 0.5304, + "step": 13085 + }, + { + "epoch": 0.9620644022937803, + "grad_norm": 0.8728243112564087, + "learning_rate": 4.693357346044762e-06, + "loss": 0.5308, + "step": 13086 + }, + { + "epoch": 0.9621379208939862, + "grad_norm": 0.7910879850387573, + "learning_rate": 4.693311107759696e-06, + "loss": 0.5359, + "step": 13087 + }, + { + "epoch": 0.9622114394941921, + "grad_norm": 0.8447602391242981, + "learning_rate": 4.693264866216588e-06, + "loss": 0.5074, + "step": 13088 + }, + { + "epoch": 0.9622849580943978, + "grad_norm": 0.785169243812561, + "learning_rate": 4.6932186214155035e-06, + "loss": 0.4979, + "step": 13089 + }, + { + "epoch": 0.9623584766946037, + "grad_norm": 0.7860833406448364, + "learning_rate": 4.6931723733565135e-06, + "loss": 0.5231, + "step": 13090 + }, + { + "epoch": 0.9624319952948096, + "grad_norm": 0.8010823726654053, + "learning_rate": 4.6931261220396866e-06, + "loss": 0.5011, + "step": 13091 + }, + { + "epoch": 0.9625055138950155, + "grad_norm": 0.843231737613678, + "learning_rate": 4.69307986746509e-06, + "loss": 0.5477, + "step": 13092 + }, + { + "epoch": 0.9625790324952213, + "grad_norm": 0.8323673009872437, + "learning_rate": 4.693033609632793e-06, + "loss": 0.5683, + "step": 13093 + }, + { + "epoch": 0.9626525510954271, + "grad_norm": 0.8442392945289612, + "learning_rate": 4.692987348542866e-06, + "loss": 0.4945, + "step": 13094 + }, + { + "epoch": 0.962726069695633, + "grad_norm": 0.8031865358352661, + "learning_rate": 4.692941084195376e-06, + "loss": 0.4976, + "step": 13095 + }, + { + "epoch": 0.9627995882958389, + "grad_norm": 0.8335294723510742, + "learning_rate": 4.692894816590391e-06, + "loss": 0.547, + "step": 13096 + }, + { + "epoch": 0.9628731068960447, + "grad_norm": 0.8243638873100281, + "learning_rate": 4.692848545727981e-06, + "loss": 0.5214, + "step": 13097 + }, + { + "epoch": 0.9629466254962505, + "grad_norm": 0.8103165626525879, + "learning_rate": 4.692802271608216e-06, + "loss": 0.5031, + "step": 13098 + }, + { + "epoch": 0.9630201440964564, + "grad_norm": 0.799589991569519, + "learning_rate": 4.6927559942311625e-06, + "loss": 0.5066, + "step": 13099 + }, + { + "epoch": 0.9630936626966623, + "grad_norm": 0.7933424711227417, + "learning_rate": 4.6927097135968905e-06, + "loss": 0.5288, + "step": 13100 + }, + { + "epoch": 0.9631671812968681, + "grad_norm": 0.8527258038520813, + "learning_rate": 4.692663429705468e-06, + "loss": 0.514, + "step": 13101 + }, + { + "epoch": 0.963240699897074, + "grad_norm": 0.835639238357544, + "learning_rate": 4.6926171425569634e-06, + "loss": 0.544, + "step": 13102 + }, + { + "epoch": 0.9633142184972798, + "grad_norm": 0.8818907737731934, + "learning_rate": 4.6925708521514464e-06, + "loss": 0.5623, + "step": 13103 + }, + { + "epoch": 0.9633877370974857, + "grad_norm": 0.814808189868927, + "learning_rate": 4.692524558488985e-06, + "loss": 0.5038, + "step": 13104 + }, + { + "epoch": 0.9634612556976915, + "grad_norm": 0.8556792736053467, + "learning_rate": 4.692478261569649e-06, + "loss": 0.5214, + "step": 13105 + }, + { + "epoch": 0.9635347742978974, + "grad_norm": 0.8321027159690857, + "learning_rate": 4.692431961393507e-06, + "loss": 0.556, + "step": 13106 + }, + { + "epoch": 0.9636082928981032, + "grad_norm": 0.7766373753547668, + "learning_rate": 4.692385657960628e-06, + "loss": 0.5106, + "step": 13107 + }, + { + "epoch": 0.9636818114983091, + "grad_norm": 0.8001741766929626, + "learning_rate": 4.692339351271079e-06, + "loss": 0.5094, + "step": 13108 + }, + { + "epoch": 0.9637553300985149, + "grad_norm": 0.781761646270752, + "learning_rate": 4.69229304132493e-06, + "loss": 0.4858, + "step": 13109 + }, + { + "epoch": 0.9638288486987208, + "grad_norm": 0.8015168905258179, + "learning_rate": 4.6922467281222496e-06, + "loss": 0.5383, + "step": 13110 + }, + { + "epoch": 0.9639023672989266, + "grad_norm": 0.7972438931465149, + "learning_rate": 4.692200411663107e-06, + "loss": 0.5062, + "step": 13111 + }, + { + "epoch": 0.9639758858991325, + "grad_norm": 0.822363018989563, + "learning_rate": 4.692154091947571e-06, + "loss": 0.5577, + "step": 13112 + }, + { + "epoch": 0.9640494044993383, + "grad_norm": 0.8166245818138123, + "learning_rate": 4.69210776897571e-06, + "loss": 0.5049, + "step": 13113 + }, + { + "epoch": 0.9641229230995442, + "grad_norm": 0.8361416459083557, + "learning_rate": 4.692061442747593e-06, + "loss": 0.5272, + "step": 13114 + }, + { + "epoch": 0.96419644169975, + "grad_norm": 0.8020101189613342, + "learning_rate": 4.6920151132632886e-06, + "loss": 0.5319, + "step": 13115 + }, + { + "epoch": 0.9642699602999559, + "grad_norm": 0.7620130181312561, + "learning_rate": 4.691968780522865e-06, + "loss": 0.5139, + "step": 13116 + }, + { + "epoch": 0.9643434789001617, + "grad_norm": 0.8555250763893127, + "learning_rate": 4.691922444526393e-06, + "loss": 0.5467, + "step": 13117 + }, + { + "epoch": 0.9644169975003676, + "grad_norm": 0.790247917175293, + "learning_rate": 4.69187610527394e-06, + "loss": 0.4899, + "step": 13118 + }, + { + "epoch": 0.9644905161005735, + "grad_norm": 0.8658046126365662, + "learning_rate": 4.6918297627655745e-06, + "loss": 0.5523, + "step": 13119 + }, + { + "epoch": 0.9645640347007793, + "grad_norm": 0.7657522559165955, + "learning_rate": 4.691783417001366e-06, + "loss": 0.4712, + "step": 13120 + }, + { + "epoch": 0.9646375533009851, + "grad_norm": 0.8543522953987122, + "learning_rate": 4.691737067981384e-06, + "loss": 0.5333, + "step": 13121 + }, + { + "epoch": 0.964711071901191, + "grad_norm": 0.7997320890426636, + "learning_rate": 4.691690715705696e-06, + "loss": 0.5581, + "step": 13122 + }, + { + "epoch": 0.9647845905013969, + "grad_norm": 0.8090802431106567, + "learning_rate": 4.691644360174372e-06, + "loss": 0.5706, + "step": 13123 + }, + { + "epoch": 0.9648581091016027, + "grad_norm": 0.8434675931930542, + "learning_rate": 4.691598001387479e-06, + "loss": 0.5415, + "step": 13124 + }, + { + "epoch": 0.9649316277018085, + "grad_norm": 0.8058936595916748, + "learning_rate": 4.6915516393450885e-06, + "loss": 0.5625, + "step": 13125 + }, + { + "epoch": 0.9650051463020144, + "grad_norm": 0.7763025164604187, + "learning_rate": 4.691505274047268e-06, + "loss": 0.5316, + "step": 13126 + }, + { + "epoch": 0.9650786649022203, + "grad_norm": 0.8972384333610535, + "learning_rate": 4.691458905494086e-06, + "loss": 0.5737, + "step": 13127 + }, + { + "epoch": 0.9651521835024262, + "grad_norm": 0.807519793510437, + "learning_rate": 4.691412533685612e-06, + "loss": 0.5554, + "step": 13128 + }, + { + "epoch": 0.9652257021026319, + "grad_norm": 0.7608625292778015, + "learning_rate": 4.6913661586219136e-06, + "loss": 0.4999, + "step": 13129 + }, + { + "epoch": 0.9652992207028378, + "grad_norm": 0.8086706399917603, + "learning_rate": 4.691319780303062e-06, + "loss": 0.4979, + "step": 13130 + }, + { + "epoch": 0.9653727393030437, + "grad_norm": 0.8189239501953125, + "learning_rate": 4.691273398729125e-06, + "loss": 0.5294, + "step": 13131 + }, + { + "epoch": 0.9654462579032496, + "grad_norm": 0.7919953465461731, + "learning_rate": 4.69122701390017e-06, + "loss": 0.5238, + "step": 13132 + }, + { + "epoch": 0.9655197765034553, + "grad_norm": 0.787329912185669, + "learning_rate": 4.691180625816269e-06, + "loss": 0.5108, + "step": 13133 + }, + { + "epoch": 0.9655932951036612, + "grad_norm": 0.836691677570343, + "learning_rate": 4.691134234477488e-06, + "loss": 0.5266, + "step": 13134 + }, + { + "epoch": 0.9656668137038671, + "grad_norm": 0.8170356154441833, + "learning_rate": 4.691087839883898e-06, + "loss": 0.5686, + "step": 13135 + }, + { + "epoch": 0.965740332304073, + "grad_norm": 0.8318507671356201, + "learning_rate": 4.691041442035566e-06, + "loss": 0.5217, + "step": 13136 + }, + { + "epoch": 0.9658138509042787, + "grad_norm": 0.8583023548126221, + "learning_rate": 4.690995040932562e-06, + "loss": 0.4772, + "step": 13137 + }, + { + "epoch": 0.9658873695044846, + "grad_norm": 0.7961549758911133, + "learning_rate": 4.690948636574956e-06, + "loss": 0.5591, + "step": 13138 + }, + { + "epoch": 0.9659608881046905, + "grad_norm": 0.794687807559967, + "learning_rate": 4.690902228962815e-06, + "loss": 0.52, + "step": 13139 + }, + { + "epoch": 0.9660344067048964, + "grad_norm": 0.8013433218002319, + "learning_rate": 4.690855818096208e-06, + "loss": 0.5059, + "step": 13140 + }, + { + "epoch": 0.9661079253051021, + "grad_norm": 0.8238387703895569, + "learning_rate": 4.690809403975205e-06, + "loss": 0.5275, + "step": 13141 + }, + { + "epoch": 0.966181443905308, + "grad_norm": 0.7931861281394958, + "learning_rate": 4.690762986599875e-06, + "loss": 0.4534, + "step": 13142 + }, + { + "epoch": 0.9662549625055139, + "grad_norm": 0.8484964966773987, + "learning_rate": 4.690716565970287e-06, + "loss": 0.5403, + "step": 13143 + }, + { + "epoch": 0.9663284811057198, + "grad_norm": 0.8049620389938354, + "learning_rate": 4.6906701420865085e-06, + "loss": 0.5104, + "step": 13144 + }, + { + "epoch": 0.9664019997059256, + "grad_norm": 0.8080085515975952, + "learning_rate": 4.69062371494861e-06, + "loss": 0.5458, + "step": 13145 + }, + { + "epoch": 0.9664755183061314, + "grad_norm": 0.8784766793251038, + "learning_rate": 4.69057728455666e-06, + "loss": 0.5456, + "step": 13146 + }, + { + "epoch": 0.9665490369063373, + "grad_norm": 0.785374104976654, + "learning_rate": 4.690530850910727e-06, + "loss": 0.4882, + "step": 13147 + }, + { + "epoch": 0.9666225555065432, + "grad_norm": 0.7716122269630432, + "learning_rate": 4.690484414010881e-06, + "loss": 0.4948, + "step": 13148 + }, + { + "epoch": 0.966696074106749, + "grad_norm": 0.7796360850334167, + "learning_rate": 4.69043797385719e-06, + "loss": 0.4956, + "step": 13149 + }, + { + "epoch": 0.9667695927069548, + "grad_norm": 0.830773651599884, + "learning_rate": 4.690391530449722e-06, + "loss": 0.5281, + "step": 13150 + }, + { + "epoch": 0.9668431113071607, + "grad_norm": 0.8891956806182861, + "learning_rate": 4.69034508378855e-06, + "loss": 0.5738, + "step": 13151 + }, + { + "epoch": 0.9669166299073666, + "grad_norm": 0.829633891582489, + "learning_rate": 4.6902986338737384e-06, + "loss": 0.5388, + "step": 13152 + }, + { + "epoch": 0.9669901485075724, + "grad_norm": 0.8100030422210693, + "learning_rate": 4.6902521807053595e-06, + "loss": 0.5293, + "step": 13153 + }, + { + "epoch": 0.9670636671077782, + "grad_norm": 0.8440976142883301, + "learning_rate": 4.6902057242834795e-06, + "loss": 0.5612, + "step": 13154 + }, + { + "epoch": 0.9671371857079841, + "grad_norm": 0.806501567363739, + "learning_rate": 4.69015926460817e-06, + "loss": 0.5014, + "step": 13155 + }, + { + "epoch": 0.96721070430819, + "grad_norm": 0.8589464426040649, + "learning_rate": 4.690112801679497e-06, + "loss": 0.5735, + "step": 13156 + }, + { + "epoch": 0.9672842229083958, + "grad_norm": 0.8331448435783386, + "learning_rate": 4.6900663354975325e-06, + "loss": 0.5142, + "step": 13157 + }, + { + "epoch": 0.9673577415086017, + "grad_norm": 0.818132221698761, + "learning_rate": 4.690019866062345e-06, + "loss": 0.5364, + "step": 13158 + }, + { + "epoch": 0.9674312601088075, + "grad_norm": 0.7935975193977356, + "learning_rate": 4.689973393374002e-06, + "loss": 0.521, + "step": 13159 + }, + { + "epoch": 0.9675047787090134, + "grad_norm": 0.8720217943191528, + "learning_rate": 4.6899269174325745e-06, + "loss": 0.42, + "step": 13160 + }, + { + "epoch": 0.9675782973092192, + "grad_norm": 0.8202923536300659, + "learning_rate": 4.68988043823813e-06, + "loss": 0.503, + "step": 13161 + }, + { + "epoch": 0.9676518159094251, + "grad_norm": 0.8070036768913269, + "learning_rate": 4.6898339557907375e-06, + "loss": 0.4747, + "step": 13162 + }, + { + "epoch": 0.9677253345096309, + "grad_norm": 0.8036499619483948, + "learning_rate": 4.6897874700904666e-06, + "loss": 0.5704, + "step": 13163 + }, + { + "epoch": 0.9677988531098368, + "grad_norm": 0.795900285243988, + "learning_rate": 4.6897409811373875e-06, + "loss": 0.5361, + "step": 13164 + }, + { + "epoch": 0.9678723717100426, + "grad_norm": 0.8311697840690613, + "learning_rate": 4.6896944889315665e-06, + "loss": 0.5445, + "step": 13165 + }, + { + "epoch": 0.9679458903102485, + "grad_norm": 0.7821821570396423, + "learning_rate": 4.6896479934730746e-06, + "loss": 0.5211, + "step": 13166 + }, + { + "epoch": 0.9680194089104543, + "grad_norm": 0.7937787771224976, + "learning_rate": 4.689601494761982e-06, + "loss": 0.5326, + "step": 13167 + }, + { + "epoch": 0.9680929275106602, + "grad_norm": 0.8567473888397217, + "learning_rate": 4.689554992798355e-06, + "loss": 0.5373, + "step": 13168 + }, + { + "epoch": 0.968166446110866, + "grad_norm": 0.8229916095733643, + "learning_rate": 4.689508487582264e-06, + "loss": 0.5449, + "step": 13169 + }, + { + "epoch": 0.9682399647110719, + "grad_norm": 0.851360023021698, + "learning_rate": 4.689461979113778e-06, + "loss": 0.5111, + "step": 13170 + }, + { + "epoch": 0.9683134833112778, + "grad_norm": 0.8447389006614685, + "learning_rate": 4.6894154673929655e-06, + "loss": 0.4511, + "step": 13171 + }, + { + "epoch": 0.9683870019114836, + "grad_norm": 0.8011580109596252, + "learning_rate": 4.6893689524198975e-06, + "loss": 0.4702, + "step": 13172 + }, + { + "epoch": 0.9684605205116894, + "grad_norm": 0.817223310470581, + "learning_rate": 4.6893224341946405e-06, + "loss": 0.5461, + "step": 13173 + }, + { + "epoch": 0.9685340391118953, + "grad_norm": 0.8516513109207153, + "learning_rate": 4.689275912717266e-06, + "loss": 0.5359, + "step": 13174 + }, + { + "epoch": 0.9686075577121012, + "grad_norm": 0.8251588344573975, + "learning_rate": 4.689229387987842e-06, + "loss": 0.5004, + "step": 13175 + }, + { + "epoch": 0.968681076312307, + "grad_norm": 0.8158938884735107, + "learning_rate": 4.6891828600064375e-06, + "loss": 0.5622, + "step": 13176 + }, + { + "epoch": 0.9687545949125128, + "grad_norm": 0.8054515719413757, + "learning_rate": 4.689136328773122e-06, + "loss": 0.5287, + "step": 13177 + }, + { + "epoch": 0.9688281135127187, + "grad_norm": 0.8617294430732727, + "learning_rate": 4.689089794287963e-06, + "loss": 0.5294, + "step": 13178 + }, + { + "epoch": 0.9689016321129246, + "grad_norm": 0.8200574517250061, + "learning_rate": 4.689043256551033e-06, + "loss": 0.507, + "step": 13179 + }, + { + "epoch": 0.9689751507131305, + "grad_norm": 0.8180118203163147, + "learning_rate": 4.688996715562397e-06, + "loss": 0.5896, + "step": 13180 + }, + { + "epoch": 0.9690486693133362, + "grad_norm": 0.8093910813331604, + "learning_rate": 4.688950171322128e-06, + "loss": 0.5347, + "step": 13181 + }, + { + "epoch": 0.9691221879135421, + "grad_norm": 0.8058781623840332, + "learning_rate": 4.6889036238302924e-06, + "loss": 0.5274, + "step": 13182 + }, + { + "epoch": 0.969195706513748, + "grad_norm": 0.8309918642044067, + "learning_rate": 4.68885707308696e-06, + "loss": 0.5813, + "step": 13183 + }, + { + "epoch": 0.9692692251139539, + "grad_norm": 0.818178653717041, + "learning_rate": 4.688810519092201e-06, + "loss": 0.5251, + "step": 13184 + }, + { + "epoch": 0.9693427437141596, + "grad_norm": 0.8386787176132202, + "learning_rate": 4.688763961846084e-06, + "loss": 0.557, + "step": 13185 + }, + { + "epoch": 0.9694162623143655, + "grad_norm": 0.8089742064476013, + "learning_rate": 4.688717401348677e-06, + "loss": 0.4943, + "step": 13186 + }, + { + "epoch": 0.9694897809145714, + "grad_norm": 0.783819854259491, + "learning_rate": 4.688670837600051e-06, + "loss": 0.5054, + "step": 13187 + }, + { + "epoch": 0.9695632995147773, + "grad_norm": 0.8163173794746399, + "learning_rate": 4.688624270600274e-06, + "loss": 0.5049, + "step": 13188 + }, + { + "epoch": 0.9696368181149831, + "grad_norm": 0.7864866256713867, + "learning_rate": 4.688577700349416e-06, + "loss": 0.5066, + "step": 13189 + }, + { + "epoch": 0.9697103367151889, + "grad_norm": 0.8800264596939087, + "learning_rate": 4.688531126847545e-06, + "loss": 0.5941, + "step": 13190 + }, + { + "epoch": 0.9697838553153948, + "grad_norm": 0.9067132472991943, + "learning_rate": 4.6884845500947315e-06, + "loss": 0.5834, + "step": 13191 + }, + { + "epoch": 0.9698573739156007, + "grad_norm": 0.8469509482383728, + "learning_rate": 4.6884379700910435e-06, + "loss": 0.5632, + "step": 13192 + }, + { + "epoch": 0.9699308925158066, + "grad_norm": 0.7690514326095581, + "learning_rate": 4.688391386836551e-06, + "loss": 0.5057, + "step": 13193 + }, + { + "epoch": 0.9700044111160123, + "grad_norm": 0.8298050165176392, + "learning_rate": 4.688344800331323e-06, + "loss": 0.5544, + "step": 13194 + }, + { + "epoch": 0.9700779297162182, + "grad_norm": 0.8581977486610413, + "learning_rate": 4.688298210575428e-06, + "loss": 0.5416, + "step": 13195 + }, + { + "epoch": 0.9701514483164241, + "grad_norm": 0.8402338027954102, + "learning_rate": 4.688251617568937e-06, + "loss": 0.5356, + "step": 13196 + }, + { + "epoch": 0.97022496691663, + "grad_norm": 0.7938409447669983, + "learning_rate": 4.688205021311917e-06, + "loss": 0.5213, + "step": 13197 + }, + { + "epoch": 0.9702984855168357, + "grad_norm": 0.7788214683532715, + "learning_rate": 4.688158421804438e-06, + "loss": 0.5399, + "step": 13198 + }, + { + "epoch": 0.9703720041170416, + "grad_norm": 0.8089534044265747, + "learning_rate": 4.6881118190465706e-06, + "loss": 0.52, + "step": 13199 + }, + { + "epoch": 0.9704455227172475, + "grad_norm": 0.7784228324890137, + "learning_rate": 4.688065213038383e-06, + "loss": 0.4993, + "step": 13200 + }, + { + "epoch": 0.9705190413174534, + "grad_norm": 0.7668042778968811, + "learning_rate": 4.688018603779943e-06, + "loss": 0.4958, + "step": 13201 + }, + { + "epoch": 0.9705925599176591, + "grad_norm": 0.8256990313529968, + "learning_rate": 4.687971991271322e-06, + "loss": 0.5193, + "step": 13202 + }, + { + "epoch": 0.970666078517865, + "grad_norm": 0.875663697719574, + "learning_rate": 4.687925375512589e-06, + "loss": 0.4995, + "step": 13203 + }, + { + "epoch": 0.9707395971180709, + "grad_norm": 0.8168803453445435, + "learning_rate": 4.687878756503812e-06, + "loss": 0.4747, + "step": 13204 + }, + { + "epoch": 0.9708131157182768, + "grad_norm": 0.8322542905807495, + "learning_rate": 4.68783213424506e-06, + "loss": 0.5771, + "step": 13205 + }, + { + "epoch": 0.9708866343184825, + "grad_norm": 0.8044901490211487, + "learning_rate": 4.687785508736404e-06, + "loss": 0.5105, + "step": 13206 + }, + { + "epoch": 0.9709601529186884, + "grad_norm": 0.7808482646942139, + "learning_rate": 4.687738879977913e-06, + "loss": 0.5261, + "step": 13207 + }, + { + "epoch": 0.9710336715188943, + "grad_norm": 0.7952555418014526, + "learning_rate": 4.687692247969655e-06, + "loss": 0.4737, + "step": 13208 + }, + { + "epoch": 0.9711071901191002, + "grad_norm": 0.7883443832397461, + "learning_rate": 4.687645612711701e-06, + "loss": 0.51, + "step": 13209 + }, + { + "epoch": 0.971180708719306, + "grad_norm": 0.8109560608863831, + "learning_rate": 4.6875989742041185e-06, + "loss": 0.4871, + "step": 13210 + }, + { + "epoch": 0.9712542273195118, + "grad_norm": 0.800926148891449, + "learning_rate": 4.687552332446977e-06, + "loss": 0.5432, + "step": 13211 + }, + { + "epoch": 0.9713277459197177, + "grad_norm": 0.8263463973999023, + "learning_rate": 4.687505687440347e-06, + "loss": 0.5145, + "step": 13212 + }, + { + "epoch": 0.9714012645199236, + "grad_norm": 0.7815383076667786, + "learning_rate": 4.687459039184296e-06, + "loss": 0.4882, + "step": 13213 + }, + { + "epoch": 0.9714747831201294, + "grad_norm": 0.8054284453392029, + "learning_rate": 4.687412387678896e-06, + "loss": 0.5322, + "step": 13214 + }, + { + "epoch": 0.9715483017203352, + "grad_norm": 0.77695631980896, + "learning_rate": 4.687365732924214e-06, + "loss": 0.5289, + "step": 13215 + }, + { + "epoch": 0.9716218203205411, + "grad_norm": 0.8577467799186707, + "learning_rate": 4.687319074920319e-06, + "loss": 0.4862, + "step": 13216 + }, + { + "epoch": 0.971695338920747, + "grad_norm": 0.8166067600250244, + "learning_rate": 4.687272413667282e-06, + "loss": 0.5226, + "step": 13217 + }, + { + "epoch": 0.9717688575209528, + "grad_norm": 0.8346696496009827, + "learning_rate": 4.687225749165172e-06, + "loss": 0.565, + "step": 13218 + }, + { + "epoch": 0.9718423761211586, + "grad_norm": 0.7656717896461487, + "learning_rate": 4.687179081414058e-06, + "loss": 0.5027, + "step": 13219 + }, + { + "epoch": 0.9719158947213645, + "grad_norm": 0.843596339225769, + "learning_rate": 4.6871324104140086e-06, + "loss": 0.5485, + "step": 13220 + }, + { + "epoch": 0.9719894133215704, + "grad_norm": 0.8717944622039795, + "learning_rate": 4.687085736165095e-06, + "loss": 0.5228, + "step": 13221 + }, + { + "epoch": 0.9720629319217762, + "grad_norm": 0.7998218536376953, + "learning_rate": 4.687039058667383e-06, + "loss": 0.4935, + "step": 13222 + }, + { + "epoch": 0.972136450521982, + "grad_norm": 0.8167248964309692, + "learning_rate": 4.686992377920946e-06, + "loss": 0.4817, + "step": 13223 + }, + { + "epoch": 0.9722099691221879, + "grad_norm": 0.8015933036804199, + "learning_rate": 4.6869456939258515e-06, + "loss": 0.5105, + "step": 13224 + }, + { + "epoch": 0.9722834877223938, + "grad_norm": 0.8506314754486084, + "learning_rate": 4.686899006682168e-06, + "loss": 0.5854, + "step": 13225 + }, + { + "epoch": 0.9723570063225996, + "grad_norm": 0.8032073974609375, + "learning_rate": 4.686852316189967e-06, + "loss": 0.5028, + "step": 13226 + }, + { + "epoch": 0.9724305249228055, + "grad_norm": 0.8082671761512756, + "learning_rate": 4.6868056224493154e-06, + "loss": 0.5528, + "step": 13227 + }, + { + "epoch": 0.9725040435230113, + "grad_norm": 0.8369854688644409, + "learning_rate": 4.686758925460285e-06, + "loss": 0.5371, + "step": 13228 + }, + { + "epoch": 0.9725775621232172, + "grad_norm": 0.8137633800506592, + "learning_rate": 4.686712225222943e-06, + "loss": 0.5323, + "step": 13229 + }, + { + "epoch": 0.972651080723423, + "grad_norm": 0.8526973128318787, + "learning_rate": 4.68666552173736e-06, + "loss": 0.5743, + "step": 13230 + }, + { + "epoch": 0.9727245993236289, + "grad_norm": 0.772181510925293, + "learning_rate": 4.686618815003605e-06, + "loss": 0.5097, + "step": 13231 + }, + { + "epoch": 0.9727981179238347, + "grad_norm": 0.7970917224884033, + "learning_rate": 4.6865721050217485e-06, + "loss": 0.5474, + "step": 13232 + }, + { + "epoch": 0.9728716365240406, + "grad_norm": 0.8026096820831299, + "learning_rate": 4.6865253917918575e-06, + "loss": 0.528, + "step": 13233 + }, + { + "epoch": 0.9729451551242464, + "grad_norm": 0.8009037375450134, + "learning_rate": 4.686478675314003e-06, + "loss": 0.5362, + "step": 13234 + }, + { + "epoch": 0.9730186737244523, + "grad_norm": 0.8453885912895203, + "learning_rate": 4.686431955588255e-06, + "loss": 0.5352, + "step": 13235 + }, + { + "epoch": 0.9730921923246582, + "grad_norm": 0.8008883595466614, + "learning_rate": 4.686385232614681e-06, + "loss": 0.5157, + "step": 13236 + }, + { + "epoch": 0.973165710924864, + "grad_norm": 0.7746396064758301, + "learning_rate": 4.686338506393353e-06, + "loss": 0.5244, + "step": 13237 + }, + { + "epoch": 0.9732392295250698, + "grad_norm": 0.8323204517364502, + "learning_rate": 4.6862917769243376e-06, + "loss": 0.5017, + "step": 13238 + }, + { + "epoch": 0.9733127481252757, + "grad_norm": 0.8148661255836487, + "learning_rate": 4.686245044207706e-06, + "loss": 0.5347, + "step": 13239 + }, + { + "epoch": 0.9733862667254816, + "grad_norm": 0.7756259441375732, + "learning_rate": 4.6861983082435265e-06, + "loss": 0.5025, + "step": 13240 + }, + { + "epoch": 0.9734597853256874, + "grad_norm": 0.8426780104637146, + "learning_rate": 4.68615156903187e-06, + "loss": 0.4935, + "step": 13241 + }, + { + "epoch": 0.9735333039258932, + "grad_norm": 0.8378527164459229, + "learning_rate": 4.686104826572804e-06, + "loss": 0.4997, + "step": 13242 + }, + { + "epoch": 0.9736068225260991, + "grad_norm": 0.8579155206680298, + "learning_rate": 4.686058080866399e-06, + "loss": 0.5213, + "step": 13243 + }, + { + "epoch": 0.973680341126305, + "grad_norm": 0.7994510531425476, + "learning_rate": 4.686011331912726e-06, + "loss": 0.5092, + "step": 13244 + }, + { + "epoch": 0.9737538597265109, + "grad_norm": 0.8205957412719727, + "learning_rate": 4.685964579711852e-06, + "loss": 0.5278, + "step": 13245 + }, + { + "epoch": 0.9738273783267166, + "grad_norm": 0.8577693104743958, + "learning_rate": 4.685917824263846e-06, + "loss": 0.5418, + "step": 13246 + }, + { + "epoch": 0.9739008969269225, + "grad_norm": 0.8292307257652283, + "learning_rate": 4.68587106556878e-06, + "loss": 0.5304, + "step": 13247 + }, + { + "epoch": 0.9739744155271284, + "grad_norm": 0.8412092328071594, + "learning_rate": 4.685824303626723e-06, + "loss": 0.5294, + "step": 13248 + }, + { + "epoch": 0.9740479341273343, + "grad_norm": 0.7800624370574951, + "learning_rate": 4.685777538437742e-06, + "loss": 0.5103, + "step": 13249 + }, + { + "epoch": 0.97412145272754, + "grad_norm": 0.7987063527107239, + "learning_rate": 4.6857307700019095e-06, + "loss": 0.519, + "step": 13250 + }, + { + "epoch": 0.9741949713277459, + "grad_norm": 0.7929978370666504, + "learning_rate": 4.685683998319293e-06, + "loss": 0.5379, + "step": 13251 + }, + { + "epoch": 0.9742684899279518, + "grad_norm": 0.8474598526954651, + "learning_rate": 4.685637223389963e-06, + "loss": 0.4946, + "step": 13252 + }, + { + "epoch": 0.9743420085281577, + "grad_norm": 0.8361024260520935, + "learning_rate": 4.685590445213988e-06, + "loss": 0.5244, + "step": 13253 + }, + { + "epoch": 0.9744155271283634, + "grad_norm": 0.8760491609573364, + "learning_rate": 4.685543663791438e-06, + "loss": 0.5411, + "step": 13254 + }, + { + "epoch": 0.9744890457285693, + "grad_norm": 0.7858641147613525, + "learning_rate": 4.685496879122383e-06, + "loss": 0.518, + "step": 13255 + }, + { + "epoch": 0.9745625643287752, + "grad_norm": 0.8114504814147949, + "learning_rate": 4.685450091206893e-06, + "loss": 0.4858, + "step": 13256 + }, + { + "epoch": 0.9746360829289811, + "grad_norm": 0.8341386914253235, + "learning_rate": 4.685403300045035e-06, + "loss": 0.523, + "step": 13257 + }, + { + "epoch": 0.9747096015291868, + "grad_norm": 0.8664938807487488, + "learning_rate": 4.68535650563688e-06, + "loss": 0.5648, + "step": 13258 + }, + { + "epoch": 0.9747831201293927, + "grad_norm": 0.8216680884361267, + "learning_rate": 4.6853097079824985e-06, + "loss": 0.5604, + "step": 13259 + }, + { + "epoch": 0.9748566387295986, + "grad_norm": 0.8584418296813965, + "learning_rate": 4.685262907081959e-06, + "loss": 0.5037, + "step": 13260 + }, + { + "epoch": 0.9749301573298045, + "grad_norm": 0.8271647095680237, + "learning_rate": 4.685216102935331e-06, + "loss": 0.5468, + "step": 13261 + }, + { + "epoch": 0.9750036759300102, + "grad_norm": 0.8259106278419495, + "learning_rate": 4.685169295542684e-06, + "loss": 0.5472, + "step": 13262 + }, + { + "epoch": 0.9750771945302161, + "grad_norm": 0.8453142642974854, + "learning_rate": 4.6851224849040875e-06, + "loss": 0.5369, + "step": 13263 + }, + { + "epoch": 0.975150713130422, + "grad_norm": 0.8330841064453125, + "learning_rate": 4.685075671019611e-06, + "loss": 0.5164, + "step": 13264 + }, + { + "epoch": 0.9752242317306279, + "grad_norm": 0.8304910659790039, + "learning_rate": 4.685028853889325e-06, + "loss": 0.5292, + "step": 13265 + }, + { + "epoch": 0.9752977503308337, + "grad_norm": 0.8521130681037903, + "learning_rate": 4.684982033513298e-06, + "loss": 0.562, + "step": 13266 + }, + { + "epoch": 0.9753712689310395, + "grad_norm": 0.7797905206680298, + "learning_rate": 4.6849352098915995e-06, + "loss": 0.5329, + "step": 13267 + }, + { + "epoch": 0.9754447875312454, + "grad_norm": 0.8651224970817566, + "learning_rate": 4.684888383024299e-06, + "loss": 0.5225, + "step": 13268 + }, + { + "epoch": 0.9755183061314513, + "grad_norm": 0.8244214057922363, + "learning_rate": 4.684841552911468e-06, + "loss": 0.5511, + "step": 13269 + }, + { + "epoch": 0.9755918247316571, + "grad_norm": 0.8513219356536865, + "learning_rate": 4.684794719553173e-06, + "loss": 0.544, + "step": 13270 + }, + { + "epoch": 0.9756653433318629, + "grad_norm": 0.7897894382476807, + "learning_rate": 4.684747882949487e-06, + "loss": 0.5147, + "step": 13271 + }, + { + "epoch": 0.9757388619320688, + "grad_norm": 0.8266818523406982, + "learning_rate": 4.684701043100476e-06, + "loss": 0.5435, + "step": 13272 + }, + { + "epoch": 0.9758123805322747, + "grad_norm": 0.8169264793395996, + "learning_rate": 4.684654200006211e-06, + "loss": 0.5073, + "step": 13273 + }, + { + "epoch": 0.9758858991324805, + "grad_norm": 0.7970740795135498, + "learning_rate": 4.684607353666762e-06, + "loss": 0.4669, + "step": 13274 + }, + { + "epoch": 0.9759594177326864, + "grad_norm": 0.800966203212738, + "learning_rate": 4.684560504082199e-06, + "loss": 0.4941, + "step": 13275 + }, + { + "epoch": 0.9760329363328922, + "grad_norm": 0.7794301509857178, + "learning_rate": 4.6845136512525914e-06, + "loss": 0.5143, + "step": 13276 + }, + { + "epoch": 0.9761064549330981, + "grad_norm": 0.8472841382026672, + "learning_rate": 4.684466795178008e-06, + "loss": 0.5226, + "step": 13277 + }, + { + "epoch": 0.9761799735333039, + "grad_norm": 0.8701894879341125, + "learning_rate": 4.684419935858518e-06, + "loss": 0.5534, + "step": 13278 + }, + { + "epoch": 0.9762534921335098, + "grad_norm": 0.7986413240432739, + "learning_rate": 4.684373073294193e-06, + "loss": 0.5412, + "step": 13279 + }, + { + "epoch": 0.9763270107337156, + "grad_norm": 0.8461158871650696, + "learning_rate": 4.6843262074851e-06, + "loss": 0.523, + "step": 13280 + }, + { + "epoch": 0.9764005293339215, + "grad_norm": 0.791404664516449, + "learning_rate": 4.684279338431311e-06, + "loss": 0.5381, + "step": 13281 + }, + { + "epoch": 0.9764740479341273, + "grad_norm": 0.8477190136909485, + "learning_rate": 4.684232466132895e-06, + "loss": 0.5352, + "step": 13282 + }, + { + "epoch": 0.9765475665343332, + "grad_norm": 0.8206968307495117, + "learning_rate": 4.68418559058992e-06, + "loss": 0.52, + "step": 13283 + }, + { + "epoch": 0.976621085134539, + "grad_norm": 0.7727460265159607, + "learning_rate": 4.684138711802459e-06, + "loss": 0.4912, + "step": 13284 + }, + { + "epoch": 0.9766946037347449, + "grad_norm": 0.8096509575843811, + "learning_rate": 4.684091829770578e-06, + "loss": 0.4853, + "step": 13285 + }, + { + "epoch": 0.9767681223349507, + "grad_norm": 0.7674127221107483, + "learning_rate": 4.684044944494348e-06, + "loss": 0.4768, + "step": 13286 + }, + { + "epoch": 0.9768416409351566, + "grad_norm": 0.827183723449707, + "learning_rate": 4.683998055973839e-06, + "loss": 0.5357, + "step": 13287 + }, + { + "epoch": 0.9769151595353625, + "grad_norm": 0.8492379188537598, + "learning_rate": 4.68395116420912e-06, + "loss": 0.5169, + "step": 13288 + }, + { + "epoch": 0.9769886781355683, + "grad_norm": 0.8288705945014954, + "learning_rate": 4.683904269200263e-06, + "loss": 0.5707, + "step": 13289 + }, + { + "epoch": 0.9770621967357741, + "grad_norm": 0.8370354771614075, + "learning_rate": 4.683857370947335e-06, + "loss": 0.5039, + "step": 13290 + }, + { + "epoch": 0.97713571533598, + "grad_norm": 0.7831302881240845, + "learning_rate": 4.683810469450405e-06, + "loss": 0.4818, + "step": 13291 + }, + { + "epoch": 0.9772092339361859, + "grad_norm": 0.8024160265922546, + "learning_rate": 4.6837635647095454e-06, + "loss": 0.5359, + "step": 13292 + }, + { + "epoch": 0.9772827525363917, + "grad_norm": 0.8301874399185181, + "learning_rate": 4.683716656724825e-06, + "loss": 0.5136, + "step": 13293 + }, + { + "epoch": 0.9773562711365975, + "grad_norm": 0.818678617477417, + "learning_rate": 4.683669745496312e-06, + "loss": 0.5098, + "step": 13294 + }, + { + "epoch": 0.9774297897368034, + "grad_norm": 0.7921224236488342, + "learning_rate": 4.683622831024078e-06, + "loss": 0.5349, + "step": 13295 + }, + { + "epoch": 0.9775033083370093, + "grad_norm": 0.7900388240814209, + "learning_rate": 4.683575913308192e-06, + "loss": 0.4905, + "step": 13296 + }, + { + "epoch": 0.9775768269372151, + "grad_norm": 0.78719162940979, + "learning_rate": 4.683528992348723e-06, + "loss": 0.5009, + "step": 13297 + }, + { + "epoch": 0.9776503455374209, + "grad_norm": 0.8124309182167053, + "learning_rate": 4.68348206814574e-06, + "loss": 0.5566, + "step": 13298 + }, + { + "epoch": 0.9777238641376268, + "grad_norm": 0.7894476056098938, + "learning_rate": 4.683435140699316e-06, + "loss": 0.531, + "step": 13299 + }, + { + "epoch": 0.9777973827378327, + "grad_norm": 0.7981123924255371, + "learning_rate": 4.683388210009518e-06, + "loss": 0.509, + "step": 13300 + }, + { + "epoch": 0.9778709013380386, + "grad_norm": 0.7730478644371033, + "learning_rate": 4.683341276076417e-06, + "loss": 0.4674, + "step": 13301 + }, + { + "epoch": 0.9779444199382443, + "grad_norm": 0.7931380271911621, + "learning_rate": 4.683294338900081e-06, + "loss": 0.5219, + "step": 13302 + }, + { + "epoch": 0.9780179385384502, + "grad_norm": 0.8365522027015686, + "learning_rate": 4.683247398480581e-06, + "loss": 0.5626, + "step": 13303 + }, + { + "epoch": 0.9780914571386561, + "grad_norm": 0.8706329464912415, + "learning_rate": 4.683200454817987e-06, + "loss": 0.5471, + "step": 13304 + }, + { + "epoch": 0.978164975738862, + "grad_norm": 0.7883985638618469, + "learning_rate": 4.683153507912368e-06, + "loss": 0.5049, + "step": 13305 + }, + { + "epoch": 0.9782384943390677, + "grad_norm": 0.7910456657409668, + "learning_rate": 4.683106557763793e-06, + "loss": 0.5289, + "step": 13306 + }, + { + "epoch": 0.9783120129392736, + "grad_norm": 0.8834639191627502, + "learning_rate": 4.683059604372334e-06, + "loss": 0.5394, + "step": 13307 + }, + { + "epoch": 0.9783855315394795, + "grad_norm": 0.7977176904678345, + "learning_rate": 4.68301264773806e-06, + "loss": 0.5082, + "step": 13308 + }, + { + "epoch": 0.9784590501396854, + "grad_norm": 0.8171656131744385, + "learning_rate": 4.68296568786104e-06, + "loss": 0.5347, + "step": 13309 + }, + { + "epoch": 0.9785325687398911, + "grad_norm": 0.7815824151039124, + "learning_rate": 4.682918724741343e-06, + "loss": 0.5316, + "step": 13310 + }, + { + "epoch": 0.978606087340097, + "grad_norm": 0.8450369238853455, + "learning_rate": 4.68287175837904e-06, + "loss": 0.5276, + "step": 13311 + }, + { + "epoch": 0.9786796059403029, + "grad_norm": 0.8090283870697021, + "learning_rate": 4.682824788774201e-06, + "loss": 0.5025, + "step": 13312 + }, + { + "epoch": 0.9787531245405088, + "grad_norm": 0.8199152946472168, + "learning_rate": 4.682777815926895e-06, + "loss": 0.4975, + "step": 13313 + }, + { + "epoch": 0.9788266431407145, + "grad_norm": 0.819462776184082, + "learning_rate": 4.682730839837193e-06, + "loss": 0.5239, + "step": 13314 + }, + { + "epoch": 0.9789001617409204, + "grad_norm": 0.8671666979789734, + "learning_rate": 4.682683860505162e-06, + "loss": 0.5282, + "step": 13315 + }, + { + "epoch": 0.9789736803411263, + "grad_norm": 0.7866939306259155, + "learning_rate": 4.682636877930875e-06, + "loss": 0.5385, + "step": 13316 + }, + { + "epoch": 0.9790471989413322, + "grad_norm": 0.8380091190338135, + "learning_rate": 4.682589892114401e-06, + "loss": 0.5887, + "step": 13317 + }, + { + "epoch": 0.979120717541538, + "grad_norm": 0.8301372528076172, + "learning_rate": 4.6825429030558076e-06, + "loss": 0.5253, + "step": 13318 + }, + { + "epoch": 0.9791942361417438, + "grad_norm": 0.7877981662750244, + "learning_rate": 4.682495910755167e-06, + "loss": 0.5313, + "step": 13319 + }, + { + "epoch": 0.9792677547419497, + "grad_norm": 0.8822405338287354, + "learning_rate": 4.6824489152125485e-06, + "loss": 0.5777, + "step": 13320 + }, + { + "epoch": 0.9793412733421556, + "grad_norm": 0.806773841381073, + "learning_rate": 4.682401916428022e-06, + "loss": 0.5043, + "step": 13321 + }, + { + "epoch": 0.9794147919423614, + "grad_norm": 0.8165210485458374, + "learning_rate": 4.682354914401656e-06, + "loss": 0.5158, + "step": 13322 + }, + { + "epoch": 0.9794883105425672, + "grad_norm": 0.7843955159187317, + "learning_rate": 4.682307909133522e-06, + "loss": 0.5031, + "step": 13323 + }, + { + "epoch": 0.9795618291427731, + "grad_norm": 0.8103297352790833, + "learning_rate": 4.682260900623688e-06, + "loss": 0.5304, + "step": 13324 + }, + { + "epoch": 0.979635347742979, + "grad_norm": 0.8309164643287659, + "learning_rate": 4.6822138888722265e-06, + "loss": 0.5281, + "step": 13325 + }, + { + "epoch": 0.9797088663431849, + "grad_norm": 0.8005740642547607, + "learning_rate": 4.682166873879204e-06, + "loss": 0.5081, + "step": 13326 + }, + { + "epoch": 0.9797823849433906, + "grad_norm": 0.8032575249671936, + "learning_rate": 4.682119855644693e-06, + "loss": 0.5324, + "step": 13327 + }, + { + "epoch": 0.9798559035435965, + "grad_norm": 0.8365293741226196, + "learning_rate": 4.682072834168764e-06, + "loss": 0.559, + "step": 13328 + }, + { + "epoch": 0.9799294221438024, + "grad_norm": 0.7986150979995728, + "learning_rate": 4.682025809451483e-06, + "loss": 0.5005, + "step": 13329 + }, + { + "epoch": 0.9800029407440083, + "grad_norm": 0.8172249794006348, + "learning_rate": 4.681978781492923e-06, + "loss": 0.5293, + "step": 13330 + }, + { + "epoch": 0.9800764593442141, + "grad_norm": 0.8253992199897766, + "learning_rate": 4.681931750293152e-06, + "loss": 0.5605, + "step": 13331 + }, + { + "epoch": 0.9801499779444199, + "grad_norm": 0.8286256194114685, + "learning_rate": 4.681884715852243e-06, + "loss": 0.5114, + "step": 13332 + }, + { + "epoch": 0.9802234965446258, + "grad_norm": 0.7800440192222595, + "learning_rate": 4.681837678170262e-06, + "loss": 0.4565, + "step": 13333 + }, + { + "epoch": 0.9802970151448317, + "grad_norm": 0.8146877288818359, + "learning_rate": 4.6817906372472816e-06, + "loss": 0.548, + "step": 13334 + }, + { + "epoch": 0.9803705337450375, + "grad_norm": 0.8358119130134583, + "learning_rate": 4.68174359308337e-06, + "loss": 0.5201, + "step": 13335 + }, + { + "epoch": 0.9804440523452433, + "grad_norm": 0.8206890225410461, + "learning_rate": 4.681696545678598e-06, + "loss": 0.5312, + "step": 13336 + }, + { + "epoch": 0.9805175709454492, + "grad_norm": 0.755370557308197, + "learning_rate": 4.681649495033036e-06, + "loss": 0.5007, + "step": 13337 + }, + { + "epoch": 0.9805910895456551, + "grad_norm": 0.7980484366416931, + "learning_rate": 4.681602441146752e-06, + "loss": 0.5127, + "step": 13338 + }, + { + "epoch": 0.9806646081458609, + "grad_norm": 0.8420430421829224, + "learning_rate": 4.681555384019817e-06, + "loss": 0.5237, + "step": 13339 + }, + { + "epoch": 0.9807381267460668, + "grad_norm": 0.8892338275909424, + "learning_rate": 4.6815083236523016e-06, + "loss": 0.5451, + "step": 13340 + }, + { + "epoch": 0.9808116453462726, + "grad_norm": 0.8640855550765991, + "learning_rate": 4.681461260044274e-06, + "loss": 0.5415, + "step": 13341 + }, + { + "epoch": 0.9808851639464785, + "grad_norm": 0.8452255129814148, + "learning_rate": 4.681414193195807e-06, + "loss": 0.5419, + "step": 13342 + }, + { + "epoch": 0.9809586825466843, + "grad_norm": 0.8212642073631287, + "learning_rate": 4.681367123106967e-06, + "loss": 0.5314, + "step": 13343 + }, + { + "epoch": 0.9810322011468902, + "grad_norm": 0.7908020615577698, + "learning_rate": 4.681320049777827e-06, + "loss": 0.5228, + "step": 13344 + }, + { + "epoch": 0.981105719747096, + "grad_norm": 0.8199082016944885, + "learning_rate": 4.681272973208454e-06, + "loss": 0.5549, + "step": 13345 + }, + { + "epoch": 0.9811792383473019, + "grad_norm": 0.8545604944229126, + "learning_rate": 4.68122589339892e-06, + "loss": 0.5476, + "step": 13346 + }, + { + "epoch": 0.9812527569475077, + "grad_norm": 0.8129513263702393, + "learning_rate": 4.681178810349295e-06, + "loss": 0.5128, + "step": 13347 + }, + { + "epoch": 0.9813262755477136, + "grad_norm": 0.8338724374771118, + "learning_rate": 4.6811317240596475e-06, + "loss": 0.5266, + "step": 13348 + }, + { + "epoch": 0.9813997941479194, + "grad_norm": 0.8215116262435913, + "learning_rate": 4.6810846345300486e-06, + "loss": 0.5381, + "step": 13349 + }, + { + "epoch": 0.9814733127481253, + "grad_norm": 0.8350509405136108, + "learning_rate": 4.681037541760568e-06, + "loss": 0.5584, + "step": 13350 + }, + { + "epoch": 0.9815468313483311, + "grad_norm": 0.8020725250244141, + "learning_rate": 4.6809904457512756e-06, + "loss": 0.543, + "step": 13351 + }, + { + "epoch": 0.981620349948537, + "grad_norm": 0.8920763731002808, + "learning_rate": 4.680943346502241e-06, + "loss": 0.5268, + "step": 13352 + }, + { + "epoch": 0.9816938685487429, + "grad_norm": 0.8683487176895142, + "learning_rate": 4.680896244013535e-06, + "loss": 0.5134, + "step": 13353 + }, + { + "epoch": 0.9817673871489487, + "grad_norm": 0.8338858485221863, + "learning_rate": 4.680849138285226e-06, + "loss": 0.5242, + "step": 13354 + }, + { + "epoch": 0.9818409057491545, + "grad_norm": 0.8128159642219543, + "learning_rate": 4.680802029317386e-06, + "loss": 0.4985, + "step": 13355 + }, + { + "epoch": 0.9819144243493604, + "grad_norm": 0.7962736487388611, + "learning_rate": 4.680754917110084e-06, + "loss": 0.51, + "step": 13356 + }, + { + "epoch": 0.9819879429495663, + "grad_norm": 0.8187333941459656, + "learning_rate": 4.68070780166339e-06, + "loss": 0.49, + "step": 13357 + }, + { + "epoch": 0.9820614615497721, + "grad_norm": 0.8774624466896057, + "learning_rate": 4.680660682977373e-06, + "loss": 0.5369, + "step": 13358 + }, + { + "epoch": 0.9821349801499779, + "grad_norm": 0.8222813010215759, + "learning_rate": 4.680613561052105e-06, + "loss": 0.5174, + "step": 13359 + }, + { + "epoch": 0.9822084987501838, + "grad_norm": 0.8586554527282715, + "learning_rate": 4.680566435887655e-06, + "loss": 0.5415, + "step": 13360 + }, + { + "epoch": 0.9822820173503897, + "grad_norm": 0.7849283218383789, + "learning_rate": 4.680519307484093e-06, + "loss": 0.5221, + "step": 13361 + }, + { + "epoch": 0.9823555359505955, + "grad_norm": 0.809354305267334, + "learning_rate": 4.680472175841488e-06, + "loss": 0.5345, + "step": 13362 + }, + { + "epoch": 0.9824290545508013, + "grad_norm": 0.8509615659713745, + "learning_rate": 4.680425040959911e-06, + "loss": 0.4832, + "step": 13363 + }, + { + "epoch": 0.9825025731510072, + "grad_norm": 0.7984037399291992, + "learning_rate": 4.680377902839433e-06, + "loss": 0.5266, + "step": 13364 + }, + { + "epoch": 0.9825760917512131, + "grad_norm": 0.7673056125640869, + "learning_rate": 4.680330761480123e-06, + "loss": 0.5003, + "step": 13365 + }, + { + "epoch": 0.982649610351419, + "grad_norm": 0.9404829740524292, + "learning_rate": 4.68028361688205e-06, + "loss": 0.5859, + "step": 13366 + }, + { + "epoch": 0.9827231289516247, + "grad_norm": 0.8678496479988098, + "learning_rate": 4.680236469045286e-06, + "loss": 0.5367, + "step": 13367 + }, + { + "epoch": 0.9827966475518306, + "grad_norm": 0.8272296786308289, + "learning_rate": 4.6801893179699e-06, + "loss": 0.5125, + "step": 13368 + }, + { + "epoch": 0.9828701661520365, + "grad_norm": 0.7760437726974487, + "learning_rate": 4.680142163655962e-06, + "loss": 0.4663, + "step": 13369 + }, + { + "epoch": 0.9829436847522424, + "grad_norm": 0.8284146785736084, + "learning_rate": 4.680095006103542e-06, + "loss": 0.5408, + "step": 13370 + }, + { + "epoch": 0.9830172033524481, + "grad_norm": 0.8067589402198792, + "learning_rate": 4.68004784531271e-06, + "loss": 0.5156, + "step": 13371 + }, + { + "epoch": 0.983090721952654, + "grad_norm": 0.8357143402099609, + "learning_rate": 4.680000681283537e-06, + "loss": 0.5437, + "step": 13372 + }, + { + "epoch": 0.9831642405528599, + "grad_norm": 0.8428236246109009, + "learning_rate": 4.679953514016093e-06, + "loss": 0.5115, + "step": 13373 + }, + { + "epoch": 0.9832377591530658, + "grad_norm": 0.8199270367622375, + "learning_rate": 4.6799063435104455e-06, + "loss": 0.5391, + "step": 13374 + }, + { + "epoch": 0.9833112777532715, + "grad_norm": 0.8320136070251465, + "learning_rate": 4.679859169766668e-06, + "loss": 0.565, + "step": 13375 + }, + { + "epoch": 0.9833847963534774, + "grad_norm": 0.7971094846725464, + "learning_rate": 4.679811992784828e-06, + "loss": 0.5002, + "step": 13376 + }, + { + "epoch": 0.9834583149536833, + "grad_norm": 0.8414693474769592, + "learning_rate": 4.6797648125649976e-06, + "loss": 0.5177, + "step": 13377 + }, + { + "epoch": 0.9835318335538892, + "grad_norm": 0.7986851334571838, + "learning_rate": 4.679717629107245e-06, + "loss": 0.495, + "step": 13378 + }, + { + "epoch": 0.983605352154095, + "grad_norm": 0.8351437449455261, + "learning_rate": 4.6796704424116415e-06, + "loss": 0.5314, + "step": 13379 + }, + { + "epoch": 0.9836788707543008, + "grad_norm": 0.7661401033401489, + "learning_rate": 4.679623252478258e-06, + "loss": 0.5006, + "step": 13380 + }, + { + "epoch": 0.9837523893545067, + "grad_norm": 0.8143934607505798, + "learning_rate": 4.679576059307163e-06, + "loss": 0.4678, + "step": 13381 + }, + { + "epoch": 0.9838259079547126, + "grad_norm": 0.828984797000885, + "learning_rate": 4.6795288628984255e-06, + "loss": 0.5492, + "step": 13382 + }, + { + "epoch": 0.9838994265549184, + "grad_norm": 0.8333982825279236, + "learning_rate": 4.679481663252119e-06, + "loss": 0.5621, + "step": 13383 + }, + { + "epoch": 0.9839729451551242, + "grad_norm": 0.8153504133224487, + "learning_rate": 4.679434460368312e-06, + "loss": 0.4948, + "step": 13384 + }, + { + "epoch": 0.9840464637553301, + "grad_norm": 0.8624430298805237, + "learning_rate": 4.679387254247073e-06, + "loss": 0.5147, + "step": 13385 + }, + { + "epoch": 0.984119982355536, + "grad_norm": 0.8115010261535645, + "learning_rate": 4.679340044888474e-06, + "loss": 0.5694, + "step": 13386 + }, + { + "epoch": 0.9841935009557418, + "grad_norm": 0.8330442905426025, + "learning_rate": 4.679292832292586e-06, + "loss": 0.4946, + "step": 13387 + }, + { + "epoch": 0.9842670195559476, + "grad_norm": 0.7969723343849182, + "learning_rate": 4.679245616459477e-06, + "loss": 0.5409, + "step": 13388 + }, + { + "epoch": 0.9843405381561535, + "grad_norm": 0.7944476008415222, + "learning_rate": 4.679198397389218e-06, + "loss": 0.4955, + "step": 13389 + }, + { + "epoch": 0.9844140567563594, + "grad_norm": 0.7951041460037231, + "learning_rate": 4.679151175081879e-06, + "loss": 0.4616, + "step": 13390 + }, + { + "epoch": 0.9844875753565652, + "grad_norm": 0.8369160890579224, + "learning_rate": 4.67910394953753e-06, + "loss": 0.5272, + "step": 13391 + }, + { + "epoch": 0.984561093956771, + "grad_norm": 0.8611461520195007, + "learning_rate": 4.6790567207562415e-06, + "loss": 0.5365, + "step": 13392 + }, + { + "epoch": 0.9846346125569769, + "grad_norm": 0.8077738285064697, + "learning_rate": 4.6790094887380835e-06, + "loss": 0.5349, + "step": 13393 + }, + { + "epoch": 0.9847081311571828, + "grad_norm": 0.7844159007072449, + "learning_rate": 4.678962253483127e-06, + "loss": 0.5188, + "step": 13394 + }, + { + "epoch": 0.9847816497573886, + "grad_norm": 0.8310151100158691, + "learning_rate": 4.67891501499144e-06, + "loss": 0.564, + "step": 13395 + }, + { + "epoch": 0.9848551683575945, + "grad_norm": 0.8818651437759399, + "learning_rate": 4.678867773263095e-06, + "loss": 0.5095, + "step": 13396 + }, + { + "epoch": 0.9849286869578003, + "grad_norm": 0.8334793448448181, + "learning_rate": 4.6788205282981614e-06, + "loss": 0.5309, + "step": 13397 + }, + { + "epoch": 0.9850022055580062, + "grad_norm": 0.7810930609703064, + "learning_rate": 4.678773280096709e-06, + "loss": 0.5054, + "step": 13398 + }, + { + "epoch": 0.985075724158212, + "grad_norm": 0.8487179279327393, + "learning_rate": 4.6787260286588076e-06, + "loss": 0.538, + "step": 13399 + }, + { + "epoch": 0.9851492427584179, + "grad_norm": 0.7991941571235657, + "learning_rate": 4.678678773984529e-06, + "loss": 0.5519, + "step": 13400 + }, + { + "epoch": 0.9852227613586237, + "grad_norm": 0.827107846736908, + "learning_rate": 4.678631516073942e-06, + "loss": 0.5012, + "step": 13401 + }, + { + "epoch": 0.9852962799588296, + "grad_norm": 0.8633467555046082, + "learning_rate": 4.678584254927117e-06, + "loss": 0.5343, + "step": 13402 + }, + { + "epoch": 0.9853697985590354, + "grad_norm": 0.8295740485191345, + "learning_rate": 4.6785369905441245e-06, + "loss": 0.5344, + "step": 13403 + }, + { + "epoch": 0.9854433171592413, + "grad_norm": 0.800258219242096, + "learning_rate": 4.6784897229250345e-06, + "loss": 0.5204, + "step": 13404 + }, + { + "epoch": 0.9855168357594472, + "grad_norm": 0.872317910194397, + "learning_rate": 4.678442452069918e-06, + "loss": 0.5054, + "step": 13405 + }, + { + "epoch": 0.985590354359653, + "grad_norm": 0.7993034720420837, + "learning_rate": 4.678395177978844e-06, + "loss": 0.4876, + "step": 13406 + }, + { + "epoch": 0.9856638729598588, + "grad_norm": 0.8463208079338074, + "learning_rate": 4.678347900651883e-06, + "loss": 0.4987, + "step": 13407 + }, + { + "epoch": 0.9857373915600647, + "grad_norm": 0.8082835674285889, + "learning_rate": 4.678300620089105e-06, + "loss": 0.4737, + "step": 13408 + }, + { + "epoch": 0.9858109101602706, + "grad_norm": 0.777974009513855, + "learning_rate": 4.678253336290581e-06, + "loss": 0.479, + "step": 13409 + }, + { + "epoch": 0.9858844287604764, + "grad_norm": 0.8370180726051331, + "learning_rate": 4.678206049256382e-06, + "loss": 0.5485, + "step": 13410 + }, + { + "epoch": 0.9859579473606822, + "grad_norm": 0.8190217018127441, + "learning_rate": 4.678158758986577e-06, + "loss": 0.576, + "step": 13411 + }, + { + "epoch": 0.9860314659608881, + "grad_norm": 0.8429766893386841, + "learning_rate": 4.6781114654812355e-06, + "loss": 0.5226, + "step": 13412 + }, + { + "epoch": 0.986104984561094, + "grad_norm": 0.7713918685913086, + "learning_rate": 4.6780641687404285e-06, + "loss": 0.5537, + "step": 13413 + }, + { + "epoch": 0.9861785031612998, + "grad_norm": 0.8040790557861328, + "learning_rate": 4.678016868764227e-06, + "loss": 0.5341, + "step": 13414 + }, + { + "epoch": 0.9862520217615056, + "grad_norm": 0.8077600002288818, + "learning_rate": 4.677969565552701e-06, + "loss": 0.4872, + "step": 13415 + }, + { + "epoch": 0.9863255403617115, + "grad_norm": 0.8494779467582703, + "learning_rate": 4.677922259105921e-06, + "loss": 0.5075, + "step": 13416 + }, + { + "epoch": 0.9863990589619174, + "grad_norm": 0.7737098932266235, + "learning_rate": 4.6778749494239555e-06, + "loss": 0.4611, + "step": 13417 + }, + { + "epoch": 0.9864725775621233, + "grad_norm": 0.8370172381401062, + "learning_rate": 4.677827636506877e-06, + "loss": 0.5334, + "step": 13418 + }, + { + "epoch": 0.986546096162329, + "grad_norm": 0.8101970553398132, + "learning_rate": 4.677780320354754e-06, + "loss": 0.4937, + "step": 13419 + }, + { + "epoch": 0.9866196147625349, + "grad_norm": 0.8264331221580505, + "learning_rate": 4.677733000967658e-06, + "loss": 0.5465, + "step": 13420 + }, + { + "epoch": 0.9866931333627408, + "grad_norm": 0.777454137802124, + "learning_rate": 4.677685678345658e-06, + "loss": 0.4576, + "step": 13421 + }, + { + "epoch": 0.9867666519629467, + "grad_norm": 0.8282933235168457, + "learning_rate": 4.677638352488826e-06, + "loss": 0.4988, + "step": 13422 + }, + { + "epoch": 0.9868401705631524, + "grad_norm": 0.7698178887367249, + "learning_rate": 4.677591023397231e-06, + "loss": 0.4832, + "step": 13423 + }, + { + "epoch": 0.9869136891633583, + "grad_norm": 0.8358536958694458, + "learning_rate": 4.677543691070945e-06, + "loss": 0.5418, + "step": 13424 + }, + { + "epoch": 0.9869872077635642, + "grad_norm": 0.785736083984375, + "learning_rate": 4.677496355510036e-06, + "loss": 0.5256, + "step": 13425 + }, + { + "epoch": 0.9870607263637701, + "grad_norm": 0.7603150010108948, + "learning_rate": 4.677449016714576e-06, + "loss": 0.4885, + "step": 13426 + }, + { + "epoch": 0.9871342449639758, + "grad_norm": 0.8507996201515198, + "learning_rate": 4.677401674684634e-06, + "loss": 0.4885, + "step": 13427 + }, + { + "epoch": 0.9872077635641817, + "grad_norm": 0.8498921394348145, + "learning_rate": 4.677354329420282e-06, + "loss": 0.5616, + "step": 13428 + }, + { + "epoch": 0.9872812821643876, + "grad_norm": 0.7696986794471741, + "learning_rate": 4.677306980921588e-06, + "loss": 0.4821, + "step": 13429 + }, + { + "epoch": 0.9873548007645935, + "grad_norm": 0.7845966219902039, + "learning_rate": 4.677259629188625e-06, + "loss": 0.5088, + "step": 13430 + }, + { + "epoch": 0.9874283193647992, + "grad_norm": 0.8478249907493591, + "learning_rate": 4.677212274221461e-06, + "loss": 0.5531, + "step": 13431 + }, + { + "epoch": 0.9875018379650051, + "grad_norm": 0.824731707572937, + "learning_rate": 4.677164916020168e-06, + "loss": 0.5485, + "step": 13432 + }, + { + "epoch": 0.987575356565211, + "grad_norm": 0.8321125507354736, + "learning_rate": 4.677117554584816e-06, + "loss": 0.5281, + "step": 13433 + }, + { + "epoch": 0.9876488751654169, + "grad_norm": 0.7867273688316345, + "learning_rate": 4.677070189915475e-06, + "loss": 0.4772, + "step": 13434 + }, + { + "epoch": 0.9877223937656227, + "grad_norm": 0.8351856470108032, + "learning_rate": 4.677022822012215e-06, + "loss": 0.537, + "step": 13435 + }, + { + "epoch": 0.9877959123658285, + "grad_norm": 0.8176206946372986, + "learning_rate": 4.676975450875106e-06, + "loss": 0.5262, + "step": 13436 + }, + { + "epoch": 0.9878694309660344, + "grad_norm": 0.8283653259277344, + "learning_rate": 4.676928076504221e-06, + "loss": 0.5263, + "step": 13437 + }, + { + "epoch": 0.9879429495662403, + "grad_norm": 0.8336198925971985, + "learning_rate": 4.676880698899627e-06, + "loss": 0.5141, + "step": 13438 + }, + { + "epoch": 0.9880164681664461, + "grad_norm": 0.8608580231666565, + "learning_rate": 4.676833318061397e-06, + "loss": 0.5336, + "step": 13439 + }, + { + "epoch": 0.9880899867666519, + "grad_norm": 0.8097835183143616, + "learning_rate": 4.676785933989601e-06, + "loss": 0.5163, + "step": 13440 + }, + { + "epoch": 0.9881635053668578, + "grad_norm": 0.7961310744285583, + "learning_rate": 4.676738546684307e-06, + "loss": 0.5063, + "step": 13441 + }, + { + "epoch": 0.9882370239670637, + "grad_norm": 0.8368309140205383, + "learning_rate": 4.676691156145587e-06, + "loss": 0.5359, + "step": 13442 + }, + { + "epoch": 0.9883105425672695, + "grad_norm": 0.8211684823036194, + "learning_rate": 4.676643762373513e-06, + "loss": 0.5459, + "step": 13443 + }, + { + "epoch": 0.9883840611674753, + "grad_norm": 0.810947597026825, + "learning_rate": 4.676596365368153e-06, + "loss": 0.5221, + "step": 13444 + }, + { + "epoch": 0.9884575797676812, + "grad_norm": 0.8370155692100525, + "learning_rate": 4.676548965129578e-06, + "loss": 0.5005, + "step": 13445 + }, + { + "epoch": 0.9885310983678871, + "grad_norm": 0.8694355487823486, + "learning_rate": 4.676501561657859e-06, + "loss": 0.553, + "step": 13446 + }, + { + "epoch": 0.9886046169680929, + "grad_norm": 0.8380739092826843, + "learning_rate": 4.676454154953067e-06, + "loss": 0.5223, + "step": 13447 + }, + { + "epoch": 0.9886781355682988, + "grad_norm": 0.7868539094924927, + "learning_rate": 4.67640674501527e-06, + "loss": 0.5082, + "step": 13448 + }, + { + "epoch": 0.9887516541685046, + "grad_norm": 0.7770549058914185, + "learning_rate": 4.676359331844541e-06, + "loss": 0.527, + "step": 13449 + }, + { + "epoch": 0.9888251727687105, + "grad_norm": 0.8019719123840332, + "learning_rate": 4.676311915440949e-06, + "loss": 0.4786, + "step": 13450 + }, + { + "epoch": 0.9888986913689163, + "grad_norm": 0.8324170112609863, + "learning_rate": 4.676264495804565e-06, + "loss": 0.4825, + "step": 13451 + }, + { + "epoch": 0.9889722099691222, + "grad_norm": 0.8724036812782288, + "learning_rate": 4.676217072935459e-06, + "loss": 0.5626, + "step": 13452 + }, + { + "epoch": 0.989045728569328, + "grad_norm": 0.8337355256080627, + "learning_rate": 4.676169646833702e-06, + "loss": 0.5453, + "step": 13453 + }, + { + "epoch": 0.9891192471695339, + "grad_norm": 0.8043547868728638, + "learning_rate": 4.676122217499364e-06, + "loss": 0.5217, + "step": 13454 + }, + { + "epoch": 0.9891927657697397, + "grad_norm": 0.8173573017120361, + "learning_rate": 4.6760747849325155e-06, + "loss": 0.5317, + "step": 13455 + }, + { + "epoch": 0.9892662843699456, + "grad_norm": 0.8240271806716919, + "learning_rate": 4.676027349133228e-06, + "loss": 0.4915, + "step": 13456 + }, + { + "epoch": 0.9893398029701514, + "grad_norm": 0.8027346134185791, + "learning_rate": 4.675979910101569e-06, + "loss": 0.4767, + "step": 13457 + }, + { + "epoch": 0.9894133215703573, + "grad_norm": 0.7946153879165649, + "learning_rate": 4.675932467837613e-06, + "loss": 0.5464, + "step": 13458 + }, + { + "epoch": 0.9894868401705631, + "grad_norm": 0.7649976015090942, + "learning_rate": 4.675885022341428e-06, + "loss": 0.5099, + "step": 13459 + }, + { + "epoch": 0.989560358770769, + "grad_norm": 0.8170389533042908, + "learning_rate": 4.675837573613084e-06, + "loss": 0.5156, + "step": 13460 + }, + { + "epoch": 0.9896338773709749, + "grad_norm": 0.8121697306632996, + "learning_rate": 4.6757901216526535e-06, + "loss": 0.5086, + "step": 13461 + }, + { + "epoch": 0.9897073959711807, + "grad_norm": 0.8316643238067627, + "learning_rate": 4.675742666460205e-06, + "loss": 0.5284, + "step": 13462 + }, + { + "epoch": 0.9897809145713865, + "grad_norm": 0.7816647887229919, + "learning_rate": 4.67569520803581e-06, + "loss": 0.5118, + "step": 13463 + }, + { + "epoch": 0.9898544331715924, + "grad_norm": 0.7779991626739502, + "learning_rate": 4.675647746379539e-06, + "loss": 0.5061, + "step": 13464 + }, + { + "epoch": 0.9899279517717983, + "grad_norm": 0.8167598247528076, + "learning_rate": 4.675600281491464e-06, + "loss": 0.4951, + "step": 13465 + }, + { + "epoch": 0.9900014703720041, + "grad_norm": 0.8298002481460571, + "learning_rate": 4.675552813371652e-06, + "loss": 0.5426, + "step": 13466 + }, + { + "epoch": 0.99007498897221, + "grad_norm": 0.7953876852989197, + "learning_rate": 4.675505342020176e-06, + "loss": 0.4886, + "step": 13467 + }, + { + "epoch": 0.9901485075724158, + "grad_norm": 0.7898802161216736, + "learning_rate": 4.675457867437106e-06, + "loss": 0.5152, + "step": 13468 + }, + { + "epoch": 0.9902220261726217, + "grad_norm": 0.8075243234634399, + "learning_rate": 4.675410389622512e-06, + "loss": 0.5216, + "step": 13469 + }, + { + "epoch": 0.9902955447728276, + "grad_norm": 0.804076611995697, + "learning_rate": 4.675362908576466e-06, + "loss": 0.553, + "step": 13470 + }, + { + "epoch": 0.9903690633730334, + "grad_norm": 0.8231919407844543, + "learning_rate": 4.675315424299037e-06, + "loss": 0.5444, + "step": 13471 + }, + { + "epoch": 0.9904425819732392, + "grad_norm": 0.8004414439201355, + "learning_rate": 4.675267936790296e-06, + "loss": 0.5105, + "step": 13472 + }, + { + "epoch": 0.9905161005734451, + "grad_norm": 0.7874735593795776, + "learning_rate": 4.675220446050313e-06, + "loss": 0.5135, + "step": 13473 + }, + { + "epoch": 0.990589619173651, + "grad_norm": 0.7842705249786377, + "learning_rate": 4.67517295207916e-06, + "loss": 0.5677, + "step": 13474 + }, + { + "epoch": 0.9906631377738568, + "grad_norm": 0.822589635848999, + "learning_rate": 4.675125454876907e-06, + "loss": 0.5212, + "step": 13475 + }, + { + "epoch": 0.9907366563740626, + "grad_norm": 0.7959745526313782, + "learning_rate": 4.675077954443623e-06, + "loss": 0.5288, + "step": 13476 + }, + { + "epoch": 0.9908101749742685, + "grad_norm": 0.862266480922699, + "learning_rate": 4.675030450779381e-06, + "loss": 0.5441, + "step": 13477 + }, + { + "epoch": 0.9908836935744744, + "grad_norm": 0.8139234781265259, + "learning_rate": 4.67498294388425e-06, + "loss": 0.5266, + "step": 13478 + }, + { + "epoch": 0.9909572121746802, + "grad_norm": 0.8090066909790039, + "learning_rate": 4.674935433758301e-06, + "loss": 0.527, + "step": 13479 + }, + { + "epoch": 0.991030730774886, + "grad_norm": 0.7961745262145996, + "learning_rate": 4.674887920401604e-06, + "loss": 0.5385, + "step": 13480 + }, + { + "epoch": 0.9911042493750919, + "grad_norm": 0.7629886865615845, + "learning_rate": 4.6748404038142306e-06, + "loss": 0.4771, + "step": 13481 + }, + { + "epoch": 0.9911777679752978, + "grad_norm": 0.7940900325775146, + "learning_rate": 4.67479288399625e-06, + "loss": 0.5244, + "step": 13482 + }, + { + "epoch": 0.9912512865755037, + "grad_norm": 0.7898763418197632, + "learning_rate": 4.674745360947735e-06, + "loss": 0.4752, + "step": 13483 + }, + { + "epoch": 0.9913248051757094, + "grad_norm": 0.763904333114624, + "learning_rate": 4.6746978346687536e-06, + "loss": 0.4657, + "step": 13484 + }, + { + "epoch": 0.9913983237759153, + "grad_norm": 0.8107481002807617, + "learning_rate": 4.6746503051593785e-06, + "loss": 0.5471, + "step": 13485 + }, + { + "epoch": 0.9914718423761212, + "grad_norm": 0.7859587669372559, + "learning_rate": 4.674602772419679e-06, + "loss": 0.5099, + "step": 13486 + }, + { + "epoch": 0.9915453609763271, + "grad_norm": 0.7948758602142334, + "learning_rate": 4.674555236449726e-06, + "loss": 0.5129, + "step": 13487 + }, + { + "epoch": 0.9916188795765328, + "grad_norm": 0.8177010416984558, + "learning_rate": 4.67450769724959e-06, + "loss": 0.4897, + "step": 13488 + }, + { + "epoch": 0.9916923981767387, + "grad_norm": 0.812379002571106, + "learning_rate": 4.674460154819343e-06, + "loss": 0.4863, + "step": 13489 + }, + { + "epoch": 0.9917659167769446, + "grad_norm": 0.8438368439674377, + "learning_rate": 4.674412609159054e-06, + "loss": 0.5362, + "step": 13490 + }, + { + "epoch": 0.9918394353771505, + "grad_norm": 0.8364152908325195, + "learning_rate": 4.6743650602687944e-06, + "loss": 0.5347, + "step": 13491 + }, + { + "epoch": 0.9919129539773562, + "grad_norm": 0.8566079139709473, + "learning_rate": 4.674317508148635e-06, + "loss": 0.5295, + "step": 13492 + }, + { + "epoch": 0.9919864725775621, + "grad_norm": 0.8237242698669434, + "learning_rate": 4.674269952798644e-06, + "loss": 0.5227, + "step": 13493 + }, + { + "epoch": 0.992059991177768, + "grad_norm": 0.8497795462608337, + "learning_rate": 4.674222394218896e-06, + "loss": 0.5572, + "step": 13494 + }, + { + "epoch": 0.9921335097779739, + "grad_norm": 0.7748612761497498, + "learning_rate": 4.674174832409459e-06, + "loss": 0.4607, + "step": 13495 + }, + { + "epoch": 0.9922070283781796, + "grad_norm": 0.8269532918930054, + "learning_rate": 4.674127267370404e-06, + "loss": 0.4923, + "step": 13496 + }, + { + "epoch": 0.9922805469783855, + "grad_norm": 0.8064407706260681, + "learning_rate": 4.674079699101803e-06, + "loss": 0.5255, + "step": 13497 + }, + { + "epoch": 0.9923540655785914, + "grad_norm": 0.780075192451477, + "learning_rate": 4.674032127603725e-06, + "loss": 0.5101, + "step": 13498 + }, + { + "epoch": 0.9924275841787973, + "grad_norm": 0.8150351047515869, + "learning_rate": 4.6739845528762415e-06, + "loss": 0.5154, + "step": 13499 + }, + { + "epoch": 0.992501102779003, + "grad_norm": 0.8190025687217712, + "learning_rate": 4.673936974919423e-06, + "loss": 0.4959, + "step": 13500 + }, + { + "epoch": 0.9925746213792089, + "grad_norm": 0.8203639984130859, + "learning_rate": 4.673889393733339e-06, + "loss": 0.4867, + "step": 13501 + }, + { + "epoch": 0.9926481399794148, + "grad_norm": 0.8699906468391418, + "learning_rate": 4.673841809318063e-06, + "loss": 0.5323, + "step": 13502 + }, + { + "epoch": 0.9927216585796207, + "grad_norm": 0.7841641306877136, + "learning_rate": 4.673794221673663e-06, + "loss": 0.494, + "step": 13503 + }, + { + "epoch": 0.9927951771798265, + "grad_norm": 0.8273792862892151, + "learning_rate": 4.673746630800211e-06, + "loss": 0.4882, + "step": 13504 + }, + { + "epoch": 0.9928686957800323, + "grad_norm": 0.7795954346656799, + "learning_rate": 4.673699036697777e-06, + "loss": 0.5075, + "step": 13505 + }, + { + "epoch": 0.9929422143802382, + "grad_norm": 0.8223637938499451, + "learning_rate": 4.673651439366434e-06, + "loss": 0.5118, + "step": 13506 + }, + { + "epoch": 0.9930157329804441, + "grad_norm": 0.8153749704360962, + "learning_rate": 4.6736038388062485e-06, + "loss": 0.5336, + "step": 13507 + }, + { + "epoch": 0.9930892515806499, + "grad_norm": 0.8566752076148987, + "learning_rate": 4.673556235017295e-06, + "loss": 0.544, + "step": 13508 + }, + { + "epoch": 0.9931627701808557, + "grad_norm": 0.8088647127151489, + "learning_rate": 4.673508627999642e-06, + "loss": 0.5129, + "step": 13509 + }, + { + "epoch": 0.9932362887810616, + "grad_norm": 0.7946030497550964, + "learning_rate": 4.673461017753361e-06, + "loss": 0.5267, + "step": 13510 + }, + { + "epoch": 0.9933098073812675, + "grad_norm": 0.7868434190750122, + "learning_rate": 4.673413404278523e-06, + "loss": 0.4853, + "step": 13511 + }, + { + "epoch": 0.9933833259814733, + "grad_norm": 0.8334751129150391, + "learning_rate": 4.673365787575198e-06, + "loss": 0.5728, + "step": 13512 + }, + { + "epoch": 0.9934568445816792, + "grad_norm": 0.8358775973320007, + "learning_rate": 4.673318167643457e-06, + "loss": 0.4777, + "step": 13513 + }, + { + "epoch": 0.993530363181885, + "grad_norm": 0.7791205048561096, + "learning_rate": 4.673270544483372e-06, + "loss": 0.5011, + "step": 13514 + }, + { + "epoch": 0.9936038817820909, + "grad_norm": 0.8145034909248352, + "learning_rate": 4.673222918095012e-06, + "loss": 0.51, + "step": 13515 + }, + { + "epoch": 0.9936774003822967, + "grad_norm": 0.8331305384635925, + "learning_rate": 4.673175288478448e-06, + "loss": 0.4858, + "step": 13516 + }, + { + "epoch": 0.9937509189825026, + "grad_norm": 0.7777766585350037, + "learning_rate": 4.673127655633751e-06, + "loss": 0.4628, + "step": 13517 + }, + { + "epoch": 0.9938244375827084, + "grad_norm": 0.7696402668952942, + "learning_rate": 4.673080019560992e-06, + "loss": 0.4927, + "step": 13518 + }, + { + "epoch": 0.9938979561829143, + "grad_norm": 0.8060415983200073, + "learning_rate": 4.673032380260242e-06, + "loss": 0.494, + "step": 13519 + }, + { + "epoch": 0.9939714747831201, + "grad_norm": 0.8473955988883972, + "learning_rate": 4.672984737731571e-06, + "loss": 0.5318, + "step": 13520 + }, + { + "epoch": 0.994044993383326, + "grad_norm": 0.8002733588218689, + "learning_rate": 4.672937091975051e-06, + "loss": 0.5428, + "step": 13521 + }, + { + "epoch": 0.9941185119835318, + "grad_norm": 0.8204347491264343, + "learning_rate": 4.672889442990751e-06, + "loss": 0.5244, + "step": 13522 + }, + { + "epoch": 0.9941920305837377, + "grad_norm": 0.8302879333496094, + "learning_rate": 4.672841790778742e-06, + "loss": 0.5129, + "step": 13523 + }, + { + "epoch": 0.9942655491839435, + "grad_norm": 0.8295431137084961, + "learning_rate": 4.672794135339097e-06, + "loss": 0.5411, + "step": 13524 + }, + { + "epoch": 0.9943390677841494, + "grad_norm": 0.8444982767105103, + "learning_rate": 4.672746476671885e-06, + "loss": 0.5067, + "step": 13525 + }, + { + "epoch": 0.9944125863843553, + "grad_norm": 0.7999381422996521, + "learning_rate": 4.672698814777177e-06, + "loss": 0.5194, + "step": 13526 + }, + { + "epoch": 0.9944861049845611, + "grad_norm": 0.834953248500824, + "learning_rate": 4.6726511496550435e-06, + "loss": 0.5455, + "step": 13527 + }, + { + "epoch": 0.9945596235847669, + "grad_norm": 0.8057733774185181, + "learning_rate": 4.672603481305556e-06, + "loss": 0.5031, + "step": 13528 + }, + { + "epoch": 0.9946331421849728, + "grad_norm": 0.808690071105957, + "learning_rate": 4.672555809728785e-06, + "loss": 0.5133, + "step": 13529 + }, + { + "epoch": 0.9947066607851787, + "grad_norm": 0.7988542318344116, + "learning_rate": 4.672508134924801e-06, + "loss": 0.5077, + "step": 13530 + }, + { + "epoch": 0.9947801793853845, + "grad_norm": 0.8247075080871582, + "learning_rate": 4.672460456893676e-06, + "loss": 0.4873, + "step": 13531 + }, + { + "epoch": 0.9948536979855903, + "grad_norm": 0.7931106090545654, + "learning_rate": 4.672412775635479e-06, + "loss": 0.5551, + "step": 13532 + }, + { + "epoch": 0.9949272165857962, + "grad_norm": 0.8159436583518982, + "learning_rate": 4.672365091150282e-06, + "loss": 0.533, + "step": 13533 + }, + { + "epoch": 0.9950007351860021, + "grad_norm": 0.8334630727767944, + "learning_rate": 4.672317403438156e-06, + "loss": 0.5583, + "step": 13534 + }, + { + "epoch": 0.995074253786208, + "grad_norm": 0.8295565247535706, + "learning_rate": 4.672269712499171e-06, + "loss": 0.5241, + "step": 13535 + }, + { + "epoch": 0.9951477723864137, + "grad_norm": 0.8178665637969971, + "learning_rate": 4.672222018333399e-06, + "loss": 0.4944, + "step": 13536 + }, + { + "epoch": 0.9952212909866196, + "grad_norm": 0.8341147899627686, + "learning_rate": 4.672174320940909e-06, + "loss": 0.5063, + "step": 13537 + }, + { + "epoch": 0.9952948095868255, + "grad_norm": 0.8246516585350037, + "learning_rate": 4.6721266203217735e-06, + "loss": 0.5207, + "step": 13538 + }, + { + "epoch": 0.9953683281870314, + "grad_norm": 0.8636490106582642, + "learning_rate": 4.672078916476063e-06, + "loss": 0.5763, + "step": 13539 + }, + { + "epoch": 0.9954418467872371, + "grad_norm": 0.8134170174598694, + "learning_rate": 4.672031209403849e-06, + "loss": 0.4864, + "step": 13540 + }, + { + "epoch": 0.995515365387443, + "grad_norm": 0.8106949329376221, + "learning_rate": 4.6719834991052e-06, + "loss": 0.5343, + "step": 13541 + }, + { + "epoch": 0.9955888839876489, + "grad_norm": 0.8218570351600647, + "learning_rate": 4.671935785580189e-06, + "loss": 0.4968, + "step": 13542 + }, + { + "epoch": 0.9956624025878548, + "grad_norm": 0.8185403943061829, + "learning_rate": 4.671888068828887e-06, + "loss": 0.5478, + "step": 13543 + }, + { + "epoch": 0.9957359211880605, + "grad_norm": 0.7817920446395874, + "learning_rate": 4.671840348851363e-06, + "loss": 0.5182, + "step": 13544 + }, + { + "epoch": 0.9958094397882664, + "grad_norm": 0.7635395526885986, + "learning_rate": 4.67179262564769e-06, + "loss": 0.4885, + "step": 13545 + }, + { + "epoch": 0.9958829583884723, + "grad_norm": 0.8326099514961243, + "learning_rate": 4.671744899217938e-06, + "loss": 0.5237, + "step": 13546 + }, + { + "epoch": 0.9959564769886782, + "grad_norm": 0.8046284914016724, + "learning_rate": 4.671697169562177e-06, + "loss": 0.535, + "step": 13547 + }, + { + "epoch": 0.9960299955888839, + "grad_norm": 0.8678742051124573, + "learning_rate": 4.67164943668048e-06, + "loss": 0.5288, + "step": 13548 + }, + { + "epoch": 0.9961035141890898, + "grad_norm": 0.7909377217292786, + "learning_rate": 4.671601700572916e-06, + "loss": 0.5468, + "step": 13549 + }, + { + "epoch": 0.9961770327892957, + "grad_norm": 0.8705161809921265, + "learning_rate": 4.671553961239557e-06, + "loss": 0.5714, + "step": 13550 + }, + { + "epoch": 0.9962505513895016, + "grad_norm": 0.8131377696990967, + "learning_rate": 4.671506218680473e-06, + "loss": 0.5427, + "step": 13551 + }, + { + "epoch": 0.9963240699897074, + "grad_norm": 0.8090256452560425, + "learning_rate": 4.671458472895736e-06, + "loss": 0.4964, + "step": 13552 + }, + { + "epoch": 0.9963975885899132, + "grad_norm": 0.78809654712677, + "learning_rate": 4.671410723885415e-06, + "loss": 0.4774, + "step": 13553 + }, + { + "epoch": 0.9964711071901191, + "grad_norm": 0.8169184923171997, + "learning_rate": 4.671362971649583e-06, + "loss": 0.5053, + "step": 13554 + }, + { + "epoch": 0.996544625790325, + "grad_norm": 0.8384061455726624, + "learning_rate": 4.671315216188311e-06, + "loss": 0.5573, + "step": 13555 + }, + { + "epoch": 0.9966181443905308, + "grad_norm": 0.8469624519348145, + "learning_rate": 4.6712674575016685e-06, + "loss": 0.539, + "step": 13556 + }, + { + "epoch": 0.9966916629907366, + "grad_norm": 0.7899113893508911, + "learning_rate": 4.6712196955897275e-06, + "loss": 0.5178, + "step": 13557 + }, + { + "epoch": 0.9967651815909425, + "grad_norm": 0.8228161334991455, + "learning_rate": 4.6711719304525586e-06, + "loss": 0.5133, + "step": 13558 + }, + { + "epoch": 0.9968387001911484, + "grad_norm": 0.8512375354766846, + "learning_rate": 4.671124162090231e-06, + "loss": 0.5698, + "step": 13559 + }, + { + "epoch": 0.9969122187913542, + "grad_norm": 0.7572901248931885, + "learning_rate": 4.671076390502819e-06, + "loss": 0.4875, + "step": 13560 + }, + { + "epoch": 0.99698573739156, + "grad_norm": 0.844639241695404, + "learning_rate": 4.671028615690392e-06, + "loss": 0.4831, + "step": 13561 + }, + { + "epoch": 0.9970592559917659, + "grad_norm": 0.8449146747589111, + "learning_rate": 4.6709808376530206e-06, + "loss": 0.5426, + "step": 13562 + }, + { + "epoch": 0.9971327745919718, + "grad_norm": 0.7929855585098267, + "learning_rate": 4.670933056390775e-06, + "loss": 0.5111, + "step": 13563 + }, + { + "epoch": 0.9972062931921776, + "grad_norm": 0.8942828178405762, + "learning_rate": 4.670885271903729e-06, + "loss": 0.5354, + "step": 13564 + }, + { + "epoch": 0.9972798117923835, + "grad_norm": 0.8306213021278381, + "learning_rate": 4.670837484191951e-06, + "loss": 0.4989, + "step": 13565 + }, + { + "epoch": 0.9973533303925893, + "grad_norm": 0.8286524415016174, + "learning_rate": 4.670789693255512e-06, + "loss": 0.5022, + "step": 13566 + }, + { + "epoch": 0.9974268489927952, + "grad_norm": 0.8198011517524719, + "learning_rate": 4.6707418990944845e-06, + "loss": 0.5378, + "step": 13567 + }, + { + "epoch": 0.997500367593001, + "grad_norm": 0.8270484209060669, + "learning_rate": 4.670694101708939e-06, + "loss": 0.546, + "step": 13568 + }, + { + "epoch": 0.9975738861932069, + "grad_norm": 0.8300146460533142, + "learning_rate": 4.670646301098945e-06, + "loss": 0.5531, + "step": 13569 + }, + { + "epoch": 0.9976474047934127, + "grad_norm": 0.7913962602615356, + "learning_rate": 4.670598497264576e-06, + "loss": 0.5058, + "step": 13570 + }, + { + "epoch": 0.9977209233936186, + "grad_norm": 0.8054326772689819, + "learning_rate": 4.670550690205902e-06, + "loss": 0.5089, + "step": 13571 + }, + { + "epoch": 0.9977944419938244, + "grad_norm": 0.8750394582748413, + "learning_rate": 4.6705028799229936e-06, + "loss": 0.4977, + "step": 13572 + }, + { + "epoch": 0.9978679605940303, + "grad_norm": 0.8137717843055725, + "learning_rate": 4.670455066415922e-06, + "loss": 0.5041, + "step": 13573 + }, + { + "epoch": 0.9979414791942361, + "grad_norm": 0.8075687289237976, + "learning_rate": 4.670407249684758e-06, + "loss": 0.5392, + "step": 13574 + }, + { + "epoch": 0.998014997794442, + "grad_norm": 0.8080381155014038, + "learning_rate": 4.670359429729573e-06, + "loss": 0.5379, + "step": 13575 + }, + { + "epoch": 0.9980885163946478, + "grad_norm": 0.8252204060554504, + "learning_rate": 4.6703116065504375e-06, + "loss": 0.5518, + "step": 13576 + }, + { + "epoch": 0.9981620349948537, + "grad_norm": 0.8114098906517029, + "learning_rate": 4.670263780147424e-06, + "loss": 0.541, + "step": 13577 + }, + { + "epoch": 0.9982355535950596, + "grad_norm": 0.8660712242126465, + "learning_rate": 4.670215950520601e-06, + "loss": 0.5433, + "step": 13578 + }, + { + "epoch": 0.9983090721952654, + "grad_norm": 0.8064799904823303, + "learning_rate": 4.670168117670042e-06, + "loss": 0.5633, + "step": 13579 + }, + { + "epoch": 0.9983825907954712, + "grad_norm": 0.8030191659927368, + "learning_rate": 4.670120281595817e-06, + "loss": 0.4809, + "step": 13580 + }, + { + "epoch": 0.9984561093956771, + "grad_norm": 0.8037552833557129, + "learning_rate": 4.670072442297997e-06, + "loss": 0.5103, + "step": 13581 + }, + { + "epoch": 0.998529627995883, + "grad_norm": 0.7873378396034241, + "learning_rate": 4.670024599776654e-06, + "loss": 0.5284, + "step": 13582 + }, + { + "epoch": 0.9986031465960888, + "grad_norm": 0.8293755650520325, + "learning_rate": 4.669976754031857e-06, + "loss": 0.5168, + "step": 13583 + }, + { + "epoch": 0.9986766651962946, + "grad_norm": 0.824741005897522, + "learning_rate": 4.669928905063678e-06, + "loss": 0.5317, + "step": 13584 + }, + { + "epoch": 0.9987501837965005, + "grad_norm": 0.8411204218864441, + "learning_rate": 4.669881052872191e-06, + "loss": 0.5453, + "step": 13585 + }, + { + "epoch": 0.9988237023967064, + "grad_norm": 0.793535590171814, + "learning_rate": 4.669833197457462e-06, + "loss": 0.505, + "step": 13586 + }, + { + "epoch": 0.9988972209969122, + "grad_norm": 0.7862092852592468, + "learning_rate": 4.669785338819566e-06, + "loss": 0.5367, + "step": 13587 + }, + { + "epoch": 0.998970739597118, + "grad_norm": 0.7974005937576294, + "learning_rate": 4.6697374769585715e-06, + "loss": 0.485, + "step": 13588 + }, + { + "epoch": 0.9990442581973239, + "grad_norm": 0.8138454556465149, + "learning_rate": 4.669689611874551e-06, + "loss": 0.4798, + "step": 13589 + }, + { + "epoch": 0.9991177767975298, + "grad_norm": 0.8455530405044556, + "learning_rate": 4.669641743567577e-06, + "loss": 0.6107, + "step": 13590 + }, + { + "epoch": 0.9991912953977357, + "grad_norm": 0.7789888381958008, + "learning_rate": 4.6695938720377175e-06, + "loss": 0.4786, + "step": 13591 + }, + { + "epoch": 0.9992648139979414, + "grad_norm": 0.8012475967407227, + "learning_rate": 4.669545997285045e-06, + "loss": 0.5397, + "step": 13592 + }, + { + "epoch": 0.9993383325981473, + "grad_norm": 0.8598352670669556, + "learning_rate": 4.669498119309632e-06, + "loss": 0.5265, + "step": 13593 + }, + { + "epoch": 0.9994118511983532, + "grad_norm": 0.8088831901550293, + "learning_rate": 4.669450238111547e-06, + "loss": 0.4949, + "step": 13594 + }, + { + "epoch": 0.9994853697985591, + "grad_norm": 0.851161777973175, + "learning_rate": 4.669402353690863e-06, + "loss": 0.5537, + "step": 13595 + }, + { + "epoch": 0.9995588883987648, + "grad_norm": 0.822172224521637, + "learning_rate": 4.669354466047651e-06, + "loss": 0.517, + "step": 13596 + }, + { + "epoch": 0.9996324069989707, + "grad_norm": 0.8157437443733215, + "learning_rate": 4.669306575181981e-06, + "loss": 0.5616, + "step": 13597 + }, + { + "epoch": 0.9997059255991766, + "grad_norm": 0.8154730200767517, + "learning_rate": 4.669258681093926e-06, + "loss": 0.4917, + "step": 13598 + }, + { + "epoch": 0.9997794441993825, + "grad_norm": 0.7948343753814697, + "learning_rate": 4.6692107837835546e-06, + "loss": 0.5256, + "step": 13599 + }, + { + "epoch": 0.9998529627995882, + "grad_norm": 0.8494049906730652, + "learning_rate": 4.66916288325094e-06, + "loss": 0.5791, + "step": 13600 + }, + { + "epoch": 0.9999264813997941, + "grad_norm": 0.8308073878288269, + "learning_rate": 4.669114979496153e-06, + "loss": 0.55, + "step": 13601 + }, + { + "epoch": 1.0, + "grad_norm": 0.8458868861198425, + "learning_rate": 4.669067072519264e-06, + "loss": 0.5445, + "step": 13602 + } + ], + "logging_steps": 1, + "max_steps": 81612, + "num_input_tokens_seen": 0, + "num_train_epochs": 6, + "save_steps": 13602, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 4.349622076832534e+19, + "train_batch_size": 8, + "trial_name": null, + "trial_params": null +}