{ "best_global_step": 3689, "best_metric": 62.08277152527448, "best_model_checkpoint": "whisper-medium-quantized-lora/checkpoints/checkpoint-3689", "epoch": 0.49990260097737804, "eval_steps": 3689, "global_step": 7378, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 6.775584182398726e-05, "grad_norm": 11.2294282913208, "learning_rate": 0.0, "loss": 3.7623, "step": 1 }, { "epoch": 0.00013551168364797452, "grad_norm": 27.403789520263672, "learning_rate": 5.333333333333333e-07, "loss": 3.7509, "step": 2 }, { "epoch": 0.0002032675254719618, "grad_norm": 29.29781150817871, "learning_rate": 1.0666666666666667e-06, "loss": 4.1779, "step": 3 }, { "epoch": 0.00027102336729594904, "grad_norm": 5.965636730194092, "learning_rate": 1.6000000000000001e-06, "loss": 3.0093, "step": 4 }, { "epoch": 0.0003387792091199363, "grad_norm": 7.786504745483398, "learning_rate": 2.1333333333333334e-06, "loss": 3.0575, "step": 5 }, { "epoch": 0.0004065350509439236, "grad_norm": 21.127445220947266, "learning_rate": 2.666666666666667e-06, "loss": 2.7851, "step": 6 }, { "epoch": 0.00047429089276791086, "grad_norm": 5.899219989776611, "learning_rate": 3.2000000000000003e-06, "loss": 2.6554, "step": 7 }, { "epoch": 0.0005420467345918981, "grad_norm": 29.54319953918457, "learning_rate": 3.7333333333333337e-06, "loss": 2.561, "step": 8 }, { "epoch": 0.0006098025764158854, "grad_norm": 16.01921844482422, "learning_rate": 4.266666666666667e-06, "loss": 3.1085, "step": 9 }, { "epoch": 0.0006775584182398726, "grad_norm": 6.624917030334473, "learning_rate": 4.800000000000001e-06, "loss": 2.8822, "step": 10 }, { "epoch": 0.0007453142600638598, "grad_norm": 8.62118148803711, "learning_rate": 5.333333333333334e-06, "loss": 3.8258, "step": 11 }, { "epoch": 0.0008130701018878472, "grad_norm": 6.586535453796387, "learning_rate": 5.866666666666667e-06, "loss": 3.0811, "step": 12 }, { "epoch": 0.0008808259437118344, "grad_norm": 22.17951011657715, "learning_rate": 6.4000000000000006e-06, "loss": 3.4985, "step": 13 }, { "epoch": 0.0009485817855358217, "grad_norm": 14.850955963134766, "learning_rate": 6.933333333333334e-06, "loss": 4.1996, "step": 14 }, { "epoch": 0.0010163376273598088, "grad_norm": 62.20494842529297, "learning_rate": 7.4666666666666675e-06, "loss": 3.4069, "step": 15 }, { "epoch": 0.0010840934691837961, "grad_norm": 6.534540176391602, "learning_rate": 8.000000000000001e-06, "loss": 2.9703, "step": 16 }, { "epoch": 0.0011518493110077835, "grad_norm": 8.012496948242188, "learning_rate": 8.533333333333334e-06, "loss": 2.8364, "step": 17 }, { "epoch": 0.0012196051528317708, "grad_norm": 9.2630615234375, "learning_rate": 9.066666666666667e-06, "loss": 2.004, "step": 18 }, { "epoch": 0.001287360994655758, "grad_norm": 3.8082871437072754, "learning_rate": 9.600000000000001e-06, "loss": 2.1368, "step": 19 }, { "epoch": 0.0013551168364797452, "grad_norm": 5.680924415588379, "learning_rate": 1.0133333333333333e-05, "loss": 2.0163, "step": 20 }, { "epoch": 0.0014228726783037326, "grad_norm": 34.47060012817383, "learning_rate": 1.0666666666666667e-05, "loss": 3.1532, "step": 21 }, { "epoch": 0.0014906285201277197, "grad_norm": 6.08674430847168, "learning_rate": 1.1200000000000001e-05, "loss": 2.9393, "step": 22 }, { "epoch": 0.001558384361951707, "grad_norm": 8.805831909179688, "learning_rate": 1.1733333333333333e-05, "loss": 2.7824, "step": 23 }, { "epoch": 0.0016261402037756943, "grad_norm": 10.128340721130371, "learning_rate": 1.2266666666666667e-05, "loss": 2.9302, "step": 24 }, { "epoch": 0.0016938960455996814, "grad_norm": 5.021735668182373, "learning_rate": 1.2800000000000001e-05, "loss": 2.8988, "step": 25 }, { "epoch": 0.0017616518874236688, "grad_norm": 27.6428165435791, "learning_rate": 1.3333333333333333e-05, "loss": 4.0013, "step": 26 }, { "epoch": 0.001829407729247656, "grad_norm": 5.653388500213623, "learning_rate": 1.3866666666666667e-05, "loss": 3.0088, "step": 27 }, { "epoch": 0.0018971635710716434, "grad_norm": 8.216667175292969, "learning_rate": 1.44e-05, "loss": 2.8235, "step": 28 }, { "epoch": 0.0019649194128956307, "grad_norm": 5.633044719696045, "learning_rate": 1.4933333333333335e-05, "loss": 2.5359, "step": 29 }, { "epoch": 0.0020326752547196176, "grad_norm": 6.593887805938721, "learning_rate": 1.546666666666667e-05, "loss": 2.307, "step": 30 }, { "epoch": 0.002100431096543605, "grad_norm": 16.731855392456055, "learning_rate": 1.6000000000000003e-05, "loss": 2.3399, "step": 31 }, { "epoch": 0.0021681869383675923, "grad_norm": 15.881052017211914, "learning_rate": 1.6533333333333333e-05, "loss": 2.6155, "step": 32 }, { "epoch": 0.0022359427801915796, "grad_norm": 14.086976051330566, "learning_rate": 1.7066666666666667e-05, "loss": 2.3971, "step": 33 }, { "epoch": 0.002303698622015567, "grad_norm": 36.32572937011719, "learning_rate": 1.76e-05, "loss": 3.217, "step": 34 }, { "epoch": 0.0023714544638395543, "grad_norm": 8.181658744812012, "learning_rate": 1.8133333333333335e-05, "loss": 2.0689, "step": 35 }, { "epoch": 0.0024392103056635416, "grad_norm": 5.850778579711914, "learning_rate": 1.866666666666667e-05, "loss": 3.0411, "step": 36 }, { "epoch": 0.0025069661474875285, "grad_norm": 4.756629943847656, "learning_rate": 1.9200000000000003e-05, "loss": 2.7895, "step": 37 }, { "epoch": 0.002574721989311516, "grad_norm": 4.575255870819092, "learning_rate": 1.9733333333333333e-05, "loss": 2.6805, "step": 38 }, { "epoch": 0.002642477831135503, "grad_norm": 4.238344669342041, "learning_rate": 2.0266666666666667e-05, "loss": 2.2225, "step": 39 }, { "epoch": 0.0027102336729594905, "grad_norm": 5.039498805999756, "learning_rate": 2.08e-05, "loss": 2.9887, "step": 40 }, { "epoch": 0.002777989514783478, "grad_norm": 3.856889247894287, "learning_rate": 2.1333333333333335e-05, "loss": 2.4299, "step": 41 }, { "epoch": 0.002845745356607465, "grad_norm": 3.5066351890563965, "learning_rate": 2.186666666666667e-05, "loss": 2.237, "step": 42 }, { "epoch": 0.0029135011984314525, "grad_norm": 4.3462677001953125, "learning_rate": 2.2400000000000002e-05, "loss": 2.5201, "step": 43 }, { "epoch": 0.0029812570402554394, "grad_norm": 4.263307094573975, "learning_rate": 2.2933333333333333e-05, "loss": 2.5842, "step": 44 }, { "epoch": 0.0030490128820794267, "grad_norm": 4.542484760284424, "learning_rate": 2.3466666666666667e-05, "loss": 2.3541, "step": 45 }, { "epoch": 0.003116768723903414, "grad_norm": 3.7715518474578857, "learning_rate": 2.4e-05, "loss": 2.206, "step": 46 }, { "epoch": 0.0031845245657274013, "grad_norm": 4.391230583190918, "learning_rate": 2.4533333333333334e-05, "loss": 2.4971, "step": 47 }, { "epoch": 0.0032522804075513887, "grad_norm": 4.592225551605225, "learning_rate": 2.5066666666666665e-05, "loss": 2.7705, "step": 48 }, { "epoch": 0.003320036249375376, "grad_norm": 4.211614608764648, "learning_rate": 2.5600000000000002e-05, "loss": 2.6184, "step": 49 }, { "epoch": 0.003387792091199363, "grad_norm": 3.9807486534118652, "learning_rate": 2.6133333333333333e-05, "loss": 2.3636, "step": 50 }, { "epoch": 0.00345554793302335, "grad_norm": 3.6588141918182373, "learning_rate": 2.6666666666666667e-05, "loss": 1.8357, "step": 51 }, { "epoch": 0.0035233037748473375, "grad_norm": 6.977685928344727, "learning_rate": 2.7200000000000004e-05, "loss": 2.9069, "step": 52 }, { "epoch": 0.003591059616671325, "grad_norm": 3.890204906463623, "learning_rate": 2.7733333333333334e-05, "loss": 2.2357, "step": 53 }, { "epoch": 0.003658815458495312, "grad_norm": 3.3455967903137207, "learning_rate": 2.8266666666666668e-05, "loss": 1.7488, "step": 54 }, { "epoch": 0.0037265713003192995, "grad_norm": 3.685267686843872, "learning_rate": 2.88e-05, "loss": 1.9956, "step": 55 }, { "epoch": 0.003794327142143287, "grad_norm": 4.967723846435547, "learning_rate": 2.9333333333333336e-05, "loss": 2.4638, "step": 56 }, { "epoch": 0.0038620829839672737, "grad_norm": 3.6209311485290527, "learning_rate": 2.986666666666667e-05, "loss": 1.8269, "step": 57 }, { "epoch": 0.0039298388257912615, "grad_norm": 4.622175216674805, "learning_rate": 3.04e-05, "loss": 2.6827, "step": 58 }, { "epoch": 0.003997594667615249, "grad_norm": 4.11090612411499, "learning_rate": 3.093333333333334e-05, "loss": 2.2083, "step": 59 }, { "epoch": 0.004065350509439235, "grad_norm": 3.5581538677215576, "learning_rate": 3.146666666666667e-05, "loss": 1.9052, "step": 60 }, { "epoch": 0.004133106351263223, "grad_norm": 3.498354196548462, "learning_rate": 3.2000000000000005e-05, "loss": 1.7929, "step": 61 }, { "epoch": 0.00420086219308721, "grad_norm": 5.344475746154785, "learning_rate": 3.253333333333333e-05, "loss": 2.0024, "step": 62 }, { "epoch": 0.004268618034911197, "grad_norm": 4.66567325592041, "learning_rate": 3.3066666666666666e-05, "loss": 2.7599, "step": 63 }, { "epoch": 0.004336373876735185, "grad_norm": 3.411456346511841, "learning_rate": 3.3600000000000004e-05, "loss": 1.9109, "step": 64 }, { "epoch": 0.004404129718559172, "grad_norm": 4.811199188232422, "learning_rate": 3.4133333333333334e-05, "loss": 2.5342, "step": 65 }, { "epoch": 0.004471885560383159, "grad_norm": 3.51120662689209, "learning_rate": 3.466666666666667e-05, "loss": 1.8567, "step": 66 }, { "epoch": 0.004539641402207147, "grad_norm": 5.19026517868042, "learning_rate": 3.52e-05, "loss": 2.4372, "step": 67 }, { "epoch": 0.004607397244031134, "grad_norm": 3.779278516769409, "learning_rate": 3.573333333333333e-05, "loss": 1.8131, "step": 68 }, { "epoch": 0.004675153085855121, "grad_norm": 2.883894443511963, "learning_rate": 3.626666666666667e-05, "loss": 1.4007, "step": 69 }, { "epoch": 0.0047429089276791086, "grad_norm": 4.038563251495361, "learning_rate": 3.68e-05, "loss": 1.7251, "step": 70 }, { "epoch": 0.004810664769503096, "grad_norm": 4.436848163604736, "learning_rate": 3.733333333333334e-05, "loss": 2.0543, "step": 71 }, { "epoch": 0.004878420611327083, "grad_norm": 3.919203996658325, "learning_rate": 3.786666666666667e-05, "loss": 1.8566, "step": 72 }, { "epoch": 0.0049461764531510705, "grad_norm": 3.197636604309082, "learning_rate": 3.8400000000000005e-05, "loss": 1.7718, "step": 73 }, { "epoch": 0.005013932294975057, "grad_norm": 4.205629348754883, "learning_rate": 3.8933333333333336e-05, "loss": 2.2516, "step": 74 }, { "epoch": 0.005081688136799044, "grad_norm": 4.737552642822266, "learning_rate": 3.9466666666666666e-05, "loss": 2.277, "step": 75 }, { "epoch": 0.005149443978623032, "grad_norm": 4.187109470367432, "learning_rate": 4e-05, "loss": 2.0053, "step": 76 }, { "epoch": 0.005217199820447019, "grad_norm": 4.559085369110107, "learning_rate": 4.0533333333333334e-05, "loss": 1.9662, "step": 77 }, { "epoch": 0.005284955662271006, "grad_norm": 4.184353351593018, "learning_rate": 4.106666666666667e-05, "loss": 2.2256, "step": 78 }, { "epoch": 0.005352711504094994, "grad_norm": 3.842942714691162, "learning_rate": 4.16e-05, "loss": 1.7252, "step": 79 }, { "epoch": 0.005420467345918981, "grad_norm": 4.1036763191223145, "learning_rate": 4.213333333333334e-05, "loss": 2.1751, "step": 80 }, { "epoch": 0.005488223187742968, "grad_norm": 2.9933395385742188, "learning_rate": 4.266666666666667e-05, "loss": 1.3973, "step": 81 }, { "epoch": 0.005555979029566956, "grad_norm": 4.0848565101623535, "learning_rate": 4.32e-05, "loss": 1.7059, "step": 82 }, { "epoch": 0.005623734871390943, "grad_norm": 3.6175732612609863, "learning_rate": 4.373333333333334e-05, "loss": 1.7883, "step": 83 }, { "epoch": 0.00569149071321493, "grad_norm": 5.00525426864624, "learning_rate": 4.426666666666667e-05, "loss": 2.4767, "step": 84 }, { "epoch": 0.005759246555038918, "grad_norm": 3.891401767730713, "learning_rate": 4.4800000000000005e-05, "loss": 1.8256, "step": 85 }, { "epoch": 0.005827002396862905, "grad_norm": 3.9124996662139893, "learning_rate": 4.5333333333333335e-05, "loss": 2.0405, "step": 86 }, { "epoch": 0.005894758238686891, "grad_norm": 3.7935447692871094, "learning_rate": 4.5866666666666666e-05, "loss": 1.9211, "step": 87 }, { "epoch": 0.005962514080510879, "grad_norm": 3.410778045654297, "learning_rate": 4.64e-05, "loss": 1.6252, "step": 88 }, { "epoch": 0.006030269922334866, "grad_norm": 3.507871389389038, "learning_rate": 4.6933333333333333e-05, "loss": 1.7026, "step": 89 }, { "epoch": 0.006098025764158853, "grad_norm": 3.449367046356201, "learning_rate": 4.746666666666667e-05, "loss": 1.4691, "step": 90 }, { "epoch": 0.006165781605982841, "grad_norm": 4.297393798828125, "learning_rate": 4.8e-05, "loss": 2.1289, "step": 91 }, { "epoch": 0.006233537447806828, "grad_norm": 3.728422164916992, "learning_rate": 4.853333333333334e-05, "loss": 1.6801, "step": 92 }, { "epoch": 0.006301293289630815, "grad_norm": 4.284979343414307, "learning_rate": 4.906666666666667e-05, "loss": 1.8669, "step": 93 }, { "epoch": 0.006369049131454803, "grad_norm": 2.7867817878723145, "learning_rate": 4.96e-05, "loss": 1.2686, "step": 94 }, { "epoch": 0.00643680497327879, "grad_norm": 4.663074016571045, "learning_rate": 5.013333333333333e-05, "loss": 1.5725, "step": 95 }, { "epoch": 0.006504560815102777, "grad_norm": 3.4489972591400146, "learning_rate": 5.0666666666666674e-05, "loss": 1.5573, "step": 96 }, { "epoch": 0.006572316656926765, "grad_norm": 3.902392864227295, "learning_rate": 5.1200000000000004e-05, "loss": 1.1236, "step": 97 }, { "epoch": 0.006640072498750752, "grad_norm": 3.595935821533203, "learning_rate": 5.1733333333333335e-05, "loss": 1.4469, "step": 98 }, { "epoch": 0.006707828340574739, "grad_norm": 3.480823040008545, "learning_rate": 5.2266666666666665e-05, "loss": 1.6294, "step": 99 }, { "epoch": 0.006775584182398726, "grad_norm": 5.669904708862305, "learning_rate": 5.28e-05, "loss": 2.3132, "step": 100 }, { "epoch": 0.006843340024222713, "grad_norm": 3.375321626663208, "learning_rate": 5.333333333333333e-05, "loss": 1.5394, "step": 101 }, { "epoch": 0.0069110958660467, "grad_norm": 4.202518463134766, "learning_rate": 5.3866666666666664e-05, "loss": 1.9353, "step": 102 }, { "epoch": 0.006978851707870688, "grad_norm": 5.418217182159424, "learning_rate": 5.440000000000001e-05, "loss": 1.8818, "step": 103 }, { "epoch": 0.007046607549694675, "grad_norm": 3.229679584503174, "learning_rate": 5.493333333333334e-05, "loss": 1.5784, "step": 104 }, { "epoch": 0.007114363391518662, "grad_norm": 5.178295612335205, "learning_rate": 5.546666666666667e-05, "loss": 2.0211, "step": 105 }, { "epoch": 0.00718211923334265, "grad_norm": 3.9106807708740234, "learning_rate": 5.6000000000000006e-05, "loss": 1.7349, "step": 106 }, { "epoch": 0.007249875075166637, "grad_norm": 694.2337646484375, "learning_rate": 5.6533333333333336e-05, "loss": 1.3922, "step": 107 }, { "epoch": 0.007317630916990624, "grad_norm": 3.4870429039001465, "learning_rate": 5.706666666666667e-05, "loss": 1.5259, "step": 108 }, { "epoch": 0.007385386758814612, "grad_norm": 3.41497802734375, "learning_rate": 5.76e-05, "loss": 1.262, "step": 109 }, { "epoch": 0.007453142600638599, "grad_norm": 4.137330532073975, "learning_rate": 5.813333333333334e-05, "loss": 1.6849, "step": 110 }, { "epoch": 0.007520898442462586, "grad_norm": 3.933605909347534, "learning_rate": 5.866666666666667e-05, "loss": 1.604, "step": 111 }, { "epoch": 0.007588654284286574, "grad_norm": 4.345171928405762, "learning_rate": 5.92e-05, "loss": 1.7677, "step": 112 }, { "epoch": 0.007656410126110561, "grad_norm": 4.059954643249512, "learning_rate": 5.973333333333334e-05, "loss": 1.5128, "step": 113 }, { "epoch": 0.0077241659679345475, "grad_norm": 4.287632465362549, "learning_rate": 6.026666666666667e-05, "loss": 1.7742, "step": 114 }, { "epoch": 0.007791921809758535, "grad_norm": 4.052312850952148, "learning_rate": 6.08e-05, "loss": 1.3687, "step": 115 }, { "epoch": 0.007859677651582523, "grad_norm": 3.244309425354004, "learning_rate": 6.133333333333334e-05, "loss": 1.3108, "step": 116 }, { "epoch": 0.00792743349340651, "grad_norm": 3.454943895339966, "learning_rate": 6.186666666666668e-05, "loss": 1.4112, "step": 117 }, { "epoch": 0.007995189335230498, "grad_norm": 5.388648509979248, "learning_rate": 6.24e-05, "loss": 2.002, "step": 118 }, { "epoch": 0.008062945177054484, "grad_norm": 4.050853252410889, "learning_rate": 6.293333333333334e-05, "loss": 1.454, "step": 119 }, { "epoch": 0.00813070101887847, "grad_norm": 4.374539375305176, "learning_rate": 6.346666666666667e-05, "loss": 1.5503, "step": 120 }, { "epoch": 0.008198456860702459, "grad_norm": 5.6923933029174805, "learning_rate": 6.400000000000001e-05, "loss": 1.9256, "step": 121 }, { "epoch": 0.008266212702526445, "grad_norm": 4.457003593444824, "learning_rate": 6.453333333333333e-05, "loss": 1.6966, "step": 122 }, { "epoch": 0.008333968544350433, "grad_norm": 5.176385879516602, "learning_rate": 6.506666666666666e-05, "loss": 1.7484, "step": 123 }, { "epoch": 0.00840172438617442, "grad_norm": 3.6228065490722656, "learning_rate": 6.560000000000001e-05, "loss": 1.1885, "step": 124 }, { "epoch": 0.008469480227998408, "grad_norm": 4.48412561416626, "learning_rate": 6.613333333333333e-05, "loss": 1.2613, "step": 125 }, { "epoch": 0.008537236069822395, "grad_norm": 4.5856194496154785, "learning_rate": 6.666666666666667e-05, "loss": 1.3579, "step": 126 }, { "epoch": 0.008604991911646383, "grad_norm": 4.485749244689941, "learning_rate": 6.720000000000001e-05, "loss": 1.2942, "step": 127 }, { "epoch": 0.00867274775347037, "grad_norm": 4.471734046936035, "learning_rate": 6.773333333333333e-05, "loss": 1.2102, "step": 128 }, { "epoch": 0.008740503595294357, "grad_norm": 3.6487972736358643, "learning_rate": 6.826666666666667e-05, "loss": 1.0166, "step": 129 }, { "epoch": 0.008808259437118344, "grad_norm": 4.2145304679870605, "learning_rate": 6.879999999999999e-05, "loss": 1.2544, "step": 130 }, { "epoch": 0.008876015278942332, "grad_norm": 3.7923190593719482, "learning_rate": 6.933333333333334e-05, "loss": 0.9969, "step": 131 }, { "epoch": 0.008943771120766318, "grad_norm": 5.3202080726623535, "learning_rate": 6.986666666666667e-05, "loss": 1.3898, "step": 132 }, { "epoch": 0.009011526962590307, "grad_norm": 3.911545515060425, "learning_rate": 7.04e-05, "loss": 1.0362, "step": 133 }, { "epoch": 0.009079282804414293, "grad_norm": 4.085530757904053, "learning_rate": 7.093333333333334e-05, "loss": 1.0703, "step": 134 }, { "epoch": 0.00914703864623828, "grad_norm": 3.54276180267334, "learning_rate": 7.146666666666666e-05, "loss": 0.7889, "step": 135 }, { "epoch": 0.009214794488062268, "grad_norm": 3.69317364692688, "learning_rate": 7.2e-05, "loss": 0.9805, "step": 136 }, { "epoch": 0.009282550329886254, "grad_norm": 4.455929756164551, "learning_rate": 7.253333333333334e-05, "loss": 1.0272, "step": 137 }, { "epoch": 0.009350306171710242, "grad_norm": 3.0266013145446777, "learning_rate": 7.306666666666668e-05, "loss": 0.6603, "step": 138 }, { "epoch": 0.009418062013534229, "grad_norm": 3.714937925338745, "learning_rate": 7.36e-05, "loss": 0.8242, "step": 139 }, { "epoch": 0.009485817855358217, "grad_norm": 4.581395626068115, "learning_rate": 7.413333333333334e-05, "loss": 1.1374, "step": 140 }, { "epoch": 0.009553573697182204, "grad_norm": 3.572059154510498, "learning_rate": 7.466666666666667e-05, "loss": 0.8379, "step": 141 }, { "epoch": 0.009621329539006192, "grad_norm": 4.542558193206787, "learning_rate": 7.52e-05, "loss": 0.8601, "step": 142 }, { "epoch": 0.009689085380830178, "grad_norm": 4.202064037322998, "learning_rate": 7.573333333333334e-05, "loss": 1.0706, "step": 143 }, { "epoch": 0.009756841222654166, "grad_norm": 4.403738498687744, "learning_rate": 7.626666666666667e-05, "loss": 1.0262, "step": 144 }, { "epoch": 0.009824597064478153, "grad_norm": 4.19655704498291, "learning_rate": 7.680000000000001e-05, "loss": 0.926, "step": 145 }, { "epoch": 0.009892352906302141, "grad_norm": 3.5180933475494385, "learning_rate": 7.733333333333333e-05, "loss": 0.8757, "step": 146 }, { "epoch": 0.009960108748126128, "grad_norm": 22.808645248413086, "learning_rate": 7.786666666666667e-05, "loss": 1.27, "step": 147 }, { "epoch": 0.010027864589950114, "grad_norm": 3.7410507202148438, "learning_rate": 7.840000000000001e-05, "loss": 1.0631, "step": 148 }, { "epoch": 0.010095620431774102, "grad_norm": 3.299713611602783, "learning_rate": 7.893333333333333e-05, "loss": 0.8575, "step": 149 }, { "epoch": 0.010163376273598089, "grad_norm": 2.661968231201172, "learning_rate": 7.946666666666667e-05, "loss": 0.7436, "step": 150 }, { "epoch": 0.010231132115422077, "grad_norm": 4.543766021728516, "learning_rate": 8e-05, "loss": 1.0868, "step": 151 }, { "epoch": 0.010298887957246063, "grad_norm": 5.307805061340332, "learning_rate": 8.053333333333334e-05, "loss": 1.112, "step": 152 }, { "epoch": 0.010366643799070051, "grad_norm": 4.216804504394531, "learning_rate": 8.106666666666667e-05, "loss": 0.9197, "step": 153 }, { "epoch": 0.010434399640894038, "grad_norm": 4.234525203704834, "learning_rate": 8.16e-05, "loss": 0.9975, "step": 154 }, { "epoch": 0.010502155482718026, "grad_norm": 3.536555051803589, "learning_rate": 8.213333333333334e-05, "loss": 0.8271, "step": 155 }, { "epoch": 0.010569911324542013, "grad_norm": 4.309999465942383, "learning_rate": 8.266666666666667e-05, "loss": 0.9115, "step": 156 }, { "epoch": 0.010637667166366, "grad_norm": 2.9323556423187256, "learning_rate": 8.32e-05, "loss": 0.8719, "step": 157 }, { "epoch": 0.010705423008189987, "grad_norm": 3.687777519226074, "learning_rate": 8.373333333333334e-05, "loss": 1.0331, "step": 158 }, { "epoch": 0.010773178850013975, "grad_norm": 3.153407573699951, "learning_rate": 8.426666666666668e-05, "loss": 0.8971, "step": 159 }, { "epoch": 0.010840934691837962, "grad_norm": 4.415497779846191, "learning_rate": 8.48e-05, "loss": 1.2505, "step": 160 }, { "epoch": 0.010908690533661948, "grad_norm": 3.849696636199951, "learning_rate": 8.533333333333334e-05, "loss": 1.0184, "step": 161 }, { "epoch": 0.010976446375485937, "grad_norm": 3.315385580062866, "learning_rate": 8.586666666666668e-05, "loss": 0.7916, "step": 162 }, { "epoch": 0.011044202217309923, "grad_norm": 3.611583948135376, "learning_rate": 8.64e-05, "loss": 1.0153, "step": 163 }, { "epoch": 0.011111958059133911, "grad_norm": 4.0692138671875, "learning_rate": 8.693333333333334e-05, "loss": 1.1156, "step": 164 }, { "epoch": 0.011179713900957898, "grad_norm": 3.334744691848755, "learning_rate": 8.746666666666667e-05, "loss": 0.7543, "step": 165 }, { "epoch": 0.011247469742781886, "grad_norm": 3.654917001724243, "learning_rate": 8.800000000000001e-05, "loss": 0.8977, "step": 166 }, { "epoch": 0.011315225584605872, "grad_norm": 4.702078342437744, "learning_rate": 8.853333333333333e-05, "loss": 1.1895, "step": 167 }, { "epoch": 0.01138298142642986, "grad_norm": 4.290605545043945, "learning_rate": 8.906666666666667e-05, "loss": 1.0402, "step": 168 }, { "epoch": 0.011450737268253847, "grad_norm": 3.28179931640625, "learning_rate": 8.960000000000001e-05, "loss": 0.7626, "step": 169 }, { "epoch": 0.011518493110077835, "grad_norm": 3.1806724071502686, "learning_rate": 9.013333333333333e-05, "loss": 0.7357, "step": 170 }, { "epoch": 0.011586248951901822, "grad_norm": 3.482248544692993, "learning_rate": 9.066666666666667e-05, "loss": 0.98, "step": 171 }, { "epoch": 0.01165400479372581, "grad_norm": 3.5067379474639893, "learning_rate": 9.120000000000001e-05, "loss": 0.9183, "step": 172 }, { "epoch": 0.011721760635549796, "grad_norm": 4.362545013427734, "learning_rate": 9.173333333333333e-05, "loss": 0.9597, "step": 173 }, { "epoch": 0.011789516477373783, "grad_norm": 3.65118145942688, "learning_rate": 9.226666666666667e-05, "loss": 0.866, "step": 174 }, { "epoch": 0.011857272319197771, "grad_norm": 7.4912896156311035, "learning_rate": 9.28e-05, "loss": 1.2576, "step": 175 }, { "epoch": 0.011925028161021757, "grad_norm": 2.8435049057006836, "learning_rate": 9.333333333333334e-05, "loss": 0.8427, "step": 176 }, { "epoch": 0.011992784002845746, "grad_norm": 3.598757266998291, "learning_rate": 9.386666666666667e-05, "loss": 1.0179, "step": 177 }, { "epoch": 0.012060539844669732, "grad_norm": 3.3011691570281982, "learning_rate": 9.44e-05, "loss": 0.8488, "step": 178 }, { "epoch": 0.01212829568649372, "grad_norm": 3.7547154426574707, "learning_rate": 9.493333333333334e-05, "loss": 0.9933, "step": 179 }, { "epoch": 0.012196051528317707, "grad_norm": 4.0833611488342285, "learning_rate": 9.546666666666667e-05, "loss": 1.1849, "step": 180 }, { "epoch": 0.012263807370141695, "grad_norm": 3.6523630619049072, "learning_rate": 9.6e-05, "loss": 0.9923, "step": 181 }, { "epoch": 0.012331563211965681, "grad_norm": 3.123002052307129, "learning_rate": 9.653333333333334e-05, "loss": 0.8659, "step": 182 }, { "epoch": 0.01239931905378967, "grad_norm": 3.65999436378479, "learning_rate": 9.706666666666668e-05, "loss": 0.9637, "step": 183 }, { "epoch": 0.012467074895613656, "grad_norm": 4.547555446624756, "learning_rate": 9.76e-05, "loss": 1.2165, "step": 184 }, { "epoch": 0.012534830737437644, "grad_norm": 3.4823532104492188, "learning_rate": 9.813333333333334e-05, "loss": 0.9079, "step": 185 }, { "epoch": 0.01260258657926163, "grad_norm": 4.131546974182129, "learning_rate": 9.866666666666668e-05, "loss": 0.9782, "step": 186 }, { "epoch": 0.012670342421085617, "grad_norm": 3.025775909423828, "learning_rate": 9.92e-05, "loss": 0.7689, "step": 187 }, { "epoch": 0.012738098262909605, "grad_norm": 4.011147975921631, "learning_rate": 9.973333333333334e-05, "loss": 1.0396, "step": 188 }, { "epoch": 0.012805854104733592, "grad_norm": 3.103466749191284, "learning_rate": 0.00010026666666666666, "loss": 0.795, "step": 189 }, { "epoch": 0.01287360994655758, "grad_norm": 2.866642713546753, "learning_rate": 0.00010080000000000001, "loss": 0.8959, "step": 190 }, { "epoch": 0.012941365788381566, "grad_norm": 2.8990366458892822, "learning_rate": 0.00010133333333333335, "loss": 0.9935, "step": 191 }, { "epoch": 0.013009121630205555, "grad_norm": 2.6250760555267334, "learning_rate": 0.00010186666666666667, "loss": 0.9082, "step": 192 }, { "epoch": 0.013076877472029541, "grad_norm": 3.451451301574707, "learning_rate": 0.00010240000000000001, "loss": 0.8814, "step": 193 }, { "epoch": 0.01314463331385353, "grad_norm": 3.2968547344207764, "learning_rate": 0.00010293333333333335, "loss": 0.7823, "step": 194 }, { "epoch": 0.013212389155677516, "grad_norm": 2.873727798461914, "learning_rate": 0.00010346666666666667, "loss": 0.8378, "step": 195 }, { "epoch": 0.013280144997501504, "grad_norm": 3.4928581714630127, "learning_rate": 0.00010400000000000001, "loss": 1.0139, "step": 196 }, { "epoch": 0.01334790083932549, "grad_norm": 3.464414358139038, "learning_rate": 0.00010453333333333333, "loss": 0.9106, "step": 197 }, { "epoch": 0.013415656681149479, "grad_norm": 3.058838129043579, "learning_rate": 0.00010506666666666667, "loss": 0.6724, "step": 198 }, { "epoch": 0.013483412522973465, "grad_norm": 3.876274824142456, "learning_rate": 0.0001056, "loss": 0.872, "step": 199 }, { "epoch": 0.013551168364797452, "grad_norm": 3.2966196537017822, "learning_rate": 0.00010613333333333333, "loss": 0.9288, "step": 200 }, { "epoch": 0.01361892420662144, "grad_norm": 3.082512617111206, "learning_rate": 0.00010666666666666667, "loss": 0.833, "step": 201 }, { "epoch": 0.013686680048445426, "grad_norm": 4.060554504394531, "learning_rate": 0.00010720000000000002, "loss": 1.0788, "step": 202 }, { "epoch": 0.013754435890269414, "grad_norm": 3.3564369678497314, "learning_rate": 0.00010773333333333333, "loss": 1.19, "step": 203 }, { "epoch": 0.0138221917320934, "grad_norm": 3.56966495513916, "learning_rate": 0.00010826666666666668, "loss": 0.8799, "step": 204 }, { "epoch": 0.013889947573917389, "grad_norm": 3.1727395057678223, "learning_rate": 0.00010880000000000002, "loss": 0.8247, "step": 205 }, { "epoch": 0.013957703415741375, "grad_norm": 4.123342514038086, "learning_rate": 0.00010933333333333333, "loss": 1.3223, "step": 206 }, { "epoch": 0.014025459257565364, "grad_norm": 3.971982002258301, "learning_rate": 0.00010986666666666668, "loss": 1.1013, "step": 207 }, { "epoch": 0.01409321509938935, "grad_norm": 3.719574451446533, "learning_rate": 0.00011040000000000001, "loss": 1.17, "step": 208 }, { "epoch": 0.014160970941213338, "grad_norm": 4.213026523590088, "learning_rate": 0.00011093333333333334, "loss": 0.7647, "step": 209 }, { "epoch": 0.014228726783037325, "grad_norm": 3.341592311859131, "learning_rate": 0.00011146666666666667, "loss": 1.0674, "step": 210 }, { "epoch": 0.014296482624861313, "grad_norm": 3.7537477016448975, "learning_rate": 0.00011200000000000001, "loss": 0.9634, "step": 211 }, { "epoch": 0.0143642384666853, "grad_norm": 3.766063928604126, "learning_rate": 0.00011253333333333334, "loss": 0.9494, "step": 212 }, { "epoch": 0.014431994308509288, "grad_norm": 2.777581214904785, "learning_rate": 0.00011306666666666667, "loss": 0.6205, "step": 213 }, { "epoch": 0.014499750150333274, "grad_norm": 3.964770793914795, "learning_rate": 0.0001136, "loss": 0.9115, "step": 214 }, { "epoch": 0.01456750599215726, "grad_norm": 3.22684645652771, "learning_rate": 0.00011413333333333333, "loss": 0.923, "step": 215 }, { "epoch": 0.014635261833981249, "grad_norm": 3.009641408920288, "learning_rate": 0.00011466666666666667, "loss": 0.8137, "step": 216 }, { "epoch": 0.014703017675805235, "grad_norm": 4.65781307220459, "learning_rate": 0.0001152, "loss": 1.1172, "step": 217 }, { "epoch": 0.014770773517629223, "grad_norm": 3.9027650356292725, "learning_rate": 0.00011573333333333333, "loss": 1.4727, "step": 218 }, { "epoch": 0.01483852935945321, "grad_norm": 3.0178661346435547, "learning_rate": 0.00011626666666666668, "loss": 0.7875, "step": 219 }, { "epoch": 0.014906285201277198, "grad_norm": 3.878479480743408, "learning_rate": 0.00011679999999999999, "loss": 1.3101, "step": 220 }, { "epoch": 0.014974041043101185, "grad_norm": 4.002731800079346, "learning_rate": 0.00011733333333333334, "loss": 0.9453, "step": 221 }, { "epoch": 0.015041796884925173, "grad_norm": 2.576362133026123, "learning_rate": 0.00011786666666666668, "loss": 0.7215, "step": 222 }, { "epoch": 0.01510955272674916, "grad_norm": 3.33084774017334, "learning_rate": 0.0001184, "loss": 0.9659, "step": 223 }, { "epoch": 0.015177308568573147, "grad_norm": 3.3597161769866943, "learning_rate": 0.00011893333333333334, "loss": 0.9257, "step": 224 }, { "epoch": 0.015245064410397134, "grad_norm": 3.4632585048675537, "learning_rate": 0.00011946666666666668, "loss": 0.8929, "step": 225 }, { "epoch": 0.015312820252221122, "grad_norm": 4.075017929077148, "learning_rate": 0.00012, "loss": 0.958, "step": 226 }, { "epoch": 0.015380576094045108, "grad_norm": 3.3880038261413574, "learning_rate": 0.00012053333333333334, "loss": 1.0581, "step": 227 }, { "epoch": 0.015448331935869095, "grad_norm": 4.207815647125244, "learning_rate": 0.00012106666666666666, "loss": 1.1511, "step": 228 }, { "epoch": 0.015516087777693083, "grad_norm": 3.4052436351776123, "learning_rate": 0.0001216, "loss": 1.0359, "step": 229 }, { "epoch": 0.01558384361951707, "grad_norm": 3.804454803466797, "learning_rate": 0.00012213333333333334, "loss": 1.0629, "step": 230 }, { "epoch": 0.015651599461341058, "grad_norm": 3.846328020095825, "learning_rate": 0.00012266666666666668, "loss": 0.993, "step": 231 }, { "epoch": 0.015719355303165046, "grad_norm": 3.96236515045166, "learning_rate": 0.0001232, "loss": 1.1515, "step": 232 }, { "epoch": 0.01578711114498903, "grad_norm": 3.798048496246338, "learning_rate": 0.00012373333333333335, "loss": 0.7662, "step": 233 }, { "epoch": 0.01585486698681302, "grad_norm": 3.0351669788360596, "learning_rate": 0.00012426666666666666, "loss": 0.9648, "step": 234 }, { "epoch": 0.015922622828637007, "grad_norm": 3.380739212036133, "learning_rate": 0.0001248, "loss": 0.8763, "step": 235 }, { "epoch": 0.015990378670460995, "grad_norm": 3.049619674682617, "learning_rate": 0.00012533333333333334, "loss": 0.8025, "step": 236 }, { "epoch": 0.01605813451228498, "grad_norm": 3.167330265045166, "learning_rate": 0.00012586666666666667, "loss": 1.2007, "step": 237 }, { "epoch": 0.016125890354108968, "grad_norm": 3.3723909854888916, "learning_rate": 0.0001264, "loss": 1.0588, "step": 238 }, { "epoch": 0.016193646195932956, "grad_norm": 2.830580234527588, "learning_rate": 0.00012693333333333335, "loss": 0.6549, "step": 239 }, { "epoch": 0.01626140203775694, "grad_norm": 3.2318930625915527, "learning_rate": 0.00012746666666666666, "loss": 1.1047, "step": 240 }, { "epoch": 0.01632915787958093, "grad_norm": 2.931008815765381, "learning_rate": 0.00012800000000000002, "loss": 0.8645, "step": 241 }, { "epoch": 0.016396913721404918, "grad_norm": 4.3279829025268555, "learning_rate": 0.00012853333333333336, "loss": 1.0481, "step": 242 }, { "epoch": 0.016464669563228906, "grad_norm": 2.757809638977051, "learning_rate": 0.00012906666666666667, "loss": 0.7434, "step": 243 }, { "epoch": 0.01653242540505289, "grad_norm": 2.969024658203125, "learning_rate": 0.0001296, "loss": 0.8965, "step": 244 }, { "epoch": 0.01660018124687688, "grad_norm": 2.9457848072052, "learning_rate": 0.00013013333333333332, "loss": 0.938, "step": 245 }, { "epoch": 0.016667937088700867, "grad_norm": 3.3430593013763428, "learning_rate": 0.00013066666666666668, "loss": 1.0461, "step": 246 }, { "epoch": 0.016735692930524855, "grad_norm": 3.8152661323547363, "learning_rate": 0.00013120000000000002, "loss": 1.1667, "step": 247 }, { "epoch": 0.01680344877234884, "grad_norm": 3.352856397628784, "learning_rate": 0.00013173333333333333, "loss": 0.9266, "step": 248 }, { "epoch": 0.016871204614172828, "grad_norm": 2.7780158519744873, "learning_rate": 0.00013226666666666667, "loss": 0.8136, "step": 249 }, { "epoch": 0.016938960455996816, "grad_norm": 3.764047145843506, "learning_rate": 0.0001328, "loss": 0.8565, "step": 250 }, { "epoch": 0.017006716297820804, "grad_norm": 3.134251832962036, "learning_rate": 0.00013333333333333334, "loss": 0.9564, "step": 251 }, { "epoch": 0.01707447213964479, "grad_norm": 3.4929568767547607, "learning_rate": 0.00013386666666666668, "loss": 1.1402, "step": 252 }, { "epoch": 0.017142227981468777, "grad_norm": 2.6721930503845215, "learning_rate": 0.00013440000000000001, "loss": 0.9342, "step": 253 }, { "epoch": 0.017209983823292765, "grad_norm": 2.8653156757354736, "learning_rate": 0.00013493333333333332, "loss": 0.7382, "step": 254 }, { "epoch": 0.01727773966511675, "grad_norm": 2.549006938934326, "learning_rate": 0.00013546666666666666, "loss": 0.7793, "step": 255 }, { "epoch": 0.01734549550694074, "grad_norm": 2.753033399581909, "learning_rate": 0.00013600000000000003, "loss": 0.8672, "step": 256 }, { "epoch": 0.017413251348764727, "grad_norm": 2.9388537406921387, "learning_rate": 0.00013653333333333334, "loss": 0.916, "step": 257 }, { "epoch": 0.017481007190588715, "grad_norm": 2.5541250705718994, "learning_rate": 0.00013706666666666667, "loss": 0.8142, "step": 258 }, { "epoch": 0.0175487630324127, "grad_norm": 3.18033504486084, "learning_rate": 0.00013759999999999998, "loss": 0.8763, "step": 259 }, { "epoch": 0.017616518874236688, "grad_norm": 3.3773415088653564, "learning_rate": 0.00013813333333333335, "loss": 1.0218, "step": 260 }, { "epoch": 0.017684274716060676, "grad_norm": 3.6449365615844727, "learning_rate": 0.00013866666666666669, "loss": 0.9939, "step": 261 }, { "epoch": 0.017752030557884664, "grad_norm": 3.41015887260437, "learning_rate": 0.0001392, "loss": 1.06, "step": 262 }, { "epoch": 0.01781978639970865, "grad_norm": 2.5204412937164307, "learning_rate": 0.00013973333333333333, "loss": 0.8231, "step": 263 }, { "epoch": 0.017887542241532637, "grad_norm": 3.4601964950561523, "learning_rate": 0.00014026666666666667, "loss": 1.0674, "step": 264 }, { "epoch": 0.017955298083356625, "grad_norm": 3.367053747177124, "learning_rate": 0.0001408, "loss": 1.0056, "step": 265 }, { "epoch": 0.018023053925180613, "grad_norm": 2.6200222969055176, "learning_rate": 0.00014133333333333334, "loss": 0.7781, "step": 266 }, { "epoch": 0.018090809767004598, "grad_norm": 2.720961809158325, "learning_rate": 0.00014186666666666668, "loss": 0.8741, "step": 267 }, { "epoch": 0.018158565608828586, "grad_norm": 4.248485088348389, "learning_rate": 0.0001424, "loss": 1.1909, "step": 268 }, { "epoch": 0.018226321450652574, "grad_norm": 2.551419258117676, "learning_rate": 0.00014293333333333333, "loss": 0.8267, "step": 269 }, { "epoch": 0.01829407729247656, "grad_norm": 2.759575366973877, "learning_rate": 0.0001434666666666667, "loss": 0.8734, "step": 270 }, { "epoch": 0.018361833134300547, "grad_norm": 2.9063026905059814, "learning_rate": 0.000144, "loss": 0.8042, "step": 271 }, { "epoch": 0.018429588976124536, "grad_norm": 10.986420631408691, "learning_rate": 0.00014453333333333334, "loss": 1.1958, "step": 272 }, { "epoch": 0.018497344817948524, "grad_norm": 2.7618868350982666, "learning_rate": 0.00014506666666666668, "loss": 0.7861, "step": 273 }, { "epoch": 0.01856510065977251, "grad_norm": 3.453500509262085, "learning_rate": 0.00014560000000000002, "loss": 1.1442, "step": 274 }, { "epoch": 0.018632856501596497, "grad_norm": 3.5590434074401855, "learning_rate": 0.00014613333333333335, "loss": 0.8994, "step": 275 }, { "epoch": 0.018700612343420485, "grad_norm": 2.4055211544036865, "learning_rate": 0.00014666666666666666, "loss": 0.7386, "step": 276 }, { "epoch": 0.018768368185244473, "grad_norm": 2.8150551319122314, "learning_rate": 0.0001472, "loss": 0.8823, "step": 277 }, { "epoch": 0.018836124027068458, "grad_norm": 2.630995035171509, "learning_rate": 0.00014773333333333334, "loss": 0.7175, "step": 278 }, { "epoch": 0.018903879868892446, "grad_norm": 3.557161569595337, "learning_rate": 0.00014826666666666667, "loss": 0.8712, "step": 279 }, { "epoch": 0.018971635710716434, "grad_norm": 2.6256723403930664, "learning_rate": 0.0001488, "loss": 0.7671, "step": 280 }, { "epoch": 0.01903939155254042, "grad_norm": 3.0437567234039307, "learning_rate": 0.00014933333333333335, "loss": 1.0063, "step": 281 }, { "epoch": 0.019107147394364407, "grad_norm": 3.4554121494293213, "learning_rate": 0.00014986666666666666, "loss": 1.129, "step": 282 }, { "epoch": 0.019174903236188395, "grad_norm": 2.7510783672332764, "learning_rate": 0.0001504, "loss": 0.8512, "step": 283 }, { "epoch": 0.019242659078012384, "grad_norm": 3.6570682525634766, "learning_rate": 0.00015093333333333336, "loss": 1.0906, "step": 284 }, { "epoch": 0.019310414919836368, "grad_norm": 2.8303823471069336, "learning_rate": 0.00015146666666666667, "loss": 0.9401, "step": 285 }, { "epoch": 0.019378170761660356, "grad_norm": 4.669590950012207, "learning_rate": 0.000152, "loss": 1.2186, "step": 286 }, { "epoch": 0.019445926603484345, "grad_norm": 3.1037960052490234, "learning_rate": 0.00015253333333333335, "loss": 1.0955, "step": 287 }, { "epoch": 0.019513682445308333, "grad_norm": 2.164179563522339, "learning_rate": 0.00015306666666666666, "loss": 0.5372, "step": 288 }, { "epoch": 0.019581438287132318, "grad_norm": 3.3186652660369873, "learning_rate": 0.00015360000000000002, "loss": 0.9419, "step": 289 }, { "epoch": 0.019649194128956306, "grad_norm": 2.662095785140991, "learning_rate": 0.00015413333333333336, "loss": 0.9512, "step": 290 }, { "epoch": 0.019716949970780294, "grad_norm": 2.8470234870910645, "learning_rate": 0.00015466666666666667, "loss": 0.8353, "step": 291 }, { "epoch": 0.019784705812604282, "grad_norm": 2.5618224143981934, "learning_rate": 0.0001552, "loss": 0.7797, "step": 292 }, { "epoch": 0.019852461654428267, "grad_norm": 3.2155261039733887, "learning_rate": 0.00015573333333333334, "loss": 0.8396, "step": 293 }, { "epoch": 0.019920217496252255, "grad_norm": 2.711542844772339, "learning_rate": 0.00015626666666666668, "loss": 1.0394, "step": 294 }, { "epoch": 0.019987973338076243, "grad_norm": 2.3177831172943115, "learning_rate": 0.00015680000000000002, "loss": 0.7734, "step": 295 }, { "epoch": 0.020055729179900228, "grad_norm": 2.9781432151794434, "learning_rate": 0.00015733333333333333, "loss": 0.9441, "step": 296 }, { "epoch": 0.020123485021724216, "grad_norm": 3.547036647796631, "learning_rate": 0.00015786666666666666, "loss": 0.9722, "step": 297 }, { "epoch": 0.020191240863548204, "grad_norm": 2.6772027015686035, "learning_rate": 0.00015840000000000003, "loss": 0.9965, "step": 298 }, { "epoch": 0.020258996705372193, "grad_norm": 2.731415033340454, "learning_rate": 0.00015893333333333334, "loss": 0.88, "step": 299 }, { "epoch": 0.020326752547196177, "grad_norm": 3.2088096141815186, "learning_rate": 0.00015946666666666668, "loss": 0.8667, "step": 300 }, { "epoch": 0.020394508389020165, "grad_norm": 2.8343393802642822, "learning_rate": 0.00016, "loss": 0.8519, "step": 301 }, { "epoch": 0.020462264230844154, "grad_norm": 2.5601022243499756, "learning_rate": 0.00016053333333333332, "loss": 0.7365, "step": 302 }, { "epoch": 0.020530020072668142, "grad_norm": 2.4892749786376953, "learning_rate": 0.0001610666666666667, "loss": 0.7574, "step": 303 }, { "epoch": 0.020597775914492127, "grad_norm": 2.4366605281829834, "learning_rate": 0.00016160000000000002, "loss": 0.804, "step": 304 }, { "epoch": 0.020665531756316115, "grad_norm": 3.2073616981506348, "learning_rate": 0.00016213333333333334, "loss": 1.0517, "step": 305 }, { "epoch": 0.020733287598140103, "grad_norm": 2.8581137657165527, "learning_rate": 0.00016266666666666667, "loss": 0.76, "step": 306 }, { "epoch": 0.020801043439964088, "grad_norm": 2.4472806453704834, "learning_rate": 0.0001632, "loss": 0.7537, "step": 307 }, { "epoch": 0.020868799281788076, "grad_norm": 3.2488083839416504, "learning_rate": 0.00016373333333333335, "loss": 0.9871, "step": 308 }, { "epoch": 0.020936555123612064, "grad_norm": 2.7125866413116455, "learning_rate": 0.00016426666666666668, "loss": 0.9442, "step": 309 }, { "epoch": 0.021004310965436052, "grad_norm": 3.6005852222442627, "learning_rate": 0.0001648, "loss": 1.1218, "step": 310 }, { "epoch": 0.021072066807260037, "grad_norm": 2.3172333240509033, "learning_rate": 0.00016533333333333333, "loss": 0.7338, "step": 311 }, { "epoch": 0.021139822649084025, "grad_norm": 2.5886096954345703, "learning_rate": 0.00016586666666666667, "loss": 0.9067, "step": 312 }, { "epoch": 0.021207578490908013, "grad_norm": 2.5885937213897705, "learning_rate": 0.0001664, "loss": 0.8025, "step": 313 }, { "epoch": 0.021275334332732, "grad_norm": 2.9777655601501465, "learning_rate": 0.00016693333333333334, "loss": 0.9416, "step": 314 }, { "epoch": 0.021343090174555986, "grad_norm": 2.4649710655212402, "learning_rate": 0.00016746666666666668, "loss": 0.7932, "step": 315 }, { "epoch": 0.021410846016379975, "grad_norm": 3.091679096221924, "learning_rate": 0.000168, "loss": 0.7981, "step": 316 }, { "epoch": 0.021478601858203963, "grad_norm": 3.334155797958374, "learning_rate": 0.00016853333333333336, "loss": 0.7285, "step": 317 }, { "epoch": 0.02154635770002795, "grad_norm": 3.0908756256103516, "learning_rate": 0.0001690666666666667, "loss": 1.0706, "step": 318 }, { "epoch": 0.021614113541851936, "grad_norm": 2.8760488033294678, "learning_rate": 0.0001696, "loss": 0.8715, "step": 319 }, { "epoch": 0.021681869383675924, "grad_norm": 2.9952962398529053, "learning_rate": 0.00017013333333333334, "loss": 0.9492, "step": 320 }, { "epoch": 0.021749625225499912, "grad_norm": 3.413982391357422, "learning_rate": 0.00017066666666666668, "loss": 1.0295, "step": 321 }, { "epoch": 0.021817381067323897, "grad_norm": 2.950277805328369, "learning_rate": 0.00017120000000000001, "loss": 0.9315, "step": 322 }, { "epoch": 0.021885136909147885, "grad_norm": 2.929976224899292, "learning_rate": 0.00017173333333333335, "loss": 0.8389, "step": 323 }, { "epoch": 0.021952892750971873, "grad_norm": 2.878108263015747, "learning_rate": 0.00017226666666666666, "loss": 0.9949, "step": 324 }, { "epoch": 0.02202064859279586, "grad_norm": 2.654740571975708, "learning_rate": 0.0001728, "loss": 1.1421, "step": 325 }, { "epoch": 0.022088404434619846, "grad_norm": 2.782033681869507, "learning_rate": 0.00017333333333333334, "loss": 0.8863, "step": 326 }, { "epoch": 0.022156160276443834, "grad_norm": 3.639472246170044, "learning_rate": 0.00017386666666666667, "loss": 1.1587, "step": 327 }, { "epoch": 0.022223916118267822, "grad_norm": 3.403918981552124, "learning_rate": 0.0001744, "loss": 1.0138, "step": 328 }, { "epoch": 0.02229167196009181, "grad_norm": 2.8108558654785156, "learning_rate": 0.00017493333333333335, "loss": 0.8243, "step": 329 }, { "epoch": 0.022359427801915795, "grad_norm": 3.242318868637085, "learning_rate": 0.00017546666666666666, "loss": 1.0326, "step": 330 }, { "epoch": 0.022427183643739784, "grad_norm": 2.594991683959961, "learning_rate": 0.00017600000000000002, "loss": 0.8135, "step": 331 }, { "epoch": 0.022494939485563772, "grad_norm": 2.890305519104004, "learning_rate": 0.00017653333333333336, "loss": 0.9373, "step": 332 }, { "epoch": 0.02256269532738776, "grad_norm": 2.6706676483154297, "learning_rate": 0.00017706666666666667, "loss": 0.8682, "step": 333 }, { "epoch": 0.022630451169211745, "grad_norm": 2.7954084873199463, "learning_rate": 0.0001776, "loss": 0.8013, "step": 334 }, { "epoch": 0.022698207011035733, "grad_norm": 3.222986936569214, "learning_rate": 0.00017813333333333334, "loss": 1.136, "step": 335 }, { "epoch": 0.02276596285285972, "grad_norm": 3.1042490005493164, "learning_rate": 0.00017866666666666668, "loss": 1.1317, "step": 336 }, { "epoch": 0.022833718694683706, "grad_norm": 3.553807497024536, "learning_rate": 0.00017920000000000002, "loss": 1.0309, "step": 337 }, { "epoch": 0.022901474536507694, "grad_norm": 3.4068756103515625, "learning_rate": 0.00017973333333333333, "loss": 0.8812, "step": 338 }, { "epoch": 0.022969230378331682, "grad_norm": 2.8250908851623535, "learning_rate": 0.00018026666666666667, "loss": 0.8691, "step": 339 }, { "epoch": 0.02303698622015567, "grad_norm": 3.1037330627441406, "learning_rate": 0.0001808, "loss": 0.8692, "step": 340 }, { "epoch": 0.023104742061979655, "grad_norm": 2.989802598953247, "learning_rate": 0.00018133333333333334, "loss": 1.0568, "step": 341 }, { "epoch": 0.023172497903803643, "grad_norm": 4.6037068367004395, "learning_rate": 0.00018186666666666668, "loss": 1.1491, "step": 342 }, { "epoch": 0.02324025374562763, "grad_norm": 3.0545082092285156, "learning_rate": 0.00018240000000000002, "loss": 0.9532, "step": 343 }, { "epoch": 0.02330800958745162, "grad_norm": 2.2162246704101562, "learning_rate": 0.00018293333333333333, "loss": 0.8658, "step": 344 }, { "epoch": 0.023375765429275604, "grad_norm": 2.557610273361206, "learning_rate": 0.00018346666666666666, "loss": 0.8813, "step": 345 }, { "epoch": 0.023443521271099593, "grad_norm": 2.785144805908203, "learning_rate": 0.00018400000000000003, "loss": 0.8998, "step": 346 }, { "epoch": 0.02351127711292358, "grad_norm": 2.822430372238159, "learning_rate": 0.00018453333333333334, "loss": 0.9069, "step": 347 }, { "epoch": 0.023579032954747565, "grad_norm": 3.2252402305603027, "learning_rate": 0.00018506666666666667, "loss": 0.8276, "step": 348 }, { "epoch": 0.023646788796571554, "grad_norm": 3.279068946838379, "learning_rate": 0.0001856, "loss": 1.2341, "step": 349 }, { "epoch": 0.023714544638395542, "grad_norm": 2.407787799835205, "learning_rate": 0.00018613333333333335, "loss": 0.848, "step": 350 }, { "epoch": 0.02378230048021953, "grad_norm": 2.13181209564209, "learning_rate": 0.0001866666666666667, "loss": 0.7186, "step": 351 }, { "epoch": 0.023850056322043515, "grad_norm": 3.5988128185272217, "learning_rate": 0.00018720000000000002, "loss": 0.9629, "step": 352 }, { "epoch": 0.023917812163867503, "grad_norm": 2.6874983310699463, "learning_rate": 0.00018773333333333333, "loss": 0.8074, "step": 353 }, { "epoch": 0.02398556800569149, "grad_norm": 3.2776784896850586, "learning_rate": 0.00018826666666666667, "loss": 1.0154, "step": 354 }, { "epoch": 0.02405332384751548, "grad_norm": 2.798130750656128, "learning_rate": 0.0001888, "loss": 1.1858, "step": 355 }, { "epoch": 0.024121079689339464, "grad_norm": 2.985553503036499, "learning_rate": 0.00018933333333333335, "loss": 1.0351, "step": 356 }, { "epoch": 0.024188835531163452, "grad_norm": 2.8096508979797363, "learning_rate": 0.00018986666666666668, "loss": 0.8279, "step": 357 }, { "epoch": 0.02425659137298744, "grad_norm": 2.5704684257507324, "learning_rate": 0.0001904, "loss": 0.7203, "step": 358 }, { "epoch": 0.02432434721481143, "grad_norm": 4.470186710357666, "learning_rate": 0.00019093333333333333, "loss": 1.0528, "step": 359 }, { "epoch": 0.024392103056635413, "grad_norm": 2.5878007411956787, "learning_rate": 0.0001914666666666667, "loss": 0.7575, "step": 360 }, { "epoch": 0.0244598588984594, "grad_norm": 3.5642030239105225, "learning_rate": 0.000192, "loss": 1.0158, "step": 361 }, { "epoch": 0.02452761474028339, "grad_norm": 3.050518751144409, "learning_rate": 0.00019253333333333334, "loss": 0.7788, "step": 362 }, { "epoch": 0.024595370582107375, "grad_norm": 2.7089364528656006, "learning_rate": 0.00019306666666666668, "loss": 1.0027, "step": 363 }, { "epoch": 0.024663126423931363, "grad_norm": 2.6781551837921143, "learning_rate": 0.00019360000000000002, "loss": 0.9534, "step": 364 }, { "epoch": 0.02473088226575535, "grad_norm": 2.707778215408325, "learning_rate": 0.00019413333333333335, "loss": 0.741, "step": 365 }, { "epoch": 0.02479863810757934, "grad_norm": 2.9382736682891846, "learning_rate": 0.0001946666666666667, "loss": 0.8631, "step": 366 }, { "epoch": 0.024866393949403324, "grad_norm": 2.2730894088745117, "learning_rate": 0.0001952, "loss": 0.8481, "step": 367 }, { "epoch": 0.024934149791227312, "grad_norm": 2.788771867752075, "learning_rate": 0.00019573333333333334, "loss": 0.8739, "step": 368 }, { "epoch": 0.0250019056330513, "grad_norm": 2.500476598739624, "learning_rate": 0.00019626666666666668, "loss": 0.761, "step": 369 }, { "epoch": 0.02506966147487529, "grad_norm": 2.9384407997131348, "learning_rate": 0.0001968, "loss": 0.8118, "step": 370 }, { "epoch": 0.025137417316699273, "grad_norm": 3.3813512325286865, "learning_rate": 0.00019733333333333335, "loss": 0.9742, "step": 371 }, { "epoch": 0.02520517315852326, "grad_norm": 3.193253993988037, "learning_rate": 0.00019786666666666666, "loss": 1.1012, "step": 372 }, { "epoch": 0.02527292900034725, "grad_norm": 2.9866743087768555, "learning_rate": 0.0001984, "loss": 1.0315, "step": 373 }, { "epoch": 0.025340684842171234, "grad_norm": 3.231721878051758, "learning_rate": 0.00019893333333333336, "loss": 1.2791, "step": 374 }, { "epoch": 0.025408440683995222, "grad_norm": 2.749004364013672, "learning_rate": 0.00019946666666666667, "loss": 0.6948, "step": 375 }, { "epoch": 0.02547619652581921, "grad_norm": 2.88962459564209, "learning_rate": 0.0002, "loss": 0.7165, "step": 376 }, { "epoch": 0.0255439523676432, "grad_norm": 2.8554301261901855, "learning_rate": 0.00020053333333333332, "loss": 1.064, "step": 377 }, { "epoch": 0.025611708209467184, "grad_norm": 4.560920238494873, "learning_rate": 0.00020106666666666668, "loss": 1.0572, "step": 378 }, { "epoch": 0.025679464051291172, "grad_norm": 3.338428497314453, "learning_rate": 0.00020160000000000002, "loss": 1.1139, "step": 379 }, { "epoch": 0.02574721989311516, "grad_norm": 3.9975242614746094, "learning_rate": 0.00020213333333333333, "loss": 0.9781, "step": 380 }, { "epoch": 0.025814975734939148, "grad_norm": 2.463844060897827, "learning_rate": 0.0002026666666666667, "loss": 0.8245, "step": 381 }, { "epoch": 0.025882731576763133, "grad_norm": 2.7112913131713867, "learning_rate": 0.0002032, "loss": 0.8705, "step": 382 }, { "epoch": 0.02595048741858712, "grad_norm": 3.175318956375122, "learning_rate": 0.00020373333333333334, "loss": 0.8363, "step": 383 }, { "epoch": 0.02601824326041111, "grad_norm": 3.4241058826446533, "learning_rate": 0.0002042666666666667, "loss": 0.8909, "step": 384 }, { "epoch": 0.026085999102235097, "grad_norm": 2.8540198802948, "learning_rate": 0.00020480000000000002, "loss": 0.7986, "step": 385 }, { "epoch": 0.026153754944059082, "grad_norm": 3.551433801651001, "learning_rate": 0.00020533333333333333, "loss": 1.0388, "step": 386 }, { "epoch": 0.02622151078588307, "grad_norm": 2.895348310470581, "learning_rate": 0.0002058666666666667, "loss": 1.0517, "step": 387 }, { "epoch": 0.02628926662770706, "grad_norm": 2.378032684326172, "learning_rate": 0.0002064, "loss": 1.0098, "step": 388 }, { "epoch": 0.026357022469531043, "grad_norm": 2.6091766357421875, "learning_rate": 0.00020693333333333334, "loss": 0.9787, "step": 389 }, { "epoch": 0.02642477831135503, "grad_norm": 3.1191773414611816, "learning_rate": 0.0002074666666666667, "loss": 0.9477, "step": 390 }, { "epoch": 0.02649253415317902, "grad_norm": 2.812795400619507, "learning_rate": 0.00020800000000000001, "loss": 0.8659, "step": 391 }, { "epoch": 0.026560289995003008, "grad_norm": 2.7146248817443848, "learning_rate": 0.00020853333333333332, "loss": 0.9959, "step": 392 }, { "epoch": 0.026628045836826993, "grad_norm": 3.1946487426757812, "learning_rate": 0.00020906666666666666, "loss": 0.902, "step": 393 }, { "epoch": 0.02669580167865098, "grad_norm": 3.826266050338745, "learning_rate": 0.00020960000000000003, "loss": 1.0937, "step": 394 }, { "epoch": 0.02676355752047497, "grad_norm": 2.771653175354004, "learning_rate": 0.00021013333333333334, "loss": 0.9665, "step": 395 }, { "epoch": 0.026831313362298957, "grad_norm": 3.2525782585144043, "learning_rate": 0.00021066666666666665, "loss": 1.1349, "step": 396 }, { "epoch": 0.026899069204122942, "grad_norm": 3.4051384925842285, "learning_rate": 0.0002112, "loss": 1.2167, "step": 397 }, { "epoch": 0.02696682504594693, "grad_norm": 3.4748735427856445, "learning_rate": 0.00021173333333333335, "loss": 0.848, "step": 398 }, { "epoch": 0.02703458088777092, "grad_norm": 2.094017267227173, "learning_rate": 0.00021226666666666666, "loss": 0.6179, "step": 399 }, { "epoch": 0.027102336729594903, "grad_norm": 2.9348931312561035, "learning_rate": 0.00021280000000000002, "loss": 0.9316, "step": 400 }, { "epoch": 0.02717009257141889, "grad_norm": 2.874321222305298, "learning_rate": 0.00021333333333333333, "loss": 0.9912, "step": 401 }, { "epoch": 0.02723784841324288, "grad_norm": 3.2894723415374756, "learning_rate": 0.00021386666666666667, "loss": 1.1963, "step": 402 }, { "epoch": 0.027305604255066868, "grad_norm": 3.2671563625335693, "learning_rate": 0.00021440000000000003, "loss": 0.9469, "step": 403 }, { "epoch": 0.027373360096890852, "grad_norm": 2.426436424255371, "learning_rate": 0.00021493333333333334, "loss": 0.7675, "step": 404 }, { "epoch": 0.02744111593871484, "grad_norm": 2.979081869125366, "learning_rate": 0.00021546666666666665, "loss": 1.0344, "step": 405 }, { "epoch": 0.02750887178053883, "grad_norm": 2.872697114944458, "learning_rate": 0.00021600000000000002, "loss": 0.7642, "step": 406 }, { "epoch": 0.027576627622362817, "grad_norm": 2.9084696769714355, "learning_rate": 0.00021653333333333336, "loss": 0.87, "step": 407 }, { "epoch": 0.0276443834641868, "grad_norm": 2.231675386428833, "learning_rate": 0.00021706666666666667, "loss": 0.7374, "step": 408 }, { "epoch": 0.02771213930601079, "grad_norm": 2.4713785648345947, "learning_rate": 0.00021760000000000003, "loss": 0.7137, "step": 409 }, { "epoch": 0.027779895147834778, "grad_norm": 3.2178897857666016, "learning_rate": 0.00021813333333333334, "loss": 0.8865, "step": 410 }, { "epoch": 0.027847650989658766, "grad_norm": 2.7771530151367188, "learning_rate": 0.00021866666666666665, "loss": 0.8657, "step": 411 }, { "epoch": 0.02791540683148275, "grad_norm": 2.7896981239318848, "learning_rate": 0.00021920000000000002, "loss": 0.8779, "step": 412 }, { "epoch": 0.02798316267330674, "grad_norm": 3.215338706970215, "learning_rate": 0.00021973333333333335, "loss": 0.971, "step": 413 }, { "epoch": 0.028050918515130727, "grad_norm": 3.515970468521118, "learning_rate": 0.00022026666666666666, "loss": 0.9283, "step": 414 }, { "epoch": 0.028118674356954712, "grad_norm": 2.9704067707061768, "learning_rate": 0.00022080000000000003, "loss": 0.9245, "step": 415 }, { "epoch": 0.0281864301987787, "grad_norm": 2.9705166816711426, "learning_rate": 0.00022133333333333334, "loss": 0.9025, "step": 416 }, { "epoch": 0.02825418604060269, "grad_norm": 2.9910025596618652, "learning_rate": 0.00022186666666666667, "loss": 1.0317, "step": 417 }, { "epoch": 0.028321941882426677, "grad_norm": 3.0812675952911377, "learning_rate": 0.00022240000000000004, "loss": 0.9516, "step": 418 }, { "epoch": 0.02838969772425066, "grad_norm": 4.055918216705322, "learning_rate": 0.00022293333333333335, "loss": 0.9599, "step": 419 }, { "epoch": 0.02845745356607465, "grad_norm": 2.581022024154663, "learning_rate": 0.00022346666666666666, "loss": 0.7968, "step": 420 }, { "epoch": 0.028525209407898638, "grad_norm": 2.069627523422241, "learning_rate": 0.00022400000000000002, "loss": 0.6994, "step": 421 }, { "epoch": 0.028592965249722626, "grad_norm": 3.5436763763427734, "learning_rate": 0.00022453333333333336, "loss": 0.9731, "step": 422 }, { "epoch": 0.02866072109154661, "grad_norm": 2.5492074489593506, "learning_rate": 0.00022506666666666667, "loss": 0.9319, "step": 423 }, { "epoch": 0.0287284769333706, "grad_norm": 3.4567394256591797, "learning_rate": 0.00022559999999999998, "loss": 0.9793, "step": 424 }, { "epoch": 0.028796232775194587, "grad_norm": 3.0208230018615723, "learning_rate": 0.00022613333333333335, "loss": 0.9211, "step": 425 }, { "epoch": 0.028863988617018575, "grad_norm": 3.6448960304260254, "learning_rate": 0.00022666666666666668, "loss": 1.0795, "step": 426 }, { "epoch": 0.02893174445884256, "grad_norm": 2.785637140274048, "learning_rate": 0.0002272, "loss": 1.0245, "step": 427 }, { "epoch": 0.028999500300666548, "grad_norm": 2.842777967453003, "learning_rate": 0.00022773333333333336, "loss": 0.8712, "step": 428 }, { "epoch": 0.029067256142490536, "grad_norm": 3.13641095161438, "learning_rate": 0.00022826666666666667, "loss": 0.9302, "step": 429 }, { "epoch": 0.02913501198431452, "grad_norm": 3.7317042350769043, "learning_rate": 0.0002288, "loss": 1.0933, "step": 430 }, { "epoch": 0.02920276782613851, "grad_norm": 2.997500419616699, "learning_rate": 0.00022933333333333334, "loss": 0.887, "step": 431 }, { "epoch": 0.029270523667962497, "grad_norm": 3.0842528343200684, "learning_rate": 0.00022986666666666668, "loss": 1.1222, "step": 432 }, { "epoch": 0.029338279509786486, "grad_norm": 2.915990114212036, "learning_rate": 0.0002304, "loss": 0.9592, "step": 433 }, { "epoch": 0.02940603535161047, "grad_norm": 3.1716978549957275, "learning_rate": 0.00023093333333333335, "loss": 1.0654, "step": 434 }, { "epoch": 0.02947379119343446, "grad_norm": 2.9748449325561523, "learning_rate": 0.00023146666666666666, "loss": 0.9074, "step": 435 }, { "epoch": 0.029541547035258447, "grad_norm": 2.785900115966797, "learning_rate": 0.000232, "loss": 0.8457, "step": 436 }, { "epoch": 0.029609302877082435, "grad_norm": 2.569838523864746, "learning_rate": 0.00023253333333333337, "loss": 0.8915, "step": 437 }, { "epoch": 0.02967705871890642, "grad_norm": 2.8905043601989746, "learning_rate": 0.00023306666666666668, "loss": 0.8502, "step": 438 }, { "epoch": 0.029744814560730408, "grad_norm": 2.7213053703308105, "learning_rate": 0.00023359999999999999, "loss": 0.8998, "step": 439 }, { "epoch": 0.029812570402554396, "grad_norm": 3.745122194290161, "learning_rate": 0.00023413333333333335, "loss": 1.1459, "step": 440 }, { "epoch": 0.02988032624437838, "grad_norm": 2.9431862831115723, "learning_rate": 0.0002346666666666667, "loss": 0.9016, "step": 441 }, { "epoch": 0.02994808208620237, "grad_norm": 2.872504711151123, "learning_rate": 0.0002352, "loss": 0.9417, "step": 442 }, { "epoch": 0.030015837928026357, "grad_norm": 2.563321828842163, "learning_rate": 0.00023573333333333336, "loss": 0.8268, "step": 443 }, { "epoch": 0.030083593769850345, "grad_norm": 3.125023603439331, "learning_rate": 0.00023626666666666667, "loss": 0.8371, "step": 444 }, { "epoch": 0.03015134961167433, "grad_norm": 3.140502452850342, "learning_rate": 0.0002368, "loss": 1.0886, "step": 445 }, { "epoch": 0.03021910545349832, "grad_norm": 2.709343433380127, "learning_rate": 0.00023733333333333337, "loss": 0.8819, "step": 446 }, { "epoch": 0.030286861295322307, "grad_norm": 2.917924165725708, "learning_rate": 0.00023786666666666668, "loss": 1.0793, "step": 447 }, { "epoch": 0.030354617137146295, "grad_norm": 2.741274833679199, "learning_rate": 0.0002384, "loss": 0.9807, "step": 448 }, { "epoch": 0.03042237297897028, "grad_norm": 2.7455129623413086, "learning_rate": 0.00023893333333333336, "loss": 0.9961, "step": 449 }, { "epoch": 0.030490128820794268, "grad_norm": 3.417269229888916, "learning_rate": 0.0002394666666666667, "loss": 0.8002, "step": 450 }, { "epoch": 0.030557884662618256, "grad_norm": 2.652172088623047, "learning_rate": 0.00024, "loss": 0.7414, "step": 451 }, { "epoch": 0.030625640504442244, "grad_norm": 3.259737968444824, "learning_rate": 0.00024053333333333337, "loss": 1.0514, "step": 452 }, { "epoch": 0.03069339634626623, "grad_norm": 4.5452070236206055, "learning_rate": 0.00024106666666666668, "loss": 1.2329, "step": 453 }, { "epoch": 0.030761152188090217, "grad_norm": 3.586223602294922, "learning_rate": 0.0002416, "loss": 1.0274, "step": 454 }, { "epoch": 0.030828908029914205, "grad_norm": 2.549325704574585, "learning_rate": 0.00024213333333333333, "loss": 0.8767, "step": 455 }, { "epoch": 0.03089666387173819, "grad_norm": 2.952594757080078, "learning_rate": 0.0002426666666666667, "loss": 0.9265, "step": 456 }, { "epoch": 0.030964419713562178, "grad_norm": 2.737058639526367, "learning_rate": 0.0002432, "loss": 0.9006, "step": 457 }, { "epoch": 0.031032175555386166, "grad_norm": 2.8463776111602783, "learning_rate": 0.0002437333333333333, "loss": 1.0022, "step": 458 }, { "epoch": 0.031099931397210154, "grad_norm": 2.8198354244232178, "learning_rate": 0.0002442666666666667, "loss": 0.8579, "step": 459 }, { "epoch": 0.03116768723903414, "grad_norm": 2.6514904499053955, "learning_rate": 0.0002448, "loss": 1.0371, "step": 460 }, { "epoch": 0.031235443080858127, "grad_norm": 3.080641031265259, "learning_rate": 0.00024533333333333335, "loss": 0.9005, "step": 461 }, { "epoch": 0.031303198922682116, "grad_norm": 3.199880361557007, "learning_rate": 0.0002458666666666667, "loss": 0.9525, "step": 462 }, { "epoch": 0.031370954764506104, "grad_norm": 2.3403611183166504, "learning_rate": 0.0002464, "loss": 0.8538, "step": 463 }, { "epoch": 0.03143871060633009, "grad_norm": 2.694025754928589, "learning_rate": 0.00024693333333333334, "loss": 0.8852, "step": 464 }, { "epoch": 0.03150646644815408, "grad_norm": 2.6202657222747803, "learning_rate": 0.0002474666666666667, "loss": 0.7338, "step": 465 }, { "epoch": 0.03157422228997806, "grad_norm": 2.4244868755340576, "learning_rate": 0.000248, "loss": 0.8328, "step": 466 }, { "epoch": 0.03164197813180205, "grad_norm": 2.990705966949463, "learning_rate": 0.0002485333333333333, "loss": 0.9602, "step": 467 }, { "epoch": 0.03170973397362604, "grad_norm": 2.595392942428589, "learning_rate": 0.0002490666666666667, "loss": 0.7387, "step": 468 }, { "epoch": 0.031777489815450026, "grad_norm": 2.7632737159729004, "learning_rate": 0.0002496, "loss": 0.8419, "step": 469 }, { "epoch": 0.031845245657274014, "grad_norm": 3.8289895057678223, "learning_rate": 0.0002501333333333333, "loss": 1.0181, "step": 470 }, { "epoch": 0.031913001499098, "grad_norm": 4.551751136779785, "learning_rate": 0.00025066666666666667, "loss": 0.901, "step": 471 }, { "epoch": 0.03198075734092199, "grad_norm": 2.899014949798584, "learning_rate": 0.00025120000000000003, "loss": 0.8226, "step": 472 }, { "epoch": 0.03204851318274597, "grad_norm": 2.805997610092163, "learning_rate": 0.00025173333333333334, "loss": 0.7902, "step": 473 }, { "epoch": 0.03211626902456996, "grad_norm": 2.6749446392059326, "learning_rate": 0.0002522666666666667, "loss": 0.9136, "step": 474 }, { "epoch": 0.03218402486639395, "grad_norm": 3.000572681427002, "learning_rate": 0.0002528, "loss": 0.977, "step": 475 }, { "epoch": 0.032251780708217936, "grad_norm": 3.4974238872528076, "learning_rate": 0.00025333333333333333, "loss": 1.0862, "step": 476 }, { "epoch": 0.032319536550041925, "grad_norm": 2.88631272315979, "learning_rate": 0.0002538666666666667, "loss": 1.1025, "step": 477 }, { "epoch": 0.03238729239186591, "grad_norm": 3.2899982929229736, "learning_rate": 0.0002544, "loss": 0.9626, "step": 478 }, { "epoch": 0.0324550482336899, "grad_norm": 2.4934239387512207, "learning_rate": 0.0002549333333333333, "loss": 0.857, "step": 479 }, { "epoch": 0.03252280407551388, "grad_norm": 3.5751945972442627, "learning_rate": 0.0002554666666666667, "loss": 1.039, "step": 480 }, { "epoch": 0.03259055991733787, "grad_norm": 2.848538637161255, "learning_rate": 0.00025600000000000004, "loss": 0.8887, "step": 481 }, { "epoch": 0.03265831575916186, "grad_norm": 3.5423502922058105, "learning_rate": 0.00025653333333333335, "loss": 0.9348, "step": 482 }, { "epoch": 0.03272607160098585, "grad_norm": 2.3428285121917725, "learning_rate": 0.0002570666666666667, "loss": 0.6604, "step": 483 }, { "epoch": 0.032793827442809835, "grad_norm": 2.862464427947998, "learning_rate": 0.00025760000000000003, "loss": 0.7328, "step": 484 }, { "epoch": 0.03286158328463382, "grad_norm": 2.3965930938720703, "learning_rate": 0.00025813333333333334, "loss": 0.8123, "step": 485 }, { "epoch": 0.03292933912645781, "grad_norm": 3.907257556915283, "learning_rate": 0.00025866666666666665, "loss": 1.1166, "step": 486 }, { "epoch": 0.0329970949682818, "grad_norm": 6.006860733032227, "learning_rate": 0.0002592, "loss": 0.9687, "step": 487 }, { "epoch": 0.03306485081010578, "grad_norm": 3.908031702041626, "learning_rate": 0.0002597333333333333, "loss": 0.8756, "step": 488 }, { "epoch": 0.03313260665192977, "grad_norm": 2.7148728370666504, "learning_rate": 0.00026026666666666663, "loss": 0.7544, "step": 489 }, { "epoch": 0.03320036249375376, "grad_norm": 3.107423782348633, "learning_rate": 0.0002608, "loss": 0.8232, "step": 490 }, { "epoch": 0.033268118335577745, "grad_norm": 2.9716622829437256, "learning_rate": 0.00026133333333333336, "loss": 0.7154, "step": 491 }, { "epoch": 0.033335874177401734, "grad_norm": 2.700042963027954, "learning_rate": 0.00026186666666666667, "loss": 0.7477, "step": 492 }, { "epoch": 0.03340363001922572, "grad_norm": 3.002424478530884, "learning_rate": 0.00026240000000000004, "loss": 0.7439, "step": 493 }, { "epoch": 0.03347138586104971, "grad_norm": 2.69582200050354, "learning_rate": 0.00026293333333333335, "loss": 0.8381, "step": 494 }, { "epoch": 0.03353914170287369, "grad_norm": 3.2823925018310547, "learning_rate": 0.00026346666666666666, "loss": 0.8639, "step": 495 }, { "epoch": 0.03360689754469768, "grad_norm": 2.939606189727783, "learning_rate": 0.000264, "loss": 0.9284, "step": 496 }, { "epoch": 0.03367465338652167, "grad_norm": 2.807971954345703, "learning_rate": 0.00026453333333333333, "loss": 0.8527, "step": 497 }, { "epoch": 0.033742409228345656, "grad_norm": 2.946587562561035, "learning_rate": 0.00026506666666666664, "loss": 0.986, "step": 498 }, { "epoch": 0.033810165070169644, "grad_norm": 3.3178935050964355, "learning_rate": 0.0002656, "loss": 1.0663, "step": 499 }, { "epoch": 0.03387792091199363, "grad_norm": 3.8597490787506104, "learning_rate": 0.00026613333333333337, "loss": 1.1641, "step": 500 }, { "epoch": 0.03394567675381762, "grad_norm": 2.5785224437713623, "learning_rate": 0.0002666666666666667, "loss": 0.9571, "step": 501 }, { "epoch": 0.03401343259564161, "grad_norm": 3.227065324783325, "learning_rate": 0.00026720000000000004, "loss": 1.0555, "step": 502 }, { "epoch": 0.03408118843746559, "grad_norm": 2.8175675868988037, "learning_rate": 0.00026773333333333335, "loss": 0.892, "step": 503 }, { "epoch": 0.03414894427928958, "grad_norm": 2.33778977394104, "learning_rate": 0.00026826666666666666, "loss": 0.7826, "step": 504 }, { "epoch": 0.034216700121113566, "grad_norm": 3.1155123710632324, "learning_rate": 0.00026880000000000003, "loss": 1.1612, "step": 505 }, { "epoch": 0.034284455962937554, "grad_norm": 2.7025229930877686, "learning_rate": 0.00026933333333333334, "loss": 0.9171, "step": 506 }, { "epoch": 0.03435221180476154, "grad_norm": 3.002000093460083, "learning_rate": 0.00026986666666666665, "loss": 0.9522, "step": 507 }, { "epoch": 0.03441996764658553, "grad_norm": 2.819808006286621, "learning_rate": 0.0002704, "loss": 0.9387, "step": 508 }, { "epoch": 0.03448772348840952, "grad_norm": 2.7500219345092773, "learning_rate": 0.0002709333333333333, "loss": 1.0149, "step": 509 }, { "epoch": 0.0345554793302335, "grad_norm": 3.1415674686431885, "learning_rate": 0.0002714666666666667, "loss": 0.7711, "step": 510 }, { "epoch": 0.03462323517205749, "grad_norm": 4.178181171417236, "learning_rate": 0.00027200000000000005, "loss": 1.1125, "step": 511 }, { "epoch": 0.03469099101388148, "grad_norm": 3.3421645164489746, "learning_rate": 0.00027253333333333336, "loss": 1.0714, "step": 512 }, { "epoch": 0.034758746855705465, "grad_norm": 3.297851085662842, "learning_rate": 0.00027306666666666667, "loss": 0.9904, "step": 513 }, { "epoch": 0.03482650269752945, "grad_norm": 2.8368825912475586, "learning_rate": 0.00027360000000000004, "loss": 0.7776, "step": 514 }, { "epoch": 0.03489425853935344, "grad_norm": 2.7940316200256348, "learning_rate": 0.00027413333333333335, "loss": 0.8111, "step": 515 }, { "epoch": 0.03496201438117743, "grad_norm": 2.5380403995513916, "learning_rate": 0.00027466666666666666, "loss": 0.7809, "step": 516 }, { "epoch": 0.03502977022300142, "grad_norm": 3.905104398727417, "learning_rate": 0.00027519999999999997, "loss": 1.0114, "step": 517 }, { "epoch": 0.0350975260648254, "grad_norm": 3.1693973541259766, "learning_rate": 0.00027573333333333333, "loss": 0.8502, "step": 518 }, { "epoch": 0.03516528190664939, "grad_norm": 2.3691413402557373, "learning_rate": 0.0002762666666666667, "loss": 0.6939, "step": 519 }, { "epoch": 0.035233037748473375, "grad_norm": 2.954050064086914, "learning_rate": 0.0002768, "loss": 0.8949, "step": 520 }, { "epoch": 0.035300793590297364, "grad_norm": 3.937025547027588, "learning_rate": 0.00027733333333333337, "loss": 1.2839, "step": 521 }, { "epoch": 0.03536854943212135, "grad_norm": 2.931206703186035, "learning_rate": 0.0002778666666666667, "loss": 0.9143, "step": 522 }, { "epoch": 0.03543630527394534, "grad_norm": 4.082540512084961, "learning_rate": 0.0002784, "loss": 0.8048, "step": 523 }, { "epoch": 0.03550406111576933, "grad_norm": 2.4357848167419434, "learning_rate": 0.00027893333333333336, "loss": 0.716, "step": 524 }, { "epoch": 0.03557181695759331, "grad_norm": 2.947275400161743, "learning_rate": 0.00027946666666666667, "loss": 0.897, "step": 525 }, { "epoch": 0.0356395727994173, "grad_norm": 3.021286725997925, "learning_rate": 0.00028, "loss": 1.0273, "step": 526 }, { "epoch": 0.035707328641241286, "grad_norm": 3.121258020401001, "learning_rate": 0.00028053333333333334, "loss": 0.8244, "step": 527 }, { "epoch": 0.035775084483065274, "grad_norm": 2.824312210083008, "learning_rate": 0.0002810666666666667, "loss": 1.058, "step": 528 }, { "epoch": 0.03584284032488926, "grad_norm": 3.4016153812408447, "learning_rate": 0.0002816, "loss": 0.772, "step": 529 }, { "epoch": 0.03591059616671325, "grad_norm": 3.031737804412842, "learning_rate": 0.0002821333333333334, "loss": 0.9401, "step": 530 }, { "epoch": 0.03597835200853724, "grad_norm": 3.0860114097595215, "learning_rate": 0.0002826666666666667, "loss": 0.8193, "step": 531 }, { "epoch": 0.03604610785036123, "grad_norm": 3.2134311199188232, "learning_rate": 0.0002832, "loss": 0.9726, "step": 532 }, { "epoch": 0.03611386369218521, "grad_norm": 2.6056156158447266, "learning_rate": 0.00028373333333333336, "loss": 0.7144, "step": 533 }, { "epoch": 0.036181619534009196, "grad_norm": 3.4816110134124756, "learning_rate": 0.0002842666666666667, "loss": 0.9486, "step": 534 }, { "epoch": 0.036249375375833184, "grad_norm": 2.573964834213257, "learning_rate": 0.0002848, "loss": 0.8565, "step": 535 }, { "epoch": 0.03631713121765717, "grad_norm": 2.908560276031494, "learning_rate": 0.00028533333333333335, "loss": 0.8386, "step": 536 }, { "epoch": 0.03638488705948116, "grad_norm": 2.734622001647949, "learning_rate": 0.00028586666666666666, "loss": 0.8293, "step": 537 }, { "epoch": 0.03645264290130515, "grad_norm": 2.6459169387817383, "learning_rate": 0.0002864, "loss": 0.8053, "step": 538 }, { "epoch": 0.03652039874312914, "grad_norm": 2.2266340255737305, "learning_rate": 0.0002869333333333334, "loss": 0.6746, "step": 539 }, { "epoch": 0.03658815458495312, "grad_norm": 2.785195827484131, "learning_rate": 0.0002874666666666667, "loss": 0.8155, "step": 540 }, { "epoch": 0.03665591042677711, "grad_norm": 3.1835994720458984, "learning_rate": 0.000288, "loss": 0.753, "step": 541 }, { "epoch": 0.036723666268601095, "grad_norm": 2.3835973739624023, "learning_rate": 0.00028853333333333337, "loss": 0.7402, "step": 542 }, { "epoch": 0.03679142211042508, "grad_norm": 3.2223799228668213, "learning_rate": 0.0002890666666666667, "loss": 0.9505, "step": 543 }, { "epoch": 0.03685917795224907, "grad_norm": 3.0398600101470947, "learning_rate": 0.0002896, "loss": 0.8357, "step": 544 }, { "epoch": 0.03692693379407306, "grad_norm": 3.1473746299743652, "learning_rate": 0.00029013333333333336, "loss": 0.8665, "step": 545 }, { "epoch": 0.03699468963589705, "grad_norm": 3.3487610816955566, "learning_rate": 0.00029066666666666667, "loss": 1.2443, "step": 546 }, { "epoch": 0.03706244547772103, "grad_norm": 3.5685651302337646, "learning_rate": 0.00029120000000000003, "loss": 0.8331, "step": 547 }, { "epoch": 0.03713020131954502, "grad_norm": 2.2936601638793945, "learning_rate": 0.00029173333333333334, "loss": 0.7988, "step": 548 }, { "epoch": 0.037197957161369005, "grad_norm": 2.2276930809020996, "learning_rate": 0.0002922666666666667, "loss": 0.7061, "step": 549 }, { "epoch": 0.03726571300319299, "grad_norm": 2.878218650817871, "learning_rate": 0.0002928, "loss": 0.8431, "step": 550 }, { "epoch": 0.03733346884501698, "grad_norm": 2.5694830417633057, "learning_rate": 0.0002933333333333333, "loss": 0.785, "step": 551 }, { "epoch": 0.03740122468684097, "grad_norm": 2.700960397720337, "learning_rate": 0.0002938666666666667, "loss": 0.8431, "step": 552 }, { "epoch": 0.03746898052866496, "grad_norm": 2.5335254669189453, "learning_rate": 0.0002944, "loss": 0.7384, "step": 553 }, { "epoch": 0.037536736370488946, "grad_norm": 3.4916114807128906, "learning_rate": 0.0002949333333333333, "loss": 1.1884, "step": 554 }, { "epoch": 0.03760449221231293, "grad_norm": 2.864866018295288, "learning_rate": 0.0002954666666666667, "loss": 0.7412, "step": 555 }, { "epoch": 0.037672248054136916, "grad_norm": 2.990323305130005, "learning_rate": 0.000296, "loss": 0.9573, "step": 556 }, { "epoch": 0.037740003895960904, "grad_norm": 3.1998960971832275, "learning_rate": 0.00029653333333333335, "loss": 0.6946, "step": 557 }, { "epoch": 0.03780775973778489, "grad_norm": 4.070067405700684, "learning_rate": 0.0002970666666666667, "loss": 1.1749, "step": 558 }, { "epoch": 0.03787551557960888, "grad_norm": 3.5025758743286133, "learning_rate": 0.0002976, "loss": 1.0348, "step": 559 }, { "epoch": 0.03794327142143287, "grad_norm": 3.336334705352783, "learning_rate": 0.00029813333333333333, "loss": 0.8743, "step": 560 }, { "epoch": 0.03801102726325686, "grad_norm": 2.947784662246704, "learning_rate": 0.0002986666666666667, "loss": 0.8129, "step": 561 }, { "epoch": 0.03807878310508084, "grad_norm": 3.5019314289093018, "learning_rate": 0.0002992, "loss": 0.8994, "step": 562 }, { "epoch": 0.038146538946904826, "grad_norm": 5.031900882720947, "learning_rate": 0.0002997333333333333, "loss": 0.8815, "step": 563 }, { "epoch": 0.038214294788728814, "grad_norm": 2.7715864181518555, "learning_rate": 0.0003002666666666667, "loss": 0.9663, "step": 564 }, { "epoch": 0.0382820506305528, "grad_norm": 3.0140392780303955, "learning_rate": 0.0003008, "loss": 1.3123, "step": 565 }, { "epoch": 0.03834980647237679, "grad_norm": 2.718428134918213, "learning_rate": 0.00030133333333333336, "loss": 0.8085, "step": 566 }, { "epoch": 0.03841756231420078, "grad_norm": 3.546281099319458, "learning_rate": 0.0003018666666666667, "loss": 1.1422, "step": 567 }, { "epoch": 0.03848531815602477, "grad_norm": 2.809030771255493, "learning_rate": 0.00030240000000000003, "loss": 0.8929, "step": 568 }, { "epoch": 0.038553073997848755, "grad_norm": 4.255012512207031, "learning_rate": 0.00030293333333333334, "loss": 0.9192, "step": 569 }, { "epoch": 0.038620829839672736, "grad_norm": 4.0748419761657715, "learning_rate": 0.0003034666666666667, "loss": 1.1949, "step": 570 }, { "epoch": 0.038688585681496725, "grad_norm": 3.522765636444092, "learning_rate": 0.000304, "loss": 0.9937, "step": 571 }, { "epoch": 0.03875634152332071, "grad_norm": 3.249178886413574, "learning_rate": 0.0003045333333333333, "loss": 0.9696, "step": 572 }, { "epoch": 0.0388240973651447, "grad_norm": 2.560654401779175, "learning_rate": 0.0003050666666666667, "loss": 0.7643, "step": 573 }, { "epoch": 0.03889185320696869, "grad_norm": 3.2656116485595703, "learning_rate": 0.0003056, "loss": 0.9903, "step": 574 }, { "epoch": 0.03895960904879268, "grad_norm": 3.4029576778411865, "learning_rate": 0.0003061333333333333, "loss": 0.9068, "step": 575 }, { "epoch": 0.039027364890616666, "grad_norm": 2.3622934818267822, "learning_rate": 0.0003066666666666667, "loss": 0.846, "step": 576 }, { "epoch": 0.03909512073244065, "grad_norm": 2.6016721725463867, "learning_rate": 0.00030720000000000004, "loss": 0.8258, "step": 577 }, { "epoch": 0.039162876574264635, "grad_norm": 2.965467929840088, "learning_rate": 0.00030773333333333335, "loss": 0.8572, "step": 578 }, { "epoch": 0.03923063241608862, "grad_norm": 3.1143174171447754, "learning_rate": 0.0003082666666666667, "loss": 1.0713, "step": 579 }, { "epoch": 0.03929838825791261, "grad_norm": 3.061610221862793, "learning_rate": 0.0003088, "loss": 1.0639, "step": 580 }, { "epoch": 0.0393661440997366, "grad_norm": 2.217522382736206, "learning_rate": 0.00030933333333333334, "loss": 0.7877, "step": 581 }, { "epoch": 0.03943389994156059, "grad_norm": 2.861865282058716, "learning_rate": 0.00030986666666666665, "loss": 0.9587, "step": 582 }, { "epoch": 0.039501655783384576, "grad_norm": 2.6877834796905518, "learning_rate": 0.0003104, "loss": 0.8095, "step": 583 }, { "epoch": 0.039569411625208564, "grad_norm": 2.6415841579437256, "learning_rate": 0.0003109333333333333, "loss": 0.829, "step": 584 }, { "epoch": 0.039637167467032546, "grad_norm": 3.292417049407959, "learning_rate": 0.0003114666666666667, "loss": 1.1176, "step": 585 }, { "epoch": 0.039704923308856534, "grad_norm": 3.163455009460449, "learning_rate": 0.00031200000000000005, "loss": 1.0602, "step": 586 }, { "epoch": 0.03977267915068052, "grad_norm": 2.049830675125122, "learning_rate": 0.00031253333333333336, "loss": 0.7819, "step": 587 }, { "epoch": 0.03984043499250451, "grad_norm": 3.6346240043640137, "learning_rate": 0.00031306666666666667, "loss": 0.9563, "step": 588 }, { "epoch": 0.0399081908343285, "grad_norm": 3.3403074741363525, "learning_rate": 0.00031360000000000003, "loss": 0.9137, "step": 589 }, { "epoch": 0.039975946676152486, "grad_norm": 3.304730176925659, "learning_rate": 0.00031413333333333334, "loss": 0.7727, "step": 590 }, { "epoch": 0.040043702517976475, "grad_norm": 2.6178839206695557, "learning_rate": 0.00031466666666666665, "loss": 0.8185, "step": 591 }, { "epoch": 0.040111458359800456, "grad_norm": 2.526662826538086, "learning_rate": 0.0003152, "loss": 0.9307, "step": 592 }, { "epoch": 0.040179214201624444, "grad_norm": 2.9365689754486084, "learning_rate": 0.00031573333333333333, "loss": 1.0056, "step": 593 }, { "epoch": 0.04024697004344843, "grad_norm": 30.42789649963379, "learning_rate": 0.0003162666666666667, "loss": 1.089, "step": 594 }, { "epoch": 0.04031472588527242, "grad_norm": 2.970416784286499, "learning_rate": 0.00031680000000000006, "loss": 0.9169, "step": 595 }, { "epoch": 0.04038248172709641, "grad_norm": 3.2144784927368164, "learning_rate": 0.00031733333333333337, "loss": 1.0329, "step": 596 }, { "epoch": 0.0404502375689204, "grad_norm": 3.707195281982422, "learning_rate": 0.0003178666666666667, "loss": 0.9338, "step": 597 }, { "epoch": 0.040517993410744385, "grad_norm": 3.0473079681396484, "learning_rate": 0.00031840000000000004, "loss": 0.9199, "step": 598 }, { "epoch": 0.04058574925256837, "grad_norm": 2.6992573738098145, "learning_rate": 0.00031893333333333335, "loss": 0.7692, "step": 599 }, { "epoch": 0.040653505094392355, "grad_norm": 3.3280906677246094, "learning_rate": 0.00031946666666666666, "loss": 1.0642, "step": 600 }, { "epoch": 0.04072126093621634, "grad_norm": 3.3712239265441895, "learning_rate": 0.00032, "loss": 1.0029, "step": 601 }, { "epoch": 0.04078901677804033, "grad_norm": 3.070742130279541, "learning_rate": 0.00032053333333333334, "loss": 0.9725, "step": 602 }, { "epoch": 0.04085677261986432, "grad_norm": 2.912259578704834, "learning_rate": 0.00032106666666666665, "loss": 1.0389, "step": 603 }, { "epoch": 0.04092452846168831, "grad_norm": 2.375462532043457, "learning_rate": 0.0003216, "loss": 0.8011, "step": 604 }, { "epoch": 0.040992284303512296, "grad_norm": 3.4855964183807373, "learning_rate": 0.0003221333333333334, "loss": 0.9889, "step": 605 }, { "epoch": 0.041060040145336284, "grad_norm": 2.7019526958465576, "learning_rate": 0.0003226666666666667, "loss": 0.818, "step": 606 }, { "epoch": 0.041127795987160265, "grad_norm": 10.730135917663574, "learning_rate": 0.00032320000000000005, "loss": 0.7862, "step": 607 }, { "epoch": 0.04119555182898425, "grad_norm": 2.9470598697662354, "learning_rate": 0.00032373333333333336, "loss": 0.841, "step": 608 }, { "epoch": 0.04126330767080824, "grad_norm": 3.629456043243408, "learning_rate": 0.00032426666666666667, "loss": 0.8935, "step": 609 }, { "epoch": 0.04133106351263223, "grad_norm": 2.7942190170288086, "learning_rate": 0.00032480000000000003, "loss": 0.8737, "step": 610 }, { "epoch": 0.04139881935445622, "grad_norm": 3.5281028747558594, "learning_rate": 0.00032533333333333334, "loss": 0.7731, "step": 611 }, { "epoch": 0.041466575196280206, "grad_norm": 2.6879961490631104, "learning_rate": 0.00032586666666666665, "loss": 0.7488, "step": 612 }, { "epoch": 0.041534331038104194, "grad_norm": 2.6562204360961914, "learning_rate": 0.0003264, "loss": 0.8302, "step": 613 }, { "epoch": 0.041602086879928175, "grad_norm": 2.436709403991699, "learning_rate": 0.0003269333333333334, "loss": 0.6787, "step": 614 }, { "epoch": 0.041669842721752164, "grad_norm": 3.0573251247406006, "learning_rate": 0.0003274666666666667, "loss": 0.897, "step": 615 }, { "epoch": 0.04173759856357615, "grad_norm": 3.079270839691162, "learning_rate": 0.000328, "loss": 0.7824, "step": 616 }, { "epoch": 0.04180535440540014, "grad_norm": 2.557379961013794, "learning_rate": 0.00032853333333333337, "loss": 0.9262, "step": 617 }, { "epoch": 0.04187311024722413, "grad_norm": 2.3489010334014893, "learning_rate": 0.0003290666666666667, "loss": 0.7791, "step": 618 }, { "epoch": 0.041940866089048116, "grad_norm": 3.7144384384155273, "learning_rate": 0.0003296, "loss": 1.1106, "step": 619 }, { "epoch": 0.042008621930872105, "grad_norm": 3.401601552963257, "learning_rate": 0.00033013333333333335, "loss": 0.8669, "step": 620 }, { "epoch": 0.04207637777269609, "grad_norm": 2.546100616455078, "learning_rate": 0.00033066666666666666, "loss": 0.8754, "step": 621 }, { "epoch": 0.042144133614520074, "grad_norm": 2.6193525791168213, "learning_rate": 0.0003312, "loss": 0.7717, "step": 622 }, { "epoch": 0.04221188945634406, "grad_norm": 4.521993160247803, "learning_rate": 0.00033173333333333334, "loss": 0.9104, "step": 623 }, { "epoch": 0.04227964529816805, "grad_norm": 2.9595706462860107, "learning_rate": 0.0003322666666666667, "loss": 0.957, "step": 624 }, { "epoch": 0.04234740113999204, "grad_norm": 4.649613857269287, "learning_rate": 0.0003328, "loss": 1.0525, "step": 625 }, { "epoch": 0.04241515698181603, "grad_norm": 3.1371946334838867, "learning_rate": 0.0003333333333333334, "loss": 0.7659, "step": 626 }, { "epoch": 0.042482912823640015, "grad_norm": 2.692748785018921, "learning_rate": 0.0003338666666666667, "loss": 0.8026, "step": 627 }, { "epoch": 0.042550668665464, "grad_norm": 2.395095109939575, "learning_rate": 0.0003344, "loss": 0.78, "step": 628 }, { "epoch": 0.042618424507287984, "grad_norm": 2.9649155139923096, "learning_rate": 0.00033493333333333336, "loss": 0.8616, "step": 629 }, { "epoch": 0.04268618034911197, "grad_norm": 2.880617618560791, "learning_rate": 0.00033546666666666667, "loss": 0.8346, "step": 630 }, { "epoch": 0.04275393619093596, "grad_norm": 3.5783746242523193, "learning_rate": 0.000336, "loss": 0.9194, "step": 631 }, { "epoch": 0.04282169203275995, "grad_norm": 3.871279239654541, "learning_rate": 0.00033653333333333335, "loss": 1.0733, "step": 632 }, { "epoch": 0.04288944787458394, "grad_norm": 4.584722995758057, "learning_rate": 0.0003370666666666667, "loss": 1.0333, "step": 633 }, { "epoch": 0.042957203716407925, "grad_norm": 2.96329402923584, "learning_rate": 0.0003376, "loss": 0.9121, "step": 634 }, { "epoch": 0.043024959558231914, "grad_norm": 3.54400634765625, "learning_rate": 0.0003381333333333334, "loss": 1.0999, "step": 635 }, { "epoch": 0.0430927154000559, "grad_norm": 3.631817579269409, "learning_rate": 0.0003386666666666667, "loss": 0.8852, "step": 636 }, { "epoch": 0.04316047124187988, "grad_norm": 2.812607765197754, "learning_rate": 0.0003392, "loss": 1.0069, "step": 637 }, { "epoch": 0.04322822708370387, "grad_norm": 2.5128774642944336, "learning_rate": 0.00033973333333333337, "loss": 0.8595, "step": 638 }, { "epoch": 0.04329598292552786, "grad_norm": 3.2381293773651123, "learning_rate": 0.0003402666666666667, "loss": 0.8818, "step": 639 }, { "epoch": 0.04336373876735185, "grad_norm": 3.136335611343384, "learning_rate": 0.0003408, "loss": 1.0274, "step": 640 }, { "epoch": 0.043431494609175836, "grad_norm": 3.9792425632476807, "learning_rate": 0.00034133333333333335, "loss": 1.0939, "step": 641 }, { "epoch": 0.043499250450999824, "grad_norm": 3.2825241088867188, "learning_rate": 0.00034186666666666666, "loss": 1.1231, "step": 642 }, { "epoch": 0.04356700629282381, "grad_norm": 3.5767383575439453, "learning_rate": 0.00034240000000000003, "loss": 0.9208, "step": 643 }, { "epoch": 0.04363476213464779, "grad_norm": 3.1160783767700195, "learning_rate": 0.00034293333333333334, "loss": 1.0849, "step": 644 }, { "epoch": 0.04370251797647178, "grad_norm": 3.11011004447937, "learning_rate": 0.0003434666666666667, "loss": 0.9525, "step": 645 }, { "epoch": 0.04377027381829577, "grad_norm": 3.019231081008911, "learning_rate": 0.000344, "loss": 0.8811, "step": 646 }, { "epoch": 0.04383802966011976, "grad_norm": 4.965334892272949, "learning_rate": 0.0003445333333333333, "loss": 1.012, "step": 647 }, { "epoch": 0.043905785501943746, "grad_norm": 3.4868407249450684, "learning_rate": 0.0003450666666666667, "loss": 0.9993, "step": 648 }, { "epoch": 0.043973541343767734, "grad_norm": 3.9563772678375244, "learning_rate": 0.0003456, "loss": 1.0098, "step": 649 }, { "epoch": 0.04404129718559172, "grad_norm": 2.7158477306365967, "learning_rate": 0.0003461333333333333, "loss": 0.7081, "step": 650 }, { "epoch": 0.04410905302741571, "grad_norm": 4.0581512451171875, "learning_rate": 0.00034666666666666667, "loss": 1.1142, "step": 651 }, { "epoch": 0.04417680886923969, "grad_norm": 2.7889671325683594, "learning_rate": 0.00034720000000000004, "loss": 0.7659, "step": 652 }, { "epoch": 0.04424456471106368, "grad_norm": 3.3274965286254883, "learning_rate": 0.00034773333333333335, "loss": 0.9495, "step": 653 }, { "epoch": 0.04431232055288767, "grad_norm": 3.3760342597961426, "learning_rate": 0.0003482666666666667, "loss": 0.9421, "step": 654 }, { "epoch": 0.04438007639471166, "grad_norm": 2.580782651901245, "learning_rate": 0.0003488, "loss": 0.7386, "step": 655 }, { "epoch": 0.044447832236535645, "grad_norm": 2.6320087909698486, "learning_rate": 0.00034933333333333333, "loss": 0.838, "step": 656 }, { "epoch": 0.04451558807835963, "grad_norm": 3.1689820289611816, "learning_rate": 0.0003498666666666667, "loss": 0.9038, "step": 657 }, { "epoch": 0.04458334392018362, "grad_norm": 3.744007110595703, "learning_rate": 0.0003504, "loss": 1.0123, "step": 658 }, { "epoch": 0.0446510997620076, "grad_norm": 3.1133198738098145, "learning_rate": 0.0003509333333333333, "loss": 0.8086, "step": 659 }, { "epoch": 0.04471885560383159, "grad_norm": 3.892974376678467, "learning_rate": 0.0003514666666666667, "loss": 1.0866, "step": 660 }, { "epoch": 0.04478661144565558, "grad_norm": 3.008099317550659, "learning_rate": 0.00035200000000000005, "loss": 0.9656, "step": 661 }, { "epoch": 0.04485436728747957, "grad_norm": 2.807969808578491, "learning_rate": 0.00035253333333333336, "loss": 0.7714, "step": 662 }, { "epoch": 0.044922123129303555, "grad_norm": 3.3305890560150146, "learning_rate": 0.0003530666666666667, "loss": 1.0984, "step": 663 }, { "epoch": 0.044989878971127543, "grad_norm": 3.3619790077209473, "learning_rate": 0.00035360000000000003, "loss": 0.9861, "step": 664 }, { "epoch": 0.04505763481295153, "grad_norm": 3.570796251296997, "learning_rate": 0.00035413333333333334, "loss": 1.1391, "step": 665 }, { "epoch": 0.04512539065477552, "grad_norm": 3.096438407897949, "learning_rate": 0.0003546666666666667, "loss": 1.2258, "step": 666 }, { "epoch": 0.0451931464965995, "grad_norm": 2.5740082263946533, "learning_rate": 0.0003552, "loss": 0.8846, "step": 667 }, { "epoch": 0.04526090233842349, "grad_norm": 3.066537380218506, "learning_rate": 0.0003557333333333333, "loss": 0.855, "step": 668 }, { "epoch": 0.04532865818024748, "grad_norm": 3.297196388244629, "learning_rate": 0.0003562666666666667, "loss": 1.1191, "step": 669 }, { "epoch": 0.045396414022071466, "grad_norm": 3.025925874710083, "learning_rate": 0.0003568, "loss": 0.8708, "step": 670 }, { "epoch": 0.045464169863895454, "grad_norm": 2.9280247688293457, "learning_rate": 0.00035733333333333336, "loss": 0.965, "step": 671 }, { "epoch": 0.04553192570571944, "grad_norm": 3.341310739517212, "learning_rate": 0.00035786666666666673, "loss": 1.0948, "step": 672 }, { "epoch": 0.04559968154754343, "grad_norm": 3.2387642860412598, "learning_rate": 0.00035840000000000004, "loss": 0.9062, "step": 673 }, { "epoch": 0.04566743738936741, "grad_norm": 3.0945987701416016, "learning_rate": 0.00035893333333333335, "loss": 0.9888, "step": 674 }, { "epoch": 0.0457351932311914, "grad_norm": 3.4746453762054443, "learning_rate": 0.00035946666666666666, "loss": 0.9801, "step": 675 }, { "epoch": 0.04580294907301539, "grad_norm": 3.2266781330108643, "learning_rate": 0.00036, "loss": 0.9089, "step": 676 }, { "epoch": 0.045870704914839376, "grad_norm": 3.4057743549346924, "learning_rate": 0.00036053333333333333, "loss": 0.9321, "step": 677 }, { "epoch": 0.045938460756663364, "grad_norm": 5.1262617111206055, "learning_rate": 0.00036106666666666664, "loss": 1.0046, "step": 678 }, { "epoch": 0.04600621659848735, "grad_norm": 3.4456918239593506, "learning_rate": 0.0003616, "loss": 0.9374, "step": 679 }, { "epoch": 0.04607397244031134, "grad_norm": 4.373721122741699, "learning_rate": 0.00036213333333333337, "loss": 1.0569, "step": 680 }, { "epoch": 0.04614172828213532, "grad_norm": 4.362050533294678, "learning_rate": 0.0003626666666666667, "loss": 0.9526, "step": 681 }, { "epoch": 0.04620948412395931, "grad_norm": 3.5073964595794678, "learning_rate": 0.00036320000000000005, "loss": 1.0211, "step": 682 }, { "epoch": 0.0462772399657833, "grad_norm": 3.259704351425171, "learning_rate": 0.00036373333333333336, "loss": 0.7831, "step": 683 }, { "epoch": 0.04634499580760729, "grad_norm": 2.681959867477417, "learning_rate": 0.00036426666666666667, "loss": 0.8421, "step": 684 }, { "epoch": 0.046412751649431275, "grad_norm": 4.008633136749268, "learning_rate": 0.00036480000000000003, "loss": 1.0095, "step": 685 }, { "epoch": 0.04648050749125526, "grad_norm": 3.215111494064331, "learning_rate": 0.00036533333333333334, "loss": 0.7915, "step": 686 }, { "epoch": 0.04654826333307925, "grad_norm": 3.1472980976104736, "learning_rate": 0.00036586666666666665, "loss": 0.8681, "step": 687 }, { "epoch": 0.04661601917490324, "grad_norm": 3.7159383296966553, "learning_rate": 0.0003664, "loss": 1.2919, "step": 688 }, { "epoch": 0.04668377501672722, "grad_norm": 2.898164749145508, "learning_rate": 0.0003669333333333333, "loss": 0.6529, "step": 689 }, { "epoch": 0.04675153085855121, "grad_norm": 3.1191375255584717, "learning_rate": 0.0003674666666666667, "loss": 1.119, "step": 690 }, { "epoch": 0.0468192867003752, "grad_norm": 3.1812002658843994, "learning_rate": 0.00036800000000000005, "loss": 1.1914, "step": 691 }, { "epoch": 0.046887042542199185, "grad_norm": 4.197123050689697, "learning_rate": 0.00036853333333333336, "loss": 1.2257, "step": 692 }, { "epoch": 0.04695479838402317, "grad_norm": 2.880222797393799, "learning_rate": 0.0003690666666666667, "loss": 0.7928, "step": 693 }, { "epoch": 0.04702255422584716, "grad_norm": 3.1386241912841797, "learning_rate": 0.00036960000000000004, "loss": 0.9633, "step": 694 }, { "epoch": 0.04709031006767115, "grad_norm": 2.7785093784332275, "learning_rate": 0.00037013333333333335, "loss": 0.7293, "step": 695 }, { "epoch": 0.04715806590949513, "grad_norm": 3.5515894889831543, "learning_rate": 0.00037066666666666666, "loss": 0.8559, "step": 696 }, { "epoch": 0.04722582175131912, "grad_norm": 3.512742519378662, "learning_rate": 0.0003712, "loss": 0.8807, "step": 697 }, { "epoch": 0.04729357759314311, "grad_norm": 3.525923728942871, "learning_rate": 0.00037173333333333333, "loss": 0.8417, "step": 698 }, { "epoch": 0.047361333434967096, "grad_norm": 4.317751884460449, "learning_rate": 0.0003722666666666667, "loss": 1.134, "step": 699 }, { "epoch": 0.047429089276791084, "grad_norm": 3.909864664077759, "learning_rate": 0.00037280000000000006, "loss": 1.0501, "step": 700 }, { "epoch": 0.04749684511861507, "grad_norm": 3.746986150741577, "learning_rate": 0.0003733333333333334, "loss": 0.8394, "step": 701 }, { "epoch": 0.04756460096043906, "grad_norm": 2.4710848331451416, "learning_rate": 0.0003738666666666667, "loss": 0.7526, "step": 702 }, { "epoch": 0.04763235680226305, "grad_norm": 4.529815673828125, "learning_rate": 0.00037440000000000005, "loss": 1.1123, "step": 703 }, { "epoch": 0.04770011264408703, "grad_norm": 3.030824899673462, "learning_rate": 0.00037493333333333336, "loss": 0.8696, "step": 704 }, { "epoch": 0.04776786848591102, "grad_norm": 5.741715431213379, "learning_rate": 0.00037546666666666667, "loss": 0.7314, "step": 705 }, { "epoch": 0.047835624327735006, "grad_norm": 3.3223133087158203, "learning_rate": 0.000376, "loss": 0.8842, "step": 706 }, { "epoch": 0.047903380169558994, "grad_norm": 3.0566413402557373, "learning_rate": 0.00037653333333333334, "loss": 0.7958, "step": 707 }, { "epoch": 0.04797113601138298, "grad_norm": 6.268533706665039, "learning_rate": 0.00037706666666666665, "loss": 0.9317, "step": 708 }, { "epoch": 0.04803889185320697, "grad_norm": 3.068249464035034, "learning_rate": 0.0003776, "loss": 0.7673, "step": 709 }, { "epoch": 0.04810664769503096, "grad_norm": 2.9452309608459473, "learning_rate": 0.0003781333333333334, "loss": 0.8689, "step": 710 }, { "epoch": 0.04817440353685494, "grad_norm": 2.7499923706054688, "learning_rate": 0.0003786666666666667, "loss": 0.8073, "step": 711 }, { "epoch": 0.04824215937867893, "grad_norm": 3.541820764541626, "learning_rate": 0.0003792, "loss": 1.0404, "step": 712 }, { "epoch": 0.048309915220502916, "grad_norm": 2.7944793701171875, "learning_rate": 0.00037973333333333337, "loss": 0.8429, "step": 713 }, { "epoch": 0.048377671062326905, "grad_norm": 3.4795570373535156, "learning_rate": 0.0003802666666666667, "loss": 1.1089, "step": 714 }, { "epoch": 0.04844542690415089, "grad_norm": 4.470477104187012, "learning_rate": 0.0003808, "loss": 1.1987, "step": 715 }, { "epoch": 0.04851318274597488, "grad_norm": 3.7686710357666016, "learning_rate": 0.00038133333333333335, "loss": 0.9905, "step": 716 }, { "epoch": 0.04858093858779887, "grad_norm": 3.4814260005950928, "learning_rate": 0.00038186666666666666, "loss": 1.0501, "step": 717 }, { "epoch": 0.04864869442962286, "grad_norm": 2.9690535068511963, "learning_rate": 0.0003824, "loss": 0.8655, "step": 718 }, { "epoch": 0.04871645027144684, "grad_norm": 3.0018696784973145, "learning_rate": 0.0003829333333333334, "loss": 0.9617, "step": 719 }, { "epoch": 0.04878420611327083, "grad_norm": 2.861053466796875, "learning_rate": 0.0003834666666666667, "loss": 0.8903, "step": 720 }, { "epoch": 0.048851961955094815, "grad_norm": 3.7595620155334473, "learning_rate": 0.000384, "loss": 0.9875, "step": 721 }, { "epoch": 0.0489197177969188, "grad_norm": 2.907060146331787, "learning_rate": 0.0003845333333333334, "loss": 0.7723, "step": 722 }, { "epoch": 0.04898747363874279, "grad_norm": 4.519503593444824, "learning_rate": 0.0003850666666666667, "loss": 0.8526, "step": 723 }, { "epoch": 0.04905522948056678, "grad_norm": 3.334078788757324, "learning_rate": 0.0003856, "loss": 0.9351, "step": 724 }, { "epoch": 0.04912298532239077, "grad_norm": 3.5368268489837646, "learning_rate": 0.00038613333333333336, "loss": 0.8089, "step": 725 }, { "epoch": 0.04919074116421475, "grad_norm": 4.235016345977783, "learning_rate": 0.00038666666666666667, "loss": 0.8744, "step": 726 }, { "epoch": 0.04925849700603874, "grad_norm": 2.5189549922943115, "learning_rate": 0.00038720000000000003, "loss": 0.6836, "step": 727 }, { "epoch": 0.049326252847862725, "grad_norm": 3.3870596885681152, "learning_rate": 0.0003877333333333334, "loss": 0.9274, "step": 728 }, { "epoch": 0.049394008689686714, "grad_norm": 4.001610279083252, "learning_rate": 0.0003882666666666667, "loss": 1.0687, "step": 729 }, { "epoch": 0.0494617645315107, "grad_norm": 3.3768110275268555, "learning_rate": 0.0003888, "loss": 0.9495, "step": 730 }, { "epoch": 0.04952952037333469, "grad_norm": 2.8878121376037598, "learning_rate": 0.0003893333333333334, "loss": 0.8092, "step": 731 }, { "epoch": 0.04959727621515868, "grad_norm": 3.6729800701141357, "learning_rate": 0.0003898666666666667, "loss": 0.9209, "step": 732 }, { "epoch": 0.049665032056982666, "grad_norm": 2.899528980255127, "learning_rate": 0.0003904, "loss": 0.896, "step": 733 }, { "epoch": 0.04973278789880665, "grad_norm": 2.9886481761932373, "learning_rate": 0.00039093333333333337, "loss": 0.9277, "step": 734 }, { "epoch": 0.049800543740630636, "grad_norm": 3.924001693725586, "learning_rate": 0.0003914666666666667, "loss": 1.131, "step": 735 }, { "epoch": 0.049868299582454624, "grad_norm": 2.929126262664795, "learning_rate": 0.000392, "loss": 0.8613, "step": 736 }, { "epoch": 0.04993605542427861, "grad_norm": 3.5196645259857178, "learning_rate": 0.00039253333333333335, "loss": 1.0517, "step": 737 }, { "epoch": 0.0500038112661026, "grad_norm": 5.8890485763549805, "learning_rate": 0.0003930666666666667, "loss": 0.8777, "step": 738 }, { "epoch": 0.05007156710792659, "grad_norm": 11.812308311462402, "learning_rate": 0.0003936, "loss": 3.0905, "step": 739 }, { "epoch": 0.05013932294975058, "grad_norm": 17.958906173706055, "learning_rate": 0.00039413333333333334, "loss": 2.6268, "step": 740 }, { "epoch": 0.05020707879157456, "grad_norm": 600.3106079101562, "learning_rate": 0.0003946666666666667, "loss": 7.0879, "step": 741 }, { "epoch": 0.050274834633398546, "grad_norm": 84.66808319091797, "learning_rate": 0.0003952, "loss": 12.546, "step": 742 }, { "epoch": 0.050342590475222534, "grad_norm": 16.59645652770996, "learning_rate": 0.0003957333333333333, "loss": 10.5624, "step": 743 }, { "epoch": 0.05041034631704652, "grad_norm": 8.208555221557617, "learning_rate": 0.0003962666666666667, "loss": 10.2705, "step": 744 }, { "epoch": 0.05047810215887051, "grad_norm": 6.307867527008057, "learning_rate": 0.0003968, "loss": 9.8993, "step": 745 }, { "epoch": 0.0505458580006945, "grad_norm": 8.820687294006348, "learning_rate": 0.00039733333333333336, "loss": 9.9412, "step": 746 }, { "epoch": 0.05061361384251849, "grad_norm": 2.205470085144043, "learning_rate": 0.0003978666666666667, "loss": 9.9789, "step": 747 }, { "epoch": 0.05068136968434247, "grad_norm": 4.151752471923828, "learning_rate": 0.00039840000000000003, "loss": 9.6283, "step": 748 }, { "epoch": 0.05074912552616646, "grad_norm": 3.4692091941833496, "learning_rate": 0.00039893333333333334, "loss": 9.2068, "step": 749 }, { "epoch": 0.050816881367990445, "grad_norm": 6.643427848815918, "learning_rate": 0.0003994666666666667, "loss": 9.306, "step": 750 }, { "epoch": 0.05088463720981443, "grad_norm": 2.035916805267334, "learning_rate": 0.0004, "loss": 9.2433, "step": 751 }, { "epoch": 0.05095239305163842, "grad_norm": 3.0338926315307617, "learning_rate": 0.0003999945235487404, "loss": 8.8081, "step": 752 }, { "epoch": 0.05102014889346241, "grad_norm": 4.1774983406066895, "learning_rate": 0.0003999890470974808, "loss": 7.834, "step": 753 }, { "epoch": 0.0510879047352864, "grad_norm": 10.138450622558594, "learning_rate": 0.00039998357064622127, "loss": 8.4914, "step": 754 }, { "epoch": 0.051155660577110386, "grad_norm": 6.8651442527771, "learning_rate": 0.0003999780941949617, "loss": 8.3133, "step": 755 }, { "epoch": 0.05122341641893437, "grad_norm": 6.002278804779053, "learning_rate": 0.0003999726177437021, "loss": 7.6195, "step": 756 }, { "epoch": 0.051291172260758355, "grad_norm": 8.576353073120117, "learning_rate": 0.0003999671412924425, "loss": 6.9921, "step": 757 }, { "epoch": 0.051358928102582344, "grad_norm": 4.290818214416504, "learning_rate": 0.0003999616648411829, "loss": 6.7199, "step": 758 }, { "epoch": 0.05142668394440633, "grad_norm": 4.6634602546691895, "learning_rate": 0.0003999561883899233, "loss": 7.2129, "step": 759 }, { "epoch": 0.05149443978623032, "grad_norm": 2.2393553256988525, "learning_rate": 0.0003999507119386638, "loss": 6.3264, "step": 760 }, { "epoch": 0.05156219562805431, "grad_norm": 8.81263256072998, "learning_rate": 0.0003999452354874042, "loss": 6.212, "step": 761 }, { "epoch": 0.051629951469878296, "grad_norm": 4.736633777618408, "learning_rate": 0.0003999397590361446, "loss": 6.3428, "step": 762 }, { "epoch": 0.05169770731170228, "grad_norm": 4.971992015838623, "learning_rate": 0.000399934282584885, "loss": 5.8014, "step": 763 }, { "epoch": 0.051765463153526266, "grad_norm": 38.19525909423828, "learning_rate": 0.0003999288061336254, "loss": 6.3546, "step": 764 }, { "epoch": 0.051833218995350254, "grad_norm": 23.310277938842773, "learning_rate": 0.0003999233296823659, "loss": 6.5051, "step": 765 }, { "epoch": 0.05190097483717424, "grad_norm": 13.49642562866211, "learning_rate": 0.0003999178532311063, "loss": 5.7665, "step": 766 }, { "epoch": 0.05196873067899823, "grad_norm": 2.8793632984161377, "learning_rate": 0.0003999123767798467, "loss": 6.0075, "step": 767 }, { "epoch": 0.05203648652082222, "grad_norm": 3.381035327911377, "learning_rate": 0.0003999069003285871, "loss": 5.3249, "step": 768 }, { "epoch": 0.05210424236264621, "grad_norm": 6.088931560516357, "learning_rate": 0.0003999014238773275, "loss": 5.4656, "step": 769 }, { "epoch": 0.052171998204470195, "grad_norm": 3.5593619346618652, "learning_rate": 0.00039989594742606793, "loss": 5.5078, "step": 770 }, { "epoch": 0.052239754046294176, "grad_norm": 2.1775152683258057, "learning_rate": 0.0003998904709748084, "loss": 5.5093, "step": 771 }, { "epoch": 0.052307509888118164, "grad_norm": 7.67357063293457, "learning_rate": 0.0003998849945235488, "loss": 5.1376, "step": 772 }, { "epoch": 0.05237526572994215, "grad_norm": 3.736935615539551, "learning_rate": 0.0003998795180722892, "loss": 5.2221, "step": 773 }, { "epoch": 0.05244302157176614, "grad_norm": 1.1449586153030396, "learning_rate": 0.0003998740416210296, "loss": 5.4248, "step": 774 }, { "epoch": 0.05251077741359013, "grad_norm": 1.7828917503356934, "learning_rate": 0.00039986856516977, "loss": 4.9743, "step": 775 }, { "epoch": 0.05257853325541412, "grad_norm": 1.9232449531555176, "learning_rate": 0.00039986308871851043, "loss": 4.9526, "step": 776 }, { "epoch": 0.052646289097238105, "grad_norm": 0.8622642159461975, "learning_rate": 0.00039985761226725083, "loss": 5.24, "step": 777 }, { "epoch": 0.05271404493906209, "grad_norm": 1.9289792776107788, "learning_rate": 0.00039985213581599123, "loss": 4.9224, "step": 778 }, { "epoch": 0.052781800780886075, "grad_norm": 1.022391438484192, "learning_rate": 0.0003998466593647317, "loss": 4.7525, "step": 779 }, { "epoch": 0.05284955662271006, "grad_norm": 1.4984608888626099, "learning_rate": 0.0003998411829134721, "loss": 4.667, "step": 780 }, { "epoch": 0.05291731246453405, "grad_norm": 4.3209662437438965, "learning_rate": 0.00039983570646221254, "loss": 4.692, "step": 781 }, { "epoch": 0.05298506830635804, "grad_norm": 1.600207805633545, "learning_rate": 0.00039983023001095294, "loss": 4.6204, "step": 782 }, { "epoch": 0.05305282414818203, "grad_norm": 2.554429531097412, "learning_rate": 0.00039982475355969334, "loss": 4.5914, "step": 783 }, { "epoch": 0.053120579990006016, "grad_norm": 2.0773489475250244, "learning_rate": 0.00039981927710843374, "loss": 4.6497, "step": 784 }, { "epoch": 0.053188335831830004, "grad_norm": 2.5732383728027344, "learning_rate": 0.00039981380065717413, "loss": 4.8684, "step": 785 }, { "epoch": 0.053256091673653985, "grad_norm": 1.5930901765823364, "learning_rate": 0.0003998083242059146, "loss": 4.7589, "step": 786 }, { "epoch": 0.05332384751547797, "grad_norm": 1.8856751918792725, "learning_rate": 0.00039980284775465504, "loss": 4.7605, "step": 787 }, { "epoch": 0.05339160335730196, "grad_norm": 19.68305778503418, "learning_rate": 0.00039979737130339544, "loss": 4.9804, "step": 788 }, { "epoch": 0.05345935919912595, "grad_norm": 2.6940083503723145, "learning_rate": 0.00039979189485213584, "loss": 4.4454, "step": 789 }, { "epoch": 0.05352711504094994, "grad_norm": 45.97073745727539, "learning_rate": 0.00039978641840087624, "loss": 4.7939, "step": 790 }, { "epoch": 0.053594870882773926, "grad_norm": 10.410804748535156, "learning_rate": 0.00039978094194961664, "loss": 4.5857, "step": 791 }, { "epoch": 0.053662626724597914, "grad_norm": 23.36691665649414, "learning_rate": 0.0003997754654983571, "loss": 4.3369, "step": 792 }, { "epoch": 0.053730382566421896, "grad_norm": 1.963236927986145, "learning_rate": 0.0003997699890470975, "loss": 4.6738, "step": 793 }, { "epoch": 0.053798138408245884, "grad_norm": 1.1950116157531738, "learning_rate": 0.00039976451259583794, "loss": 4.8778, "step": 794 }, { "epoch": 0.05386589425006987, "grad_norm": 81.23480987548828, "learning_rate": 0.00039975903614457834, "loss": 4.7837, "step": 795 }, { "epoch": 0.05393365009189386, "grad_norm": 5.290766716003418, "learning_rate": 0.00039975355969331874, "loss": 4.506, "step": 796 }, { "epoch": 0.05400140593371785, "grad_norm": 3.2260262966156006, "learning_rate": 0.00039974808324205914, "loss": 4.3442, "step": 797 }, { "epoch": 0.05406916177554184, "grad_norm": 1.7140204906463623, "learning_rate": 0.0003997426067907996, "loss": 4.2865, "step": 798 }, { "epoch": 0.054136917617365825, "grad_norm": 3.7086181640625, "learning_rate": 0.00039973713033954, "loss": 4.4939, "step": 799 }, { "epoch": 0.054204673459189806, "grad_norm": 2.6034536361694336, "learning_rate": 0.0003997316538882804, "loss": 4.5271, "step": 800 }, { "epoch": 0.054272429301013794, "grad_norm": 1.5339417457580566, "learning_rate": 0.0003997261774370208, "loss": 4.5487, "step": 801 }, { "epoch": 0.05434018514283778, "grad_norm": 0.9662337303161621, "learning_rate": 0.00039972070098576125, "loss": 4.4613, "step": 802 }, { "epoch": 0.05440794098466177, "grad_norm": 4.066468715667725, "learning_rate": 0.0003997152245345017, "loss": 4.5914, "step": 803 }, { "epoch": 0.05447569682648576, "grad_norm": 1.0601027011871338, "learning_rate": 0.0003997097480832421, "loss": 4.7598, "step": 804 }, { "epoch": 0.05454345266830975, "grad_norm": 0.9377830624580383, "learning_rate": 0.0003997042716319825, "loss": 4.8, "step": 805 }, { "epoch": 0.054611208510133735, "grad_norm": 1.1871048212051392, "learning_rate": 0.0003996987951807229, "loss": 4.7096, "step": 806 }, { "epoch": 0.05467896435195772, "grad_norm": 21.85776138305664, "learning_rate": 0.0003996933187294633, "loss": 5.3304, "step": 807 }, { "epoch": 0.054746720193781705, "grad_norm": 1.4815789461135864, "learning_rate": 0.00039968784227820375, "loss": 4.552, "step": 808 }, { "epoch": 0.05481447603560569, "grad_norm": 2.6075069904327393, "learning_rate": 0.00039968236582694415, "loss": 4.4726, "step": 809 }, { "epoch": 0.05488223187742968, "grad_norm": 2.40385365486145, "learning_rate": 0.0003996768893756846, "loss": 4.5902, "step": 810 }, { "epoch": 0.05494998771925367, "grad_norm": 0.9601931571960449, "learning_rate": 0.000399671412924425, "loss": 4.6827, "step": 811 }, { "epoch": 0.05501774356107766, "grad_norm": 2.3693535327911377, "learning_rate": 0.0003996659364731654, "loss": 4.5815, "step": 812 }, { "epoch": 0.055085499402901646, "grad_norm": 11.339011192321777, "learning_rate": 0.0003996604600219058, "loss": 4.8419, "step": 813 }, { "epoch": 0.055153255244725634, "grad_norm": 0.9337912201881409, "learning_rate": 0.00039965498357064625, "loss": 4.3496, "step": 814 }, { "epoch": 0.055221011086549615, "grad_norm": 6.3379950523376465, "learning_rate": 0.00039964950711938665, "loss": 4.911, "step": 815 }, { "epoch": 0.0552887669283736, "grad_norm": 1.7116317749023438, "learning_rate": 0.00039964403066812705, "loss": 4.9699, "step": 816 }, { "epoch": 0.05535652277019759, "grad_norm": 2.0963220596313477, "learning_rate": 0.0003996385542168675, "loss": 4.507, "step": 817 }, { "epoch": 0.05542427861202158, "grad_norm": 4.710665702819824, "learning_rate": 0.0003996330777656079, "loss": 4.5403, "step": 818 }, { "epoch": 0.05549203445384557, "grad_norm": 6.667975425720215, "learning_rate": 0.00039962760131434836, "loss": 4.4518, "step": 819 }, { "epoch": 0.055559790295669556, "grad_norm": 2.572284698486328, "learning_rate": 0.00039962212486308876, "loss": 5.0368, "step": 820 }, { "epoch": 0.055627546137493544, "grad_norm": 1.625669002532959, "learning_rate": 0.00039961664841182916, "loss": 4.6799, "step": 821 }, { "epoch": 0.05569530197931753, "grad_norm": 2.4382598400115967, "learning_rate": 0.00039961117196056956, "loss": 4.568, "step": 822 }, { "epoch": 0.055763057821141514, "grad_norm": 1.5718884468078613, "learning_rate": 0.00039960569550930996, "loss": 4.4828, "step": 823 }, { "epoch": 0.0558308136629655, "grad_norm": 1.398815631866455, "learning_rate": 0.0003996002190580504, "loss": 4.7335, "step": 824 }, { "epoch": 0.05589856950478949, "grad_norm": 2.414658784866333, "learning_rate": 0.00039959474260679086, "loss": 4.8948, "step": 825 }, { "epoch": 0.05596632534661348, "grad_norm": 0.6470986008644104, "learning_rate": 0.00039958926615553126, "loss": 4.7307, "step": 826 }, { "epoch": 0.056034081188437466, "grad_norm": 0.9816928505897522, "learning_rate": 0.00039958378970427166, "loss": 4.6925, "step": 827 }, { "epoch": 0.056101837030261455, "grad_norm": 0.7028578519821167, "learning_rate": 0.00039957831325301206, "loss": 3.8737, "step": 828 }, { "epoch": 0.05616959287208544, "grad_norm": 1.4886821508407593, "learning_rate": 0.00039957283680175246, "loss": 4.2847, "step": 829 }, { "epoch": 0.056237348713909424, "grad_norm": 0.9010695815086365, "learning_rate": 0.0003995673603504929, "loss": 4.5775, "step": 830 }, { "epoch": 0.05630510455573341, "grad_norm": 1.010182499885559, "learning_rate": 0.0003995618838992333, "loss": 4.201, "step": 831 }, { "epoch": 0.0563728603975574, "grad_norm": 0.8768413662910461, "learning_rate": 0.0003995564074479737, "loss": 5.0419, "step": 832 }, { "epoch": 0.05644061623938139, "grad_norm": 0.5664284229278564, "learning_rate": 0.00039955093099671416, "loss": 4.8007, "step": 833 }, { "epoch": 0.05650837208120538, "grad_norm": 0.9429548382759094, "learning_rate": 0.00039954545454545456, "loss": 4.2521, "step": 834 }, { "epoch": 0.056576127923029365, "grad_norm": 1.328769564628601, "learning_rate": 0.00039953997809419496, "loss": 4.9852, "step": 835 }, { "epoch": 0.05664388376485335, "grad_norm": 0.9594823122024536, "learning_rate": 0.0003995345016429354, "loss": 4.6721, "step": 836 }, { "epoch": 0.05671163960667734, "grad_norm": 1.1371556520462036, "learning_rate": 0.0003995290251916758, "loss": 4.6596, "step": 837 }, { "epoch": 0.05677939544850132, "grad_norm": 0.9706981778144836, "learning_rate": 0.0003995235487404162, "loss": 4.8398, "step": 838 }, { "epoch": 0.05684715129032531, "grad_norm": 3.1281120777130127, "learning_rate": 0.0003995180722891566, "loss": 4.2573, "step": 839 }, { "epoch": 0.0569149071321493, "grad_norm": 3.9823930263519287, "learning_rate": 0.00039951259583789707, "loss": 4.5177, "step": 840 }, { "epoch": 0.05698266297397329, "grad_norm": 3.1667728424072266, "learning_rate": 0.0003995071193866375, "loss": 4.362, "step": 841 }, { "epoch": 0.057050418815797276, "grad_norm": 1.2826701402664185, "learning_rate": 0.0003995016429353779, "loss": 4.4142, "step": 842 }, { "epoch": 0.057118174657621264, "grad_norm": 1.3967714309692383, "learning_rate": 0.0003994961664841183, "loss": 4.6293, "step": 843 }, { "epoch": 0.05718593049944525, "grad_norm": 1.2823281288146973, "learning_rate": 0.0003994906900328587, "loss": 4.5802, "step": 844 }, { "epoch": 0.05725368634126923, "grad_norm": 1.9164766073226929, "learning_rate": 0.0003994852135815991, "loss": 4.5611, "step": 845 }, { "epoch": 0.05732144218309322, "grad_norm": 3.7186031341552734, "learning_rate": 0.00039947973713033957, "loss": 4.6504, "step": 846 }, { "epoch": 0.05738919802491721, "grad_norm": 0.5601058006286621, "learning_rate": 0.00039947426067907997, "loss": 4.3909, "step": 847 }, { "epoch": 0.0574569538667412, "grad_norm": 1.2436442375183105, "learning_rate": 0.00039946878422782037, "loss": 4.4965, "step": 848 }, { "epoch": 0.057524709708565186, "grad_norm": 0.584384024143219, "learning_rate": 0.0003994633077765608, "loss": 4.5081, "step": 849 }, { "epoch": 0.057592465550389174, "grad_norm": 0.7431687116622925, "learning_rate": 0.0003994578313253012, "loss": 4.5399, "step": 850 }, { "epoch": 0.05766022139221316, "grad_norm": 1.0575697422027588, "learning_rate": 0.0003994523548740416, "loss": 4.4467, "step": 851 }, { "epoch": 0.05772797723403715, "grad_norm": 1.2355396747589111, "learning_rate": 0.0003994468784227821, "loss": 4.1727, "step": 852 }, { "epoch": 0.05779573307586113, "grad_norm": 0.7289327383041382, "learning_rate": 0.0003994414019715225, "loss": 4.4649, "step": 853 }, { "epoch": 0.05786348891768512, "grad_norm": 0.6486912369728088, "learning_rate": 0.0003994359255202629, "loss": 4.4857, "step": 854 }, { "epoch": 0.05793124475950911, "grad_norm": 3.4088003635406494, "learning_rate": 0.00039943044906900327, "loss": 4.3327, "step": 855 }, { "epoch": 0.057999000601333096, "grad_norm": 0.862855851650238, "learning_rate": 0.0003994249726177437, "loss": 4.3531, "step": 856 }, { "epoch": 0.058066756443157085, "grad_norm": 1.7253564596176147, "learning_rate": 0.0003994194961664842, "loss": 4.5792, "step": 857 }, { "epoch": 0.05813451228498107, "grad_norm": 1.3579140901565552, "learning_rate": 0.0003994140197152246, "loss": 4.6806, "step": 858 }, { "epoch": 0.05820226812680506, "grad_norm": 0.5810004472732544, "learning_rate": 0.000399408543263965, "loss": 4.5432, "step": 859 }, { "epoch": 0.05827002396862904, "grad_norm": 1.7456716299057007, "learning_rate": 0.0003994030668127054, "loss": 4.9803, "step": 860 }, { "epoch": 0.05833777981045303, "grad_norm": 3.1904361248016357, "learning_rate": 0.0003993975903614458, "loss": 4.3816, "step": 861 }, { "epoch": 0.05840553565227702, "grad_norm": 0.6206144094467163, "learning_rate": 0.00039939211391018623, "loss": 4.3105, "step": 862 }, { "epoch": 0.05847329149410101, "grad_norm": 1.7596697807312012, "learning_rate": 0.00039938663745892663, "loss": 4.621, "step": 863 }, { "epoch": 0.058541047335924995, "grad_norm": 2.8368983268737793, "learning_rate": 0.0003993811610076671, "loss": 4.152, "step": 864 }, { "epoch": 0.05860880317774898, "grad_norm": 1.091814398765564, "learning_rate": 0.0003993756845564075, "loss": 4.5723, "step": 865 }, { "epoch": 0.05867655901957297, "grad_norm": 1.0570927858352661, "learning_rate": 0.0003993702081051479, "loss": 4.1766, "step": 866 }, { "epoch": 0.05874431486139695, "grad_norm": 3.649738073348999, "learning_rate": 0.0003993647316538883, "loss": 4.7552, "step": 867 }, { "epoch": 0.05881207070322094, "grad_norm": 2.5619242191314697, "learning_rate": 0.00039935925520262873, "loss": 4.3709, "step": 868 }, { "epoch": 0.05887982654504493, "grad_norm": 3.267697334289551, "learning_rate": 0.00039935377875136913, "loss": 3.8417, "step": 869 }, { "epoch": 0.05894758238686892, "grad_norm": 1.2656768560409546, "learning_rate": 0.00039934830230010953, "loss": 4.7309, "step": 870 }, { "epoch": 0.059015338228692905, "grad_norm": 1.1866697072982788, "learning_rate": 0.00039934282584884993, "loss": 4.5168, "step": 871 }, { "epoch": 0.059083094070516894, "grad_norm": 2.16274356842041, "learning_rate": 0.0003993373493975904, "loss": 4.3978, "step": 872 }, { "epoch": 0.05915084991234088, "grad_norm": 3.568162441253662, "learning_rate": 0.0003993318729463308, "loss": 4.5905, "step": 873 }, { "epoch": 0.05921860575416487, "grad_norm": 0.8236764669418335, "learning_rate": 0.00039932639649507124, "loss": 4.4475, "step": 874 }, { "epoch": 0.05928636159598885, "grad_norm": 0.4662351608276367, "learning_rate": 0.00039932092004381164, "loss": 4.6467, "step": 875 }, { "epoch": 0.05935411743781284, "grad_norm": 0.882517397403717, "learning_rate": 0.00039931544359255204, "loss": 4.7026, "step": 876 }, { "epoch": 0.05942187327963683, "grad_norm": 0.5317507386207581, "learning_rate": 0.00039930996714129243, "loss": 4.3514, "step": 877 }, { "epoch": 0.059489629121460816, "grad_norm": 0.6500367522239685, "learning_rate": 0.0003993044906900329, "loss": 4.533, "step": 878 }, { "epoch": 0.059557384963284804, "grad_norm": 0.4958527982234955, "learning_rate": 0.0003992990142387733, "loss": 4.241, "step": 879 }, { "epoch": 0.05962514080510879, "grad_norm": 0.5385131239891052, "learning_rate": 0.00039929353778751374, "loss": 4.5951, "step": 880 }, { "epoch": 0.05969289664693278, "grad_norm": 0.9459277987480164, "learning_rate": 0.00039928806133625414, "loss": 4.6937, "step": 881 }, { "epoch": 0.05976065248875676, "grad_norm": 7.695199489593506, "learning_rate": 0.00039928258488499454, "loss": 4.8578, "step": 882 }, { "epoch": 0.05982840833058075, "grad_norm": 1.0553722381591797, "learning_rate": 0.00039927710843373494, "loss": 4.2417, "step": 883 }, { "epoch": 0.05989616417240474, "grad_norm": 1.0332300662994385, "learning_rate": 0.0003992716319824754, "loss": 4.2995, "step": 884 }, { "epoch": 0.059963920014228726, "grad_norm": 2.336469888687134, "learning_rate": 0.0003992661555312158, "loss": 4.4714, "step": 885 }, { "epoch": 0.060031675856052714, "grad_norm": 1.1845463514328003, "learning_rate": 0.0003992606790799562, "loss": 4.1134, "step": 886 }, { "epoch": 0.0600994316978767, "grad_norm": 4.708810806274414, "learning_rate": 0.00039925520262869664, "loss": 4.3074, "step": 887 }, { "epoch": 0.06016718753970069, "grad_norm": 1.973006248474121, "learning_rate": 0.00039924972617743704, "loss": 4.173, "step": 888 }, { "epoch": 0.06023494338152468, "grad_norm": 1.1969362497329712, "learning_rate": 0.00039924424972617744, "loss": 4.4708, "step": 889 }, { "epoch": 0.06030269922334866, "grad_norm": 1.9653278589248657, "learning_rate": 0.0003992387732749179, "loss": 4.9491, "step": 890 }, { "epoch": 0.06037045506517265, "grad_norm": 4.1152472496032715, "learning_rate": 0.0003992332968236583, "loss": 4.098, "step": 891 }, { "epoch": 0.06043821090699664, "grad_norm": 1.6702840328216553, "learning_rate": 0.0003992278203723987, "loss": 4.49, "step": 892 }, { "epoch": 0.060505966748820625, "grad_norm": 2.285109758377075, "learning_rate": 0.0003992223439211391, "loss": 4.2792, "step": 893 }, { "epoch": 0.06057372259064461, "grad_norm": 1.6747833490371704, "learning_rate": 0.0003992168674698795, "loss": 4.5042, "step": 894 }, { "epoch": 0.0606414784324686, "grad_norm": 1.3493297100067139, "learning_rate": 0.00039921139101862, "loss": 4.5992, "step": 895 }, { "epoch": 0.06070923427429259, "grad_norm": 0.7767273187637329, "learning_rate": 0.0003992059145673604, "loss": 4.4913, "step": 896 }, { "epoch": 0.06077699011611657, "grad_norm": 3.5427534580230713, "learning_rate": 0.0003992004381161008, "loss": 4.6355, "step": 897 }, { "epoch": 0.06084474595794056, "grad_norm": 6.20245361328125, "learning_rate": 0.0003991949616648412, "loss": 4.264, "step": 898 }, { "epoch": 0.06091250179976455, "grad_norm": 2.2082314491271973, "learning_rate": 0.0003991894852135816, "loss": 4.5164, "step": 899 }, { "epoch": 0.060980257641588535, "grad_norm": 2.7530717849731445, "learning_rate": 0.00039918400876232205, "loss": 4.6049, "step": 900 }, { "epoch": 0.061048013483412523, "grad_norm": 1.00613534450531, "learning_rate": 0.00039917853231106245, "loss": 4.6851, "step": 901 }, { "epoch": 0.06111576932523651, "grad_norm": 1.1841747760772705, "learning_rate": 0.00039917305585980285, "loss": 4.2413, "step": 902 }, { "epoch": 0.0611835251670605, "grad_norm": 1.3521040678024292, "learning_rate": 0.0003991675794085433, "loss": 4.2449, "step": 903 }, { "epoch": 0.06125128100888449, "grad_norm": 1.6031383275985718, "learning_rate": 0.0003991621029572837, "loss": 4.5417, "step": 904 }, { "epoch": 0.06131903685070847, "grad_norm": 0.4420834183692932, "learning_rate": 0.0003991566265060241, "loss": 4.4557, "step": 905 }, { "epoch": 0.06138679269253246, "grad_norm": 0.8626751899719238, "learning_rate": 0.00039915115005476455, "loss": 4.4907, "step": 906 }, { "epoch": 0.061454548534356446, "grad_norm": 0.6176138520240784, "learning_rate": 0.00039914567360350495, "loss": 4.9553, "step": 907 }, { "epoch": 0.061522304376180434, "grad_norm": 1.3151031732559204, "learning_rate": 0.00039914019715224535, "loss": 4.2128, "step": 908 }, { "epoch": 0.06159006021800442, "grad_norm": 1.351062536239624, "learning_rate": 0.00039913472070098575, "loss": 4.557, "step": 909 }, { "epoch": 0.06165781605982841, "grad_norm": 0.9055910110473633, "learning_rate": 0.00039912924424972615, "loss": 4.4782, "step": 910 }, { "epoch": 0.0617255719016524, "grad_norm": 0.6445788145065308, "learning_rate": 0.0003991237677984666, "loss": 4.5884, "step": 911 }, { "epoch": 0.06179332774347638, "grad_norm": 7.6627421379089355, "learning_rate": 0.00039911829134720706, "loss": 4.4845, "step": 912 }, { "epoch": 0.06186108358530037, "grad_norm": 1.9889767169952393, "learning_rate": 0.00039911281489594746, "loss": 4.3548, "step": 913 }, { "epoch": 0.061928839427124356, "grad_norm": 0.8727238774299622, "learning_rate": 0.00039910733844468786, "loss": 4.8449, "step": 914 }, { "epoch": 0.061996595268948344, "grad_norm": 0.5041613578796387, "learning_rate": 0.00039910186199342826, "loss": 4.8343, "step": 915 }, { "epoch": 0.06206435111077233, "grad_norm": 1.674844741821289, "learning_rate": 0.0003990963855421687, "loss": 4.8545, "step": 916 }, { "epoch": 0.06213210695259632, "grad_norm": 0.914272665977478, "learning_rate": 0.0003990909090909091, "loss": 4.7193, "step": 917 }, { "epoch": 0.06219986279442031, "grad_norm": 0.46128419041633606, "learning_rate": 0.00039908543263964956, "loss": 4.8599, "step": 918 }, { "epoch": 0.0622676186362443, "grad_norm": 0.9554855823516846, "learning_rate": 0.00039907995618838996, "loss": 4.6282, "step": 919 }, { "epoch": 0.06233537447806828, "grad_norm": 0.6503332257270813, "learning_rate": 0.00039907447973713036, "loss": 4.73, "step": 920 }, { "epoch": 0.06240313031989227, "grad_norm": 0.6322983503341675, "learning_rate": 0.00039906900328587076, "loss": 4.5616, "step": 921 }, { "epoch": 0.062470886161716255, "grad_norm": 1.0539222955703735, "learning_rate": 0.0003990635268346112, "loss": 4.561, "step": 922 }, { "epoch": 0.06253864200354024, "grad_norm": 0.5341288447380066, "learning_rate": 0.0003990580503833516, "loss": 4.5272, "step": 923 }, { "epoch": 0.06260639784536423, "grad_norm": 0.5188624262809753, "learning_rate": 0.000399052573932092, "loss": 3.9657, "step": 924 }, { "epoch": 0.06267415368718822, "grad_norm": 1.161488652229309, "learning_rate": 0.0003990470974808324, "loss": 4.8578, "step": 925 }, { "epoch": 0.06274190952901221, "grad_norm": 0.901706337928772, "learning_rate": 0.00039904162102957286, "loss": 4.1258, "step": 926 }, { "epoch": 0.0628096653708362, "grad_norm": 0.7442399263381958, "learning_rate": 0.00039903614457831326, "loss": 4.3266, "step": 927 }, { "epoch": 0.06287742121266018, "grad_norm": 0.933000385761261, "learning_rate": 0.0003990306681270537, "loss": 4.3659, "step": 928 }, { "epoch": 0.06294517705448417, "grad_norm": 0.8820518851280212, "learning_rate": 0.0003990251916757941, "loss": 4.4208, "step": 929 }, { "epoch": 0.06301293289630816, "grad_norm": 0.5613499283790588, "learning_rate": 0.0003990197152245345, "loss": 4.3645, "step": 930 }, { "epoch": 0.06308068873813213, "grad_norm": 0.5223057270050049, "learning_rate": 0.0003990142387732749, "loss": 4.2303, "step": 931 }, { "epoch": 0.06314844457995612, "grad_norm": 1.0660102367401123, "learning_rate": 0.0003990087623220153, "loss": 4.7075, "step": 932 }, { "epoch": 0.06321620042178011, "grad_norm": 0.6657586693763733, "learning_rate": 0.00039900328587075577, "loss": 4.4128, "step": 933 }, { "epoch": 0.0632839562636041, "grad_norm": 1.7147760391235352, "learning_rate": 0.0003989978094194962, "loss": 4.2895, "step": 934 }, { "epoch": 0.06335171210542809, "grad_norm": 1.1054401397705078, "learning_rate": 0.0003989923329682366, "loss": 4.2386, "step": 935 }, { "epoch": 0.06341946794725208, "grad_norm": 1.0966932773590088, "learning_rate": 0.000398986856516977, "loss": 4.8277, "step": 936 }, { "epoch": 0.06348722378907606, "grad_norm": 3.9974160194396973, "learning_rate": 0.0003989813800657174, "loss": 4.5172, "step": 937 }, { "epoch": 0.06355497963090005, "grad_norm": 1.267701268196106, "learning_rate": 0.00039897590361445787, "loss": 4.3277, "step": 938 }, { "epoch": 0.06362273547272404, "grad_norm": 1.3108429908752441, "learning_rate": 0.00039897042716319827, "loss": 4.7102, "step": 939 }, { "epoch": 0.06369049131454803, "grad_norm": 1.343170166015625, "learning_rate": 0.00039896495071193867, "loss": 4.6718, "step": 940 }, { "epoch": 0.06375824715637202, "grad_norm": 0.9056527614593506, "learning_rate": 0.00039895947426067907, "loss": 4.539, "step": 941 }, { "epoch": 0.063826002998196, "grad_norm": 0.9318846464157104, "learning_rate": 0.0003989539978094195, "loss": 4.5815, "step": 942 }, { "epoch": 0.06389375884001999, "grad_norm": 3.7652578353881836, "learning_rate": 0.0003989485213581599, "loss": 4.3776, "step": 943 }, { "epoch": 0.06396151468184398, "grad_norm": 3.356126546859741, "learning_rate": 0.0003989430449069004, "loss": 4.4733, "step": 944 }, { "epoch": 0.06402927052366797, "grad_norm": 0.5597432851791382, "learning_rate": 0.0003989375684556408, "loss": 4.2956, "step": 945 }, { "epoch": 0.06409702636549194, "grad_norm": 1.158571720123291, "learning_rate": 0.00039893209200438117, "loss": 4.5364, "step": 946 }, { "epoch": 0.06416478220731593, "grad_norm": 0.6880437135696411, "learning_rate": 0.00039892661555312157, "loss": 4.7095, "step": 947 }, { "epoch": 0.06423253804913992, "grad_norm": 0.8595593571662903, "learning_rate": 0.00039892113910186197, "loss": 4.1686, "step": 948 }, { "epoch": 0.06430029389096391, "grad_norm": 1.2641078233718872, "learning_rate": 0.0003989156626506024, "loss": 4.3254, "step": 949 }, { "epoch": 0.0643680497327879, "grad_norm": 2.034414768218994, "learning_rate": 0.0003989101861993429, "loss": 4.7, "step": 950 }, { "epoch": 0.06443580557461188, "grad_norm": 0.6731711626052856, "learning_rate": 0.0003989047097480833, "loss": 3.9361, "step": 951 }, { "epoch": 0.06450356141643587, "grad_norm": 0.8040578365325928, "learning_rate": 0.0003988992332968237, "loss": 4.5364, "step": 952 }, { "epoch": 0.06457131725825986, "grad_norm": 1.1313163042068481, "learning_rate": 0.0003988937568455641, "loss": 4.3159, "step": 953 }, { "epoch": 0.06463907310008385, "grad_norm": 0.8646087646484375, "learning_rate": 0.00039888828039430453, "loss": 4.2919, "step": 954 }, { "epoch": 0.06470682894190784, "grad_norm": 1.0710028409957886, "learning_rate": 0.00039888280394304493, "loss": 4.3825, "step": 955 }, { "epoch": 0.06477458478373183, "grad_norm": 0.5882676839828491, "learning_rate": 0.00039887732749178533, "loss": 4.5684, "step": 956 }, { "epoch": 0.06484234062555581, "grad_norm": 0.8101402521133423, "learning_rate": 0.0003988718510405258, "loss": 4.6568, "step": 957 }, { "epoch": 0.0649100964673798, "grad_norm": 1.2646816968917847, "learning_rate": 0.0003988663745892662, "loss": 4.3971, "step": 958 }, { "epoch": 0.06497785230920379, "grad_norm": 0.813895046710968, "learning_rate": 0.0003988608981380066, "loss": 4.295, "step": 959 }, { "epoch": 0.06504560815102776, "grad_norm": 0.5896276831626892, "learning_rate": 0.00039885542168674703, "loss": 4.5589, "step": 960 }, { "epoch": 0.06511336399285175, "grad_norm": 0.8398045301437378, "learning_rate": 0.00039884994523548743, "loss": 4.5343, "step": 961 }, { "epoch": 0.06518111983467574, "grad_norm": 0.811392605304718, "learning_rate": 0.00039884446878422783, "loss": 4.5549, "step": 962 }, { "epoch": 0.06524887567649973, "grad_norm": 1.1811622381210327, "learning_rate": 0.00039883899233296823, "loss": 4.3639, "step": 963 }, { "epoch": 0.06531663151832372, "grad_norm": 0.8886870741844177, "learning_rate": 0.00039883351588170863, "loss": 4.344, "step": 964 }, { "epoch": 0.0653843873601477, "grad_norm": 0.6795641183853149, "learning_rate": 0.0003988280394304491, "loss": 4.0812, "step": 965 }, { "epoch": 0.0654521432019717, "grad_norm": 0.890152633190155, "learning_rate": 0.00039882256297918954, "loss": 4.3028, "step": 966 }, { "epoch": 0.06551989904379568, "grad_norm": 1.1197621822357178, "learning_rate": 0.00039881708652792994, "loss": 4.1714, "step": 967 }, { "epoch": 0.06558765488561967, "grad_norm": 0.7774588465690613, "learning_rate": 0.00039881161007667033, "loss": 4.5156, "step": 968 }, { "epoch": 0.06565541072744366, "grad_norm": 0.7460579872131348, "learning_rate": 0.00039880613362541073, "loss": 4.7706, "step": 969 }, { "epoch": 0.06572316656926765, "grad_norm": 0.9860925078392029, "learning_rate": 0.00039880065717415113, "loss": 4.3168, "step": 970 }, { "epoch": 0.06579092241109163, "grad_norm": 0.6449454426765442, "learning_rate": 0.0003987951807228916, "loss": 4.2623, "step": 971 }, { "epoch": 0.06585867825291562, "grad_norm": 1.4655145406723022, "learning_rate": 0.000398789704271632, "loss": 4.4417, "step": 972 }, { "epoch": 0.06592643409473961, "grad_norm": 0.7237178683280945, "learning_rate": 0.00039878422782037244, "loss": 4.7146, "step": 973 }, { "epoch": 0.0659941899365636, "grad_norm": 1.0752249956130981, "learning_rate": 0.00039877875136911284, "loss": 4.4108, "step": 974 }, { "epoch": 0.06606194577838757, "grad_norm": 0.8396843075752258, "learning_rate": 0.00039877327491785324, "loss": 4.6174, "step": 975 }, { "epoch": 0.06612970162021156, "grad_norm": 0.56378173828125, "learning_rate": 0.0003987677984665937, "loss": 4.5004, "step": 976 }, { "epoch": 0.06619745746203555, "grad_norm": 2.1583292484283447, "learning_rate": 0.0003987623220153341, "loss": 4.4253, "step": 977 }, { "epoch": 0.06626521330385954, "grad_norm": 0.8933156132698059, "learning_rate": 0.0003987568455640745, "loss": 4.0331, "step": 978 }, { "epoch": 0.06633296914568353, "grad_norm": 1.0771517753601074, "learning_rate": 0.0003987513691128149, "loss": 4.4169, "step": 979 }, { "epoch": 0.06640072498750751, "grad_norm": 0.991187334060669, "learning_rate": 0.0003987458926615553, "loss": 4.5004, "step": 980 }, { "epoch": 0.0664684808293315, "grad_norm": 1.509549856185913, "learning_rate": 0.00039874041621029574, "loss": 3.8146, "step": 981 }, { "epoch": 0.06653623667115549, "grad_norm": 1.5677217245101929, "learning_rate": 0.0003987349397590362, "loss": 4.6354, "step": 982 }, { "epoch": 0.06660399251297948, "grad_norm": 1.3638211488723755, "learning_rate": 0.0003987294633077766, "loss": 4.5198, "step": 983 }, { "epoch": 0.06667174835480347, "grad_norm": 1.1667355298995972, "learning_rate": 0.000398723986856517, "loss": 4.8187, "step": 984 }, { "epoch": 0.06673950419662746, "grad_norm": 0.5072071552276611, "learning_rate": 0.0003987185104052574, "loss": 4.4645, "step": 985 }, { "epoch": 0.06680726003845144, "grad_norm": 1.853524088859558, "learning_rate": 0.0003987130339539978, "loss": 3.9969, "step": 986 }, { "epoch": 0.06687501588027543, "grad_norm": 1.3862594366073608, "learning_rate": 0.00039870755750273825, "loss": 4.4346, "step": 987 }, { "epoch": 0.06694277172209942, "grad_norm": 1.3050211668014526, "learning_rate": 0.0003987020810514787, "loss": 4.1496, "step": 988 }, { "epoch": 0.06701052756392341, "grad_norm": 1.7112905979156494, "learning_rate": 0.0003986966046002191, "loss": 3.8965, "step": 989 }, { "epoch": 0.06707828340574738, "grad_norm": 0.9160133004188538, "learning_rate": 0.0003986911281489595, "loss": 4.0008, "step": 990 }, { "epoch": 0.06714603924757137, "grad_norm": 1.2426139116287231, "learning_rate": 0.0003986856516976999, "loss": 4.8977, "step": 991 }, { "epoch": 0.06721379508939536, "grad_norm": 1.0831527709960938, "learning_rate": 0.00039868017524644035, "loss": 4.548, "step": 992 }, { "epoch": 0.06728155093121935, "grad_norm": 1.182830572128296, "learning_rate": 0.00039867469879518075, "loss": 4.2618, "step": 993 }, { "epoch": 0.06734930677304334, "grad_norm": 1.413882851600647, "learning_rate": 0.00039866922234392115, "loss": 4.6476, "step": 994 }, { "epoch": 0.06741706261486732, "grad_norm": 0.7133269906044006, "learning_rate": 0.00039866374589266155, "loss": 4.3837, "step": 995 }, { "epoch": 0.06748481845669131, "grad_norm": 0.6412605047225952, "learning_rate": 0.000398658269441402, "loss": 4.1152, "step": 996 }, { "epoch": 0.0675525742985153, "grad_norm": 0.9335253834724426, "learning_rate": 0.0003986527929901424, "loss": 4.7547, "step": 997 }, { "epoch": 0.06762033014033929, "grad_norm": 1.1208101511001587, "learning_rate": 0.00039864731653888285, "loss": 3.9859, "step": 998 }, { "epoch": 0.06768808598216328, "grad_norm": 2.39587664604187, "learning_rate": 0.00039864184008762325, "loss": 4.3104, "step": 999 }, { "epoch": 0.06775584182398726, "grad_norm": 2.464878797531128, "learning_rate": 0.00039863636363636365, "loss": 4.2393, "step": 1000 }, { "epoch": 0.06782359766581125, "grad_norm": 1.4290380477905273, "learning_rate": 0.00039863088718510405, "loss": 4.3641, "step": 1001 }, { "epoch": 0.06789135350763524, "grad_norm": 1.573796033859253, "learning_rate": 0.00039862541073384445, "loss": 4.0094, "step": 1002 }, { "epoch": 0.06795910934945923, "grad_norm": 0.8379250168800354, "learning_rate": 0.0003986199342825849, "loss": 4.2881, "step": 1003 }, { "epoch": 0.06802686519128322, "grad_norm": 1.0250741243362427, "learning_rate": 0.00039861445783132536, "loss": 4.5162, "step": 1004 }, { "epoch": 0.06809462103310719, "grad_norm": 0.7703651785850525, "learning_rate": 0.00039860898138006576, "loss": 4.1853, "step": 1005 }, { "epoch": 0.06816237687493118, "grad_norm": 0.8128710389137268, "learning_rate": 0.00039860350492880616, "loss": 4.4538, "step": 1006 }, { "epoch": 0.06823013271675517, "grad_norm": 1.4643748998641968, "learning_rate": 0.00039859802847754655, "loss": 4.1139, "step": 1007 }, { "epoch": 0.06829788855857916, "grad_norm": 1.2365601062774658, "learning_rate": 0.00039859255202628695, "loss": 4.5798, "step": 1008 }, { "epoch": 0.06836564440040314, "grad_norm": 1.72235906124115, "learning_rate": 0.0003985870755750274, "loss": 4.0178, "step": 1009 }, { "epoch": 0.06843340024222713, "grad_norm": 1.3185621500015259, "learning_rate": 0.0003985815991237678, "loss": 4.2725, "step": 1010 }, { "epoch": 0.06850115608405112, "grad_norm": 1.703866958618164, "learning_rate": 0.0003985761226725082, "loss": 4.2919, "step": 1011 }, { "epoch": 0.06856891192587511, "grad_norm": 1.0072482824325562, "learning_rate": 0.00039857064622124866, "loss": 3.9373, "step": 1012 }, { "epoch": 0.0686366677676991, "grad_norm": 0.7422221302986145, "learning_rate": 0.00039856516976998906, "loss": 3.892, "step": 1013 }, { "epoch": 0.06870442360952309, "grad_norm": 1.128000020980835, "learning_rate": 0.0003985596933187295, "loss": 4.4625, "step": 1014 }, { "epoch": 0.06877217945134707, "grad_norm": 1.2138216495513916, "learning_rate": 0.0003985542168674699, "loss": 4.1466, "step": 1015 }, { "epoch": 0.06883993529317106, "grad_norm": 2.2494773864746094, "learning_rate": 0.0003985487404162103, "loss": 4.2887, "step": 1016 }, { "epoch": 0.06890769113499505, "grad_norm": 2.6325812339782715, "learning_rate": 0.0003985432639649507, "loss": 4.1611, "step": 1017 }, { "epoch": 0.06897544697681904, "grad_norm": 0.9742172956466675, "learning_rate": 0.0003985377875136911, "loss": 4.2634, "step": 1018 }, { "epoch": 0.06904320281864303, "grad_norm": 2.056999683380127, "learning_rate": 0.00039853231106243156, "loss": 4.0202, "step": 1019 }, { "epoch": 0.069110958660467, "grad_norm": 1.5828012228012085, "learning_rate": 0.000398526834611172, "loss": 4.4205, "step": 1020 }, { "epoch": 0.06917871450229099, "grad_norm": 0.9790469408035278, "learning_rate": 0.0003985213581599124, "loss": 4.1712, "step": 1021 }, { "epoch": 0.06924647034411498, "grad_norm": 1.1368647813796997, "learning_rate": 0.0003985158817086528, "loss": 4.4563, "step": 1022 }, { "epoch": 0.06931422618593897, "grad_norm": 1.9270405769348145, "learning_rate": 0.0003985104052573932, "loss": 4.3087, "step": 1023 }, { "epoch": 0.06938198202776295, "grad_norm": 1.3553575277328491, "learning_rate": 0.0003985049288061336, "loss": 3.8444, "step": 1024 }, { "epoch": 0.06944973786958694, "grad_norm": 0.9720080494880676, "learning_rate": 0.00039849945235487407, "loss": 4.2423, "step": 1025 }, { "epoch": 0.06951749371141093, "grad_norm": 1.716758370399475, "learning_rate": 0.00039849397590361447, "loss": 4.4032, "step": 1026 }, { "epoch": 0.06958524955323492, "grad_norm": 0.9964433908462524, "learning_rate": 0.0003984884994523549, "loss": 4.3942, "step": 1027 }, { "epoch": 0.0696530053950589, "grad_norm": 3.2764737606048584, "learning_rate": 0.0003984830230010953, "loss": 4.1631, "step": 1028 }, { "epoch": 0.0697207612368829, "grad_norm": 1.7656371593475342, "learning_rate": 0.0003984775465498357, "loss": 4.373, "step": 1029 }, { "epoch": 0.06978851707870688, "grad_norm": 1.1527352333068848, "learning_rate": 0.00039847207009857617, "loss": 4.2716, "step": 1030 }, { "epoch": 0.06985627292053087, "grad_norm": 1.7451441287994385, "learning_rate": 0.00039846659364731657, "loss": 4.2979, "step": 1031 }, { "epoch": 0.06992402876235486, "grad_norm": 1.229651689529419, "learning_rate": 0.00039846111719605697, "loss": 4.101, "step": 1032 }, { "epoch": 0.06999178460417885, "grad_norm": 1.1700464487075806, "learning_rate": 0.00039845564074479737, "loss": 4.4432, "step": 1033 }, { "epoch": 0.07005954044600284, "grad_norm": 1.0305942296981812, "learning_rate": 0.00039845016429353777, "loss": 3.8128, "step": 1034 }, { "epoch": 0.07012729628782681, "grad_norm": 0.9988133907318115, "learning_rate": 0.0003984446878422782, "loss": 4.2801, "step": 1035 }, { "epoch": 0.0701950521296508, "grad_norm": 1.3242058753967285, "learning_rate": 0.0003984392113910187, "loss": 4.1748, "step": 1036 }, { "epoch": 0.07026280797147479, "grad_norm": 1.203094720840454, "learning_rate": 0.0003984337349397591, "loss": 4.5268, "step": 1037 }, { "epoch": 0.07033056381329877, "grad_norm": 1.6429884433746338, "learning_rate": 0.00039842825848849947, "loss": 4.2603, "step": 1038 }, { "epoch": 0.07039831965512276, "grad_norm": 1.2461512088775635, "learning_rate": 0.00039842278203723987, "loss": 4.0363, "step": 1039 }, { "epoch": 0.07046607549694675, "grad_norm": 1.0287927389144897, "learning_rate": 0.00039841730558598027, "loss": 4.5749, "step": 1040 }, { "epoch": 0.07053383133877074, "grad_norm": 1.0760092735290527, "learning_rate": 0.0003984118291347207, "loss": 4.2876, "step": 1041 }, { "epoch": 0.07060158718059473, "grad_norm": 1.201130986213684, "learning_rate": 0.0003984063526834611, "loss": 4.0461, "step": 1042 }, { "epoch": 0.07066934302241872, "grad_norm": 1.146349549293518, "learning_rate": 0.0003984008762322016, "loss": 4.2335, "step": 1043 }, { "epoch": 0.0707370988642427, "grad_norm": 2.2435390949249268, "learning_rate": 0.000398395399780942, "loss": 4.5031, "step": 1044 }, { "epoch": 0.07080485470606669, "grad_norm": 1.7536578178405762, "learning_rate": 0.0003983899233296824, "loss": 4.1415, "step": 1045 }, { "epoch": 0.07087261054789068, "grad_norm": 2.5189902782440186, "learning_rate": 0.0003983844468784228, "loss": 3.8517, "step": 1046 }, { "epoch": 0.07094036638971467, "grad_norm": 1.8342978954315186, "learning_rate": 0.00039837897042716323, "loss": 4.3664, "step": 1047 }, { "epoch": 0.07100812223153866, "grad_norm": 1.0705716609954834, "learning_rate": 0.00039837349397590363, "loss": 4.6583, "step": 1048 }, { "epoch": 0.07107587807336264, "grad_norm": 2.1361026763916016, "learning_rate": 0.000398368017524644, "loss": 4.0603, "step": 1049 }, { "epoch": 0.07114363391518662, "grad_norm": 1.3097550868988037, "learning_rate": 0.0003983625410733845, "loss": 4.0418, "step": 1050 }, { "epoch": 0.0712113897570106, "grad_norm": 1.4214437007904053, "learning_rate": 0.0003983570646221249, "loss": 4.2993, "step": 1051 }, { "epoch": 0.0712791455988346, "grad_norm": 1.1609143018722534, "learning_rate": 0.00039835158817086533, "loss": 4.2596, "step": 1052 }, { "epoch": 0.07134690144065858, "grad_norm": 1.250801920890808, "learning_rate": 0.00039834611171960573, "loss": 4.3336, "step": 1053 }, { "epoch": 0.07141465728248257, "grad_norm": 1.3202877044677734, "learning_rate": 0.00039834063526834613, "loss": 4.3284, "step": 1054 }, { "epoch": 0.07148241312430656, "grad_norm": 4.244118690490723, "learning_rate": 0.00039833515881708653, "loss": 3.9084, "step": 1055 }, { "epoch": 0.07155016896613055, "grad_norm": 1.9815622568130493, "learning_rate": 0.00039832968236582693, "loss": 4.1809, "step": 1056 }, { "epoch": 0.07161792480795454, "grad_norm": 1.541872262954712, "learning_rate": 0.0003983242059145674, "loss": 4.3144, "step": 1057 }, { "epoch": 0.07168568064977852, "grad_norm": 1.379879117012024, "learning_rate": 0.00039831872946330784, "loss": 4.5101, "step": 1058 }, { "epoch": 0.07175343649160251, "grad_norm": 1.5362669229507446, "learning_rate": 0.00039831325301204824, "loss": 4.2083, "step": 1059 }, { "epoch": 0.0718211923334265, "grad_norm": 1.9623464345932007, "learning_rate": 0.00039830777656078863, "loss": 4.2309, "step": 1060 }, { "epoch": 0.07188894817525049, "grad_norm": 1.2217607498168945, "learning_rate": 0.00039830230010952903, "loss": 4.4965, "step": 1061 }, { "epoch": 0.07195670401707448, "grad_norm": 1.2235454320907593, "learning_rate": 0.00039829682365826943, "loss": 4.6595, "step": 1062 }, { "epoch": 0.07202445985889847, "grad_norm": 1.48357093334198, "learning_rate": 0.0003982913472070099, "loss": 4.3102, "step": 1063 }, { "epoch": 0.07209221570072245, "grad_norm": 0.9867489337921143, "learning_rate": 0.0003982858707557503, "loss": 4.4084, "step": 1064 }, { "epoch": 0.07215997154254643, "grad_norm": 2.2526583671569824, "learning_rate": 0.0003982803943044907, "loss": 3.8039, "step": 1065 }, { "epoch": 0.07222772738437042, "grad_norm": 1.3563092947006226, "learning_rate": 0.00039827491785323114, "loss": 3.8589, "step": 1066 }, { "epoch": 0.0722954832261944, "grad_norm": 2.4422338008880615, "learning_rate": 0.00039826944140197154, "loss": 3.8616, "step": 1067 }, { "epoch": 0.07236323906801839, "grad_norm": 1.7460412979125977, "learning_rate": 0.000398263964950712, "loss": 3.8777, "step": 1068 }, { "epoch": 0.07243099490984238, "grad_norm": 1.8586010932922363, "learning_rate": 0.0003982584884994524, "loss": 4.1959, "step": 1069 }, { "epoch": 0.07249875075166637, "grad_norm": 1.4706990718841553, "learning_rate": 0.0003982530120481928, "loss": 4.0381, "step": 1070 }, { "epoch": 0.07256650659349036, "grad_norm": 0.9791918992996216, "learning_rate": 0.0003982475355969332, "loss": 4.5752, "step": 1071 }, { "epoch": 0.07263426243531435, "grad_norm": 1.6259567737579346, "learning_rate": 0.0003982420591456736, "loss": 4.043, "step": 1072 }, { "epoch": 0.07270201827713833, "grad_norm": 4.648763179779053, "learning_rate": 0.00039823658269441404, "loss": 3.8579, "step": 1073 }, { "epoch": 0.07276977411896232, "grad_norm": 1.136399745941162, "learning_rate": 0.0003982311062431545, "loss": 3.8428, "step": 1074 }, { "epoch": 0.07283752996078631, "grad_norm": 1.1962316036224365, "learning_rate": 0.0003982256297918949, "loss": 4.3146, "step": 1075 }, { "epoch": 0.0729052858026103, "grad_norm": 1.6209527254104614, "learning_rate": 0.0003982201533406353, "loss": 4.1499, "step": 1076 }, { "epoch": 0.07297304164443429, "grad_norm": 1.22721529006958, "learning_rate": 0.0003982146768893757, "loss": 4.0989, "step": 1077 }, { "epoch": 0.07304079748625827, "grad_norm": 1.5025233030319214, "learning_rate": 0.0003982092004381161, "loss": 3.9824, "step": 1078 }, { "epoch": 0.07310855332808226, "grad_norm": 24.357927322387695, "learning_rate": 0.00039820372398685655, "loss": 4.501, "step": 1079 }, { "epoch": 0.07317630916990624, "grad_norm": 3.035731792449951, "learning_rate": 0.00039819824753559694, "loss": 4.2724, "step": 1080 }, { "epoch": 0.07324406501173022, "grad_norm": 2.525163173675537, "learning_rate": 0.0003981927710843374, "loss": 4.1938, "step": 1081 }, { "epoch": 0.07331182085355421, "grad_norm": 2.7789783477783203, "learning_rate": 0.0003981872946330778, "loss": 4.4936, "step": 1082 }, { "epoch": 0.0733795766953782, "grad_norm": 2.0346388816833496, "learning_rate": 0.0003981818181818182, "loss": 4.2575, "step": 1083 }, { "epoch": 0.07344733253720219, "grad_norm": 2.5741000175476074, "learning_rate": 0.0003981763417305586, "loss": 3.5467, "step": 1084 }, { "epoch": 0.07351508837902618, "grad_norm": 1.519612431526184, "learning_rate": 0.00039817086527929905, "loss": 4.0075, "step": 1085 }, { "epoch": 0.07358284422085017, "grad_norm": 1.7198134660720825, "learning_rate": 0.00039816538882803945, "loss": 3.7975, "step": 1086 }, { "epoch": 0.07365060006267415, "grad_norm": 1.966604232788086, "learning_rate": 0.00039815991237677985, "loss": 3.9286, "step": 1087 }, { "epoch": 0.07371835590449814, "grad_norm": 44.42156982421875, "learning_rate": 0.00039815443592552025, "loss": 4.1852, "step": 1088 }, { "epoch": 0.07378611174632213, "grad_norm": 6.972700119018555, "learning_rate": 0.0003981489594742607, "loss": 4.1075, "step": 1089 }, { "epoch": 0.07385386758814612, "grad_norm": 3.37237548828125, "learning_rate": 0.00039814348302300115, "loss": 4.036, "step": 1090 }, { "epoch": 0.0739216234299701, "grad_norm": 2.1974034309387207, "learning_rate": 0.00039813800657174155, "loss": 3.7596, "step": 1091 }, { "epoch": 0.0739893792717941, "grad_norm": 2.3063008785247803, "learning_rate": 0.00039813253012048195, "loss": 3.9324, "step": 1092 }, { "epoch": 0.07405713511361808, "grad_norm": 1.6853234767913818, "learning_rate": 0.00039812705366922235, "loss": 3.9795, "step": 1093 }, { "epoch": 0.07412489095544206, "grad_norm": 3.2866408824920654, "learning_rate": 0.00039812157721796275, "loss": 4.1397, "step": 1094 }, { "epoch": 0.07419264679726605, "grad_norm": 3.4523186683654785, "learning_rate": 0.0003981161007667032, "loss": 3.7963, "step": 1095 }, { "epoch": 0.07426040263909003, "grad_norm": 1.4153375625610352, "learning_rate": 0.0003981106243154436, "loss": 3.878, "step": 1096 }, { "epoch": 0.07432815848091402, "grad_norm": 1.4361470937728882, "learning_rate": 0.00039810514786418406, "loss": 4.0755, "step": 1097 }, { "epoch": 0.07439591432273801, "grad_norm": 1.1677697896957397, "learning_rate": 0.00039809967141292446, "loss": 4.3206, "step": 1098 }, { "epoch": 0.074463670164562, "grad_norm": 1.5047650337219238, "learning_rate": 0.00039809419496166485, "loss": 3.8538, "step": 1099 }, { "epoch": 0.07453142600638599, "grad_norm": 2.8860878944396973, "learning_rate": 0.00039808871851040525, "loss": 3.9605, "step": 1100 }, { "epoch": 0.07459918184820997, "grad_norm": 3.908668041229248, "learning_rate": 0.0003980832420591457, "loss": 3.5234, "step": 1101 }, { "epoch": 0.07466693769003396, "grad_norm": 3.668790340423584, "learning_rate": 0.0003980777656078861, "loss": 3.9832, "step": 1102 }, { "epoch": 0.07473469353185795, "grad_norm": 3.1576225757598877, "learning_rate": 0.0003980722891566265, "loss": 4.3703, "step": 1103 }, { "epoch": 0.07480244937368194, "grad_norm": 1.5882000923156738, "learning_rate": 0.0003980668127053669, "loss": 3.4142, "step": 1104 }, { "epoch": 0.07487020521550593, "grad_norm": 1.420247197151184, "learning_rate": 0.00039806133625410736, "loss": 4.2078, "step": 1105 }, { "epoch": 0.07493796105732992, "grad_norm": 1.231593370437622, "learning_rate": 0.0003980558598028478, "loss": 4.2299, "step": 1106 }, { "epoch": 0.0750057168991539, "grad_norm": 1.689089059829712, "learning_rate": 0.0003980503833515882, "loss": 3.762, "step": 1107 }, { "epoch": 0.07507347274097789, "grad_norm": 2.394148588180542, "learning_rate": 0.0003980449069003286, "loss": 4.3099, "step": 1108 }, { "epoch": 0.07514122858280187, "grad_norm": 1.8881683349609375, "learning_rate": 0.000398039430449069, "loss": 4.3448, "step": 1109 }, { "epoch": 0.07520898442462585, "grad_norm": 2.1728670597076416, "learning_rate": 0.0003980339539978094, "loss": 3.8618, "step": 1110 }, { "epoch": 0.07527674026644984, "grad_norm": 1.6774365901947021, "learning_rate": 0.00039802847754654986, "loss": 4.1364, "step": 1111 }, { "epoch": 0.07534449610827383, "grad_norm": 1.4898735284805298, "learning_rate": 0.00039802300109529026, "loss": 3.9005, "step": 1112 }, { "epoch": 0.07541225195009782, "grad_norm": 1.7666774988174438, "learning_rate": 0.0003980175246440307, "loss": 3.4955, "step": 1113 }, { "epoch": 0.07548000779192181, "grad_norm": 2.5947883129119873, "learning_rate": 0.0003980120481927711, "loss": 4.3883, "step": 1114 }, { "epoch": 0.0755477636337458, "grad_norm": 1.867881417274475, "learning_rate": 0.0003980065717415115, "loss": 3.8803, "step": 1115 }, { "epoch": 0.07561551947556978, "grad_norm": 1.6566728353500366, "learning_rate": 0.0003980010952902519, "loss": 4.3265, "step": 1116 }, { "epoch": 0.07568327531739377, "grad_norm": 2.073270559310913, "learning_rate": 0.00039799561883899237, "loss": 3.99, "step": 1117 }, { "epoch": 0.07575103115921776, "grad_norm": 2.2588183879852295, "learning_rate": 0.00039799014238773277, "loss": 4.1738, "step": 1118 }, { "epoch": 0.07581878700104175, "grad_norm": 1.4516103267669678, "learning_rate": 0.00039798466593647316, "loss": 4.043, "step": 1119 }, { "epoch": 0.07588654284286574, "grad_norm": 2.8987886905670166, "learning_rate": 0.0003979791894852136, "loss": 3.8589, "step": 1120 }, { "epoch": 0.07595429868468973, "grad_norm": 4.085813999176025, "learning_rate": 0.000397973713033954, "loss": 3.9801, "step": 1121 }, { "epoch": 0.07602205452651371, "grad_norm": 2.655787229537964, "learning_rate": 0.0003979682365826944, "loss": 4.2628, "step": 1122 }, { "epoch": 0.0760898103683377, "grad_norm": 2.1648433208465576, "learning_rate": 0.00039796276013143487, "loss": 4.2921, "step": 1123 }, { "epoch": 0.07615756621016168, "grad_norm": 27.031539916992188, "learning_rate": 0.00039795728368017527, "loss": 4.7289, "step": 1124 }, { "epoch": 0.07622532205198566, "grad_norm": 20.55954933166504, "learning_rate": 0.00039795180722891567, "loss": 4.0354, "step": 1125 }, { "epoch": 0.07629307789380965, "grad_norm": 3.843536615371704, "learning_rate": 0.00039794633077765607, "loss": 4.1474, "step": 1126 }, { "epoch": 0.07636083373563364, "grad_norm": 2.7076430320739746, "learning_rate": 0.0003979408543263965, "loss": 4.074, "step": 1127 }, { "epoch": 0.07642858957745763, "grad_norm": 1.980013132095337, "learning_rate": 0.000397935377875137, "loss": 4.0907, "step": 1128 }, { "epoch": 0.07649634541928162, "grad_norm": 1.9771209955215454, "learning_rate": 0.0003979299014238774, "loss": 3.8624, "step": 1129 }, { "epoch": 0.0765641012611056, "grad_norm": 3.319946050643921, "learning_rate": 0.00039792442497261777, "loss": 4.1016, "step": 1130 }, { "epoch": 0.0766318571029296, "grad_norm": 4.815307140350342, "learning_rate": 0.00039791894852135817, "loss": 4.1382, "step": 1131 }, { "epoch": 0.07669961294475358, "grad_norm": 3.3918800354003906, "learning_rate": 0.00039791347207009857, "loss": 4.0498, "step": 1132 }, { "epoch": 0.07676736878657757, "grad_norm": 1.7358074188232422, "learning_rate": 0.000397907995618839, "loss": 3.4853, "step": 1133 }, { "epoch": 0.07683512462840156, "grad_norm": 2.709115505218506, "learning_rate": 0.0003979025191675794, "loss": 3.5773, "step": 1134 }, { "epoch": 0.07690288047022555, "grad_norm": 2.2798941135406494, "learning_rate": 0.0003978970427163198, "loss": 3.6055, "step": 1135 }, { "epoch": 0.07697063631204953, "grad_norm": 2.559767723083496, "learning_rate": 0.0003978915662650603, "loss": 4.0622, "step": 1136 }, { "epoch": 0.07703839215387352, "grad_norm": 1.953681230545044, "learning_rate": 0.0003978860898138007, "loss": 3.9368, "step": 1137 }, { "epoch": 0.07710614799569751, "grad_norm": 1.8372457027435303, "learning_rate": 0.0003978806133625411, "loss": 3.8773, "step": 1138 }, { "epoch": 0.07717390383752148, "grad_norm": 2.1165294647216797, "learning_rate": 0.00039787513691128153, "loss": 3.9216, "step": 1139 }, { "epoch": 0.07724165967934547, "grad_norm": 1.4757955074310303, "learning_rate": 0.00039786966046002193, "loss": 4.2867, "step": 1140 }, { "epoch": 0.07730941552116946, "grad_norm": 2.4588067531585693, "learning_rate": 0.0003978641840087623, "loss": 4.1888, "step": 1141 }, { "epoch": 0.07737717136299345, "grad_norm": 1.3946064710617065, "learning_rate": 0.0003978587075575027, "loss": 4.1066, "step": 1142 }, { "epoch": 0.07744492720481744, "grad_norm": 2.1390771865844727, "learning_rate": 0.0003978532311062431, "loss": 4.1404, "step": 1143 }, { "epoch": 0.07751268304664143, "grad_norm": 1.8265283107757568, "learning_rate": 0.00039784775465498363, "loss": 4.1295, "step": 1144 }, { "epoch": 0.07758043888846541, "grad_norm": 1.5415360927581787, "learning_rate": 0.00039784227820372403, "loss": 4.4015, "step": 1145 }, { "epoch": 0.0776481947302894, "grad_norm": 2.158518075942993, "learning_rate": 0.00039783680175246443, "loss": 3.7127, "step": 1146 }, { "epoch": 0.07771595057211339, "grad_norm": 2.151182174682617, "learning_rate": 0.00039783132530120483, "loss": 4.1543, "step": 1147 }, { "epoch": 0.07778370641393738, "grad_norm": 2.291529893875122, "learning_rate": 0.00039782584884994523, "loss": 3.8075, "step": 1148 }, { "epoch": 0.07785146225576137, "grad_norm": 2.1098005771636963, "learning_rate": 0.0003978203723986857, "loss": 3.7958, "step": 1149 }, { "epoch": 0.07791921809758535, "grad_norm": 1.9717562198638916, "learning_rate": 0.0003978148959474261, "loss": 3.7032, "step": 1150 }, { "epoch": 0.07798697393940934, "grad_norm": 15.983768463134766, "learning_rate": 0.00039780941949616654, "loss": 3.6224, "step": 1151 }, { "epoch": 0.07805472978123333, "grad_norm": 2.297041654586792, "learning_rate": 0.00039780394304490693, "loss": 3.739, "step": 1152 }, { "epoch": 0.07812248562305732, "grad_norm": 1.7756987810134888, "learning_rate": 0.00039779846659364733, "loss": 3.9994, "step": 1153 }, { "epoch": 0.0781902414648813, "grad_norm": 2.3172268867492676, "learning_rate": 0.00039779299014238773, "loss": 4.0928, "step": 1154 }, { "epoch": 0.07825799730670528, "grad_norm": 1.8446166515350342, "learning_rate": 0.0003977875136911282, "loss": 3.9243, "step": 1155 }, { "epoch": 0.07832575314852927, "grad_norm": 4.68627405166626, "learning_rate": 0.0003977820372398686, "loss": 4.1586, "step": 1156 }, { "epoch": 0.07839350899035326, "grad_norm": 3.5925960540771484, "learning_rate": 0.000397776560788609, "loss": 3.6326, "step": 1157 }, { "epoch": 0.07846126483217725, "grad_norm": 2.144824743270874, "learning_rate": 0.0003977710843373494, "loss": 3.8534, "step": 1158 }, { "epoch": 0.07852902067400123, "grad_norm": 2.5469396114349365, "learning_rate": 0.00039776560788608984, "loss": 3.6471, "step": 1159 }, { "epoch": 0.07859677651582522, "grad_norm": 2.061211347579956, "learning_rate": 0.00039776013143483024, "loss": 3.7156, "step": 1160 }, { "epoch": 0.07866453235764921, "grad_norm": 1.4750325679779053, "learning_rate": 0.0003977546549835707, "loss": 3.9912, "step": 1161 }, { "epoch": 0.0787322881994732, "grad_norm": 3.591585159301758, "learning_rate": 0.0003977491785323111, "loss": 3.6267, "step": 1162 }, { "epoch": 0.07880004404129719, "grad_norm": 4.180237293243408, "learning_rate": 0.0003977437020810515, "loss": 3.6864, "step": 1163 }, { "epoch": 0.07886779988312118, "grad_norm": 2.0687642097473145, "learning_rate": 0.0003977382256297919, "loss": 3.9684, "step": 1164 }, { "epoch": 0.07893555572494516, "grad_norm": 1.9622997045516968, "learning_rate": 0.00039773274917853234, "loss": 4.2062, "step": 1165 }, { "epoch": 0.07900331156676915, "grad_norm": 2.522752285003662, "learning_rate": 0.00039772727272727274, "loss": 3.9249, "step": 1166 }, { "epoch": 0.07907106740859314, "grad_norm": 2.1261966228485107, "learning_rate": 0.0003977217962760132, "loss": 4.1087, "step": 1167 }, { "epoch": 0.07913882325041713, "grad_norm": 1.7126882076263428, "learning_rate": 0.0003977163198247536, "loss": 4.3366, "step": 1168 }, { "epoch": 0.0792065790922411, "grad_norm": 1.7277259826660156, "learning_rate": 0.000397710843373494, "loss": 3.702, "step": 1169 }, { "epoch": 0.07927433493406509, "grad_norm": 2.5878758430480957, "learning_rate": 0.0003977053669222344, "loss": 4.0585, "step": 1170 }, { "epoch": 0.07934209077588908, "grad_norm": 1.8253357410430908, "learning_rate": 0.00039769989047097484, "loss": 4.1687, "step": 1171 }, { "epoch": 0.07940984661771307, "grad_norm": 2.15036940574646, "learning_rate": 0.00039769441401971524, "loss": 3.8367, "step": 1172 }, { "epoch": 0.07947760245953706, "grad_norm": 2.114055871963501, "learning_rate": 0.00039768893756845564, "loss": 3.707, "step": 1173 }, { "epoch": 0.07954535830136104, "grad_norm": 2.469308614730835, "learning_rate": 0.00039768346111719604, "loss": 3.6145, "step": 1174 }, { "epoch": 0.07961311414318503, "grad_norm": 1.8836160898208618, "learning_rate": 0.0003976779846659365, "loss": 3.9521, "step": 1175 }, { "epoch": 0.07968086998500902, "grad_norm": 1.7649003267288208, "learning_rate": 0.0003976725082146769, "loss": 4.0314, "step": 1176 }, { "epoch": 0.07974862582683301, "grad_norm": 2.820612907409668, "learning_rate": 0.00039766703176341735, "loss": 3.6167, "step": 1177 }, { "epoch": 0.079816381668657, "grad_norm": 2.147737979888916, "learning_rate": 0.00039766155531215775, "loss": 3.6669, "step": 1178 }, { "epoch": 0.07988413751048098, "grad_norm": 1.8214514255523682, "learning_rate": 0.00039765607886089815, "loss": 3.5683, "step": 1179 }, { "epoch": 0.07995189335230497, "grad_norm": 1.6605969667434692, "learning_rate": 0.00039765060240963855, "loss": 4.1463, "step": 1180 }, { "epoch": 0.08001964919412896, "grad_norm": 2.7118911743164062, "learning_rate": 0.00039764512595837895, "loss": 3.3552, "step": 1181 }, { "epoch": 0.08008740503595295, "grad_norm": 2.5016448497772217, "learning_rate": 0.00039763964950711945, "loss": 3.3393, "step": 1182 }, { "epoch": 0.08015516087777694, "grad_norm": 3.1485249996185303, "learning_rate": 0.00039763417305585985, "loss": 4.069, "step": 1183 }, { "epoch": 0.08022291671960091, "grad_norm": 2.1321465969085693, "learning_rate": 0.00039762869660460025, "loss": 3.6277, "step": 1184 }, { "epoch": 0.0802906725614249, "grad_norm": 2.209886074066162, "learning_rate": 0.00039762322015334065, "loss": 3.8263, "step": 1185 }, { "epoch": 0.08035842840324889, "grad_norm": 2.7138776779174805, "learning_rate": 0.00039761774370208105, "loss": 3.3651, "step": 1186 }, { "epoch": 0.08042618424507288, "grad_norm": 1.907503366470337, "learning_rate": 0.0003976122672508215, "loss": 3.9095, "step": 1187 }, { "epoch": 0.08049394008689686, "grad_norm": 2.3787288665771484, "learning_rate": 0.0003976067907995619, "loss": 3.618, "step": 1188 }, { "epoch": 0.08056169592872085, "grad_norm": 1.8711837530136108, "learning_rate": 0.0003976013143483023, "loss": 3.8969, "step": 1189 }, { "epoch": 0.08062945177054484, "grad_norm": 2.116868257522583, "learning_rate": 0.00039759583789704276, "loss": 3.4202, "step": 1190 }, { "epoch": 0.08069720761236883, "grad_norm": 2.397768497467041, "learning_rate": 0.00039759036144578315, "loss": 3.5774, "step": 1191 }, { "epoch": 0.08076496345419282, "grad_norm": 2.316174030303955, "learning_rate": 0.00039758488499452355, "loss": 3.7317, "step": 1192 }, { "epoch": 0.0808327192960168, "grad_norm": 1.8714388608932495, "learning_rate": 0.000397579408543264, "loss": 3.7417, "step": 1193 }, { "epoch": 0.0809004751378408, "grad_norm": 3.3521344661712646, "learning_rate": 0.0003975739320920044, "loss": 3.8725, "step": 1194 }, { "epoch": 0.08096823097966478, "grad_norm": 2.0648295879364014, "learning_rate": 0.0003975684556407448, "loss": 3.9086, "step": 1195 }, { "epoch": 0.08103598682148877, "grad_norm": 2.3664958477020264, "learning_rate": 0.0003975629791894852, "loss": 4.2032, "step": 1196 }, { "epoch": 0.08110374266331276, "grad_norm": 1.995086908340454, "learning_rate": 0.0003975575027382256, "loss": 3.8502, "step": 1197 }, { "epoch": 0.08117149850513675, "grad_norm": 1.6786521673202515, "learning_rate": 0.00039755202628696606, "loss": 4.3441, "step": 1198 }, { "epoch": 0.08123925434696072, "grad_norm": 2.189594030380249, "learning_rate": 0.0003975465498357065, "loss": 4.1061, "step": 1199 }, { "epoch": 0.08130701018878471, "grad_norm": 2.766935110092163, "learning_rate": 0.0003975410733844469, "loss": 3.564, "step": 1200 }, { "epoch": 0.0813747660306087, "grad_norm": 2.5711405277252197, "learning_rate": 0.0003975355969331873, "loss": 3.6341, "step": 1201 }, { "epoch": 0.08144252187243269, "grad_norm": 2.426211357116699, "learning_rate": 0.0003975301204819277, "loss": 4.0764, "step": 1202 }, { "epoch": 0.08151027771425667, "grad_norm": 2.789097309112549, "learning_rate": 0.00039752464403066816, "loss": 3.7514, "step": 1203 }, { "epoch": 0.08157803355608066, "grad_norm": 2.1677470207214355, "learning_rate": 0.00039751916757940856, "loss": 3.9546, "step": 1204 }, { "epoch": 0.08164578939790465, "grad_norm": 2.308382511138916, "learning_rate": 0.00039751369112814896, "loss": 3.7583, "step": 1205 }, { "epoch": 0.08171354523972864, "grad_norm": 2.3552372455596924, "learning_rate": 0.0003975082146768894, "loss": 3.9251, "step": 1206 }, { "epoch": 0.08178130108155263, "grad_norm": 5.0255656242370605, "learning_rate": 0.0003975027382256298, "loss": 3.1247, "step": 1207 }, { "epoch": 0.08184905692337661, "grad_norm": 2.271611213684082, "learning_rate": 0.0003974972617743702, "loss": 3.9579, "step": 1208 }, { "epoch": 0.0819168127652006, "grad_norm": 2.6856629848480225, "learning_rate": 0.00039749178532311067, "loss": 3.4936, "step": 1209 }, { "epoch": 0.08198456860702459, "grad_norm": 2.271657943725586, "learning_rate": 0.00039748630887185106, "loss": 3.4154, "step": 1210 }, { "epoch": 0.08205232444884858, "grad_norm": 1.982198715209961, "learning_rate": 0.00039748083242059146, "loss": 4.0364, "step": 1211 }, { "epoch": 0.08212008029067257, "grad_norm": 2.110283136367798, "learning_rate": 0.00039747535596933186, "loss": 3.9218, "step": 1212 }, { "epoch": 0.08218783613249656, "grad_norm": 2.626458168029785, "learning_rate": 0.0003974698795180723, "loss": 3.7014, "step": 1213 }, { "epoch": 0.08225559197432053, "grad_norm": 2.199005365371704, "learning_rate": 0.0003974644030668127, "loss": 3.7801, "step": 1214 }, { "epoch": 0.08232334781614452, "grad_norm": 10.258249282836914, "learning_rate": 0.00039745892661555317, "loss": 3.7388, "step": 1215 }, { "epoch": 0.0823911036579685, "grad_norm": 6.497037887573242, "learning_rate": 0.00039745345016429357, "loss": 3.8982, "step": 1216 }, { "epoch": 0.0824588594997925, "grad_norm": 2.4071481227874756, "learning_rate": 0.00039744797371303397, "loss": 3.3666, "step": 1217 }, { "epoch": 0.08252661534161648, "grad_norm": 3.222608804702759, "learning_rate": 0.00039744249726177437, "loss": 3.4107, "step": 1218 }, { "epoch": 0.08259437118344047, "grad_norm": 2.414372682571411, "learning_rate": 0.00039743702081051477, "loss": 3.832, "step": 1219 }, { "epoch": 0.08266212702526446, "grad_norm": 2.607917070388794, "learning_rate": 0.0003974315443592552, "loss": 3.6625, "step": 1220 }, { "epoch": 0.08272988286708845, "grad_norm": 3.644857883453369, "learning_rate": 0.0003974260679079957, "loss": 3.3418, "step": 1221 }, { "epoch": 0.08279763870891244, "grad_norm": 4.009765625, "learning_rate": 0.00039742059145673607, "loss": 3.6009, "step": 1222 }, { "epoch": 0.08286539455073642, "grad_norm": 4.018517971038818, "learning_rate": 0.00039741511500547647, "loss": 3.2331, "step": 1223 }, { "epoch": 0.08293315039256041, "grad_norm": 3.6456215381622314, "learning_rate": 0.00039740963855421687, "loss": 3.8497, "step": 1224 }, { "epoch": 0.0830009062343844, "grad_norm": 2.227560520172119, "learning_rate": 0.0003974041621029573, "loss": 3.8611, "step": 1225 }, { "epoch": 0.08306866207620839, "grad_norm": 2.6084868907928467, "learning_rate": 0.0003973986856516977, "loss": 3.999, "step": 1226 }, { "epoch": 0.08313641791803238, "grad_norm": 2.6379199028015137, "learning_rate": 0.0003973932092004381, "loss": 3.446, "step": 1227 }, { "epoch": 0.08320417375985635, "grad_norm": 2.3672125339508057, "learning_rate": 0.0003973877327491785, "loss": 3.1848, "step": 1228 }, { "epoch": 0.08327192960168034, "grad_norm": 3.0808262825012207, "learning_rate": 0.000397382256297919, "loss": 3.4877, "step": 1229 }, { "epoch": 0.08333968544350433, "grad_norm": 5.694168567657471, "learning_rate": 0.0003973767798466594, "loss": 3.4092, "step": 1230 }, { "epoch": 0.08340744128532832, "grad_norm": 3.3568620681762695, "learning_rate": 0.00039737130339539983, "loss": 3.5356, "step": 1231 }, { "epoch": 0.0834751971271523, "grad_norm": 2.977346181869507, "learning_rate": 0.0003973658269441402, "loss": 3.4667, "step": 1232 }, { "epoch": 0.08354295296897629, "grad_norm": 2.025078773498535, "learning_rate": 0.0003973603504928806, "loss": 3.8365, "step": 1233 }, { "epoch": 0.08361070881080028, "grad_norm": 2.478451728820801, "learning_rate": 0.000397354874041621, "loss": 4.1945, "step": 1234 }, { "epoch": 0.08367846465262427, "grad_norm": 2.5750892162323, "learning_rate": 0.0003973493975903614, "loss": 3.6372, "step": 1235 }, { "epoch": 0.08374622049444826, "grad_norm": 1.994739055633545, "learning_rate": 0.0003973439211391019, "loss": 3.7147, "step": 1236 }, { "epoch": 0.08381397633627224, "grad_norm": 2.232776641845703, "learning_rate": 0.00039733844468784233, "loss": 3.7312, "step": 1237 }, { "epoch": 0.08388173217809623, "grad_norm": 2.582740068435669, "learning_rate": 0.00039733296823658273, "loss": 3.3792, "step": 1238 }, { "epoch": 0.08394948801992022, "grad_norm": 3.110473394393921, "learning_rate": 0.00039732749178532313, "loss": 3.5127, "step": 1239 }, { "epoch": 0.08401724386174421, "grad_norm": 5.40733003616333, "learning_rate": 0.00039732201533406353, "loss": 3.675, "step": 1240 }, { "epoch": 0.0840849997035682, "grad_norm": 3.805846691131592, "learning_rate": 0.000397316538882804, "loss": 3.7882, "step": 1241 }, { "epoch": 0.08415275554539219, "grad_norm": 2.9184610843658447, "learning_rate": 0.0003973110624315444, "loss": 3.1571, "step": 1242 }, { "epoch": 0.08422051138721616, "grad_norm": 2.2012500762939453, "learning_rate": 0.0003973055859802848, "loss": 3.5428, "step": 1243 }, { "epoch": 0.08428826722904015, "grad_norm": 4.310023784637451, "learning_rate": 0.00039730010952902523, "loss": 3.7799, "step": 1244 }, { "epoch": 0.08435602307086414, "grad_norm": 4.646803855895996, "learning_rate": 0.00039729463307776563, "loss": 3.6777, "step": 1245 }, { "epoch": 0.08442377891268812, "grad_norm": 3.766892194747925, "learning_rate": 0.00039728915662650603, "loss": 3.4044, "step": 1246 }, { "epoch": 0.08449153475451211, "grad_norm": 4.320135593414307, "learning_rate": 0.0003972836801752465, "loss": 3.8089, "step": 1247 }, { "epoch": 0.0845592905963361, "grad_norm": 2.767925500869751, "learning_rate": 0.0003972782037239869, "loss": 3.7742, "step": 1248 }, { "epoch": 0.08462704643816009, "grad_norm": 2.7828221321105957, "learning_rate": 0.0003972727272727273, "loss": 3.8359, "step": 1249 }, { "epoch": 0.08469480227998408, "grad_norm": 3.131392478942871, "learning_rate": 0.0003972672508214677, "loss": 3.4213, "step": 1250 }, { "epoch": 0.08476255812180807, "grad_norm": 5.914330959320068, "learning_rate": 0.0003972617743702081, "loss": 3.5991, "step": 1251 }, { "epoch": 0.08483031396363205, "grad_norm": 3.821100950241089, "learning_rate": 0.00039725629791894854, "loss": 3.7064, "step": 1252 }, { "epoch": 0.08489806980545604, "grad_norm": 2.647351026535034, "learning_rate": 0.000397250821467689, "loss": 3.5764, "step": 1253 }, { "epoch": 0.08496582564728003, "grad_norm": 2.7663350105285645, "learning_rate": 0.0003972453450164294, "loss": 3.799, "step": 1254 }, { "epoch": 0.08503358148910402, "grad_norm": 2.8820762634277344, "learning_rate": 0.0003972398685651698, "loss": 3.4514, "step": 1255 }, { "epoch": 0.085101337330928, "grad_norm": 3.7021536827087402, "learning_rate": 0.0003972343921139102, "loss": 3.6858, "step": 1256 }, { "epoch": 0.085169093172752, "grad_norm": 3.1065738201141357, "learning_rate": 0.0003972289156626506, "loss": 3.5724, "step": 1257 }, { "epoch": 0.08523684901457597, "grad_norm": 2.983675003051758, "learning_rate": 0.00039722343921139104, "loss": 3.6062, "step": 1258 }, { "epoch": 0.08530460485639996, "grad_norm": 3.379542112350464, "learning_rate": 0.00039721796276013144, "loss": 3.6398, "step": 1259 }, { "epoch": 0.08537236069822395, "grad_norm": 4.429060459136963, "learning_rate": 0.0003972124863088719, "loss": 3.5044, "step": 1260 }, { "epoch": 0.08544011654004793, "grad_norm": 3.0019874572753906, "learning_rate": 0.0003972070098576123, "loss": 3.5509, "step": 1261 }, { "epoch": 0.08550787238187192, "grad_norm": 1.8628062009811401, "learning_rate": 0.0003972015334063527, "loss": 3.919, "step": 1262 }, { "epoch": 0.08557562822369591, "grad_norm": 5.054527759552002, "learning_rate": 0.00039719605695509314, "loss": 2.9386, "step": 1263 }, { "epoch": 0.0856433840655199, "grad_norm": 2.693042516708374, "learning_rate": 0.00039719058050383354, "loss": 3.7089, "step": 1264 }, { "epoch": 0.08571113990734389, "grad_norm": 2.401817798614502, "learning_rate": 0.00039718510405257394, "loss": 3.6142, "step": 1265 }, { "epoch": 0.08577889574916787, "grad_norm": 2.529827117919922, "learning_rate": 0.00039717962760131434, "loss": 3.3647, "step": 1266 }, { "epoch": 0.08584665159099186, "grad_norm": 1.9811527729034424, "learning_rate": 0.00039717415115005474, "loss": 3.8447, "step": 1267 }, { "epoch": 0.08591440743281585, "grad_norm": 2.4195988178253174, "learning_rate": 0.0003971686746987952, "loss": 3.7935, "step": 1268 }, { "epoch": 0.08598216327463984, "grad_norm": 3.4873714447021484, "learning_rate": 0.00039716319824753565, "loss": 3.8628, "step": 1269 }, { "epoch": 0.08604991911646383, "grad_norm": 3.22560715675354, "learning_rate": 0.00039715772179627605, "loss": 3.545, "step": 1270 }, { "epoch": 0.08611767495828782, "grad_norm": 2.0998849868774414, "learning_rate": 0.00039715224534501645, "loss": 4.0359, "step": 1271 }, { "epoch": 0.0861854308001118, "grad_norm": 3.034543752670288, "learning_rate": 0.00039714676889375685, "loss": 3.6052, "step": 1272 }, { "epoch": 0.08625318664193578, "grad_norm": 3.707247734069824, "learning_rate": 0.00039714129244249725, "loss": 3.0152, "step": 1273 }, { "epoch": 0.08632094248375977, "grad_norm": 3.397484302520752, "learning_rate": 0.0003971358159912377, "loss": 3.2396, "step": 1274 }, { "epoch": 0.08638869832558375, "grad_norm": 2.2703237533569336, "learning_rate": 0.0003971303395399781, "loss": 3.5158, "step": 1275 }, { "epoch": 0.08645645416740774, "grad_norm": 3.1869099140167236, "learning_rate": 0.00039712486308871855, "loss": 3.7413, "step": 1276 }, { "epoch": 0.08652421000923173, "grad_norm": 7.536550521850586, "learning_rate": 0.00039711938663745895, "loss": 3.9123, "step": 1277 }, { "epoch": 0.08659196585105572, "grad_norm": 3.970386266708374, "learning_rate": 0.00039711391018619935, "loss": 3.7636, "step": 1278 }, { "epoch": 0.08665972169287971, "grad_norm": 8.451091766357422, "learning_rate": 0.0003971084337349398, "loss": 3.2727, "step": 1279 }, { "epoch": 0.0867274775347037, "grad_norm": 3.6506245136260986, "learning_rate": 0.0003971029572836802, "loss": 3.2431, "step": 1280 }, { "epoch": 0.08679523337652768, "grad_norm": 2.3779211044311523, "learning_rate": 0.0003970974808324206, "loss": 4.0591, "step": 1281 }, { "epoch": 0.08686298921835167, "grad_norm": 3.352254867553711, "learning_rate": 0.000397092004381161, "loss": 3.4645, "step": 1282 }, { "epoch": 0.08693074506017566, "grad_norm": 2.5513463020324707, "learning_rate": 0.00039708652792990145, "loss": 3.7434, "step": 1283 }, { "epoch": 0.08699850090199965, "grad_norm": 2.913905620574951, "learning_rate": 0.00039708105147864185, "loss": 3.9168, "step": 1284 }, { "epoch": 0.08706625674382364, "grad_norm": 2.863588333129883, "learning_rate": 0.0003970755750273823, "loss": 3.2169, "step": 1285 }, { "epoch": 0.08713401258564762, "grad_norm": 2.9639158248901367, "learning_rate": 0.0003970700985761227, "loss": 3.5472, "step": 1286 }, { "epoch": 0.08720176842747161, "grad_norm": 2.3217127323150635, "learning_rate": 0.0003970646221248631, "loss": 3.0184, "step": 1287 }, { "epoch": 0.08726952426929559, "grad_norm": 3.683032274246216, "learning_rate": 0.0003970591456736035, "loss": 3.6671, "step": 1288 }, { "epoch": 0.08733728011111958, "grad_norm": 4.01596736907959, "learning_rate": 0.0003970536692223439, "loss": 3.9717, "step": 1289 }, { "epoch": 0.08740503595294356, "grad_norm": 3.7086997032165527, "learning_rate": 0.00039704819277108436, "loss": 2.9476, "step": 1290 }, { "epoch": 0.08747279179476755, "grad_norm": 2.7542834281921387, "learning_rate": 0.0003970427163198248, "loss": 3.6172, "step": 1291 }, { "epoch": 0.08754054763659154, "grad_norm": 4.100141525268555, "learning_rate": 0.0003970372398685652, "loss": 3.2291, "step": 1292 }, { "epoch": 0.08760830347841553, "grad_norm": 3.0796902179718018, "learning_rate": 0.0003970317634173056, "loss": 3.712, "step": 1293 }, { "epoch": 0.08767605932023952, "grad_norm": 3.585057497024536, "learning_rate": 0.000397026286966046, "loss": 3.0526, "step": 1294 }, { "epoch": 0.0877438151620635, "grad_norm": 3.9870405197143555, "learning_rate": 0.0003970208105147864, "loss": 3.2653, "step": 1295 }, { "epoch": 0.08781157100388749, "grad_norm": 2.910722255706787, "learning_rate": 0.00039701533406352686, "loss": 3.4261, "step": 1296 }, { "epoch": 0.08787932684571148, "grad_norm": 2.6165783405303955, "learning_rate": 0.00039700985761226726, "loss": 3.7293, "step": 1297 }, { "epoch": 0.08794708268753547, "grad_norm": 4.945798397064209, "learning_rate": 0.00039700438116100766, "loss": 3.5293, "step": 1298 }, { "epoch": 0.08801483852935946, "grad_norm": 2.7560267448425293, "learning_rate": 0.0003969989047097481, "loss": 3.4106, "step": 1299 }, { "epoch": 0.08808259437118345, "grad_norm": 7.161624431610107, "learning_rate": 0.0003969934282584885, "loss": 3.4445, "step": 1300 }, { "epoch": 0.08815035021300743, "grad_norm": 5.628994464874268, "learning_rate": 0.00039698795180722897, "loss": 3.5507, "step": 1301 }, { "epoch": 0.08821810605483142, "grad_norm": 3.652656316757202, "learning_rate": 0.00039698247535596936, "loss": 3.2534, "step": 1302 }, { "epoch": 0.0882858618966554, "grad_norm": 6.7578959465026855, "learning_rate": 0.00039697699890470976, "loss": 4.0627, "step": 1303 }, { "epoch": 0.08835361773847938, "grad_norm": 8.673806190490723, "learning_rate": 0.00039697152245345016, "loss": 3.3421, "step": 1304 }, { "epoch": 0.08842137358030337, "grad_norm": 2.8153746128082275, "learning_rate": 0.00039696604600219056, "loss": 2.7227, "step": 1305 }, { "epoch": 0.08848912942212736, "grad_norm": 4.202126502990723, "learning_rate": 0.000396960569550931, "loss": 3.5121, "step": 1306 }, { "epoch": 0.08855688526395135, "grad_norm": 3.4320075511932373, "learning_rate": 0.00039695509309967147, "loss": 3.4808, "step": 1307 }, { "epoch": 0.08862464110577534, "grad_norm": 3.0245823860168457, "learning_rate": 0.00039694961664841187, "loss": 3.0227, "step": 1308 }, { "epoch": 0.08869239694759933, "grad_norm": 2.5735254287719727, "learning_rate": 0.00039694414019715227, "loss": 3.2619, "step": 1309 }, { "epoch": 0.08876015278942331, "grad_norm": 5.699209213256836, "learning_rate": 0.00039693866374589267, "loss": 3.2324, "step": 1310 }, { "epoch": 0.0888279086312473, "grad_norm": 4.076539993286133, "learning_rate": 0.00039693318729463307, "loss": 2.823, "step": 1311 }, { "epoch": 0.08889566447307129, "grad_norm": 4.222191333770752, "learning_rate": 0.0003969277108433735, "loss": 3.6137, "step": 1312 }, { "epoch": 0.08896342031489528, "grad_norm": 3.719456672668457, "learning_rate": 0.0003969222343921139, "loss": 3.6398, "step": 1313 }, { "epoch": 0.08903117615671927, "grad_norm": 2.9299585819244385, "learning_rate": 0.00039691675794085437, "loss": 3.5518, "step": 1314 }, { "epoch": 0.08909893199854325, "grad_norm": 3.1081583499908447, "learning_rate": 0.00039691128148959477, "loss": 3.7595, "step": 1315 }, { "epoch": 0.08916668784036724, "grad_norm": 2.9591050148010254, "learning_rate": 0.00039690580503833517, "loss": 3.9243, "step": 1316 }, { "epoch": 0.08923444368219123, "grad_norm": 2.3958845138549805, "learning_rate": 0.0003969003285870756, "loss": 3.5978, "step": 1317 }, { "epoch": 0.0893021995240152, "grad_norm": 2.804438591003418, "learning_rate": 0.000396894852135816, "loss": 2.9166, "step": 1318 }, { "epoch": 0.0893699553658392, "grad_norm": 3.00793194770813, "learning_rate": 0.0003968893756845564, "loss": 3.2201, "step": 1319 }, { "epoch": 0.08943771120766318, "grad_norm": 3.10834002494812, "learning_rate": 0.0003968838992332968, "loss": 3.7626, "step": 1320 }, { "epoch": 0.08950546704948717, "grad_norm": 4.358127117156982, "learning_rate": 0.0003968784227820372, "loss": 3.3129, "step": 1321 }, { "epoch": 0.08957322289131116, "grad_norm": 4.7332305908203125, "learning_rate": 0.0003968729463307777, "loss": 3.4599, "step": 1322 }, { "epoch": 0.08964097873313515, "grad_norm": 4.504525184631348, "learning_rate": 0.00039686746987951813, "loss": 3.0129, "step": 1323 }, { "epoch": 0.08970873457495913, "grad_norm": 3.25687837600708, "learning_rate": 0.0003968619934282585, "loss": 2.944, "step": 1324 }, { "epoch": 0.08977649041678312, "grad_norm": 4.91163444519043, "learning_rate": 0.0003968565169769989, "loss": 3.6705, "step": 1325 }, { "epoch": 0.08984424625860711, "grad_norm": 2.9517040252685547, "learning_rate": 0.0003968510405257393, "loss": 3.0409, "step": 1326 }, { "epoch": 0.0899120021004311, "grad_norm": 3.991425037384033, "learning_rate": 0.0003968455640744797, "loss": 3.1587, "step": 1327 }, { "epoch": 0.08997975794225509, "grad_norm": 2.9522857666015625, "learning_rate": 0.0003968400876232202, "loss": 3.3263, "step": 1328 }, { "epoch": 0.09004751378407908, "grad_norm": 3.084933280944824, "learning_rate": 0.0003968346111719606, "loss": 3.6117, "step": 1329 }, { "epoch": 0.09011526962590306, "grad_norm": 4.300806999206543, "learning_rate": 0.00039682913472070103, "loss": 3.3952, "step": 1330 }, { "epoch": 0.09018302546772705, "grad_norm": 3.70619797706604, "learning_rate": 0.00039682365826944143, "loss": 2.8007, "step": 1331 }, { "epoch": 0.09025078130955104, "grad_norm": 3.1480019092559814, "learning_rate": 0.00039681818181818183, "loss": 3.4314, "step": 1332 }, { "epoch": 0.09031853715137501, "grad_norm": 5.503115653991699, "learning_rate": 0.00039681270536692223, "loss": 3.2433, "step": 1333 }, { "epoch": 0.090386292993199, "grad_norm": 5.408228397369385, "learning_rate": 0.0003968072289156627, "loss": 2.953, "step": 1334 }, { "epoch": 0.09045404883502299, "grad_norm": 4.142291069030762, "learning_rate": 0.0003968017524644031, "loss": 3.5657, "step": 1335 }, { "epoch": 0.09052180467684698, "grad_norm": 6.977697849273682, "learning_rate": 0.0003967962760131435, "loss": 3.2821, "step": 1336 }, { "epoch": 0.09058956051867097, "grad_norm": 3.5408384799957275, "learning_rate": 0.0003967907995618839, "loss": 3.3353, "step": 1337 }, { "epoch": 0.09065731636049496, "grad_norm": 7.353470802307129, "learning_rate": 0.00039678532311062433, "loss": 3.4904, "step": 1338 }, { "epoch": 0.09072507220231894, "grad_norm": 5.857029914855957, "learning_rate": 0.0003967798466593648, "loss": 3.1007, "step": 1339 }, { "epoch": 0.09079282804414293, "grad_norm": 3.2496349811553955, "learning_rate": 0.0003967743702081052, "loss": 3.0114, "step": 1340 }, { "epoch": 0.09086058388596692, "grad_norm": 4.774024486541748, "learning_rate": 0.0003967688937568456, "loss": 3.5133, "step": 1341 }, { "epoch": 0.09092833972779091, "grad_norm": 4.487707138061523, "learning_rate": 0.000396763417305586, "loss": 3.2241, "step": 1342 }, { "epoch": 0.0909960955696149, "grad_norm": 4.201592445373535, "learning_rate": 0.0003967579408543264, "loss": 3.7208, "step": 1343 }, { "epoch": 0.09106385141143888, "grad_norm": 8.039995193481445, "learning_rate": 0.00039675246440306684, "loss": 3.5228, "step": 1344 }, { "epoch": 0.09113160725326287, "grad_norm": 8.409346580505371, "learning_rate": 0.0003967469879518073, "loss": 3.6083, "step": 1345 }, { "epoch": 0.09119936309508686, "grad_norm": 3.661201000213623, "learning_rate": 0.0003967415115005477, "loss": 3.6194, "step": 1346 }, { "epoch": 0.09126711893691083, "grad_norm": 3.2771658897399902, "learning_rate": 0.0003967360350492881, "loss": 3.1981, "step": 1347 }, { "epoch": 0.09133487477873482, "grad_norm": 2.745028257369995, "learning_rate": 0.0003967305585980285, "loss": 3.0765, "step": 1348 }, { "epoch": 0.09140263062055881, "grad_norm": 3.067625045776367, "learning_rate": 0.0003967250821467689, "loss": 3.4039, "step": 1349 }, { "epoch": 0.0914703864623828, "grad_norm": 4.588881015777588, "learning_rate": 0.00039671960569550934, "loss": 3.5035, "step": 1350 }, { "epoch": 0.09153814230420679, "grad_norm": 3.3754680156707764, "learning_rate": 0.00039671412924424974, "loss": 3.459, "step": 1351 }, { "epoch": 0.09160589814603078, "grad_norm": 3.2061920166015625, "learning_rate": 0.00039670865279299014, "loss": 3.6217, "step": 1352 }, { "epoch": 0.09167365398785476, "grad_norm": 3.9220097064971924, "learning_rate": 0.0003967031763417306, "loss": 3.4331, "step": 1353 }, { "epoch": 0.09174140982967875, "grad_norm": 3.916447639465332, "learning_rate": 0.000396697699890471, "loss": 3.1779, "step": 1354 }, { "epoch": 0.09180916567150274, "grad_norm": 5.143884181976318, "learning_rate": 0.00039669222343921144, "loss": 3.0871, "step": 1355 }, { "epoch": 0.09187692151332673, "grad_norm": 3.3875112533569336, "learning_rate": 0.00039668674698795184, "loss": 2.807, "step": 1356 }, { "epoch": 0.09194467735515072, "grad_norm": 4.5538506507873535, "learning_rate": 0.00039668127053669224, "loss": 3.0644, "step": 1357 }, { "epoch": 0.0920124331969747, "grad_norm": 4.417010307312012, "learning_rate": 0.00039667579408543264, "loss": 3.4697, "step": 1358 }, { "epoch": 0.0920801890387987, "grad_norm": 5.526939392089844, "learning_rate": 0.00039667031763417304, "loss": 3.6266, "step": 1359 }, { "epoch": 0.09214794488062268, "grad_norm": 5.602273941040039, "learning_rate": 0.0003966648411829135, "loss": 3.2932, "step": 1360 }, { "epoch": 0.09221570072244667, "grad_norm": 3.233726739883423, "learning_rate": 0.00039665936473165395, "loss": 3.3159, "step": 1361 }, { "epoch": 0.09228345656427064, "grad_norm": 4.651501178741455, "learning_rate": 0.00039665388828039435, "loss": 3.0679, "step": 1362 }, { "epoch": 0.09235121240609463, "grad_norm": 4.072600841522217, "learning_rate": 0.00039664841182913475, "loss": 3.5562, "step": 1363 }, { "epoch": 0.09241896824791862, "grad_norm": 3.0264618396759033, "learning_rate": 0.00039664293537787515, "loss": 3.1274, "step": 1364 }, { "epoch": 0.09248672408974261, "grad_norm": 3.4799113273620605, "learning_rate": 0.00039663745892661555, "loss": 3.1202, "step": 1365 }, { "epoch": 0.0925544799315666, "grad_norm": 3.753041982650757, "learning_rate": 0.000396631982475356, "loss": 3.2853, "step": 1366 }, { "epoch": 0.09262223577339058, "grad_norm": 3.1483142375946045, "learning_rate": 0.0003966265060240964, "loss": 2.8398, "step": 1367 }, { "epoch": 0.09268999161521457, "grad_norm": 4.377002716064453, "learning_rate": 0.0003966210295728368, "loss": 2.6961, "step": 1368 }, { "epoch": 0.09275774745703856, "grad_norm": 5.337320327758789, "learning_rate": 0.00039661555312157725, "loss": 3.8357, "step": 1369 }, { "epoch": 0.09282550329886255, "grad_norm": 8.339109420776367, "learning_rate": 0.00039661007667031765, "loss": 3.4294, "step": 1370 }, { "epoch": 0.09289325914068654, "grad_norm": 4.493359565734863, "learning_rate": 0.00039660460021905805, "loss": 3.004, "step": 1371 }, { "epoch": 0.09296101498251053, "grad_norm": 3.51812481880188, "learning_rate": 0.0003965991237677985, "loss": 3.3724, "step": 1372 }, { "epoch": 0.09302877082433451, "grad_norm": 6.700555801391602, "learning_rate": 0.0003965936473165389, "loss": 3.4482, "step": 1373 }, { "epoch": 0.0930965266661585, "grad_norm": 3.407228469848633, "learning_rate": 0.0003965881708652793, "loss": 3.3394, "step": 1374 }, { "epoch": 0.09316428250798249, "grad_norm": 3.750430107116699, "learning_rate": 0.0003965826944140197, "loss": 3.1796, "step": 1375 }, { "epoch": 0.09323203834980648, "grad_norm": 4.640967845916748, "learning_rate": 0.00039657721796276015, "loss": 3.3042, "step": 1376 }, { "epoch": 0.09329979419163045, "grad_norm": 6.119855880737305, "learning_rate": 0.0003965717415115006, "loss": 3.14, "step": 1377 }, { "epoch": 0.09336755003345444, "grad_norm": 3.8944735527038574, "learning_rate": 0.000396566265060241, "loss": 3.4573, "step": 1378 }, { "epoch": 0.09343530587527843, "grad_norm": 6.502141952514648, "learning_rate": 0.0003965607886089814, "loss": 3.5776, "step": 1379 }, { "epoch": 0.09350306171710242, "grad_norm": 2.7702245712280273, "learning_rate": 0.0003965553121577218, "loss": 3.4609, "step": 1380 }, { "epoch": 0.0935708175589264, "grad_norm": 3.210815668106079, "learning_rate": 0.0003965498357064622, "loss": 3.251, "step": 1381 }, { "epoch": 0.0936385734007504, "grad_norm": 6.19578742980957, "learning_rate": 0.00039654435925520266, "loss": 2.6267, "step": 1382 }, { "epoch": 0.09370632924257438, "grad_norm": 4.072349548339844, "learning_rate": 0.00039653888280394306, "loss": 3.2351, "step": 1383 }, { "epoch": 0.09377408508439837, "grad_norm": 4.1360392570495605, "learning_rate": 0.0003965334063526835, "loss": 3.162, "step": 1384 }, { "epoch": 0.09384184092622236, "grad_norm": 5.888863563537598, "learning_rate": 0.0003965279299014239, "loss": 3.9504, "step": 1385 }, { "epoch": 0.09390959676804635, "grad_norm": 5.3361430168151855, "learning_rate": 0.0003965224534501643, "loss": 3.3076, "step": 1386 }, { "epoch": 0.09397735260987033, "grad_norm": 3.3490030765533447, "learning_rate": 0.0003965169769989047, "loss": 3.0575, "step": 1387 }, { "epoch": 0.09404510845169432, "grad_norm": 3.54402756690979, "learning_rate": 0.00039651150054764516, "loss": 2.7207, "step": 1388 }, { "epoch": 0.09411286429351831, "grad_norm": 6.320133209228516, "learning_rate": 0.00039650602409638556, "loss": 3.2264, "step": 1389 }, { "epoch": 0.0941806201353423, "grad_norm": 4.098153591156006, "learning_rate": 0.00039650054764512596, "loss": 3.4084, "step": 1390 }, { "epoch": 0.09424837597716629, "grad_norm": 3.734311580657959, "learning_rate": 0.00039649507119386636, "loss": 2.9108, "step": 1391 }, { "epoch": 0.09431613181899026, "grad_norm": 8.233625411987305, "learning_rate": 0.0003964895947426068, "loss": 3.3912, "step": 1392 }, { "epoch": 0.09438388766081425, "grad_norm": 3.449410915374756, "learning_rate": 0.00039648411829134727, "loss": 2.9413, "step": 1393 }, { "epoch": 0.09445164350263824, "grad_norm": 4.187081336975098, "learning_rate": 0.00039647864184008766, "loss": 2.9833, "step": 1394 }, { "epoch": 0.09451939934446223, "grad_norm": 3.8117613792419434, "learning_rate": 0.00039647316538882806, "loss": 3.5015, "step": 1395 }, { "epoch": 0.09458715518628621, "grad_norm": 3.2268080711364746, "learning_rate": 0.00039646768893756846, "loss": 3.4804, "step": 1396 }, { "epoch": 0.0946549110281102, "grad_norm": 5.477669715881348, "learning_rate": 0.00039646221248630886, "loss": 2.9596, "step": 1397 }, { "epoch": 0.09472266686993419, "grad_norm": 3.455084800720215, "learning_rate": 0.0003964567360350493, "loss": 3.476, "step": 1398 }, { "epoch": 0.09479042271175818, "grad_norm": 3.612034559249878, "learning_rate": 0.0003964512595837897, "loss": 2.7545, "step": 1399 }, { "epoch": 0.09485817855358217, "grad_norm": 5.558725833892822, "learning_rate": 0.00039644578313253017, "loss": 2.6956, "step": 1400 }, { "epoch": 0.09492593439540616, "grad_norm": 4.886908054351807, "learning_rate": 0.00039644030668127057, "loss": 3.1163, "step": 1401 }, { "epoch": 0.09499369023723014, "grad_norm": 3.92747163772583, "learning_rate": 0.00039643483023001097, "loss": 3.2695, "step": 1402 }, { "epoch": 0.09506144607905413, "grad_norm": 5.023552894592285, "learning_rate": 0.00039642935377875137, "loss": 2.6324, "step": 1403 }, { "epoch": 0.09512920192087812, "grad_norm": 5.036474704742432, "learning_rate": 0.0003964238773274918, "loss": 3.3597, "step": 1404 }, { "epoch": 0.09519695776270211, "grad_norm": 7.500438213348389, "learning_rate": 0.0003964184008762322, "loss": 3.2728, "step": 1405 }, { "epoch": 0.0952647136045261, "grad_norm": 4.588469505310059, "learning_rate": 0.0003964129244249726, "loss": 2.8626, "step": 1406 }, { "epoch": 0.09533246944635007, "grad_norm": 4.683378219604492, "learning_rate": 0.000396407447973713, "loss": 3.4165, "step": 1407 }, { "epoch": 0.09540022528817406, "grad_norm": 3.172830581665039, "learning_rate": 0.00039640197152245347, "loss": 3.1271, "step": 1408 }, { "epoch": 0.09546798112999805, "grad_norm": 5.370691776275635, "learning_rate": 0.00039639649507119387, "loss": 2.6997, "step": 1409 }, { "epoch": 0.09553573697182204, "grad_norm": 4.910064697265625, "learning_rate": 0.0003963910186199343, "loss": 2.9232, "step": 1410 }, { "epoch": 0.09560349281364602, "grad_norm": 5.6997785568237305, "learning_rate": 0.0003963855421686747, "loss": 2.7393, "step": 1411 }, { "epoch": 0.09567124865547001, "grad_norm": 3.432875871658325, "learning_rate": 0.0003963800657174151, "loss": 2.9108, "step": 1412 }, { "epoch": 0.095739004497294, "grad_norm": 5.890623092651367, "learning_rate": 0.0003963745892661555, "loss": 3.1078, "step": 1413 }, { "epoch": 0.09580676033911799, "grad_norm": 3.961026668548584, "learning_rate": 0.000396369112814896, "loss": 3.2651, "step": 1414 }, { "epoch": 0.09587451618094198, "grad_norm": 5.667285442352295, "learning_rate": 0.00039636363636363643, "loss": 2.8413, "step": 1415 }, { "epoch": 0.09594227202276596, "grad_norm": 4.445078372955322, "learning_rate": 0.0003963581599123768, "loss": 3.1501, "step": 1416 }, { "epoch": 0.09601002786458995, "grad_norm": 5.004968643188477, "learning_rate": 0.0003963526834611172, "loss": 3.0727, "step": 1417 }, { "epoch": 0.09607778370641394, "grad_norm": 5.210635662078857, "learning_rate": 0.0003963472070098576, "loss": 2.9248, "step": 1418 }, { "epoch": 0.09614553954823793, "grad_norm": 7.102078914642334, "learning_rate": 0.000396341730558598, "loss": 2.6631, "step": 1419 }, { "epoch": 0.09621329539006192, "grad_norm": 3.516155242919922, "learning_rate": 0.0003963362541073385, "loss": 3.0647, "step": 1420 }, { "epoch": 0.0962810512318859, "grad_norm": 3.451227903366089, "learning_rate": 0.0003963307776560789, "loss": 3.6905, "step": 1421 }, { "epoch": 0.09634880707370988, "grad_norm": 4.446077823638916, "learning_rate": 0.0003963253012048193, "loss": 3.3557, "step": 1422 }, { "epoch": 0.09641656291553387, "grad_norm": 4.488996982574463, "learning_rate": 0.00039631982475355973, "loss": 2.5365, "step": 1423 }, { "epoch": 0.09648431875735786, "grad_norm": 6.061187744140625, "learning_rate": 0.00039631434830230013, "loss": 3.1525, "step": 1424 }, { "epoch": 0.09655207459918184, "grad_norm": 5.657248020172119, "learning_rate": 0.00039630887185104053, "loss": 2.6317, "step": 1425 }, { "epoch": 0.09661983044100583, "grad_norm": 6.425600528717041, "learning_rate": 0.000396303395399781, "loss": 2.9827, "step": 1426 }, { "epoch": 0.09668758628282982, "grad_norm": 5.036628723144531, "learning_rate": 0.0003962979189485214, "loss": 2.879, "step": 1427 }, { "epoch": 0.09675534212465381, "grad_norm": 4.367918968200684, "learning_rate": 0.0003962924424972618, "loss": 2.8053, "step": 1428 }, { "epoch": 0.0968230979664778, "grad_norm": 4.822283744812012, "learning_rate": 0.0003962869660460022, "loss": 3.023, "step": 1429 }, { "epoch": 0.09689085380830179, "grad_norm": 6.562445640563965, "learning_rate": 0.0003962814895947426, "loss": 3.0136, "step": 1430 }, { "epoch": 0.09695860965012577, "grad_norm": 4.266103267669678, "learning_rate": 0.0003962760131434831, "loss": 2.6668, "step": 1431 }, { "epoch": 0.09702636549194976, "grad_norm": 3.7837908267974854, "learning_rate": 0.0003962705366922235, "loss": 2.7827, "step": 1432 }, { "epoch": 0.09709412133377375, "grad_norm": 7.818897247314453, "learning_rate": 0.0003962650602409639, "loss": 2.8416, "step": 1433 }, { "epoch": 0.09716187717559774, "grad_norm": 7.434922695159912, "learning_rate": 0.0003962595837897043, "loss": 3.4634, "step": 1434 }, { "epoch": 0.09722963301742173, "grad_norm": 3.6432392597198486, "learning_rate": 0.0003962541073384447, "loss": 2.9409, "step": 1435 }, { "epoch": 0.09729738885924571, "grad_norm": 4.159408092498779, "learning_rate": 0.00039624863088718514, "loss": 3.1823, "step": 1436 }, { "epoch": 0.09736514470106969, "grad_norm": 3.582764148712158, "learning_rate": 0.00039624315443592554, "loss": 2.3781, "step": 1437 }, { "epoch": 0.09743290054289368, "grad_norm": 7.628521919250488, "learning_rate": 0.00039623767798466593, "loss": 3.2472, "step": 1438 }, { "epoch": 0.09750065638471767, "grad_norm": 5.080845832824707, "learning_rate": 0.0003962322015334064, "loss": 2.9873, "step": 1439 }, { "epoch": 0.09756841222654165, "grad_norm": 6.963984489440918, "learning_rate": 0.0003962267250821468, "loss": 2.9787, "step": 1440 }, { "epoch": 0.09763616806836564, "grad_norm": 4.882854461669922, "learning_rate": 0.0003962212486308872, "loss": 2.9118, "step": 1441 }, { "epoch": 0.09770392391018963, "grad_norm": 3.8537163734436035, "learning_rate": 0.00039621577217962764, "loss": 3.0972, "step": 1442 }, { "epoch": 0.09777167975201362, "grad_norm": 4.407215595245361, "learning_rate": 0.00039621029572836804, "loss": 3.0343, "step": 1443 }, { "epoch": 0.0978394355938376, "grad_norm": 6.060123443603516, "learning_rate": 0.00039620481927710844, "loss": 2.7828, "step": 1444 }, { "epoch": 0.0979071914356616, "grad_norm": 5.434008598327637, "learning_rate": 0.00039619934282584884, "loss": 2.5776, "step": 1445 }, { "epoch": 0.09797494727748558, "grad_norm": 4.19885778427124, "learning_rate": 0.0003961938663745893, "loss": 2.3513, "step": 1446 }, { "epoch": 0.09804270311930957, "grad_norm": 4.149463176727295, "learning_rate": 0.0003961883899233297, "loss": 2.7142, "step": 1447 }, { "epoch": 0.09811045896113356, "grad_norm": 6.602935791015625, "learning_rate": 0.00039618291347207014, "loss": 2.975, "step": 1448 }, { "epoch": 0.09817821480295755, "grad_norm": 5.896492958068848, "learning_rate": 0.00039617743702081054, "loss": 3.2241, "step": 1449 }, { "epoch": 0.09824597064478154, "grad_norm": 4.733145713806152, "learning_rate": 0.00039617196056955094, "loss": 2.801, "step": 1450 }, { "epoch": 0.09831372648660552, "grad_norm": 5.897229194641113, "learning_rate": 0.00039616648411829134, "loss": 2.1594, "step": 1451 }, { "epoch": 0.0983814823284295, "grad_norm": 4.709463596343994, "learning_rate": 0.0003961610076670318, "loss": 2.8955, "step": 1452 }, { "epoch": 0.09844923817025349, "grad_norm": 5.284852981567383, "learning_rate": 0.0003961555312157722, "loss": 2.5491, "step": 1453 }, { "epoch": 0.09851699401207747, "grad_norm": 8.136198043823242, "learning_rate": 0.00039615005476451265, "loss": 2.8608, "step": 1454 }, { "epoch": 0.09858474985390146, "grad_norm": 4.148588180541992, "learning_rate": 0.00039614457831325305, "loss": 2.7032, "step": 1455 }, { "epoch": 0.09865250569572545, "grad_norm": 7.193914890289307, "learning_rate": 0.00039613910186199345, "loss": 3.1814, "step": 1456 }, { "epoch": 0.09872026153754944, "grad_norm": 5.965203285217285, "learning_rate": 0.00039613362541073385, "loss": 2.9008, "step": 1457 }, { "epoch": 0.09878801737937343, "grad_norm": 8.79650592803955, "learning_rate": 0.0003961281489594743, "loss": 3.2172, "step": 1458 }, { "epoch": 0.09885577322119742, "grad_norm": 6.5463032722473145, "learning_rate": 0.0003961226725082147, "loss": 2.8966, "step": 1459 }, { "epoch": 0.0989235290630214, "grad_norm": 4.920925617218018, "learning_rate": 0.0003961171960569551, "loss": 2.7232, "step": 1460 }, { "epoch": 0.09899128490484539, "grad_norm": 5.8619771003723145, "learning_rate": 0.0003961117196056955, "loss": 2.5593, "step": 1461 }, { "epoch": 0.09905904074666938, "grad_norm": 4.172482967376709, "learning_rate": 0.00039610624315443595, "loss": 3.1805, "step": 1462 }, { "epoch": 0.09912679658849337, "grad_norm": 4.212977409362793, "learning_rate": 0.00039610076670317635, "loss": 2.6483, "step": 1463 }, { "epoch": 0.09919455243031736, "grad_norm": 6.384244441986084, "learning_rate": 0.0003960952902519168, "loss": 2.5341, "step": 1464 }, { "epoch": 0.09926230827214134, "grad_norm": 5.955776691436768, "learning_rate": 0.0003960898138006572, "loss": 2.9147, "step": 1465 }, { "epoch": 0.09933006411396533, "grad_norm": 5.747391223907471, "learning_rate": 0.0003960843373493976, "loss": 3.1934, "step": 1466 }, { "epoch": 0.09939781995578931, "grad_norm": 5.476200580596924, "learning_rate": 0.000396078860898138, "loss": 3.2506, "step": 1467 }, { "epoch": 0.0994655757976133, "grad_norm": 6.76502799987793, "learning_rate": 0.0003960733844468784, "loss": 2.8963, "step": 1468 }, { "epoch": 0.09953333163943728, "grad_norm": 5.352315425872803, "learning_rate": 0.00039606790799561885, "loss": 2.5278, "step": 1469 }, { "epoch": 0.09960108748126127, "grad_norm": 6.485702991485596, "learning_rate": 0.0003960624315443593, "loss": 2.7978, "step": 1470 }, { "epoch": 0.09966884332308526, "grad_norm": 7.389594554901123, "learning_rate": 0.0003960569550930997, "loss": 2.773, "step": 1471 }, { "epoch": 0.09973659916490925, "grad_norm": 6.473922252655029, "learning_rate": 0.0003960514786418401, "loss": 2.0795, "step": 1472 }, { "epoch": 0.09980435500673324, "grad_norm": 4.82306432723999, "learning_rate": 0.0003960460021905805, "loss": 2.3972, "step": 1473 }, { "epoch": 0.09987211084855722, "grad_norm": 6.37922477722168, "learning_rate": 0.00039604052573932096, "loss": 2.3796, "step": 1474 }, { "epoch": 0.09993986669038121, "grad_norm": 5.97680139541626, "learning_rate": 0.00039603504928806136, "loss": 2.878, "step": 1475 }, { "epoch": 0.1000076225322052, "grad_norm": 5.786788463592529, "learning_rate": 0.00039602957283680176, "loss": 2.7823, "step": 1476 }, { "epoch": 0.10007537837402919, "grad_norm": 4.16139030456543, "learning_rate": 0.0003960240963855422, "loss": 2.4569, "step": 1477 }, { "epoch": 0.10014313421585318, "grad_norm": 7.280211448669434, "learning_rate": 0.0003960186199342826, "loss": 2.6938, "step": 1478 }, { "epoch": 0.10021089005767717, "grad_norm": 6.102603435516357, "learning_rate": 0.000396013143483023, "loss": 3.0976, "step": 1479 }, { "epoch": 0.10027864589950115, "grad_norm": 10.204267501831055, "learning_rate": 0.00039600766703176346, "loss": 2.3684, "step": 1480 }, { "epoch": 0.10034640174132513, "grad_norm": 4.742012977600098, "learning_rate": 0.00039600219058050386, "loss": 2.2722, "step": 1481 }, { "epoch": 0.10041415758314912, "grad_norm": 6.115045070648193, "learning_rate": 0.00039599671412924426, "loss": 3.1026, "step": 1482 }, { "epoch": 0.1004819134249731, "grad_norm": 5.21455717086792, "learning_rate": 0.00039599123767798466, "loss": 2.4663, "step": 1483 }, { "epoch": 0.10054966926679709, "grad_norm": 5.136172294616699, "learning_rate": 0.00039598576122672506, "loss": 2.7271, "step": 1484 }, { "epoch": 0.10061742510862108, "grad_norm": 8.2840576171875, "learning_rate": 0.0003959802847754655, "loss": 2.7367, "step": 1485 }, { "epoch": 0.10068518095044507, "grad_norm": 7.235759735107422, "learning_rate": 0.00039597480832420596, "loss": 2.706, "step": 1486 }, { "epoch": 0.10075293679226906, "grad_norm": 5.551850318908691, "learning_rate": 0.00039596933187294636, "loss": 2.3654, "step": 1487 }, { "epoch": 0.10082069263409305, "grad_norm": 9.961389541625977, "learning_rate": 0.00039596385542168676, "loss": 3.0937, "step": 1488 }, { "epoch": 0.10088844847591703, "grad_norm": 7.169528484344482, "learning_rate": 0.00039595837897042716, "loss": 2.2312, "step": 1489 }, { "epoch": 0.10095620431774102, "grad_norm": 7.014019012451172, "learning_rate": 0.0003959529025191676, "loss": 2.3882, "step": 1490 }, { "epoch": 0.10102396015956501, "grad_norm": 6.1078948974609375, "learning_rate": 0.000395947426067908, "loss": 2.1489, "step": 1491 }, { "epoch": 0.101091716001389, "grad_norm": 8.101475715637207, "learning_rate": 0.0003959419496166484, "loss": 2.4828, "step": 1492 }, { "epoch": 0.10115947184321299, "grad_norm": 7.1105523109436035, "learning_rate": 0.00039593647316538887, "loss": 2.5171, "step": 1493 }, { "epoch": 0.10122722768503697, "grad_norm": 6.7352614402771, "learning_rate": 0.00039593099671412927, "loss": 2.5246, "step": 1494 }, { "epoch": 0.10129498352686096, "grad_norm": 5.006543159484863, "learning_rate": 0.00039592552026286967, "loss": 2.515, "step": 1495 }, { "epoch": 0.10136273936868494, "grad_norm": 12.342672348022461, "learning_rate": 0.0003959200438116101, "loss": 2.4808, "step": 1496 }, { "epoch": 0.10143049521050893, "grad_norm": 7.189533710479736, "learning_rate": 0.0003959145673603505, "loss": 2.9216, "step": 1497 }, { "epoch": 0.10149825105233291, "grad_norm": 6.30384635925293, "learning_rate": 0.0003959090909090909, "loss": 2.6325, "step": 1498 }, { "epoch": 0.1015660068941569, "grad_norm": 14.021766662597656, "learning_rate": 0.0003959036144578313, "loss": 2.9859, "step": 1499 }, { "epoch": 0.10163376273598089, "grad_norm": 5.380823612213135, "learning_rate": 0.0003958981380065717, "loss": 2.5742, "step": 1500 }, { "epoch": 0.10170151857780488, "grad_norm": 10.402581214904785, "learning_rate": 0.00039589266155531217, "loss": 2.3011, "step": 1501 }, { "epoch": 0.10176927441962887, "grad_norm": 10.72081184387207, "learning_rate": 0.0003958871851040526, "loss": 2.6849, "step": 1502 }, { "epoch": 0.10183703026145285, "grad_norm": 9.020021438598633, "learning_rate": 0.000395881708652793, "loss": 2.3963, "step": 1503 }, { "epoch": 0.10190478610327684, "grad_norm": 6.383272171020508, "learning_rate": 0.0003958762322015334, "loss": 1.6993, "step": 1504 }, { "epoch": 0.10197254194510083, "grad_norm": 10.206408500671387, "learning_rate": 0.0003958707557502738, "loss": 2.9702, "step": 1505 }, { "epoch": 0.10204029778692482, "grad_norm": 5.172316551208496, "learning_rate": 0.0003958652792990142, "loss": 2.1335, "step": 1506 }, { "epoch": 0.10210805362874881, "grad_norm": 8.15674114227295, "learning_rate": 0.0003958598028477547, "loss": 2.934, "step": 1507 }, { "epoch": 0.1021758094705728, "grad_norm": 6.668185234069824, "learning_rate": 0.0003958543263964951, "loss": 2.7704, "step": 1508 }, { "epoch": 0.10224356531239678, "grad_norm": 12.379969596862793, "learning_rate": 0.0003958488499452355, "loss": 2.6764, "step": 1509 }, { "epoch": 0.10231132115422077, "grad_norm": 11.362810134887695, "learning_rate": 0.0003958433734939759, "loss": 2.2466, "step": 1510 }, { "epoch": 0.10237907699604475, "grad_norm": 6.38102388381958, "learning_rate": 0.0003958378970427163, "loss": 2.6702, "step": 1511 }, { "epoch": 0.10244683283786873, "grad_norm": 5.729297637939453, "learning_rate": 0.0003958324205914568, "loss": 2.4596, "step": 1512 }, { "epoch": 0.10251458867969272, "grad_norm": 7.818333148956299, "learning_rate": 0.0003958269441401972, "loss": 2.7329, "step": 1513 }, { "epoch": 0.10258234452151671, "grad_norm": 6.059780597686768, "learning_rate": 0.0003958214676889376, "loss": 2.4393, "step": 1514 }, { "epoch": 0.1026501003633407, "grad_norm": 6.723374366760254, "learning_rate": 0.000395815991237678, "loss": 2.477, "step": 1515 }, { "epoch": 0.10271785620516469, "grad_norm": 6.627840518951416, "learning_rate": 0.00039581051478641843, "loss": 2.1288, "step": 1516 }, { "epoch": 0.10278561204698868, "grad_norm": 6.84044885635376, "learning_rate": 0.00039580503833515883, "loss": 2.5723, "step": 1517 }, { "epoch": 0.10285336788881266, "grad_norm": 6.076621055603027, "learning_rate": 0.0003957995618838993, "loss": 2.3418, "step": 1518 }, { "epoch": 0.10292112373063665, "grad_norm": 5.930188179016113, "learning_rate": 0.0003957940854326397, "loss": 2.8516, "step": 1519 }, { "epoch": 0.10298887957246064, "grad_norm": 6.278172492980957, "learning_rate": 0.0003957886089813801, "loss": 2.0256, "step": 1520 }, { "epoch": 0.10305663541428463, "grad_norm": 10.729782104492188, "learning_rate": 0.0003957831325301205, "loss": 2.2401, "step": 1521 }, { "epoch": 0.10312439125610862, "grad_norm": 6.3917741775512695, "learning_rate": 0.0003957776560788609, "loss": 2.8768, "step": 1522 }, { "epoch": 0.1031921470979326, "grad_norm": 7.535367012023926, "learning_rate": 0.00039577217962760133, "loss": 2.4606, "step": 1523 }, { "epoch": 0.10325990293975659, "grad_norm": 5.2437262535095215, "learning_rate": 0.0003957667031763418, "loss": 1.643, "step": 1524 }, { "epoch": 0.10332765878158058, "grad_norm": 10.855128288269043, "learning_rate": 0.0003957612267250822, "loss": 1.9913, "step": 1525 }, { "epoch": 0.10339541462340456, "grad_norm": 5.784173011779785, "learning_rate": 0.0003957557502738226, "loss": 2.0367, "step": 1526 }, { "epoch": 0.10346317046522854, "grad_norm": 12.547819137573242, "learning_rate": 0.000395750273822563, "loss": 2.4602, "step": 1527 }, { "epoch": 0.10353092630705253, "grad_norm": 12.875210762023926, "learning_rate": 0.00039574479737130344, "loss": 2.6349, "step": 1528 }, { "epoch": 0.10359868214887652, "grad_norm": 11.491096496582031, "learning_rate": 0.00039573932092004384, "loss": 2.8111, "step": 1529 }, { "epoch": 0.10366643799070051, "grad_norm": 10.651978492736816, "learning_rate": 0.00039573384446878423, "loss": 2.4111, "step": 1530 }, { "epoch": 0.1037341938325245, "grad_norm": 11.756603240966797, "learning_rate": 0.00039572836801752463, "loss": 2.1734, "step": 1531 }, { "epoch": 0.10380194967434848, "grad_norm": 7.683773994445801, "learning_rate": 0.0003957228915662651, "loss": 2.0083, "step": 1532 }, { "epoch": 0.10386970551617247, "grad_norm": 6.986481666564941, "learning_rate": 0.0003957174151150055, "loss": 1.7878, "step": 1533 }, { "epoch": 0.10393746135799646, "grad_norm": 7.494926929473877, "learning_rate": 0.00039571193866374594, "loss": 2.2895, "step": 1534 }, { "epoch": 0.10400521719982045, "grad_norm": 5.501714706420898, "learning_rate": 0.00039570646221248634, "loss": 1.9569, "step": 1535 }, { "epoch": 0.10407297304164444, "grad_norm": 10.523665428161621, "learning_rate": 0.00039570098576122674, "loss": 2.0419, "step": 1536 }, { "epoch": 0.10414072888346843, "grad_norm": 10.4854097366333, "learning_rate": 0.00039569550930996714, "loss": 2.2933, "step": 1537 }, { "epoch": 0.10420848472529241, "grad_norm": 19.430438995361328, "learning_rate": 0.00039569003285870754, "loss": 2.2288, "step": 1538 }, { "epoch": 0.1042762405671164, "grad_norm": 8.312188148498535, "learning_rate": 0.000395684556407448, "loss": 1.9245, "step": 1539 }, { "epoch": 0.10434399640894039, "grad_norm": 9.426054000854492, "learning_rate": 0.00039567907995618844, "loss": 2.2526, "step": 1540 }, { "epoch": 0.10441175225076436, "grad_norm": 10.691904067993164, "learning_rate": 0.00039567360350492884, "loss": 2.193, "step": 1541 }, { "epoch": 0.10447950809258835, "grad_norm": 6.674832820892334, "learning_rate": 0.00039566812705366924, "loss": 2.0514, "step": 1542 }, { "epoch": 0.10454726393441234, "grad_norm": 7.035017013549805, "learning_rate": 0.00039566265060240964, "loss": 2.0483, "step": 1543 }, { "epoch": 0.10461501977623633, "grad_norm": 7.759828090667725, "learning_rate": 0.00039565717415115004, "loss": 2.1638, "step": 1544 }, { "epoch": 0.10468277561806032, "grad_norm": 7.071404933929443, "learning_rate": 0.0003956516976998905, "loss": 2.0131, "step": 1545 }, { "epoch": 0.1047505314598843, "grad_norm": 8.26661491394043, "learning_rate": 0.0003956462212486309, "loss": 1.8065, "step": 1546 }, { "epoch": 0.1048182873017083, "grad_norm": 5.137301445007324, "learning_rate": 0.00039564074479737135, "loss": 1.8734, "step": 1547 }, { "epoch": 0.10488604314353228, "grad_norm": 6.565690994262695, "learning_rate": 0.00039563526834611175, "loss": 2.0718, "step": 1548 }, { "epoch": 0.10495379898535627, "grad_norm": 5.399538993835449, "learning_rate": 0.00039562979189485214, "loss": 1.6017, "step": 1549 }, { "epoch": 0.10502155482718026, "grad_norm": 8.50685977935791, "learning_rate": 0.0003956243154435926, "loss": 1.6937, "step": 1550 }, { "epoch": 0.10508931066900425, "grad_norm": 9.405255317687988, "learning_rate": 0.000395618838992333, "loss": 1.5298, "step": 1551 }, { "epoch": 0.10515706651082823, "grad_norm": 10.210711479187012, "learning_rate": 0.0003956133625410734, "loss": 1.9383, "step": 1552 }, { "epoch": 0.10522482235265222, "grad_norm": 5.813169002532959, "learning_rate": 0.0003956078860898138, "loss": 1.7489, "step": 1553 }, { "epoch": 0.10529257819447621, "grad_norm": 7.955734729766846, "learning_rate": 0.0003956024096385542, "loss": 2.0913, "step": 1554 }, { "epoch": 0.1053603340363002, "grad_norm": 6.004859447479248, "learning_rate": 0.00039559693318729465, "loss": 1.7586, "step": 1555 }, { "epoch": 0.10542808987812417, "grad_norm": 7.190276622772217, "learning_rate": 0.0003955914567360351, "loss": 1.8692, "step": 1556 }, { "epoch": 0.10549584571994816, "grad_norm": 6.522455215454102, "learning_rate": 0.0003955859802847755, "loss": 1.6489, "step": 1557 }, { "epoch": 0.10556360156177215, "grad_norm": 16.731210708618164, "learning_rate": 0.0003955805038335159, "loss": 2.1741, "step": 1558 }, { "epoch": 0.10563135740359614, "grad_norm": 9.050701141357422, "learning_rate": 0.0003955750273822563, "loss": 1.71, "step": 1559 }, { "epoch": 0.10569911324542013, "grad_norm": 5.514721870422363, "learning_rate": 0.0003955695509309967, "loss": 1.5551, "step": 1560 }, { "epoch": 0.10576686908724411, "grad_norm": 8.143468856811523, "learning_rate": 0.00039556407447973715, "loss": 1.3367, "step": 1561 }, { "epoch": 0.1058346249290681, "grad_norm": 7.43070650100708, "learning_rate": 0.00039555859802847755, "loss": 1.7636, "step": 1562 }, { "epoch": 0.10590238077089209, "grad_norm": 7.976333141326904, "learning_rate": 0.000395553121577218, "loss": 1.6309, "step": 1563 }, { "epoch": 0.10597013661271608, "grad_norm": 5.722769260406494, "learning_rate": 0.0003955476451259584, "loss": 1.7133, "step": 1564 }, { "epoch": 0.10603789245454007, "grad_norm": 6.700421333312988, "learning_rate": 0.0003955421686746988, "loss": 1.6723, "step": 1565 }, { "epoch": 0.10610564829636406, "grad_norm": 6.96038293838501, "learning_rate": 0.00039553669222343926, "loss": 1.8589, "step": 1566 }, { "epoch": 0.10617340413818804, "grad_norm": 5.43287467956543, "learning_rate": 0.00039553121577217966, "loss": 1.5708, "step": 1567 }, { "epoch": 0.10624115998001203, "grad_norm": 6.3512654304504395, "learning_rate": 0.00039552573932092006, "loss": 1.8835, "step": 1568 }, { "epoch": 0.10630891582183602, "grad_norm": 6.035426139831543, "learning_rate": 0.00039552026286966045, "loss": 1.3528, "step": 1569 }, { "epoch": 0.10637667166366001, "grad_norm": 6.670430660247803, "learning_rate": 0.00039551478641840085, "loss": 1.7812, "step": 1570 }, { "epoch": 0.10644442750548398, "grad_norm": 6.571809768676758, "learning_rate": 0.0003955093099671413, "loss": 1.532, "step": 1571 }, { "epoch": 0.10651218334730797, "grad_norm": 5.342930793762207, "learning_rate": 0.00039550383351588176, "loss": 1.284, "step": 1572 }, { "epoch": 0.10657993918913196, "grad_norm": 5.984511375427246, "learning_rate": 0.00039549835706462216, "loss": 1.4433, "step": 1573 }, { "epoch": 0.10664769503095595, "grad_norm": 5.874916076660156, "learning_rate": 0.00039549288061336256, "loss": 1.6582, "step": 1574 }, { "epoch": 0.10671545087277994, "grad_norm": 6.260425090789795, "learning_rate": 0.00039548740416210296, "loss": 1.3773, "step": 1575 }, { "epoch": 0.10678320671460392, "grad_norm": 7.710513114929199, "learning_rate": 0.00039548192771084336, "loss": 1.7306, "step": 1576 }, { "epoch": 0.10685096255642791, "grad_norm": 7.668156623840332, "learning_rate": 0.0003954764512595838, "loss": 1.1667, "step": 1577 }, { "epoch": 0.1069187183982519, "grad_norm": 4.716894626617432, "learning_rate": 0.00039547097480832426, "loss": 1.6682, "step": 1578 }, { "epoch": 0.10698647424007589, "grad_norm": 14.606276512145996, "learning_rate": 0.00039546549835706466, "loss": 1.8005, "step": 1579 }, { "epoch": 0.10705423008189988, "grad_norm": 8.625142097473145, "learning_rate": 0.00039546002190580506, "loss": 2.0248, "step": 1580 }, { "epoch": 0.10712198592372386, "grad_norm": 6.178574562072754, "learning_rate": 0.00039545454545454546, "loss": 1.6757, "step": 1581 }, { "epoch": 0.10718974176554785, "grad_norm": 6.782114028930664, "learning_rate": 0.00039544906900328586, "loss": 1.384, "step": 1582 }, { "epoch": 0.10725749760737184, "grad_norm": 4.977735996246338, "learning_rate": 0.0003954435925520263, "loss": 1.4704, "step": 1583 }, { "epoch": 0.10732525344919583, "grad_norm": 6.067221164703369, "learning_rate": 0.0003954381161007667, "loss": 1.5696, "step": 1584 }, { "epoch": 0.10739300929101982, "grad_norm": 8.837647438049316, "learning_rate": 0.0003954326396495071, "loss": 1.6017, "step": 1585 }, { "epoch": 0.10746076513284379, "grad_norm": 5.214369773864746, "learning_rate": 0.00039542716319824757, "loss": 1.4412, "step": 1586 }, { "epoch": 0.10752852097466778, "grad_norm": 7.491814136505127, "learning_rate": 0.00039542168674698797, "loss": 1.3705, "step": 1587 }, { "epoch": 0.10759627681649177, "grad_norm": 6.499130725860596, "learning_rate": 0.0003954162102957284, "loss": 1.237, "step": 1588 }, { "epoch": 0.10766403265831576, "grad_norm": 5.969573974609375, "learning_rate": 0.0003954107338444688, "loss": 1.2787, "step": 1589 }, { "epoch": 0.10773178850013974, "grad_norm": 5.7216267585754395, "learning_rate": 0.0003954052573932092, "loss": 1.5467, "step": 1590 }, { "epoch": 0.10779954434196373, "grad_norm": 4.942505836486816, "learning_rate": 0.0003953997809419496, "loss": 1.467, "step": 1591 }, { "epoch": 0.10786730018378772, "grad_norm": 5.39434814453125, "learning_rate": 0.00039539430449069, "loss": 1.0495, "step": 1592 }, { "epoch": 0.10793505602561171, "grad_norm": 6.367964744567871, "learning_rate": 0.00039538882803943047, "loss": 1.4729, "step": 1593 }, { "epoch": 0.1080028118674357, "grad_norm": 6.016597270965576, "learning_rate": 0.0003953833515881709, "loss": 1.5513, "step": 1594 }, { "epoch": 0.10807056770925969, "grad_norm": 7.340023517608643, "learning_rate": 0.0003953778751369113, "loss": 1.5264, "step": 1595 }, { "epoch": 0.10813832355108367, "grad_norm": 7.440442085266113, "learning_rate": 0.0003953723986856517, "loss": 1.272, "step": 1596 }, { "epoch": 0.10820607939290766, "grad_norm": 6.647540092468262, "learning_rate": 0.0003953669222343921, "loss": 1.2248, "step": 1597 }, { "epoch": 0.10827383523473165, "grad_norm": 4.900035858154297, "learning_rate": 0.0003953614457831325, "loss": 1.41, "step": 1598 }, { "epoch": 0.10834159107655564, "grad_norm": 4.665961742401123, "learning_rate": 0.000395355969331873, "loss": 1.4108, "step": 1599 }, { "epoch": 0.10840934691837961, "grad_norm": 4.730132579803467, "learning_rate": 0.00039535049288061337, "loss": 1.3128, "step": 1600 }, { "epoch": 0.1084771027602036, "grad_norm": 4.807755947113037, "learning_rate": 0.00039534501642935377, "loss": 1.0521, "step": 1601 }, { "epoch": 0.10854485860202759, "grad_norm": 6.883864402770996, "learning_rate": 0.0003953395399780942, "loss": 1.3626, "step": 1602 }, { "epoch": 0.10861261444385158, "grad_norm": 7.1888813972473145, "learning_rate": 0.0003953340635268346, "loss": 1.4059, "step": 1603 }, { "epoch": 0.10868037028567556, "grad_norm": 5.712009906768799, "learning_rate": 0.0003953285870755751, "loss": 1.01, "step": 1604 }, { "epoch": 0.10874812612749955, "grad_norm": 4.674473285675049, "learning_rate": 0.0003953231106243155, "loss": 1.2335, "step": 1605 }, { "epoch": 0.10881588196932354, "grad_norm": 6.6640706062316895, "learning_rate": 0.0003953176341730559, "loss": 1.5289, "step": 1606 }, { "epoch": 0.10888363781114753, "grad_norm": 4.0357794761657715, "learning_rate": 0.0003953121577217963, "loss": 0.9895, "step": 1607 }, { "epoch": 0.10895139365297152, "grad_norm": 4.879321575164795, "learning_rate": 0.0003953066812705367, "loss": 1.5579, "step": 1608 }, { "epoch": 0.1090191494947955, "grad_norm": 5.251523971557617, "learning_rate": 0.00039530120481927713, "loss": 1.4569, "step": 1609 }, { "epoch": 0.1090869053366195, "grad_norm": 7.1547956466674805, "learning_rate": 0.0003952957283680176, "loss": 1.6002, "step": 1610 }, { "epoch": 0.10915466117844348, "grad_norm": 8.155040740966797, "learning_rate": 0.000395290251916758, "loss": 1.4523, "step": 1611 }, { "epoch": 0.10922241702026747, "grad_norm": 5.886632919311523, "learning_rate": 0.0003952847754654984, "loss": 1.2546, "step": 1612 }, { "epoch": 0.10929017286209146, "grad_norm": 6.654346466064453, "learning_rate": 0.0003952792990142388, "loss": 1.3579, "step": 1613 }, { "epoch": 0.10935792870391545, "grad_norm": 4.759560585021973, "learning_rate": 0.0003952738225629792, "loss": 1.6299, "step": 1614 }, { "epoch": 0.10942568454573942, "grad_norm": 5.457400321960449, "learning_rate": 0.00039526834611171963, "loss": 1.1663, "step": 1615 }, { "epoch": 0.10949344038756341, "grad_norm": 5.7266645431518555, "learning_rate": 0.00039526286966046003, "loss": 1.4867, "step": 1616 }, { "epoch": 0.1095611962293874, "grad_norm": 5.383896827697754, "learning_rate": 0.0003952573932092005, "loss": 1.5384, "step": 1617 }, { "epoch": 0.10962895207121139, "grad_norm": 4.416357040405273, "learning_rate": 0.0003952519167579409, "loss": 1.2511, "step": 1618 }, { "epoch": 0.10969670791303537, "grad_norm": 5.343496322631836, "learning_rate": 0.0003952464403066813, "loss": 1.4638, "step": 1619 }, { "epoch": 0.10976446375485936, "grad_norm": 7.058844566345215, "learning_rate": 0.0003952409638554217, "loss": 1.444, "step": 1620 }, { "epoch": 0.10983221959668335, "grad_norm": 4.528946399688721, "learning_rate": 0.00039523548740416214, "loss": 1.2046, "step": 1621 }, { "epoch": 0.10989997543850734, "grad_norm": 4.877941131591797, "learning_rate": 0.00039523001095290253, "loss": 1.2193, "step": 1622 }, { "epoch": 0.10996773128033133, "grad_norm": 4.469624996185303, "learning_rate": 0.00039522453450164293, "loss": 1.2235, "step": 1623 }, { "epoch": 0.11003548712215531, "grad_norm": 5.840634822845459, "learning_rate": 0.00039521905805038333, "loss": 1.3656, "step": 1624 }, { "epoch": 0.1101032429639793, "grad_norm": 8.5831880569458, "learning_rate": 0.0003952135815991238, "loss": 1.4582, "step": 1625 }, { "epoch": 0.11017099880580329, "grad_norm": 6.826632976531982, "learning_rate": 0.00039520810514786424, "loss": 1.3172, "step": 1626 }, { "epoch": 0.11023875464762728, "grad_norm": 4.207441806793213, "learning_rate": 0.00039520262869660464, "loss": 1.3562, "step": 1627 }, { "epoch": 0.11030651048945127, "grad_norm": 4.119324207305908, "learning_rate": 0.00039519715224534504, "loss": 1.522, "step": 1628 }, { "epoch": 0.11037426633127526, "grad_norm": 3.9980592727661133, "learning_rate": 0.00039519167579408544, "loss": 0.942, "step": 1629 }, { "epoch": 0.11044202217309923, "grad_norm": 6.543672561645508, "learning_rate": 0.00039518619934282584, "loss": 1.2047, "step": 1630 }, { "epoch": 0.11050977801492322, "grad_norm": 5.766164302825928, "learning_rate": 0.0003951807228915663, "loss": 1.3645, "step": 1631 }, { "epoch": 0.1105775338567472, "grad_norm": 4.91998815536499, "learning_rate": 0.0003951752464403067, "loss": 1.2502, "step": 1632 }, { "epoch": 0.1106452896985712, "grad_norm": 3.525949001312256, "learning_rate": 0.00039516976998904714, "loss": 1.2463, "step": 1633 }, { "epoch": 0.11071304554039518, "grad_norm": 4.586998462677002, "learning_rate": 0.00039516429353778754, "loss": 1.1385, "step": 1634 }, { "epoch": 0.11078080138221917, "grad_norm": 3.7567641735076904, "learning_rate": 0.00039515881708652794, "loss": 0.9112, "step": 1635 }, { "epoch": 0.11084855722404316, "grad_norm": 2.997755527496338, "learning_rate": 0.00039515334063526834, "loss": 0.9114, "step": 1636 }, { "epoch": 0.11091631306586715, "grad_norm": 4.092390537261963, "learning_rate": 0.0003951478641840088, "loss": 1.0279, "step": 1637 }, { "epoch": 0.11098406890769114, "grad_norm": 5.978790760040283, "learning_rate": 0.0003951423877327492, "loss": 1.2139, "step": 1638 }, { "epoch": 0.11105182474951512, "grad_norm": 4.779129981994629, "learning_rate": 0.0003951369112814896, "loss": 1.2412, "step": 1639 }, { "epoch": 0.11111958059133911, "grad_norm": 5.832210540771484, "learning_rate": 0.00039513143483023005, "loss": 1.2601, "step": 1640 }, { "epoch": 0.1111873364331631, "grad_norm": 5.680749416351318, "learning_rate": 0.00039512595837897044, "loss": 1.4006, "step": 1641 }, { "epoch": 0.11125509227498709, "grad_norm": 15.622469902038574, "learning_rate": 0.0003951204819277109, "loss": 1.3316, "step": 1642 }, { "epoch": 0.11132284811681108, "grad_norm": 5.687697410583496, "learning_rate": 0.0003951150054764513, "loss": 0.9684, "step": 1643 }, { "epoch": 0.11139060395863506, "grad_norm": 4.347750186920166, "learning_rate": 0.0003951095290251917, "loss": 1.21, "step": 1644 }, { "epoch": 0.11145835980045904, "grad_norm": 4.140382289886475, "learning_rate": 0.0003951040525739321, "loss": 1.2795, "step": 1645 }, { "epoch": 0.11152611564228303, "grad_norm": 4.603139400482178, "learning_rate": 0.0003950985761226725, "loss": 1.1003, "step": 1646 }, { "epoch": 0.11159387148410702, "grad_norm": 4.206707000732422, "learning_rate": 0.00039509309967141295, "loss": 1.1876, "step": 1647 }, { "epoch": 0.111661627325931, "grad_norm": 4.687857627868652, "learning_rate": 0.0003950876232201534, "loss": 1.1081, "step": 1648 }, { "epoch": 0.11172938316775499, "grad_norm": 4.899212837219238, "learning_rate": 0.0003950821467688938, "loss": 1.4534, "step": 1649 }, { "epoch": 0.11179713900957898, "grad_norm": 4.628307342529297, "learning_rate": 0.0003950766703176342, "loss": 1.0362, "step": 1650 }, { "epoch": 0.11186489485140297, "grad_norm": 5.196741104125977, "learning_rate": 0.0003950711938663746, "loss": 1.3754, "step": 1651 }, { "epoch": 0.11193265069322696, "grad_norm": 6.112636089324951, "learning_rate": 0.000395065717415115, "loss": 0.9511, "step": 1652 }, { "epoch": 0.11200040653505094, "grad_norm": 4.267406463623047, "learning_rate": 0.00039506024096385545, "loss": 0.9151, "step": 1653 }, { "epoch": 0.11206816237687493, "grad_norm": 8.704758644104004, "learning_rate": 0.00039505476451259585, "loss": 1.1057, "step": 1654 }, { "epoch": 0.11213591821869892, "grad_norm": 4.903751850128174, "learning_rate": 0.00039504928806133625, "loss": 1.0817, "step": 1655 }, { "epoch": 0.11220367406052291, "grad_norm": 8.844914436340332, "learning_rate": 0.0003950438116100767, "loss": 1.4385, "step": 1656 }, { "epoch": 0.1122714299023469, "grad_norm": 4.817325115203857, "learning_rate": 0.0003950383351588171, "loss": 1.1953, "step": 1657 }, { "epoch": 0.11233918574417089, "grad_norm": 9.573628425598145, "learning_rate": 0.0003950328587075575, "loss": 1.4472, "step": 1658 }, { "epoch": 0.11240694158599487, "grad_norm": 7.392648220062256, "learning_rate": 0.00039502738225629796, "loss": 0.987, "step": 1659 }, { "epoch": 0.11247469742781885, "grad_norm": 7.680069446563721, "learning_rate": 0.00039502190580503835, "loss": 1.0029, "step": 1660 }, { "epoch": 0.11254245326964284, "grad_norm": 5.185049057006836, "learning_rate": 0.00039501642935377875, "loss": 0.9314, "step": 1661 }, { "epoch": 0.11261020911146682, "grad_norm": 4.063965797424316, "learning_rate": 0.00039501095290251915, "loss": 1.1307, "step": 1662 }, { "epoch": 0.11267796495329081, "grad_norm": 3.820652723312378, "learning_rate": 0.0003950054764512596, "loss": 1.161, "step": 1663 }, { "epoch": 0.1127457207951148, "grad_norm": 4.7121477127075195, "learning_rate": 0.00039500000000000006, "loss": 1.3195, "step": 1664 }, { "epoch": 0.11281347663693879, "grad_norm": 4.318291664123535, "learning_rate": 0.00039499452354874046, "loss": 1.1162, "step": 1665 }, { "epoch": 0.11288123247876278, "grad_norm": 3.9716618061065674, "learning_rate": 0.00039498904709748086, "loss": 1.1623, "step": 1666 }, { "epoch": 0.11294898832058677, "grad_norm": 5.931141376495361, "learning_rate": 0.00039498357064622126, "loss": 1.0076, "step": 1667 }, { "epoch": 0.11301674416241075, "grad_norm": 5.203697204589844, "learning_rate": 0.00039497809419496166, "loss": 1.2425, "step": 1668 }, { "epoch": 0.11308450000423474, "grad_norm": 6.24186897277832, "learning_rate": 0.0003949726177437021, "loss": 1.1169, "step": 1669 }, { "epoch": 0.11315225584605873, "grad_norm": 4.47183895111084, "learning_rate": 0.0003949671412924425, "loss": 1.4699, "step": 1670 }, { "epoch": 0.11322001168788272, "grad_norm": 5.265829563140869, "learning_rate": 0.00039496166484118296, "loss": 1.2083, "step": 1671 }, { "epoch": 0.1132877675297067, "grad_norm": 4.152087688446045, "learning_rate": 0.00039495618838992336, "loss": 1.0802, "step": 1672 }, { "epoch": 0.1133555233715307, "grad_norm": 4.926446437835693, "learning_rate": 0.00039495071193866376, "loss": 1.3733, "step": 1673 }, { "epoch": 0.11342327921335468, "grad_norm": 4.520811557769775, "learning_rate": 0.00039494523548740416, "loss": 1.292, "step": 1674 }, { "epoch": 0.11349103505517866, "grad_norm": 4.155094623565674, "learning_rate": 0.0003949397590361446, "loss": 1.1369, "step": 1675 }, { "epoch": 0.11355879089700265, "grad_norm": 4.683309078216553, "learning_rate": 0.000394934282584885, "loss": 1.141, "step": 1676 }, { "epoch": 0.11362654673882663, "grad_norm": 4.8344902992248535, "learning_rate": 0.0003949288061336254, "loss": 1.2673, "step": 1677 }, { "epoch": 0.11369430258065062, "grad_norm": 6.6621994972229, "learning_rate": 0.0003949233296823658, "loss": 1.1439, "step": 1678 }, { "epoch": 0.11376205842247461, "grad_norm": 5.894452095031738, "learning_rate": 0.00039491785323110627, "loss": 1.226, "step": 1679 }, { "epoch": 0.1138298142642986, "grad_norm": 4.4098615646362305, "learning_rate": 0.0003949123767798467, "loss": 0.973, "step": 1680 }, { "epoch": 0.11389757010612259, "grad_norm": 4.392683982849121, "learning_rate": 0.0003949069003285871, "loss": 1.3998, "step": 1681 }, { "epoch": 0.11396532594794657, "grad_norm": 6.045029640197754, "learning_rate": 0.0003949014238773275, "loss": 1.3963, "step": 1682 }, { "epoch": 0.11403308178977056, "grad_norm": 4.191753387451172, "learning_rate": 0.0003948959474260679, "loss": 1.1791, "step": 1683 }, { "epoch": 0.11410083763159455, "grad_norm": 4.844413757324219, "learning_rate": 0.0003948904709748083, "loss": 1.124, "step": 1684 }, { "epoch": 0.11416859347341854, "grad_norm": 3.790539264678955, "learning_rate": 0.00039488499452354877, "loss": 1.1751, "step": 1685 }, { "epoch": 0.11423634931524253, "grad_norm": 4.676909923553467, "learning_rate": 0.00039487951807228917, "loss": 1.2751, "step": 1686 }, { "epoch": 0.11430410515706652, "grad_norm": 4.819333553314209, "learning_rate": 0.0003948740416210296, "loss": 1.3905, "step": 1687 }, { "epoch": 0.1143718609988905, "grad_norm": 6.56510066986084, "learning_rate": 0.00039486856516977, "loss": 1.2123, "step": 1688 }, { "epoch": 0.11443961684071449, "grad_norm": 3.3898584842681885, "learning_rate": 0.0003948630887185104, "loss": 0.9921, "step": 1689 }, { "epoch": 0.11450737268253847, "grad_norm": 3.93350887298584, "learning_rate": 0.0003948576122672508, "loss": 0.9894, "step": 1690 }, { "epoch": 0.11457512852436245, "grad_norm": 4.13767671585083, "learning_rate": 0.00039485213581599127, "loss": 1.2404, "step": 1691 }, { "epoch": 0.11464288436618644, "grad_norm": 4.236894607543945, "learning_rate": 0.00039484665936473167, "loss": 0.9455, "step": 1692 }, { "epoch": 0.11471064020801043, "grad_norm": 5.689702987670898, "learning_rate": 0.00039484118291347207, "loss": 1.0252, "step": 1693 }, { "epoch": 0.11477839604983442, "grad_norm": 3.144671678543091, "learning_rate": 0.00039483570646221247, "loss": 0.9894, "step": 1694 }, { "epoch": 0.11484615189165841, "grad_norm": 4.160654544830322, "learning_rate": 0.0003948302300109529, "loss": 0.9231, "step": 1695 }, { "epoch": 0.1149139077334824, "grad_norm": 4.583846092224121, "learning_rate": 0.0003948247535596933, "loss": 1.0016, "step": 1696 }, { "epoch": 0.11498166357530638, "grad_norm": 3.437683343887329, "learning_rate": 0.0003948192771084338, "loss": 1.0387, "step": 1697 }, { "epoch": 0.11504941941713037, "grad_norm": 4.374656677246094, "learning_rate": 0.0003948138006571742, "loss": 1.3334, "step": 1698 }, { "epoch": 0.11511717525895436, "grad_norm": 5.45866060256958, "learning_rate": 0.0003948083242059146, "loss": 1.1768, "step": 1699 }, { "epoch": 0.11518493110077835, "grad_norm": 3.4499471187591553, "learning_rate": 0.000394802847754655, "loss": 1.1246, "step": 1700 }, { "epoch": 0.11525268694260234, "grad_norm": 5.838320732116699, "learning_rate": 0.00039479737130339543, "loss": 1.2268, "step": 1701 }, { "epoch": 0.11532044278442632, "grad_norm": 4.32048225402832, "learning_rate": 0.0003947918948521358, "loss": 1.3561, "step": 1702 }, { "epoch": 0.11538819862625031, "grad_norm": 5.305597305297852, "learning_rate": 0.0003947864184008763, "loss": 1.2363, "step": 1703 }, { "epoch": 0.1154559544680743, "grad_norm": 4.5266194343566895, "learning_rate": 0.0003947809419496167, "loss": 1.2523, "step": 1704 }, { "epoch": 0.11552371030989828, "grad_norm": 5.089867115020752, "learning_rate": 0.0003947754654983571, "loss": 1.1502, "step": 1705 }, { "epoch": 0.11559146615172226, "grad_norm": 3.994213104248047, "learning_rate": 0.0003947699890470975, "loss": 1.0087, "step": 1706 }, { "epoch": 0.11565922199354625, "grad_norm": 3.7438063621520996, "learning_rate": 0.00039476451259583793, "loss": 1.1033, "step": 1707 }, { "epoch": 0.11572697783537024, "grad_norm": 4.32094144821167, "learning_rate": 0.00039475903614457833, "loss": 1.2075, "step": 1708 }, { "epoch": 0.11579473367719423, "grad_norm": 5.756608963012695, "learning_rate": 0.00039475355969331873, "loss": 1.0526, "step": 1709 }, { "epoch": 0.11586248951901822, "grad_norm": 3.862917423248291, "learning_rate": 0.0003947480832420592, "loss": 0.9751, "step": 1710 }, { "epoch": 0.1159302453608422, "grad_norm": 4.8131866455078125, "learning_rate": 0.0003947426067907996, "loss": 1.0616, "step": 1711 }, { "epoch": 0.11599800120266619, "grad_norm": 4.730789661407471, "learning_rate": 0.00039473713033954, "loss": 1.3042, "step": 1712 }, { "epoch": 0.11606575704449018, "grad_norm": 4.7966790199279785, "learning_rate": 0.00039473165388828043, "loss": 1.2819, "step": 1713 }, { "epoch": 0.11613351288631417, "grad_norm": 7.302445888519287, "learning_rate": 0.00039472617743702083, "loss": 1.1682, "step": 1714 }, { "epoch": 0.11620126872813816, "grad_norm": 5.587337017059326, "learning_rate": 0.00039472070098576123, "loss": 1.2243, "step": 1715 }, { "epoch": 0.11626902456996215, "grad_norm": 5.829506874084473, "learning_rate": 0.00039471522453450163, "loss": 1.4658, "step": 1716 }, { "epoch": 0.11633678041178613, "grad_norm": 4.281583786010742, "learning_rate": 0.0003947097480832421, "loss": 1.1215, "step": 1717 }, { "epoch": 0.11640453625361012, "grad_norm": 5.28855562210083, "learning_rate": 0.00039470427163198254, "loss": 1.0329, "step": 1718 }, { "epoch": 0.11647229209543411, "grad_norm": 3.234163284301758, "learning_rate": 0.00039469879518072294, "loss": 0.926, "step": 1719 }, { "epoch": 0.11654004793725808, "grad_norm": 3.5200612545013428, "learning_rate": 0.00039469331872946334, "loss": 1.1168, "step": 1720 }, { "epoch": 0.11660780377908207, "grad_norm": 10.688859939575195, "learning_rate": 0.00039468784227820374, "loss": 1.0462, "step": 1721 }, { "epoch": 0.11667555962090606, "grad_norm": 4.175796985626221, "learning_rate": 0.00039468236582694414, "loss": 0.965, "step": 1722 }, { "epoch": 0.11674331546273005, "grad_norm": 4.7228102684021, "learning_rate": 0.0003946768893756846, "loss": 1.0021, "step": 1723 }, { "epoch": 0.11681107130455404, "grad_norm": 3.9389655590057373, "learning_rate": 0.000394671412924425, "loss": 1.3303, "step": 1724 }, { "epoch": 0.11687882714637803, "grad_norm": 4.331575870513916, "learning_rate": 0.0003946659364731654, "loss": 1.1472, "step": 1725 }, { "epoch": 0.11694658298820201, "grad_norm": 4.8456830978393555, "learning_rate": 0.00039466046002190584, "loss": 1.0899, "step": 1726 }, { "epoch": 0.117014338830026, "grad_norm": 4.778947353363037, "learning_rate": 0.00039465498357064624, "loss": 1.0251, "step": 1727 }, { "epoch": 0.11708209467184999, "grad_norm": 3.2696573734283447, "learning_rate": 0.00039464950711938664, "loss": 0.9866, "step": 1728 }, { "epoch": 0.11714985051367398, "grad_norm": 4.278584003448486, "learning_rate": 0.0003946440306681271, "loss": 1.2932, "step": 1729 }, { "epoch": 0.11721760635549797, "grad_norm": 4.096561908721924, "learning_rate": 0.0003946385542168675, "loss": 1.1068, "step": 1730 }, { "epoch": 0.11728536219732195, "grad_norm": 4.081788063049316, "learning_rate": 0.0003946330777656079, "loss": 1.0378, "step": 1731 }, { "epoch": 0.11735311803914594, "grad_norm": 4.094526767730713, "learning_rate": 0.0003946276013143483, "loss": 1.0869, "step": 1732 }, { "epoch": 0.11742087388096993, "grad_norm": 3.8240902423858643, "learning_rate": 0.0003946221248630887, "loss": 1.134, "step": 1733 }, { "epoch": 0.1174886297227939, "grad_norm": 5.050183296203613, "learning_rate": 0.00039461664841182914, "loss": 1.2009, "step": 1734 }, { "epoch": 0.1175563855646179, "grad_norm": 4.489464282989502, "learning_rate": 0.0003946111719605696, "loss": 1.0914, "step": 1735 }, { "epoch": 0.11762414140644188, "grad_norm": 5.214837551116943, "learning_rate": 0.00039460569550931, "loss": 0.9761, "step": 1736 }, { "epoch": 0.11769189724826587, "grad_norm": 5.526569843292236, "learning_rate": 0.0003946002190580504, "loss": 1.0425, "step": 1737 }, { "epoch": 0.11775965309008986, "grad_norm": 7.189176559448242, "learning_rate": 0.0003945947426067908, "loss": 1.3829, "step": 1738 }, { "epoch": 0.11782740893191385, "grad_norm": 4.321474075317383, "learning_rate": 0.00039458926615553125, "loss": 1.0042, "step": 1739 }, { "epoch": 0.11789516477373783, "grad_norm": 6.104100704193115, "learning_rate": 0.00039458378970427165, "loss": 1.1142, "step": 1740 }, { "epoch": 0.11796292061556182, "grad_norm": 3.5817818641662598, "learning_rate": 0.0003945783132530121, "loss": 0.7784, "step": 1741 }, { "epoch": 0.11803067645738581, "grad_norm": 5.243605136871338, "learning_rate": 0.0003945728368017525, "loss": 0.9711, "step": 1742 }, { "epoch": 0.1180984322992098, "grad_norm": 4.971038818359375, "learning_rate": 0.0003945673603504929, "loss": 1.1133, "step": 1743 }, { "epoch": 0.11816618814103379, "grad_norm": 4.007874965667725, "learning_rate": 0.0003945618838992333, "loss": 1.063, "step": 1744 }, { "epoch": 0.11823394398285778, "grad_norm": 3.4564332962036133, "learning_rate": 0.00039455640744797375, "loss": 1.0024, "step": 1745 }, { "epoch": 0.11830169982468176, "grad_norm": 5.168217658996582, "learning_rate": 0.00039455093099671415, "loss": 0.9845, "step": 1746 }, { "epoch": 0.11836945566650575, "grad_norm": 4.99029541015625, "learning_rate": 0.00039454545454545455, "loss": 1.3208, "step": 1747 }, { "epoch": 0.11843721150832974, "grad_norm": 4.303587436676025, "learning_rate": 0.00039453997809419495, "loss": 1.2922, "step": 1748 }, { "epoch": 0.11850496735015371, "grad_norm": 4.584913730621338, "learning_rate": 0.0003945345016429354, "loss": 1.4634, "step": 1749 }, { "epoch": 0.1185727231919777, "grad_norm": 3.5839524269104004, "learning_rate": 0.0003945290251916758, "loss": 0.9421, "step": 1750 }, { "epoch": 0.11864047903380169, "grad_norm": 3.8840532302856445, "learning_rate": 0.00039452354874041626, "loss": 0.9788, "step": 1751 }, { "epoch": 0.11870823487562568, "grad_norm": 4.87594747543335, "learning_rate": 0.00039451807228915665, "loss": 1.3483, "step": 1752 }, { "epoch": 0.11877599071744967, "grad_norm": 6.998860836029053, "learning_rate": 0.00039451259583789705, "loss": 0.9338, "step": 1753 }, { "epoch": 0.11884374655927366, "grad_norm": 5.057334899902344, "learning_rate": 0.00039450711938663745, "loss": 1.0174, "step": 1754 }, { "epoch": 0.11891150240109764, "grad_norm": 4.529388427734375, "learning_rate": 0.0003945016429353779, "loss": 1.3518, "step": 1755 }, { "epoch": 0.11897925824292163, "grad_norm": 3.73679780960083, "learning_rate": 0.0003944961664841183, "loss": 0.9592, "step": 1756 }, { "epoch": 0.11904701408474562, "grad_norm": 4.708619117736816, "learning_rate": 0.00039449069003285876, "loss": 1.157, "step": 1757 }, { "epoch": 0.11911476992656961, "grad_norm": 4.4102654457092285, "learning_rate": 0.00039448521358159916, "loss": 1.1355, "step": 1758 }, { "epoch": 0.1191825257683936, "grad_norm": 4.033970355987549, "learning_rate": 0.00039447973713033956, "loss": 1.1677, "step": 1759 }, { "epoch": 0.11925028161021758, "grad_norm": 4.176807403564453, "learning_rate": 0.00039447426067907996, "loss": 1.05, "step": 1760 }, { "epoch": 0.11931803745204157, "grad_norm": 5.637633323669434, "learning_rate": 0.0003944687842278204, "loss": 0.8578, "step": 1761 }, { "epoch": 0.11938579329386556, "grad_norm": 4.136836051940918, "learning_rate": 0.0003944633077765608, "loss": 1.1186, "step": 1762 }, { "epoch": 0.11945354913568955, "grad_norm": 3.8431918621063232, "learning_rate": 0.0003944578313253012, "loss": 0.882, "step": 1763 }, { "epoch": 0.11952130497751352, "grad_norm": 4.419764995574951, "learning_rate": 0.0003944523548740416, "loss": 1.173, "step": 1764 }, { "epoch": 0.11958906081933751, "grad_norm": 3.850884437561035, "learning_rate": 0.00039444687842278206, "loss": 1.2835, "step": 1765 }, { "epoch": 0.1196568166611615, "grad_norm": 3.596729278564453, "learning_rate": 0.00039444140197152246, "loss": 0.9831, "step": 1766 }, { "epoch": 0.11972457250298549, "grad_norm": 4.727614402770996, "learning_rate": 0.0003944359255202629, "loss": 1.1181, "step": 1767 }, { "epoch": 0.11979232834480948, "grad_norm": 3.492016315460205, "learning_rate": 0.0003944304490690033, "loss": 0.9477, "step": 1768 }, { "epoch": 0.11986008418663346, "grad_norm": 4.364447116851807, "learning_rate": 0.0003944249726177437, "loss": 1.1886, "step": 1769 }, { "epoch": 0.11992784002845745, "grad_norm": 3.8655457496643066, "learning_rate": 0.0003944194961664841, "loss": 1.1625, "step": 1770 }, { "epoch": 0.11999559587028144, "grad_norm": 3.2298905849456787, "learning_rate": 0.0003944140197152245, "loss": 1.0474, "step": 1771 }, { "epoch": 0.12006335171210543, "grad_norm": 3.3382368087768555, "learning_rate": 0.00039440854326396496, "loss": 0.987, "step": 1772 }, { "epoch": 0.12013110755392942, "grad_norm": 4.010012626647949, "learning_rate": 0.0003944030668127054, "loss": 0.783, "step": 1773 }, { "epoch": 0.1201988633957534, "grad_norm": 3.6999971866607666, "learning_rate": 0.0003943975903614458, "loss": 1.2317, "step": 1774 }, { "epoch": 0.1202666192375774, "grad_norm": 4.398083686828613, "learning_rate": 0.0003943921139101862, "loss": 1.1358, "step": 1775 }, { "epoch": 0.12033437507940138, "grad_norm": 4.35454797744751, "learning_rate": 0.0003943866374589266, "loss": 1.0099, "step": 1776 }, { "epoch": 0.12040213092122537, "grad_norm": 5.814678192138672, "learning_rate": 0.00039438116100766707, "loss": 1.2242, "step": 1777 }, { "epoch": 0.12046988676304936, "grad_norm": 3.9683175086975098, "learning_rate": 0.00039437568455640747, "loss": 0.9783, "step": 1778 }, { "epoch": 0.12053764260487333, "grad_norm": 5.008693218231201, "learning_rate": 0.00039437020810514787, "loss": 1.0379, "step": 1779 }, { "epoch": 0.12060539844669732, "grad_norm": 5.433497428894043, "learning_rate": 0.0003943647316538883, "loss": 1.3031, "step": 1780 }, { "epoch": 0.12067315428852131, "grad_norm": 5.59146785736084, "learning_rate": 0.0003943592552026287, "loss": 0.8908, "step": 1781 }, { "epoch": 0.1207409101303453, "grad_norm": 4.415099620819092, "learning_rate": 0.0003943537787513691, "loss": 1.1647, "step": 1782 }, { "epoch": 0.12080866597216929, "grad_norm": 5.488921642303467, "learning_rate": 0.00039434830230010957, "loss": 0.8395, "step": 1783 }, { "epoch": 0.12087642181399327, "grad_norm": 6.12910270690918, "learning_rate": 0.00039434282584884997, "loss": 1.4274, "step": 1784 }, { "epoch": 0.12094417765581726, "grad_norm": 4.462892532348633, "learning_rate": 0.00039433734939759037, "loss": 1.0254, "step": 1785 }, { "epoch": 0.12101193349764125, "grad_norm": 3.3209166526794434, "learning_rate": 0.00039433187294633077, "loss": 1.0128, "step": 1786 }, { "epoch": 0.12107968933946524, "grad_norm": 2.8834686279296875, "learning_rate": 0.00039432639649507117, "loss": 0.9423, "step": 1787 }, { "epoch": 0.12114744518128923, "grad_norm": 3.505333185195923, "learning_rate": 0.0003943209200438116, "loss": 0.9559, "step": 1788 }, { "epoch": 0.12121520102311321, "grad_norm": 4.342011451721191, "learning_rate": 0.0003943154435925521, "loss": 1.131, "step": 1789 }, { "epoch": 0.1212829568649372, "grad_norm": 3.5092380046844482, "learning_rate": 0.0003943099671412925, "loss": 0.944, "step": 1790 }, { "epoch": 0.12135071270676119, "grad_norm": 3.4735143184661865, "learning_rate": 0.0003943044906900329, "loss": 1.1781, "step": 1791 }, { "epoch": 0.12141846854858518, "grad_norm": 3.1634914875030518, "learning_rate": 0.0003942990142387733, "loss": 0.8058, "step": 1792 }, { "epoch": 0.12148622439040917, "grad_norm": 3.91396164894104, "learning_rate": 0.00039429353778751373, "loss": 1.0665, "step": 1793 }, { "epoch": 0.12155398023223314, "grad_norm": 3.8572874069213867, "learning_rate": 0.0003942880613362541, "loss": 1.2749, "step": 1794 }, { "epoch": 0.12162173607405713, "grad_norm": 4.059725761413574, "learning_rate": 0.0003942825848849945, "loss": 1.0422, "step": 1795 }, { "epoch": 0.12168949191588112, "grad_norm": 4.507661819458008, "learning_rate": 0.000394277108433735, "loss": 0.9989, "step": 1796 }, { "epoch": 0.1217572477577051, "grad_norm": 3.1628077030181885, "learning_rate": 0.0003942716319824754, "loss": 0.8951, "step": 1797 }, { "epoch": 0.1218250035995291, "grad_norm": 3.8676633834838867, "learning_rate": 0.0003942661555312158, "loss": 0.9236, "step": 1798 }, { "epoch": 0.12189275944135308, "grad_norm": 4.942222595214844, "learning_rate": 0.00039426067907995623, "loss": 1.3439, "step": 1799 }, { "epoch": 0.12196051528317707, "grad_norm": 3.636735439300537, "learning_rate": 0.00039425520262869663, "loss": 1.1559, "step": 1800 }, { "epoch": 0.12202827112500106, "grad_norm": 5.252878665924072, "learning_rate": 0.00039424972617743703, "loss": 1.1251, "step": 1801 }, { "epoch": 0.12209602696682505, "grad_norm": 3.1147611141204834, "learning_rate": 0.00039424424972617743, "loss": 0.8764, "step": 1802 }, { "epoch": 0.12216378280864904, "grad_norm": 4.047060966491699, "learning_rate": 0.0003942387732749179, "loss": 0.9859, "step": 1803 }, { "epoch": 0.12223153865047302, "grad_norm": 4.130135536193848, "learning_rate": 0.0003942332968236583, "loss": 1.109, "step": 1804 }, { "epoch": 0.12229929449229701, "grad_norm": 4.171240329742432, "learning_rate": 0.00039422782037239873, "loss": 1.0646, "step": 1805 }, { "epoch": 0.122367050334121, "grad_norm": 3.735419750213623, "learning_rate": 0.00039422234392113913, "loss": 0.9979, "step": 1806 }, { "epoch": 0.12243480617594499, "grad_norm": 4.967855453491211, "learning_rate": 0.00039421686746987953, "loss": 1.385, "step": 1807 }, { "epoch": 0.12250256201776898, "grad_norm": 5.171457290649414, "learning_rate": 0.00039421139101861993, "loss": 1.3712, "step": 1808 }, { "epoch": 0.12257031785959295, "grad_norm": 3.952385187149048, "learning_rate": 0.00039420591456736033, "loss": 1.129, "step": 1809 }, { "epoch": 0.12263807370141694, "grad_norm": 3.3985743522644043, "learning_rate": 0.0003942004381161008, "loss": 1.0455, "step": 1810 }, { "epoch": 0.12270582954324093, "grad_norm": 3.752293586730957, "learning_rate": 0.00039419496166484124, "loss": 0.9598, "step": 1811 }, { "epoch": 0.12277358538506492, "grad_norm": 3.917031764984131, "learning_rate": 0.00039418948521358164, "loss": 0.9095, "step": 1812 }, { "epoch": 0.1228413412268889, "grad_norm": 3.6257455348968506, "learning_rate": 0.00039418400876232204, "loss": 1.0676, "step": 1813 }, { "epoch": 0.12290909706871289, "grad_norm": 4.0780134201049805, "learning_rate": 0.00039417853231106244, "loss": 1.0206, "step": 1814 }, { "epoch": 0.12297685291053688, "grad_norm": 3.7420945167541504, "learning_rate": 0.0003941730558598029, "loss": 1.0192, "step": 1815 }, { "epoch": 0.12304460875236087, "grad_norm": 3.4092986583709717, "learning_rate": 0.0003941675794085433, "loss": 0.9628, "step": 1816 }, { "epoch": 0.12311236459418486, "grad_norm": 3.78250789642334, "learning_rate": 0.0003941621029572837, "loss": 1.1779, "step": 1817 }, { "epoch": 0.12318012043600884, "grad_norm": 4.220417022705078, "learning_rate": 0.0003941566265060241, "loss": 1.1888, "step": 1818 }, { "epoch": 0.12324787627783283, "grad_norm": 4.681819915771484, "learning_rate": 0.00039415115005476454, "loss": 1.5143, "step": 1819 }, { "epoch": 0.12331563211965682, "grad_norm": 4.093831539154053, "learning_rate": 0.00039414567360350494, "loss": 1.0043, "step": 1820 }, { "epoch": 0.12338338796148081, "grad_norm": 4.1450042724609375, "learning_rate": 0.0003941401971522454, "loss": 1.0525, "step": 1821 }, { "epoch": 0.1234511438033048, "grad_norm": 4.708794116973877, "learning_rate": 0.0003941347207009858, "loss": 1.1564, "step": 1822 }, { "epoch": 0.12351889964512879, "grad_norm": 3.7568252086639404, "learning_rate": 0.0003941292442497262, "loss": 1.0529, "step": 1823 }, { "epoch": 0.12358665548695276, "grad_norm": 5.402228832244873, "learning_rate": 0.0003941237677984666, "loss": 1.0908, "step": 1824 }, { "epoch": 0.12365441132877675, "grad_norm": 4.131001949310303, "learning_rate": 0.000394118291347207, "loss": 1.0465, "step": 1825 }, { "epoch": 0.12372216717060074, "grad_norm": 4.514806270599365, "learning_rate": 0.00039411281489594744, "loss": 1.2402, "step": 1826 }, { "epoch": 0.12378992301242472, "grad_norm": 4.582322120666504, "learning_rate": 0.0003941073384446879, "loss": 0.8958, "step": 1827 }, { "epoch": 0.12385767885424871, "grad_norm": 3.4016494750976562, "learning_rate": 0.0003941018619934283, "loss": 0.8938, "step": 1828 }, { "epoch": 0.1239254346960727, "grad_norm": 3.735915422439575, "learning_rate": 0.0003940963855421687, "loss": 1.0233, "step": 1829 }, { "epoch": 0.12399319053789669, "grad_norm": 4.394433498382568, "learning_rate": 0.0003940909090909091, "loss": 0.8388, "step": 1830 }, { "epoch": 0.12406094637972068, "grad_norm": 4.135808944702148, "learning_rate": 0.00039408543263964955, "loss": 1.1488, "step": 1831 }, { "epoch": 0.12412870222154467, "grad_norm": 3.190540075302124, "learning_rate": 0.00039407995618838995, "loss": 0.861, "step": 1832 }, { "epoch": 0.12419645806336865, "grad_norm": 4.317681789398193, "learning_rate": 0.00039407447973713035, "loss": 1.1285, "step": 1833 }, { "epoch": 0.12426421390519264, "grad_norm": 4.385807037353516, "learning_rate": 0.00039406900328587075, "loss": 0.9116, "step": 1834 }, { "epoch": 0.12433196974701663, "grad_norm": 3.790863275527954, "learning_rate": 0.0003940635268346112, "loss": 1.1933, "step": 1835 }, { "epoch": 0.12439972558884062, "grad_norm": 4.086167812347412, "learning_rate": 0.0003940580503833516, "loss": 1.1334, "step": 1836 }, { "epoch": 0.1244674814306646, "grad_norm": 5.536815166473389, "learning_rate": 0.00039405257393209205, "loss": 0.8746, "step": 1837 }, { "epoch": 0.1245352372724886, "grad_norm": 3.602147340774536, "learning_rate": 0.00039404709748083245, "loss": 1.003, "step": 1838 }, { "epoch": 0.12460299311431257, "grad_norm": 4.224771976470947, "learning_rate": 0.00039404162102957285, "loss": 1.2177, "step": 1839 }, { "epoch": 0.12467074895613656, "grad_norm": 4.504672527313232, "learning_rate": 0.00039403614457831325, "loss": 1.2838, "step": 1840 }, { "epoch": 0.12473850479796054, "grad_norm": 4.660628795623779, "learning_rate": 0.00039403066812705365, "loss": 0.8578, "step": 1841 }, { "epoch": 0.12480626063978453, "grad_norm": 7.820394039154053, "learning_rate": 0.0003940251916757941, "loss": 1.2228, "step": 1842 }, { "epoch": 0.12487401648160852, "grad_norm": 3.7876226902008057, "learning_rate": 0.00039401971522453456, "loss": 0.8574, "step": 1843 }, { "epoch": 0.12494177232343251, "grad_norm": 3.594557762145996, "learning_rate": 0.00039401423877327495, "loss": 1.0456, "step": 1844 }, { "epoch": 0.1250095281652565, "grad_norm": 3.5986955165863037, "learning_rate": 0.00039400876232201535, "loss": 0.9908, "step": 1845 }, { "epoch": 0.12507728400708049, "grad_norm": 3.9941565990448, "learning_rate": 0.00039400328587075575, "loss": 1.0189, "step": 1846 }, { "epoch": 0.12514503984890446, "grad_norm": 4.092831134796143, "learning_rate": 0.00039399780941949615, "loss": 0.9447, "step": 1847 }, { "epoch": 0.12521279569072846, "grad_norm": 4.5969390869140625, "learning_rate": 0.0003939923329682366, "loss": 1.3735, "step": 1848 }, { "epoch": 0.12528055153255244, "grad_norm": 3.631680965423584, "learning_rate": 0.000393986856516977, "loss": 0.9864, "step": 1849 }, { "epoch": 0.12534830737437644, "grad_norm": 4.72289514541626, "learning_rate": 0.00039398138006571746, "loss": 0.9884, "step": 1850 }, { "epoch": 0.1254160632162004, "grad_norm": 3.4760959148406982, "learning_rate": 0.00039397590361445786, "loss": 0.8979, "step": 1851 }, { "epoch": 0.12548381905802442, "grad_norm": 4.025509834289551, "learning_rate": 0.00039397042716319826, "loss": 1.0439, "step": 1852 }, { "epoch": 0.1255515748998484, "grad_norm": 3.7536063194274902, "learning_rate": 0.0003939649507119387, "loss": 1.0138, "step": 1853 }, { "epoch": 0.1256193307416724, "grad_norm": 3.739171028137207, "learning_rate": 0.0003939594742606791, "loss": 1.1156, "step": 1854 }, { "epoch": 0.12568708658349637, "grad_norm": 4.956604480743408, "learning_rate": 0.0003939539978094195, "loss": 1.2284, "step": 1855 }, { "epoch": 0.12575484242532037, "grad_norm": 3.817725896835327, "learning_rate": 0.0003939485213581599, "loss": 0.7855, "step": 1856 }, { "epoch": 0.12582259826714434, "grad_norm": 3.9724466800689697, "learning_rate": 0.0003939430449069003, "loss": 1.0788, "step": 1857 }, { "epoch": 0.12589035410896834, "grad_norm": 3.7098357677459717, "learning_rate": 0.00039393756845564076, "loss": 0.9621, "step": 1858 }, { "epoch": 0.12595810995079232, "grad_norm": 4.540678977966309, "learning_rate": 0.0003939320920043812, "loss": 1.2005, "step": 1859 }, { "epoch": 0.12602586579261632, "grad_norm": 4.152740955352783, "learning_rate": 0.0003939266155531216, "loss": 1.1063, "step": 1860 }, { "epoch": 0.1260936216344403, "grad_norm": 5.679985523223877, "learning_rate": 0.000393921139101862, "loss": 0.9053, "step": 1861 }, { "epoch": 0.12616137747626427, "grad_norm": 5.012297630310059, "learning_rate": 0.0003939156626506024, "loss": 1.3216, "step": 1862 }, { "epoch": 0.12622913331808827, "grad_norm": 3.621983528137207, "learning_rate": 0.0003939101861993428, "loss": 0.8771, "step": 1863 }, { "epoch": 0.12629688915991225, "grad_norm": 3.760590076446533, "learning_rate": 0.00039390470974808326, "loss": 1.1374, "step": 1864 }, { "epoch": 0.12636464500173625, "grad_norm": 4.252878665924072, "learning_rate": 0.00039389923329682366, "loss": 1.1744, "step": 1865 }, { "epoch": 0.12643240084356022, "grad_norm": 4.536449909210205, "learning_rate": 0.0003938937568455641, "loss": 1.1041, "step": 1866 }, { "epoch": 0.12650015668538422, "grad_norm": 3.6722726821899414, "learning_rate": 0.0003938882803943045, "loss": 1.2249, "step": 1867 }, { "epoch": 0.1265679125272082, "grad_norm": 3.4794342517852783, "learning_rate": 0.0003938828039430449, "loss": 0.9817, "step": 1868 }, { "epoch": 0.1266356683690322, "grad_norm": 3.702176570892334, "learning_rate": 0.00039387732749178537, "loss": 0.9495, "step": 1869 }, { "epoch": 0.12670342421085617, "grad_norm": 3.2054443359375, "learning_rate": 0.00039387185104052577, "loss": 1.0327, "step": 1870 }, { "epoch": 0.12677118005268018, "grad_norm": 3.591895341873169, "learning_rate": 0.00039386637458926617, "loss": 0.9529, "step": 1871 }, { "epoch": 0.12683893589450415, "grad_norm": 3.1956913471221924, "learning_rate": 0.00039386089813800657, "loss": 0.9698, "step": 1872 }, { "epoch": 0.12690669173632815, "grad_norm": 5.032118797302246, "learning_rate": 0.000393855421686747, "loss": 0.8259, "step": 1873 }, { "epoch": 0.12697444757815213, "grad_norm": 3.301806688308716, "learning_rate": 0.0003938499452354874, "loss": 0.8616, "step": 1874 }, { "epoch": 0.12704220341997613, "grad_norm": 5.120059013366699, "learning_rate": 0.00039384446878422787, "loss": 1.1377, "step": 1875 }, { "epoch": 0.1271099592618001, "grad_norm": 5.013489723205566, "learning_rate": 0.00039383899233296827, "loss": 1.2948, "step": 1876 }, { "epoch": 0.12717771510362408, "grad_norm": 4.0147271156311035, "learning_rate": 0.00039383351588170867, "loss": 1.2281, "step": 1877 }, { "epoch": 0.12724547094544808, "grad_norm": 3.3962347507476807, "learning_rate": 0.00039382803943044907, "loss": 0.9244, "step": 1878 }, { "epoch": 0.12731322678727205, "grad_norm": 3.543621063232422, "learning_rate": 0.00039382256297918947, "loss": 1.0228, "step": 1879 }, { "epoch": 0.12738098262909606, "grad_norm": 3.8247084617614746, "learning_rate": 0.0003938170865279299, "loss": 1.0812, "step": 1880 }, { "epoch": 0.12744873847092003, "grad_norm": 2.591374397277832, "learning_rate": 0.0003938116100766704, "loss": 0.6708, "step": 1881 }, { "epoch": 0.12751649431274403, "grad_norm": 3.8190577030181885, "learning_rate": 0.0003938061336254108, "loss": 1.0116, "step": 1882 }, { "epoch": 0.127584250154568, "grad_norm": 3.098743200302124, "learning_rate": 0.0003938006571741512, "loss": 0.8639, "step": 1883 }, { "epoch": 0.127652005996392, "grad_norm": 6.168898105621338, "learning_rate": 0.0003937951807228916, "loss": 1.5368, "step": 1884 }, { "epoch": 0.12771976183821598, "grad_norm": 4.254391193389893, "learning_rate": 0.000393789704271632, "loss": 1.1098, "step": 1885 }, { "epoch": 0.12778751768003999, "grad_norm": 3.7747035026550293, "learning_rate": 0.0003937842278203724, "loss": 1.1788, "step": 1886 }, { "epoch": 0.12785527352186396, "grad_norm": 4.273073196411133, "learning_rate": 0.0003937787513691128, "loss": 0.9761, "step": 1887 }, { "epoch": 0.12792302936368796, "grad_norm": 3.7713770866394043, "learning_rate": 0.0003937732749178532, "loss": 1.0652, "step": 1888 }, { "epoch": 0.12799078520551194, "grad_norm": 2.95625376701355, "learning_rate": 0.0003937677984665937, "loss": 0.9818, "step": 1889 }, { "epoch": 0.12805854104733594, "grad_norm": 3.0310325622558594, "learning_rate": 0.0003937623220153341, "loss": 0.8769, "step": 1890 }, { "epoch": 0.1281262968891599, "grad_norm": 3.168720006942749, "learning_rate": 0.00039375684556407453, "loss": 1.0757, "step": 1891 }, { "epoch": 0.1281940527309839, "grad_norm": 3.157223701477051, "learning_rate": 0.00039375136911281493, "loss": 0.9094, "step": 1892 }, { "epoch": 0.1282618085728079, "grad_norm": 4.76072883605957, "learning_rate": 0.00039374589266155533, "loss": 1.0586, "step": 1893 }, { "epoch": 0.12832956441463186, "grad_norm": 3.1946115493774414, "learning_rate": 0.00039374041621029573, "loss": 0.9054, "step": 1894 }, { "epoch": 0.12839732025645587, "grad_norm": 3.923870325088501, "learning_rate": 0.00039373493975903613, "loss": 1.1368, "step": 1895 }, { "epoch": 0.12846507609827984, "grad_norm": 4.610617637634277, "learning_rate": 0.0003937294633077766, "loss": 1.0974, "step": 1896 }, { "epoch": 0.12853283194010384, "grad_norm": 3.994490623474121, "learning_rate": 0.00039372398685651703, "loss": 1.2797, "step": 1897 }, { "epoch": 0.12860058778192782, "grad_norm": 3.981736183166504, "learning_rate": 0.00039371851040525743, "loss": 0.9522, "step": 1898 }, { "epoch": 0.12866834362375182, "grad_norm": 2.765848159790039, "learning_rate": 0.00039371303395399783, "loss": 0.7337, "step": 1899 }, { "epoch": 0.1287360994655758, "grad_norm": 3.7265939712524414, "learning_rate": 0.00039370755750273823, "loss": 1.0794, "step": 1900 }, { "epoch": 0.1288038553073998, "grad_norm": 3.501375198364258, "learning_rate": 0.00039370208105147863, "loss": 1.0447, "step": 1901 }, { "epoch": 0.12887161114922377, "grad_norm": 4.184782028198242, "learning_rate": 0.0003936966046002191, "loss": 1.1991, "step": 1902 }, { "epoch": 0.12893936699104777, "grad_norm": 8.838193893432617, "learning_rate": 0.0003936911281489595, "loss": 1.3164, "step": 1903 }, { "epoch": 0.12900712283287175, "grad_norm": 4.1561431884765625, "learning_rate": 0.00039368565169769994, "loss": 1.1864, "step": 1904 }, { "epoch": 0.12907487867469575, "grad_norm": 3.0376083850860596, "learning_rate": 0.00039368017524644034, "loss": 0.9058, "step": 1905 }, { "epoch": 0.12914263451651972, "grad_norm": 3.793778419494629, "learning_rate": 0.00039367469879518074, "loss": 0.977, "step": 1906 }, { "epoch": 0.1292103903583437, "grad_norm": 5.864257335662842, "learning_rate": 0.0003936692223439212, "loss": 1.194, "step": 1907 }, { "epoch": 0.1292781462001677, "grad_norm": 4.183814525604248, "learning_rate": 0.0003936637458926616, "loss": 0.8594, "step": 1908 }, { "epoch": 0.12934590204199167, "grad_norm": 3.8619446754455566, "learning_rate": 0.000393658269441402, "loss": 1.0024, "step": 1909 }, { "epoch": 0.12941365788381567, "grad_norm": 3.3930470943450928, "learning_rate": 0.0003936527929901424, "loss": 0.8368, "step": 1910 }, { "epoch": 0.12948141372563965, "grad_norm": 4.857607364654541, "learning_rate": 0.0003936473165388828, "loss": 1.2485, "step": 1911 }, { "epoch": 0.12954916956746365, "grad_norm": 3.1457479000091553, "learning_rate": 0.00039364184008762324, "loss": 1.1457, "step": 1912 }, { "epoch": 0.12961692540928763, "grad_norm": 3.3775463104248047, "learning_rate": 0.0003936363636363637, "loss": 0.8994, "step": 1913 }, { "epoch": 0.12968468125111163, "grad_norm": 3.782445192337036, "learning_rate": 0.0003936308871851041, "loss": 1.2035, "step": 1914 }, { "epoch": 0.1297524370929356, "grad_norm": 3.567512273788452, "learning_rate": 0.0003936254107338445, "loss": 1.2224, "step": 1915 }, { "epoch": 0.1298201929347596, "grad_norm": 4.4081926345825195, "learning_rate": 0.0003936199342825849, "loss": 1.0101, "step": 1916 }, { "epoch": 0.12988794877658358, "grad_norm": 3.556549072265625, "learning_rate": 0.0003936144578313253, "loss": 0.8286, "step": 1917 }, { "epoch": 0.12995570461840758, "grad_norm": 4.220504283905029, "learning_rate": 0.00039360898138006574, "loss": 0.9659, "step": 1918 }, { "epoch": 0.13002346046023155, "grad_norm": 3.6562061309814453, "learning_rate": 0.00039360350492880614, "loss": 0.9577, "step": 1919 }, { "epoch": 0.13009121630205553, "grad_norm": 2.9608206748962402, "learning_rate": 0.0003935980284775466, "loss": 0.887, "step": 1920 }, { "epoch": 0.13015897214387953, "grad_norm": 4.085014820098877, "learning_rate": 0.000393592552026287, "loss": 1.2644, "step": 1921 }, { "epoch": 0.1302267279857035, "grad_norm": 5.186523914337158, "learning_rate": 0.0003935870755750274, "loss": 1.1567, "step": 1922 }, { "epoch": 0.1302944838275275, "grad_norm": 4.298685073852539, "learning_rate": 0.0003935815991237678, "loss": 0.964, "step": 1923 }, { "epoch": 0.13036223966935148, "grad_norm": 3.5432169437408447, "learning_rate": 0.00039357612267250825, "loss": 1.0692, "step": 1924 }, { "epoch": 0.13042999551117548, "grad_norm": 3.3199679851531982, "learning_rate": 0.00039357064622124865, "loss": 1.0625, "step": 1925 }, { "epoch": 0.13049775135299946, "grad_norm": 3.9811880588531494, "learning_rate": 0.00039356516976998905, "loss": 1.1002, "step": 1926 }, { "epoch": 0.13056550719482346, "grad_norm": 3.4884250164031982, "learning_rate": 0.00039355969331872944, "loss": 0.8508, "step": 1927 }, { "epoch": 0.13063326303664743, "grad_norm": 3.6489617824554443, "learning_rate": 0.0003935542168674699, "loss": 0.9108, "step": 1928 }, { "epoch": 0.13070101887847144, "grad_norm": 3.4023690223693848, "learning_rate": 0.00039354874041621035, "loss": 0.9027, "step": 1929 }, { "epoch": 0.1307687747202954, "grad_norm": 3.6431820392608643, "learning_rate": 0.00039354326396495075, "loss": 1.0201, "step": 1930 }, { "epoch": 0.1308365305621194, "grad_norm": 2.8955748081207275, "learning_rate": 0.00039353778751369115, "loss": 0.7453, "step": 1931 }, { "epoch": 0.1309042864039434, "grad_norm": 4.0951972007751465, "learning_rate": 0.00039353231106243155, "loss": 1.0779, "step": 1932 }, { "epoch": 0.1309720422457674, "grad_norm": 4.259312629699707, "learning_rate": 0.00039352683461117195, "loss": 1.1478, "step": 1933 }, { "epoch": 0.13103979808759136, "grad_norm": 3.5302422046661377, "learning_rate": 0.0003935213581599124, "loss": 0.9817, "step": 1934 }, { "epoch": 0.13110755392941534, "grad_norm": 4.699799060821533, "learning_rate": 0.00039351588170865286, "loss": 1.0692, "step": 1935 }, { "epoch": 0.13117530977123934, "grad_norm": 3.059727191925049, "learning_rate": 0.00039351040525739325, "loss": 0.9202, "step": 1936 }, { "epoch": 0.13124306561306331, "grad_norm": 3.9536166191101074, "learning_rate": 0.00039350492880613365, "loss": 0.9628, "step": 1937 }, { "epoch": 0.13131082145488732, "grad_norm": 4.202176570892334, "learning_rate": 0.00039349945235487405, "loss": 1.3277, "step": 1938 }, { "epoch": 0.1313785772967113, "grad_norm": 4.248271465301514, "learning_rate": 0.00039349397590361445, "loss": 1.3216, "step": 1939 }, { "epoch": 0.1314463331385353, "grad_norm": 3.7766263484954834, "learning_rate": 0.0003934884994523549, "loss": 0.9515, "step": 1940 }, { "epoch": 0.13151408898035927, "grad_norm": 2.7945661544799805, "learning_rate": 0.0003934830230010953, "loss": 0.9346, "step": 1941 }, { "epoch": 0.13158184482218327, "grad_norm": 3.5524611473083496, "learning_rate": 0.0003934775465498357, "loss": 0.9856, "step": 1942 }, { "epoch": 0.13164960066400724, "grad_norm": 3.028143882751465, "learning_rate": 0.00039347207009857616, "loss": 0.7529, "step": 1943 }, { "epoch": 0.13171735650583125, "grad_norm": 3.4239816665649414, "learning_rate": 0.00039346659364731656, "loss": 0.9032, "step": 1944 }, { "epoch": 0.13178511234765522, "grad_norm": 4.826498985290527, "learning_rate": 0.000393461117196057, "loss": 1.208, "step": 1945 }, { "epoch": 0.13185286818947922, "grad_norm": 3.227996349334717, "learning_rate": 0.0003934556407447974, "loss": 1.0093, "step": 1946 }, { "epoch": 0.1319206240313032, "grad_norm": 3.463676691055298, "learning_rate": 0.0003934501642935378, "loss": 0.8302, "step": 1947 }, { "epoch": 0.1319883798731272, "grad_norm": 3.568833112716675, "learning_rate": 0.0003934446878422782, "loss": 0.8723, "step": 1948 }, { "epoch": 0.13205613571495117, "grad_norm": 4.0637030601501465, "learning_rate": 0.0003934392113910186, "loss": 0.8502, "step": 1949 }, { "epoch": 0.13212389155677515, "grad_norm": 3.7621164321899414, "learning_rate": 0.00039343373493975906, "loss": 1.0091, "step": 1950 }, { "epoch": 0.13219164739859915, "grad_norm": 4.1454386711120605, "learning_rate": 0.0003934282584884995, "loss": 1.1275, "step": 1951 }, { "epoch": 0.13225940324042312, "grad_norm": 5.30384635925293, "learning_rate": 0.0003934227820372399, "loss": 0.99, "step": 1952 }, { "epoch": 0.13232715908224713, "grad_norm": 3.7639780044555664, "learning_rate": 0.0003934173055859803, "loss": 1.0394, "step": 1953 }, { "epoch": 0.1323949149240711, "grad_norm": 4.136169910430908, "learning_rate": 0.0003934118291347207, "loss": 0.9947, "step": 1954 }, { "epoch": 0.1324626707658951, "grad_norm": 4.134731769561768, "learning_rate": 0.0003934063526834611, "loss": 1.0699, "step": 1955 }, { "epoch": 0.13253042660771908, "grad_norm": 3.162278652191162, "learning_rate": 0.00039340087623220156, "loss": 0.9359, "step": 1956 }, { "epoch": 0.13259818244954308, "grad_norm": 3.5070345401763916, "learning_rate": 0.00039339539978094196, "loss": 0.9803, "step": 1957 }, { "epoch": 0.13266593829136705, "grad_norm": 3.192782163619995, "learning_rate": 0.00039338992332968236, "loss": 0.907, "step": 1958 }, { "epoch": 0.13273369413319105, "grad_norm": 4.041593551635742, "learning_rate": 0.0003933844468784228, "loss": 0.878, "step": 1959 }, { "epoch": 0.13280144997501503, "grad_norm": 5.201786041259766, "learning_rate": 0.0003933789704271632, "loss": 1.1319, "step": 1960 }, { "epoch": 0.13286920581683903, "grad_norm": 3.7757298946380615, "learning_rate": 0.0003933734939759036, "loss": 0.8727, "step": 1961 }, { "epoch": 0.132936961658663, "grad_norm": 4.938465118408203, "learning_rate": 0.00039336801752464407, "loss": 1.0432, "step": 1962 }, { "epoch": 0.133004717500487, "grad_norm": 3.8576560020446777, "learning_rate": 0.00039336254107338447, "loss": 1.1053, "step": 1963 }, { "epoch": 0.13307247334231098, "grad_norm": 4.339880466461182, "learning_rate": 0.00039335706462212487, "loss": 1.0894, "step": 1964 }, { "epoch": 0.13314022918413496, "grad_norm": 3.7322065830230713, "learning_rate": 0.00039335158817086527, "loss": 0.8703, "step": 1965 }, { "epoch": 0.13320798502595896, "grad_norm": 3.177706718444824, "learning_rate": 0.0003933461117196057, "loss": 1.0181, "step": 1966 }, { "epoch": 0.13327574086778293, "grad_norm": 4.539488315582275, "learning_rate": 0.00039334063526834617, "loss": 0.9901, "step": 1967 }, { "epoch": 0.13334349670960693, "grad_norm": 4.758763790130615, "learning_rate": 0.00039333515881708657, "loss": 1.1513, "step": 1968 }, { "epoch": 0.1334112525514309, "grad_norm": 3.745053768157959, "learning_rate": 0.00039332968236582697, "loss": 1.0957, "step": 1969 }, { "epoch": 0.1334790083932549, "grad_norm": 4.3162841796875, "learning_rate": 0.00039332420591456737, "loss": 1.3349, "step": 1970 }, { "epoch": 0.13354676423507889, "grad_norm": 3.856776237487793, "learning_rate": 0.00039331872946330777, "loss": 0.7896, "step": 1971 }, { "epoch": 0.1336145200769029, "grad_norm": 2.958937168121338, "learning_rate": 0.0003933132530120482, "loss": 0.9366, "step": 1972 }, { "epoch": 0.13368227591872686, "grad_norm": 3.46213698387146, "learning_rate": 0.0003933077765607886, "loss": 1.0736, "step": 1973 }, { "epoch": 0.13375003176055086, "grad_norm": 3.272308588027954, "learning_rate": 0.0003933023001095291, "loss": 1.0175, "step": 1974 }, { "epoch": 0.13381778760237484, "grad_norm": 4.073008060455322, "learning_rate": 0.0003932968236582695, "loss": 0.8983, "step": 1975 }, { "epoch": 0.13388554344419884, "grad_norm": 4.09628438949585, "learning_rate": 0.0003932913472070099, "loss": 1.0099, "step": 1976 }, { "epoch": 0.13395329928602281, "grad_norm": 6.971934795379639, "learning_rate": 0.00039328587075575027, "loss": 1.1008, "step": 1977 }, { "epoch": 0.13402105512784682, "grad_norm": 4.075629711151123, "learning_rate": 0.0003932803943044907, "loss": 1.1905, "step": 1978 }, { "epoch": 0.1340888109696708, "grad_norm": 3.393519639968872, "learning_rate": 0.0003932749178532311, "loss": 0.9598, "step": 1979 }, { "epoch": 0.13415656681149477, "grad_norm": 3.1066389083862305, "learning_rate": 0.0003932694414019715, "loss": 0.7126, "step": 1980 }, { "epoch": 0.13422432265331877, "grad_norm": 3.3074233531951904, "learning_rate": 0.0003932639649507119, "loss": 0.9072, "step": 1981 }, { "epoch": 0.13429207849514274, "grad_norm": 3.258561611175537, "learning_rate": 0.0003932584884994524, "loss": 1.16, "step": 1982 }, { "epoch": 0.13435983433696674, "grad_norm": 4.268911361694336, "learning_rate": 0.00039325301204819283, "loss": 1.0758, "step": 1983 }, { "epoch": 0.13442759017879072, "grad_norm": 3.4436447620391846, "learning_rate": 0.00039324753559693323, "loss": 0.8295, "step": 1984 }, { "epoch": 0.13449534602061472, "grad_norm": 5.179892063140869, "learning_rate": 0.00039324205914567363, "loss": 1.1655, "step": 1985 }, { "epoch": 0.1345631018624387, "grad_norm": 3.867161989212036, "learning_rate": 0.00039323658269441403, "loss": 0.7784, "step": 1986 }, { "epoch": 0.1346308577042627, "grad_norm": 3.456005096435547, "learning_rate": 0.00039323110624315443, "loss": 0.9455, "step": 1987 }, { "epoch": 0.13469861354608667, "grad_norm": 4.781862735748291, "learning_rate": 0.0003932256297918949, "loss": 1.0181, "step": 1988 }, { "epoch": 0.13476636938791067, "grad_norm": 4.400859355926514, "learning_rate": 0.0003932201533406353, "loss": 0.9509, "step": 1989 }, { "epoch": 0.13483412522973465, "grad_norm": 3.301814317703247, "learning_rate": 0.00039321467688937573, "loss": 0.8368, "step": 1990 }, { "epoch": 0.13490188107155865, "grad_norm": 4.028049945831299, "learning_rate": 0.00039320920043811613, "loss": 1.231, "step": 1991 }, { "epoch": 0.13496963691338262, "grad_norm": 3.308310031890869, "learning_rate": 0.00039320372398685653, "loss": 0.8777, "step": 1992 }, { "epoch": 0.13503739275520663, "grad_norm": 3.7963478565216064, "learning_rate": 0.00039319824753559693, "loss": 0.9725, "step": 1993 }, { "epoch": 0.1351051485970306, "grad_norm": 3.3007848262786865, "learning_rate": 0.0003931927710843374, "loss": 0.9158, "step": 1994 }, { "epoch": 0.13517290443885457, "grad_norm": 3.9070980548858643, "learning_rate": 0.0003931872946330778, "loss": 1.0185, "step": 1995 }, { "epoch": 0.13524066028067858, "grad_norm": 4.237251281738281, "learning_rate": 0.0003931818181818182, "loss": 1.2018, "step": 1996 }, { "epoch": 0.13530841612250255, "grad_norm": 3.5162501335144043, "learning_rate": 0.0003931763417305586, "loss": 1.0947, "step": 1997 }, { "epoch": 0.13537617196432655, "grad_norm": 3.6359193325042725, "learning_rate": 0.00039317086527929904, "loss": 0.8624, "step": 1998 }, { "epoch": 0.13544392780615053, "grad_norm": 4.43295431137085, "learning_rate": 0.00039316538882803943, "loss": 0.8911, "step": 1999 }, { "epoch": 0.13551168364797453, "grad_norm": 2.9032492637634277, "learning_rate": 0.0003931599123767799, "loss": 1.0228, "step": 2000 }, { "epoch": 0.1355794394897985, "grad_norm": 3.0034313201904297, "learning_rate": 0.0003931544359255203, "loss": 0.9481, "step": 2001 }, { "epoch": 0.1356471953316225, "grad_norm": 3.2944109439849854, "learning_rate": 0.0003931489594742607, "loss": 0.8185, "step": 2002 }, { "epoch": 0.13571495117344648, "grad_norm": 3.4253170490264893, "learning_rate": 0.0003931434830230011, "loss": 0.6822, "step": 2003 }, { "epoch": 0.13578270701527048, "grad_norm": 4.060725688934326, "learning_rate": 0.00039313800657174154, "loss": 0.908, "step": 2004 }, { "epoch": 0.13585046285709446, "grad_norm": 4.162361145019531, "learning_rate": 0.000393132530120482, "loss": 1.1549, "step": 2005 }, { "epoch": 0.13591821869891846, "grad_norm": 4.029611587524414, "learning_rate": 0.0003931270536692224, "loss": 1.1323, "step": 2006 }, { "epoch": 0.13598597454074243, "grad_norm": 3.3221356868743896, "learning_rate": 0.0003931215772179628, "loss": 0.813, "step": 2007 }, { "epoch": 0.13605373038256643, "grad_norm": 4.053617000579834, "learning_rate": 0.0003931161007667032, "loss": 0.9458, "step": 2008 }, { "epoch": 0.1361214862243904, "grad_norm": 3.43747615814209, "learning_rate": 0.0003931106243154436, "loss": 0.9893, "step": 2009 }, { "epoch": 0.13618924206621438, "grad_norm": 3.0420141220092773, "learning_rate": 0.00039310514786418404, "loss": 0.7516, "step": 2010 }, { "epoch": 0.13625699790803839, "grad_norm": 3.4257984161376953, "learning_rate": 0.00039309967141292444, "loss": 0.9864, "step": 2011 }, { "epoch": 0.13632475374986236, "grad_norm": 3.67362904548645, "learning_rate": 0.00039309419496166484, "loss": 0.9954, "step": 2012 }, { "epoch": 0.13639250959168636, "grad_norm": 4.418880462646484, "learning_rate": 0.0003930887185104053, "loss": 0.9241, "step": 2013 }, { "epoch": 0.13646026543351034, "grad_norm": 3.5813469886779785, "learning_rate": 0.0003930832420591457, "loss": 0.8535, "step": 2014 }, { "epoch": 0.13652802127533434, "grad_norm": 3.273195743560791, "learning_rate": 0.0003930777656078861, "loss": 0.9462, "step": 2015 }, { "epoch": 0.1365957771171583, "grad_norm": 3.664151430130005, "learning_rate": 0.00039307228915662655, "loss": 0.9933, "step": 2016 }, { "epoch": 0.13666353295898231, "grad_norm": 3.130005359649658, "learning_rate": 0.00039306681270536695, "loss": 0.962, "step": 2017 }, { "epoch": 0.1367312888008063, "grad_norm": 2.9658141136169434, "learning_rate": 0.00039306133625410735, "loss": 0.8924, "step": 2018 }, { "epoch": 0.1367990446426303, "grad_norm": 3.34104585647583, "learning_rate": 0.00039305585980284774, "loss": 1.1484, "step": 2019 }, { "epoch": 0.13686680048445427, "grad_norm": 3.6597139835357666, "learning_rate": 0.00039305038335158814, "loss": 0.9039, "step": 2020 }, { "epoch": 0.13693455632627827, "grad_norm": 3.5139095783233643, "learning_rate": 0.00039304490690032865, "loss": 1.0545, "step": 2021 }, { "epoch": 0.13700231216810224, "grad_norm": 2.8280012607574463, "learning_rate": 0.00039303943044906905, "loss": 0.8668, "step": 2022 }, { "epoch": 0.13707006800992624, "grad_norm": 3.2996609210968018, "learning_rate": 0.00039303395399780945, "loss": 0.9786, "step": 2023 }, { "epoch": 0.13713782385175022, "grad_norm": 4.314419269561768, "learning_rate": 0.00039302847754654985, "loss": 1.3774, "step": 2024 }, { "epoch": 0.1372055796935742, "grad_norm": 3.809823513031006, "learning_rate": 0.00039302300109529025, "loss": 1.0177, "step": 2025 }, { "epoch": 0.1372733355353982, "grad_norm": 3.2323153018951416, "learning_rate": 0.0003930175246440307, "loss": 0.9028, "step": 2026 }, { "epoch": 0.13734109137722217, "grad_norm": 4.208483695983887, "learning_rate": 0.0003930120481927711, "loss": 1.0285, "step": 2027 }, { "epoch": 0.13740884721904617, "grad_norm": 5.641801834106445, "learning_rate": 0.0003930065717415115, "loss": 1.2101, "step": 2028 }, { "epoch": 0.13747660306087015, "grad_norm": 3.521711587905884, "learning_rate": 0.00039300109529025195, "loss": 1.0655, "step": 2029 }, { "epoch": 0.13754435890269415, "grad_norm": 2.7891299724578857, "learning_rate": 0.00039299561883899235, "loss": 0.8202, "step": 2030 }, { "epoch": 0.13761211474451812, "grad_norm": 2.9741005897521973, "learning_rate": 0.00039299014238773275, "loss": 0.7934, "step": 2031 }, { "epoch": 0.13767987058634212, "grad_norm": 7.217936038970947, "learning_rate": 0.0003929846659364732, "loss": 1.1702, "step": 2032 }, { "epoch": 0.1377476264281661, "grad_norm": 3.7688241004943848, "learning_rate": 0.0003929791894852136, "loss": 0.8708, "step": 2033 }, { "epoch": 0.1378153822699901, "grad_norm": 3.4128355979919434, "learning_rate": 0.000392973713033954, "loss": 1.1512, "step": 2034 }, { "epoch": 0.13788313811181407, "grad_norm": 4.0219597816467285, "learning_rate": 0.0003929682365826944, "loss": 1.1092, "step": 2035 }, { "epoch": 0.13795089395363808, "grad_norm": 4.2756571769714355, "learning_rate": 0.00039296276013143486, "loss": 1.0763, "step": 2036 }, { "epoch": 0.13801864979546205, "grad_norm": 5.709436416625977, "learning_rate": 0.00039295728368017526, "loss": 1.1948, "step": 2037 }, { "epoch": 0.13808640563728605, "grad_norm": 2.7488200664520264, "learning_rate": 0.0003929518072289157, "loss": 0.9372, "step": 2038 }, { "epoch": 0.13815416147911003, "grad_norm": 3.348262310028076, "learning_rate": 0.0003929463307776561, "loss": 0.8962, "step": 2039 }, { "epoch": 0.138221917320934, "grad_norm": 5.292445659637451, "learning_rate": 0.0003929408543263965, "loss": 1.1315, "step": 2040 }, { "epoch": 0.138289673162758, "grad_norm": 4.651246547698975, "learning_rate": 0.0003929353778751369, "loss": 1.0607, "step": 2041 }, { "epoch": 0.13835742900458198, "grad_norm": 3.3094773292541504, "learning_rate": 0.00039292990142387736, "loss": 1.0272, "step": 2042 }, { "epoch": 0.13842518484640598, "grad_norm": 4.301296710968018, "learning_rate": 0.00039292442497261776, "loss": 1.1787, "step": 2043 }, { "epoch": 0.13849294068822995, "grad_norm": 5.791898250579834, "learning_rate": 0.0003929189485213582, "loss": 0.9111, "step": 2044 }, { "epoch": 0.13856069653005396, "grad_norm": 4.063332557678223, "learning_rate": 0.0003929134720700986, "loss": 1.0756, "step": 2045 }, { "epoch": 0.13862845237187793, "grad_norm": 4.12991189956665, "learning_rate": 0.000392907995618839, "loss": 1.3346, "step": 2046 }, { "epoch": 0.13869620821370193, "grad_norm": 2.9076972007751465, "learning_rate": 0.0003929025191675794, "loss": 0.8134, "step": 2047 }, { "epoch": 0.1387639640555259, "grad_norm": 3.608147144317627, "learning_rate": 0.00039289704271631986, "loss": 1.0595, "step": 2048 }, { "epoch": 0.1388317198973499, "grad_norm": 2.986985683441162, "learning_rate": 0.00039289156626506026, "loss": 0.8501, "step": 2049 }, { "epoch": 0.13889947573917388, "grad_norm": 3.6966941356658936, "learning_rate": 0.00039288608981380066, "loss": 0.8774, "step": 2050 }, { "epoch": 0.13896723158099789, "grad_norm": 4.209543704986572, "learning_rate": 0.00039288061336254106, "loss": 1.0716, "step": 2051 }, { "epoch": 0.13903498742282186, "grad_norm": 3.5952212810516357, "learning_rate": 0.0003928751369112815, "loss": 0.8348, "step": 2052 }, { "epoch": 0.13910274326464586, "grad_norm": 3.881042242050171, "learning_rate": 0.0003928696604600219, "loss": 1.0374, "step": 2053 }, { "epoch": 0.13917049910646984, "grad_norm": 3.3455593585968018, "learning_rate": 0.00039286418400876237, "loss": 0.8962, "step": 2054 }, { "epoch": 0.1392382549482938, "grad_norm": 3.5047824382781982, "learning_rate": 0.00039285870755750277, "loss": 0.9041, "step": 2055 }, { "epoch": 0.1393060107901178, "grad_norm": 5.325544357299805, "learning_rate": 0.00039285323110624317, "loss": 1.0228, "step": 2056 }, { "epoch": 0.1393737666319418, "grad_norm": 3.436539649963379, "learning_rate": 0.00039284775465498357, "loss": 0.9214, "step": 2057 }, { "epoch": 0.1394415224737658, "grad_norm": 3.9321558475494385, "learning_rate": 0.00039284227820372396, "loss": 0.9105, "step": 2058 }, { "epoch": 0.13950927831558976, "grad_norm": 5.037657737731934, "learning_rate": 0.0003928368017524644, "loss": 0.9893, "step": 2059 }, { "epoch": 0.13957703415741377, "grad_norm": 4.227497577667236, "learning_rate": 0.00039283132530120487, "loss": 1.161, "step": 2060 }, { "epoch": 0.13964478999923774, "grad_norm": 3.6891133785247803, "learning_rate": 0.00039282584884994527, "loss": 0.983, "step": 2061 }, { "epoch": 0.13971254584106174, "grad_norm": 3.7053208351135254, "learning_rate": 0.00039282037239868567, "loss": 1.0068, "step": 2062 }, { "epoch": 0.13978030168288572, "grad_norm": 4.371405124664307, "learning_rate": 0.00039281489594742607, "loss": 1.168, "step": 2063 }, { "epoch": 0.13984805752470972, "grad_norm": 5.198989391326904, "learning_rate": 0.0003928094194961665, "loss": 1.0547, "step": 2064 }, { "epoch": 0.1399158133665337, "grad_norm": 4.24947452545166, "learning_rate": 0.0003928039430449069, "loss": 0.8769, "step": 2065 }, { "epoch": 0.1399835692083577, "grad_norm": 3.553696393966675, "learning_rate": 0.0003927984665936473, "loss": 0.927, "step": 2066 }, { "epoch": 0.14005132505018167, "grad_norm": 7.917940616607666, "learning_rate": 0.0003927929901423878, "loss": 0.8645, "step": 2067 }, { "epoch": 0.14011908089200567, "grad_norm": 3.103926181793213, "learning_rate": 0.0003927875136911282, "loss": 0.8436, "step": 2068 }, { "epoch": 0.14018683673382965, "grad_norm": 5.303426742553711, "learning_rate": 0.00039278203723986857, "loss": 1.0966, "step": 2069 }, { "epoch": 0.14025459257565362, "grad_norm": 4.286533832550049, "learning_rate": 0.000392776560788609, "loss": 1.0876, "step": 2070 }, { "epoch": 0.14032234841747762, "grad_norm": 4.240042209625244, "learning_rate": 0.0003927710843373494, "loss": 1.073, "step": 2071 }, { "epoch": 0.1403901042593016, "grad_norm": 3.280837059020996, "learning_rate": 0.0003927656078860898, "loss": 0.9107, "step": 2072 }, { "epoch": 0.1404578601011256, "grad_norm": 4.542739391326904, "learning_rate": 0.0003927601314348302, "loss": 1.0865, "step": 2073 }, { "epoch": 0.14052561594294957, "grad_norm": 7.275092601776123, "learning_rate": 0.0003927546549835706, "loss": 1.2329, "step": 2074 }, { "epoch": 0.14059337178477357, "grad_norm": 4.99735164642334, "learning_rate": 0.0003927491785323111, "loss": 0.9936, "step": 2075 }, { "epoch": 0.14066112762659755, "grad_norm": 5.522153854370117, "learning_rate": 0.00039274370208105153, "loss": 1.3785, "step": 2076 }, { "epoch": 0.14072888346842155, "grad_norm": 5.422906875610352, "learning_rate": 0.00039273822562979193, "loss": 1.2085, "step": 2077 }, { "epoch": 0.14079663931024552, "grad_norm": 3.4788289070129395, "learning_rate": 0.00039273274917853233, "loss": 0.9071, "step": 2078 }, { "epoch": 0.14086439515206953, "grad_norm": 3.3869335651397705, "learning_rate": 0.00039272727272727273, "loss": 0.7983, "step": 2079 }, { "epoch": 0.1409321509938935, "grad_norm": 5.545078277587891, "learning_rate": 0.0003927217962760132, "loss": 1.2516, "step": 2080 }, { "epoch": 0.1409999068357175, "grad_norm": 3.380993604660034, "learning_rate": 0.0003927163198247536, "loss": 0.9104, "step": 2081 }, { "epoch": 0.14106766267754148, "grad_norm": 2.994847536087036, "learning_rate": 0.000392710843373494, "loss": 0.8234, "step": 2082 }, { "epoch": 0.14113541851936548, "grad_norm": 4.117560386657715, "learning_rate": 0.00039270536692223443, "loss": 0.8144, "step": 2083 }, { "epoch": 0.14120317436118945, "grad_norm": 3.5420539379119873, "learning_rate": 0.00039269989047097483, "loss": 0.8388, "step": 2084 }, { "epoch": 0.14127093020301343, "grad_norm": 3.6176364421844482, "learning_rate": 0.00039269441401971523, "loss": 0.9861, "step": 2085 }, { "epoch": 0.14133868604483743, "grad_norm": 3.8295180797576904, "learning_rate": 0.0003926889375684557, "loss": 0.8231, "step": 2086 }, { "epoch": 0.1414064418866614, "grad_norm": 5.298388481140137, "learning_rate": 0.0003926834611171961, "loss": 1.2394, "step": 2087 }, { "epoch": 0.1414741977284854, "grad_norm": 4.269284725189209, "learning_rate": 0.0003926779846659365, "loss": 1.1129, "step": 2088 }, { "epoch": 0.14154195357030938, "grad_norm": 3.3076815605163574, "learning_rate": 0.0003926725082146769, "loss": 0.8602, "step": 2089 }, { "epoch": 0.14160970941213338, "grad_norm": 4.088136196136475, "learning_rate": 0.0003926670317634173, "loss": 1.0013, "step": 2090 }, { "epoch": 0.14167746525395736, "grad_norm": 4.127427577972412, "learning_rate": 0.00039266155531215773, "loss": 0.8866, "step": 2091 }, { "epoch": 0.14174522109578136, "grad_norm": 7.000527381896973, "learning_rate": 0.0003926560788608982, "loss": 0.7672, "step": 2092 }, { "epoch": 0.14181297693760533, "grad_norm": 3.2157084941864014, "learning_rate": 0.0003926506024096386, "loss": 0.8506, "step": 2093 }, { "epoch": 0.14188073277942934, "grad_norm": 3.456639051437378, "learning_rate": 0.000392645125958379, "loss": 1.1805, "step": 2094 }, { "epoch": 0.1419484886212533, "grad_norm": 2.636897087097168, "learning_rate": 0.0003926396495071194, "loss": 0.8704, "step": 2095 }, { "epoch": 0.1420162444630773, "grad_norm": 3.683851718902588, "learning_rate": 0.0003926341730558598, "loss": 0.8362, "step": 2096 }, { "epoch": 0.1420840003049013, "grad_norm": 3.9194719791412354, "learning_rate": 0.00039262869660460024, "loss": 1.0408, "step": 2097 }, { "epoch": 0.1421517561467253, "grad_norm": 5.0648884773254395, "learning_rate": 0.0003926232201533407, "loss": 1.3083, "step": 2098 }, { "epoch": 0.14221951198854926, "grad_norm": 3.2401018142700195, "learning_rate": 0.0003926177437020811, "loss": 0.8714, "step": 2099 }, { "epoch": 0.14228726783037324, "grad_norm": 2.7266671657562256, "learning_rate": 0.0003926122672508215, "loss": 0.5694, "step": 2100 }, { "epoch": 0.14235502367219724, "grad_norm": 4.627926349639893, "learning_rate": 0.0003926067907995619, "loss": 1.2818, "step": 2101 }, { "epoch": 0.1424227795140212, "grad_norm": 3.868546962738037, "learning_rate": 0.00039260131434830234, "loss": 0.8009, "step": 2102 }, { "epoch": 0.14249053535584522, "grad_norm": 4.255434989929199, "learning_rate": 0.00039259583789704274, "loss": 1.1021, "step": 2103 }, { "epoch": 0.1425582911976692, "grad_norm": 3.9906864166259766, "learning_rate": 0.00039259036144578314, "loss": 1.0281, "step": 2104 }, { "epoch": 0.1426260470394932, "grad_norm": 3.5460011959075928, "learning_rate": 0.00039258488499452354, "loss": 0.9633, "step": 2105 }, { "epoch": 0.14269380288131717, "grad_norm": 3.7452077865600586, "learning_rate": 0.000392579408543264, "loss": 0.936, "step": 2106 }, { "epoch": 0.14276155872314117, "grad_norm": 3.526322364807129, "learning_rate": 0.0003925739320920044, "loss": 1.2107, "step": 2107 }, { "epoch": 0.14282931456496514, "grad_norm": 3.7674813270568848, "learning_rate": 0.00039256845564074485, "loss": 0.9477, "step": 2108 }, { "epoch": 0.14289707040678915, "grad_norm": 3.5109872817993164, "learning_rate": 0.00039256297918948525, "loss": 0.9526, "step": 2109 }, { "epoch": 0.14296482624861312, "grad_norm": 3.6948585510253906, "learning_rate": 0.00039255750273822565, "loss": 0.8935, "step": 2110 }, { "epoch": 0.14303258209043712, "grad_norm": 3.927109479904175, "learning_rate": 0.00039255202628696604, "loss": 1.3078, "step": 2111 }, { "epoch": 0.1431003379322611, "grad_norm": 3.035163402557373, "learning_rate": 0.00039254654983570644, "loss": 0.913, "step": 2112 }, { "epoch": 0.1431680937740851, "grad_norm": 2.9310853481292725, "learning_rate": 0.0003925410733844469, "loss": 0.8595, "step": 2113 }, { "epoch": 0.14323584961590907, "grad_norm": 3.4072773456573486, "learning_rate": 0.00039253559693318735, "loss": 1.0167, "step": 2114 }, { "epoch": 0.14330360545773305, "grad_norm": 3.48146390914917, "learning_rate": 0.00039253012048192775, "loss": 1.0699, "step": 2115 }, { "epoch": 0.14337136129955705, "grad_norm": 3.5380361080169678, "learning_rate": 0.00039252464403066815, "loss": 1.0225, "step": 2116 }, { "epoch": 0.14343911714138102, "grad_norm": 3.9286787509918213, "learning_rate": 0.00039251916757940855, "loss": 1.2311, "step": 2117 }, { "epoch": 0.14350687298320502, "grad_norm": 3.2067854404449463, "learning_rate": 0.000392513691128149, "loss": 0.9134, "step": 2118 }, { "epoch": 0.143574628825029, "grad_norm": 4.170510292053223, "learning_rate": 0.0003925082146768894, "loss": 1.2088, "step": 2119 }, { "epoch": 0.143642384666853, "grad_norm": 2.894944667816162, "learning_rate": 0.0003925027382256298, "loss": 0.8306, "step": 2120 }, { "epoch": 0.14371014050867698, "grad_norm": 3.0361433029174805, "learning_rate": 0.0003924972617743702, "loss": 1.1654, "step": 2121 }, { "epoch": 0.14377789635050098, "grad_norm": 5.335592746734619, "learning_rate": 0.00039249178532311065, "loss": 1.1184, "step": 2122 }, { "epoch": 0.14384565219232495, "grad_norm": 3.661141872406006, "learning_rate": 0.00039248630887185105, "loss": 0.913, "step": 2123 }, { "epoch": 0.14391340803414895, "grad_norm": 3.199061393737793, "learning_rate": 0.0003924808324205915, "loss": 0.8722, "step": 2124 }, { "epoch": 0.14398116387597293, "grad_norm": 3.088141679763794, "learning_rate": 0.0003924753559693319, "loss": 0.8965, "step": 2125 }, { "epoch": 0.14404891971779693, "grad_norm": 4.142609596252441, "learning_rate": 0.0003924698795180723, "loss": 1.3532, "step": 2126 }, { "epoch": 0.1441166755596209, "grad_norm": 3.54492449760437, "learning_rate": 0.0003924644030668127, "loss": 1.1457, "step": 2127 }, { "epoch": 0.1441844314014449, "grad_norm": 3.0541512966156006, "learning_rate": 0.0003924589266155531, "loss": 0.8593, "step": 2128 }, { "epoch": 0.14425218724326888, "grad_norm": 2.877804756164551, "learning_rate": 0.00039245345016429356, "loss": 0.7873, "step": 2129 }, { "epoch": 0.14431994308509286, "grad_norm": 3.437086820602417, "learning_rate": 0.000392447973713034, "loss": 0.9796, "step": 2130 }, { "epoch": 0.14438769892691686, "grad_norm": 4.922691822052002, "learning_rate": 0.0003924424972617744, "loss": 1.1888, "step": 2131 }, { "epoch": 0.14445545476874083, "grad_norm": 7.429305553436279, "learning_rate": 0.0003924370208105148, "loss": 0.9245, "step": 2132 }, { "epoch": 0.14452321061056483, "grad_norm": 3.9140686988830566, "learning_rate": 0.0003924315443592552, "loss": 1.1263, "step": 2133 }, { "epoch": 0.1445909664523888, "grad_norm": 3.129779577255249, "learning_rate": 0.0003924260679079956, "loss": 0.8501, "step": 2134 }, { "epoch": 0.1446587222942128, "grad_norm": 3.4149117469787598, "learning_rate": 0.00039242059145673606, "loss": 0.8251, "step": 2135 }, { "epoch": 0.14472647813603678, "grad_norm": 4.53984260559082, "learning_rate": 0.00039241511500547646, "loss": 1.0949, "step": 2136 }, { "epoch": 0.1447942339778608, "grad_norm": 2.7803573608398438, "learning_rate": 0.0003924096385542169, "loss": 0.8716, "step": 2137 }, { "epoch": 0.14486198981968476, "grad_norm": 3.4807686805725098, "learning_rate": 0.0003924041621029573, "loss": 1.1694, "step": 2138 }, { "epoch": 0.14492974566150876, "grad_norm": 4.289523601531982, "learning_rate": 0.0003923986856516977, "loss": 0.8649, "step": 2139 }, { "epoch": 0.14499750150333274, "grad_norm": 3.568579912185669, "learning_rate": 0.00039239320920043816, "loss": 1.0478, "step": 2140 }, { "epoch": 0.14506525734515674, "grad_norm": 3.8929920196533203, "learning_rate": 0.00039238773274917856, "loss": 0.9281, "step": 2141 }, { "epoch": 0.1451330131869807, "grad_norm": 2.9407284259796143, "learning_rate": 0.00039238225629791896, "loss": 0.9095, "step": 2142 }, { "epoch": 0.14520076902880472, "grad_norm": 3.0965287685394287, "learning_rate": 0.00039237677984665936, "loss": 0.9094, "step": 2143 }, { "epoch": 0.1452685248706287, "grad_norm": 3.28955340385437, "learning_rate": 0.00039237130339539976, "loss": 0.8101, "step": 2144 }, { "epoch": 0.14533628071245266, "grad_norm": 3.4915881156921387, "learning_rate": 0.0003923658269441402, "loss": 0.7588, "step": 2145 }, { "epoch": 0.14540403655427667, "grad_norm": 3.6683669090270996, "learning_rate": 0.00039236035049288067, "loss": 1.001, "step": 2146 }, { "epoch": 0.14547179239610064, "grad_norm": 3.052374839782715, "learning_rate": 0.00039235487404162107, "loss": 0.838, "step": 2147 }, { "epoch": 0.14553954823792464, "grad_norm": 4.590811729431152, "learning_rate": 0.00039234939759036147, "loss": 0.9947, "step": 2148 }, { "epoch": 0.14560730407974862, "grad_norm": 4.013795375823975, "learning_rate": 0.00039234392113910186, "loss": 1.0772, "step": 2149 }, { "epoch": 0.14567505992157262, "grad_norm": 3.562592029571533, "learning_rate": 0.00039233844468784226, "loss": 1.023, "step": 2150 }, { "epoch": 0.1457428157633966, "grad_norm": 11.617969512939453, "learning_rate": 0.0003923329682365827, "loss": 1.1349, "step": 2151 }, { "epoch": 0.1458105716052206, "grad_norm": 3.8551318645477295, "learning_rate": 0.0003923274917853231, "loss": 0.8366, "step": 2152 }, { "epoch": 0.14587832744704457, "grad_norm": 4.113894939422607, "learning_rate": 0.00039232201533406357, "loss": 1.1338, "step": 2153 }, { "epoch": 0.14594608328886857, "grad_norm": 6.330554485321045, "learning_rate": 0.00039231653888280397, "loss": 0.7766, "step": 2154 }, { "epoch": 0.14601383913069255, "grad_norm": 3.9672188758850098, "learning_rate": 0.00039231106243154437, "loss": 0.907, "step": 2155 }, { "epoch": 0.14608159497251655, "grad_norm": 4.124312877655029, "learning_rate": 0.0003923055859802848, "loss": 1.0132, "step": 2156 }, { "epoch": 0.14614935081434052, "grad_norm": 2.816448450088501, "learning_rate": 0.0003923001095290252, "loss": 0.8018, "step": 2157 }, { "epoch": 0.14621710665616452, "grad_norm": 4.131261348724365, "learning_rate": 0.0003922946330777656, "loss": 1.0888, "step": 2158 }, { "epoch": 0.1462848624979885, "grad_norm": 4.663915157318115, "learning_rate": 0.000392289156626506, "loss": 1.2924, "step": 2159 }, { "epoch": 0.14635261833981247, "grad_norm": 6.631914138793945, "learning_rate": 0.0003922836801752464, "loss": 0.9737, "step": 2160 }, { "epoch": 0.14642037418163648, "grad_norm": 3.5624353885650635, "learning_rate": 0.00039227820372398687, "loss": 1.0281, "step": 2161 }, { "epoch": 0.14648813002346045, "grad_norm": 5.444281578063965, "learning_rate": 0.0003922727272727273, "loss": 0.8537, "step": 2162 }, { "epoch": 0.14655588586528445, "grad_norm": 3.349419116973877, "learning_rate": 0.0003922672508214677, "loss": 0.8149, "step": 2163 }, { "epoch": 0.14662364170710843, "grad_norm": 4.304533004760742, "learning_rate": 0.0003922617743702081, "loss": 0.9076, "step": 2164 }, { "epoch": 0.14669139754893243, "grad_norm": 2.891998529434204, "learning_rate": 0.0003922562979189485, "loss": 0.7654, "step": 2165 }, { "epoch": 0.1467591533907564, "grad_norm": 4.707879543304443, "learning_rate": 0.0003922508214676889, "loss": 1.1893, "step": 2166 }, { "epoch": 0.1468269092325804, "grad_norm": 4.528392791748047, "learning_rate": 0.0003922453450164294, "loss": 1.2779, "step": 2167 }, { "epoch": 0.14689466507440438, "grad_norm": 3.851830005645752, "learning_rate": 0.00039223986856516983, "loss": 1.4486, "step": 2168 }, { "epoch": 0.14696242091622838, "grad_norm": 3.184576988220215, "learning_rate": 0.00039223439211391023, "loss": 1.0507, "step": 2169 }, { "epoch": 0.14703017675805236, "grad_norm": 3.7840793132781982, "learning_rate": 0.00039222891566265063, "loss": 1.0007, "step": 2170 }, { "epoch": 0.14709793259987636, "grad_norm": 3.729084014892578, "learning_rate": 0.00039222343921139103, "loss": 0.9816, "step": 2171 }, { "epoch": 0.14716568844170033, "grad_norm": 4.772675037384033, "learning_rate": 0.0003922179627601314, "loss": 0.9053, "step": 2172 }, { "epoch": 0.1472334442835243, "grad_norm": 4.377356052398682, "learning_rate": 0.0003922124863088719, "loss": 1.3553, "step": 2173 }, { "epoch": 0.1473012001253483, "grad_norm": 3.304994821548462, "learning_rate": 0.0003922070098576123, "loss": 0.8358, "step": 2174 }, { "epoch": 0.14736895596717228, "grad_norm": 2.856403112411499, "learning_rate": 0.0003922015334063527, "loss": 0.8754, "step": 2175 }, { "epoch": 0.14743671180899628, "grad_norm": 3.035207748413086, "learning_rate": 0.00039219605695509313, "loss": 0.704, "step": 2176 }, { "epoch": 0.14750446765082026, "grad_norm": 3.813202381134033, "learning_rate": 0.00039219058050383353, "loss": 0.836, "step": 2177 }, { "epoch": 0.14757222349264426, "grad_norm": 11.135214805603027, "learning_rate": 0.000392185104052574, "loss": 0.9285, "step": 2178 }, { "epoch": 0.14763997933446824, "grad_norm": 4.281454563140869, "learning_rate": 0.0003921796276013144, "loss": 1.0635, "step": 2179 }, { "epoch": 0.14770773517629224, "grad_norm": 5.054574489593506, "learning_rate": 0.0003921741511500548, "loss": 1.0832, "step": 2180 }, { "epoch": 0.1477754910181162, "grad_norm": 2.9585464000701904, "learning_rate": 0.0003921686746987952, "loss": 0.8989, "step": 2181 }, { "epoch": 0.1478432468599402, "grad_norm": 4.758214950561523, "learning_rate": 0.0003921631982475356, "loss": 1.2327, "step": 2182 }, { "epoch": 0.1479110027017642, "grad_norm": 3.2253663539886475, "learning_rate": 0.00039215772179627603, "loss": 0.8912, "step": 2183 }, { "epoch": 0.1479787585435882, "grad_norm": 3.5604875087738037, "learning_rate": 0.0003921522453450165, "loss": 1.2369, "step": 2184 }, { "epoch": 0.14804651438541216, "grad_norm": 3.8764712810516357, "learning_rate": 0.0003921467688937569, "loss": 0.9069, "step": 2185 }, { "epoch": 0.14811427022723617, "grad_norm": 3.3568320274353027, "learning_rate": 0.0003921412924424973, "loss": 0.9533, "step": 2186 }, { "epoch": 0.14818202606906014, "grad_norm": 3.796172618865967, "learning_rate": 0.0003921358159912377, "loss": 1.1698, "step": 2187 }, { "epoch": 0.14824978191088412, "grad_norm": 2.7328951358795166, "learning_rate": 0.0003921303395399781, "loss": 0.9762, "step": 2188 }, { "epoch": 0.14831753775270812, "grad_norm": 3.548103094100952, "learning_rate": 0.00039212486308871854, "loss": 0.97, "step": 2189 }, { "epoch": 0.1483852935945321, "grad_norm": 3.534048080444336, "learning_rate": 0.00039211938663745894, "loss": 0.7716, "step": 2190 }, { "epoch": 0.1484530494363561, "grad_norm": 3.7900633811950684, "learning_rate": 0.00039211391018619934, "loss": 0.8052, "step": 2191 }, { "epoch": 0.14852080527818007, "grad_norm": 2.778799533843994, "learning_rate": 0.0003921084337349398, "loss": 0.846, "step": 2192 }, { "epoch": 0.14858856112000407, "grad_norm": 4.366217613220215, "learning_rate": 0.0003921029572836802, "loss": 1.2339, "step": 2193 }, { "epoch": 0.14865631696182804, "grad_norm": 3.8015713691711426, "learning_rate": 0.00039209748083242064, "loss": 1.2771, "step": 2194 }, { "epoch": 0.14872407280365205, "grad_norm": 3.049508571624756, "learning_rate": 0.00039209200438116104, "loss": 0.7798, "step": 2195 }, { "epoch": 0.14879182864547602, "grad_norm": 6.033420085906982, "learning_rate": 0.00039208652792990144, "loss": 1.2439, "step": 2196 }, { "epoch": 0.14885958448730002, "grad_norm": 3.342482089996338, "learning_rate": 0.00039208105147864184, "loss": 0.8499, "step": 2197 }, { "epoch": 0.148927340329124, "grad_norm": 2.993528127670288, "learning_rate": 0.00039207557502738224, "loss": 0.702, "step": 2198 }, { "epoch": 0.148995096170948, "grad_norm": 3.35360050201416, "learning_rate": 0.0003920700985761227, "loss": 0.9834, "step": 2199 }, { "epoch": 0.14906285201277197, "grad_norm": 3.9648444652557373, "learning_rate": 0.00039206462212486315, "loss": 0.9954, "step": 2200 }, { "epoch": 0.14913060785459598, "grad_norm": 5.362756252288818, "learning_rate": 0.00039205914567360355, "loss": 1.3304, "step": 2201 }, { "epoch": 0.14919836369641995, "grad_norm": 3.2520275115966797, "learning_rate": 0.00039205366922234394, "loss": 0.8701, "step": 2202 }, { "epoch": 0.14926611953824392, "grad_norm": 4.183501243591309, "learning_rate": 0.00039204819277108434, "loss": 1.0009, "step": 2203 }, { "epoch": 0.14933387538006793, "grad_norm": 3.7564728260040283, "learning_rate": 0.00039204271631982474, "loss": 0.819, "step": 2204 }, { "epoch": 0.1494016312218919, "grad_norm": 2.428234100341797, "learning_rate": 0.0003920372398685652, "loss": 0.6578, "step": 2205 }, { "epoch": 0.1494693870637159, "grad_norm": 3.571819305419922, "learning_rate": 0.0003920317634173056, "loss": 0.9744, "step": 2206 }, { "epoch": 0.14953714290553988, "grad_norm": 3.7363381385803223, "learning_rate": 0.00039202628696604605, "loss": 1.2083, "step": 2207 }, { "epoch": 0.14960489874736388, "grad_norm": 3.2642152309417725, "learning_rate": 0.00039202081051478645, "loss": 0.9189, "step": 2208 }, { "epoch": 0.14967265458918785, "grad_norm": 3.3988795280456543, "learning_rate": 0.00039201533406352685, "loss": 0.8617, "step": 2209 }, { "epoch": 0.14974041043101186, "grad_norm": 3.4839141368865967, "learning_rate": 0.00039200985761226725, "loss": 1.0504, "step": 2210 }, { "epoch": 0.14980816627283583, "grad_norm": 4.23141622543335, "learning_rate": 0.0003920043811610077, "loss": 1.2958, "step": 2211 }, { "epoch": 0.14987592211465983, "grad_norm": 3.6334457397460938, "learning_rate": 0.0003919989047097481, "loss": 1.0198, "step": 2212 }, { "epoch": 0.1499436779564838, "grad_norm": 3.187537908554077, "learning_rate": 0.0003919934282584885, "loss": 0.8468, "step": 2213 }, { "epoch": 0.1500114337983078, "grad_norm": 3.3582940101623535, "learning_rate": 0.0003919879518072289, "loss": 0.9453, "step": 2214 }, { "epoch": 0.15007918964013178, "grad_norm": 4.020011901855469, "learning_rate": 0.00039198247535596935, "loss": 0.9365, "step": 2215 }, { "epoch": 0.15014694548195578, "grad_norm": 4.460612773895264, "learning_rate": 0.0003919769989047098, "loss": 0.9106, "step": 2216 }, { "epoch": 0.15021470132377976, "grad_norm": 3.0218677520751953, "learning_rate": 0.0003919715224534502, "loss": 1.0, "step": 2217 }, { "epoch": 0.15028245716560373, "grad_norm": 3.0925698280334473, "learning_rate": 0.0003919660460021906, "loss": 1.0344, "step": 2218 }, { "epoch": 0.15035021300742774, "grad_norm": 7.2655487060546875, "learning_rate": 0.000391960569550931, "loss": 1.0533, "step": 2219 }, { "epoch": 0.1504179688492517, "grad_norm": 3.533228874206543, "learning_rate": 0.0003919550930996714, "loss": 1.1094, "step": 2220 }, { "epoch": 0.1504857246910757, "grad_norm": 3.9258177280426025, "learning_rate": 0.00039194961664841186, "loss": 1.0608, "step": 2221 }, { "epoch": 0.1505534805328997, "grad_norm": 2.8459794521331787, "learning_rate": 0.00039194414019715225, "loss": 0.8872, "step": 2222 }, { "epoch": 0.1506212363747237, "grad_norm": 3.4895272254943848, "learning_rate": 0.0003919386637458927, "loss": 0.9805, "step": 2223 }, { "epoch": 0.15068899221654766, "grad_norm": 3.941066026687622, "learning_rate": 0.0003919331872946331, "loss": 1.2712, "step": 2224 }, { "epoch": 0.15075674805837166, "grad_norm": 3.0331318378448486, "learning_rate": 0.0003919277108433735, "loss": 0.9326, "step": 2225 }, { "epoch": 0.15082450390019564, "grad_norm": 3.08465576171875, "learning_rate": 0.0003919222343921139, "loss": 0.9931, "step": 2226 }, { "epoch": 0.15089225974201964, "grad_norm": 3.2743256092071533, "learning_rate": 0.00039191675794085436, "loss": 0.8881, "step": 2227 }, { "epoch": 0.15096001558384362, "grad_norm": 3.4027581214904785, "learning_rate": 0.00039191128148959476, "loss": 1.0742, "step": 2228 }, { "epoch": 0.15102777142566762, "grad_norm": 3.3248753547668457, "learning_rate": 0.00039190580503833516, "loss": 0.8918, "step": 2229 }, { "epoch": 0.1510955272674916, "grad_norm": 2.8444812297821045, "learning_rate": 0.0003919003285870756, "loss": 0.8646, "step": 2230 }, { "epoch": 0.1511632831093156, "grad_norm": 2.828279733657837, "learning_rate": 0.000391894852135816, "loss": 1.0387, "step": 2231 }, { "epoch": 0.15123103895113957, "grad_norm": 3.376667022705078, "learning_rate": 0.00039188937568455646, "loss": 1.1138, "step": 2232 }, { "epoch": 0.15129879479296354, "grad_norm": 3.6115779876708984, "learning_rate": 0.00039188389923329686, "loss": 1.0158, "step": 2233 }, { "epoch": 0.15136655063478754, "grad_norm": 3.4668235778808594, "learning_rate": 0.00039187842278203726, "loss": 1.0549, "step": 2234 }, { "epoch": 0.15143430647661152, "grad_norm": 6.9779767990112305, "learning_rate": 0.00039187294633077766, "loss": 1.0743, "step": 2235 }, { "epoch": 0.15150206231843552, "grad_norm": 2.8324077129364014, "learning_rate": 0.00039186746987951806, "loss": 0.6378, "step": 2236 }, { "epoch": 0.1515698181602595, "grad_norm": 3.062481164932251, "learning_rate": 0.0003918619934282585, "loss": 0.9118, "step": 2237 }, { "epoch": 0.1516375740020835, "grad_norm": 2.9558863639831543, "learning_rate": 0.00039185651697699897, "loss": 0.8893, "step": 2238 }, { "epoch": 0.15170532984390747, "grad_norm": 5.563960075378418, "learning_rate": 0.00039185104052573937, "loss": 0.8774, "step": 2239 }, { "epoch": 0.15177308568573147, "grad_norm": 3.3860373497009277, "learning_rate": 0.00039184556407447977, "loss": 0.8095, "step": 2240 }, { "epoch": 0.15184084152755545, "grad_norm": 3.8272602558135986, "learning_rate": 0.00039184008762322016, "loss": 0.9593, "step": 2241 }, { "epoch": 0.15190859736937945, "grad_norm": 4.192650318145752, "learning_rate": 0.00039183461117196056, "loss": 0.9229, "step": 2242 }, { "epoch": 0.15197635321120342, "grad_norm": 4.173397541046143, "learning_rate": 0.000391829134720701, "loss": 1.1817, "step": 2243 }, { "epoch": 0.15204410905302743, "grad_norm": 3.5099539756774902, "learning_rate": 0.0003918236582694414, "loss": 1.0553, "step": 2244 }, { "epoch": 0.1521118648948514, "grad_norm": 3.4042956829071045, "learning_rate": 0.0003918181818181818, "loss": 0.8956, "step": 2245 }, { "epoch": 0.1521796207366754, "grad_norm": 4.20835542678833, "learning_rate": 0.00039181270536692227, "loss": 0.9878, "step": 2246 }, { "epoch": 0.15224737657849938, "grad_norm": 3.3840863704681396, "learning_rate": 0.00039180722891566267, "loss": 0.9552, "step": 2247 }, { "epoch": 0.15231513242032335, "grad_norm": 2.9864425659179688, "learning_rate": 0.00039180175246440307, "loss": 0.9833, "step": 2248 }, { "epoch": 0.15238288826214735, "grad_norm": 3.2888622283935547, "learning_rate": 0.0003917962760131435, "loss": 0.9713, "step": 2249 }, { "epoch": 0.15245064410397133, "grad_norm": 3.2475526332855225, "learning_rate": 0.0003917907995618839, "loss": 1.1102, "step": 2250 }, { "epoch": 0.15251839994579533, "grad_norm": 3.051023244857788, "learning_rate": 0.0003917853231106243, "loss": 0.9727, "step": 2251 }, { "epoch": 0.1525861557876193, "grad_norm": 3.0472633838653564, "learning_rate": 0.0003917798466593647, "loss": 0.895, "step": 2252 }, { "epoch": 0.1526539116294433, "grad_norm": 3.274644613265991, "learning_rate": 0.00039177437020810517, "loss": 0.8118, "step": 2253 }, { "epoch": 0.15272166747126728, "grad_norm": 4.101376533508301, "learning_rate": 0.0003917688937568456, "loss": 1.0857, "step": 2254 }, { "epoch": 0.15278942331309128, "grad_norm": 2.2067580223083496, "learning_rate": 0.000391763417305586, "loss": 0.6782, "step": 2255 }, { "epoch": 0.15285717915491526, "grad_norm": 3.0936734676361084, "learning_rate": 0.0003917579408543264, "loss": 0.7118, "step": 2256 }, { "epoch": 0.15292493499673926, "grad_norm": 4.483651161193848, "learning_rate": 0.0003917524644030668, "loss": 0.9089, "step": 2257 }, { "epoch": 0.15299269083856323, "grad_norm": 3.09566330909729, "learning_rate": 0.0003917469879518072, "loss": 0.9912, "step": 2258 }, { "epoch": 0.15306044668038724, "grad_norm": 2.877511978149414, "learning_rate": 0.0003917415115005477, "loss": 0.8348, "step": 2259 }, { "epoch": 0.1531282025222112, "grad_norm": 3.1886019706726074, "learning_rate": 0.0003917360350492881, "loss": 0.926, "step": 2260 }, { "epoch": 0.1531959583640352, "grad_norm": 3.413902997970581, "learning_rate": 0.0003917305585980285, "loss": 1.1722, "step": 2261 }, { "epoch": 0.1532637142058592, "grad_norm": 3.527569055557251, "learning_rate": 0.00039172508214676893, "loss": 0.9802, "step": 2262 }, { "epoch": 0.15333147004768316, "grad_norm": 4.155728816986084, "learning_rate": 0.0003917196056955093, "loss": 0.9751, "step": 2263 }, { "epoch": 0.15339922588950716, "grad_norm": 4.1871161460876465, "learning_rate": 0.0003917141292442497, "loss": 1.0094, "step": 2264 }, { "epoch": 0.15346698173133114, "grad_norm": 3.4102444648742676, "learning_rate": 0.0003917086527929902, "loss": 0.8676, "step": 2265 }, { "epoch": 0.15353473757315514, "grad_norm": 3.842257022857666, "learning_rate": 0.0003917031763417306, "loss": 1.2218, "step": 2266 }, { "epoch": 0.1536024934149791, "grad_norm": 4.487432956695557, "learning_rate": 0.000391697699890471, "loss": 1.1234, "step": 2267 }, { "epoch": 0.15367024925680312, "grad_norm": 4.266439437866211, "learning_rate": 0.0003916922234392114, "loss": 1.0491, "step": 2268 }, { "epoch": 0.1537380050986271, "grad_norm": 4.349552631378174, "learning_rate": 0.00039168674698795183, "loss": 0.8451, "step": 2269 }, { "epoch": 0.1538057609404511, "grad_norm": 3.5002074241638184, "learning_rate": 0.0003916812705366923, "loss": 1.1064, "step": 2270 }, { "epoch": 0.15387351678227507, "grad_norm": 3.354480028152466, "learning_rate": 0.0003916757940854327, "loss": 1.0278, "step": 2271 }, { "epoch": 0.15394127262409907, "grad_norm": 3.434291362762451, "learning_rate": 0.0003916703176341731, "loss": 0.8235, "step": 2272 }, { "epoch": 0.15400902846592304, "grad_norm": 3.9096696376800537, "learning_rate": 0.0003916648411829135, "loss": 1.1045, "step": 2273 }, { "epoch": 0.15407678430774704, "grad_norm": 3.05324125289917, "learning_rate": 0.0003916593647316539, "loss": 0.8646, "step": 2274 }, { "epoch": 0.15414454014957102, "grad_norm": 3.145240068435669, "learning_rate": 0.00039165388828039433, "loss": 0.9912, "step": 2275 }, { "epoch": 0.15421229599139502, "grad_norm": 6.391587257385254, "learning_rate": 0.00039164841182913473, "loss": 0.9912, "step": 2276 }, { "epoch": 0.154280051833219, "grad_norm": 3.384464740753174, "learning_rate": 0.0003916429353778752, "loss": 0.875, "step": 2277 }, { "epoch": 0.15434780767504297, "grad_norm": 3.7162532806396484, "learning_rate": 0.0003916374589266156, "loss": 0.9127, "step": 2278 }, { "epoch": 0.15441556351686697, "grad_norm": 4.7119879722595215, "learning_rate": 0.000391631982475356, "loss": 1.0822, "step": 2279 }, { "epoch": 0.15448331935869095, "grad_norm": 4.590001106262207, "learning_rate": 0.0003916265060240964, "loss": 0.9078, "step": 2280 }, { "epoch": 0.15455107520051495, "grad_norm": 4.780983924865723, "learning_rate": 0.00039162102957283684, "loss": 1.1061, "step": 2281 }, { "epoch": 0.15461883104233892, "grad_norm": 4.1171393394470215, "learning_rate": 0.00039161555312157724, "loss": 1.0692, "step": 2282 }, { "epoch": 0.15468658688416292, "grad_norm": 2.6301543712615967, "learning_rate": 0.00039161007667031764, "loss": 0.6736, "step": 2283 }, { "epoch": 0.1547543427259869, "grad_norm": 3.0278191566467285, "learning_rate": 0.00039160460021905804, "loss": 0.8822, "step": 2284 }, { "epoch": 0.1548220985678109, "grad_norm": 3.533506393432617, "learning_rate": 0.0003915991237677985, "loss": 1.0576, "step": 2285 }, { "epoch": 0.15488985440963488, "grad_norm": 4.43066930770874, "learning_rate": 0.0003915936473165389, "loss": 0.9728, "step": 2286 }, { "epoch": 0.15495761025145888, "grad_norm": 2.989530086517334, "learning_rate": 0.00039158817086527934, "loss": 0.9439, "step": 2287 }, { "epoch": 0.15502536609328285, "grad_norm": 4.224128246307373, "learning_rate": 0.00039158269441401974, "loss": 0.9691, "step": 2288 }, { "epoch": 0.15509312193510685, "grad_norm": 4.779012680053711, "learning_rate": 0.00039157721796276014, "loss": 1.0763, "step": 2289 }, { "epoch": 0.15516087777693083, "grad_norm": 4.328718662261963, "learning_rate": 0.00039157174151150054, "loss": 1.2291, "step": 2290 }, { "epoch": 0.15522863361875483, "grad_norm": 3.2869222164154053, "learning_rate": 0.000391566265060241, "loss": 0.9877, "step": 2291 }, { "epoch": 0.1552963894605788, "grad_norm": 2.935812473297119, "learning_rate": 0.0003915607886089814, "loss": 0.7392, "step": 2292 }, { "epoch": 0.15536414530240278, "grad_norm": 3.7724058628082275, "learning_rate": 0.00039155531215772185, "loss": 0.9363, "step": 2293 }, { "epoch": 0.15543190114422678, "grad_norm": 3.956517219543457, "learning_rate": 0.00039154983570646224, "loss": 1.124, "step": 2294 }, { "epoch": 0.15549965698605075, "grad_norm": 2.9776771068573, "learning_rate": 0.00039154435925520264, "loss": 0.7236, "step": 2295 }, { "epoch": 0.15556741282787476, "grad_norm": 2.835200548171997, "learning_rate": 0.00039153888280394304, "loss": 0.8119, "step": 2296 }, { "epoch": 0.15563516866969873, "grad_norm": 3.4048142433166504, "learning_rate": 0.0003915334063526835, "loss": 0.8828, "step": 2297 }, { "epoch": 0.15570292451152273, "grad_norm": 6.744691371917725, "learning_rate": 0.0003915279299014239, "loss": 1.2655, "step": 2298 }, { "epoch": 0.1557706803533467, "grad_norm": 4.069280624389648, "learning_rate": 0.0003915224534501643, "loss": 0.802, "step": 2299 }, { "epoch": 0.1558384361951707, "grad_norm": 4.398569107055664, "learning_rate": 0.00039151697699890475, "loss": 1.3301, "step": 2300 }, { "epoch": 0.15590619203699468, "grad_norm": 3.0462827682495117, "learning_rate": 0.00039151150054764515, "loss": 0.9454, "step": 2301 }, { "epoch": 0.1559739478788187, "grad_norm": 3.5707690715789795, "learning_rate": 0.00039150602409638555, "loss": 0.8728, "step": 2302 }, { "epoch": 0.15604170372064266, "grad_norm": 4.72637414932251, "learning_rate": 0.000391500547645126, "loss": 1.2063, "step": 2303 }, { "epoch": 0.15610945956246666, "grad_norm": 4.286033630371094, "learning_rate": 0.0003914950711938664, "loss": 0.9753, "step": 2304 }, { "epoch": 0.15617721540429064, "grad_norm": 3.4821553230285645, "learning_rate": 0.0003914895947426068, "loss": 1.0313, "step": 2305 }, { "epoch": 0.15624497124611464, "grad_norm": 4.716664791107178, "learning_rate": 0.0003914841182913472, "loss": 0.878, "step": 2306 }, { "epoch": 0.1563127270879386, "grad_norm": 4.626977920532227, "learning_rate": 0.0003914786418400876, "loss": 1.0571, "step": 2307 }, { "epoch": 0.1563804829297626, "grad_norm": 4.215327739715576, "learning_rate": 0.0003914731653888281, "loss": 0.988, "step": 2308 }, { "epoch": 0.1564482387715866, "grad_norm": 4.7321577072143555, "learning_rate": 0.0003914676889375685, "loss": 1.0299, "step": 2309 }, { "epoch": 0.15651599461341056, "grad_norm": 3.076568126678467, "learning_rate": 0.0003914622124863089, "loss": 0.9726, "step": 2310 }, { "epoch": 0.15658375045523457, "grad_norm": 3.5624375343322754, "learning_rate": 0.0003914567360350493, "loss": 0.9258, "step": 2311 }, { "epoch": 0.15665150629705854, "grad_norm": 2.8745973110198975, "learning_rate": 0.0003914512595837897, "loss": 0.7874, "step": 2312 }, { "epoch": 0.15671926213888254, "grad_norm": 3.484389543533325, "learning_rate": 0.00039144578313253016, "loss": 0.8271, "step": 2313 }, { "epoch": 0.15678701798070652, "grad_norm": 2.9967684745788574, "learning_rate": 0.00039144030668127055, "loss": 0.6841, "step": 2314 }, { "epoch": 0.15685477382253052, "grad_norm": 3.3432979583740234, "learning_rate": 0.00039143483023001095, "loss": 0.7757, "step": 2315 }, { "epoch": 0.1569225296643545, "grad_norm": 3.500347137451172, "learning_rate": 0.0003914293537787514, "loss": 1.0069, "step": 2316 }, { "epoch": 0.1569902855061785, "grad_norm": 4.281485557556152, "learning_rate": 0.0003914238773274918, "loss": 0.7904, "step": 2317 }, { "epoch": 0.15705804134800247, "grad_norm": 3.8250153064727783, "learning_rate": 0.0003914184008762322, "loss": 0.9104, "step": 2318 }, { "epoch": 0.15712579718982647, "grad_norm": 3.9165241718292236, "learning_rate": 0.00039141292442497266, "loss": 1.1665, "step": 2319 }, { "epoch": 0.15719355303165045, "grad_norm": 5.408055782318115, "learning_rate": 0.00039140744797371306, "loss": 1.0558, "step": 2320 }, { "epoch": 0.15726130887347445, "grad_norm": 2.858224868774414, "learning_rate": 0.00039140197152245346, "loss": 0.8598, "step": 2321 }, { "epoch": 0.15732906471529842, "grad_norm": 3.4219090938568115, "learning_rate": 0.00039139649507119386, "loss": 1.2276, "step": 2322 }, { "epoch": 0.1573968205571224, "grad_norm": 3.4176478385925293, "learning_rate": 0.00039139101861993426, "loss": 0.9964, "step": 2323 }, { "epoch": 0.1574645763989464, "grad_norm": 2.2429585456848145, "learning_rate": 0.0003913855421686747, "loss": 0.6614, "step": 2324 }, { "epoch": 0.15753233224077037, "grad_norm": 3.5706393718719482, "learning_rate": 0.00039138006571741516, "loss": 1.0506, "step": 2325 }, { "epoch": 0.15760008808259438, "grad_norm": 2.80277156829834, "learning_rate": 0.00039137458926615556, "loss": 0.8488, "step": 2326 }, { "epoch": 0.15766784392441835, "grad_norm": 3.518329620361328, "learning_rate": 0.00039136911281489596, "loss": 1.1364, "step": 2327 }, { "epoch": 0.15773559976624235, "grad_norm": 3.4713735580444336, "learning_rate": 0.00039136363636363636, "loss": 0.9814, "step": 2328 }, { "epoch": 0.15780335560806633, "grad_norm": 3.9110569953918457, "learning_rate": 0.0003913581599123768, "loss": 1.0061, "step": 2329 }, { "epoch": 0.15787111144989033, "grad_norm": 9.212739944458008, "learning_rate": 0.0003913526834611172, "loss": 1.1774, "step": 2330 }, { "epoch": 0.1579388672917143, "grad_norm": 3.064990282058716, "learning_rate": 0.00039134720700985767, "loss": 0.9529, "step": 2331 }, { "epoch": 0.1580066231335383, "grad_norm": 2.9996888637542725, "learning_rate": 0.00039134173055859807, "loss": 0.8527, "step": 2332 }, { "epoch": 0.15807437897536228, "grad_norm": 3.304190158843994, "learning_rate": 0.00039133625410733846, "loss": 1.0561, "step": 2333 }, { "epoch": 0.15814213481718628, "grad_norm": 3.0462400913238525, "learning_rate": 0.00039133077765607886, "loss": 0.9801, "step": 2334 }, { "epoch": 0.15820989065901025, "grad_norm": 4.078166961669922, "learning_rate": 0.0003913253012048193, "loss": 1.1188, "step": 2335 }, { "epoch": 0.15827764650083426, "grad_norm": 3.2843191623687744, "learning_rate": 0.0003913198247535597, "loss": 1.0667, "step": 2336 }, { "epoch": 0.15834540234265823, "grad_norm": 3.945307731628418, "learning_rate": 0.0003913143483023001, "loss": 1.1438, "step": 2337 }, { "epoch": 0.1584131581844822, "grad_norm": 3.158125162124634, "learning_rate": 0.0003913088718510405, "loss": 0.9246, "step": 2338 }, { "epoch": 0.1584809140263062, "grad_norm": 3.2488436698913574, "learning_rate": 0.00039130339539978097, "loss": 0.8155, "step": 2339 }, { "epoch": 0.15854866986813018, "grad_norm": 3.2344892024993896, "learning_rate": 0.00039129791894852137, "loss": 0.7457, "step": 2340 }, { "epoch": 0.15861642570995418, "grad_norm": 10.447481155395508, "learning_rate": 0.0003912924424972618, "loss": 0.9385, "step": 2341 }, { "epoch": 0.15868418155177816, "grad_norm": 3.2391421794891357, "learning_rate": 0.0003912869660460022, "loss": 0.9638, "step": 2342 }, { "epoch": 0.15875193739360216, "grad_norm": 3.150144577026367, "learning_rate": 0.0003912814895947426, "loss": 0.8102, "step": 2343 }, { "epoch": 0.15881969323542613, "grad_norm": 3.88244366645813, "learning_rate": 0.000391276013143483, "loss": 1.1372, "step": 2344 }, { "epoch": 0.15888744907725014, "grad_norm": 5.775148391723633, "learning_rate": 0.0003912705366922234, "loss": 1.0088, "step": 2345 }, { "epoch": 0.1589552049190741, "grad_norm": 2.7865357398986816, "learning_rate": 0.00039126506024096387, "loss": 0.829, "step": 2346 }, { "epoch": 0.1590229607608981, "grad_norm": 3.307279586791992, "learning_rate": 0.0003912595837897043, "loss": 1.0321, "step": 2347 }, { "epoch": 0.1590907166027221, "grad_norm": 3.713334798812866, "learning_rate": 0.0003912541073384447, "loss": 0.9425, "step": 2348 }, { "epoch": 0.1591584724445461, "grad_norm": 3.826998710632324, "learning_rate": 0.0003912486308871851, "loss": 0.9558, "step": 2349 }, { "epoch": 0.15922622828637006, "grad_norm": 3.291170120239258, "learning_rate": 0.0003912431544359255, "loss": 0.8875, "step": 2350 }, { "epoch": 0.15929398412819407, "grad_norm": 3.2902164459228516, "learning_rate": 0.000391237677984666, "loss": 0.9305, "step": 2351 }, { "epoch": 0.15936173997001804, "grad_norm": 4.9762372970581055, "learning_rate": 0.0003912322015334064, "loss": 0.9433, "step": 2352 }, { "epoch": 0.15942949581184201, "grad_norm": 4.354738235473633, "learning_rate": 0.0003912267250821468, "loss": 1.1192, "step": 2353 }, { "epoch": 0.15949725165366602, "grad_norm": 5.2057271003723145, "learning_rate": 0.0003912212486308872, "loss": 0.7609, "step": 2354 }, { "epoch": 0.15956500749549, "grad_norm": 3.0585134029388428, "learning_rate": 0.0003912157721796276, "loss": 0.8057, "step": 2355 }, { "epoch": 0.159632763337314, "grad_norm": 3.8493916988372803, "learning_rate": 0.000391210295728368, "loss": 0.9733, "step": 2356 }, { "epoch": 0.15970051917913797, "grad_norm": 4.423927307128906, "learning_rate": 0.0003912048192771085, "loss": 1.2549, "step": 2357 }, { "epoch": 0.15976827502096197, "grad_norm": 3.4531209468841553, "learning_rate": 0.0003911993428258489, "loss": 1.0354, "step": 2358 }, { "epoch": 0.15983603086278594, "grad_norm": 3.800834894180298, "learning_rate": 0.0003911938663745893, "loss": 0.9978, "step": 2359 }, { "epoch": 0.15990378670460995, "grad_norm": 4.611058712005615, "learning_rate": 0.0003911883899233297, "loss": 1.0681, "step": 2360 }, { "epoch": 0.15997154254643392, "grad_norm": 3.2351226806640625, "learning_rate": 0.0003911829134720701, "loss": 0.8756, "step": 2361 }, { "epoch": 0.16003929838825792, "grad_norm": 4.632431983947754, "learning_rate": 0.00039117743702081053, "loss": 1.2032, "step": 2362 }, { "epoch": 0.1601070542300819, "grad_norm": 3.07958722114563, "learning_rate": 0.000391171960569551, "loss": 0.9315, "step": 2363 }, { "epoch": 0.1601748100719059, "grad_norm": 3.0759241580963135, "learning_rate": 0.0003911664841182914, "loss": 0.747, "step": 2364 }, { "epoch": 0.16024256591372987, "grad_norm": 3.3531603813171387, "learning_rate": 0.0003911610076670318, "loss": 1.1043, "step": 2365 }, { "epoch": 0.16031032175555388, "grad_norm": 2.83420991897583, "learning_rate": 0.0003911555312157722, "loss": 0.8363, "step": 2366 }, { "epoch": 0.16037807759737785, "grad_norm": 3.5032336711883545, "learning_rate": 0.00039115005476451263, "loss": 0.9617, "step": 2367 }, { "epoch": 0.16044583343920182, "grad_norm": 3.935238838195801, "learning_rate": 0.00039114457831325303, "loss": 1.2056, "step": 2368 }, { "epoch": 0.16051358928102583, "grad_norm": 4.242410182952881, "learning_rate": 0.00039113910186199343, "loss": 1.1656, "step": 2369 }, { "epoch": 0.1605813451228498, "grad_norm": 2.4447858333587646, "learning_rate": 0.0003911336254107339, "loss": 0.6654, "step": 2370 }, { "epoch": 0.1606491009646738, "grad_norm": 3.3764092922210693, "learning_rate": 0.0003911281489594743, "loss": 1.0379, "step": 2371 }, { "epoch": 0.16071685680649778, "grad_norm": 2.7722201347351074, "learning_rate": 0.0003911226725082147, "loss": 0.7055, "step": 2372 }, { "epoch": 0.16078461264832178, "grad_norm": 4.085498809814453, "learning_rate": 0.00039111719605695514, "loss": 1.0092, "step": 2373 }, { "epoch": 0.16085236849014575, "grad_norm": 3.5239946842193604, "learning_rate": 0.00039111171960569554, "loss": 0.9611, "step": 2374 }, { "epoch": 0.16092012433196975, "grad_norm": 3.5129261016845703, "learning_rate": 0.00039110624315443594, "loss": 0.9589, "step": 2375 }, { "epoch": 0.16098788017379373, "grad_norm": 3.942143440246582, "learning_rate": 0.00039110076670317634, "loss": 0.9712, "step": 2376 }, { "epoch": 0.16105563601561773, "grad_norm": 2.9521069526672363, "learning_rate": 0.00039109529025191673, "loss": 0.8362, "step": 2377 }, { "epoch": 0.1611233918574417, "grad_norm": 3.5620615482330322, "learning_rate": 0.0003910898138006572, "loss": 0.9572, "step": 2378 }, { "epoch": 0.1611911476992657, "grad_norm": 3.334449529647827, "learning_rate": 0.00039108433734939764, "loss": 0.9318, "step": 2379 }, { "epoch": 0.16125890354108968, "grad_norm": 3.3526742458343506, "learning_rate": 0.00039107886089813804, "loss": 0.9934, "step": 2380 }, { "epoch": 0.16132665938291368, "grad_norm": 2.76525616645813, "learning_rate": 0.00039107338444687844, "loss": 0.7018, "step": 2381 }, { "epoch": 0.16139441522473766, "grad_norm": 3.9329357147216797, "learning_rate": 0.00039106790799561884, "loss": 0.8203, "step": 2382 }, { "epoch": 0.16146217106656163, "grad_norm": 3.5432684421539307, "learning_rate": 0.00039106243154435924, "loss": 0.9169, "step": 2383 }, { "epoch": 0.16152992690838563, "grad_norm": 3.117920398712158, "learning_rate": 0.0003910569550930997, "loss": 0.9765, "step": 2384 }, { "epoch": 0.1615976827502096, "grad_norm": 3.467099666595459, "learning_rate": 0.0003910514786418401, "loss": 0.9997, "step": 2385 }, { "epoch": 0.1616654385920336, "grad_norm": 3.366712808609009, "learning_rate": 0.00039104600219058054, "loss": 0.7379, "step": 2386 }, { "epoch": 0.16173319443385759, "grad_norm": 4.257052898406982, "learning_rate": 0.00039104052573932094, "loss": 0.9985, "step": 2387 }, { "epoch": 0.1618009502756816, "grad_norm": 3.339301824569702, "learning_rate": 0.00039103504928806134, "loss": 1.0006, "step": 2388 }, { "epoch": 0.16186870611750556, "grad_norm": 3.987985849380493, "learning_rate": 0.0003910295728368018, "loss": 1.1376, "step": 2389 }, { "epoch": 0.16193646195932956, "grad_norm": 2.9686198234558105, "learning_rate": 0.0003910240963855422, "loss": 0.9392, "step": 2390 }, { "epoch": 0.16200421780115354, "grad_norm": 2.9727485179901123, "learning_rate": 0.0003910186199342826, "loss": 1.0688, "step": 2391 }, { "epoch": 0.16207197364297754, "grad_norm": 4.215958595275879, "learning_rate": 0.000391013143483023, "loss": 1.4286, "step": 2392 }, { "epoch": 0.16213972948480151, "grad_norm": 3.9825403690338135, "learning_rate": 0.0003910076670317634, "loss": 1.0671, "step": 2393 }, { "epoch": 0.16220748532662552, "grad_norm": 3.5320558547973633, "learning_rate": 0.00039100219058050385, "loss": 1.0737, "step": 2394 }, { "epoch": 0.1622752411684495, "grad_norm": 3.767899990081787, "learning_rate": 0.0003909967141292443, "loss": 1.2116, "step": 2395 }, { "epoch": 0.1623429970102735, "grad_norm": 2.673516273498535, "learning_rate": 0.0003909912376779847, "loss": 0.7047, "step": 2396 }, { "epoch": 0.16241075285209747, "grad_norm": 2.674363136291504, "learning_rate": 0.0003909857612267251, "loss": 0.8814, "step": 2397 }, { "epoch": 0.16247850869392144, "grad_norm": 3.7027153968811035, "learning_rate": 0.0003909802847754655, "loss": 1.0225, "step": 2398 }, { "epoch": 0.16254626453574544, "grad_norm": 3.7984888553619385, "learning_rate": 0.0003909748083242059, "loss": 0.9809, "step": 2399 }, { "epoch": 0.16261402037756942, "grad_norm": 2.950029134750366, "learning_rate": 0.00039096933187294635, "loss": 0.6951, "step": 2400 }, { "epoch": 0.16268177621939342, "grad_norm": 3.2700440883636475, "learning_rate": 0.0003909638554216868, "loss": 0.943, "step": 2401 }, { "epoch": 0.1627495320612174, "grad_norm": 2.6874001026153564, "learning_rate": 0.0003909583789704272, "loss": 0.6593, "step": 2402 }, { "epoch": 0.1628172879030414, "grad_norm": 4.0272979736328125, "learning_rate": 0.0003909529025191676, "loss": 1.1069, "step": 2403 }, { "epoch": 0.16288504374486537, "grad_norm": 4.092314720153809, "learning_rate": 0.000390947426067908, "loss": 1.1179, "step": 2404 }, { "epoch": 0.16295279958668937, "grad_norm": 5.414695739746094, "learning_rate": 0.00039094194961664845, "loss": 1.0901, "step": 2405 }, { "epoch": 0.16302055542851335, "grad_norm": 3.7071361541748047, "learning_rate": 0.00039093647316538885, "loss": 1.2141, "step": 2406 }, { "epoch": 0.16308831127033735, "grad_norm": 4.393648147583008, "learning_rate": 0.00039093099671412925, "loss": 1.0145, "step": 2407 }, { "epoch": 0.16315606711216132, "grad_norm": 3.34543776512146, "learning_rate": 0.00039092552026286965, "loss": 1.0512, "step": 2408 }, { "epoch": 0.16322382295398533, "grad_norm": 3.7193424701690674, "learning_rate": 0.0003909200438116101, "loss": 1.1034, "step": 2409 }, { "epoch": 0.1632915787958093, "grad_norm": 2.5605249404907227, "learning_rate": 0.0003909145673603505, "loss": 0.6876, "step": 2410 }, { "epoch": 0.1633593346376333, "grad_norm": 4.167562961578369, "learning_rate": 0.00039090909090909096, "loss": 1.2861, "step": 2411 }, { "epoch": 0.16342709047945728, "grad_norm": 3.20143723487854, "learning_rate": 0.00039090361445783136, "loss": 1.006, "step": 2412 }, { "epoch": 0.16349484632128125, "grad_norm": 3.333951234817505, "learning_rate": 0.00039089813800657176, "loss": 1.0943, "step": 2413 }, { "epoch": 0.16356260216310525, "grad_norm": 3.614236831665039, "learning_rate": 0.00039089266155531216, "loss": 0.9313, "step": 2414 }, { "epoch": 0.16363035800492923, "grad_norm": 3.5606918334960938, "learning_rate": 0.00039088718510405256, "loss": 1.2799, "step": 2415 }, { "epoch": 0.16369811384675323, "grad_norm": 3.7573323249816895, "learning_rate": 0.000390881708652793, "loss": 1.2246, "step": 2416 }, { "epoch": 0.1637658696885772, "grad_norm": 3.0211334228515625, "learning_rate": 0.00039087623220153346, "loss": 0.7964, "step": 2417 }, { "epoch": 0.1638336255304012, "grad_norm": 2.8421409130096436, "learning_rate": 0.00039087075575027386, "loss": 0.7261, "step": 2418 }, { "epoch": 0.16390138137222518, "grad_norm": 4.219807147979736, "learning_rate": 0.00039086527929901426, "loss": 1.2506, "step": 2419 }, { "epoch": 0.16396913721404918, "grad_norm": 3.1810686588287354, "learning_rate": 0.00039085980284775466, "loss": 1.1376, "step": 2420 }, { "epoch": 0.16403689305587316, "grad_norm": 3.150115728378296, "learning_rate": 0.00039085432639649506, "loss": 0.8848, "step": 2421 }, { "epoch": 0.16410464889769716, "grad_norm": 2.913374662399292, "learning_rate": 0.0003908488499452355, "loss": 0.8255, "step": 2422 }, { "epoch": 0.16417240473952113, "grad_norm": 3.515133857727051, "learning_rate": 0.0003908433734939759, "loss": 1.0325, "step": 2423 }, { "epoch": 0.16424016058134513, "grad_norm": 4.310667991638184, "learning_rate": 0.0003908378970427163, "loss": 1.0862, "step": 2424 }, { "epoch": 0.1643079164231691, "grad_norm": 6.09130859375, "learning_rate": 0.00039083242059145676, "loss": 0.8767, "step": 2425 }, { "epoch": 0.1643756722649931, "grad_norm": 4.40318489074707, "learning_rate": 0.00039082694414019716, "loss": 1.0716, "step": 2426 }, { "epoch": 0.16444342810681709, "grad_norm": 3.8440544605255127, "learning_rate": 0.0003908214676889376, "loss": 1.1218, "step": 2427 }, { "epoch": 0.16451118394864106, "grad_norm": 4.074769496917725, "learning_rate": 0.000390815991237678, "loss": 1.2436, "step": 2428 }, { "epoch": 0.16457893979046506, "grad_norm": 3.9255714416503906, "learning_rate": 0.0003908105147864184, "loss": 1.0215, "step": 2429 }, { "epoch": 0.16464669563228904, "grad_norm": 3.4815566539764404, "learning_rate": 0.0003908050383351588, "loss": 1.0192, "step": 2430 }, { "epoch": 0.16471445147411304, "grad_norm": 4.408581256866455, "learning_rate": 0.0003907995618838992, "loss": 1.0681, "step": 2431 }, { "epoch": 0.164782207315937, "grad_norm": 3.2579798698425293, "learning_rate": 0.00039079408543263967, "loss": 0.9485, "step": 2432 }, { "epoch": 0.16484996315776101, "grad_norm": 3.922394037246704, "learning_rate": 0.0003907886089813801, "loss": 1.2523, "step": 2433 }, { "epoch": 0.164917718999585, "grad_norm": 2.5622668266296387, "learning_rate": 0.0003907831325301205, "loss": 0.8427, "step": 2434 }, { "epoch": 0.164985474841409, "grad_norm": 3.2815206050872803, "learning_rate": 0.0003907776560788609, "loss": 1.017, "step": 2435 }, { "epoch": 0.16505323068323297, "grad_norm": 4.11004638671875, "learning_rate": 0.0003907721796276013, "loss": 0.8469, "step": 2436 }, { "epoch": 0.16512098652505697, "grad_norm": 3.5815510749816895, "learning_rate": 0.0003907667031763417, "loss": 1.0321, "step": 2437 }, { "epoch": 0.16518874236688094, "grad_norm": 4.084681510925293, "learning_rate": 0.00039076122672508217, "loss": 1.1594, "step": 2438 }, { "epoch": 0.16525649820870494, "grad_norm": 4.138881206512451, "learning_rate": 0.00039075575027382257, "loss": 1.2214, "step": 2439 }, { "epoch": 0.16532425405052892, "grad_norm": 3.9706532955169678, "learning_rate": 0.000390750273822563, "loss": 1.0061, "step": 2440 }, { "epoch": 0.1653920098923529, "grad_norm": 2.5234005451202393, "learning_rate": 0.0003907447973713034, "loss": 0.729, "step": 2441 }, { "epoch": 0.1654597657341769, "grad_norm": 2.7967615127563477, "learning_rate": 0.0003907393209200438, "loss": 0.6525, "step": 2442 }, { "epoch": 0.16552752157600087, "grad_norm": 4.568713188171387, "learning_rate": 0.0003907338444687843, "loss": 0.8736, "step": 2443 }, { "epoch": 0.16559527741782487, "grad_norm": 3.099701166152954, "learning_rate": 0.0003907283680175247, "loss": 0.7611, "step": 2444 }, { "epoch": 0.16566303325964885, "grad_norm": 3.5989129543304443, "learning_rate": 0.0003907228915662651, "loss": 1.0886, "step": 2445 }, { "epoch": 0.16573078910147285, "grad_norm": 3.7296478748321533, "learning_rate": 0.0003907174151150055, "loss": 1.1618, "step": 2446 }, { "epoch": 0.16579854494329682, "grad_norm": 4.262120723724365, "learning_rate": 0.00039071193866374587, "loss": 1.056, "step": 2447 }, { "epoch": 0.16586630078512082, "grad_norm": 3.680967330932617, "learning_rate": 0.0003907064622124863, "loss": 0.9768, "step": 2448 }, { "epoch": 0.1659340566269448, "grad_norm": 2.9749538898468018, "learning_rate": 0.0003907009857612268, "loss": 0.93, "step": 2449 }, { "epoch": 0.1660018124687688, "grad_norm": 4.246497631072998, "learning_rate": 0.0003906955093099672, "loss": 1.1383, "step": 2450 }, { "epoch": 0.16606956831059277, "grad_norm": 3.7326085567474365, "learning_rate": 0.0003906900328587076, "loss": 1.0418, "step": 2451 }, { "epoch": 0.16613732415241678, "grad_norm": 3.5544731616973877, "learning_rate": 0.000390684556407448, "loss": 1.06, "step": 2452 }, { "epoch": 0.16620507999424075, "grad_norm": 3.8475277423858643, "learning_rate": 0.0003906790799561884, "loss": 1.1003, "step": 2453 }, { "epoch": 0.16627283583606475, "grad_norm": 4.518758296966553, "learning_rate": 0.00039067360350492883, "loss": 1.1254, "step": 2454 }, { "epoch": 0.16634059167788873, "grad_norm": 3.011996269226074, "learning_rate": 0.00039066812705366923, "loss": 0.9466, "step": 2455 }, { "epoch": 0.1664083475197127, "grad_norm": 3.706911563873291, "learning_rate": 0.0003906626506024097, "loss": 0.9137, "step": 2456 }, { "epoch": 0.1664761033615367, "grad_norm": 3.9900949001312256, "learning_rate": 0.0003906571741511501, "loss": 1.1883, "step": 2457 }, { "epoch": 0.16654385920336068, "grad_norm": 3.853766679763794, "learning_rate": 0.0003906516976998905, "loss": 0.9743, "step": 2458 }, { "epoch": 0.16661161504518468, "grad_norm": 4.99286413192749, "learning_rate": 0.0003906462212486309, "loss": 0.6058, "step": 2459 }, { "epoch": 0.16667937088700865, "grad_norm": 3.200807809829712, "learning_rate": 0.00039064074479737133, "loss": 0.7169, "step": 2460 }, { "epoch": 0.16674712672883266, "grad_norm": 4.306271553039551, "learning_rate": 0.00039063526834611173, "loss": 0.9279, "step": 2461 }, { "epoch": 0.16681488257065663, "grad_norm": 3.0968384742736816, "learning_rate": 0.00039062979189485213, "loss": 1.0192, "step": 2462 }, { "epoch": 0.16688263841248063, "grad_norm": 4.1242289543151855, "learning_rate": 0.0003906243154435926, "loss": 0.9908, "step": 2463 }, { "epoch": 0.1669503942543046, "grad_norm": 3.7863428592681885, "learning_rate": 0.000390618838992333, "loss": 0.9677, "step": 2464 }, { "epoch": 0.1670181500961286, "grad_norm": 5.367023468017578, "learning_rate": 0.00039061336254107344, "loss": 1.1125, "step": 2465 }, { "epoch": 0.16708590593795258, "grad_norm": 3.8724138736724854, "learning_rate": 0.00039060788608981384, "loss": 1.1829, "step": 2466 }, { "epoch": 0.16715366177977659, "grad_norm": 4.036258220672607, "learning_rate": 0.00039060240963855424, "loss": 1.0198, "step": 2467 }, { "epoch": 0.16722141762160056, "grad_norm": 2.6432082653045654, "learning_rate": 0.00039059693318729464, "loss": 0.6928, "step": 2468 }, { "epoch": 0.16728917346342456, "grad_norm": 4.638551712036133, "learning_rate": 0.00039059145673603503, "loss": 0.892, "step": 2469 }, { "epoch": 0.16735692930524854, "grad_norm": 4.4014387130737305, "learning_rate": 0.0003905859802847755, "loss": 0.9948, "step": 2470 }, { "epoch": 0.1674246851470725, "grad_norm": 6.303602695465088, "learning_rate": 0.00039058050383351594, "loss": 0.9433, "step": 2471 }, { "epoch": 0.1674924409888965, "grad_norm": 2.4351272583007812, "learning_rate": 0.00039057502738225634, "loss": 0.7639, "step": 2472 }, { "epoch": 0.1675601968307205, "grad_norm": 3.6072940826416016, "learning_rate": 0.00039056955093099674, "loss": 1.0302, "step": 2473 }, { "epoch": 0.1676279526725445, "grad_norm": 4.892384052276611, "learning_rate": 0.00039056407447973714, "loss": 0.7655, "step": 2474 }, { "epoch": 0.16769570851436846, "grad_norm": 2.934410572052002, "learning_rate": 0.00039055859802847754, "loss": 0.9017, "step": 2475 }, { "epoch": 0.16776346435619247, "grad_norm": 3.499940872192383, "learning_rate": 0.000390553121577218, "loss": 1.1146, "step": 2476 }, { "epoch": 0.16783122019801644, "grad_norm": 4.725958824157715, "learning_rate": 0.0003905476451259584, "loss": 1.2517, "step": 2477 }, { "epoch": 0.16789897603984044, "grad_norm": 2.8330156803131104, "learning_rate": 0.0003905421686746988, "loss": 0.8041, "step": 2478 }, { "epoch": 0.16796673188166442, "grad_norm": 3.1319074630737305, "learning_rate": 0.00039053669222343924, "loss": 0.9445, "step": 2479 }, { "epoch": 0.16803448772348842, "grad_norm": 3.1208853721618652, "learning_rate": 0.00039053121577217964, "loss": 0.8958, "step": 2480 }, { "epoch": 0.1681022435653124, "grad_norm": 3.205109119415283, "learning_rate": 0.0003905257393209201, "loss": 0.9979, "step": 2481 }, { "epoch": 0.1681699994071364, "grad_norm": 2.689314126968384, "learning_rate": 0.0003905202628696605, "loss": 0.7155, "step": 2482 }, { "epoch": 0.16823775524896037, "grad_norm": 3.5805270671844482, "learning_rate": 0.0003905147864184009, "loss": 0.874, "step": 2483 }, { "epoch": 0.16830551109078437, "grad_norm": 5.0327301025390625, "learning_rate": 0.0003905093099671413, "loss": 1.0809, "step": 2484 }, { "epoch": 0.16837326693260835, "grad_norm": 2.962639093399048, "learning_rate": 0.0003905038335158817, "loss": 0.6237, "step": 2485 }, { "epoch": 0.16844102277443232, "grad_norm": 3.0607898235321045, "learning_rate": 0.00039049835706462215, "loss": 1.0555, "step": 2486 }, { "epoch": 0.16850877861625632, "grad_norm": 3.251007318496704, "learning_rate": 0.0003904928806133626, "loss": 0.86, "step": 2487 }, { "epoch": 0.1685765344580803, "grad_norm": 7.316134929656982, "learning_rate": 0.000390487404162103, "loss": 1.0998, "step": 2488 }, { "epoch": 0.1686442902999043, "grad_norm": 3.2383830547332764, "learning_rate": 0.0003904819277108434, "loss": 0.8463, "step": 2489 }, { "epoch": 0.16871204614172827, "grad_norm": 3.8393495082855225, "learning_rate": 0.0003904764512595838, "loss": 1.0925, "step": 2490 }, { "epoch": 0.16877980198355227, "grad_norm": 4.367223262786865, "learning_rate": 0.0003904709748083242, "loss": 0.9895, "step": 2491 }, { "epoch": 0.16884755782537625, "grad_norm": 3.1378276348114014, "learning_rate": 0.00039046549835706465, "loss": 0.8643, "step": 2492 }, { "epoch": 0.16891531366720025, "grad_norm": 3.3862216472625732, "learning_rate": 0.00039046002190580505, "loss": 0.7248, "step": 2493 }, { "epoch": 0.16898306950902423, "grad_norm": 4.475974082946777, "learning_rate": 0.0003904545454545455, "loss": 0.959, "step": 2494 }, { "epoch": 0.16905082535084823, "grad_norm": 3.610344648361206, "learning_rate": 0.0003904490690032859, "loss": 0.7547, "step": 2495 }, { "epoch": 0.1691185811926722, "grad_norm": 2.978452444076538, "learning_rate": 0.0003904435925520263, "loss": 0.838, "step": 2496 }, { "epoch": 0.1691863370344962, "grad_norm": 4.296289443969727, "learning_rate": 0.0003904381161007667, "loss": 1.1953, "step": 2497 }, { "epoch": 0.16925409287632018, "grad_norm": 2.882884979248047, "learning_rate": 0.00039043263964950715, "loss": 0.7595, "step": 2498 }, { "epoch": 0.16932184871814418, "grad_norm": 5.182488918304443, "learning_rate": 0.00039042716319824755, "loss": 0.9124, "step": 2499 }, { "epoch": 0.16938960455996815, "grad_norm": 3.2569730281829834, "learning_rate": 0.00039042168674698795, "loss": 0.9455, "step": 2500 }, { "epoch": 0.16945736040179213, "grad_norm": 3.6530795097351074, "learning_rate": 0.00039041621029572835, "loss": 1.2466, "step": 2501 }, { "epoch": 0.16952511624361613, "grad_norm": 3.1642515659332275, "learning_rate": 0.0003904107338444688, "loss": 0.9318, "step": 2502 }, { "epoch": 0.1695928720854401, "grad_norm": 5.807140350341797, "learning_rate": 0.00039040525739320926, "loss": 0.9086, "step": 2503 }, { "epoch": 0.1696606279272641, "grad_norm": 3.1718502044677734, "learning_rate": 0.00039039978094194966, "loss": 0.715, "step": 2504 }, { "epoch": 0.16972838376908808, "grad_norm": 3.395629644393921, "learning_rate": 0.00039039430449069006, "loss": 0.8206, "step": 2505 }, { "epoch": 0.16979613961091208, "grad_norm": 3.5122809410095215, "learning_rate": 0.00039038882803943046, "loss": 0.8728, "step": 2506 }, { "epoch": 0.16986389545273606, "grad_norm": 2.7259223461151123, "learning_rate": 0.00039038335158817086, "loss": 0.7943, "step": 2507 }, { "epoch": 0.16993165129456006, "grad_norm": 2.6649601459503174, "learning_rate": 0.0003903778751369113, "loss": 0.8234, "step": 2508 }, { "epoch": 0.16999940713638403, "grad_norm": 3.652932643890381, "learning_rate": 0.0003903723986856517, "loss": 0.929, "step": 2509 }, { "epoch": 0.17006716297820804, "grad_norm": 2.9952287673950195, "learning_rate": 0.00039036692223439216, "loss": 0.8891, "step": 2510 }, { "epoch": 0.170134918820032, "grad_norm": 4.975259304046631, "learning_rate": 0.00039036144578313256, "loss": 0.7084, "step": 2511 }, { "epoch": 0.170202674661856, "grad_norm": 2.84755277633667, "learning_rate": 0.00039035596933187296, "loss": 0.8879, "step": 2512 }, { "epoch": 0.17027043050368, "grad_norm": 4.363710403442383, "learning_rate": 0.00039035049288061336, "loss": 0.9035, "step": 2513 }, { "epoch": 0.170338186345504, "grad_norm": 3.009085178375244, "learning_rate": 0.0003903450164293538, "loss": 0.8504, "step": 2514 }, { "epoch": 0.17040594218732796, "grad_norm": 4.749852657318115, "learning_rate": 0.0003903395399780942, "loss": 1.2194, "step": 2515 }, { "epoch": 0.17047369802915194, "grad_norm": 3.3849310874938965, "learning_rate": 0.0003903340635268346, "loss": 0.9086, "step": 2516 }, { "epoch": 0.17054145387097594, "grad_norm": 3.9758949279785156, "learning_rate": 0.000390328587075575, "loss": 0.9983, "step": 2517 }, { "epoch": 0.17060920971279991, "grad_norm": 3.2182397842407227, "learning_rate": 0.00039032311062431546, "loss": 0.7665, "step": 2518 }, { "epoch": 0.17067696555462392, "grad_norm": 2.9485557079315186, "learning_rate": 0.0003903176341730559, "loss": 0.8612, "step": 2519 }, { "epoch": 0.1707447213964479, "grad_norm": 5.343372344970703, "learning_rate": 0.0003903121577217963, "loss": 1.2397, "step": 2520 }, { "epoch": 0.1708124772382719, "grad_norm": 2.915400743484497, "learning_rate": 0.0003903066812705367, "loss": 0.8531, "step": 2521 }, { "epoch": 0.17088023308009587, "grad_norm": 3.5305919647216797, "learning_rate": 0.0003903012048192771, "loss": 1.0432, "step": 2522 }, { "epoch": 0.17094798892191987, "grad_norm": 3.0573630332946777, "learning_rate": 0.0003902957283680175, "loss": 0.9158, "step": 2523 }, { "epoch": 0.17101574476374384, "grad_norm": 5.164576053619385, "learning_rate": 0.00039029025191675797, "loss": 1.3094, "step": 2524 }, { "epoch": 0.17108350060556785, "grad_norm": 3.91410756111145, "learning_rate": 0.0003902847754654984, "loss": 0.757, "step": 2525 }, { "epoch": 0.17115125644739182, "grad_norm": 4.13603401184082, "learning_rate": 0.0003902792990142388, "loss": 1.0597, "step": 2526 }, { "epoch": 0.17121901228921582, "grad_norm": 4.983597755432129, "learning_rate": 0.0003902738225629792, "loss": 0.8252, "step": 2527 }, { "epoch": 0.1712867681310398, "grad_norm": 3.5605082511901855, "learning_rate": 0.0003902683461117196, "loss": 1.0352, "step": 2528 }, { "epoch": 0.1713545239728638, "grad_norm": 2.942303419113159, "learning_rate": 0.00039026286966046, "loss": 0.6983, "step": 2529 }, { "epoch": 0.17142227981468777, "grad_norm": 4.3727335929870605, "learning_rate": 0.00039025739320920047, "loss": 1.139, "step": 2530 }, { "epoch": 0.17149003565651175, "grad_norm": 3.518662452697754, "learning_rate": 0.00039025191675794087, "loss": 0.7531, "step": 2531 }, { "epoch": 0.17155779149833575, "grad_norm": 3.054861307144165, "learning_rate": 0.00039024644030668127, "loss": 0.9009, "step": 2532 }, { "epoch": 0.17162554734015972, "grad_norm": 2.9179372787475586, "learning_rate": 0.0003902409638554217, "loss": 0.717, "step": 2533 }, { "epoch": 0.17169330318198373, "grad_norm": 3.4097423553466797, "learning_rate": 0.0003902354874041621, "loss": 0.9311, "step": 2534 }, { "epoch": 0.1717610590238077, "grad_norm": 4.934108734130859, "learning_rate": 0.0003902300109529025, "loss": 1.1504, "step": 2535 }, { "epoch": 0.1718288148656317, "grad_norm": 4.038305282592773, "learning_rate": 0.000390224534501643, "loss": 1.1144, "step": 2536 }, { "epoch": 0.17189657070745568, "grad_norm": 3.374476671218872, "learning_rate": 0.0003902190580503834, "loss": 0.7995, "step": 2537 }, { "epoch": 0.17196432654927968, "grad_norm": 3.6881179809570312, "learning_rate": 0.0003902135815991238, "loss": 1.1833, "step": 2538 }, { "epoch": 0.17203208239110365, "grad_norm": 3.08377742767334, "learning_rate": 0.00039020810514786417, "loss": 0.931, "step": 2539 }, { "epoch": 0.17209983823292765, "grad_norm": 3.472093343734741, "learning_rate": 0.0003902026286966046, "loss": 1.1611, "step": 2540 }, { "epoch": 0.17216759407475163, "grad_norm": 4.440534591674805, "learning_rate": 0.0003901971522453451, "loss": 1.1486, "step": 2541 }, { "epoch": 0.17223534991657563, "grad_norm": 11.117719650268555, "learning_rate": 0.0003901916757940855, "loss": 1.2, "step": 2542 }, { "epoch": 0.1723031057583996, "grad_norm": 3.0526633262634277, "learning_rate": 0.0003901861993428259, "loss": 0.9043, "step": 2543 }, { "epoch": 0.1723708616002236, "grad_norm": 2.7024383544921875, "learning_rate": 0.0003901807228915663, "loss": 0.7794, "step": 2544 }, { "epoch": 0.17243861744204758, "grad_norm": 3.114699125289917, "learning_rate": 0.0003901752464403067, "loss": 0.8526, "step": 2545 }, { "epoch": 0.17250637328387156, "grad_norm": 3.6229288578033447, "learning_rate": 0.00039016976998904713, "loss": 1.0887, "step": 2546 }, { "epoch": 0.17257412912569556, "grad_norm": 3.174581527709961, "learning_rate": 0.00039016429353778753, "loss": 0.8004, "step": 2547 }, { "epoch": 0.17264188496751953, "grad_norm": 3.8656678199768066, "learning_rate": 0.00039015881708652793, "loss": 1.0335, "step": 2548 }, { "epoch": 0.17270964080934353, "grad_norm": 3.8717849254608154, "learning_rate": 0.0003901533406352684, "loss": 1.0159, "step": 2549 }, { "epoch": 0.1727773966511675, "grad_norm": 3.6877241134643555, "learning_rate": 0.0003901478641840088, "loss": 1.2281, "step": 2550 }, { "epoch": 0.1728451524929915, "grad_norm": 4.0649542808532715, "learning_rate": 0.0003901423877327492, "loss": 1.054, "step": 2551 }, { "epoch": 0.17291290833481548, "grad_norm": 2.8179779052734375, "learning_rate": 0.00039013691128148963, "loss": 0.8153, "step": 2552 }, { "epoch": 0.1729806641766395, "grad_norm": 3.6458847522735596, "learning_rate": 0.00039013143483023003, "loss": 0.9427, "step": 2553 }, { "epoch": 0.17304842001846346, "grad_norm": 4.747622013092041, "learning_rate": 0.00039012595837897043, "loss": 1.1424, "step": 2554 }, { "epoch": 0.17311617586028746, "grad_norm": 3.742690324783325, "learning_rate": 0.00039012048192771083, "loss": 0.7889, "step": 2555 }, { "epoch": 0.17318393170211144, "grad_norm": 2.595752716064453, "learning_rate": 0.00039011500547645123, "loss": 0.84, "step": 2556 }, { "epoch": 0.17325168754393544, "grad_norm": 3.23945689201355, "learning_rate": 0.00039010952902519174, "loss": 1.0411, "step": 2557 }, { "epoch": 0.17331944338575941, "grad_norm": 3.4730265140533447, "learning_rate": 0.00039010405257393214, "loss": 1.0891, "step": 2558 }, { "epoch": 0.17338719922758342, "grad_norm": 3.289047956466675, "learning_rate": 0.00039009857612267254, "loss": 0.9431, "step": 2559 }, { "epoch": 0.1734549550694074, "grad_norm": 4.19510555267334, "learning_rate": 0.00039009309967141294, "loss": 1.0412, "step": 2560 }, { "epoch": 0.17352271091123136, "grad_norm": 4.516744613647461, "learning_rate": 0.00039008762322015333, "loss": 1.2181, "step": 2561 }, { "epoch": 0.17359046675305537, "grad_norm": 3.5150797367095947, "learning_rate": 0.0003900821467688938, "loss": 1.0449, "step": 2562 }, { "epoch": 0.17365822259487934, "grad_norm": 3.157424211502075, "learning_rate": 0.0003900766703176342, "loss": 0.9585, "step": 2563 }, { "epoch": 0.17372597843670334, "grad_norm": 3.0821104049682617, "learning_rate": 0.00039007119386637464, "loss": 0.9302, "step": 2564 }, { "epoch": 0.17379373427852732, "grad_norm": 4.197528839111328, "learning_rate": 0.00039006571741511504, "loss": 0.9406, "step": 2565 }, { "epoch": 0.17386149012035132, "grad_norm": 3.539961576461792, "learning_rate": 0.00039006024096385544, "loss": 0.9513, "step": 2566 }, { "epoch": 0.1739292459621753, "grad_norm": 4.81611442565918, "learning_rate": 0.00039005476451259584, "loss": 1.1297, "step": 2567 }, { "epoch": 0.1739970018039993, "grad_norm": 3.3031651973724365, "learning_rate": 0.0003900492880613363, "loss": 0.8257, "step": 2568 }, { "epoch": 0.17406475764582327, "grad_norm": 3.876675605773926, "learning_rate": 0.0003900438116100767, "loss": 1.1138, "step": 2569 }, { "epoch": 0.17413251348764727, "grad_norm": 3.430529832839966, "learning_rate": 0.0003900383351588171, "loss": 1.0249, "step": 2570 }, { "epoch": 0.17420026932947125, "grad_norm": 3.6733033657073975, "learning_rate": 0.0003900328587075575, "loss": 0.929, "step": 2571 }, { "epoch": 0.17426802517129525, "grad_norm": 3.7165138721466064, "learning_rate": 0.00039002738225629794, "loss": 0.923, "step": 2572 }, { "epoch": 0.17433578101311922, "grad_norm": 3.363859176635742, "learning_rate": 0.00039002190580503834, "loss": 0.8085, "step": 2573 }, { "epoch": 0.17440353685494323, "grad_norm": 3.9822096824645996, "learning_rate": 0.0003900164293537788, "loss": 1.0058, "step": 2574 }, { "epoch": 0.1744712926967672, "grad_norm": 4.498959541320801, "learning_rate": 0.0003900109529025192, "loss": 0.8737, "step": 2575 }, { "epoch": 0.17453904853859117, "grad_norm": 2.949816942214966, "learning_rate": 0.0003900054764512596, "loss": 0.8397, "step": 2576 }, { "epoch": 0.17460680438041518, "grad_norm": 3.4892256259918213, "learning_rate": 0.00039, "loss": 0.8412, "step": 2577 }, { "epoch": 0.17467456022223915, "grad_norm": 3.707641124725342, "learning_rate": 0.00038999452354874045, "loss": 0.7764, "step": 2578 }, { "epoch": 0.17474231606406315, "grad_norm": 3.977794647216797, "learning_rate": 0.00038998904709748085, "loss": 1.0539, "step": 2579 }, { "epoch": 0.17481007190588713, "grad_norm": 3.536850929260254, "learning_rate": 0.0003899835706462213, "loss": 0.901, "step": 2580 }, { "epoch": 0.17487782774771113, "grad_norm": 4.956271171569824, "learning_rate": 0.0003899780941949617, "loss": 1.0666, "step": 2581 }, { "epoch": 0.1749455835895351, "grad_norm": 4.243458271026611, "learning_rate": 0.0003899726177437021, "loss": 0.8712, "step": 2582 }, { "epoch": 0.1750133394313591, "grad_norm": 4.35375452041626, "learning_rate": 0.0003899671412924425, "loss": 0.9773, "step": 2583 }, { "epoch": 0.17508109527318308, "grad_norm": 3.2398815155029297, "learning_rate": 0.00038996166484118295, "loss": 1.0486, "step": 2584 }, { "epoch": 0.17514885111500708, "grad_norm": 5.159060955047607, "learning_rate": 0.00038995618838992335, "loss": 1.0913, "step": 2585 }, { "epoch": 0.17521660695683106, "grad_norm": 3.0955114364624023, "learning_rate": 0.00038995071193866375, "loss": 0.9304, "step": 2586 }, { "epoch": 0.17528436279865506, "grad_norm": 3.279528856277466, "learning_rate": 0.00038994523548740415, "loss": 0.7707, "step": 2587 }, { "epoch": 0.17535211864047903, "grad_norm": 2.55900502204895, "learning_rate": 0.0003899397590361446, "loss": 0.7498, "step": 2588 }, { "epoch": 0.17541987448230303, "grad_norm": 3.802781105041504, "learning_rate": 0.000389934282584885, "loss": 1.1252, "step": 2589 }, { "epoch": 0.175487630324127, "grad_norm": 2.3951821327209473, "learning_rate": 0.00038992880613362545, "loss": 0.7257, "step": 2590 }, { "epoch": 0.17555538616595098, "grad_norm": 3.6497018337249756, "learning_rate": 0.00038992332968236585, "loss": 0.9651, "step": 2591 }, { "epoch": 0.17562314200777498, "grad_norm": 4.077710151672363, "learning_rate": 0.00038991785323110625, "loss": 1.0033, "step": 2592 }, { "epoch": 0.17569089784959896, "grad_norm": 3.225461959838867, "learning_rate": 0.00038991237677984665, "loss": 1.0121, "step": 2593 }, { "epoch": 0.17575865369142296, "grad_norm": 2.669999122619629, "learning_rate": 0.00038990690032858705, "loss": 0.8283, "step": 2594 }, { "epoch": 0.17582640953324694, "grad_norm": 3.697070837020874, "learning_rate": 0.00038990142387732756, "loss": 1.0134, "step": 2595 }, { "epoch": 0.17589416537507094, "grad_norm": 3.4952752590179443, "learning_rate": 0.00038989594742606796, "loss": 0.8584, "step": 2596 }, { "epoch": 0.1759619212168949, "grad_norm": 2.3571763038635254, "learning_rate": 0.00038989047097480836, "loss": 0.5339, "step": 2597 }, { "epoch": 0.17602967705871891, "grad_norm": 3.5802249908447266, "learning_rate": 0.00038988499452354876, "loss": 1.0767, "step": 2598 }, { "epoch": 0.1760974329005429, "grad_norm": 3.3723952770233154, "learning_rate": 0.00038987951807228916, "loss": 0.922, "step": 2599 }, { "epoch": 0.1761651887423669, "grad_norm": 3.4926297664642334, "learning_rate": 0.0003898740416210296, "loss": 1.0347, "step": 2600 }, { "epoch": 0.17623294458419086, "grad_norm": 2.861858606338501, "learning_rate": 0.00038986856516977, "loss": 0.7524, "step": 2601 }, { "epoch": 0.17630070042601487, "grad_norm": 3.37429141998291, "learning_rate": 0.0003898630887185104, "loss": 0.8898, "step": 2602 }, { "epoch": 0.17636845626783884, "grad_norm": 2.9697158336639404, "learning_rate": 0.00038985761226725086, "loss": 0.9308, "step": 2603 }, { "epoch": 0.17643621210966284, "grad_norm": 2.912766456604004, "learning_rate": 0.00038985213581599126, "loss": 0.8449, "step": 2604 }, { "epoch": 0.17650396795148682, "grad_norm": 2.9587221145629883, "learning_rate": 0.00038984665936473166, "loss": 0.8446, "step": 2605 }, { "epoch": 0.1765717237933108, "grad_norm": 2.7218613624572754, "learning_rate": 0.0003898411829134721, "loss": 0.7121, "step": 2606 }, { "epoch": 0.1766394796351348, "grad_norm": 3.5207486152648926, "learning_rate": 0.0003898357064622125, "loss": 0.9856, "step": 2607 }, { "epoch": 0.17670723547695877, "grad_norm": 3.294046401977539, "learning_rate": 0.0003898302300109529, "loss": 0.9977, "step": 2608 }, { "epoch": 0.17677499131878277, "grad_norm": 3.4100379943847656, "learning_rate": 0.0003898247535596933, "loss": 1.0713, "step": 2609 }, { "epoch": 0.17684274716060674, "grad_norm": 3.8984200954437256, "learning_rate": 0.0003898192771084337, "loss": 1.2285, "step": 2610 }, { "epoch": 0.17691050300243075, "grad_norm": 2.80888032913208, "learning_rate": 0.00038981380065717416, "loss": 0.7347, "step": 2611 }, { "epoch": 0.17697825884425472, "grad_norm": 2.8719544410705566, "learning_rate": 0.0003898083242059146, "loss": 0.7561, "step": 2612 }, { "epoch": 0.17704601468607872, "grad_norm": 4.0782551765441895, "learning_rate": 0.000389802847754655, "loss": 1.0882, "step": 2613 }, { "epoch": 0.1771137705279027, "grad_norm": 3.844801902770996, "learning_rate": 0.0003897973713033954, "loss": 0.8765, "step": 2614 }, { "epoch": 0.1771815263697267, "grad_norm": 4.475718021392822, "learning_rate": 0.0003897918948521358, "loss": 0.9182, "step": 2615 }, { "epoch": 0.17724928221155067, "grad_norm": 3.4106149673461914, "learning_rate": 0.00038978641840087627, "loss": 0.9449, "step": 2616 }, { "epoch": 0.17731703805337468, "grad_norm": 3.4275550842285156, "learning_rate": 0.00038978094194961667, "loss": 0.8974, "step": 2617 }, { "epoch": 0.17738479389519865, "grad_norm": 3.4054598808288574, "learning_rate": 0.00038977546549835707, "loss": 1.0757, "step": 2618 }, { "epoch": 0.17745254973702265, "grad_norm": 3.75724458694458, "learning_rate": 0.0003897699890470975, "loss": 1.12, "step": 2619 }, { "epoch": 0.17752030557884663, "grad_norm": 6.18004846572876, "learning_rate": 0.0003897645125958379, "loss": 1.2614, "step": 2620 }, { "epoch": 0.1775880614206706, "grad_norm": 3.289705276489258, "learning_rate": 0.0003897590361445783, "loss": 1.1055, "step": 2621 }, { "epoch": 0.1776558172624946, "grad_norm": 3.257082939147949, "learning_rate": 0.00038975355969331877, "loss": 0.9413, "step": 2622 }, { "epoch": 0.17772357310431858, "grad_norm": 3.874941349029541, "learning_rate": 0.00038974808324205917, "loss": 0.9413, "step": 2623 }, { "epoch": 0.17779132894614258, "grad_norm": 2.6239776611328125, "learning_rate": 0.00038974260679079957, "loss": 0.8165, "step": 2624 }, { "epoch": 0.17785908478796655, "grad_norm": 2.9095847606658936, "learning_rate": 0.00038973713033953997, "loss": 1.0151, "step": 2625 }, { "epoch": 0.17792684062979056, "grad_norm": 3.940357208251953, "learning_rate": 0.0003897316538882804, "loss": 0.893, "step": 2626 }, { "epoch": 0.17799459647161453, "grad_norm": 4.15380859375, "learning_rate": 0.0003897261774370208, "loss": 0.9203, "step": 2627 }, { "epoch": 0.17806235231343853, "grad_norm": 2.5981051921844482, "learning_rate": 0.0003897207009857613, "loss": 0.7267, "step": 2628 }, { "epoch": 0.1781301081552625, "grad_norm": 3.4140498638153076, "learning_rate": 0.0003897152245345017, "loss": 1.1709, "step": 2629 }, { "epoch": 0.1781978639970865, "grad_norm": 5.037257194519043, "learning_rate": 0.00038970974808324207, "loss": 0.9983, "step": 2630 }, { "epoch": 0.17826561983891048, "grad_norm": 3.5703980922698975, "learning_rate": 0.00038970427163198247, "loss": 0.914, "step": 2631 }, { "epoch": 0.17833337568073448, "grad_norm": 3.9696242809295654, "learning_rate": 0.00038969879518072287, "loss": 0.8462, "step": 2632 }, { "epoch": 0.17840113152255846, "grad_norm": 3.3272550106048584, "learning_rate": 0.0003896933187294633, "loss": 0.9579, "step": 2633 }, { "epoch": 0.17846888736438246, "grad_norm": 2.634965419769287, "learning_rate": 0.0003896878422782038, "loss": 0.8297, "step": 2634 }, { "epoch": 0.17853664320620644, "grad_norm": 4.258874893188477, "learning_rate": 0.0003896823658269442, "loss": 1.1595, "step": 2635 }, { "epoch": 0.1786043990480304, "grad_norm": 4.283937454223633, "learning_rate": 0.0003896768893756846, "loss": 0.8804, "step": 2636 }, { "epoch": 0.1786721548898544, "grad_norm": 3.6384127140045166, "learning_rate": 0.000389671412924425, "loss": 1.2789, "step": 2637 }, { "epoch": 0.1787399107316784, "grad_norm": 5.409982204437256, "learning_rate": 0.00038966593647316543, "loss": 0.9387, "step": 2638 }, { "epoch": 0.1788076665735024, "grad_norm": 5.593501091003418, "learning_rate": 0.00038966046002190583, "loss": 0.9363, "step": 2639 }, { "epoch": 0.17887542241532636, "grad_norm": 4.182191848754883, "learning_rate": 0.00038965498357064623, "loss": 0.8834, "step": 2640 }, { "epoch": 0.17894317825715036, "grad_norm": 3.317460298538208, "learning_rate": 0.0003896495071193866, "loss": 1.0, "step": 2641 }, { "epoch": 0.17901093409897434, "grad_norm": 4.710450649261475, "learning_rate": 0.0003896440306681271, "loss": 0.8489, "step": 2642 }, { "epoch": 0.17907868994079834, "grad_norm": 3.936033010482788, "learning_rate": 0.0003896385542168675, "loss": 0.9309, "step": 2643 }, { "epoch": 0.17914644578262232, "grad_norm": 3.5020408630371094, "learning_rate": 0.00038963307776560793, "loss": 0.9269, "step": 2644 }, { "epoch": 0.17921420162444632, "grad_norm": 7.226351261138916, "learning_rate": 0.00038962760131434833, "loss": 0.7649, "step": 2645 }, { "epoch": 0.1792819574662703, "grad_norm": 2.956892490386963, "learning_rate": 0.00038962212486308873, "loss": 0.6984, "step": 2646 }, { "epoch": 0.1793497133080943, "grad_norm": 4.771556377410889, "learning_rate": 0.00038961664841182913, "loss": 1.158, "step": 2647 }, { "epoch": 0.17941746914991827, "grad_norm": 2.7597439289093018, "learning_rate": 0.00038961117196056953, "loss": 0.7287, "step": 2648 }, { "epoch": 0.17948522499174227, "grad_norm": 2.55896258354187, "learning_rate": 0.00038960569550931, "loss": 0.8061, "step": 2649 }, { "epoch": 0.17955298083356624, "grad_norm": 3.6491754055023193, "learning_rate": 0.00038960021905805044, "loss": 0.9915, "step": 2650 }, { "epoch": 0.17962073667539022, "grad_norm": 2.76542592048645, "learning_rate": 0.00038959474260679084, "loss": 0.8947, "step": 2651 }, { "epoch": 0.17968849251721422, "grad_norm": 4.276484966278076, "learning_rate": 0.00038958926615553124, "loss": 0.9419, "step": 2652 }, { "epoch": 0.1797562483590382, "grad_norm": 2.909879684448242, "learning_rate": 0.00038958378970427163, "loss": 0.6983, "step": 2653 }, { "epoch": 0.1798240042008622, "grad_norm": 2.697392702102661, "learning_rate": 0.0003895783132530121, "loss": 0.6049, "step": 2654 }, { "epoch": 0.17989176004268617, "grad_norm": 2.8968899250030518, "learning_rate": 0.0003895728368017525, "loss": 0.881, "step": 2655 }, { "epoch": 0.17995951588451017, "grad_norm": 3.7650716304779053, "learning_rate": 0.0003895673603504929, "loss": 1.1511, "step": 2656 }, { "epoch": 0.18002727172633415, "grad_norm": 3.2984042167663574, "learning_rate": 0.00038956188389923334, "loss": 0.9816, "step": 2657 }, { "epoch": 0.18009502756815815, "grad_norm": 3.3338100910186768, "learning_rate": 0.00038955640744797374, "loss": 0.8021, "step": 2658 }, { "epoch": 0.18016278340998212, "grad_norm": 3.511577606201172, "learning_rate": 0.00038955093099671414, "loss": 1.0287, "step": 2659 }, { "epoch": 0.18023053925180613, "grad_norm": 3.08599591255188, "learning_rate": 0.0003895454545454546, "loss": 0.8372, "step": 2660 }, { "epoch": 0.1802982950936301, "grad_norm": 4.34332799911499, "learning_rate": 0.000389539978094195, "loss": 1.3726, "step": 2661 }, { "epoch": 0.1803660509354541, "grad_norm": 3.9376471042633057, "learning_rate": 0.0003895345016429354, "loss": 0.893, "step": 2662 }, { "epoch": 0.18043380677727808, "grad_norm": 3.613227367401123, "learning_rate": 0.0003895290251916758, "loss": 1.067, "step": 2663 }, { "epoch": 0.18050156261910208, "grad_norm": 4.009829044342041, "learning_rate": 0.0003895235487404162, "loss": 1.0189, "step": 2664 }, { "epoch": 0.18056931846092605, "grad_norm": 2.7565722465515137, "learning_rate": 0.00038951807228915664, "loss": 0.9221, "step": 2665 }, { "epoch": 0.18063707430275003, "grad_norm": 3.824349880218506, "learning_rate": 0.0003895125958378971, "loss": 1.1327, "step": 2666 }, { "epoch": 0.18070483014457403, "grad_norm": 3.4602041244506836, "learning_rate": 0.0003895071193866375, "loss": 0.9262, "step": 2667 }, { "epoch": 0.180772585986398, "grad_norm": 4.573212146759033, "learning_rate": 0.0003895016429353779, "loss": 1.0516, "step": 2668 }, { "epoch": 0.180840341828222, "grad_norm": 3.433784008026123, "learning_rate": 0.0003894961664841183, "loss": 0.9526, "step": 2669 }, { "epoch": 0.18090809767004598, "grad_norm": 3.506720781326294, "learning_rate": 0.0003894906900328587, "loss": 0.9541, "step": 2670 }, { "epoch": 0.18097585351186998, "grad_norm": 3.599555015563965, "learning_rate": 0.00038948521358159915, "loss": 1.0681, "step": 2671 }, { "epoch": 0.18104360935369396, "grad_norm": 3.4419429302215576, "learning_rate": 0.00038947973713033954, "loss": 1.1236, "step": 2672 }, { "epoch": 0.18111136519551796, "grad_norm": 3.681448221206665, "learning_rate": 0.00038947426067908, "loss": 0.9423, "step": 2673 }, { "epoch": 0.18117912103734193, "grad_norm": 3.2584047317504883, "learning_rate": 0.0003894687842278204, "loss": 0.9144, "step": 2674 }, { "epoch": 0.18124687687916594, "grad_norm": 4.071553707122803, "learning_rate": 0.0003894633077765608, "loss": 1.1134, "step": 2675 }, { "epoch": 0.1813146327209899, "grad_norm": 3.0014286041259766, "learning_rate": 0.00038945783132530125, "loss": 0.9765, "step": 2676 }, { "epoch": 0.1813823885628139, "grad_norm": 3.6688241958618164, "learning_rate": 0.00038945235487404165, "loss": 1.1312, "step": 2677 }, { "epoch": 0.1814501444046379, "grad_norm": 3.805133104324341, "learning_rate": 0.00038944687842278205, "loss": 1.153, "step": 2678 }, { "epoch": 0.1815179002464619, "grad_norm": 7.35417366027832, "learning_rate": 0.00038944140197152245, "loss": 1.0345, "step": 2679 }, { "epoch": 0.18158565608828586, "grad_norm": 3.128613233566284, "learning_rate": 0.00038943592552026285, "loss": 0.9284, "step": 2680 }, { "epoch": 0.18165341193010984, "grad_norm": 2.820746660232544, "learning_rate": 0.0003894304490690033, "loss": 0.7568, "step": 2681 }, { "epoch": 0.18172116777193384, "grad_norm": 4.232748508453369, "learning_rate": 0.00038942497261774375, "loss": 1.1921, "step": 2682 }, { "epoch": 0.1817889236137578, "grad_norm": 3.090925931930542, "learning_rate": 0.00038941949616648415, "loss": 0.9605, "step": 2683 }, { "epoch": 0.18185667945558182, "grad_norm": 3.2545409202575684, "learning_rate": 0.00038941401971522455, "loss": 0.7772, "step": 2684 }, { "epoch": 0.1819244352974058, "grad_norm": 2.8999624252319336, "learning_rate": 0.00038940854326396495, "loss": 0.7736, "step": 2685 }, { "epoch": 0.1819921911392298, "grad_norm": 3.194016218185425, "learning_rate": 0.00038940306681270535, "loss": 0.9554, "step": 2686 }, { "epoch": 0.18205994698105377, "grad_norm": 3.151383399963379, "learning_rate": 0.0003893975903614458, "loss": 1.0019, "step": 2687 }, { "epoch": 0.18212770282287777, "grad_norm": 3.5041158199310303, "learning_rate": 0.0003893921139101862, "loss": 0.9024, "step": 2688 }, { "epoch": 0.18219545866470174, "grad_norm": 3.3110926151275635, "learning_rate": 0.00038938663745892666, "loss": 0.9726, "step": 2689 }, { "epoch": 0.18226321450652574, "grad_norm": 4.077019691467285, "learning_rate": 0.00038938116100766706, "loss": 1.0472, "step": 2690 }, { "epoch": 0.18233097034834972, "grad_norm": 3.9141652584075928, "learning_rate": 0.00038937568455640745, "loss": 1.2568, "step": 2691 }, { "epoch": 0.18239872619017372, "grad_norm": 3.8966596126556396, "learning_rate": 0.0003893702081051479, "loss": 0.9997, "step": 2692 }, { "epoch": 0.1824664820319977, "grad_norm": 3.6254265308380127, "learning_rate": 0.0003893647316538883, "loss": 1.2175, "step": 2693 }, { "epoch": 0.18253423787382167, "grad_norm": 3.1052329540252686, "learning_rate": 0.0003893592552026287, "loss": 1.0351, "step": 2694 }, { "epoch": 0.18260199371564567, "grad_norm": 3.3898918628692627, "learning_rate": 0.0003893537787513691, "loss": 1.1057, "step": 2695 }, { "epoch": 0.18266974955746965, "grad_norm": 2.928323984146118, "learning_rate": 0.00038934830230010956, "loss": 0.8844, "step": 2696 }, { "epoch": 0.18273750539929365, "grad_norm": 2.679624080657959, "learning_rate": 0.00038934282584884996, "loss": 0.85, "step": 2697 }, { "epoch": 0.18280526124111762, "grad_norm": 2.8958041667938232, "learning_rate": 0.0003893373493975904, "loss": 0.8185, "step": 2698 }, { "epoch": 0.18287301708294162, "grad_norm": 3.57362961769104, "learning_rate": 0.0003893318729463308, "loss": 1.1278, "step": 2699 }, { "epoch": 0.1829407729247656, "grad_norm": 4.073363304138184, "learning_rate": 0.0003893263964950712, "loss": 1.1645, "step": 2700 }, { "epoch": 0.1830085287665896, "grad_norm": 3.3202733993530273, "learning_rate": 0.0003893209200438116, "loss": 1.0613, "step": 2701 }, { "epoch": 0.18307628460841358, "grad_norm": 3.0552022457122803, "learning_rate": 0.000389315443592552, "loss": 0.8995, "step": 2702 }, { "epoch": 0.18314404045023758, "grad_norm": 3.6667277812957764, "learning_rate": 0.00038930996714129246, "loss": 1.0548, "step": 2703 }, { "epoch": 0.18321179629206155, "grad_norm": 2.921941041946411, "learning_rate": 0.0003893044906900329, "loss": 0.8421, "step": 2704 }, { "epoch": 0.18327955213388555, "grad_norm": 3.0407462120056152, "learning_rate": 0.0003892990142387733, "loss": 0.8049, "step": 2705 }, { "epoch": 0.18334730797570953, "grad_norm": 3.932305335998535, "learning_rate": 0.0003892935377875137, "loss": 0.8372, "step": 2706 }, { "epoch": 0.18341506381753353, "grad_norm": 4.510745048522949, "learning_rate": 0.0003892880613362541, "loss": 1.4213, "step": 2707 }, { "epoch": 0.1834828196593575, "grad_norm": 3.7805044651031494, "learning_rate": 0.0003892825848849945, "loss": 1.235, "step": 2708 }, { "epoch": 0.18355057550118148, "grad_norm": 3.113142728805542, "learning_rate": 0.00038927710843373497, "loss": 0.9122, "step": 2709 }, { "epoch": 0.18361833134300548, "grad_norm": 3.3371307849884033, "learning_rate": 0.00038927163198247537, "loss": 1.185, "step": 2710 }, { "epoch": 0.18368608718482946, "grad_norm": 3.1075308322906494, "learning_rate": 0.00038926615553121576, "loss": 0.9452, "step": 2711 }, { "epoch": 0.18375384302665346, "grad_norm": 3.0181798934936523, "learning_rate": 0.0003892606790799562, "loss": 0.9451, "step": 2712 }, { "epoch": 0.18382159886847743, "grad_norm": 2.9030022621154785, "learning_rate": 0.0003892552026286966, "loss": 0.6971, "step": 2713 }, { "epoch": 0.18388935471030143, "grad_norm": 3.294637680053711, "learning_rate": 0.00038924972617743707, "loss": 0.8706, "step": 2714 }, { "epoch": 0.1839571105521254, "grad_norm": 3.6005096435546875, "learning_rate": 0.00038924424972617747, "loss": 0.6631, "step": 2715 }, { "epoch": 0.1840248663939494, "grad_norm": 2.861650228500366, "learning_rate": 0.00038923877327491787, "loss": 0.8788, "step": 2716 }, { "epoch": 0.18409262223577338, "grad_norm": 2.8314208984375, "learning_rate": 0.00038923329682365827, "loss": 0.8282, "step": 2717 }, { "epoch": 0.1841603780775974, "grad_norm": 3.6045475006103516, "learning_rate": 0.00038922782037239867, "loss": 0.8872, "step": 2718 }, { "epoch": 0.18422813391942136, "grad_norm": 4.004055500030518, "learning_rate": 0.0003892223439211391, "loss": 1.1149, "step": 2719 }, { "epoch": 0.18429588976124536, "grad_norm": 3.295501708984375, "learning_rate": 0.0003892168674698796, "loss": 1.0293, "step": 2720 }, { "epoch": 0.18436364560306934, "grad_norm": 3.769365072250366, "learning_rate": 0.00038921139101862, "loss": 1.042, "step": 2721 }, { "epoch": 0.18443140144489334, "grad_norm": 3.303382396697998, "learning_rate": 0.00038920591456736037, "loss": 1.0317, "step": 2722 }, { "epoch": 0.1844991572867173, "grad_norm": 5.053825378417969, "learning_rate": 0.00038920043811610077, "loss": 1.2384, "step": 2723 }, { "epoch": 0.1845669131285413, "grad_norm": 3.6436846256256104, "learning_rate": 0.00038919496166484117, "loss": 0.8183, "step": 2724 }, { "epoch": 0.1846346689703653, "grad_norm": 2.6752846240997314, "learning_rate": 0.0003891894852135816, "loss": 0.792, "step": 2725 }, { "epoch": 0.18470242481218926, "grad_norm": 3.5337884426116943, "learning_rate": 0.000389184008762322, "loss": 1.1572, "step": 2726 }, { "epoch": 0.18477018065401327, "grad_norm": 3.323737382888794, "learning_rate": 0.0003891785323110625, "loss": 0.9435, "step": 2727 }, { "epoch": 0.18483793649583724, "grad_norm": 3.610866069793701, "learning_rate": 0.0003891730558598029, "loss": 1.1143, "step": 2728 }, { "epoch": 0.18490569233766124, "grad_norm": 2.9186441898345947, "learning_rate": 0.0003891675794085433, "loss": 0.8383, "step": 2729 }, { "epoch": 0.18497344817948522, "grad_norm": 4.304622173309326, "learning_rate": 0.00038916210295728373, "loss": 1.1049, "step": 2730 }, { "epoch": 0.18504120402130922, "grad_norm": 3.8744423389434814, "learning_rate": 0.00038915662650602413, "loss": 0.9221, "step": 2731 }, { "epoch": 0.1851089598631332, "grad_norm": 3.7568271160125732, "learning_rate": 0.00038915115005476453, "loss": 1.0329, "step": 2732 }, { "epoch": 0.1851767157049572, "grad_norm": 5.976889133453369, "learning_rate": 0.0003891456736035049, "loss": 0.8101, "step": 2733 }, { "epoch": 0.18524447154678117, "grad_norm": 2.8122339248657227, "learning_rate": 0.0003891401971522453, "loss": 0.8191, "step": 2734 }, { "epoch": 0.18531222738860517, "grad_norm": 3.552717685699463, "learning_rate": 0.0003891347207009858, "loss": 1.1971, "step": 2735 }, { "epoch": 0.18537998323042915, "grad_norm": 2.736872911453247, "learning_rate": 0.00038912924424972623, "loss": 0.9329, "step": 2736 }, { "epoch": 0.18544773907225315, "grad_norm": 2.921700954437256, "learning_rate": 0.00038912376779846663, "loss": 0.868, "step": 2737 }, { "epoch": 0.18551549491407712, "grad_norm": 3.485687732696533, "learning_rate": 0.00038911829134720703, "loss": 0.9068, "step": 2738 }, { "epoch": 0.1855832507559011, "grad_norm": 3.2625999450683594, "learning_rate": 0.00038911281489594743, "loss": 0.7244, "step": 2739 }, { "epoch": 0.1856510065977251, "grad_norm": 3.078843355178833, "learning_rate": 0.00038910733844468783, "loss": 0.8292, "step": 2740 }, { "epoch": 0.18571876243954907, "grad_norm": 3.0648934841156006, "learning_rate": 0.0003891018619934283, "loss": 0.8904, "step": 2741 }, { "epoch": 0.18578651828137308, "grad_norm": 4.389811038970947, "learning_rate": 0.0003890963855421687, "loss": 0.8412, "step": 2742 }, { "epoch": 0.18585427412319705, "grad_norm": 3.422853469848633, "learning_rate": 0.00038909090909090914, "loss": 1.0376, "step": 2743 }, { "epoch": 0.18592202996502105, "grad_norm": 3.5476505756378174, "learning_rate": 0.00038908543263964953, "loss": 0.953, "step": 2744 }, { "epoch": 0.18598978580684503, "grad_norm": 3.744685649871826, "learning_rate": 0.00038907995618838993, "loss": 0.7457, "step": 2745 }, { "epoch": 0.18605754164866903, "grad_norm": 3.5987589359283447, "learning_rate": 0.00038907447973713033, "loss": 0.8563, "step": 2746 }, { "epoch": 0.186125297490493, "grad_norm": 3.329843759536743, "learning_rate": 0.0003890690032858708, "loss": 0.9502, "step": 2747 }, { "epoch": 0.186193053332317, "grad_norm": 3.714345693588257, "learning_rate": 0.0003890635268346112, "loss": 1.0397, "step": 2748 }, { "epoch": 0.18626080917414098, "grad_norm": 3.18338680267334, "learning_rate": 0.0003890580503833516, "loss": 0.6911, "step": 2749 }, { "epoch": 0.18632856501596498, "grad_norm": 3.047652006149292, "learning_rate": 0.000389052573932092, "loss": 0.9154, "step": 2750 }, { "epoch": 0.18639632085778896, "grad_norm": 2.831117630004883, "learning_rate": 0.00038904709748083244, "loss": 0.7986, "step": 2751 }, { "epoch": 0.18646407669961296, "grad_norm": 2.417776107788086, "learning_rate": 0.0003890416210295729, "loss": 0.8015, "step": 2752 }, { "epoch": 0.18653183254143693, "grad_norm": 8.800686836242676, "learning_rate": 0.0003890361445783133, "loss": 1.11, "step": 2753 }, { "epoch": 0.1865995883832609, "grad_norm": 3.318408727645874, "learning_rate": 0.0003890306681270537, "loss": 0.8978, "step": 2754 }, { "epoch": 0.1866673442250849, "grad_norm": 4.237911224365234, "learning_rate": 0.0003890251916757941, "loss": 0.9389, "step": 2755 }, { "epoch": 0.18673510006690888, "grad_norm": 3.7788336277008057, "learning_rate": 0.0003890197152245345, "loss": 1.0492, "step": 2756 }, { "epoch": 0.18680285590873288, "grad_norm": 3.272289752960205, "learning_rate": 0.00038901423877327494, "loss": 0.8189, "step": 2757 }, { "epoch": 0.18687061175055686, "grad_norm": 3.770719528198242, "learning_rate": 0.0003890087623220154, "loss": 1.0237, "step": 2758 }, { "epoch": 0.18693836759238086, "grad_norm": 3.5051212310791016, "learning_rate": 0.0003890032858707558, "loss": 0.8965, "step": 2759 }, { "epoch": 0.18700612343420484, "grad_norm": 3.461427688598633, "learning_rate": 0.0003889978094194962, "loss": 0.9546, "step": 2760 }, { "epoch": 0.18707387927602884, "grad_norm": 3.920598268508911, "learning_rate": 0.0003889923329682366, "loss": 0.8705, "step": 2761 }, { "epoch": 0.1871416351178528, "grad_norm": 3.849118232727051, "learning_rate": 0.000388986856516977, "loss": 1.0791, "step": 2762 }, { "epoch": 0.1872093909596768, "grad_norm": 3.188839912414551, "learning_rate": 0.00038898138006571745, "loss": 0.8578, "step": 2763 }, { "epoch": 0.1872771468015008, "grad_norm": 3.7328360080718994, "learning_rate": 0.00038897590361445784, "loss": 1.1852, "step": 2764 }, { "epoch": 0.1873449026433248, "grad_norm": 4.0043416023254395, "learning_rate": 0.00038897042716319824, "loss": 0.8806, "step": 2765 }, { "epoch": 0.18741265848514876, "grad_norm": 2.7096457481384277, "learning_rate": 0.0003889649507119387, "loss": 0.8455, "step": 2766 }, { "epoch": 0.18748041432697277, "grad_norm": 3.324594020843506, "learning_rate": 0.0003889594742606791, "loss": 1.0915, "step": 2767 }, { "epoch": 0.18754817016879674, "grad_norm": 3.4153079986572266, "learning_rate": 0.00038895399780941955, "loss": 1.2219, "step": 2768 }, { "epoch": 0.18761592601062071, "grad_norm": 3.73614764213562, "learning_rate": 0.00038894852135815995, "loss": 0.8139, "step": 2769 }, { "epoch": 0.18768368185244472, "grad_norm": 3.6013848781585693, "learning_rate": 0.00038894304490690035, "loss": 1.1202, "step": 2770 }, { "epoch": 0.1877514376942687, "grad_norm": 3.0717055797576904, "learning_rate": 0.00038893756845564075, "loss": 0.9865, "step": 2771 }, { "epoch": 0.1878191935360927, "grad_norm": 4.379116058349609, "learning_rate": 0.00038893209200438115, "loss": 1.0945, "step": 2772 }, { "epoch": 0.18788694937791667, "grad_norm": 2.782845973968506, "learning_rate": 0.0003889266155531216, "loss": 0.7597, "step": 2773 }, { "epoch": 0.18795470521974067, "grad_norm": 3.087285041809082, "learning_rate": 0.00038892113910186205, "loss": 0.8965, "step": 2774 }, { "epoch": 0.18802246106156464, "grad_norm": 3.3503403663635254, "learning_rate": 0.00038891566265060245, "loss": 0.7468, "step": 2775 }, { "epoch": 0.18809021690338865, "grad_norm": 3.1630403995513916, "learning_rate": 0.00038891018619934285, "loss": 0.9173, "step": 2776 }, { "epoch": 0.18815797274521262, "grad_norm": 3.3000705242156982, "learning_rate": 0.00038890470974808325, "loss": 0.823, "step": 2777 }, { "epoch": 0.18822572858703662, "grad_norm": 3.7373993396759033, "learning_rate": 0.00038889923329682365, "loss": 1.2386, "step": 2778 }, { "epoch": 0.1882934844288606, "grad_norm": 4.010168075561523, "learning_rate": 0.0003888937568455641, "loss": 1.0299, "step": 2779 }, { "epoch": 0.1883612402706846, "grad_norm": 3.214586019515991, "learning_rate": 0.0003888882803943045, "loss": 0.8643, "step": 2780 }, { "epoch": 0.18842899611250857, "grad_norm": 3.3871943950653076, "learning_rate": 0.0003888828039430449, "loss": 0.7314, "step": 2781 }, { "epoch": 0.18849675195433258, "grad_norm": 4.106551170349121, "learning_rate": 0.00038887732749178536, "loss": 0.9923, "step": 2782 }, { "epoch": 0.18856450779615655, "grad_norm": 2.8578834533691406, "learning_rate": 0.00038887185104052575, "loss": 0.7854, "step": 2783 }, { "epoch": 0.18863226363798052, "grad_norm": 5.243823528289795, "learning_rate": 0.00038886637458926615, "loss": 0.9607, "step": 2784 }, { "epoch": 0.18870001947980453, "grad_norm": 2.833411455154419, "learning_rate": 0.0003888608981380066, "loss": 0.7695, "step": 2785 }, { "epoch": 0.1887677753216285, "grad_norm": 2.9352073669433594, "learning_rate": 0.000388855421686747, "loss": 0.7108, "step": 2786 }, { "epoch": 0.1888355311634525, "grad_norm": 3.3581740856170654, "learning_rate": 0.0003888499452354874, "loss": 0.8294, "step": 2787 }, { "epoch": 0.18890328700527648, "grad_norm": 5.8704962730407715, "learning_rate": 0.0003888444687842278, "loss": 1.4642, "step": 2788 }, { "epoch": 0.18897104284710048, "grad_norm": 2.691150188446045, "learning_rate": 0.00038883899233296826, "loss": 0.6854, "step": 2789 }, { "epoch": 0.18903879868892445, "grad_norm": 4.402223110198975, "learning_rate": 0.0003888335158817087, "loss": 1.0299, "step": 2790 }, { "epoch": 0.18910655453074846, "grad_norm": 3.609065055847168, "learning_rate": 0.0003888280394304491, "loss": 0.951, "step": 2791 }, { "epoch": 0.18917431037257243, "grad_norm": 3.6020936965942383, "learning_rate": 0.0003888225629791895, "loss": 0.8829, "step": 2792 }, { "epoch": 0.18924206621439643, "grad_norm": 3.3175535202026367, "learning_rate": 0.0003888170865279299, "loss": 0.9027, "step": 2793 }, { "epoch": 0.1893098220562204, "grad_norm": 3.529503107070923, "learning_rate": 0.0003888116100766703, "loss": 0.8035, "step": 2794 }, { "epoch": 0.1893775778980444, "grad_norm": 3.649784564971924, "learning_rate": 0.00038880613362541076, "loss": 1.1049, "step": 2795 }, { "epoch": 0.18944533373986838, "grad_norm": 2.818162441253662, "learning_rate": 0.00038880065717415116, "loss": 0.8623, "step": 2796 }, { "epoch": 0.18951308958169238, "grad_norm": 3.7919747829437256, "learning_rate": 0.0003887951807228916, "loss": 1.0414, "step": 2797 }, { "epoch": 0.18958084542351636, "grad_norm": 3.144446611404419, "learning_rate": 0.000388789704271632, "loss": 0.9603, "step": 2798 }, { "epoch": 0.18964860126534033, "grad_norm": 4.011592864990234, "learning_rate": 0.0003887842278203724, "loss": 1.2122, "step": 2799 }, { "epoch": 0.18971635710716434, "grad_norm": 5.217103481292725, "learning_rate": 0.0003887787513691128, "loss": 0.8854, "step": 2800 }, { "epoch": 0.1897841129489883, "grad_norm": 4.114294052124023, "learning_rate": 0.00038877327491785327, "loss": 0.9165, "step": 2801 }, { "epoch": 0.1898518687908123, "grad_norm": 3.6854746341705322, "learning_rate": 0.00038876779846659367, "loss": 1.2091, "step": 2802 }, { "epoch": 0.18991962463263629, "grad_norm": 3.5141451358795166, "learning_rate": 0.00038876232201533406, "loss": 0.9723, "step": 2803 }, { "epoch": 0.1899873804744603, "grad_norm": 3.343451499938965, "learning_rate": 0.00038875684556407446, "loss": 0.9458, "step": 2804 }, { "epoch": 0.19005513631628426, "grad_norm": 3.792968511581421, "learning_rate": 0.0003887513691128149, "loss": 0.8935, "step": 2805 }, { "epoch": 0.19012289215810826, "grad_norm": 3.1623423099517822, "learning_rate": 0.00038874589266155537, "loss": 1.0347, "step": 2806 }, { "epoch": 0.19019064799993224, "grad_norm": 3.3526084423065186, "learning_rate": 0.00038874041621029577, "loss": 1.0338, "step": 2807 }, { "epoch": 0.19025840384175624, "grad_norm": 3.289684772491455, "learning_rate": 0.00038873493975903617, "loss": 0.961, "step": 2808 }, { "epoch": 0.19032615968358021, "grad_norm": 3.482077121734619, "learning_rate": 0.00038872946330777657, "loss": 1.014, "step": 2809 }, { "epoch": 0.19039391552540422, "grad_norm": 2.5455565452575684, "learning_rate": 0.00038872398685651697, "loss": 0.7763, "step": 2810 }, { "epoch": 0.1904616713672282, "grad_norm": 3.986311435699463, "learning_rate": 0.0003887185104052574, "loss": 1.0587, "step": 2811 }, { "epoch": 0.1905294272090522, "grad_norm": 3.5373101234436035, "learning_rate": 0.0003887130339539978, "loss": 1.0648, "step": 2812 }, { "epoch": 0.19059718305087617, "grad_norm": 3.479529619216919, "learning_rate": 0.0003887075575027383, "loss": 0.9923, "step": 2813 }, { "epoch": 0.19066493889270014, "grad_norm": 3.5464696884155273, "learning_rate": 0.00038870208105147867, "loss": 1.0059, "step": 2814 }, { "epoch": 0.19073269473452414, "grad_norm": 3.3405473232269287, "learning_rate": 0.00038869660460021907, "loss": 0.8901, "step": 2815 }, { "epoch": 0.19080045057634812, "grad_norm": 4.152277946472168, "learning_rate": 0.00038869112814895947, "loss": 1.0844, "step": 2816 }, { "epoch": 0.19086820641817212, "grad_norm": 3.700972557067871, "learning_rate": 0.0003886856516976999, "loss": 0.9723, "step": 2817 }, { "epoch": 0.1909359622599961, "grad_norm": 3.3416385650634766, "learning_rate": 0.0003886801752464403, "loss": 0.8549, "step": 2818 }, { "epoch": 0.1910037181018201, "grad_norm": 4.603366851806641, "learning_rate": 0.0003886746987951807, "loss": 1.0604, "step": 2819 }, { "epoch": 0.19107147394364407, "grad_norm": 2.6596109867095947, "learning_rate": 0.0003886692223439211, "loss": 0.6537, "step": 2820 }, { "epoch": 0.19113922978546807, "grad_norm": 3.3411309719085693, "learning_rate": 0.0003886637458926616, "loss": 0.9467, "step": 2821 }, { "epoch": 0.19120698562729205, "grad_norm": 3.437974691390991, "learning_rate": 0.000388658269441402, "loss": 0.9295, "step": 2822 }, { "epoch": 0.19127474146911605, "grad_norm": 3.7563765048980713, "learning_rate": 0.00038865279299014243, "loss": 1.0671, "step": 2823 }, { "epoch": 0.19134249731094002, "grad_norm": 3.3614068031311035, "learning_rate": 0.00038864731653888283, "loss": 0.8864, "step": 2824 }, { "epoch": 0.19141025315276403, "grad_norm": 3.8564016819000244, "learning_rate": 0.0003886418400876232, "loss": 1.2477, "step": 2825 }, { "epoch": 0.191478008994588, "grad_norm": 4.71919059753418, "learning_rate": 0.0003886363636363636, "loss": 0.9753, "step": 2826 }, { "epoch": 0.191545764836412, "grad_norm": 3.0620319843292236, "learning_rate": 0.0003886308871851041, "loss": 0.7808, "step": 2827 }, { "epoch": 0.19161352067823598, "grad_norm": 3.8438165187835693, "learning_rate": 0.00038862541073384453, "loss": 1.0785, "step": 2828 }, { "epoch": 0.19168127652005995, "grad_norm": 4.259598255157471, "learning_rate": 0.00038861993428258493, "loss": 1.1103, "step": 2829 }, { "epoch": 0.19174903236188395, "grad_norm": 3.251192808151245, "learning_rate": 0.00038861445783132533, "loss": 0.9754, "step": 2830 }, { "epoch": 0.19181678820370793, "grad_norm": 2.5088682174682617, "learning_rate": 0.00038860898138006573, "loss": 0.6504, "step": 2831 }, { "epoch": 0.19188454404553193, "grad_norm": 3.704563856124878, "learning_rate": 0.00038860350492880613, "loss": 0.9141, "step": 2832 }, { "epoch": 0.1919522998873559, "grad_norm": 4.012651443481445, "learning_rate": 0.0003885980284775466, "loss": 0.8808, "step": 2833 }, { "epoch": 0.1920200557291799, "grad_norm": 3.077089786529541, "learning_rate": 0.000388592552026287, "loss": 1.0051, "step": 2834 }, { "epoch": 0.19208781157100388, "grad_norm": 3.3188600540161133, "learning_rate": 0.0003885870755750274, "loss": 0.8485, "step": 2835 }, { "epoch": 0.19215556741282788, "grad_norm": 3.9176952838897705, "learning_rate": 0.00038858159912376783, "loss": 0.9251, "step": 2836 }, { "epoch": 0.19222332325465186, "grad_norm": 3.9131100177764893, "learning_rate": 0.00038857612267250823, "loss": 1.3158, "step": 2837 }, { "epoch": 0.19229107909647586, "grad_norm": 3.2665579319000244, "learning_rate": 0.00038857064622124863, "loss": 0.9732, "step": 2838 }, { "epoch": 0.19235883493829983, "grad_norm": 3.4108076095581055, "learning_rate": 0.0003885651697699891, "loss": 0.9345, "step": 2839 }, { "epoch": 0.19242659078012384, "grad_norm": 2.991594076156616, "learning_rate": 0.0003885596933187295, "loss": 0.8311, "step": 2840 }, { "epoch": 0.1924943466219478, "grad_norm": 3.9856655597686768, "learning_rate": 0.0003885542168674699, "loss": 1.0621, "step": 2841 }, { "epoch": 0.1925621024637718, "grad_norm": 2.8245956897735596, "learning_rate": 0.0003885487404162103, "loss": 0.9845, "step": 2842 }, { "epoch": 0.19262985830559579, "grad_norm": 4.349861145019531, "learning_rate": 0.0003885432639649507, "loss": 1.007, "step": 2843 }, { "epoch": 0.19269761414741976, "grad_norm": 2.9254069328308105, "learning_rate": 0.0003885377875136912, "loss": 0.9285, "step": 2844 }, { "epoch": 0.19276536998924376, "grad_norm": 3.744016647338867, "learning_rate": 0.0003885323110624316, "loss": 1.2372, "step": 2845 }, { "epoch": 0.19283312583106774, "grad_norm": 3.735994338989258, "learning_rate": 0.000388526834611172, "loss": 0.7919, "step": 2846 }, { "epoch": 0.19290088167289174, "grad_norm": 3.766037940979004, "learning_rate": 0.0003885213581599124, "loss": 1.1431, "step": 2847 }, { "epoch": 0.1929686375147157, "grad_norm": 2.934973955154419, "learning_rate": 0.0003885158817086528, "loss": 0.8543, "step": 2848 }, { "epoch": 0.19303639335653971, "grad_norm": 3.1117990016937256, "learning_rate": 0.00038851040525739324, "loss": 0.9102, "step": 2849 }, { "epoch": 0.1931041491983637, "grad_norm": 3.7863378524780273, "learning_rate": 0.00038850492880613364, "loss": 0.8891, "step": 2850 }, { "epoch": 0.1931719050401877, "grad_norm": 3.22411847114563, "learning_rate": 0.00038849945235487404, "loss": 0.8789, "step": 2851 }, { "epoch": 0.19323966088201167, "grad_norm": 3.320915937423706, "learning_rate": 0.0003884939759036145, "loss": 0.9193, "step": 2852 }, { "epoch": 0.19330741672383567, "grad_norm": 3.0854501724243164, "learning_rate": 0.0003884884994523549, "loss": 0.8459, "step": 2853 }, { "epoch": 0.19337517256565964, "grad_norm": 3.4609007835388184, "learning_rate": 0.0003884830230010953, "loss": 0.8557, "step": 2854 }, { "epoch": 0.19344292840748364, "grad_norm": 3.226769208908081, "learning_rate": 0.00038847754654983575, "loss": 0.7877, "step": 2855 }, { "epoch": 0.19351068424930762, "grad_norm": 3.452515125274658, "learning_rate": 0.00038847207009857614, "loss": 0.8445, "step": 2856 }, { "epoch": 0.19357844009113162, "grad_norm": 2.7265677452087402, "learning_rate": 0.00038846659364731654, "loss": 0.8042, "step": 2857 }, { "epoch": 0.1936461959329556, "grad_norm": 3.52346134185791, "learning_rate": 0.00038846111719605694, "loss": 0.8998, "step": 2858 }, { "epoch": 0.19371395177477957, "grad_norm": 3.444248914718628, "learning_rate": 0.0003884556407447974, "loss": 0.9179, "step": 2859 }, { "epoch": 0.19378170761660357, "grad_norm": 3.4823927879333496, "learning_rate": 0.0003884501642935378, "loss": 1.0943, "step": 2860 }, { "epoch": 0.19384946345842755, "grad_norm": 3.679563283920288, "learning_rate": 0.00038844468784227825, "loss": 1.0095, "step": 2861 }, { "epoch": 0.19391721930025155, "grad_norm": 2.840827465057373, "learning_rate": 0.00038843921139101865, "loss": 0.8466, "step": 2862 }, { "epoch": 0.19398497514207552, "grad_norm": 2.8723158836364746, "learning_rate": 0.00038843373493975905, "loss": 0.9074, "step": 2863 }, { "epoch": 0.19405273098389952, "grad_norm": 3.3672428131103516, "learning_rate": 0.00038842825848849945, "loss": 0.7702, "step": 2864 }, { "epoch": 0.1941204868257235, "grad_norm": 3.102827787399292, "learning_rate": 0.0003884227820372399, "loss": 0.8358, "step": 2865 }, { "epoch": 0.1941882426675475, "grad_norm": 3.773686170578003, "learning_rate": 0.0003884173055859803, "loss": 0.9157, "step": 2866 }, { "epoch": 0.19425599850937147, "grad_norm": 3.287602663040161, "learning_rate": 0.00038841182913472075, "loss": 0.9637, "step": 2867 }, { "epoch": 0.19432375435119548, "grad_norm": 3.0248587131500244, "learning_rate": 0.00038840635268346115, "loss": 0.7526, "step": 2868 }, { "epoch": 0.19439151019301945, "grad_norm": 3.0648927688598633, "learning_rate": 0.00038840087623220155, "loss": 0.7493, "step": 2869 }, { "epoch": 0.19445926603484345, "grad_norm": 3.211517572402954, "learning_rate": 0.00038839539978094195, "loss": 0.8731, "step": 2870 }, { "epoch": 0.19452702187666743, "grad_norm": 2.5447211265563965, "learning_rate": 0.0003883899233296824, "loss": 0.7819, "step": 2871 }, { "epoch": 0.19459477771849143, "grad_norm": 3.0504274368286133, "learning_rate": 0.0003883844468784228, "loss": 0.8631, "step": 2872 }, { "epoch": 0.1946625335603154, "grad_norm": 6.16290283203125, "learning_rate": 0.0003883789704271632, "loss": 1.1057, "step": 2873 }, { "epoch": 0.19473028940213938, "grad_norm": 3.770357131958008, "learning_rate": 0.0003883734939759036, "loss": 0.9779, "step": 2874 }, { "epoch": 0.19479804524396338, "grad_norm": 4.432990550994873, "learning_rate": 0.00038836801752464405, "loss": 1.088, "step": 2875 }, { "epoch": 0.19486580108578735, "grad_norm": 3.4904263019561768, "learning_rate": 0.00038836254107338445, "loss": 1.1733, "step": 2876 }, { "epoch": 0.19493355692761136, "grad_norm": 4.487160682678223, "learning_rate": 0.0003883570646221249, "loss": 0.8655, "step": 2877 }, { "epoch": 0.19500131276943533, "grad_norm": 3.2880842685699463, "learning_rate": 0.0003883515881708653, "loss": 0.7608, "step": 2878 }, { "epoch": 0.19506906861125933, "grad_norm": 3.5662264823913574, "learning_rate": 0.0003883461117196057, "loss": 0.7713, "step": 2879 }, { "epoch": 0.1951368244530833, "grad_norm": 3.6133651733398438, "learning_rate": 0.0003883406352683461, "loss": 1.1589, "step": 2880 }, { "epoch": 0.1952045802949073, "grad_norm": 3.9296798706054688, "learning_rate": 0.0003883351588170865, "loss": 0.8892, "step": 2881 }, { "epoch": 0.19527233613673128, "grad_norm": 3.160841464996338, "learning_rate": 0.00038832968236582696, "loss": 1.0361, "step": 2882 }, { "epoch": 0.19534009197855529, "grad_norm": 2.5656821727752686, "learning_rate": 0.0003883242059145674, "loss": 0.7439, "step": 2883 }, { "epoch": 0.19540784782037926, "grad_norm": 4.165538311004639, "learning_rate": 0.0003883187294633078, "loss": 1.2375, "step": 2884 }, { "epoch": 0.19547560366220326, "grad_norm": 3.054222583770752, "learning_rate": 0.0003883132530120482, "loss": 0.9817, "step": 2885 }, { "epoch": 0.19554335950402724, "grad_norm": 3.955233335494995, "learning_rate": 0.0003883077765607886, "loss": 0.8546, "step": 2886 }, { "epoch": 0.19561111534585124, "grad_norm": 3.7192070484161377, "learning_rate": 0.00038830230010952906, "loss": 1.0685, "step": 2887 }, { "epoch": 0.1956788711876752, "grad_norm": 2.6796040534973145, "learning_rate": 0.00038829682365826946, "loss": 0.6622, "step": 2888 }, { "epoch": 0.1957466270294992, "grad_norm": 2.8797216415405273, "learning_rate": 0.00038829134720700986, "loss": 0.9228, "step": 2889 }, { "epoch": 0.1958143828713232, "grad_norm": 2.3468785285949707, "learning_rate": 0.0003882858707557503, "loss": 0.7853, "step": 2890 }, { "epoch": 0.19588213871314716, "grad_norm": 2.93365478515625, "learning_rate": 0.0003882803943044907, "loss": 0.7724, "step": 2891 }, { "epoch": 0.19594989455497117, "grad_norm": 2.797774076461792, "learning_rate": 0.0003882749178532311, "loss": 0.7918, "step": 2892 }, { "epoch": 0.19601765039679514, "grad_norm": 3.337390661239624, "learning_rate": 0.00038826944140197157, "loss": 1.1268, "step": 2893 }, { "epoch": 0.19608540623861914, "grad_norm": 3.9615604877471924, "learning_rate": 0.00038826396495071196, "loss": 1.0514, "step": 2894 }, { "epoch": 0.19615316208044312, "grad_norm": 3.5322580337524414, "learning_rate": 0.00038825848849945236, "loss": 0.9163, "step": 2895 }, { "epoch": 0.19622091792226712, "grad_norm": 3.612990140914917, "learning_rate": 0.00038825301204819276, "loss": 0.9093, "step": 2896 }, { "epoch": 0.1962886737640911, "grad_norm": 3.0081143379211426, "learning_rate": 0.00038824753559693316, "loss": 0.9885, "step": 2897 }, { "epoch": 0.1963564296059151, "grad_norm": 3.5394842624664307, "learning_rate": 0.0003882420591456736, "loss": 1.0406, "step": 2898 }, { "epoch": 0.19642418544773907, "grad_norm": 3.30678129196167, "learning_rate": 0.00038823658269441407, "loss": 1.0223, "step": 2899 }, { "epoch": 0.19649194128956307, "grad_norm": 3.34920072555542, "learning_rate": 0.00038823110624315447, "loss": 0.7228, "step": 2900 }, { "epoch": 0.19655969713138705, "grad_norm": 3.10904860496521, "learning_rate": 0.00038822562979189487, "loss": 0.7764, "step": 2901 }, { "epoch": 0.19662745297321105, "grad_norm": 3.1832408905029297, "learning_rate": 0.00038822015334063527, "loss": 0.8856, "step": 2902 }, { "epoch": 0.19669520881503502, "grad_norm": 3.5309600830078125, "learning_rate": 0.0003882146768893757, "loss": 1.1269, "step": 2903 }, { "epoch": 0.196762964656859, "grad_norm": 2.922008514404297, "learning_rate": 0.0003882092004381161, "loss": 0.7044, "step": 2904 }, { "epoch": 0.196830720498683, "grad_norm": 2.6173906326293945, "learning_rate": 0.0003882037239868565, "loss": 0.8005, "step": 2905 }, { "epoch": 0.19689847634050697, "grad_norm": 3.036520004272461, "learning_rate": 0.00038819824753559697, "loss": 0.7915, "step": 2906 }, { "epoch": 0.19696623218233097, "grad_norm": 3.1924149990081787, "learning_rate": 0.00038819277108433737, "loss": 0.9731, "step": 2907 }, { "epoch": 0.19703398802415495, "grad_norm": 3.872278928756714, "learning_rate": 0.00038818729463307777, "loss": 1.0911, "step": 2908 }, { "epoch": 0.19710174386597895, "grad_norm": 2.464524984359741, "learning_rate": 0.0003881818181818182, "loss": 0.713, "step": 2909 }, { "epoch": 0.19716949970780293, "grad_norm": 3.9767656326293945, "learning_rate": 0.0003881763417305586, "loss": 1.0382, "step": 2910 }, { "epoch": 0.19723725554962693, "grad_norm": 5.209201812744141, "learning_rate": 0.000388170865279299, "loss": 1.1775, "step": 2911 }, { "epoch": 0.1973050113914509, "grad_norm": 2.733712673187256, "learning_rate": 0.0003881653888280394, "loss": 0.7543, "step": 2912 }, { "epoch": 0.1973727672332749, "grad_norm": 3.7703781127929688, "learning_rate": 0.0003881599123767798, "loss": 1.0978, "step": 2913 }, { "epoch": 0.19744052307509888, "grad_norm": 3.3680343627929688, "learning_rate": 0.0003881544359255203, "loss": 1.0289, "step": 2914 }, { "epoch": 0.19750827891692288, "grad_norm": 2.3637564182281494, "learning_rate": 0.00038814895947426073, "loss": 0.6622, "step": 2915 }, { "epoch": 0.19757603475874685, "grad_norm": 3.969456195831299, "learning_rate": 0.00038814348302300113, "loss": 0.9732, "step": 2916 }, { "epoch": 0.19764379060057086, "grad_norm": 2.699615240097046, "learning_rate": 0.0003881380065717415, "loss": 0.9226, "step": 2917 }, { "epoch": 0.19771154644239483, "grad_norm": 4.2555952072143555, "learning_rate": 0.0003881325301204819, "loss": 1.0636, "step": 2918 }, { "epoch": 0.1977793022842188, "grad_norm": 3.04146146774292, "learning_rate": 0.0003881270536692223, "loss": 1.0585, "step": 2919 }, { "epoch": 0.1978470581260428, "grad_norm": 2.6904592514038086, "learning_rate": 0.0003881215772179628, "loss": 0.817, "step": 2920 }, { "epoch": 0.19791481396786678, "grad_norm": 2.9807913303375244, "learning_rate": 0.00038811610076670323, "loss": 1.0955, "step": 2921 }, { "epoch": 0.19798256980969078, "grad_norm": 3.082659959793091, "learning_rate": 0.00038811062431544363, "loss": 0.8816, "step": 2922 }, { "epoch": 0.19805032565151476, "grad_norm": 3.4324140548706055, "learning_rate": 0.00038810514786418403, "loss": 0.9182, "step": 2923 }, { "epoch": 0.19811808149333876, "grad_norm": 3.4310765266418457, "learning_rate": 0.00038809967141292443, "loss": 1.0241, "step": 2924 }, { "epoch": 0.19818583733516273, "grad_norm": 3.3917229175567627, "learning_rate": 0.0003880941949616649, "loss": 0.7229, "step": 2925 }, { "epoch": 0.19825359317698674, "grad_norm": 3.621485471725464, "learning_rate": 0.0003880887185104053, "loss": 1.0176, "step": 2926 }, { "epoch": 0.1983213490188107, "grad_norm": 3.299516201019287, "learning_rate": 0.0003880832420591457, "loss": 0.8587, "step": 2927 }, { "epoch": 0.1983891048606347, "grad_norm": 3.3287224769592285, "learning_rate": 0.0003880777656078861, "loss": 0.9476, "step": 2928 }, { "epoch": 0.1984568607024587, "grad_norm": 7.810877799987793, "learning_rate": 0.00038807228915662653, "loss": 1.2695, "step": 2929 }, { "epoch": 0.1985246165442827, "grad_norm": 3.242846727371216, "learning_rate": 0.00038806681270536693, "loss": 1.0626, "step": 2930 }, { "epoch": 0.19859237238610666, "grad_norm": 3.5498478412628174, "learning_rate": 0.0003880613362541074, "loss": 0.8431, "step": 2931 }, { "epoch": 0.19866012822793067, "grad_norm": 4.93901252746582, "learning_rate": 0.0003880558598028478, "loss": 1.0355, "step": 2932 }, { "epoch": 0.19872788406975464, "grad_norm": 4.194961071014404, "learning_rate": 0.0003880503833515882, "loss": 1.1942, "step": 2933 }, { "epoch": 0.19879563991157861, "grad_norm": 3.144320249557495, "learning_rate": 0.0003880449069003286, "loss": 1.0055, "step": 2934 }, { "epoch": 0.19886339575340262, "grad_norm": 2.446681261062622, "learning_rate": 0.000388039430449069, "loss": 0.6765, "step": 2935 }, { "epoch": 0.1989311515952266, "grad_norm": 3.9585554599761963, "learning_rate": 0.00038803395399780944, "loss": 0.7786, "step": 2936 }, { "epoch": 0.1989989074370506, "grad_norm": 4.166809558868408, "learning_rate": 0.0003880284775465499, "loss": 0.7984, "step": 2937 }, { "epoch": 0.19906666327887457, "grad_norm": 3.4924962520599365, "learning_rate": 0.0003880230010952903, "loss": 0.785, "step": 2938 }, { "epoch": 0.19913441912069857, "grad_norm": 2.6020219326019287, "learning_rate": 0.0003880175246440307, "loss": 0.6835, "step": 2939 }, { "epoch": 0.19920217496252254, "grad_norm": 3.0359699726104736, "learning_rate": 0.0003880120481927711, "loss": 0.9068, "step": 2940 }, { "epoch": 0.19926993080434655, "grad_norm": 4.5870771408081055, "learning_rate": 0.00038800657174151154, "loss": 1.0493, "step": 2941 }, { "epoch": 0.19933768664617052, "grad_norm": 4.453811168670654, "learning_rate": 0.00038800109529025194, "loss": 0.9529, "step": 2942 }, { "epoch": 0.19940544248799452, "grad_norm": 3.0521836280822754, "learning_rate": 0.00038799561883899234, "loss": 0.6147, "step": 2943 }, { "epoch": 0.1994731983298185, "grad_norm": 2.691821813583374, "learning_rate": 0.00038799014238773274, "loss": 0.7854, "step": 2944 }, { "epoch": 0.1995409541716425, "grad_norm": 3.0034406185150146, "learning_rate": 0.0003879846659364732, "loss": 0.7829, "step": 2945 }, { "epoch": 0.19960871001346647, "grad_norm": 4.020975112915039, "learning_rate": 0.0003879791894852136, "loss": 1.032, "step": 2946 }, { "epoch": 0.19967646585529045, "grad_norm": 3.4426751136779785, "learning_rate": 0.00038797371303395404, "loss": 1.2749, "step": 2947 }, { "epoch": 0.19974422169711445, "grad_norm": 2.9995882511138916, "learning_rate": 0.00038796823658269444, "loss": 0.8232, "step": 2948 }, { "epoch": 0.19981197753893842, "grad_norm": 2.9870011806488037, "learning_rate": 0.00038796276013143484, "loss": 0.7232, "step": 2949 }, { "epoch": 0.19987973338076243, "grad_norm": 3.5426650047302246, "learning_rate": 0.00038795728368017524, "loss": 0.8878, "step": 2950 }, { "epoch": 0.1999474892225864, "grad_norm": 2.684795618057251, "learning_rate": 0.00038795180722891564, "loss": 0.9354, "step": 2951 }, { "epoch": 0.2000152450644104, "grad_norm": 3.869802951812744, "learning_rate": 0.0003879463307776561, "loss": 1.0431, "step": 2952 }, { "epoch": 0.20008300090623438, "grad_norm": 4.170981407165527, "learning_rate": 0.00038794085432639655, "loss": 1.2185, "step": 2953 }, { "epoch": 0.20015075674805838, "grad_norm": 2.944383144378662, "learning_rate": 0.00038793537787513695, "loss": 0.7583, "step": 2954 }, { "epoch": 0.20021851258988235, "grad_norm": 5.020541667938232, "learning_rate": 0.00038792990142387735, "loss": 0.9039, "step": 2955 }, { "epoch": 0.20028626843170635, "grad_norm": 3.3172452449798584, "learning_rate": 0.00038792442497261775, "loss": 0.8774, "step": 2956 }, { "epoch": 0.20035402427353033, "grad_norm": 4.42636775970459, "learning_rate": 0.00038791894852135815, "loss": 1.1344, "step": 2957 }, { "epoch": 0.20042178011535433, "grad_norm": 3.51851749420166, "learning_rate": 0.0003879134720700986, "loss": 1.0192, "step": 2958 }, { "epoch": 0.2004895359571783, "grad_norm": 2.7377655506134033, "learning_rate": 0.000387907995618839, "loss": 0.8097, "step": 2959 }, { "epoch": 0.2005572917990023, "grad_norm": 4.000570774078369, "learning_rate": 0.00038790251916757945, "loss": 0.9769, "step": 2960 }, { "epoch": 0.20062504764082628, "grad_norm": 3.289140462875366, "learning_rate": 0.00038789704271631985, "loss": 0.8663, "step": 2961 }, { "epoch": 0.20069280348265026, "grad_norm": 3.1356420516967773, "learning_rate": 0.00038789156626506025, "loss": 0.8782, "step": 2962 }, { "epoch": 0.20076055932447426, "grad_norm": 3.1205527782440186, "learning_rate": 0.0003878860898138007, "loss": 0.6768, "step": 2963 }, { "epoch": 0.20082831516629823, "grad_norm": 3.3293068408966064, "learning_rate": 0.0003878806133625411, "loss": 0.905, "step": 2964 }, { "epoch": 0.20089607100812223, "grad_norm": 4.211151599884033, "learning_rate": 0.0003878751369112815, "loss": 1.205, "step": 2965 }, { "epoch": 0.2009638268499462, "grad_norm": 5.293976306915283, "learning_rate": 0.0003878696604600219, "loss": 1.1434, "step": 2966 }, { "epoch": 0.2010315826917702, "grad_norm": 4.372865676879883, "learning_rate": 0.0003878641840087623, "loss": 1.0981, "step": 2967 }, { "epoch": 0.20109933853359419, "grad_norm": 4.066809177398682, "learning_rate": 0.00038785870755750275, "loss": 0.9831, "step": 2968 }, { "epoch": 0.2011670943754182, "grad_norm": 4.448725700378418, "learning_rate": 0.0003878532311062432, "loss": 0.995, "step": 2969 }, { "epoch": 0.20123485021724216, "grad_norm": 3.098972797393799, "learning_rate": 0.0003878477546549836, "loss": 0.9696, "step": 2970 }, { "epoch": 0.20130260605906616, "grad_norm": 3.9107375144958496, "learning_rate": 0.000387842278203724, "loss": 1.1097, "step": 2971 }, { "epoch": 0.20137036190089014, "grad_norm": 3.3919646739959717, "learning_rate": 0.0003878368017524644, "loss": 0.8155, "step": 2972 }, { "epoch": 0.20143811774271414, "grad_norm": 4.422256946563721, "learning_rate": 0.0003878313253012048, "loss": 0.9722, "step": 2973 }, { "epoch": 0.20150587358453811, "grad_norm": 3.44040846824646, "learning_rate": 0.00038782584884994526, "loss": 0.8639, "step": 2974 }, { "epoch": 0.20157362942636212, "grad_norm": 3.9588050842285156, "learning_rate": 0.00038782037239868566, "loss": 1.0882, "step": 2975 }, { "epoch": 0.2016413852681861, "grad_norm": 4.006252765655518, "learning_rate": 0.0003878148959474261, "loss": 1.0616, "step": 2976 }, { "epoch": 0.20170914111001007, "grad_norm": 2.9318010807037354, "learning_rate": 0.0003878094194961665, "loss": 0.8486, "step": 2977 }, { "epoch": 0.20177689695183407, "grad_norm": 2.7023775577545166, "learning_rate": 0.0003878039430449069, "loss": 0.8735, "step": 2978 }, { "epoch": 0.20184465279365804, "grad_norm": 3.5952556133270264, "learning_rate": 0.00038779846659364736, "loss": 0.8897, "step": 2979 }, { "epoch": 0.20191240863548204, "grad_norm": 2.8645665645599365, "learning_rate": 0.00038779299014238776, "loss": 0.7177, "step": 2980 }, { "epoch": 0.20198016447730602, "grad_norm": 3.171987533569336, "learning_rate": 0.00038778751369112816, "loss": 0.8614, "step": 2981 }, { "epoch": 0.20204792031913002, "grad_norm": 2.974250555038452, "learning_rate": 0.00038778203723986856, "loss": 0.9177, "step": 2982 }, { "epoch": 0.202115676160954, "grad_norm": 3.443190097808838, "learning_rate": 0.00038777656078860896, "loss": 0.8449, "step": 2983 }, { "epoch": 0.202183432002778, "grad_norm": 3.9158647060394287, "learning_rate": 0.0003877710843373494, "loss": 0.7569, "step": 2984 }, { "epoch": 0.20225118784460197, "grad_norm": 4.267241954803467, "learning_rate": 0.00038776560788608987, "loss": 0.9139, "step": 2985 }, { "epoch": 0.20231894368642597, "grad_norm": 3.2630350589752197, "learning_rate": 0.00038776013143483026, "loss": 0.8679, "step": 2986 }, { "epoch": 0.20238669952824995, "grad_norm": 9.023880958557129, "learning_rate": 0.00038775465498357066, "loss": 0.6787, "step": 2987 }, { "epoch": 0.20245445537007395, "grad_norm": 2.9599552154541016, "learning_rate": 0.00038774917853231106, "loss": 0.7772, "step": 2988 }, { "epoch": 0.20252221121189792, "grad_norm": 4.031742572784424, "learning_rate": 0.00038774370208105146, "loss": 0.9991, "step": 2989 }, { "epoch": 0.20258996705372193, "grad_norm": 2.2382352352142334, "learning_rate": 0.0003877382256297919, "loss": 0.5522, "step": 2990 }, { "epoch": 0.2026577228955459, "grad_norm": 2.7017180919647217, "learning_rate": 0.00038773274917853237, "loss": 0.8779, "step": 2991 }, { "epoch": 0.20272547873736987, "grad_norm": 2.520508289337158, "learning_rate": 0.00038772727272727277, "loss": 0.7455, "step": 2992 }, { "epoch": 0.20279323457919388, "grad_norm": 3.901690721511841, "learning_rate": 0.00038772179627601317, "loss": 0.9256, "step": 2993 }, { "epoch": 0.20286099042101785, "grad_norm": 3.673201322555542, "learning_rate": 0.00038771631982475357, "loss": 0.9651, "step": 2994 }, { "epoch": 0.20292874626284185, "grad_norm": 3.2190515995025635, "learning_rate": 0.00038771084337349397, "loss": 0.7486, "step": 2995 }, { "epoch": 0.20299650210466583, "grad_norm": 3.5136494636535645, "learning_rate": 0.0003877053669222344, "loss": 0.9897, "step": 2996 }, { "epoch": 0.20306425794648983, "grad_norm": 3.8646607398986816, "learning_rate": 0.0003876998904709748, "loss": 1.047, "step": 2997 }, { "epoch": 0.2031320137883138, "grad_norm": 3.926952600479126, "learning_rate": 0.0003876944140197152, "loss": 1.1799, "step": 2998 }, { "epoch": 0.2031997696301378, "grad_norm": 2.6277692317962646, "learning_rate": 0.00038768893756845567, "loss": 0.8088, "step": 2999 }, { "epoch": 0.20326752547196178, "grad_norm": 3.3782174587249756, "learning_rate": 0.00038768346111719607, "loss": 1.0853, "step": 3000 }, { "epoch": 0.20333528131378578, "grad_norm": 2.427452325820923, "learning_rate": 0.0003876779846659365, "loss": 0.7884, "step": 3001 }, { "epoch": 0.20340303715560976, "grad_norm": 5.444807529449463, "learning_rate": 0.0003876725082146769, "loss": 1.0673, "step": 3002 }, { "epoch": 0.20347079299743376, "grad_norm": 3.8093421459198, "learning_rate": 0.0003876670317634173, "loss": 1.1959, "step": 3003 }, { "epoch": 0.20353854883925773, "grad_norm": 3.9400832653045654, "learning_rate": 0.0003876615553121577, "loss": 0.9064, "step": 3004 }, { "epoch": 0.20360630468108173, "grad_norm": 2.7309412956237793, "learning_rate": 0.0003876560788608981, "loss": 0.7175, "step": 3005 }, { "epoch": 0.2036740605229057, "grad_norm": 3.4516921043395996, "learning_rate": 0.0003876506024096386, "loss": 0.9031, "step": 3006 }, { "epoch": 0.20374181636472968, "grad_norm": 3.0753893852233887, "learning_rate": 0.00038764512595837903, "loss": 0.9436, "step": 3007 }, { "epoch": 0.20380957220655369, "grad_norm": 3.9933080673217773, "learning_rate": 0.0003876396495071194, "loss": 1.0872, "step": 3008 }, { "epoch": 0.20387732804837766, "grad_norm": 3.4695262908935547, "learning_rate": 0.0003876341730558598, "loss": 1.0169, "step": 3009 }, { "epoch": 0.20394508389020166, "grad_norm": 4.436805248260498, "learning_rate": 0.0003876286966046002, "loss": 1.2448, "step": 3010 }, { "epoch": 0.20401283973202564, "grad_norm": 3.8343169689178467, "learning_rate": 0.0003876232201533406, "loss": 0.889, "step": 3011 }, { "epoch": 0.20408059557384964, "grad_norm": 3.2676072120666504, "learning_rate": 0.0003876177437020811, "loss": 1.0095, "step": 3012 }, { "epoch": 0.2041483514156736, "grad_norm": 3.435744047164917, "learning_rate": 0.0003876122672508215, "loss": 0.9986, "step": 3013 }, { "epoch": 0.20421610725749761, "grad_norm": 2.862062454223633, "learning_rate": 0.0003876067907995619, "loss": 0.7056, "step": 3014 }, { "epoch": 0.2042838630993216, "grad_norm": 3.753910779953003, "learning_rate": 0.00038760131434830233, "loss": 0.8493, "step": 3015 }, { "epoch": 0.2043516189411456, "grad_norm": 2.116185426712036, "learning_rate": 0.00038759583789704273, "loss": 0.6275, "step": 3016 }, { "epoch": 0.20441937478296957, "grad_norm": 3.509892702102661, "learning_rate": 0.0003875903614457832, "loss": 0.9279, "step": 3017 }, { "epoch": 0.20448713062479357, "grad_norm": 4.098609447479248, "learning_rate": 0.0003875848849945236, "loss": 1.1334, "step": 3018 }, { "epoch": 0.20455488646661754, "grad_norm": 3.273157835006714, "learning_rate": 0.000387579408543264, "loss": 0.9214, "step": 3019 }, { "epoch": 0.20462264230844154, "grad_norm": 3.5070319175720215, "learning_rate": 0.0003875739320920044, "loss": 0.815, "step": 3020 }, { "epoch": 0.20469039815026552, "grad_norm": 2.5359714031219482, "learning_rate": 0.0003875684556407448, "loss": 0.7331, "step": 3021 }, { "epoch": 0.2047581539920895, "grad_norm": 3.425753116607666, "learning_rate": 0.00038756297918948523, "loss": 0.7633, "step": 3022 }, { "epoch": 0.2048259098339135, "grad_norm": 3.675658941268921, "learning_rate": 0.0003875575027382257, "loss": 1.1037, "step": 3023 }, { "epoch": 0.20489366567573747, "grad_norm": 2.5491268634796143, "learning_rate": 0.0003875520262869661, "loss": 0.6575, "step": 3024 }, { "epoch": 0.20496142151756147, "grad_norm": 4.113846778869629, "learning_rate": 0.0003875465498357065, "loss": 0.8881, "step": 3025 }, { "epoch": 0.20502917735938544, "grad_norm": 3.828291893005371, "learning_rate": 0.0003875410733844469, "loss": 0.9057, "step": 3026 }, { "epoch": 0.20509693320120945, "grad_norm": 2.85843563079834, "learning_rate": 0.0003875355969331873, "loss": 0.719, "step": 3027 }, { "epoch": 0.20516468904303342, "grad_norm": 2.6541266441345215, "learning_rate": 0.00038753012048192774, "loss": 0.825, "step": 3028 }, { "epoch": 0.20523244488485742, "grad_norm": 3.2358591556549072, "learning_rate": 0.00038752464403066814, "loss": 0.9772, "step": 3029 }, { "epoch": 0.2053002007266814, "grad_norm": 3.1505749225616455, "learning_rate": 0.0003875191675794086, "loss": 0.8697, "step": 3030 }, { "epoch": 0.2053679565685054, "grad_norm": 3.205974578857422, "learning_rate": 0.000387513691128149, "loss": 1.0819, "step": 3031 }, { "epoch": 0.20543571241032937, "grad_norm": 3.558382749557495, "learning_rate": 0.0003875082146768894, "loss": 0.8795, "step": 3032 }, { "epoch": 0.20550346825215338, "grad_norm": 3.715158700942993, "learning_rate": 0.0003875027382256298, "loss": 1.2681, "step": 3033 }, { "epoch": 0.20557122409397735, "grad_norm": 3.849266529083252, "learning_rate": 0.00038749726177437024, "loss": 1.1093, "step": 3034 }, { "epoch": 0.20563897993580135, "grad_norm": 2.420072555541992, "learning_rate": 0.00038749178532311064, "loss": 0.7177, "step": 3035 }, { "epoch": 0.20570673577762533, "grad_norm": 3.3123974800109863, "learning_rate": 0.00038748630887185104, "loss": 0.973, "step": 3036 }, { "epoch": 0.2057744916194493, "grad_norm": 5.708742618560791, "learning_rate": 0.00038748083242059144, "loss": 0.8842, "step": 3037 }, { "epoch": 0.2058422474612733, "grad_norm": 4.232296943664551, "learning_rate": 0.0003874753559693319, "loss": 1.3733, "step": 3038 }, { "epoch": 0.20591000330309728, "grad_norm": 4.40988302230835, "learning_rate": 0.00038746987951807234, "loss": 0.8927, "step": 3039 }, { "epoch": 0.20597775914492128, "grad_norm": 2.910693883895874, "learning_rate": 0.00038746440306681274, "loss": 0.8717, "step": 3040 }, { "epoch": 0.20604551498674525, "grad_norm": 3.609844207763672, "learning_rate": 0.00038745892661555314, "loss": 0.9452, "step": 3041 }, { "epoch": 0.20611327082856926, "grad_norm": 3.769583225250244, "learning_rate": 0.00038745345016429354, "loss": 1.0423, "step": 3042 }, { "epoch": 0.20618102667039323, "grad_norm": 2.7205891609191895, "learning_rate": 0.00038744797371303394, "loss": 0.7227, "step": 3043 }, { "epoch": 0.20624878251221723, "grad_norm": 3.284769058227539, "learning_rate": 0.0003874424972617744, "loss": 0.8812, "step": 3044 }, { "epoch": 0.2063165383540412, "grad_norm": 3.0832784175872803, "learning_rate": 0.0003874370208105148, "loss": 0.9679, "step": 3045 }, { "epoch": 0.2063842941958652, "grad_norm": 4.454905986785889, "learning_rate": 0.00038743154435925525, "loss": 1.18, "step": 3046 }, { "epoch": 0.20645205003768918, "grad_norm": 5.9112548828125, "learning_rate": 0.00038742606790799565, "loss": 0.8368, "step": 3047 }, { "epoch": 0.20651980587951319, "grad_norm": 4.081108570098877, "learning_rate": 0.00038742059145673605, "loss": 0.9317, "step": 3048 }, { "epoch": 0.20658756172133716, "grad_norm": 3.965015411376953, "learning_rate": 0.00038741511500547645, "loss": 0.9889, "step": 3049 }, { "epoch": 0.20665531756316116, "grad_norm": 3.206282377243042, "learning_rate": 0.0003874096385542169, "loss": 0.8683, "step": 3050 }, { "epoch": 0.20672307340498514, "grad_norm": 3.1499555110931396, "learning_rate": 0.0003874041621029573, "loss": 0.9119, "step": 3051 }, { "epoch": 0.2067908292468091, "grad_norm": 4.203317165374756, "learning_rate": 0.0003873986856516977, "loss": 1.1122, "step": 3052 }, { "epoch": 0.2068585850886331, "grad_norm": 3.7479679584503174, "learning_rate": 0.00038739320920043815, "loss": 0.8592, "step": 3053 }, { "epoch": 0.2069263409304571, "grad_norm": 2.524534225463867, "learning_rate": 0.00038738773274917855, "loss": 0.6836, "step": 3054 }, { "epoch": 0.2069940967722811, "grad_norm": 3.2653846740722656, "learning_rate": 0.000387382256297919, "loss": 0.7931, "step": 3055 }, { "epoch": 0.20706185261410506, "grad_norm": 3.3260385990142822, "learning_rate": 0.0003873767798466594, "loss": 0.8665, "step": 3056 }, { "epoch": 0.20712960845592907, "grad_norm": 3.0397768020629883, "learning_rate": 0.0003873713033953998, "loss": 0.7832, "step": 3057 }, { "epoch": 0.20719736429775304, "grad_norm": 3.086559295654297, "learning_rate": 0.0003873658269441402, "loss": 0.8847, "step": 3058 }, { "epoch": 0.20726512013957704, "grad_norm": 3.479982614517212, "learning_rate": 0.0003873603504928806, "loss": 0.9956, "step": 3059 }, { "epoch": 0.20733287598140102, "grad_norm": 3.2430217266082764, "learning_rate": 0.00038735487404162105, "loss": 0.8956, "step": 3060 }, { "epoch": 0.20740063182322502, "grad_norm": 2.7269043922424316, "learning_rate": 0.0003873493975903615, "loss": 0.6624, "step": 3061 }, { "epoch": 0.207468387665049, "grad_norm": 2.7710084915161133, "learning_rate": 0.0003873439211391019, "loss": 0.84, "step": 3062 }, { "epoch": 0.207536143506873, "grad_norm": 3.279461145401001, "learning_rate": 0.0003873384446878423, "loss": 0.9323, "step": 3063 }, { "epoch": 0.20760389934869697, "grad_norm": 2.4109456539154053, "learning_rate": 0.0003873329682365827, "loss": 0.7564, "step": 3064 }, { "epoch": 0.20767165519052097, "grad_norm": 3.398273229598999, "learning_rate": 0.0003873274917853231, "loss": 0.9354, "step": 3065 }, { "epoch": 0.20773941103234494, "grad_norm": 3.6894140243530273, "learning_rate": 0.00038732201533406356, "loss": 1.1319, "step": 3066 }, { "epoch": 0.20780716687416892, "grad_norm": 3.255312204360962, "learning_rate": 0.00038731653888280396, "loss": 0.879, "step": 3067 }, { "epoch": 0.20787492271599292, "grad_norm": 2.3051600456237793, "learning_rate": 0.00038731106243154436, "loss": 0.7277, "step": 3068 }, { "epoch": 0.2079426785578169, "grad_norm": 2.9135687351226807, "learning_rate": 0.0003873055859802848, "loss": 0.9002, "step": 3069 }, { "epoch": 0.2080104343996409, "grad_norm": 4.4830522537231445, "learning_rate": 0.0003873001095290252, "loss": 0.9469, "step": 3070 }, { "epoch": 0.20807819024146487, "grad_norm": 3.2175025939941406, "learning_rate": 0.0003872946330777656, "loss": 0.943, "step": 3071 }, { "epoch": 0.20814594608328887, "grad_norm": 3.3367183208465576, "learning_rate": 0.00038728915662650606, "loss": 0.9921, "step": 3072 }, { "epoch": 0.20821370192511285, "grad_norm": 3.4488627910614014, "learning_rate": 0.00038728368017524646, "loss": 0.8805, "step": 3073 }, { "epoch": 0.20828145776693685, "grad_norm": 3.7233943939208984, "learning_rate": 0.00038727820372398686, "loss": 1.2822, "step": 3074 }, { "epoch": 0.20834921360876082, "grad_norm": 2.8348429203033447, "learning_rate": 0.00038727272727272726, "loss": 0.7892, "step": 3075 }, { "epoch": 0.20841696945058483, "grad_norm": 3.1588516235351562, "learning_rate": 0.0003872672508214677, "loss": 1.0107, "step": 3076 }, { "epoch": 0.2084847252924088, "grad_norm": 2.604145050048828, "learning_rate": 0.00038726177437020817, "loss": 0.8472, "step": 3077 }, { "epoch": 0.2085524811342328, "grad_norm": 3.444066047668457, "learning_rate": 0.00038725629791894856, "loss": 0.7593, "step": 3078 }, { "epoch": 0.20862023697605678, "grad_norm": 2.6919965744018555, "learning_rate": 0.00038725082146768896, "loss": 0.9178, "step": 3079 }, { "epoch": 0.20868799281788078, "grad_norm": 3.8902170658111572, "learning_rate": 0.00038724534501642936, "loss": 1.2513, "step": 3080 }, { "epoch": 0.20875574865970475, "grad_norm": 4.317392349243164, "learning_rate": 0.00038723986856516976, "loss": 1.026, "step": 3081 }, { "epoch": 0.20882350450152873, "grad_norm": 2.897770881652832, "learning_rate": 0.0003872343921139102, "loss": 0.91, "step": 3082 }, { "epoch": 0.20889126034335273, "grad_norm": 3.7551252841949463, "learning_rate": 0.0003872289156626506, "loss": 1.0085, "step": 3083 }, { "epoch": 0.2089590161851767, "grad_norm": 2.75354266166687, "learning_rate": 0.00038722343921139107, "loss": 0.7836, "step": 3084 }, { "epoch": 0.2090267720270007, "grad_norm": 2.7469089031219482, "learning_rate": 0.00038721796276013147, "loss": 0.8302, "step": 3085 }, { "epoch": 0.20909452786882468, "grad_norm": 3.335780382156372, "learning_rate": 0.00038721248630887187, "loss": 0.988, "step": 3086 }, { "epoch": 0.20916228371064868, "grad_norm": 3.8519208431243896, "learning_rate": 0.00038720700985761227, "loss": 1.077, "step": 3087 }, { "epoch": 0.20923003955247266, "grad_norm": 3.1594107151031494, "learning_rate": 0.0003872015334063527, "loss": 0.8523, "step": 3088 }, { "epoch": 0.20929779539429666, "grad_norm": 3.792149305343628, "learning_rate": 0.0003871960569550931, "loss": 1.0773, "step": 3089 }, { "epoch": 0.20936555123612063, "grad_norm": 4.654153823852539, "learning_rate": 0.0003871905805038335, "loss": 1.1554, "step": 3090 }, { "epoch": 0.20943330707794464, "grad_norm": 4.500635147094727, "learning_rate": 0.0003871851040525739, "loss": 1.3423, "step": 3091 }, { "epoch": 0.2095010629197686, "grad_norm": 2.9006171226501465, "learning_rate": 0.00038717962760131437, "loss": 0.8354, "step": 3092 }, { "epoch": 0.2095688187615926, "grad_norm": 3.1118552684783936, "learning_rate": 0.0003871741511500548, "loss": 0.9194, "step": 3093 }, { "epoch": 0.2096365746034166, "grad_norm": 3.410881996154785, "learning_rate": 0.0003871686746987952, "loss": 0.8852, "step": 3094 }, { "epoch": 0.2097043304452406, "grad_norm": 3.783707618713379, "learning_rate": 0.0003871631982475356, "loss": 0.9804, "step": 3095 }, { "epoch": 0.20977208628706456, "grad_norm": 3.084458589553833, "learning_rate": 0.000387157721796276, "loss": 0.8004, "step": 3096 }, { "epoch": 0.20983984212888854, "grad_norm": 2.707623243331909, "learning_rate": 0.0003871522453450164, "loss": 0.8773, "step": 3097 }, { "epoch": 0.20990759797071254, "grad_norm": 3.749215841293335, "learning_rate": 0.0003871467688937569, "loss": 0.9985, "step": 3098 }, { "epoch": 0.2099753538125365, "grad_norm": 3.891214609146118, "learning_rate": 0.0003871412924424973, "loss": 1.0906, "step": 3099 }, { "epoch": 0.21004310965436052, "grad_norm": 3.404768943786621, "learning_rate": 0.0003871358159912377, "loss": 0.967, "step": 3100 }, { "epoch": 0.2101108654961845, "grad_norm": 3.955134153366089, "learning_rate": 0.0003871303395399781, "loss": 0.9697, "step": 3101 }, { "epoch": 0.2101786213380085, "grad_norm": 3.0474636554718018, "learning_rate": 0.0003871248630887185, "loss": 0.8256, "step": 3102 }, { "epoch": 0.21024637717983247, "grad_norm": 3.5473544597625732, "learning_rate": 0.0003871193866374589, "loss": 0.8495, "step": 3103 }, { "epoch": 0.21031413302165647, "grad_norm": 4.0794854164123535, "learning_rate": 0.0003871139101861994, "loss": 1.0686, "step": 3104 }, { "epoch": 0.21038188886348044, "grad_norm": 3.6187057495117188, "learning_rate": 0.0003871084337349398, "loss": 0.9369, "step": 3105 }, { "epoch": 0.21044964470530445, "grad_norm": 3.510897397994995, "learning_rate": 0.0003871029572836802, "loss": 1.1107, "step": 3106 }, { "epoch": 0.21051740054712842, "grad_norm": 3.079817295074463, "learning_rate": 0.0003870974808324206, "loss": 0.8698, "step": 3107 }, { "epoch": 0.21058515638895242, "grad_norm": 3.6142637729644775, "learning_rate": 0.00038709200438116103, "loss": 0.9384, "step": 3108 }, { "epoch": 0.2106529122307764, "grad_norm": 3.942026138305664, "learning_rate": 0.00038708652792990143, "loss": 1.0713, "step": 3109 }, { "epoch": 0.2107206680726004, "grad_norm": 3.389601945877075, "learning_rate": 0.0003870810514786419, "loss": 0.9488, "step": 3110 }, { "epoch": 0.21078842391442437, "grad_norm": 2.8886373043060303, "learning_rate": 0.0003870755750273823, "loss": 1.0434, "step": 3111 }, { "epoch": 0.21085617975624835, "grad_norm": 2.563314199447632, "learning_rate": 0.0003870700985761227, "loss": 0.794, "step": 3112 }, { "epoch": 0.21092393559807235, "grad_norm": 2.9957382678985596, "learning_rate": 0.0003870646221248631, "loss": 0.7061, "step": 3113 }, { "epoch": 0.21099169143989632, "grad_norm": 4.0754170417785645, "learning_rate": 0.00038705914567360353, "loss": 0.9812, "step": 3114 }, { "epoch": 0.21105944728172032, "grad_norm": 3.1081702709198, "learning_rate": 0.00038705366922234393, "loss": 0.6549, "step": 3115 }, { "epoch": 0.2111272031235443, "grad_norm": 3.2139029502868652, "learning_rate": 0.0003870481927710844, "loss": 0.7877, "step": 3116 }, { "epoch": 0.2111949589653683, "grad_norm": 3.662554979324341, "learning_rate": 0.0003870427163198248, "loss": 1.1303, "step": 3117 }, { "epoch": 0.21126271480719228, "grad_norm": 4.457626819610596, "learning_rate": 0.0003870372398685652, "loss": 0.9923, "step": 3118 }, { "epoch": 0.21133047064901628, "grad_norm": 7.068685531616211, "learning_rate": 0.0003870317634173056, "loss": 0.941, "step": 3119 }, { "epoch": 0.21139822649084025, "grad_norm": 3.881685972213745, "learning_rate": 0.00038702628696604604, "loss": 1.0968, "step": 3120 }, { "epoch": 0.21146598233266425, "grad_norm": 4.029959201812744, "learning_rate": 0.00038702081051478644, "loss": 1.0996, "step": 3121 }, { "epoch": 0.21153373817448823, "grad_norm": 2.6053762435913086, "learning_rate": 0.00038701533406352683, "loss": 0.8367, "step": 3122 }, { "epoch": 0.21160149401631223, "grad_norm": 3.769785165786743, "learning_rate": 0.0003870098576122673, "loss": 0.9545, "step": 3123 }, { "epoch": 0.2116692498581362, "grad_norm": 3.5461783409118652, "learning_rate": 0.0003870043811610077, "loss": 0.9154, "step": 3124 }, { "epoch": 0.2117370056999602, "grad_norm": 4.105605125427246, "learning_rate": 0.0003869989047097481, "loss": 1.1228, "step": 3125 }, { "epoch": 0.21180476154178418, "grad_norm": 3.5911357402801514, "learning_rate": 0.00038699342825848854, "loss": 0.9285, "step": 3126 }, { "epoch": 0.21187251738360816, "grad_norm": 2.5789434909820557, "learning_rate": 0.00038698795180722894, "loss": 0.6774, "step": 3127 }, { "epoch": 0.21194027322543216, "grad_norm": 5.305942058563232, "learning_rate": 0.00038698247535596934, "loss": 0.8623, "step": 3128 }, { "epoch": 0.21200802906725613, "grad_norm": 3.409611225128174, "learning_rate": 0.00038697699890470974, "loss": 1.1521, "step": 3129 }, { "epoch": 0.21207578490908013, "grad_norm": 3.1284379959106445, "learning_rate": 0.00038697152245345014, "loss": 0.7601, "step": 3130 }, { "epoch": 0.2121435407509041, "grad_norm": 2.8303184509277344, "learning_rate": 0.00038696604600219064, "loss": 0.8204, "step": 3131 }, { "epoch": 0.2122112965927281, "grad_norm": 3.074784278869629, "learning_rate": 0.00038696056955093104, "loss": 0.7463, "step": 3132 }, { "epoch": 0.21227905243455208, "grad_norm": 4.0078229904174805, "learning_rate": 0.00038695509309967144, "loss": 0.995, "step": 3133 }, { "epoch": 0.2123468082763761, "grad_norm": 2.8091914653778076, "learning_rate": 0.00038694961664841184, "loss": 0.6638, "step": 3134 }, { "epoch": 0.21241456411820006, "grad_norm": 4.509549140930176, "learning_rate": 0.00038694414019715224, "loss": 0.9929, "step": 3135 }, { "epoch": 0.21248231996002406, "grad_norm": 3.168811321258545, "learning_rate": 0.0003869386637458927, "loss": 0.7937, "step": 3136 }, { "epoch": 0.21255007580184804, "grad_norm": 4.677186012268066, "learning_rate": 0.0003869331872946331, "loss": 0.9251, "step": 3137 }, { "epoch": 0.21261783164367204, "grad_norm": 3.096297264099121, "learning_rate": 0.0003869277108433735, "loss": 0.8592, "step": 3138 }, { "epoch": 0.212685587485496, "grad_norm": 4.172886371612549, "learning_rate": 0.00038692223439211395, "loss": 1.0343, "step": 3139 }, { "epoch": 0.21275334332732002, "grad_norm": 3.6096184253692627, "learning_rate": 0.00038691675794085435, "loss": 1.0737, "step": 3140 }, { "epoch": 0.212821099169144, "grad_norm": 3.596954345703125, "learning_rate": 0.00038691128148959475, "loss": 0.9474, "step": 3141 }, { "epoch": 0.21288885501096796, "grad_norm": 3.5519378185272217, "learning_rate": 0.0003869058050383352, "loss": 1.0144, "step": 3142 }, { "epoch": 0.21295661085279197, "grad_norm": 3.6861398220062256, "learning_rate": 0.0003869003285870756, "loss": 1.1177, "step": 3143 }, { "epoch": 0.21302436669461594, "grad_norm": 3.0142388343811035, "learning_rate": 0.000386894852135816, "loss": 1.0247, "step": 3144 }, { "epoch": 0.21309212253643994, "grad_norm": 2.605402708053589, "learning_rate": 0.0003868893756845564, "loss": 0.783, "step": 3145 }, { "epoch": 0.21315987837826392, "grad_norm": 4.018885612487793, "learning_rate": 0.0003868838992332968, "loss": 1.1006, "step": 3146 }, { "epoch": 0.21322763422008792, "grad_norm": 2.9617702960968018, "learning_rate": 0.00038687842278203725, "loss": 0.9044, "step": 3147 }, { "epoch": 0.2132953900619119, "grad_norm": 3.6294941902160645, "learning_rate": 0.0003868729463307777, "loss": 0.885, "step": 3148 }, { "epoch": 0.2133631459037359, "grad_norm": 4.113534450531006, "learning_rate": 0.0003868674698795181, "loss": 1.2714, "step": 3149 }, { "epoch": 0.21343090174555987, "grad_norm": 2.841132164001465, "learning_rate": 0.0003868619934282585, "loss": 0.9587, "step": 3150 }, { "epoch": 0.21349865758738387, "grad_norm": 3.4558990001678467, "learning_rate": 0.0003868565169769989, "loss": 0.9674, "step": 3151 }, { "epoch": 0.21356641342920785, "grad_norm": 3.669705629348755, "learning_rate": 0.00038685104052573935, "loss": 0.9453, "step": 3152 }, { "epoch": 0.21363416927103185, "grad_norm": 4.54177188873291, "learning_rate": 0.00038684556407447975, "loss": 0.9363, "step": 3153 }, { "epoch": 0.21370192511285582, "grad_norm": 5.341638565063477, "learning_rate": 0.0003868400876232202, "loss": 0.8942, "step": 3154 }, { "epoch": 0.21376968095467982, "grad_norm": 3.188917636871338, "learning_rate": 0.0003868346111719606, "loss": 0.9156, "step": 3155 }, { "epoch": 0.2138374367965038, "grad_norm": 2.9051613807678223, "learning_rate": 0.000386829134720701, "loss": 0.9587, "step": 3156 }, { "epoch": 0.21390519263832777, "grad_norm": 4.053856372833252, "learning_rate": 0.0003868236582694414, "loss": 1.0757, "step": 3157 }, { "epoch": 0.21397294848015178, "grad_norm": 3.427412509918213, "learning_rate": 0.00038681818181818186, "loss": 0.8125, "step": 3158 }, { "epoch": 0.21404070432197575, "grad_norm": 4.6371917724609375, "learning_rate": 0.00038681270536692226, "loss": 1.1889, "step": 3159 }, { "epoch": 0.21410846016379975, "grad_norm": 2.95896053314209, "learning_rate": 0.00038680722891566266, "loss": 0.8206, "step": 3160 }, { "epoch": 0.21417621600562373, "grad_norm": 3.255932569503784, "learning_rate": 0.00038680175246440305, "loss": 1.0005, "step": 3161 }, { "epoch": 0.21424397184744773, "grad_norm": 3.863858938217163, "learning_rate": 0.0003867962760131435, "loss": 0.9829, "step": 3162 }, { "epoch": 0.2143117276892717, "grad_norm": 3.808131217956543, "learning_rate": 0.0003867907995618839, "loss": 1.1583, "step": 3163 }, { "epoch": 0.2143794835310957, "grad_norm": 3.7781200408935547, "learning_rate": 0.00038678532311062436, "loss": 0.732, "step": 3164 }, { "epoch": 0.21444723937291968, "grad_norm": 2.9623594284057617, "learning_rate": 0.00038677984665936476, "loss": 0.8591, "step": 3165 }, { "epoch": 0.21451499521474368, "grad_norm": 2.8965985774993896, "learning_rate": 0.00038677437020810516, "loss": 0.9441, "step": 3166 }, { "epoch": 0.21458275105656766, "grad_norm": 2.694587469100952, "learning_rate": 0.00038676889375684556, "loss": 0.8323, "step": 3167 }, { "epoch": 0.21465050689839166, "grad_norm": 2.5995397567749023, "learning_rate": 0.00038676341730558596, "loss": 0.7472, "step": 3168 }, { "epoch": 0.21471826274021563, "grad_norm": 3.668618679046631, "learning_rate": 0.0003867579408543264, "loss": 0.7061, "step": 3169 }, { "epoch": 0.21478601858203963, "grad_norm": 4.599798202514648, "learning_rate": 0.00038675246440306686, "loss": 0.9915, "step": 3170 }, { "epoch": 0.2148537744238636, "grad_norm": 3.325688362121582, "learning_rate": 0.00038674698795180726, "loss": 0.9439, "step": 3171 }, { "epoch": 0.21492153026568758, "grad_norm": 2.875779151916504, "learning_rate": 0.00038674151150054766, "loss": 0.6822, "step": 3172 }, { "epoch": 0.21498928610751158, "grad_norm": 3.5521769523620605, "learning_rate": 0.00038673603504928806, "loss": 0.8075, "step": 3173 }, { "epoch": 0.21505704194933556, "grad_norm": 4.692173004150391, "learning_rate": 0.0003867305585980285, "loss": 1.2805, "step": 3174 }, { "epoch": 0.21512479779115956, "grad_norm": 3.8736305236816406, "learning_rate": 0.0003867250821467689, "loss": 0.8393, "step": 3175 }, { "epoch": 0.21519255363298354, "grad_norm": 3.734736204147339, "learning_rate": 0.0003867196056955093, "loss": 1.0848, "step": 3176 }, { "epoch": 0.21526030947480754, "grad_norm": 2.8379523754119873, "learning_rate": 0.0003867141292442497, "loss": 0.8798, "step": 3177 }, { "epoch": 0.2153280653166315, "grad_norm": 2.8278801441192627, "learning_rate": 0.00038670865279299017, "loss": 0.8569, "step": 3178 }, { "epoch": 0.2153958211584555, "grad_norm": 3.4038522243499756, "learning_rate": 0.00038670317634173057, "loss": 0.8173, "step": 3179 }, { "epoch": 0.2154635770002795, "grad_norm": 3.670212507247925, "learning_rate": 0.000386697699890471, "loss": 0.9258, "step": 3180 }, { "epoch": 0.2155313328421035, "grad_norm": 2.3656914234161377, "learning_rate": 0.0003866922234392114, "loss": 0.6878, "step": 3181 }, { "epoch": 0.21559908868392746, "grad_norm": 3.1545722484588623, "learning_rate": 0.0003866867469879518, "loss": 0.964, "step": 3182 }, { "epoch": 0.21566684452575147, "grad_norm": 3.670808792114258, "learning_rate": 0.0003866812705366922, "loss": 1.0518, "step": 3183 }, { "epoch": 0.21573460036757544, "grad_norm": 2.640873908996582, "learning_rate": 0.0003866757940854326, "loss": 0.7049, "step": 3184 }, { "epoch": 0.21580235620939944, "grad_norm": 3.5294442176818848, "learning_rate": 0.00038667031763417307, "loss": 0.8125, "step": 3185 }, { "epoch": 0.21587011205122342, "grad_norm": 2.996356725692749, "learning_rate": 0.0003866648411829135, "loss": 1.0532, "step": 3186 }, { "epoch": 0.2159378678930474, "grad_norm": 3.573580026626587, "learning_rate": 0.0003866593647316539, "loss": 0.8542, "step": 3187 }, { "epoch": 0.2160056237348714, "grad_norm": 4.565465450286865, "learning_rate": 0.0003866538882803943, "loss": 1.0028, "step": 3188 }, { "epoch": 0.21607337957669537, "grad_norm": 3.44734525680542, "learning_rate": 0.0003866484118291347, "loss": 1.1246, "step": 3189 }, { "epoch": 0.21614113541851937, "grad_norm": 2.922597646713257, "learning_rate": 0.0003866429353778752, "loss": 0.6692, "step": 3190 }, { "epoch": 0.21620889126034334, "grad_norm": 6.10439395904541, "learning_rate": 0.0003866374589266156, "loss": 0.9091, "step": 3191 }, { "epoch": 0.21627664710216735, "grad_norm": 3.820887327194214, "learning_rate": 0.00038663198247535597, "loss": 0.8253, "step": 3192 }, { "epoch": 0.21634440294399132, "grad_norm": 3.5457265377044678, "learning_rate": 0.0003866265060240964, "loss": 0.9971, "step": 3193 }, { "epoch": 0.21641215878581532, "grad_norm": 5.0301361083984375, "learning_rate": 0.0003866210295728368, "loss": 0.8361, "step": 3194 }, { "epoch": 0.2164799146276393, "grad_norm": 4.070746421813965, "learning_rate": 0.0003866155531215772, "loss": 1.129, "step": 3195 }, { "epoch": 0.2165476704694633, "grad_norm": 3.0056049823760986, "learning_rate": 0.0003866100766703177, "loss": 0.8028, "step": 3196 }, { "epoch": 0.21661542631128727, "grad_norm": 3.5070459842681885, "learning_rate": 0.0003866046002190581, "loss": 1.0661, "step": 3197 }, { "epoch": 0.21668318215311128, "grad_norm": 4.141531944274902, "learning_rate": 0.0003865991237677985, "loss": 1.0264, "step": 3198 }, { "epoch": 0.21675093799493525, "grad_norm": 3.307365655899048, "learning_rate": 0.0003865936473165389, "loss": 0.8389, "step": 3199 }, { "epoch": 0.21681869383675922, "grad_norm": 3.986813545227051, "learning_rate": 0.0003865881708652793, "loss": 0.8368, "step": 3200 }, { "epoch": 0.21688644967858323, "grad_norm": 3.709231376647949, "learning_rate": 0.00038658269441401973, "loss": 1.0078, "step": 3201 }, { "epoch": 0.2169542055204072, "grad_norm": 3.3721349239349365, "learning_rate": 0.0003865772179627602, "loss": 0.9174, "step": 3202 }, { "epoch": 0.2170219613622312, "grad_norm": 3.6890652179718018, "learning_rate": 0.0003865717415115006, "loss": 0.9724, "step": 3203 }, { "epoch": 0.21708971720405518, "grad_norm": 3.4990837574005127, "learning_rate": 0.000386566265060241, "loss": 0.9037, "step": 3204 }, { "epoch": 0.21715747304587918, "grad_norm": 3.7443063259124756, "learning_rate": 0.0003865607886089814, "loss": 0.9933, "step": 3205 }, { "epoch": 0.21722522888770315, "grad_norm": 4.51320219039917, "learning_rate": 0.0003865553121577218, "loss": 1.2428, "step": 3206 }, { "epoch": 0.21729298472952716, "grad_norm": 3.54730224609375, "learning_rate": 0.00038654983570646223, "loss": 0.8428, "step": 3207 }, { "epoch": 0.21736074057135113, "grad_norm": 2.9358415603637695, "learning_rate": 0.00038654435925520263, "loss": 0.7421, "step": 3208 }, { "epoch": 0.21742849641317513, "grad_norm": 3.6926915645599365, "learning_rate": 0.0003865388828039431, "loss": 0.8903, "step": 3209 }, { "epoch": 0.2174962522549991, "grad_norm": 3.064396858215332, "learning_rate": 0.0003865334063526835, "loss": 0.7064, "step": 3210 }, { "epoch": 0.2175640080968231, "grad_norm": 3.6823248863220215, "learning_rate": 0.0003865279299014239, "loss": 0.8738, "step": 3211 }, { "epoch": 0.21763176393864708, "grad_norm": 3.0942206382751465, "learning_rate": 0.00038652245345016434, "loss": 0.865, "step": 3212 }, { "epoch": 0.21769951978047108, "grad_norm": 2.995757818222046, "learning_rate": 0.00038651697699890474, "loss": 0.8362, "step": 3213 }, { "epoch": 0.21776727562229506, "grad_norm": 3.0151493549346924, "learning_rate": 0.00038651150054764513, "loss": 0.7578, "step": 3214 }, { "epoch": 0.21783503146411903, "grad_norm": 3.0874853134155273, "learning_rate": 0.00038650602409638553, "loss": 0.683, "step": 3215 }, { "epoch": 0.21790278730594304, "grad_norm": 3.3230679035186768, "learning_rate": 0.000386500547645126, "loss": 1.1995, "step": 3216 }, { "epoch": 0.217970543147767, "grad_norm": 3.588575601577759, "learning_rate": 0.0003864950711938664, "loss": 0.6937, "step": 3217 }, { "epoch": 0.218038298989591, "grad_norm": 4.005873203277588, "learning_rate": 0.00038648959474260684, "loss": 0.8626, "step": 3218 }, { "epoch": 0.21810605483141499, "grad_norm": 3.6696572303771973, "learning_rate": 0.00038648411829134724, "loss": 1.0165, "step": 3219 }, { "epoch": 0.218173810673239, "grad_norm": 3.4730069637298584, "learning_rate": 0.00038647864184008764, "loss": 0.8819, "step": 3220 }, { "epoch": 0.21824156651506296, "grad_norm": 3.596906900405884, "learning_rate": 0.00038647316538882804, "loss": 0.9022, "step": 3221 }, { "epoch": 0.21830932235688696, "grad_norm": 5.192864894866943, "learning_rate": 0.00038646768893756844, "loss": 1.1926, "step": 3222 }, { "epoch": 0.21837707819871094, "grad_norm": 4.08858585357666, "learning_rate": 0.0003864622124863089, "loss": 1.1517, "step": 3223 }, { "epoch": 0.21844483404053494, "grad_norm": 3.3537964820861816, "learning_rate": 0.00038645673603504934, "loss": 0.8688, "step": 3224 }, { "epoch": 0.21851258988235892, "grad_norm": 3.6729698181152344, "learning_rate": 0.00038645125958378974, "loss": 0.9936, "step": 3225 }, { "epoch": 0.21858034572418292, "grad_norm": 4.167658805847168, "learning_rate": 0.00038644578313253014, "loss": 1.1169, "step": 3226 }, { "epoch": 0.2186481015660069, "grad_norm": 2.9464125633239746, "learning_rate": 0.00038644030668127054, "loss": 0.8648, "step": 3227 }, { "epoch": 0.2187158574078309, "grad_norm": 4.127668380737305, "learning_rate": 0.000386434830230011, "loss": 1.1282, "step": 3228 }, { "epoch": 0.21878361324965487, "grad_norm": 3.5825445652008057, "learning_rate": 0.0003864293537787514, "loss": 0.8803, "step": 3229 }, { "epoch": 0.21885136909147884, "grad_norm": 3.144740343093872, "learning_rate": 0.0003864238773274918, "loss": 0.8019, "step": 3230 }, { "epoch": 0.21891912493330284, "grad_norm": 3.5088999271392822, "learning_rate": 0.0003864184008762322, "loss": 0.9729, "step": 3231 }, { "epoch": 0.21898688077512682, "grad_norm": 4.277069568634033, "learning_rate": 0.00038641292442497265, "loss": 1.2088, "step": 3232 }, { "epoch": 0.21905463661695082, "grad_norm": 3.9226300716400146, "learning_rate": 0.00038640744797371304, "loss": 0.8578, "step": 3233 }, { "epoch": 0.2191223924587748, "grad_norm": 3.2542054653167725, "learning_rate": 0.0003864019715224535, "loss": 0.8491, "step": 3234 }, { "epoch": 0.2191901483005988, "grad_norm": 4.1487555503845215, "learning_rate": 0.0003863964950711939, "loss": 0.937, "step": 3235 }, { "epoch": 0.21925790414242277, "grad_norm": 3.1875462532043457, "learning_rate": 0.0003863910186199343, "loss": 0.8816, "step": 3236 }, { "epoch": 0.21932565998424677, "grad_norm": 3.487698554992676, "learning_rate": 0.0003863855421686747, "loss": 0.8083, "step": 3237 }, { "epoch": 0.21939341582607075, "grad_norm": 2.930428981781006, "learning_rate": 0.0003863800657174151, "loss": 0.8553, "step": 3238 }, { "epoch": 0.21946117166789475, "grad_norm": 3.352018356323242, "learning_rate": 0.00038637458926615555, "loss": 1.0124, "step": 3239 }, { "epoch": 0.21952892750971872, "grad_norm": 3.139572858810425, "learning_rate": 0.000386369112814896, "loss": 0.9454, "step": 3240 }, { "epoch": 0.21959668335154273, "grad_norm": 3.50776743888855, "learning_rate": 0.0003863636363636364, "loss": 1.0861, "step": 3241 }, { "epoch": 0.2196644391933667, "grad_norm": 4.472263813018799, "learning_rate": 0.0003863581599123768, "loss": 1.1886, "step": 3242 }, { "epoch": 0.2197321950351907, "grad_norm": 3.2098097801208496, "learning_rate": 0.0003863526834611172, "loss": 0.759, "step": 3243 }, { "epoch": 0.21979995087701468, "grad_norm": 3.656064748764038, "learning_rate": 0.0003863472070098576, "loss": 0.7931, "step": 3244 }, { "epoch": 0.21986770671883865, "grad_norm": 3.4928789138793945, "learning_rate": 0.00038634173055859805, "loss": 1.1735, "step": 3245 }, { "epoch": 0.21993546256066265, "grad_norm": 3.3951058387756348, "learning_rate": 0.00038633625410733845, "loss": 0.8016, "step": 3246 }, { "epoch": 0.22000321840248663, "grad_norm": 5.2316508293151855, "learning_rate": 0.0003863307776560789, "loss": 1.4139, "step": 3247 }, { "epoch": 0.22007097424431063, "grad_norm": 4.763803958892822, "learning_rate": 0.0003863253012048193, "loss": 1.0731, "step": 3248 }, { "epoch": 0.2201387300861346, "grad_norm": 3.7690165042877197, "learning_rate": 0.0003863198247535597, "loss": 0.8311, "step": 3249 }, { "epoch": 0.2202064859279586, "grad_norm": 3.196415424346924, "learning_rate": 0.00038631434830230016, "loss": 0.8192, "step": 3250 }, { "epoch": 0.22027424176978258, "grad_norm": 3.4688775539398193, "learning_rate": 0.00038630887185104056, "loss": 0.8734, "step": 3251 }, { "epoch": 0.22034199761160658, "grad_norm": 2.9779715538024902, "learning_rate": 0.00038630339539978096, "loss": 0.9098, "step": 3252 }, { "epoch": 0.22040975345343056, "grad_norm": 4.155899524688721, "learning_rate": 0.00038629791894852135, "loss": 1.1303, "step": 3253 }, { "epoch": 0.22047750929525456, "grad_norm": 2.8792154788970947, "learning_rate": 0.00038629244249726175, "loss": 0.8388, "step": 3254 }, { "epoch": 0.22054526513707853, "grad_norm": 3.0970845222473145, "learning_rate": 0.0003862869660460022, "loss": 0.7923, "step": 3255 }, { "epoch": 0.22061302097890254, "grad_norm": 2.97513747215271, "learning_rate": 0.00038628148959474266, "loss": 0.8896, "step": 3256 }, { "epoch": 0.2206807768207265, "grad_norm": 3.0410900115966797, "learning_rate": 0.00038627601314348306, "loss": 0.9037, "step": 3257 }, { "epoch": 0.2207485326625505, "grad_norm": 4.922347545623779, "learning_rate": 0.00038627053669222346, "loss": 1.2886, "step": 3258 }, { "epoch": 0.22081628850437449, "grad_norm": 3.047973155975342, "learning_rate": 0.00038626506024096386, "loss": 0.8655, "step": 3259 }, { "epoch": 0.22088404434619846, "grad_norm": 4.60847806930542, "learning_rate": 0.00038625958378970426, "loss": 0.928, "step": 3260 }, { "epoch": 0.22095180018802246, "grad_norm": 3.3728489875793457, "learning_rate": 0.0003862541073384447, "loss": 0.9607, "step": 3261 }, { "epoch": 0.22101955602984644, "grad_norm": 4.806407928466797, "learning_rate": 0.0003862486308871851, "loss": 1.2125, "step": 3262 }, { "epoch": 0.22108731187167044, "grad_norm": 3.5676422119140625, "learning_rate": 0.00038624315443592556, "loss": 0.8729, "step": 3263 }, { "epoch": 0.2211550677134944, "grad_norm": 3.9356565475463867, "learning_rate": 0.00038623767798466596, "loss": 0.9958, "step": 3264 }, { "epoch": 0.22122282355531842, "grad_norm": 2.7237555980682373, "learning_rate": 0.00038623220153340636, "loss": 0.7361, "step": 3265 }, { "epoch": 0.2212905793971424, "grad_norm": 3.026766300201416, "learning_rate": 0.0003862267250821468, "loss": 0.9174, "step": 3266 }, { "epoch": 0.2213583352389664, "grad_norm": 3.4095518589019775, "learning_rate": 0.0003862212486308872, "loss": 1.0723, "step": 3267 }, { "epoch": 0.22142609108079037, "grad_norm": 5.167072296142578, "learning_rate": 0.0003862157721796276, "loss": 0.8879, "step": 3268 }, { "epoch": 0.22149384692261437, "grad_norm": 2.764686107635498, "learning_rate": 0.000386210295728368, "loss": 0.7583, "step": 3269 }, { "epoch": 0.22156160276443834, "grad_norm": 4.608572483062744, "learning_rate": 0.0003862048192771084, "loss": 1.243, "step": 3270 }, { "epoch": 0.22162935860626234, "grad_norm": 3.428542375564575, "learning_rate": 0.00038619934282584887, "loss": 0.8606, "step": 3271 }, { "epoch": 0.22169711444808632, "grad_norm": 3.8383169174194336, "learning_rate": 0.0003861938663745893, "loss": 1.0265, "step": 3272 }, { "epoch": 0.22176487028991032, "grad_norm": 3.5066847801208496, "learning_rate": 0.0003861883899233297, "loss": 0.9652, "step": 3273 }, { "epoch": 0.2218326261317343, "grad_norm": 3.234522819519043, "learning_rate": 0.0003861829134720701, "loss": 0.986, "step": 3274 }, { "epoch": 0.22190038197355827, "grad_norm": 3.424370765686035, "learning_rate": 0.0003861774370208105, "loss": 0.9005, "step": 3275 }, { "epoch": 0.22196813781538227, "grad_norm": 3.1895580291748047, "learning_rate": 0.0003861719605695509, "loss": 0.8121, "step": 3276 }, { "epoch": 0.22203589365720625, "grad_norm": 3.1825530529022217, "learning_rate": 0.00038616648411829137, "loss": 0.9125, "step": 3277 }, { "epoch": 0.22210364949903025, "grad_norm": 3.2350778579711914, "learning_rate": 0.00038616100766703177, "loss": 0.9312, "step": 3278 }, { "epoch": 0.22217140534085422, "grad_norm": 3.469127655029297, "learning_rate": 0.0003861555312157722, "loss": 0.9424, "step": 3279 }, { "epoch": 0.22223916118267822, "grad_norm": 2.920137882232666, "learning_rate": 0.0003861500547645126, "loss": 0.7592, "step": 3280 }, { "epoch": 0.2223069170245022, "grad_norm": 3.4994685649871826, "learning_rate": 0.000386144578313253, "loss": 1.076, "step": 3281 }, { "epoch": 0.2223746728663262, "grad_norm": 3.880659818649292, "learning_rate": 0.0003861391018619934, "loss": 0.9621, "step": 3282 }, { "epoch": 0.22244242870815017, "grad_norm": 4.283945083618164, "learning_rate": 0.0003861336254107339, "loss": 1.321, "step": 3283 }, { "epoch": 0.22251018454997418, "grad_norm": 3.0589187145233154, "learning_rate": 0.00038612814895947427, "loss": 0.9018, "step": 3284 }, { "epoch": 0.22257794039179815, "grad_norm": 3.9865033626556396, "learning_rate": 0.00038612267250821467, "loss": 1.0113, "step": 3285 }, { "epoch": 0.22264569623362215, "grad_norm": 3.5975570678710938, "learning_rate": 0.0003861171960569551, "loss": 1.2628, "step": 3286 }, { "epoch": 0.22271345207544613, "grad_norm": 3.0980255603790283, "learning_rate": 0.0003861117196056955, "loss": 0.9407, "step": 3287 }, { "epoch": 0.22278120791727013, "grad_norm": 3.8600916862487793, "learning_rate": 0.000386106243154436, "loss": 0.9274, "step": 3288 }, { "epoch": 0.2228489637590941, "grad_norm": 3.670548439025879, "learning_rate": 0.0003861007667031764, "loss": 0.8947, "step": 3289 }, { "epoch": 0.22291671960091808, "grad_norm": 3.208301067352295, "learning_rate": 0.0003860952902519168, "loss": 0.7756, "step": 3290 }, { "epoch": 0.22298447544274208, "grad_norm": 2.8278963565826416, "learning_rate": 0.0003860898138006572, "loss": 0.6993, "step": 3291 }, { "epoch": 0.22305223128456605, "grad_norm": 9.34854793548584, "learning_rate": 0.0003860843373493976, "loss": 0.9927, "step": 3292 }, { "epoch": 0.22311998712639006, "grad_norm": 3.388206720352173, "learning_rate": 0.00038607886089813803, "loss": 1.0319, "step": 3293 }, { "epoch": 0.22318774296821403, "grad_norm": 4.022616863250732, "learning_rate": 0.0003860733844468785, "loss": 0.9612, "step": 3294 }, { "epoch": 0.22325549881003803, "grad_norm": 3.1215081214904785, "learning_rate": 0.0003860679079956189, "loss": 0.824, "step": 3295 }, { "epoch": 0.223323254651862, "grad_norm": 2.60457181930542, "learning_rate": 0.0003860624315443593, "loss": 0.6243, "step": 3296 }, { "epoch": 0.223391010493686, "grad_norm": 2.602731704711914, "learning_rate": 0.0003860569550930997, "loss": 0.8362, "step": 3297 }, { "epoch": 0.22345876633550998, "grad_norm": 4.482247829437256, "learning_rate": 0.0003860514786418401, "loss": 0.9832, "step": 3298 }, { "epoch": 0.22352652217733399, "grad_norm": 3.846708059310913, "learning_rate": 0.00038604600219058053, "loss": 1.0121, "step": 3299 }, { "epoch": 0.22359427801915796, "grad_norm": 4.125100135803223, "learning_rate": 0.00038604052573932093, "loss": 1.015, "step": 3300 }, { "epoch": 0.22366203386098196, "grad_norm": 3.5581533908843994, "learning_rate": 0.00038603504928806133, "loss": 1.0234, "step": 3301 }, { "epoch": 0.22372978970280594, "grad_norm": 3.0680363178253174, "learning_rate": 0.0003860295728368018, "loss": 0.8798, "step": 3302 }, { "epoch": 0.22379754554462994, "grad_norm": 4.137269496917725, "learning_rate": 0.0003860240963855422, "loss": 1.146, "step": 3303 }, { "epoch": 0.2238653013864539, "grad_norm": 3.1315622329711914, "learning_rate": 0.00038601861993428264, "loss": 0.8505, "step": 3304 }, { "epoch": 0.2239330572282779, "grad_norm": 2.8968334197998047, "learning_rate": 0.00038601314348302304, "loss": 0.7175, "step": 3305 }, { "epoch": 0.2240008130701019, "grad_norm": 2.7565760612487793, "learning_rate": 0.00038600766703176343, "loss": 0.7164, "step": 3306 }, { "epoch": 0.22406856891192586, "grad_norm": 4.303454875946045, "learning_rate": 0.00038600219058050383, "loss": 1.217, "step": 3307 }, { "epoch": 0.22413632475374987, "grad_norm": 3.8144803047180176, "learning_rate": 0.00038599671412924423, "loss": 0.7857, "step": 3308 }, { "epoch": 0.22420408059557384, "grad_norm": 5.346485137939453, "learning_rate": 0.0003859912376779847, "loss": 1.0353, "step": 3309 }, { "epoch": 0.22427183643739784, "grad_norm": 3.725198984146118, "learning_rate": 0.00038598576122672514, "loss": 0.8318, "step": 3310 }, { "epoch": 0.22433959227922182, "grad_norm": 4.739047527313232, "learning_rate": 0.00038598028477546554, "loss": 1.013, "step": 3311 }, { "epoch": 0.22440734812104582, "grad_norm": 3.1888105869293213, "learning_rate": 0.00038597480832420594, "loss": 0.7755, "step": 3312 }, { "epoch": 0.2244751039628698, "grad_norm": 3.478789806365967, "learning_rate": 0.00038596933187294634, "loss": 0.7876, "step": 3313 }, { "epoch": 0.2245428598046938, "grad_norm": 3.094334363937378, "learning_rate": 0.00038596385542168674, "loss": 0.8198, "step": 3314 }, { "epoch": 0.22461061564651777, "grad_norm": 3.139134407043457, "learning_rate": 0.0003859583789704272, "loss": 0.6934, "step": 3315 }, { "epoch": 0.22467837148834177, "grad_norm": 4.222721099853516, "learning_rate": 0.0003859529025191676, "loss": 0.995, "step": 3316 }, { "epoch": 0.22474612733016575, "grad_norm": 6.486714839935303, "learning_rate": 0.00038594742606790804, "loss": 1.092, "step": 3317 }, { "epoch": 0.22481388317198975, "grad_norm": 5.751864910125732, "learning_rate": 0.00038594194961664844, "loss": 1.3278, "step": 3318 }, { "epoch": 0.22488163901381372, "grad_norm": 5.510232448577881, "learning_rate": 0.00038593647316538884, "loss": 0.9396, "step": 3319 }, { "epoch": 0.2249493948556377, "grad_norm": 3.875171422958374, "learning_rate": 0.00038593099671412924, "loss": 0.9943, "step": 3320 }, { "epoch": 0.2250171506974617, "grad_norm": 4.330343246459961, "learning_rate": 0.0003859255202628697, "loss": 0.9082, "step": 3321 }, { "epoch": 0.22508490653928567, "grad_norm": 3.1461009979248047, "learning_rate": 0.0003859200438116101, "loss": 0.8741, "step": 3322 }, { "epoch": 0.22515266238110968, "grad_norm": 3.735506772994995, "learning_rate": 0.0003859145673603505, "loss": 1.1276, "step": 3323 }, { "epoch": 0.22522041822293365, "grad_norm": 3.0065128803253174, "learning_rate": 0.0003859090909090909, "loss": 0.8093, "step": 3324 }, { "epoch": 0.22528817406475765, "grad_norm": 4.059788703918457, "learning_rate": 0.00038590361445783134, "loss": 1.1634, "step": 3325 }, { "epoch": 0.22535592990658163, "grad_norm": 3.328057289123535, "learning_rate": 0.0003858981380065718, "loss": 1.0669, "step": 3326 }, { "epoch": 0.22542368574840563, "grad_norm": 3.5306568145751953, "learning_rate": 0.0003858926615553122, "loss": 0.9011, "step": 3327 }, { "epoch": 0.2254914415902296, "grad_norm": 3.616150140762329, "learning_rate": 0.0003858871851040526, "loss": 0.7039, "step": 3328 }, { "epoch": 0.2255591974320536, "grad_norm": 3.138782262802124, "learning_rate": 0.000385881708652793, "loss": 0.8822, "step": 3329 }, { "epoch": 0.22562695327387758, "grad_norm": 2.966705560684204, "learning_rate": 0.0003858762322015334, "loss": 0.6991, "step": 3330 }, { "epoch": 0.22569470911570158, "grad_norm": 2.5443427562713623, "learning_rate": 0.00038587075575027385, "loss": 0.765, "step": 3331 }, { "epoch": 0.22576246495752555, "grad_norm": 3.599168300628662, "learning_rate": 0.00038586527929901425, "loss": 1.0406, "step": 3332 }, { "epoch": 0.22583022079934956, "grad_norm": 3.751220226287842, "learning_rate": 0.0003858598028477547, "loss": 1.2506, "step": 3333 }, { "epoch": 0.22589797664117353, "grad_norm": 4.456590175628662, "learning_rate": 0.0003858543263964951, "loss": 1.2845, "step": 3334 }, { "epoch": 0.2259657324829975, "grad_norm": 4.015080451965332, "learning_rate": 0.0003858488499452355, "loss": 1.0018, "step": 3335 }, { "epoch": 0.2260334883248215, "grad_norm": 3.462250232696533, "learning_rate": 0.0003858433734939759, "loss": 0.9436, "step": 3336 }, { "epoch": 0.22610124416664548, "grad_norm": 4.5326385498046875, "learning_rate": 0.00038583789704271635, "loss": 0.9292, "step": 3337 }, { "epoch": 0.22616900000846948, "grad_norm": 2.59982967376709, "learning_rate": 0.00038583242059145675, "loss": 0.6989, "step": 3338 }, { "epoch": 0.22623675585029346, "grad_norm": 4.161834716796875, "learning_rate": 0.00038582694414019715, "loss": 0.968, "step": 3339 }, { "epoch": 0.22630451169211746, "grad_norm": 3.513258218765259, "learning_rate": 0.00038582146768893755, "loss": 1.1126, "step": 3340 }, { "epoch": 0.22637226753394143, "grad_norm": 3.807417869567871, "learning_rate": 0.000385815991237678, "loss": 0.9582, "step": 3341 }, { "epoch": 0.22644002337576544, "grad_norm": 2.849374532699585, "learning_rate": 0.00038581051478641846, "loss": 0.6842, "step": 3342 }, { "epoch": 0.2265077792175894, "grad_norm": 2.980308771133423, "learning_rate": 0.00038580503833515886, "loss": 0.8717, "step": 3343 }, { "epoch": 0.2265755350594134, "grad_norm": 5.147204399108887, "learning_rate": 0.00038579956188389926, "loss": 1.1525, "step": 3344 }, { "epoch": 0.2266432909012374, "grad_norm": 2.9929161071777344, "learning_rate": 0.00038579408543263965, "loss": 0.7712, "step": 3345 }, { "epoch": 0.2267110467430614, "grad_norm": 17.001392364501953, "learning_rate": 0.00038578860898138005, "loss": 0.9209, "step": 3346 }, { "epoch": 0.22677880258488536, "grad_norm": 90.29581451416016, "learning_rate": 0.0003857831325301205, "loss": 4.8926, "step": 3347 }, { "epoch": 0.22684655842670937, "grad_norm": 2.4456446170806885, "learning_rate": 0.00038577765607886096, "loss": 0.6685, "step": 3348 }, { "epoch": 0.22691431426853334, "grad_norm": 3.6926991939544678, "learning_rate": 0.00038577217962760136, "loss": 1.1418, "step": 3349 }, { "epoch": 0.22698207011035731, "grad_norm": 3.314230442047119, "learning_rate": 0.00038576670317634176, "loss": 0.9646, "step": 3350 }, { "epoch": 0.22704982595218132, "grad_norm": 4.015190601348877, "learning_rate": 0.00038576122672508216, "loss": 1.0425, "step": 3351 }, { "epoch": 0.2271175817940053, "grad_norm": 2.56612491607666, "learning_rate": 0.00038575575027382256, "loss": 0.7559, "step": 3352 }, { "epoch": 0.2271853376358293, "grad_norm": 3.4861040115356445, "learning_rate": 0.000385750273822563, "loss": 0.7995, "step": 3353 }, { "epoch": 0.22725309347765327, "grad_norm": 2.8572921752929688, "learning_rate": 0.0003857447973713034, "loss": 0.9158, "step": 3354 }, { "epoch": 0.22732084931947727, "grad_norm": 2.811467409133911, "learning_rate": 0.0003857393209200438, "loss": 0.772, "step": 3355 }, { "epoch": 0.22738860516130124, "grad_norm": 2.514394521713257, "learning_rate": 0.00038573384446878426, "loss": 0.7494, "step": 3356 }, { "epoch": 0.22745636100312525, "grad_norm": 2.8694980144500732, "learning_rate": 0.00038572836801752466, "loss": 0.8199, "step": 3357 }, { "epoch": 0.22752411684494922, "grad_norm": 2.919438362121582, "learning_rate": 0.00038572289156626506, "loss": 0.7662, "step": 3358 }, { "epoch": 0.22759187268677322, "grad_norm": 2.8485865592956543, "learning_rate": 0.0003857174151150055, "loss": 0.8239, "step": 3359 }, { "epoch": 0.2276596285285972, "grad_norm": 3.329538345336914, "learning_rate": 0.0003857119386637459, "loss": 0.9703, "step": 3360 }, { "epoch": 0.2277273843704212, "grad_norm": 6.457708835601807, "learning_rate": 0.0003857064622124863, "loss": 0.7194, "step": 3361 }, { "epoch": 0.22779514021224517, "grad_norm": 3.5136470794677734, "learning_rate": 0.0003857009857612267, "loss": 1.0188, "step": 3362 }, { "epoch": 0.22786289605406918, "grad_norm": 3.066953182220459, "learning_rate": 0.00038569550930996717, "loss": 0.9819, "step": 3363 }, { "epoch": 0.22793065189589315, "grad_norm": 3.5111289024353027, "learning_rate": 0.0003856900328587076, "loss": 0.9593, "step": 3364 }, { "epoch": 0.22799840773771712, "grad_norm": 3.7057344913482666, "learning_rate": 0.000385684556407448, "loss": 0.9064, "step": 3365 }, { "epoch": 0.22806616357954113, "grad_norm": 4.136794567108154, "learning_rate": 0.0003856790799561884, "loss": 0.8064, "step": 3366 }, { "epoch": 0.2281339194213651, "grad_norm": 5.08695650100708, "learning_rate": 0.0003856736035049288, "loss": 0.8585, "step": 3367 }, { "epoch": 0.2282016752631891, "grad_norm": 3.313508987426758, "learning_rate": 0.0003856681270536692, "loss": 0.9318, "step": 3368 }, { "epoch": 0.22826943110501308, "grad_norm": 3.9543840885162354, "learning_rate": 0.00038566265060240967, "loss": 0.9329, "step": 3369 }, { "epoch": 0.22833718694683708, "grad_norm": 4.701480388641357, "learning_rate": 0.00038565717415115007, "loss": 0.8461, "step": 3370 }, { "epoch": 0.22840494278866105, "grad_norm": 3.0694451332092285, "learning_rate": 0.00038565169769989047, "loss": 0.8549, "step": 3371 }, { "epoch": 0.22847269863048505, "grad_norm": 4.941560745239258, "learning_rate": 0.0003856462212486309, "loss": 0.9822, "step": 3372 }, { "epoch": 0.22854045447230903, "grad_norm": 3.8070406913757324, "learning_rate": 0.0003856407447973713, "loss": 1.073, "step": 3373 }, { "epoch": 0.22860821031413303, "grad_norm": 2.937474012374878, "learning_rate": 0.0003856352683461117, "loss": 0.8574, "step": 3374 }, { "epoch": 0.228675966155957, "grad_norm": 3.4327750205993652, "learning_rate": 0.00038562979189485217, "loss": 0.9061, "step": 3375 }, { "epoch": 0.228743721997781, "grad_norm": 3.4981727600097656, "learning_rate": 0.00038562431544359257, "loss": 0.9346, "step": 3376 }, { "epoch": 0.22881147783960498, "grad_norm": 3.516223430633545, "learning_rate": 0.00038561883899233297, "loss": 0.8493, "step": 3377 }, { "epoch": 0.22887923368142898, "grad_norm": 3.2403836250305176, "learning_rate": 0.00038561336254107337, "loss": 1.0263, "step": 3378 }, { "epoch": 0.22894698952325296, "grad_norm": 3.306828737258911, "learning_rate": 0.0003856078860898138, "loss": 0.9544, "step": 3379 }, { "epoch": 0.22901474536507693, "grad_norm": 2.5655391216278076, "learning_rate": 0.0003856024096385543, "loss": 0.7852, "step": 3380 }, { "epoch": 0.22908250120690093, "grad_norm": 2.8811683654785156, "learning_rate": 0.0003855969331872947, "loss": 0.8604, "step": 3381 }, { "epoch": 0.2291502570487249, "grad_norm": 4.709669589996338, "learning_rate": 0.0003855914567360351, "loss": 0.9792, "step": 3382 }, { "epoch": 0.2292180128905489, "grad_norm": 3.180150032043457, "learning_rate": 0.0003855859802847755, "loss": 0.9009, "step": 3383 }, { "epoch": 0.22928576873237289, "grad_norm": 4.754932880401611, "learning_rate": 0.0003855805038335159, "loss": 1.0886, "step": 3384 }, { "epoch": 0.2293535245741969, "grad_norm": 3.078165054321289, "learning_rate": 0.00038557502738225633, "loss": 0.7447, "step": 3385 }, { "epoch": 0.22942128041602086, "grad_norm": 4.425560474395752, "learning_rate": 0.0003855695509309967, "loss": 0.9518, "step": 3386 }, { "epoch": 0.22948903625784486, "grad_norm": 3.0872108936309814, "learning_rate": 0.0003855640744797372, "loss": 0.8921, "step": 3387 }, { "epoch": 0.22955679209966884, "grad_norm": 3.4044382572174072, "learning_rate": 0.0003855585980284776, "loss": 0.9192, "step": 3388 }, { "epoch": 0.22962454794149284, "grad_norm": 3.983921766281128, "learning_rate": 0.000385553121577218, "loss": 0.9262, "step": 3389 }, { "epoch": 0.22969230378331681, "grad_norm": 3.5800323486328125, "learning_rate": 0.0003855476451259584, "loss": 1.1124, "step": 3390 }, { "epoch": 0.22976005962514082, "grad_norm": 3.3075690269470215, "learning_rate": 0.00038554216867469883, "loss": 0.6932, "step": 3391 }, { "epoch": 0.2298278154669648, "grad_norm": 3.4148035049438477, "learning_rate": 0.00038553669222343923, "loss": 1.0199, "step": 3392 }, { "epoch": 0.2298955713087888, "grad_norm": 3.788975238800049, "learning_rate": 0.00038553121577217963, "loss": 1.0431, "step": 3393 }, { "epoch": 0.22996332715061277, "grad_norm": 4.082870006561279, "learning_rate": 0.00038552573932092003, "loss": 0.9449, "step": 3394 }, { "epoch": 0.23003108299243674, "grad_norm": 4.332869052886963, "learning_rate": 0.0003855202628696605, "loss": 1.0971, "step": 3395 }, { "epoch": 0.23009883883426074, "grad_norm": 3.3177127838134766, "learning_rate": 0.0003855147864184009, "loss": 0.8408, "step": 3396 }, { "epoch": 0.23016659467608472, "grad_norm": 3.0329833030700684, "learning_rate": 0.00038550930996714133, "loss": 0.7704, "step": 3397 }, { "epoch": 0.23023435051790872, "grad_norm": 3.886368751525879, "learning_rate": 0.00038550383351588173, "loss": 1.2036, "step": 3398 }, { "epoch": 0.2303021063597327, "grad_norm": 2.6347174644470215, "learning_rate": 0.00038549835706462213, "loss": 0.7221, "step": 3399 }, { "epoch": 0.2303698622015567, "grad_norm": 3.0649521350860596, "learning_rate": 0.00038549288061336253, "loss": 0.8738, "step": 3400 }, { "epoch": 0.23043761804338067, "grad_norm": 3.466278314590454, "learning_rate": 0.000385487404162103, "loss": 0.746, "step": 3401 }, { "epoch": 0.23050537388520467, "grad_norm": 3.7289535999298096, "learning_rate": 0.0003854819277108434, "loss": 0.8961, "step": 3402 }, { "epoch": 0.23057312972702865, "grad_norm": 3.3163657188415527, "learning_rate": 0.00038547645125958384, "loss": 0.8762, "step": 3403 }, { "epoch": 0.23064088556885265, "grad_norm": 3.7055232524871826, "learning_rate": 0.00038547097480832424, "loss": 0.915, "step": 3404 }, { "epoch": 0.23070864141067662, "grad_norm": 4.6322407722473145, "learning_rate": 0.00038546549835706464, "loss": 0.9481, "step": 3405 }, { "epoch": 0.23077639725250063, "grad_norm": 3.7476983070373535, "learning_rate": 0.00038546002190580504, "loss": 1.107, "step": 3406 }, { "epoch": 0.2308441530943246, "grad_norm": 4.79381799697876, "learning_rate": 0.0003854545454545455, "loss": 0.8741, "step": 3407 }, { "epoch": 0.2309119089361486, "grad_norm": 2.9633309841156006, "learning_rate": 0.0003854490690032859, "loss": 0.8226, "step": 3408 }, { "epoch": 0.23097966477797258, "grad_norm": 3.6430296897888184, "learning_rate": 0.0003854435925520263, "loss": 0.7634, "step": 3409 }, { "epoch": 0.23104742061979655, "grad_norm": 3.2793545722961426, "learning_rate": 0.0003854381161007667, "loss": 0.9798, "step": 3410 }, { "epoch": 0.23111517646162055, "grad_norm": 3.385540246963501, "learning_rate": 0.00038543263964950714, "loss": 1.0561, "step": 3411 }, { "epoch": 0.23118293230344453, "grad_norm": 2.9721062183380127, "learning_rate": 0.00038542716319824754, "loss": 0.7212, "step": 3412 }, { "epoch": 0.23125068814526853, "grad_norm": 3.544701337814331, "learning_rate": 0.000385421686746988, "loss": 0.9381, "step": 3413 }, { "epoch": 0.2313184439870925, "grad_norm": 2.6547632217407227, "learning_rate": 0.0003854162102957284, "loss": 0.8176, "step": 3414 }, { "epoch": 0.2313861998289165, "grad_norm": 3.5925381183624268, "learning_rate": 0.0003854107338444688, "loss": 1.0717, "step": 3415 }, { "epoch": 0.23145395567074048, "grad_norm": 2.7051608562469482, "learning_rate": 0.0003854052573932092, "loss": 0.8203, "step": 3416 }, { "epoch": 0.23152171151256448, "grad_norm": 4.542996883392334, "learning_rate": 0.00038539978094194964, "loss": 0.9346, "step": 3417 }, { "epoch": 0.23158946735438846, "grad_norm": 3.785576581954956, "learning_rate": 0.0003853943044906901, "loss": 1.0932, "step": 3418 }, { "epoch": 0.23165722319621246, "grad_norm": 3.438602924346924, "learning_rate": 0.0003853888280394305, "loss": 0.835, "step": 3419 }, { "epoch": 0.23172497903803643, "grad_norm": 2.952747106552124, "learning_rate": 0.0003853833515881709, "loss": 0.8179, "step": 3420 }, { "epoch": 0.23179273487986043, "grad_norm": 3.240570545196533, "learning_rate": 0.0003853778751369113, "loss": 0.8928, "step": 3421 }, { "epoch": 0.2318604907216844, "grad_norm": 2.6418635845184326, "learning_rate": 0.0003853723986856517, "loss": 0.6984, "step": 3422 }, { "epoch": 0.2319282465635084, "grad_norm": 4.2474541664123535, "learning_rate": 0.00038536692223439215, "loss": 1.0001, "step": 3423 }, { "epoch": 0.23199600240533239, "grad_norm": 3.3554728031158447, "learning_rate": 0.00038536144578313255, "loss": 0.8196, "step": 3424 }, { "epoch": 0.23206375824715636, "grad_norm": 3.1898434162139893, "learning_rate": 0.00038535596933187295, "loss": 0.9446, "step": 3425 }, { "epoch": 0.23213151408898036, "grad_norm": 4.611783027648926, "learning_rate": 0.0003853504928806134, "loss": 0.884, "step": 3426 }, { "epoch": 0.23219926993080434, "grad_norm": 3.369152784347534, "learning_rate": 0.0003853450164293538, "loss": 0.8741, "step": 3427 }, { "epoch": 0.23226702577262834, "grad_norm": 3.848773241043091, "learning_rate": 0.0003853395399780942, "loss": 0.7307, "step": 3428 }, { "epoch": 0.2323347816144523, "grad_norm": 3.504258394241333, "learning_rate": 0.00038533406352683465, "loss": 0.9336, "step": 3429 }, { "epoch": 0.23240253745627631, "grad_norm": 3.5585741996765137, "learning_rate": 0.00038532858707557505, "loss": 0.7945, "step": 3430 }, { "epoch": 0.2324702932981003, "grad_norm": 3.4187331199645996, "learning_rate": 0.00038532311062431545, "loss": 0.9932, "step": 3431 }, { "epoch": 0.2325380491399243, "grad_norm": 3.8284411430358887, "learning_rate": 0.00038531763417305585, "loss": 1.0536, "step": 3432 }, { "epoch": 0.23260580498174827, "grad_norm": 2.968740224838257, "learning_rate": 0.00038531215772179625, "loss": 0.8009, "step": 3433 }, { "epoch": 0.23267356082357227, "grad_norm": 3.044172763824463, "learning_rate": 0.0003853066812705367, "loss": 0.6621, "step": 3434 }, { "epoch": 0.23274131666539624, "grad_norm": 3.588742971420288, "learning_rate": 0.00038530120481927716, "loss": 1.0881, "step": 3435 }, { "epoch": 0.23280907250722024, "grad_norm": 6.516396522521973, "learning_rate": 0.00038529572836801755, "loss": 0.7891, "step": 3436 }, { "epoch": 0.23287682834904422, "grad_norm": 2.7659990787506104, "learning_rate": 0.00038529025191675795, "loss": 0.8283, "step": 3437 }, { "epoch": 0.23294458419086822, "grad_norm": 3.939044237136841, "learning_rate": 0.00038528477546549835, "loss": 0.9839, "step": 3438 }, { "epoch": 0.2330123400326922, "grad_norm": 3.7872183322906494, "learning_rate": 0.0003852792990142388, "loss": 0.8786, "step": 3439 }, { "epoch": 0.23308009587451617, "grad_norm": 3.5533335208892822, "learning_rate": 0.0003852738225629792, "loss": 1.1712, "step": 3440 }, { "epoch": 0.23314785171634017, "grad_norm": 3.5558178424835205, "learning_rate": 0.0003852683461117196, "loss": 0.9026, "step": 3441 }, { "epoch": 0.23321560755816415, "grad_norm": 5.821711540222168, "learning_rate": 0.00038526286966046006, "loss": 1.2298, "step": 3442 }, { "epoch": 0.23328336339998815, "grad_norm": 3.5713350772857666, "learning_rate": 0.00038525739320920046, "loss": 0.9837, "step": 3443 }, { "epoch": 0.23335111924181212, "grad_norm": 3.7953977584838867, "learning_rate": 0.00038525191675794086, "loss": 0.9427, "step": 3444 }, { "epoch": 0.23341887508363612, "grad_norm": 3.675002098083496, "learning_rate": 0.0003852464403066813, "loss": 0.7763, "step": 3445 }, { "epoch": 0.2334866309254601, "grad_norm": 2.8685288429260254, "learning_rate": 0.0003852409638554217, "loss": 0.8524, "step": 3446 }, { "epoch": 0.2335543867672841, "grad_norm": 3.051927328109741, "learning_rate": 0.0003852354874041621, "loss": 0.7917, "step": 3447 }, { "epoch": 0.23362214260910807, "grad_norm": 3.42769718170166, "learning_rate": 0.0003852300109529025, "loss": 0.7829, "step": 3448 }, { "epoch": 0.23368989845093208, "grad_norm": 3.420353889465332, "learning_rate": 0.00038522453450164296, "loss": 0.9384, "step": 3449 }, { "epoch": 0.23375765429275605, "grad_norm": 4.482166290283203, "learning_rate": 0.00038521905805038336, "loss": 0.8393, "step": 3450 }, { "epoch": 0.23382541013458005, "grad_norm": 3.7440428733825684, "learning_rate": 0.0003852135815991238, "loss": 1.0556, "step": 3451 }, { "epoch": 0.23389316597640403, "grad_norm": 4.012784004211426, "learning_rate": 0.0003852081051478642, "loss": 0.9514, "step": 3452 }, { "epoch": 0.233960921818228, "grad_norm": 3.5449283123016357, "learning_rate": 0.0003852026286966046, "loss": 0.7942, "step": 3453 }, { "epoch": 0.234028677660052, "grad_norm": 3.6617982387542725, "learning_rate": 0.000385197152245345, "loss": 0.9537, "step": 3454 }, { "epoch": 0.23409643350187598, "grad_norm": 3.6189146041870117, "learning_rate": 0.00038519167579408547, "loss": 1.041, "step": 3455 }, { "epoch": 0.23416418934369998, "grad_norm": 3.9514875411987305, "learning_rate": 0.00038518619934282586, "loss": 0.9627, "step": 3456 }, { "epoch": 0.23423194518552395, "grad_norm": 3.212777614593506, "learning_rate": 0.0003851807228915663, "loss": 0.9405, "step": 3457 }, { "epoch": 0.23429970102734796, "grad_norm": 3.4932689666748047, "learning_rate": 0.0003851752464403067, "loss": 1.075, "step": 3458 }, { "epoch": 0.23436745686917193, "grad_norm": 2.6850225925445557, "learning_rate": 0.0003851697699890471, "loss": 0.8483, "step": 3459 }, { "epoch": 0.23443521271099593, "grad_norm": 3.243868589401245, "learning_rate": 0.0003851642935377875, "loss": 0.8097, "step": 3460 }, { "epoch": 0.2345029685528199, "grad_norm": 3.221231698989868, "learning_rate": 0.00038515881708652797, "loss": 0.9772, "step": 3461 }, { "epoch": 0.2345707243946439, "grad_norm": 3.6649513244628906, "learning_rate": 0.00038515334063526837, "loss": 0.9905, "step": 3462 }, { "epoch": 0.23463848023646788, "grad_norm": 2.813424587249756, "learning_rate": 0.00038514786418400877, "loss": 0.7572, "step": 3463 }, { "epoch": 0.23470623607829189, "grad_norm": 4.343161106109619, "learning_rate": 0.00038514238773274917, "loss": 0.7859, "step": 3464 }, { "epoch": 0.23477399192011586, "grad_norm": 3.841916799545288, "learning_rate": 0.0003851369112814896, "loss": 1.0273, "step": 3465 }, { "epoch": 0.23484174776193986, "grad_norm": 3.720700263977051, "learning_rate": 0.00038513143483023, "loss": 1.0152, "step": 3466 }, { "epoch": 0.23490950360376384, "grad_norm": 4.789869785308838, "learning_rate": 0.00038512595837897047, "loss": 0.9471, "step": 3467 }, { "epoch": 0.2349772594455878, "grad_norm": 3.556755304336548, "learning_rate": 0.00038512048192771087, "loss": 0.9468, "step": 3468 }, { "epoch": 0.2350450152874118, "grad_norm": 3.355682849884033, "learning_rate": 0.00038511500547645127, "loss": 0.9641, "step": 3469 }, { "epoch": 0.2351127711292358, "grad_norm": 2.8267431259155273, "learning_rate": 0.00038510952902519167, "loss": 0.6691, "step": 3470 }, { "epoch": 0.2351805269710598, "grad_norm": 4.00866174697876, "learning_rate": 0.00038510405257393207, "loss": 0.9212, "step": 3471 }, { "epoch": 0.23524828281288376, "grad_norm": 3.6491472721099854, "learning_rate": 0.0003850985761226725, "loss": 1.061, "step": 3472 }, { "epoch": 0.23531603865470777, "grad_norm": 2.7635536193847656, "learning_rate": 0.000385093099671413, "loss": 0.625, "step": 3473 }, { "epoch": 0.23538379449653174, "grad_norm": 3.980212450027466, "learning_rate": 0.0003850876232201534, "loss": 0.9621, "step": 3474 }, { "epoch": 0.23545155033835574, "grad_norm": 3.0759336948394775, "learning_rate": 0.0003850821467688938, "loss": 0.7169, "step": 3475 }, { "epoch": 0.23551930618017972, "grad_norm": 2.9005484580993652, "learning_rate": 0.0003850766703176342, "loss": 0.8001, "step": 3476 }, { "epoch": 0.23558706202200372, "grad_norm": 3.1984972953796387, "learning_rate": 0.00038507119386637463, "loss": 0.8531, "step": 3477 }, { "epoch": 0.2356548178638277, "grad_norm": 3.1212916374206543, "learning_rate": 0.000385065717415115, "loss": 0.8028, "step": 3478 }, { "epoch": 0.2357225737056517, "grad_norm": 4.0563578605651855, "learning_rate": 0.0003850602409638554, "loss": 1.0634, "step": 3479 }, { "epoch": 0.23579032954747567, "grad_norm": 3.4054884910583496, "learning_rate": 0.0003850547645125959, "loss": 1.0115, "step": 3480 }, { "epoch": 0.23585808538929967, "grad_norm": 3.257073402404785, "learning_rate": 0.0003850492880613363, "loss": 1.047, "step": 3481 }, { "epoch": 0.23592584123112365, "grad_norm": 3.7565500736236572, "learning_rate": 0.0003850438116100767, "loss": 0.9759, "step": 3482 }, { "epoch": 0.23599359707294762, "grad_norm": 6.433558940887451, "learning_rate": 0.00038503833515881713, "loss": 0.9733, "step": 3483 }, { "epoch": 0.23606135291477162, "grad_norm": 3.777838945388794, "learning_rate": 0.00038503285870755753, "loss": 0.8168, "step": 3484 }, { "epoch": 0.2361291087565956, "grad_norm": 3.1287827491760254, "learning_rate": 0.00038502738225629793, "loss": 0.8772, "step": 3485 }, { "epoch": 0.2361968645984196, "grad_norm": 2.5524959564208984, "learning_rate": 0.00038502190580503833, "loss": 0.8454, "step": 3486 }, { "epoch": 0.23626462044024357, "grad_norm": 2.5516135692596436, "learning_rate": 0.00038501642935377873, "loss": 0.8229, "step": 3487 }, { "epoch": 0.23633237628206757, "grad_norm": 3.3609588146209717, "learning_rate": 0.0003850109529025192, "loss": 0.9663, "step": 3488 }, { "epoch": 0.23640013212389155, "grad_norm": 3.0082757472991943, "learning_rate": 0.00038500547645125963, "loss": 0.7959, "step": 3489 }, { "epoch": 0.23646788796571555, "grad_norm": 3.1708953380584717, "learning_rate": 0.00038500000000000003, "loss": 0.9676, "step": 3490 }, { "epoch": 0.23653564380753953, "grad_norm": 3.4143319129943848, "learning_rate": 0.00038499452354874043, "loss": 0.8733, "step": 3491 }, { "epoch": 0.23660339964936353, "grad_norm": 5.2941999435424805, "learning_rate": 0.00038498904709748083, "loss": 0.9099, "step": 3492 }, { "epoch": 0.2366711554911875, "grad_norm": 3.165513038635254, "learning_rate": 0.0003849835706462213, "loss": 0.8935, "step": 3493 }, { "epoch": 0.2367389113330115, "grad_norm": 4.730727672576904, "learning_rate": 0.0003849780941949617, "loss": 1.0097, "step": 3494 }, { "epoch": 0.23680666717483548, "grad_norm": 2.826871395111084, "learning_rate": 0.0003849726177437021, "loss": 0.8137, "step": 3495 }, { "epoch": 0.23687442301665948, "grad_norm": 3.622166633605957, "learning_rate": 0.00038496714129244254, "loss": 1.1167, "step": 3496 }, { "epoch": 0.23694217885848345, "grad_norm": 3.233224868774414, "learning_rate": 0.00038496166484118294, "loss": 0.8041, "step": 3497 }, { "epoch": 0.23700993470030743, "grad_norm": 5.051055431365967, "learning_rate": 0.00038495618838992334, "loss": 1.0417, "step": 3498 }, { "epoch": 0.23707769054213143, "grad_norm": 2.873582124710083, "learning_rate": 0.0003849507119386638, "loss": 0.8768, "step": 3499 }, { "epoch": 0.2371454463839554, "grad_norm": 3.918994665145874, "learning_rate": 0.0003849452354874042, "loss": 0.7139, "step": 3500 }, { "epoch": 0.2372132022257794, "grad_norm": 2.7591445446014404, "learning_rate": 0.0003849397590361446, "loss": 0.7101, "step": 3501 }, { "epoch": 0.23728095806760338, "grad_norm": 2.902021646499634, "learning_rate": 0.000384934282584885, "loss": 0.7779, "step": 3502 }, { "epoch": 0.23734871390942738, "grad_norm": 3.3899986743927, "learning_rate": 0.0003849288061336254, "loss": 1.0221, "step": 3503 }, { "epoch": 0.23741646975125136, "grad_norm": 3.125257968902588, "learning_rate": 0.00038492332968236584, "loss": 0.8824, "step": 3504 }, { "epoch": 0.23748422559307536, "grad_norm": 2.6779568195343018, "learning_rate": 0.0003849178532311063, "loss": 0.6717, "step": 3505 }, { "epoch": 0.23755198143489933, "grad_norm": 3.4571759700775146, "learning_rate": 0.0003849123767798467, "loss": 0.9512, "step": 3506 }, { "epoch": 0.23761973727672334, "grad_norm": 3.9164414405822754, "learning_rate": 0.0003849069003285871, "loss": 1.0673, "step": 3507 }, { "epoch": 0.2376874931185473, "grad_norm": 3.7116291522979736, "learning_rate": 0.0003849014238773275, "loss": 0.8889, "step": 3508 }, { "epoch": 0.2377552489603713, "grad_norm": 3.6754040718078613, "learning_rate": 0.0003848959474260679, "loss": 0.9608, "step": 3509 }, { "epoch": 0.2378230048021953, "grad_norm": 3.6156272888183594, "learning_rate": 0.00038489047097480834, "loss": 1.0282, "step": 3510 }, { "epoch": 0.2378907606440193, "grad_norm": 3.9002699851989746, "learning_rate": 0.0003848849945235488, "loss": 0.9404, "step": 3511 }, { "epoch": 0.23795851648584326, "grad_norm": 4.33397102355957, "learning_rate": 0.0003848795180722892, "loss": 0.9854, "step": 3512 }, { "epoch": 0.23802627232766724, "grad_norm": 3.4970011711120605, "learning_rate": 0.0003848740416210296, "loss": 0.8529, "step": 3513 }, { "epoch": 0.23809402816949124, "grad_norm": 3.519333600997925, "learning_rate": 0.00038486856516977, "loss": 0.952, "step": 3514 }, { "epoch": 0.23816178401131521, "grad_norm": 3.3597099781036377, "learning_rate": 0.00038486308871851045, "loss": 1.0487, "step": 3515 }, { "epoch": 0.23822953985313922, "grad_norm": 4.351285934448242, "learning_rate": 0.00038485761226725085, "loss": 0.881, "step": 3516 }, { "epoch": 0.2382972956949632, "grad_norm": 4.334640026092529, "learning_rate": 0.00038485213581599125, "loss": 0.8924, "step": 3517 }, { "epoch": 0.2383650515367872, "grad_norm": 3.781426191329956, "learning_rate": 0.00038484665936473165, "loss": 0.7698, "step": 3518 }, { "epoch": 0.23843280737861117, "grad_norm": 4.889671802520752, "learning_rate": 0.0003848411829134721, "loss": 0.89, "step": 3519 }, { "epoch": 0.23850056322043517, "grad_norm": 3.7696127891540527, "learning_rate": 0.0003848357064622125, "loss": 1.0221, "step": 3520 }, { "epoch": 0.23856831906225914, "grad_norm": 3.7246408462524414, "learning_rate": 0.00038483023001095295, "loss": 1.0707, "step": 3521 }, { "epoch": 0.23863607490408315, "grad_norm": 3.759204149246216, "learning_rate": 0.00038482475355969335, "loss": 1.0519, "step": 3522 }, { "epoch": 0.23870383074590712, "grad_norm": 3.753209114074707, "learning_rate": 0.00038481927710843375, "loss": 0.8522, "step": 3523 }, { "epoch": 0.23877158658773112, "grad_norm": 2.9252936840057373, "learning_rate": 0.00038481380065717415, "loss": 1.0213, "step": 3524 }, { "epoch": 0.2388393424295551, "grad_norm": 3.0121278762817383, "learning_rate": 0.00038480832420591455, "loss": 0.9012, "step": 3525 }, { "epoch": 0.2389070982713791, "grad_norm": 3.9473061561584473, "learning_rate": 0.000384802847754655, "loss": 1.0919, "step": 3526 }, { "epoch": 0.23897485411320307, "grad_norm": 5.666573524475098, "learning_rate": 0.00038479737130339546, "loss": 0.7552, "step": 3527 }, { "epoch": 0.23904260995502705, "grad_norm": 4.097467422485352, "learning_rate": 0.00038479189485213585, "loss": 1.1349, "step": 3528 }, { "epoch": 0.23911036579685105, "grad_norm": 3.3828299045562744, "learning_rate": 0.00038478641840087625, "loss": 0.9631, "step": 3529 }, { "epoch": 0.23917812163867502, "grad_norm": 3.756110191345215, "learning_rate": 0.00038478094194961665, "loss": 0.9188, "step": 3530 }, { "epoch": 0.23924587748049903, "grad_norm": 3.6144723892211914, "learning_rate": 0.0003847754654983571, "loss": 0.8087, "step": 3531 }, { "epoch": 0.239313633322323, "grad_norm": 3.5948920249938965, "learning_rate": 0.0003847699890470975, "loss": 0.8524, "step": 3532 }, { "epoch": 0.239381389164147, "grad_norm": 3.6412158012390137, "learning_rate": 0.0003847645125958379, "loss": 0.8304, "step": 3533 }, { "epoch": 0.23944914500597098, "grad_norm": 3.1989147663116455, "learning_rate": 0.0003847590361445783, "loss": 0.8756, "step": 3534 }, { "epoch": 0.23951690084779498, "grad_norm": 5.724002361297607, "learning_rate": 0.00038475355969331876, "loss": 1.0716, "step": 3535 }, { "epoch": 0.23958465668961895, "grad_norm": 3.8183436393737793, "learning_rate": 0.00038474808324205916, "loss": 1.0032, "step": 3536 }, { "epoch": 0.23965241253144295, "grad_norm": 3.840303897857666, "learning_rate": 0.0003847426067907996, "loss": 1.042, "step": 3537 }, { "epoch": 0.23972016837326693, "grad_norm": 3.46730637550354, "learning_rate": 0.00038473713033954, "loss": 0.7219, "step": 3538 }, { "epoch": 0.23978792421509093, "grad_norm": 3.6060099601745605, "learning_rate": 0.0003847316538882804, "loss": 0.8408, "step": 3539 }, { "epoch": 0.2398556800569149, "grad_norm": 3.0371289253234863, "learning_rate": 0.0003847261774370208, "loss": 0.8476, "step": 3540 }, { "epoch": 0.2399234358987389, "grad_norm": 4.043479919433594, "learning_rate": 0.0003847207009857612, "loss": 0.8577, "step": 3541 }, { "epoch": 0.23999119174056288, "grad_norm": 3.5064337253570557, "learning_rate": 0.00038471522453450166, "loss": 1.0555, "step": 3542 }, { "epoch": 0.24005894758238686, "grad_norm": 3.5323991775512695, "learning_rate": 0.0003847097480832421, "loss": 0.8611, "step": 3543 }, { "epoch": 0.24012670342421086, "grad_norm": 4.869713306427002, "learning_rate": 0.0003847042716319825, "loss": 1.1686, "step": 3544 }, { "epoch": 0.24019445926603483, "grad_norm": 3.4327447414398193, "learning_rate": 0.0003846987951807229, "loss": 0.806, "step": 3545 }, { "epoch": 0.24026221510785883, "grad_norm": 3.1635055541992188, "learning_rate": 0.0003846933187294633, "loss": 0.9743, "step": 3546 }, { "epoch": 0.2403299709496828, "grad_norm": 2.770780324935913, "learning_rate": 0.0003846878422782037, "loss": 0.7404, "step": 3547 }, { "epoch": 0.2403977267915068, "grad_norm": 2.9262092113494873, "learning_rate": 0.00038468236582694416, "loss": 0.9241, "step": 3548 }, { "epoch": 0.24046548263333078, "grad_norm": 2.8331191539764404, "learning_rate": 0.00038467688937568456, "loss": 0.8595, "step": 3549 }, { "epoch": 0.2405332384751548, "grad_norm": 3.5966126918792725, "learning_rate": 0.000384671412924425, "loss": 1.1457, "step": 3550 }, { "epoch": 0.24060099431697876, "grad_norm": 3.2753615379333496, "learning_rate": 0.0003846659364731654, "loss": 0.7817, "step": 3551 }, { "epoch": 0.24066875015880276, "grad_norm": 3.666522264480591, "learning_rate": 0.0003846604600219058, "loss": 1.0483, "step": 3552 }, { "epoch": 0.24073650600062674, "grad_norm": 3.658071994781494, "learning_rate": 0.00038465498357064627, "loss": 0.8674, "step": 3553 }, { "epoch": 0.24080426184245074, "grad_norm": 2.676384210586548, "learning_rate": 0.00038464950711938667, "loss": 0.8398, "step": 3554 }, { "epoch": 0.24087201768427471, "grad_norm": 2.86118221282959, "learning_rate": 0.00038464403066812707, "loss": 0.8704, "step": 3555 }, { "epoch": 0.24093977352609872, "grad_norm": 2.81595778465271, "learning_rate": 0.00038463855421686747, "loss": 0.7396, "step": 3556 }, { "epoch": 0.2410075293679227, "grad_norm": 4.023396968841553, "learning_rate": 0.00038463307776560787, "loss": 1.0471, "step": 3557 }, { "epoch": 0.24107528520974666, "grad_norm": 2.933497667312622, "learning_rate": 0.0003846276013143483, "loss": 0.8205, "step": 3558 }, { "epoch": 0.24114304105157067, "grad_norm": 4.319410800933838, "learning_rate": 0.00038462212486308877, "loss": 1.0236, "step": 3559 }, { "epoch": 0.24121079689339464, "grad_norm": 2.817918539047241, "learning_rate": 0.00038461664841182917, "loss": 0.9278, "step": 3560 }, { "epoch": 0.24127855273521864, "grad_norm": 8.175501823425293, "learning_rate": 0.00038461117196056957, "loss": 0.932, "step": 3561 }, { "epoch": 0.24134630857704262, "grad_norm": 2.6383726596832275, "learning_rate": 0.00038460569550930997, "loss": 0.746, "step": 3562 }, { "epoch": 0.24141406441886662, "grad_norm": 3.5217936038970947, "learning_rate": 0.00038460021905805037, "loss": 0.9239, "step": 3563 }, { "epoch": 0.2414818202606906, "grad_norm": 4.681241989135742, "learning_rate": 0.0003845947426067908, "loss": 0.789, "step": 3564 }, { "epoch": 0.2415495761025146, "grad_norm": 4.095000267028809, "learning_rate": 0.0003845892661555312, "loss": 1.0066, "step": 3565 }, { "epoch": 0.24161733194433857, "grad_norm": 2.88785457611084, "learning_rate": 0.0003845837897042717, "loss": 0.7456, "step": 3566 }, { "epoch": 0.24168508778616257, "grad_norm": 3.384957790374756, "learning_rate": 0.0003845783132530121, "loss": 0.8015, "step": 3567 }, { "epoch": 0.24175284362798655, "grad_norm": 2.8680386543273926, "learning_rate": 0.0003845728368017525, "loss": 0.9248, "step": 3568 }, { "epoch": 0.24182059946981055, "grad_norm": 3.7184362411499023, "learning_rate": 0.0003845673603504929, "loss": 0.8646, "step": 3569 }, { "epoch": 0.24188835531163452, "grad_norm": 3.1110970973968506, "learning_rate": 0.0003845618838992333, "loss": 0.6712, "step": 3570 }, { "epoch": 0.24195611115345853, "grad_norm": 6.174555778503418, "learning_rate": 0.0003845564074479737, "loss": 0.9191, "step": 3571 }, { "epoch": 0.2420238669952825, "grad_norm": 3.1740756034851074, "learning_rate": 0.0003845509309967141, "loss": 0.7829, "step": 3572 }, { "epoch": 0.24209162283710647, "grad_norm": 3.3278098106384277, "learning_rate": 0.0003845454545454545, "loss": 0.9331, "step": 3573 }, { "epoch": 0.24215937867893048, "grad_norm": 3.312324285507202, "learning_rate": 0.000384539978094195, "loss": 0.785, "step": 3574 }, { "epoch": 0.24222713452075445, "grad_norm": 3.792536497116089, "learning_rate": 0.00038453450164293543, "loss": 1.0792, "step": 3575 }, { "epoch": 0.24229489036257845, "grad_norm": 3.6044788360595703, "learning_rate": 0.00038452902519167583, "loss": 0.8771, "step": 3576 }, { "epoch": 0.24236264620440243, "grad_norm": 5.130095481872559, "learning_rate": 0.00038452354874041623, "loss": 0.8476, "step": 3577 }, { "epoch": 0.24243040204622643, "grad_norm": 3.3492159843444824, "learning_rate": 0.00038451807228915663, "loss": 0.8217, "step": 3578 }, { "epoch": 0.2424981578880504, "grad_norm": 4.7940826416015625, "learning_rate": 0.00038451259583789703, "loss": 1.1556, "step": 3579 }, { "epoch": 0.2425659137298744, "grad_norm": 4.007096290588379, "learning_rate": 0.0003845071193866375, "loss": 0.8479, "step": 3580 }, { "epoch": 0.24263366957169838, "grad_norm": 3.4432461261749268, "learning_rate": 0.00038450164293537793, "loss": 1.1665, "step": 3581 }, { "epoch": 0.24270142541352238, "grad_norm": 4.2289042472839355, "learning_rate": 0.00038449616648411833, "loss": 0.8096, "step": 3582 }, { "epoch": 0.24276918125534636, "grad_norm": 3.279273509979248, "learning_rate": 0.00038449069003285873, "loss": 1.0627, "step": 3583 }, { "epoch": 0.24283693709717036, "grad_norm": 3.1666338443756104, "learning_rate": 0.00038448521358159913, "loss": 0.8252, "step": 3584 }, { "epoch": 0.24290469293899433, "grad_norm": 5.830041885375977, "learning_rate": 0.00038447973713033953, "loss": 0.8419, "step": 3585 }, { "epoch": 0.24297244878081833, "grad_norm": 3.1612913608551025, "learning_rate": 0.00038447426067908, "loss": 0.963, "step": 3586 }, { "epoch": 0.2430402046226423, "grad_norm": 3.0637261867523193, "learning_rate": 0.0003844687842278204, "loss": 0.7428, "step": 3587 }, { "epoch": 0.24310796046446628, "grad_norm": 4.1766357421875, "learning_rate": 0.0003844633077765608, "loss": 0.9666, "step": 3588 }, { "epoch": 0.24317571630629028, "grad_norm": 2.6001627445220947, "learning_rate": 0.00038445783132530124, "loss": 0.6888, "step": 3589 }, { "epoch": 0.24324347214811426, "grad_norm": 4.364114761352539, "learning_rate": 0.00038445235487404164, "loss": 1.0244, "step": 3590 }, { "epoch": 0.24331122798993826, "grad_norm": 3.9264612197875977, "learning_rate": 0.0003844468784227821, "loss": 0.985, "step": 3591 }, { "epoch": 0.24337898383176224, "grad_norm": 3.905233860015869, "learning_rate": 0.0003844414019715225, "loss": 0.892, "step": 3592 }, { "epoch": 0.24344673967358624, "grad_norm": 2.610853433609009, "learning_rate": 0.0003844359255202629, "loss": 0.7664, "step": 3593 }, { "epoch": 0.2435144955154102, "grad_norm": 3.0197854042053223, "learning_rate": 0.0003844304490690033, "loss": 0.8688, "step": 3594 }, { "epoch": 0.24358225135723421, "grad_norm": 2.698666572570801, "learning_rate": 0.0003844249726177437, "loss": 0.8186, "step": 3595 }, { "epoch": 0.2436500071990582, "grad_norm": 3.040863275527954, "learning_rate": 0.00038441949616648414, "loss": 1.0042, "step": 3596 }, { "epoch": 0.2437177630408822, "grad_norm": 3.1363484859466553, "learning_rate": 0.0003844140197152246, "loss": 0.8847, "step": 3597 }, { "epoch": 0.24378551888270616, "grad_norm": 2.8835747241973877, "learning_rate": 0.000384408543263965, "loss": 0.8451, "step": 3598 }, { "epoch": 0.24385327472453017, "grad_norm": 3.3994038105010986, "learning_rate": 0.0003844030668127054, "loss": 0.72, "step": 3599 }, { "epoch": 0.24392103056635414, "grad_norm": 3.5576508045196533, "learning_rate": 0.0003843975903614458, "loss": 0.9419, "step": 3600 }, { "epoch": 0.24398878640817814, "grad_norm": 2.9685850143432617, "learning_rate": 0.0003843921139101862, "loss": 0.7367, "step": 3601 }, { "epoch": 0.24405654225000212, "grad_norm": 2.9869747161865234, "learning_rate": 0.00038438663745892664, "loss": 0.9096, "step": 3602 }, { "epoch": 0.2441242980918261, "grad_norm": 3.1118412017822266, "learning_rate": 0.00038438116100766704, "loss": 0.7894, "step": 3603 }, { "epoch": 0.2441920539336501, "grad_norm": 3.8750014305114746, "learning_rate": 0.00038437568455640744, "loss": 0.9525, "step": 3604 }, { "epoch": 0.24425980977547407, "grad_norm": 3.0228381156921387, "learning_rate": 0.0003843702081051479, "loss": 0.6956, "step": 3605 }, { "epoch": 0.24432756561729807, "grad_norm": 4.395471096038818, "learning_rate": 0.0003843647316538883, "loss": 1.2877, "step": 3606 }, { "epoch": 0.24439532145912204, "grad_norm": 3.6463301181793213, "learning_rate": 0.0003843592552026287, "loss": 1.0584, "step": 3607 }, { "epoch": 0.24446307730094605, "grad_norm": 3.233916759490967, "learning_rate": 0.00038435377875136915, "loss": 0.6924, "step": 3608 }, { "epoch": 0.24453083314277002, "grad_norm": 3.1753616333007812, "learning_rate": 0.00038434830230010955, "loss": 0.7678, "step": 3609 }, { "epoch": 0.24459858898459402, "grad_norm": 2.5761382579803467, "learning_rate": 0.00038434282584884995, "loss": 0.7123, "step": 3610 }, { "epoch": 0.244666344826418, "grad_norm": 4.036708831787109, "learning_rate": 0.00038433734939759034, "loss": 0.8565, "step": 3611 }, { "epoch": 0.244734100668242, "grad_norm": 4.253012657165527, "learning_rate": 0.0003843318729463308, "loss": 0.8407, "step": 3612 }, { "epoch": 0.24480185651006597, "grad_norm": 3.0888428688049316, "learning_rate": 0.00038432639649507125, "loss": 0.7569, "step": 3613 }, { "epoch": 0.24486961235188998, "grad_norm": 3.641345500946045, "learning_rate": 0.00038432092004381165, "loss": 0.9349, "step": 3614 }, { "epoch": 0.24493736819371395, "grad_norm": 3.3644180297851562, "learning_rate": 0.00038431544359255205, "loss": 1.1062, "step": 3615 }, { "epoch": 0.24500512403553795, "grad_norm": 4.179390907287598, "learning_rate": 0.00038430996714129245, "loss": 0.9211, "step": 3616 }, { "epoch": 0.24507287987736193, "grad_norm": 3.50895619392395, "learning_rate": 0.00038430449069003285, "loss": 0.9339, "step": 3617 }, { "epoch": 0.2451406357191859, "grad_norm": 5.801991939544678, "learning_rate": 0.0003842990142387733, "loss": 1.0981, "step": 3618 }, { "epoch": 0.2452083915610099, "grad_norm": 3.113947629928589, "learning_rate": 0.0003842935377875137, "loss": 1.0309, "step": 3619 }, { "epoch": 0.24527614740283388, "grad_norm": 3.378770351409912, "learning_rate": 0.00038428806133625415, "loss": 0.689, "step": 3620 }, { "epoch": 0.24534390324465788, "grad_norm": 3.041553497314453, "learning_rate": 0.00038428258488499455, "loss": 0.8364, "step": 3621 }, { "epoch": 0.24541165908648185, "grad_norm": 3.19685959815979, "learning_rate": 0.00038427710843373495, "loss": 0.7991, "step": 3622 }, { "epoch": 0.24547941492830586, "grad_norm": 3.5349223613739014, "learning_rate": 0.00038427163198247535, "loss": 1.0397, "step": 3623 }, { "epoch": 0.24554717077012983, "grad_norm": 4.404790878295898, "learning_rate": 0.0003842661555312158, "loss": 1.0932, "step": 3624 }, { "epoch": 0.24561492661195383, "grad_norm": 3.362189769744873, "learning_rate": 0.0003842606790799562, "loss": 0.8153, "step": 3625 }, { "epoch": 0.2456826824537778, "grad_norm": 3.7418336868286133, "learning_rate": 0.0003842552026286966, "loss": 0.9646, "step": 3626 }, { "epoch": 0.2457504382956018, "grad_norm": 2.701110601425171, "learning_rate": 0.000384249726177437, "loss": 0.7417, "step": 3627 }, { "epoch": 0.24581819413742578, "grad_norm": 3.8121039867401123, "learning_rate": 0.00038424424972617746, "loss": 1.3079, "step": 3628 }, { "epoch": 0.24588594997924978, "grad_norm": 2.9305543899536133, "learning_rate": 0.0003842387732749179, "loss": 0.6759, "step": 3629 }, { "epoch": 0.24595370582107376, "grad_norm": 3.2821357250213623, "learning_rate": 0.0003842332968236583, "loss": 0.8017, "step": 3630 }, { "epoch": 0.24602146166289776, "grad_norm": 6.656386852264404, "learning_rate": 0.0003842278203723987, "loss": 1.005, "step": 3631 }, { "epoch": 0.24608921750472174, "grad_norm": 3.3356103897094727, "learning_rate": 0.0003842223439211391, "loss": 0.8939, "step": 3632 }, { "epoch": 0.2461569733465457, "grad_norm": 2.781855583190918, "learning_rate": 0.0003842168674698795, "loss": 0.7511, "step": 3633 }, { "epoch": 0.2462247291883697, "grad_norm": 3.0457804203033447, "learning_rate": 0.00038421139101861996, "loss": 0.9523, "step": 3634 }, { "epoch": 0.2462924850301937, "grad_norm": 2.585984945297241, "learning_rate": 0.00038420591456736036, "loss": 0.7108, "step": 3635 }, { "epoch": 0.2463602408720177, "grad_norm": 2.7748820781707764, "learning_rate": 0.0003842004381161008, "loss": 0.7178, "step": 3636 }, { "epoch": 0.24642799671384166, "grad_norm": 3.4772417545318604, "learning_rate": 0.0003841949616648412, "loss": 0.8056, "step": 3637 }, { "epoch": 0.24649575255566566, "grad_norm": 3.6336772441864014, "learning_rate": 0.0003841894852135816, "loss": 0.9024, "step": 3638 }, { "epoch": 0.24656350839748964, "grad_norm": 2.643249750137329, "learning_rate": 0.000384184008762322, "loss": 0.6573, "step": 3639 }, { "epoch": 0.24663126423931364, "grad_norm": 3.0842294692993164, "learning_rate": 0.00038417853231106246, "loss": 0.9045, "step": 3640 }, { "epoch": 0.24669902008113762, "grad_norm": 2.8832128047943115, "learning_rate": 0.00038417305585980286, "loss": 0.7339, "step": 3641 }, { "epoch": 0.24676677592296162, "grad_norm": 3.883462905883789, "learning_rate": 0.00038416757940854326, "loss": 1.0677, "step": 3642 }, { "epoch": 0.2468345317647856, "grad_norm": 4.286027908325195, "learning_rate": 0.0003841621029572837, "loss": 1.0355, "step": 3643 }, { "epoch": 0.2469022876066096, "grad_norm": 3.275578737258911, "learning_rate": 0.0003841566265060241, "loss": 0.758, "step": 3644 }, { "epoch": 0.24697004344843357, "grad_norm": 3.220132350921631, "learning_rate": 0.0003841511500547645, "loss": 0.8784, "step": 3645 }, { "epoch": 0.24703779929025757, "grad_norm": 2.976148843765259, "learning_rate": 0.00038414567360350497, "loss": 0.8917, "step": 3646 }, { "epoch": 0.24710555513208154, "grad_norm": 3.7267327308654785, "learning_rate": 0.00038414019715224537, "loss": 1.0633, "step": 3647 }, { "epoch": 0.24717331097390552, "grad_norm": 3.152019739151001, "learning_rate": 0.00038413472070098577, "loss": 0.7872, "step": 3648 }, { "epoch": 0.24724106681572952, "grad_norm": 3.847932815551758, "learning_rate": 0.00038412924424972617, "loss": 0.9946, "step": 3649 }, { "epoch": 0.2473088226575535, "grad_norm": 2.987445831298828, "learning_rate": 0.0003841237677984666, "loss": 0.8574, "step": 3650 }, { "epoch": 0.2473765784993775, "grad_norm": 3.6175968647003174, "learning_rate": 0.00038411829134720707, "loss": 0.7839, "step": 3651 }, { "epoch": 0.24744433434120147, "grad_norm": 2.919098138809204, "learning_rate": 0.00038411281489594747, "loss": 0.9751, "step": 3652 }, { "epoch": 0.24751209018302547, "grad_norm": 3.8991780281066895, "learning_rate": 0.00038410733844468787, "loss": 1.0142, "step": 3653 }, { "epoch": 0.24757984602484945, "grad_norm": 3.620443105697632, "learning_rate": 0.00038410186199342827, "loss": 0.9295, "step": 3654 }, { "epoch": 0.24764760186667345, "grad_norm": 4.178861618041992, "learning_rate": 0.00038409638554216867, "loss": 0.9741, "step": 3655 }, { "epoch": 0.24771535770849742, "grad_norm": 2.68290376663208, "learning_rate": 0.0003840909090909091, "loss": 0.758, "step": 3656 }, { "epoch": 0.24778311355032143, "grad_norm": 4.443676471710205, "learning_rate": 0.0003840854326396495, "loss": 1.1349, "step": 3657 }, { "epoch": 0.2478508693921454, "grad_norm": 3.5604474544525146, "learning_rate": 0.0003840799561883899, "loss": 0.8092, "step": 3658 }, { "epoch": 0.2479186252339694, "grad_norm": 3.1093716621398926, "learning_rate": 0.0003840744797371304, "loss": 0.8698, "step": 3659 }, { "epoch": 0.24798638107579338, "grad_norm": 3.4392693042755127, "learning_rate": 0.0003840690032858708, "loss": 1.0999, "step": 3660 }, { "epoch": 0.24805413691761738, "grad_norm": 4.430802822113037, "learning_rate": 0.00038406352683461117, "loss": 0.8996, "step": 3661 }, { "epoch": 0.24812189275944135, "grad_norm": 3.1374351978302, "learning_rate": 0.0003840580503833516, "loss": 0.8434, "step": 3662 }, { "epoch": 0.24818964860126533, "grad_norm": 3.239025115966797, "learning_rate": 0.000384052573932092, "loss": 0.6915, "step": 3663 }, { "epoch": 0.24825740444308933, "grad_norm": 5.312155723571777, "learning_rate": 0.0003840470974808324, "loss": 0.8982, "step": 3664 }, { "epoch": 0.2483251602849133, "grad_norm": 4.996529579162598, "learning_rate": 0.0003840416210295728, "loss": 1.2739, "step": 3665 }, { "epoch": 0.2483929161267373, "grad_norm": 3.8779194355010986, "learning_rate": 0.0003840361445783133, "loss": 0.8554, "step": 3666 }, { "epoch": 0.24846067196856128, "grad_norm": 4.90615177154541, "learning_rate": 0.00038403066812705373, "loss": 1.1101, "step": 3667 }, { "epoch": 0.24852842781038528, "grad_norm": 3.0782370567321777, "learning_rate": 0.00038402519167579413, "loss": 0.9249, "step": 3668 }, { "epoch": 0.24859618365220926, "grad_norm": 3.553218364715576, "learning_rate": 0.00038401971522453453, "loss": 1.0345, "step": 3669 }, { "epoch": 0.24866393949403326, "grad_norm": 4.608200550079346, "learning_rate": 0.00038401423877327493, "loss": 1.0789, "step": 3670 }, { "epoch": 0.24873169533585723, "grad_norm": 2.9339096546173096, "learning_rate": 0.00038400876232201533, "loss": 0.8319, "step": 3671 }, { "epoch": 0.24879945117768124, "grad_norm": 3.093270778656006, "learning_rate": 0.0003840032858707558, "loss": 0.8568, "step": 3672 }, { "epoch": 0.2488672070195052, "grad_norm": 3.6574976444244385, "learning_rate": 0.0003839978094194962, "loss": 0.9469, "step": 3673 }, { "epoch": 0.2489349628613292, "grad_norm": 2.7866458892822266, "learning_rate": 0.00038399233296823663, "loss": 0.7737, "step": 3674 }, { "epoch": 0.2490027187031532, "grad_norm": 4.70128059387207, "learning_rate": 0.00038398685651697703, "loss": 1.1959, "step": 3675 }, { "epoch": 0.2490704745449772, "grad_norm": 3.5798444747924805, "learning_rate": 0.00038398138006571743, "loss": 1.0328, "step": 3676 }, { "epoch": 0.24913823038680116, "grad_norm": 3.2754852771759033, "learning_rate": 0.00038397590361445783, "loss": 0.9629, "step": 3677 }, { "epoch": 0.24920598622862514, "grad_norm": 3.3738067150115967, "learning_rate": 0.0003839704271631983, "loss": 1.0307, "step": 3678 }, { "epoch": 0.24927374207044914, "grad_norm": 2.7617058753967285, "learning_rate": 0.0003839649507119387, "loss": 0.8117, "step": 3679 }, { "epoch": 0.2493414979122731, "grad_norm": 3.428276538848877, "learning_rate": 0.0003839594742606791, "loss": 1.1433, "step": 3680 }, { "epoch": 0.24940925375409712, "grad_norm": 2.8827009201049805, "learning_rate": 0.0003839539978094195, "loss": 0.7803, "step": 3681 }, { "epoch": 0.2494770095959211, "grad_norm": 3.3733298778533936, "learning_rate": 0.00038394852135815994, "loss": 0.8253, "step": 3682 }, { "epoch": 0.2495447654377451, "grad_norm": 3.572075605392456, "learning_rate": 0.00038394304490690034, "loss": 0.7953, "step": 3683 }, { "epoch": 0.24961252127956907, "grad_norm": 3.9213287830352783, "learning_rate": 0.0003839375684556408, "loss": 0.9151, "step": 3684 }, { "epoch": 0.24968027712139307, "grad_norm": 2.8898632526397705, "learning_rate": 0.0003839320920043812, "loss": 0.8617, "step": 3685 }, { "epoch": 0.24974803296321704, "grad_norm": 3.041987180709839, "learning_rate": 0.0003839266155531216, "loss": 0.8889, "step": 3686 }, { "epoch": 0.24981578880504104, "grad_norm": 3.3263607025146484, "learning_rate": 0.000383921139101862, "loss": 0.9432, "step": 3687 }, { "epoch": 0.24988354464686502, "grad_norm": 2.908055543899536, "learning_rate": 0.00038391566265060244, "loss": 0.7205, "step": 3688 }, { "epoch": 0.24995130048868902, "grad_norm": 3.964475154876709, "learning_rate": 0.00038391018619934284, "loss": 0.8656, "step": 3689 }, { "epoch": 0.24995130048868902, "eval_loss": 0.8432719707489014, "eval_noise_accuracy": 0.0, "eval_runtime": 15206.8395, "eval_samples_per_second": 0.338, "eval_steps_per_second": 0.085, "eval_wer": 62.08277152527448, "step": 3689 }, { "epoch": 0.250019056330513, "grad_norm": 3.945115089416504, "learning_rate": 0.0003839047097480833, "loss": 1.2082, "step": 3690 }, { "epoch": 0.25008681217233697, "grad_norm": 3.726381540298462, "learning_rate": 0.0003838992332968237, "loss": 1.0024, "step": 3691 }, { "epoch": 0.25015456801416097, "grad_norm": 3.1030168533325195, "learning_rate": 0.0003838937568455641, "loss": 0.7252, "step": 3692 }, { "epoch": 0.250222323855985, "grad_norm": 3.0350656509399414, "learning_rate": 0.0003838882803943045, "loss": 0.8437, "step": 3693 }, { "epoch": 0.2502900796978089, "grad_norm": 2.8735952377319336, "learning_rate": 0.00038388280394304494, "loss": 0.8282, "step": 3694 }, { "epoch": 0.2503578355396329, "grad_norm": 2.7129311561584473, "learning_rate": 0.00038387732749178534, "loss": 0.8063, "step": 3695 }, { "epoch": 0.2504255913814569, "grad_norm": 4.505862236022949, "learning_rate": 0.00038387185104052574, "loss": 0.8901, "step": 3696 }, { "epoch": 0.2504933472232809, "grad_norm": 2.7261810302734375, "learning_rate": 0.00038386637458926614, "loss": 0.7798, "step": 3697 }, { "epoch": 0.2505611030651049, "grad_norm": 3.1617021560668945, "learning_rate": 0.0003838608981380066, "loss": 0.9804, "step": 3698 }, { "epoch": 0.2506288589069289, "grad_norm": 2.7768478393554688, "learning_rate": 0.000383855421686747, "loss": 0.7679, "step": 3699 }, { "epoch": 0.2506966147487529, "grad_norm": 2.542478084564209, "learning_rate": 0.00038384994523548745, "loss": 0.7997, "step": 3700 }, { "epoch": 0.2507643705905769, "grad_norm": 3.274712324142456, "learning_rate": 0.00038384446878422785, "loss": 0.9881, "step": 3701 }, { "epoch": 0.2508321264324008, "grad_norm": 3.2481839656829834, "learning_rate": 0.00038383899233296825, "loss": 0.7841, "step": 3702 }, { "epoch": 0.25089988227422483, "grad_norm": 5.498259544372559, "learning_rate": 0.00038383351588170864, "loss": 1.0789, "step": 3703 }, { "epoch": 0.25096763811604883, "grad_norm": 4.471577167510986, "learning_rate": 0.0003838280394304491, "loss": 1.0375, "step": 3704 }, { "epoch": 0.25103539395787283, "grad_norm": 3.2552685737609863, "learning_rate": 0.0003838225629791895, "loss": 0.8797, "step": 3705 }, { "epoch": 0.2511031497996968, "grad_norm": 3.576436996459961, "learning_rate": 0.00038381708652792995, "loss": 0.7832, "step": 3706 }, { "epoch": 0.2511709056415208, "grad_norm": 3.494497537612915, "learning_rate": 0.00038381161007667035, "loss": 1.0856, "step": 3707 }, { "epoch": 0.2512386614833448, "grad_norm": 2.9934325218200684, "learning_rate": 0.00038380613362541075, "loss": 0.848, "step": 3708 }, { "epoch": 0.25130641732516873, "grad_norm": 3.768519401550293, "learning_rate": 0.00038380065717415115, "loss": 1.0488, "step": 3709 }, { "epoch": 0.25137417316699273, "grad_norm": 3.906024694442749, "learning_rate": 0.0003837951807228916, "loss": 0.9045, "step": 3710 }, { "epoch": 0.25144192900881673, "grad_norm": 3.9467475414276123, "learning_rate": 0.000383789704271632, "loss": 0.9267, "step": 3711 }, { "epoch": 0.25150968485064074, "grad_norm": 2.8772778511047363, "learning_rate": 0.0003837842278203724, "loss": 0.8188, "step": 3712 }, { "epoch": 0.2515774406924647, "grad_norm": 2.647195339202881, "learning_rate": 0.00038377875136911285, "loss": 0.8542, "step": 3713 }, { "epoch": 0.2516451965342887, "grad_norm": 3.744684934616089, "learning_rate": 0.00038377327491785325, "loss": 1.0178, "step": 3714 }, { "epoch": 0.2517129523761127, "grad_norm": 2.7992427349090576, "learning_rate": 0.00038376779846659365, "loss": 0.7445, "step": 3715 }, { "epoch": 0.2517807082179367, "grad_norm": 3.307591676712036, "learning_rate": 0.0003837623220153341, "loss": 0.8653, "step": 3716 }, { "epoch": 0.25184846405976063, "grad_norm": 3.6580865383148193, "learning_rate": 0.0003837568455640745, "loss": 1.0988, "step": 3717 }, { "epoch": 0.25191621990158464, "grad_norm": 5.759903430938721, "learning_rate": 0.0003837513691128149, "loss": 1.0088, "step": 3718 }, { "epoch": 0.25198397574340864, "grad_norm": 3.041792392730713, "learning_rate": 0.0003837458926615553, "loss": 0.8923, "step": 3719 }, { "epoch": 0.25205173158523264, "grad_norm": 3.8686976432800293, "learning_rate": 0.0003837404162102957, "loss": 0.9509, "step": 3720 }, { "epoch": 0.2521194874270566, "grad_norm": 3.335965156555176, "learning_rate": 0.00038373493975903616, "loss": 1.0425, "step": 3721 }, { "epoch": 0.2521872432688806, "grad_norm": 3.9542038440704346, "learning_rate": 0.0003837294633077766, "loss": 0.7781, "step": 3722 }, { "epoch": 0.2522549991107046, "grad_norm": 3.333540439605713, "learning_rate": 0.000383723986856517, "loss": 0.9906, "step": 3723 }, { "epoch": 0.25232275495252854, "grad_norm": 3.276549816131592, "learning_rate": 0.0003837185104052574, "loss": 0.8608, "step": 3724 }, { "epoch": 0.25239051079435254, "grad_norm": 2.620260000228882, "learning_rate": 0.0003837130339539978, "loss": 0.8812, "step": 3725 }, { "epoch": 0.25245826663617654, "grad_norm": 3.5397908687591553, "learning_rate": 0.00038370755750273826, "loss": 0.8974, "step": 3726 }, { "epoch": 0.25252602247800054, "grad_norm": 3.0434844493865967, "learning_rate": 0.00038370208105147866, "loss": 0.7951, "step": 3727 }, { "epoch": 0.2525937783198245, "grad_norm": 3.2263176441192627, "learning_rate": 0.00038369660460021906, "loss": 0.7483, "step": 3728 }, { "epoch": 0.2526615341616485, "grad_norm": 2.7935893535614014, "learning_rate": 0.0003836911281489595, "loss": 0.7375, "step": 3729 }, { "epoch": 0.2527292900034725, "grad_norm": 3.244950771331787, "learning_rate": 0.0003836856516976999, "loss": 0.7737, "step": 3730 }, { "epoch": 0.2527970458452965, "grad_norm": 3.7162082195281982, "learning_rate": 0.0003836801752464403, "loss": 0.8675, "step": 3731 }, { "epoch": 0.25286480168712044, "grad_norm": 3.286712169647217, "learning_rate": 0.00038367469879518076, "loss": 0.8553, "step": 3732 }, { "epoch": 0.25293255752894445, "grad_norm": 4.761713981628418, "learning_rate": 0.00038366922234392116, "loss": 0.9591, "step": 3733 }, { "epoch": 0.25300031337076845, "grad_norm": 3.431506872177124, "learning_rate": 0.00038366374589266156, "loss": 0.9939, "step": 3734 }, { "epoch": 0.25306806921259245, "grad_norm": 4.148658275604248, "learning_rate": 0.00038365826944140196, "loss": 1.0675, "step": 3735 }, { "epoch": 0.2531358250544164, "grad_norm": 3.306816339492798, "learning_rate": 0.00038365279299014236, "loss": 1.0673, "step": 3736 }, { "epoch": 0.2532035808962404, "grad_norm": 3.8047678470611572, "learning_rate": 0.0003836473165388828, "loss": 0.8961, "step": 3737 }, { "epoch": 0.2532713367380644, "grad_norm": 5.400696277618408, "learning_rate": 0.00038364184008762327, "loss": 0.9628, "step": 3738 }, { "epoch": 0.25333909257988835, "grad_norm": 4.231796741485596, "learning_rate": 0.00038363636363636367, "loss": 0.9805, "step": 3739 }, { "epoch": 0.25340684842171235, "grad_norm": 2.741238832473755, "learning_rate": 0.00038363088718510407, "loss": 0.7717, "step": 3740 }, { "epoch": 0.25347460426353635, "grad_norm": 3.78739333152771, "learning_rate": 0.00038362541073384447, "loss": 1.209, "step": 3741 }, { "epoch": 0.25354236010536035, "grad_norm": 3.427900791168213, "learning_rate": 0.0003836199342825849, "loss": 0.9675, "step": 3742 }, { "epoch": 0.2536101159471843, "grad_norm": 4.223872184753418, "learning_rate": 0.0003836144578313253, "loss": 1.147, "step": 3743 }, { "epoch": 0.2536778717890083, "grad_norm": 3.0559303760528564, "learning_rate": 0.00038360898138006577, "loss": 0.8764, "step": 3744 }, { "epoch": 0.2537456276308323, "grad_norm": 3.7905776500701904, "learning_rate": 0.00038360350492880617, "loss": 0.8678, "step": 3745 }, { "epoch": 0.2538133834726563, "grad_norm": 2.898543119430542, "learning_rate": 0.00038359802847754657, "loss": 0.7072, "step": 3746 }, { "epoch": 0.25388113931448025, "grad_norm": 3.2805566787719727, "learning_rate": 0.00038359255202628697, "loss": 0.8785, "step": 3747 }, { "epoch": 0.25394889515630426, "grad_norm": 3.2746689319610596, "learning_rate": 0.0003835870755750274, "loss": 0.8281, "step": 3748 }, { "epoch": 0.25401665099812826, "grad_norm": 3.4209580421447754, "learning_rate": 0.0003835815991237678, "loss": 0.879, "step": 3749 }, { "epoch": 0.25408440683995226, "grad_norm": 4.869858264923096, "learning_rate": 0.0003835761226725082, "loss": 1.212, "step": 3750 }, { "epoch": 0.2541521626817762, "grad_norm": 3.1479673385620117, "learning_rate": 0.0003835706462212486, "loss": 0.8662, "step": 3751 }, { "epoch": 0.2542199185236002, "grad_norm": 2.6611757278442383, "learning_rate": 0.0003835651697699891, "loss": 0.7113, "step": 3752 }, { "epoch": 0.2542876743654242, "grad_norm": 3.4080986976623535, "learning_rate": 0.00038355969331872947, "loss": 0.7316, "step": 3753 }, { "epoch": 0.25435543020724816, "grad_norm": 4.091248512268066, "learning_rate": 0.0003835542168674699, "loss": 0.9961, "step": 3754 }, { "epoch": 0.25442318604907216, "grad_norm": 3.3971917629241943, "learning_rate": 0.0003835487404162103, "loss": 0.7496, "step": 3755 }, { "epoch": 0.25449094189089616, "grad_norm": 2.857682943344116, "learning_rate": 0.0003835432639649507, "loss": 0.9128, "step": 3756 }, { "epoch": 0.25455869773272016, "grad_norm": 3.9774670600891113, "learning_rate": 0.0003835377875136911, "loss": 1.008, "step": 3757 }, { "epoch": 0.2546264535745441, "grad_norm": 2.7955613136291504, "learning_rate": 0.0003835323110624315, "loss": 0.9472, "step": 3758 }, { "epoch": 0.2546942094163681, "grad_norm": 2.1893701553344727, "learning_rate": 0.000383526834611172, "loss": 0.67, "step": 3759 }, { "epoch": 0.2547619652581921, "grad_norm": 3.1054859161376953, "learning_rate": 0.00038352135815991243, "loss": 0.7587, "step": 3760 }, { "epoch": 0.2548297211000161, "grad_norm": 3.2473466396331787, "learning_rate": 0.00038351588170865283, "loss": 0.8714, "step": 3761 }, { "epoch": 0.25489747694184006, "grad_norm": 4.202857494354248, "learning_rate": 0.00038351040525739323, "loss": 1.0372, "step": 3762 }, { "epoch": 0.25496523278366406, "grad_norm": 3.3349661827087402, "learning_rate": 0.00038350492880613363, "loss": 0.8378, "step": 3763 }, { "epoch": 0.25503298862548807, "grad_norm": 3.414837121963501, "learning_rate": 0.0003834994523548741, "loss": 0.9739, "step": 3764 }, { "epoch": 0.25510074446731207, "grad_norm": 5.924951076507568, "learning_rate": 0.0003834939759036145, "loss": 0.7899, "step": 3765 }, { "epoch": 0.255168500309136, "grad_norm": 2.9982686042785645, "learning_rate": 0.0003834884994523549, "loss": 0.884, "step": 3766 }, { "epoch": 0.25523625615096, "grad_norm": 2.9299068450927734, "learning_rate": 0.0003834830230010953, "loss": 0.8864, "step": 3767 }, { "epoch": 0.255304011992784, "grad_norm": 3.691833257675171, "learning_rate": 0.00038347754654983573, "loss": 0.9687, "step": 3768 }, { "epoch": 0.25537176783460797, "grad_norm": 3.061525821685791, "learning_rate": 0.00038347207009857613, "loss": 0.7272, "step": 3769 }, { "epoch": 0.25543952367643197, "grad_norm": 5.900145530700684, "learning_rate": 0.0003834665936473166, "loss": 0.8881, "step": 3770 }, { "epoch": 0.25550727951825597, "grad_norm": 2.968076229095459, "learning_rate": 0.000383461117196057, "loss": 0.767, "step": 3771 }, { "epoch": 0.25557503536007997, "grad_norm": 4.432077884674072, "learning_rate": 0.0003834556407447974, "loss": 0.8, "step": 3772 }, { "epoch": 0.2556427912019039, "grad_norm": 2.8231563568115234, "learning_rate": 0.0003834501642935378, "loss": 0.7223, "step": 3773 }, { "epoch": 0.2557105470437279, "grad_norm": 3.3790507316589355, "learning_rate": 0.0003834446878422782, "loss": 0.89, "step": 3774 }, { "epoch": 0.2557783028855519, "grad_norm": 3.4450955390930176, "learning_rate": 0.00038343921139101863, "loss": 1.1283, "step": 3775 }, { "epoch": 0.2558460587273759, "grad_norm": 11.767523765563965, "learning_rate": 0.0003834337349397591, "loss": 1.2681, "step": 3776 }, { "epoch": 0.25591381456919987, "grad_norm": 3.401125192642212, "learning_rate": 0.0003834282584884995, "loss": 0.905, "step": 3777 }, { "epoch": 0.2559815704110239, "grad_norm": 4.4450907707214355, "learning_rate": 0.0003834227820372399, "loss": 0.9932, "step": 3778 }, { "epoch": 0.2560493262528479, "grad_norm": 4.3434295654296875, "learning_rate": 0.0003834173055859803, "loss": 1.0969, "step": 3779 }, { "epoch": 0.2561170820946719, "grad_norm": 3.205775022506714, "learning_rate": 0.00038341182913472074, "loss": 1.0064, "step": 3780 }, { "epoch": 0.2561848379364958, "grad_norm": 3.3582472801208496, "learning_rate": 0.00038340635268346114, "loss": 0.9701, "step": 3781 }, { "epoch": 0.2562525937783198, "grad_norm": 3.231454372406006, "learning_rate": 0.00038340087623220154, "loss": 0.854, "step": 3782 }, { "epoch": 0.25632034962014383, "grad_norm": 3.5778987407684326, "learning_rate": 0.000383395399780942, "loss": 1.0409, "step": 3783 }, { "epoch": 0.2563881054619678, "grad_norm": 3.7966370582580566, "learning_rate": 0.0003833899233296824, "loss": 1.03, "step": 3784 }, { "epoch": 0.2564558613037918, "grad_norm": 5.163334846496582, "learning_rate": 0.0003833844468784228, "loss": 1.3242, "step": 3785 }, { "epoch": 0.2565236171456158, "grad_norm": 3.0766894817352295, "learning_rate": 0.00038337897042716324, "loss": 0.8096, "step": 3786 }, { "epoch": 0.2565913729874398, "grad_norm": 3.6382365226745605, "learning_rate": 0.00038337349397590364, "loss": 1.1954, "step": 3787 }, { "epoch": 0.2566591288292637, "grad_norm": 2.8911855220794678, "learning_rate": 0.00038336801752464404, "loss": 0.8138, "step": 3788 }, { "epoch": 0.25672688467108773, "grad_norm": 6.181971549987793, "learning_rate": 0.00038336254107338444, "loss": 1.3154, "step": 3789 }, { "epoch": 0.25679464051291173, "grad_norm": 2.7549397945404053, "learning_rate": 0.00038335706462212484, "loss": 0.7052, "step": 3790 }, { "epoch": 0.25686239635473573, "grad_norm": 3.0612099170684814, "learning_rate": 0.0003833515881708653, "loss": 0.8409, "step": 3791 }, { "epoch": 0.2569301521965597, "grad_norm": 3.156012535095215, "learning_rate": 0.00038334611171960575, "loss": 0.8129, "step": 3792 }, { "epoch": 0.2569979080383837, "grad_norm": 2.99743914604187, "learning_rate": 0.00038334063526834615, "loss": 1.0105, "step": 3793 }, { "epoch": 0.2570656638802077, "grad_norm": 2.574354887008667, "learning_rate": 0.00038333515881708655, "loss": 0.6397, "step": 3794 }, { "epoch": 0.2571334197220317, "grad_norm": 6.679385662078857, "learning_rate": 0.00038332968236582694, "loss": 1.0633, "step": 3795 }, { "epoch": 0.25720117556385563, "grad_norm": 3.1182756423950195, "learning_rate": 0.00038332420591456734, "loss": 0.8464, "step": 3796 }, { "epoch": 0.25726893140567964, "grad_norm": 7.024462699890137, "learning_rate": 0.0003833187294633078, "loss": 0.9934, "step": 3797 }, { "epoch": 0.25733668724750364, "grad_norm": 4.020702838897705, "learning_rate": 0.0003833132530120482, "loss": 1.0777, "step": 3798 }, { "epoch": 0.2574044430893276, "grad_norm": 3.3972349166870117, "learning_rate": 0.00038330777656078865, "loss": 0.7847, "step": 3799 }, { "epoch": 0.2574721989311516, "grad_norm": 5.67258358001709, "learning_rate": 0.00038330230010952905, "loss": 0.9187, "step": 3800 }, { "epoch": 0.2575399547729756, "grad_norm": 5.1876220703125, "learning_rate": 0.00038329682365826945, "loss": 1.0144, "step": 3801 }, { "epoch": 0.2576077106147996, "grad_norm": 4.013278484344482, "learning_rate": 0.0003832913472070099, "loss": 0.8928, "step": 3802 }, { "epoch": 0.25767546645662354, "grad_norm": 3.7680952548980713, "learning_rate": 0.0003832858707557503, "loss": 0.9298, "step": 3803 }, { "epoch": 0.25774322229844754, "grad_norm": 4.013795852661133, "learning_rate": 0.0003832803943044907, "loss": 0.902, "step": 3804 }, { "epoch": 0.25781097814027154, "grad_norm": 3.5484025478363037, "learning_rate": 0.0003832749178532311, "loss": 0.9708, "step": 3805 }, { "epoch": 0.25787873398209554, "grad_norm": 2.855067491531372, "learning_rate": 0.00038326944140197155, "loss": 0.7901, "step": 3806 }, { "epoch": 0.2579464898239195, "grad_norm": 3.4689276218414307, "learning_rate": 0.00038326396495071195, "loss": 0.8623, "step": 3807 }, { "epoch": 0.2580142456657435, "grad_norm": 2.7453114986419678, "learning_rate": 0.0003832584884994524, "loss": 0.7586, "step": 3808 }, { "epoch": 0.2580820015075675, "grad_norm": 5.138735294342041, "learning_rate": 0.0003832530120481928, "loss": 1.1546, "step": 3809 }, { "epoch": 0.2581497573493915, "grad_norm": 4.025753498077393, "learning_rate": 0.0003832475355969332, "loss": 0.9713, "step": 3810 }, { "epoch": 0.25821751319121544, "grad_norm": 3.834397315979004, "learning_rate": 0.0003832420591456736, "loss": 0.9858, "step": 3811 }, { "epoch": 0.25828526903303944, "grad_norm": 4.4391398429870605, "learning_rate": 0.000383236582694414, "loss": 0.7663, "step": 3812 }, { "epoch": 0.25835302487486345, "grad_norm": 3.530118942260742, "learning_rate": 0.00038323110624315446, "loss": 0.9302, "step": 3813 }, { "epoch": 0.2584207807166874, "grad_norm": 3.5284953117370605, "learning_rate": 0.0003832256297918949, "loss": 0.9246, "step": 3814 }, { "epoch": 0.2584885365585114, "grad_norm": 3.7423524856567383, "learning_rate": 0.0003832201533406353, "loss": 1.0674, "step": 3815 }, { "epoch": 0.2585562924003354, "grad_norm": 3.8444557189941406, "learning_rate": 0.0003832146768893757, "loss": 0.9041, "step": 3816 }, { "epoch": 0.2586240482421594, "grad_norm": 3.160054922103882, "learning_rate": 0.0003832092004381161, "loss": 0.8215, "step": 3817 }, { "epoch": 0.25869180408398335, "grad_norm": 3.602114200592041, "learning_rate": 0.00038320372398685656, "loss": 1.0354, "step": 3818 }, { "epoch": 0.25875955992580735, "grad_norm": 4.9594621658325195, "learning_rate": 0.00038319824753559696, "loss": 0.9933, "step": 3819 }, { "epoch": 0.25882731576763135, "grad_norm": 2.556272506713867, "learning_rate": 0.00038319277108433736, "loss": 0.7722, "step": 3820 }, { "epoch": 0.25889507160945535, "grad_norm": 3.246297597885132, "learning_rate": 0.00038318729463307776, "loss": 0.8644, "step": 3821 }, { "epoch": 0.2589628274512793, "grad_norm": 2.6900722980499268, "learning_rate": 0.0003831818181818182, "loss": 0.8842, "step": 3822 }, { "epoch": 0.2590305832931033, "grad_norm": 3.4152801036834717, "learning_rate": 0.0003831763417305586, "loss": 0.977, "step": 3823 }, { "epoch": 0.2590983391349273, "grad_norm": 2.9592761993408203, "learning_rate": 0.00038317086527929906, "loss": 0.7673, "step": 3824 }, { "epoch": 0.2591660949767513, "grad_norm": 3.0182785987854004, "learning_rate": 0.00038316538882803946, "loss": 0.8766, "step": 3825 }, { "epoch": 0.25923385081857525, "grad_norm": 4.031710624694824, "learning_rate": 0.00038315991237677986, "loss": 0.9645, "step": 3826 }, { "epoch": 0.25930160666039925, "grad_norm": 2.9774582386016846, "learning_rate": 0.00038315443592552026, "loss": 0.8, "step": 3827 }, { "epoch": 0.25936936250222326, "grad_norm": 3.8333871364593506, "learning_rate": 0.00038314895947426066, "loss": 1.2132, "step": 3828 }, { "epoch": 0.2594371183440472, "grad_norm": 3.0023181438446045, "learning_rate": 0.0003831434830230011, "loss": 0.6734, "step": 3829 }, { "epoch": 0.2595048741858712, "grad_norm": 2.6555066108703613, "learning_rate": 0.00038313800657174157, "loss": 0.7455, "step": 3830 }, { "epoch": 0.2595726300276952, "grad_norm": 3.5629100799560547, "learning_rate": 0.00038313253012048197, "loss": 0.9898, "step": 3831 }, { "epoch": 0.2596403858695192, "grad_norm": 3.2459683418273926, "learning_rate": 0.00038312705366922237, "loss": 0.8824, "step": 3832 }, { "epoch": 0.25970814171134315, "grad_norm": 3.1705644130706787, "learning_rate": 0.00038312157721796277, "loss": 0.9174, "step": 3833 }, { "epoch": 0.25977589755316716, "grad_norm": 3.2605936527252197, "learning_rate": 0.00038311610076670316, "loss": 1.0074, "step": 3834 }, { "epoch": 0.25984365339499116, "grad_norm": 3.234121561050415, "learning_rate": 0.0003831106243154436, "loss": 0.6709, "step": 3835 }, { "epoch": 0.25991140923681516, "grad_norm": 3.5711047649383545, "learning_rate": 0.000383105147864184, "loss": 1.0973, "step": 3836 }, { "epoch": 0.2599791650786391, "grad_norm": 5.482245922088623, "learning_rate": 0.0003830996714129244, "loss": 1.0875, "step": 3837 }, { "epoch": 0.2600469209204631, "grad_norm": 3.498448610305786, "learning_rate": 0.00038309419496166487, "loss": 0.8831, "step": 3838 }, { "epoch": 0.2601146767622871, "grad_norm": 3.0265636444091797, "learning_rate": 0.00038308871851040527, "loss": 0.8589, "step": 3839 }, { "epoch": 0.26018243260411106, "grad_norm": 3.747673749923706, "learning_rate": 0.0003830832420591457, "loss": 1.0573, "step": 3840 }, { "epoch": 0.26025018844593506, "grad_norm": 3.9995877742767334, "learning_rate": 0.0003830777656078861, "loss": 1.0355, "step": 3841 }, { "epoch": 0.26031794428775906, "grad_norm": 3.4097793102264404, "learning_rate": 0.0003830722891566265, "loss": 0.921, "step": 3842 }, { "epoch": 0.26038570012958306, "grad_norm": 2.804934501647949, "learning_rate": 0.0003830668127053669, "loss": 0.8389, "step": 3843 }, { "epoch": 0.260453455971407, "grad_norm": 3.6106488704681396, "learning_rate": 0.0003830613362541073, "loss": 0.8062, "step": 3844 }, { "epoch": 0.260521211813231, "grad_norm": 4.053573131561279, "learning_rate": 0.00038305585980284777, "loss": 1.0809, "step": 3845 }, { "epoch": 0.260588967655055, "grad_norm": 4.229400634765625, "learning_rate": 0.0003830503833515882, "loss": 0.9928, "step": 3846 }, { "epoch": 0.260656723496879, "grad_norm": 3.3701770305633545, "learning_rate": 0.0003830449069003286, "loss": 0.7538, "step": 3847 }, { "epoch": 0.26072447933870296, "grad_norm": 4.125498294830322, "learning_rate": 0.000383039430449069, "loss": 1.0355, "step": 3848 }, { "epoch": 0.26079223518052697, "grad_norm": 3.5209505558013916, "learning_rate": 0.0003830339539978094, "loss": 0.9445, "step": 3849 }, { "epoch": 0.26085999102235097, "grad_norm": 2.8481569290161133, "learning_rate": 0.0003830284775465498, "loss": 0.7807, "step": 3850 }, { "epoch": 0.26092774686417497, "grad_norm": 3.1698009967803955, "learning_rate": 0.0003830230010952903, "loss": 0.8908, "step": 3851 }, { "epoch": 0.2609955027059989, "grad_norm": 3.3761160373687744, "learning_rate": 0.0003830175246440307, "loss": 0.8463, "step": 3852 }, { "epoch": 0.2610632585478229, "grad_norm": 3.892441987991333, "learning_rate": 0.00038301204819277113, "loss": 0.6868, "step": 3853 }, { "epoch": 0.2611310143896469, "grad_norm": 4.324263095855713, "learning_rate": 0.00038300657174151153, "loss": 0.9675, "step": 3854 }, { "epoch": 0.26119877023147087, "grad_norm": 2.836991310119629, "learning_rate": 0.00038300109529025193, "loss": 0.75, "step": 3855 }, { "epoch": 0.26126652607329487, "grad_norm": 3.2646713256835938, "learning_rate": 0.0003829956188389924, "loss": 0.9905, "step": 3856 }, { "epoch": 0.26133428191511887, "grad_norm": 3.525623083114624, "learning_rate": 0.0003829901423877328, "loss": 0.9662, "step": 3857 }, { "epoch": 0.2614020377569429, "grad_norm": 3.1224725246429443, "learning_rate": 0.0003829846659364732, "loss": 0.7944, "step": 3858 }, { "epoch": 0.2614697935987668, "grad_norm": 4.051478862762451, "learning_rate": 0.0003829791894852136, "loss": 0.9206, "step": 3859 }, { "epoch": 0.2615375494405908, "grad_norm": 3.823009967803955, "learning_rate": 0.000382973713033954, "loss": 1.0563, "step": 3860 }, { "epoch": 0.2616053052824148, "grad_norm": 3.088054895401001, "learning_rate": 0.00038296823658269443, "loss": 0.6629, "step": 3861 }, { "epoch": 0.2616730611242388, "grad_norm": 3.3027758598327637, "learning_rate": 0.0003829627601314349, "loss": 0.9081, "step": 3862 }, { "epoch": 0.2617408169660628, "grad_norm": 3.1604514122009277, "learning_rate": 0.0003829572836801753, "loss": 0.9608, "step": 3863 }, { "epoch": 0.2618085728078868, "grad_norm": 2.9599483013153076, "learning_rate": 0.0003829518072289157, "loss": 0.948, "step": 3864 }, { "epoch": 0.2618763286497108, "grad_norm": 3.041414499282837, "learning_rate": 0.0003829463307776561, "loss": 0.871, "step": 3865 }, { "epoch": 0.2619440844915348, "grad_norm": 3.6486713886260986, "learning_rate": 0.0003829408543263965, "loss": 0.9299, "step": 3866 }, { "epoch": 0.2620118403333587, "grad_norm": 3.5397045612335205, "learning_rate": 0.00038293537787513693, "loss": 0.866, "step": 3867 }, { "epoch": 0.2620795961751827, "grad_norm": 4.200387954711914, "learning_rate": 0.00038292990142387733, "loss": 1.1121, "step": 3868 }, { "epoch": 0.26214735201700673, "grad_norm": 2.97200870513916, "learning_rate": 0.0003829244249726178, "loss": 0.983, "step": 3869 }, { "epoch": 0.2622151078588307, "grad_norm": 3.2636642456054688, "learning_rate": 0.0003829189485213582, "loss": 1.0594, "step": 3870 }, { "epoch": 0.2622828637006547, "grad_norm": 2.5356268882751465, "learning_rate": 0.0003829134720700986, "loss": 0.8303, "step": 3871 }, { "epoch": 0.2623506195424787, "grad_norm": 3.038771152496338, "learning_rate": 0.000382907995618839, "loss": 0.7437, "step": 3872 }, { "epoch": 0.2624183753843027, "grad_norm": 2.9753763675689697, "learning_rate": 0.00038290251916757944, "loss": 0.8612, "step": 3873 }, { "epoch": 0.26248613122612663, "grad_norm": 3.129913330078125, "learning_rate": 0.00038289704271631984, "loss": 1.0074, "step": 3874 }, { "epoch": 0.26255388706795063, "grad_norm": 5.079966068267822, "learning_rate": 0.00038289156626506024, "loss": 1.1109, "step": 3875 }, { "epoch": 0.26262164290977463, "grad_norm": 3.2109076976776123, "learning_rate": 0.0003828860898138007, "loss": 0.8455, "step": 3876 }, { "epoch": 0.26268939875159864, "grad_norm": 3.1828322410583496, "learning_rate": 0.0003828806133625411, "loss": 0.8699, "step": 3877 }, { "epoch": 0.2627571545934226, "grad_norm": 3.0322105884552, "learning_rate": 0.00038287513691128154, "loss": 0.8932, "step": 3878 }, { "epoch": 0.2628249104352466, "grad_norm": 4.043949604034424, "learning_rate": 0.00038286966046002194, "loss": 1.076, "step": 3879 }, { "epoch": 0.2628926662770706, "grad_norm": 3.5644052028656006, "learning_rate": 0.00038286418400876234, "loss": 1.0405, "step": 3880 }, { "epoch": 0.2629604221188946, "grad_norm": 3.385065793991089, "learning_rate": 0.00038285870755750274, "loss": 0.9329, "step": 3881 }, { "epoch": 0.26302817796071853, "grad_norm": 4.520881175994873, "learning_rate": 0.00038285323110624314, "loss": 0.9092, "step": 3882 }, { "epoch": 0.26309593380254254, "grad_norm": 3.6209871768951416, "learning_rate": 0.0003828477546549836, "loss": 0.9839, "step": 3883 }, { "epoch": 0.26316368964436654, "grad_norm": 4.704605579376221, "learning_rate": 0.00038284227820372405, "loss": 1.001, "step": 3884 }, { "epoch": 0.2632314454861905, "grad_norm": 3.31378173828125, "learning_rate": 0.00038283680175246445, "loss": 0.9151, "step": 3885 }, { "epoch": 0.2632992013280145, "grad_norm": 3.9950809478759766, "learning_rate": 0.00038283132530120485, "loss": 1.3458, "step": 3886 }, { "epoch": 0.2633669571698385, "grad_norm": 3.5771446228027344, "learning_rate": 0.00038282584884994524, "loss": 0.9859, "step": 3887 }, { "epoch": 0.2634347130116625, "grad_norm": 3.109168767929077, "learning_rate": 0.00038282037239868564, "loss": 0.9542, "step": 3888 }, { "epoch": 0.26350246885348644, "grad_norm": 2.692270040512085, "learning_rate": 0.0003828148959474261, "loss": 0.7427, "step": 3889 }, { "epoch": 0.26357022469531044, "grad_norm": 4.227486610412598, "learning_rate": 0.0003828094194961665, "loss": 0.9147, "step": 3890 }, { "epoch": 0.26363798053713444, "grad_norm": 4.064394474029541, "learning_rate": 0.0003828039430449069, "loss": 1.1386, "step": 3891 }, { "epoch": 0.26370573637895844, "grad_norm": 4.0192999839782715, "learning_rate": 0.00038279846659364735, "loss": 0.8778, "step": 3892 }, { "epoch": 0.2637734922207824, "grad_norm": 2.9453155994415283, "learning_rate": 0.00038279299014238775, "loss": 0.8548, "step": 3893 }, { "epoch": 0.2638412480626064, "grad_norm": 4.041325569152832, "learning_rate": 0.0003827875136911282, "loss": 0.9365, "step": 3894 }, { "epoch": 0.2639090039044304, "grad_norm": 2.821990728378296, "learning_rate": 0.0003827820372398686, "loss": 0.8119, "step": 3895 }, { "epoch": 0.2639767597462544, "grad_norm": 3.1739964485168457, "learning_rate": 0.000382776560788609, "loss": 0.7323, "step": 3896 }, { "epoch": 0.26404451558807834, "grad_norm": 3.4655818939208984, "learning_rate": 0.0003827710843373494, "loss": 0.9336, "step": 3897 }, { "epoch": 0.26411227142990235, "grad_norm": 2.953963279724121, "learning_rate": 0.0003827656078860898, "loss": 0.9646, "step": 3898 }, { "epoch": 0.26418002727172635, "grad_norm": 3.1523964405059814, "learning_rate": 0.00038276013143483025, "loss": 1.0196, "step": 3899 }, { "epoch": 0.2642477831135503, "grad_norm": 3.8185946941375732, "learning_rate": 0.0003827546549835707, "loss": 0.8119, "step": 3900 }, { "epoch": 0.2643155389553743, "grad_norm": 3.01875638961792, "learning_rate": 0.0003827491785323111, "loss": 0.7968, "step": 3901 }, { "epoch": 0.2643832947971983, "grad_norm": 2.9579169750213623, "learning_rate": 0.0003827437020810515, "loss": 0.7681, "step": 3902 }, { "epoch": 0.2644510506390223, "grad_norm": 3.232266902923584, "learning_rate": 0.0003827382256297919, "loss": 0.8349, "step": 3903 }, { "epoch": 0.26451880648084625, "grad_norm": 2.797696352005005, "learning_rate": 0.0003827327491785323, "loss": 0.8843, "step": 3904 }, { "epoch": 0.26458656232267025, "grad_norm": 4.049548625946045, "learning_rate": 0.00038272727272727276, "loss": 0.9139, "step": 3905 }, { "epoch": 0.26465431816449425, "grad_norm": 2.907792806625366, "learning_rate": 0.00038272179627601315, "loss": 0.841, "step": 3906 }, { "epoch": 0.26472207400631825, "grad_norm": 3.0350453853607178, "learning_rate": 0.0003827163198247536, "loss": 0.7614, "step": 3907 }, { "epoch": 0.2647898298481422, "grad_norm": 3.5818538665771484, "learning_rate": 0.000382710843373494, "loss": 0.9094, "step": 3908 }, { "epoch": 0.2648575856899662, "grad_norm": 3.4420864582061768, "learning_rate": 0.0003827053669222344, "loss": 0.9483, "step": 3909 }, { "epoch": 0.2649253415317902, "grad_norm": 13.82105541229248, "learning_rate": 0.0003826998904709748, "loss": 0.8486, "step": 3910 }, { "epoch": 0.2649930973736142, "grad_norm": 5.8914265632629395, "learning_rate": 0.00038269441401971526, "loss": 0.9463, "step": 3911 }, { "epoch": 0.26506085321543815, "grad_norm": 3.3003058433532715, "learning_rate": 0.00038268893756845566, "loss": 0.882, "step": 3912 }, { "epoch": 0.26512860905726215, "grad_norm": 4.067704677581787, "learning_rate": 0.00038268346111719606, "loss": 0.8904, "step": 3913 }, { "epoch": 0.26519636489908616, "grad_norm": 3.318633794784546, "learning_rate": 0.00038267798466593646, "loss": 0.8208, "step": 3914 }, { "epoch": 0.2652641207409101, "grad_norm": 3.2929890155792236, "learning_rate": 0.0003826725082146769, "loss": 0.6402, "step": 3915 }, { "epoch": 0.2653318765827341, "grad_norm": 3.8115742206573486, "learning_rate": 0.00038266703176341736, "loss": 0.9851, "step": 3916 }, { "epoch": 0.2653996324245581, "grad_norm": 3.623486042022705, "learning_rate": 0.00038266155531215776, "loss": 1.0891, "step": 3917 }, { "epoch": 0.2654673882663821, "grad_norm": 3.228214979171753, "learning_rate": 0.00038265607886089816, "loss": 1.055, "step": 3918 }, { "epoch": 0.26553514410820606, "grad_norm": 3.0068366527557373, "learning_rate": 0.00038265060240963856, "loss": 0.844, "step": 3919 }, { "epoch": 0.26560289995003006, "grad_norm": 3.049351930618286, "learning_rate": 0.00038264512595837896, "loss": 0.968, "step": 3920 }, { "epoch": 0.26567065579185406, "grad_norm": 2.82605242729187, "learning_rate": 0.0003826396495071194, "loss": 0.8343, "step": 3921 }, { "epoch": 0.26573841163367806, "grad_norm": 3.048635721206665, "learning_rate": 0.0003826341730558598, "loss": 0.766, "step": 3922 }, { "epoch": 0.265806167475502, "grad_norm": 3.2172892093658447, "learning_rate": 0.00038262869660460027, "loss": 0.8611, "step": 3923 }, { "epoch": 0.265873923317326, "grad_norm": 3.6851248741149902, "learning_rate": 0.00038262322015334067, "loss": 0.977, "step": 3924 }, { "epoch": 0.26594167915915, "grad_norm": 3.5320701599121094, "learning_rate": 0.00038261774370208106, "loss": 0.8857, "step": 3925 }, { "epoch": 0.266009435000974, "grad_norm": 3.714339017868042, "learning_rate": 0.00038261226725082146, "loss": 0.9373, "step": 3926 }, { "epoch": 0.26607719084279796, "grad_norm": 3.137050151824951, "learning_rate": 0.0003826067907995619, "loss": 0.756, "step": 3927 }, { "epoch": 0.26614494668462196, "grad_norm": 3.260514259338379, "learning_rate": 0.0003826013143483023, "loss": 0.733, "step": 3928 }, { "epoch": 0.26621270252644597, "grad_norm": 5.229447364807129, "learning_rate": 0.0003825958378970427, "loss": 1.2776, "step": 3929 }, { "epoch": 0.2662804583682699, "grad_norm": 3.2068936824798584, "learning_rate": 0.0003825903614457831, "loss": 0.9781, "step": 3930 }, { "epoch": 0.2663482142100939, "grad_norm": 3.27554988861084, "learning_rate": 0.00038258488499452357, "loss": 0.9302, "step": 3931 }, { "epoch": 0.2664159700519179, "grad_norm": 4.322659492492676, "learning_rate": 0.000382579408543264, "loss": 1.1433, "step": 3932 }, { "epoch": 0.2664837258937419, "grad_norm": 4.469461917877197, "learning_rate": 0.0003825739320920044, "loss": 1.0061, "step": 3933 }, { "epoch": 0.26655148173556586, "grad_norm": 3.7429487705230713, "learning_rate": 0.0003825684556407448, "loss": 1.0448, "step": 3934 }, { "epoch": 0.26661923757738987, "grad_norm": 2.894131898880005, "learning_rate": 0.0003825629791894852, "loss": 0.7489, "step": 3935 }, { "epoch": 0.26668699341921387, "grad_norm": 3.656524181365967, "learning_rate": 0.0003825575027382256, "loss": 1.0108, "step": 3936 }, { "epoch": 0.26675474926103787, "grad_norm": 3.3090386390686035, "learning_rate": 0.00038255202628696607, "loss": 0.8166, "step": 3937 }, { "epoch": 0.2668225051028618, "grad_norm": 3.1446077823638916, "learning_rate": 0.0003825465498357065, "loss": 0.903, "step": 3938 }, { "epoch": 0.2668902609446858, "grad_norm": 2.462174654006958, "learning_rate": 0.0003825410733844469, "loss": 0.756, "step": 3939 }, { "epoch": 0.2669580167865098, "grad_norm": 3.395325183868408, "learning_rate": 0.0003825355969331873, "loss": 0.9387, "step": 3940 }, { "epoch": 0.2670257726283338, "grad_norm": 2.6862692832946777, "learning_rate": 0.0003825301204819277, "loss": 0.6313, "step": 3941 }, { "epoch": 0.26709352847015777, "grad_norm": 3.2138006687164307, "learning_rate": 0.0003825246440306681, "loss": 0.8793, "step": 3942 }, { "epoch": 0.2671612843119818, "grad_norm": 4.024855613708496, "learning_rate": 0.0003825191675794086, "loss": 1.0299, "step": 3943 }, { "epoch": 0.2672290401538058, "grad_norm": 3.4614429473876953, "learning_rate": 0.000382513691128149, "loss": 0.8609, "step": 3944 }, { "epoch": 0.2672967959956297, "grad_norm": 3.462606430053711, "learning_rate": 0.0003825082146768894, "loss": 0.9953, "step": 3945 }, { "epoch": 0.2673645518374537, "grad_norm": 3.481815814971924, "learning_rate": 0.00038250273822562983, "loss": 0.7377, "step": 3946 }, { "epoch": 0.2674323076792777, "grad_norm": 4.268529415130615, "learning_rate": 0.0003824972617743702, "loss": 1.1668, "step": 3947 }, { "epoch": 0.2675000635211017, "grad_norm": 4.099143028259277, "learning_rate": 0.0003824917853231106, "loss": 1.1512, "step": 3948 }, { "epoch": 0.2675678193629257, "grad_norm": 2.844421863555908, "learning_rate": 0.0003824863088718511, "loss": 0.8891, "step": 3949 }, { "epoch": 0.2676355752047497, "grad_norm": 4.097158432006836, "learning_rate": 0.0003824808324205915, "loss": 0.8665, "step": 3950 }, { "epoch": 0.2677033310465737, "grad_norm": 2.971714735031128, "learning_rate": 0.0003824753559693319, "loss": 0.7624, "step": 3951 }, { "epoch": 0.2677710868883977, "grad_norm": 3.0117478370666504, "learning_rate": 0.0003824698795180723, "loss": 0.7864, "step": 3952 }, { "epoch": 0.2678388427302216, "grad_norm": 3.536701202392578, "learning_rate": 0.00038246440306681273, "loss": 0.9392, "step": 3953 }, { "epoch": 0.26790659857204563, "grad_norm": 3.26162052154541, "learning_rate": 0.0003824589266155532, "loss": 0.8078, "step": 3954 }, { "epoch": 0.26797435441386963, "grad_norm": 5.642796039581299, "learning_rate": 0.0003824534501642936, "loss": 1.2257, "step": 3955 }, { "epoch": 0.26804211025569363, "grad_norm": 4.00987434387207, "learning_rate": 0.000382447973713034, "loss": 0.9048, "step": 3956 }, { "epoch": 0.2681098660975176, "grad_norm": 3.5627636909484863, "learning_rate": 0.0003824424972617744, "loss": 0.8541, "step": 3957 }, { "epoch": 0.2681776219393416, "grad_norm": 4.042552471160889, "learning_rate": 0.0003824370208105148, "loss": 1.0149, "step": 3958 }, { "epoch": 0.2682453777811656, "grad_norm": 2.8894193172454834, "learning_rate": 0.00038243154435925523, "loss": 0.8854, "step": 3959 }, { "epoch": 0.26831313362298953, "grad_norm": 4.409813404083252, "learning_rate": 0.00038242606790799563, "loss": 1.0059, "step": 3960 }, { "epoch": 0.26838088946481353, "grad_norm": 2.5898194313049316, "learning_rate": 0.00038242059145673603, "loss": 0.7304, "step": 3961 }, { "epoch": 0.26844864530663753, "grad_norm": 3.7198126316070557, "learning_rate": 0.0003824151150054765, "loss": 0.9464, "step": 3962 }, { "epoch": 0.26851640114846154, "grad_norm": 3.1773693561553955, "learning_rate": 0.0003824096385542169, "loss": 0.807, "step": 3963 }, { "epoch": 0.2685841569902855, "grad_norm": 10.517654418945312, "learning_rate": 0.0003824041621029573, "loss": 1.0788, "step": 3964 }, { "epoch": 0.2686519128321095, "grad_norm": 3.459690570831299, "learning_rate": 0.00038239868565169774, "loss": 0.9473, "step": 3965 }, { "epoch": 0.2687196686739335, "grad_norm": 2.6266582012176514, "learning_rate": 0.00038239320920043814, "loss": 0.7211, "step": 3966 }, { "epoch": 0.2687874245157575, "grad_norm": 3.4249653816223145, "learning_rate": 0.00038238773274917854, "loss": 0.8925, "step": 3967 }, { "epoch": 0.26885518035758144, "grad_norm": 2.913900852203369, "learning_rate": 0.00038238225629791894, "loss": 0.8705, "step": 3968 }, { "epoch": 0.26892293619940544, "grad_norm": 2.961716890335083, "learning_rate": 0.00038237677984665934, "loss": 1.0162, "step": 3969 }, { "epoch": 0.26899069204122944, "grad_norm": 3.080411195755005, "learning_rate": 0.00038237130339539984, "loss": 0.9244, "step": 3970 }, { "epoch": 0.26905844788305344, "grad_norm": 5.795470714569092, "learning_rate": 0.00038236582694414024, "loss": 1.1026, "step": 3971 }, { "epoch": 0.2691262037248774, "grad_norm": 2.48157000541687, "learning_rate": 0.00038236035049288064, "loss": 0.8178, "step": 3972 }, { "epoch": 0.2691939595667014, "grad_norm": 2.293914794921875, "learning_rate": 0.00038235487404162104, "loss": 0.6835, "step": 3973 }, { "epoch": 0.2692617154085254, "grad_norm": 3.0530192852020264, "learning_rate": 0.00038234939759036144, "loss": 0.9522, "step": 3974 }, { "epoch": 0.26932947125034934, "grad_norm": 2.517580270767212, "learning_rate": 0.0003823439211391019, "loss": 0.7028, "step": 3975 }, { "epoch": 0.26939722709217334, "grad_norm": 3.213243007659912, "learning_rate": 0.0003823384446878423, "loss": 0.8664, "step": 3976 }, { "epoch": 0.26946498293399734, "grad_norm": 4.226165771484375, "learning_rate": 0.00038233296823658275, "loss": 0.8611, "step": 3977 }, { "epoch": 0.26953273877582135, "grad_norm": 3.135887384414673, "learning_rate": 0.00038232749178532314, "loss": 0.8444, "step": 3978 }, { "epoch": 0.2696004946176453, "grad_norm": 4.725459098815918, "learning_rate": 0.00038232201533406354, "loss": 1.0426, "step": 3979 }, { "epoch": 0.2696682504594693, "grad_norm": 3.1413400173187256, "learning_rate": 0.00038231653888280394, "loss": 0.8836, "step": 3980 }, { "epoch": 0.2697360063012933, "grad_norm": 2.509124279022217, "learning_rate": 0.0003823110624315444, "loss": 0.8561, "step": 3981 }, { "epoch": 0.2698037621431173, "grad_norm": 2.9058916568756104, "learning_rate": 0.0003823055859802848, "loss": 0.6985, "step": 3982 }, { "epoch": 0.26987151798494124, "grad_norm": 4.849409580230713, "learning_rate": 0.0003823001095290252, "loss": 1.0485, "step": 3983 }, { "epoch": 0.26993927382676525, "grad_norm": 2.6813621520996094, "learning_rate": 0.0003822946330777656, "loss": 0.8275, "step": 3984 }, { "epoch": 0.27000702966858925, "grad_norm": 2.7491683959960938, "learning_rate": 0.00038228915662650605, "loss": 0.9288, "step": 3985 }, { "epoch": 0.27007478551041325, "grad_norm": 4.176777362823486, "learning_rate": 0.00038228368017524645, "loss": 0.8126, "step": 3986 }, { "epoch": 0.2701425413522372, "grad_norm": 3.134134292602539, "learning_rate": 0.0003822782037239869, "loss": 0.7982, "step": 3987 }, { "epoch": 0.2702102971940612, "grad_norm": 2.9810664653778076, "learning_rate": 0.0003822727272727273, "loss": 0.9644, "step": 3988 }, { "epoch": 0.2702780530358852, "grad_norm": 3.097368001937866, "learning_rate": 0.0003822672508214677, "loss": 0.8411, "step": 3989 }, { "epoch": 0.27034580887770915, "grad_norm": 4.500667572021484, "learning_rate": 0.0003822617743702081, "loss": 0.9027, "step": 3990 }, { "epoch": 0.27041356471953315, "grad_norm": 3.681870698928833, "learning_rate": 0.00038225629791894855, "loss": 0.8929, "step": 3991 }, { "epoch": 0.27048132056135715, "grad_norm": 4.2831926345825195, "learning_rate": 0.00038225082146768895, "loss": 0.8382, "step": 3992 }, { "epoch": 0.27054907640318115, "grad_norm": 3.131566047668457, "learning_rate": 0.0003822453450164294, "loss": 0.9752, "step": 3993 }, { "epoch": 0.2706168322450051, "grad_norm": 2.613801956176758, "learning_rate": 0.0003822398685651698, "loss": 0.9044, "step": 3994 }, { "epoch": 0.2706845880868291, "grad_norm": 3.192380666732788, "learning_rate": 0.0003822343921139102, "loss": 0.9458, "step": 3995 }, { "epoch": 0.2707523439286531, "grad_norm": 2.953549861907959, "learning_rate": 0.0003822289156626506, "loss": 0.8104, "step": 3996 }, { "epoch": 0.2708200997704771, "grad_norm": 3.525301694869995, "learning_rate": 0.00038222343921139106, "loss": 1.0033, "step": 3997 }, { "epoch": 0.27088785561230105, "grad_norm": 3.5732033252716064, "learning_rate": 0.00038221796276013145, "loss": 0.8766, "step": 3998 }, { "epoch": 0.27095561145412506, "grad_norm": 3.130110263824463, "learning_rate": 0.00038221248630887185, "loss": 0.7737, "step": 3999 }, { "epoch": 0.27102336729594906, "grad_norm": 4.055689334869385, "learning_rate": 0.00038220700985761225, "loss": 0.8993, "step": 4000 }, { "epoch": 0.27109112313777306, "grad_norm": 4.256817817687988, "learning_rate": 0.0003822015334063527, "loss": 1.0844, "step": 4001 }, { "epoch": 0.271158878979597, "grad_norm": 2.7871289253234863, "learning_rate": 0.0003821960569550931, "loss": 0.6967, "step": 4002 }, { "epoch": 0.271226634821421, "grad_norm": 3.547945499420166, "learning_rate": 0.00038219058050383356, "loss": 0.8508, "step": 4003 }, { "epoch": 0.271294390663245, "grad_norm": 2.5149266719818115, "learning_rate": 0.00038218510405257396, "loss": 0.6564, "step": 4004 }, { "epoch": 0.27136214650506896, "grad_norm": 3.994830369949341, "learning_rate": 0.00038217962760131436, "loss": 1.1406, "step": 4005 }, { "epoch": 0.27142990234689296, "grad_norm": 3.3032026290893555, "learning_rate": 0.00038217415115005476, "loss": 0.9317, "step": 4006 }, { "epoch": 0.27149765818871696, "grad_norm": 4.938766002655029, "learning_rate": 0.00038216867469879516, "loss": 0.9945, "step": 4007 }, { "epoch": 0.27156541403054096, "grad_norm": 8.041346549987793, "learning_rate": 0.00038216319824753566, "loss": 1.0259, "step": 4008 }, { "epoch": 0.2716331698723649, "grad_norm": 3.6692895889282227, "learning_rate": 0.00038215772179627606, "loss": 1.0459, "step": 4009 }, { "epoch": 0.2717009257141889, "grad_norm": 2.7289421558380127, "learning_rate": 0.00038215224534501646, "loss": 0.6758, "step": 4010 }, { "epoch": 0.2717686815560129, "grad_norm": 3.876985788345337, "learning_rate": 0.00038214676889375686, "loss": 1.0939, "step": 4011 }, { "epoch": 0.2718364373978369, "grad_norm": 3.87770938873291, "learning_rate": 0.00038214129244249726, "loss": 1.1006, "step": 4012 }, { "epoch": 0.27190419323966086, "grad_norm": 6.086846828460693, "learning_rate": 0.0003821358159912377, "loss": 0.9772, "step": 4013 }, { "epoch": 0.27197194908148487, "grad_norm": 3.417285442352295, "learning_rate": 0.0003821303395399781, "loss": 1.1757, "step": 4014 }, { "epoch": 0.27203970492330887, "grad_norm": 3.329106092453003, "learning_rate": 0.0003821248630887185, "loss": 0.9355, "step": 4015 }, { "epoch": 0.27210746076513287, "grad_norm": 3.498250961303711, "learning_rate": 0.00038211938663745897, "loss": 1.0169, "step": 4016 }, { "epoch": 0.2721752166069568, "grad_norm": 3.1945204734802246, "learning_rate": 0.00038211391018619936, "loss": 0.8246, "step": 4017 }, { "epoch": 0.2722429724487808, "grad_norm": 3.91314435005188, "learning_rate": 0.00038210843373493976, "loss": 0.9715, "step": 4018 }, { "epoch": 0.2723107282906048, "grad_norm": 3.2258596420288086, "learning_rate": 0.0003821029572836802, "loss": 0.9901, "step": 4019 }, { "epoch": 0.27237848413242877, "grad_norm": 3.6434123516082764, "learning_rate": 0.0003820974808324206, "loss": 0.8751, "step": 4020 }, { "epoch": 0.27244623997425277, "grad_norm": 2.6564643383026123, "learning_rate": 0.000382092004381161, "loss": 0.8794, "step": 4021 }, { "epoch": 0.27251399581607677, "grad_norm": 3.104003667831421, "learning_rate": 0.0003820865279299014, "loss": 0.9286, "step": 4022 }, { "epoch": 0.2725817516579008, "grad_norm": 3.2834601402282715, "learning_rate": 0.0003820810514786418, "loss": 0.865, "step": 4023 }, { "epoch": 0.2726495074997247, "grad_norm": 2.9736814498901367, "learning_rate": 0.00038207557502738227, "loss": 0.7461, "step": 4024 }, { "epoch": 0.2727172633415487, "grad_norm": 4.012521266937256, "learning_rate": 0.0003820700985761227, "loss": 0.7992, "step": 4025 }, { "epoch": 0.2727850191833727, "grad_norm": 2.974020481109619, "learning_rate": 0.0003820646221248631, "loss": 0.9054, "step": 4026 }, { "epoch": 0.2728527750251967, "grad_norm": 3.645437717437744, "learning_rate": 0.0003820591456736035, "loss": 1.0196, "step": 4027 }, { "epoch": 0.27292053086702067, "grad_norm": 3.3574273586273193, "learning_rate": 0.0003820536692223439, "loss": 0.944, "step": 4028 }, { "epoch": 0.2729882867088447, "grad_norm": 2.7715325355529785, "learning_rate": 0.00038204819277108437, "loss": 0.7948, "step": 4029 }, { "epoch": 0.2730560425506687, "grad_norm": 3.599559783935547, "learning_rate": 0.00038204271631982477, "loss": 1.0705, "step": 4030 }, { "epoch": 0.2731237983924927, "grad_norm": 3.2188572883605957, "learning_rate": 0.00038203723986856517, "loss": 0.9494, "step": 4031 }, { "epoch": 0.2731915542343166, "grad_norm": 3.3054254055023193, "learning_rate": 0.0003820317634173056, "loss": 0.8883, "step": 4032 }, { "epoch": 0.2732593100761406, "grad_norm": 2.617253541946411, "learning_rate": 0.000382026286966046, "loss": 0.7202, "step": 4033 }, { "epoch": 0.27332706591796463, "grad_norm": 3.0894718170166016, "learning_rate": 0.0003820208105147864, "loss": 0.8384, "step": 4034 }, { "epoch": 0.2733948217597886, "grad_norm": 3.6716904640197754, "learning_rate": 0.0003820153340635269, "loss": 0.8283, "step": 4035 }, { "epoch": 0.2734625776016126, "grad_norm": 2.7300541400909424, "learning_rate": 0.0003820098576122673, "loss": 0.7392, "step": 4036 }, { "epoch": 0.2735303334434366, "grad_norm": 4.321869850158691, "learning_rate": 0.0003820043811610077, "loss": 0.7801, "step": 4037 }, { "epoch": 0.2735980892852606, "grad_norm": 4.418529510498047, "learning_rate": 0.0003819989047097481, "loss": 0.9019, "step": 4038 }, { "epoch": 0.27366584512708453, "grad_norm": 3.566715955734253, "learning_rate": 0.0003819934282584885, "loss": 0.8516, "step": 4039 }, { "epoch": 0.27373360096890853, "grad_norm": 6.217451572418213, "learning_rate": 0.0003819879518072289, "loss": 1.1943, "step": 4040 }, { "epoch": 0.27380135681073253, "grad_norm": 6.6527581214904785, "learning_rate": 0.0003819824753559694, "loss": 1.291, "step": 4041 }, { "epoch": 0.27386911265255653, "grad_norm": 3.496772289276123, "learning_rate": 0.0003819769989047098, "loss": 0.8292, "step": 4042 }, { "epoch": 0.2739368684943805, "grad_norm": 3.15877366065979, "learning_rate": 0.0003819715224534502, "loss": 1.0229, "step": 4043 }, { "epoch": 0.2740046243362045, "grad_norm": 3.320312023162842, "learning_rate": 0.0003819660460021906, "loss": 0.9907, "step": 4044 }, { "epoch": 0.2740723801780285, "grad_norm": 2.528569221496582, "learning_rate": 0.000381960569550931, "loss": 0.6492, "step": 4045 }, { "epoch": 0.2741401360198525, "grad_norm": 3.6831698417663574, "learning_rate": 0.00038195509309967143, "loss": 1.1349, "step": 4046 }, { "epoch": 0.27420789186167643, "grad_norm": 3.1160051822662354, "learning_rate": 0.0003819496166484119, "loss": 0.7589, "step": 4047 }, { "epoch": 0.27427564770350044, "grad_norm": 2.644473075866699, "learning_rate": 0.0003819441401971523, "loss": 0.6929, "step": 4048 }, { "epoch": 0.27434340354532444, "grad_norm": 5.394436359405518, "learning_rate": 0.0003819386637458927, "loss": 0.9215, "step": 4049 }, { "epoch": 0.2744111593871484, "grad_norm": 4.916804790496826, "learning_rate": 0.0003819331872946331, "loss": 0.8374, "step": 4050 }, { "epoch": 0.2744789152289724, "grad_norm": 2.6971280574798584, "learning_rate": 0.00038192771084337353, "loss": 0.8468, "step": 4051 }, { "epoch": 0.2745466710707964, "grad_norm": 3.762861490249634, "learning_rate": 0.00038192223439211393, "loss": 0.94, "step": 4052 }, { "epoch": 0.2746144269126204, "grad_norm": 4.627941131591797, "learning_rate": 0.00038191675794085433, "loss": 1.0758, "step": 4053 }, { "epoch": 0.27468218275444434, "grad_norm": 3.5531585216522217, "learning_rate": 0.00038191128148959473, "loss": 1.0566, "step": 4054 }, { "epoch": 0.27474993859626834, "grad_norm": 3.4275104999542236, "learning_rate": 0.0003819058050383352, "loss": 1.0533, "step": 4055 }, { "epoch": 0.27481769443809234, "grad_norm": 4.9304914474487305, "learning_rate": 0.0003819003285870756, "loss": 0.9746, "step": 4056 }, { "epoch": 0.27488545027991634, "grad_norm": 3.0089645385742188, "learning_rate": 0.00038189485213581604, "loss": 0.7693, "step": 4057 }, { "epoch": 0.2749532061217403, "grad_norm": 3.568584442138672, "learning_rate": 0.00038188937568455644, "loss": 0.7714, "step": 4058 }, { "epoch": 0.2750209619635643, "grad_norm": 2.8504855632781982, "learning_rate": 0.00038188389923329684, "loss": 0.7183, "step": 4059 }, { "epoch": 0.2750887178053883, "grad_norm": 2.644907236099243, "learning_rate": 0.00038187842278203724, "loss": 0.7709, "step": 4060 }, { "epoch": 0.2751564736472123, "grad_norm": 3.1195411682128906, "learning_rate": 0.00038187294633077763, "loss": 0.8878, "step": 4061 }, { "epoch": 0.27522422948903624, "grad_norm": 2.629922389984131, "learning_rate": 0.0003818674698795181, "loss": 0.9706, "step": 4062 }, { "epoch": 0.27529198533086024, "grad_norm": 3.657754898071289, "learning_rate": 0.00038186199342825854, "loss": 1.0225, "step": 4063 }, { "epoch": 0.27535974117268425, "grad_norm": 3.5767621994018555, "learning_rate": 0.00038185651697699894, "loss": 0.9285, "step": 4064 }, { "epoch": 0.2754274970145082, "grad_norm": 2.9870235919952393, "learning_rate": 0.00038185104052573934, "loss": 0.7844, "step": 4065 }, { "epoch": 0.2754952528563322, "grad_norm": 3.327420234680176, "learning_rate": 0.00038184556407447974, "loss": 1.005, "step": 4066 }, { "epoch": 0.2755630086981562, "grad_norm": 4.1224589347839355, "learning_rate": 0.0003818400876232202, "loss": 1.0236, "step": 4067 }, { "epoch": 0.2756307645399802, "grad_norm": 3.5683131217956543, "learning_rate": 0.0003818346111719606, "loss": 1.0451, "step": 4068 }, { "epoch": 0.27569852038180415, "grad_norm": 2.908520460128784, "learning_rate": 0.000381829134720701, "loss": 0.645, "step": 4069 }, { "epoch": 0.27576627622362815, "grad_norm": 3.238879442214966, "learning_rate": 0.00038182365826944144, "loss": 1.1051, "step": 4070 }, { "epoch": 0.27583403206545215, "grad_norm": 3.1980319023132324, "learning_rate": 0.00038181818181818184, "loss": 1.0559, "step": 4071 }, { "epoch": 0.27590178790727615, "grad_norm": 3.3101282119750977, "learning_rate": 0.00038181270536692224, "loss": 0.9588, "step": 4072 }, { "epoch": 0.2759695437491001, "grad_norm": 3.1713321208953857, "learning_rate": 0.0003818072289156627, "loss": 0.9168, "step": 4073 }, { "epoch": 0.2760372995909241, "grad_norm": 3.1641547679901123, "learning_rate": 0.0003818017524644031, "loss": 0.9364, "step": 4074 }, { "epoch": 0.2761050554327481, "grad_norm": 3.419308662414551, "learning_rate": 0.0003817962760131435, "loss": 0.9467, "step": 4075 }, { "epoch": 0.2761728112745721, "grad_norm": 2.7005910873413086, "learning_rate": 0.0003817907995618839, "loss": 0.7366, "step": 4076 }, { "epoch": 0.27624056711639605, "grad_norm": 2.860635280609131, "learning_rate": 0.0003817853231106243, "loss": 0.8415, "step": 4077 }, { "epoch": 0.27630832295822005, "grad_norm": 3.04925274848938, "learning_rate": 0.00038177984665936475, "loss": 0.8771, "step": 4078 }, { "epoch": 0.27637607880004406, "grad_norm": 2.920382261276245, "learning_rate": 0.0003817743702081052, "loss": 0.8464, "step": 4079 }, { "epoch": 0.276443834641868, "grad_norm": 3.051085948944092, "learning_rate": 0.0003817688937568456, "loss": 0.9665, "step": 4080 }, { "epoch": 0.276511590483692, "grad_norm": 3.2919366359710693, "learning_rate": 0.000381763417305586, "loss": 1.0326, "step": 4081 }, { "epoch": 0.276579346325516, "grad_norm": 3.3904595375061035, "learning_rate": 0.0003817579408543264, "loss": 0.806, "step": 4082 }, { "epoch": 0.27664710216734, "grad_norm": 3.749739408493042, "learning_rate": 0.0003817524644030668, "loss": 1.1682, "step": 4083 }, { "epoch": 0.27671485800916396, "grad_norm": 3.2533981800079346, "learning_rate": 0.00038174698795180725, "loss": 0.892, "step": 4084 }, { "epoch": 0.27678261385098796, "grad_norm": 3.993776798248291, "learning_rate": 0.00038174151150054765, "loss": 0.9924, "step": 4085 }, { "epoch": 0.27685036969281196, "grad_norm": 4.380088806152344, "learning_rate": 0.0003817360350492881, "loss": 1.0645, "step": 4086 }, { "epoch": 0.27691812553463596, "grad_norm": 2.9247562885284424, "learning_rate": 0.0003817305585980285, "loss": 0.8138, "step": 4087 }, { "epoch": 0.2769858813764599, "grad_norm": 3.096566915512085, "learning_rate": 0.0003817250821467689, "loss": 0.7839, "step": 4088 }, { "epoch": 0.2770536372182839, "grad_norm": 3.596449375152588, "learning_rate": 0.00038171960569550935, "loss": 0.9181, "step": 4089 }, { "epoch": 0.2771213930601079, "grad_norm": 4.291566371917725, "learning_rate": 0.00038171412924424975, "loss": 0.8012, "step": 4090 }, { "epoch": 0.2771891489019319, "grad_norm": 3.9304351806640625, "learning_rate": 0.00038170865279299015, "loss": 1.0448, "step": 4091 }, { "epoch": 0.27725690474375586, "grad_norm": 3.2358169555664062, "learning_rate": 0.00038170317634173055, "loss": 0.7477, "step": 4092 }, { "epoch": 0.27732466058557986, "grad_norm": 3.198188543319702, "learning_rate": 0.00038169769989047095, "loss": 0.8359, "step": 4093 }, { "epoch": 0.27739241642740387, "grad_norm": 4.492866516113281, "learning_rate": 0.0003816922234392114, "loss": 0.7611, "step": 4094 }, { "epoch": 0.2774601722692278, "grad_norm": 3.130707025527954, "learning_rate": 0.00038168674698795186, "loss": 0.8505, "step": 4095 }, { "epoch": 0.2775279281110518, "grad_norm": 4.035998344421387, "learning_rate": 0.00038168127053669226, "loss": 0.8725, "step": 4096 }, { "epoch": 0.2775956839528758, "grad_norm": 4.322450637817383, "learning_rate": 0.00038167579408543266, "loss": 1.252, "step": 4097 }, { "epoch": 0.2776634397946998, "grad_norm": 3.0692806243896484, "learning_rate": 0.00038167031763417306, "loss": 0.927, "step": 4098 }, { "epoch": 0.27773119563652376, "grad_norm": 4.681273937225342, "learning_rate": 0.00038166484118291346, "loss": 1.0099, "step": 4099 }, { "epoch": 0.27779895147834777, "grad_norm": 3.1008107662200928, "learning_rate": 0.0003816593647316539, "loss": 0.5991, "step": 4100 }, { "epoch": 0.27786670732017177, "grad_norm": 4.139404773712158, "learning_rate": 0.00038165388828039436, "loss": 1.0566, "step": 4101 }, { "epoch": 0.27793446316199577, "grad_norm": 3.6974925994873047, "learning_rate": 0.00038164841182913476, "loss": 0.9068, "step": 4102 }, { "epoch": 0.2780022190038197, "grad_norm": 3.259319543838501, "learning_rate": 0.00038164293537787516, "loss": 0.8952, "step": 4103 }, { "epoch": 0.2780699748456437, "grad_norm": 3.458962917327881, "learning_rate": 0.00038163745892661556, "loss": 1.0688, "step": 4104 }, { "epoch": 0.2781377306874677, "grad_norm": 3.0376532077789307, "learning_rate": 0.000381631982475356, "loss": 0.8232, "step": 4105 }, { "epoch": 0.2782054865292917, "grad_norm": 3.26987886428833, "learning_rate": 0.0003816265060240964, "loss": 0.8688, "step": 4106 }, { "epoch": 0.27827324237111567, "grad_norm": 3.3548731803894043, "learning_rate": 0.0003816210295728368, "loss": 0.9914, "step": 4107 }, { "epoch": 0.27834099821293967, "grad_norm": 3.197094440460205, "learning_rate": 0.0003816155531215772, "loss": 0.7232, "step": 4108 }, { "epoch": 0.2784087540547637, "grad_norm": 3.6699981689453125, "learning_rate": 0.00038161007667031766, "loss": 1.0213, "step": 4109 }, { "epoch": 0.2784765098965876, "grad_norm": 3.51353120803833, "learning_rate": 0.00038160460021905806, "loss": 1.0974, "step": 4110 }, { "epoch": 0.2785442657384116, "grad_norm": 3.0213685035705566, "learning_rate": 0.0003815991237677985, "loss": 0.8696, "step": 4111 }, { "epoch": 0.2786120215802356, "grad_norm": 4.087697505950928, "learning_rate": 0.0003815936473165389, "loss": 0.9665, "step": 4112 }, { "epoch": 0.2786797774220596, "grad_norm": 3.008803367614746, "learning_rate": 0.0003815881708652793, "loss": 0.7481, "step": 4113 }, { "epoch": 0.2787475332638836, "grad_norm": 2.539384126663208, "learning_rate": 0.0003815826944140197, "loss": 0.7212, "step": 4114 }, { "epoch": 0.2788152891057076, "grad_norm": 3.956040143966675, "learning_rate": 0.0003815772179627601, "loss": 0.9458, "step": 4115 }, { "epoch": 0.2788830449475316, "grad_norm": 3.81215763092041, "learning_rate": 0.00038157174151150057, "loss": 0.9363, "step": 4116 }, { "epoch": 0.2789508007893556, "grad_norm": 3.0907187461853027, "learning_rate": 0.000381566265060241, "loss": 0.8902, "step": 4117 }, { "epoch": 0.2790185566311795, "grad_norm": 3.4065299034118652, "learning_rate": 0.0003815607886089814, "loss": 0.8206, "step": 4118 }, { "epoch": 0.27908631247300353, "grad_norm": 5.058658599853516, "learning_rate": 0.0003815553121577218, "loss": 1.0584, "step": 4119 }, { "epoch": 0.27915406831482753, "grad_norm": 4.014410018920898, "learning_rate": 0.0003815498357064622, "loss": 1.3403, "step": 4120 }, { "epoch": 0.27922182415665153, "grad_norm": 12.225587844848633, "learning_rate": 0.0003815443592552026, "loss": 1.1558, "step": 4121 }, { "epoch": 0.2792895799984755, "grad_norm": 21.19126319885254, "learning_rate": 0.00038153888280394307, "loss": 1.0802, "step": 4122 }, { "epoch": 0.2793573358402995, "grad_norm": 4.571727752685547, "learning_rate": 0.00038153340635268347, "loss": 1.2375, "step": 4123 }, { "epoch": 0.2794250916821235, "grad_norm": 3.017731189727783, "learning_rate": 0.00038152792990142387, "loss": 0.7802, "step": 4124 }, { "epoch": 0.27949284752394743, "grad_norm": 4.550093173980713, "learning_rate": 0.0003815224534501643, "loss": 0.6953, "step": 4125 }, { "epoch": 0.27956060336577143, "grad_norm": 3.8193111419677734, "learning_rate": 0.0003815169769989047, "loss": 1.125, "step": 4126 }, { "epoch": 0.27962835920759543, "grad_norm": 3.364194393157959, "learning_rate": 0.0003815115005476452, "loss": 0.8717, "step": 4127 }, { "epoch": 0.27969611504941944, "grad_norm": 3.27457594871521, "learning_rate": 0.0003815060240963856, "loss": 0.8807, "step": 4128 }, { "epoch": 0.2797638708912434, "grad_norm": 4.674449443817139, "learning_rate": 0.000381500547645126, "loss": 1.1339, "step": 4129 }, { "epoch": 0.2798316267330674, "grad_norm": 4.1100172996521, "learning_rate": 0.0003814950711938664, "loss": 0.9049, "step": 4130 }, { "epoch": 0.2798993825748914, "grad_norm": 3.7363455295562744, "learning_rate": 0.00038148959474260677, "loss": 1.023, "step": 4131 }, { "epoch": 0.2799671384167154, "grad_norm": 3.5350401401519775, "learning_rate": 0.0003814841182913472, "loss": 0.9671, "step": 4132 }, { "epoch": 0.28003489425853934, "grad_norm": 3.7527897357940674, "learning_rate": 0.0003814786418400877, "loss": 0.8129, "step": 4133 }, { "epoch": 0.28010265010036334, "grad_norm": 4.515902042388916, "learning_rate": 0.0003814731653888281, "loss": 1.0466, "step": 4134 }, { "epoch": 0.28017040594218734, "grad_norm": 3.2605173587799072, "learning_rate": 0.0003814676889375685, "loss": 0.8275, "step": 4135 }, { "epoch": 0.28023816178401134, "grad_norm": 3.1442453861236572, "learning_rate": 0.0003814622124863089, "loss": 0.734, "step": 4136 }, { "epoch": 0.2803059176258353, "grad_norm": 3.7602202892303467, "learning_rate": 0.0003814567360350493, "loss": 0.7907, "step": 4137 }, { "epoch": 0.2803736734676593, "grad_norm": 4.3316779136657715, "learning_rate": 0.00038145125958378973, "loss": 0.9421, "step": 4138 }, { "epoch": 0.2804414293094833, "grad_norm": 3.0202901363372803, "learning_rate": 0.00038144578313253013, "loss": 0.8221, "step": 4139 }, { "epoch": 0.28050918515130724, "grad_norm": 3.457354784011841, "learning_rate": 0.0003814403066812706, "loss": 0.8981, "step": 4140 }, { "epoch": 0.28057694099313124, "grad_norm": 3.3700647354125977, "learning_rate": 0.000381434830230011, "loss": 0.8883, "step": 4141 }, { "epoch": 0.28064469683495524, "grad_norm": 3.5013773441314697, "learning_rate": 0.0003814293537787514, "loss": 1.022, "step": 4142 }, { "epoch": 0.28071245267677924, "grad_norm": 3.041679859161377, "learning_rate": 0.00038142387732749183, "loss": 0.7417, "step": 4143 }, { "epoch": 0.2807802085186032, "grad_norm": 3.146716833114624, "learning_rate": 0.00038141840087623223, "loss": 0.8995, "step": 4144 }, { "epoch": 0.2808479643604272, "grad_norm": 2.494267463684082, "learning_rate": 0.00038141292442497263, "loss": 0.7032, "step": 4145 }, { "epoch": 0.2809157202022512, "grad_norm": 3.6580934524536133, "learning_rate": 0.00038140744797371303, "loss": 0.8673, "step": 4146 }, { "epoch": 0.2809834760440752, "grad_norm": 3.1825475692749023, "learning_rate": 0.00038140197152245343, "loss": 0.7547, "step": 4147 }, { "epoch": 0.28105123188589914, "grad_norm": 2.935293197631836, "learning_rate": 0.0003813964950711939, "loss": 0.889, "step": 4148 }, { "epoch": 0.28111898772772315, "grad_norm": 3.4600260257720947, "learning_rate": 0.00038139101861993434, "loss": 1.0635, "step": 4149 }, { "epoch": 0.28118674356954715, "grad_norm": 3.673569917678833, "learning_rate": 0.00038138554216867474, "loss": 0.9555, "step": 4150 }, { "epoch": 0.28125449941137115, "grad_norm": 3.5231266021728516, "learning_rate": 0.00038138006571741514, "loss": 1.0169, "step": 4151 }, { "epoch": 0.2813222552531951, "grad_norm": 3.572376251220703, "learning_rate": 0.00038137458926615554, "loss": 1.0355, "step": 4152 }, { "epoch": 0.2813900110950191, "grad_norm": 3.297841787338257, "learning_rate": 0.00038136911281489593, "loss": 0.9757, "step": 4153 }, { "epoch": 0.2814577669368431, "grad_norm": 2.6277480125427246, "learning_rate": 0.0003813636363636364, "loss": 0.8449, "step": 4154 }, { "epoch": 0.28152552277866705, "grad_norm": 4.188483238220215, "learning_rate": 0.0003813581599123768, "loss": 0.8903, "step": 4155 }, { "epoch": 0.28159327862049105, "grad_norm": 3.4031150341033936, "learning_rate": 0.00038135268346111724, "loss": 0.8995, "step": 4156 }, { "epoch": 0.28166103446231505, "grad_norm": 3.849982261657715, "learning_rate": 0.00038134720700985764, "loss": 0.7848, "step": 4157 }, { "epoch": 0.28172879030413905, "grad_norm": 3.3642117977142334, "learning_rate": 0.00038134173055859804, "loss": 0.8917, "step": 4158 }, { "epoch": 0.281796546145963, "grad_norm": 3.0539591312408447, "learning_rate": 0.00038133625410733844, "loss": 0.7747, "step": 4159 }, { "epoch": 0.281864301987787, "grad_norm": 3.3993096351623535, "learning_rate": 0.0003813307776560789, "loss": 0.8998, "step": 4160 }, { "epoch": 0.281932057829611, "grad_norm": 4.065631866455078, "learning_rate": 0.0003813253012048193, "loss": 0.9247, "step": 4161 }, { "epoch": 0.281999813671435, "grad_norm": 4.975888252258301, "learning_rate": 0.0003813198247535597, "loss": 1.1327, "step": 4162 }, { "epoch": 0.28206756951325895, "grad_norm": 3.145251989364624, "learning_rate": 0.0003813143483023001, "loss": 0.6017, "step": 4163 }, { "epoch": 0.28213532535508296, "grad_norm": 4.094858646392822, "learning_rate": 0.00038130887185104054, "loss": 0.891, "step": 4164 }, { "epoch": 0.28220308119690696, "grad_norm": 5.092297077178955, "learning_rate": 0.000381303395399781, "loss": 1.1061, "step": 4165 }, { "epoch": 0.28227083703873096, "grad_norm": 3.5173451900482178, "learning_rate": 0.0003812979189485214, "loss": 0.7177, "step": 4166 }, { "epoch": 0.2823385928805549, "grad_norm": 4.876333236694336, "learning_rate": 0.0003812924424972618, "loss": 0.8722, "step": 4167 }, { "epoch": 0.2824063487223789, "grad_norm": 3.390796661376953, "learning_rate": 0.0003812869660460022, "loss": 0.9685, "step": 4168 }, { "epoch": 0.2824741045642029, "grad_norm": 2.9970672130584717, "learning_rate": 0.0003812814895947426, "loss": 0.8554, "step": 4169 }, { "epoch": 0.28254186040602686, "grad_norm": 3.4347097873687744, "learning_rate": 0.00038127601314348305, "loss": 0.8554, "step": 4170 }, { "epoch": 0.28260961624785086, "grad_norm": 3.865476131439209, "learning_rate": 0.0003812705366922235, "loss": 1.1949, "step": 4171 }, { "epoch": 0.28267737208967486, "grad_norm": 2.6927409172058105, "learning_rate": 0.0003812650602409639, "loss": 0.8671, "step": 4172 }, { "epoch": 0.28274512793149886, "grad_norm": 3.7539374828338623, "learning_rate": 0.0003812595837897043, "loss": 1.155, "step": 4173 }, { "epoch": 0.2828128837733228, "grad_norm": 3.3788347244262695, "learning_rate": 0.0003812541073384447, "loss": 0.951, "step": 4174 }, { "epoch": 0.2828806396151468, "grad_norm": 3.269413948059082, "learning_rate": 0.0003812486308871851, "loss": 0.9034, "step": 4175 }, { "epoch": 0.2829483954569708, "grad_norm": 3.470550537109375, "learning_rate": 0.00038124315443592555, "loss": 1.0275, "step": 4176 }, { "epoch": 0.2830161512987948, "grad_norm": 3.7513339519500732, "learning_rate": 0.00038123767798466595, "loss": 0.8048, "step": 4177 }, { "epoch": 0.28308390714061876, "grad_norm": 3.3224732875823975, "learning_rate": 0.00038123220153340635, "loss": 0.7414, "step": 4178 }, { "epoch": 0.28315166298244276, "grad_norm": 3.5182204246520996, "learning_rate": 0.0003812267250821468, "loss": 0.8228, "step": 4179 }, { "epoch": 0.28321941882426677, "grad_norm": 3.8641185760498047, "learning_rate": 0.0003812212486308872, "loss": 0.873, "step": 4180 }, { "epoch": 0.28328717466609077, "grad_norm": 2.3649520874023438, "learning_rate": 0.00038121577217962765, "loss": 0.7512, "step": 4181 }, { "epoch": 0.2833549305079147, "grad_norm": 2.963038682937622, "learning_rate": 0.00038121029572836805, "loss": 0.8126, "step": 4182 }, { "epoch": 0.2834226863497387, "grad_norm": 3.024508237838745, "learning_rate": 0.00038120481927710845, "loss": 0.8242, "step": 4183 }, { "epoch": 0.2834904421915627, "grad_norm": 3.2557058334350586, "learning_rate": 0.00038119934282584885, "loss": 0.917, "step": 4184 }, { "epoch": 0.28355819803338667, "grad_norm": 3.3483870029449463, "learning_rate": 0.00038119386637458925, "loss": 1.1143, "step": 4185 }, { "epoch": 0.28362595387521067, "grad_norm": 3.6157288551330566, "learning_rate": 0.0003811883899233297, "loss": 1.2197, "step": 4186 }, { "epoch": 0.28369370971703467, "grad_norm": 3.5104551315307617, "learning_rate": 0.00038118291347207016, "loss": 0.8817, "step": 4187 }, { "epoch": 0.28376146555885867, "grad_norm": 3.9131431579589844, "learning_rate": 0.00038117743702081056, "loss": 0.9427, "step": 4188 }, { "epoch": 0.2838292214006826, "grad_norm": 3.8723204135894775, "learning_rate": 0.00038117196056955096, "loss": 1.0884, "step": 4189 }, { "epoch": 0.2838969772425066, "grad_norm": 3.2777366638183594, "learning_rate": 0.00038116648411829136, "loss": 0.7902, "step": 4190 }, { "epoch": 0.2839647330843306, "grad_norm": 3.404507637023926, "learning_rate": 0.00038116100766703176, "loss": 0.9458, "step": 4191 }, { "epoch": 0.2840324889261546, "grad_norm": 2.953594446182251, "learning_rate": 0.0003811555312157722, "loss": 0.8518, "step": 4192 }, { "epoch": 0.28410024476797857, "grad_norm": 3.4678421020507812, "learning_rate": 0.0003811500547645126, "loss": 0.7276, "step": 4193 }, { "epoch": 0.2841680006098026, "grad_norm": 3.8672354221343994, "learning_rate": 0.000381144578313253, "loss": 0.9902, "step": 4194 }, { "epoch": 0.2842357564516266, "grad_norm": 5.013310432434082, "learning_rate": 0.00038113910186199346, "loss": 1.1647, "step": 4195 }, { "epoch": 0.2843035122934506, "grad_norm": 4.147918224334717, "learning_rate": 0.00038113362541073386, "loss": 0.8274, "step": 4196 }, { "epoch": 0.2843712681352745, "grad_norm": 3.7439465522766113, "learning_rate": 0.00038112814895947426, "loss": 1.0529, "step": 4197 }, { "epoch": 0.2844390239770985, "grad_norm": 2.9598395824432373, "learning_rate": 0.0003811226725082147, "loss": 0.8556, "step": 4198 }, { "epoch": 0.28450677981892253, "grad_norm": 4.3479413986206055, "learning_rate": 0.0003811171960569551, "loss": 1.1328, "step": 4199 }, { "epoch": 0.2845745356607465, "grad_norm": 3.5710036754608154, "learning_rate": 0.0003811117196056955, "loss": 0.9303, "step": 4200 }, { "epoch": 0.2846422915025705, "grad_norm": 3.2518625259399414, "learning_rate": 0.0003811062431544359, "loss": 0.8616, "step": 4201 }, { "epoch": 0.2847100473443945, "grad_norm": 3.222858428955078, "learning_rate": 0.00038110076670317636, "loss": 0.9411, "step": 4202 }, { "epoch": 0.2847778031862185, "grad_norm": 2.8735997676849365, "learning_rate": 0.0003810952902519168, "loss": 0.6203, "step": 4203 }, { "epoch": 0.2848455590280424, "grad_norm": 2.9268639087677, "learning_rate": 0.0003810898138006572, "loss": 0.7417, "step": 4204 }, { "epoch": 0.28491331486986643, "grad_norm": 3.5703887939453125, "learning_rate": 0.0003810843373493976, "loss": 0.8003, "step": 4205 }, { "epoch": 0.28498107071169043, "grad_norm": 3.083660840988159, "learning_rate": 0.000381078860898138, "loss": 0.9438, "step": 4206 }, { "epoch": 0.28504882655351443, "grad_norm": 3.3901126384735107, "learning_rate": 0.0003810733844468784, "loss": 1.0873, "step": 4207 }, { "epoch": 0.2851165823953384, "grad_norm": 4.2037177085876465, "learning_rate": 0.00038106790799561887, "loss": 0.8824, "step": 4208 }, { "epoch": 0.2851843382371624, "grad_norm": 2.982907772064209, "learning_rate": 0.00038106243154435927, "loss": 0.8038, "step": 4209 }, { "epoch": 0.2852520940789864, "grad_norm": 6.450176239013672, "learning_rate": 0.0003810569550930997, "loss": 1.1874, "step": 4210 }, { "epoch": 0.2853198499208104, "grad_norm": 3.504517078399658, "learning_rate": 0.0003810514786418401, "loss": 0.9117, "step": 4211 }, { "epoch": 0.28538760576263433, "grad_norm": 4.855213642120361, "learning_rate": 0.0003810460021905805, "loss": 0.9068, "step": 4212 }, { "epoch": 0.28545536160445834, "grad_norm": 3.3940505981445312, "learning_rate": 0.0003810405257393209, "loss": 0.9177, "step": 4213 }, { "epoch": 0.28552311744628234, "grad_norm": 3.773569345474243, "learning_rate": 0.00038103504928806137, "loss": 0.9228, "step": 4214 }, { "epoch": 0.2855908732881063, "grad_norm": 2.97664475440979, "learning_rate": 0.00038102957283680177, "loss": 0.6664, "step": 4215 }, { "epoch": 0.2856586291299303, "grad_norm": 4.4931769371032715, "learning_rate": 0.00038102409638554217, "loss": 0.7809, "step": 4216 }, { "epoch": 0.2857263849717543, "grad_norm": 3.8344554901123047, "learning_rate": 0.00038101861993428257, "loss": 0.9144, "step": 4217 }, { "epoch": 0.2857941408135783, "grad_norm": 3.5489773750305176, "learning_rate": 0.000381013143483023, "loss": 1.0449, "step": 4218 }, { "epoch": 0.28586189665540224, "grad_norm": 4.168195724487305, "learning_rate": 0.0003810076670317635, "loss": 0.9697, "step": 4219 }, { "epoch": 0.28592965249722624, "grad_norm": 4.4977827072143555, "learning_rate": 0.0003810021905805039, "loss": 1.1294, "step": 4220 }, { "epoch": 0.28599740833905024, "grad_norm": 2.7469124794006348, "learning_rate": 0.0003809967141292443, "loss": 0.769, "step": 4221 }, { "epoch": 0.28606516418087424, "grad_norm": 3.4717230796813965, "learning_rate": 0.0003809912376779847, "loss": 0.8619, "step": 4222 }, { "epoch": 0.2861329200226982, "grad_norm": 3.145761489868164, "learning_rate": 0.00038098576122672507, "loss": 0.9375, "step": 4223 }, { "epoch": 0.2862006758645222, "grad_norm": 4.648340702056885, "learning_rate": 0.0003809802847754655, "loss": 0.9226, "step": 4224 }, { "epoch": 0.2862684317063462, "grad_norm": 3.929830312728882, "learning_rate": 0.0003809748083242059, "loss": 1.0492, "step": 4225 }, { "epoch": 0.2863361875481702, "grad_norm": 3.2500739097595215, "learning_rate": 0.0003809693318729464, "loss": 0.9349, "step": 4226 }, { "epoch": 0.28640394338999414, "grad_norm": 3.442087173461914, "learning_rate": 0.0003809638554216868, "loss": 0.8283, "step": 4227 }, { "epoch": 0.28647169923181814, "grad_norm": 3.290076971054077, "learning_rate": 0.0003809583789704272, "loss": 1.026, "step": 4228 }, { "epoch": 0.28653945507364215, "grad_norm": 2.675825357437134, "learning_rate": 0.0003809529025191676, "loss": 0.757, "step": 4229 }, { "epoch": 0.2866072109154661, "grad_norm": 3.5977437496185303, "learning_rate": 0.00038094742606790803, "loss": 0.8018, "step": 4230 }, { "epoch": 0.2866749667572901, "grad_norm": 2.7480294704437256, "learning_rate": 0.00038094194961664843, "loss": 0.7995, "step": 4231 }, { "epoch": 0.2867427225991141, "grad_norm": 2.962670087814331, "learning_rate": 0.00038093647316538883, "loss": 0.7788, "step": 4232 }, { "epoch": 0.2868104784409381, "grad_norm": 3.4720308780670166, "learning_rate": 0.0003809309967141293, "loss": 0.8809, "step": 4233 }, { "epoch": 0.28687823428276205, "grad_norm": 4.182247161865234, "learning_rate": 0.0003809255202628697, "loss": 1.1444, "step": 4234 }, { "epoch": 0.28694599012458605, "grad_norm": 3.8075146675109863, "learning_rate": 0.0003809200438116101, "loss": 1.1192, "step": 4235 }, { "epoch": 0.28701374596641005, "grad_norm": 3.8724253177642822, "learning_rate": 0.00038091456736035053, "loss": 0.8418, "step": 4236 }, { "epoch": 0.28708150180823405, "grad_norm": 3.1556854248046875, "learning_rate": 0.00038090909090909093, "loss": 1.0038, "step": 4237 }, { "epoch": 0.287149257650058, "grad_norm": 3.920008897781372, "learning_rate": 0.00038090361445783133, "loss": 1.0923, "step": 4238 }, { "epoch": 0.287217013491882, "grad_norm": 3.725320816040039, "learning_rate": 0.00038089813800657173, "loss": 1.0353, "step": 4239 }, { "epoch": 0.287284769333706, "grad_norm": 4.828123092651367, "learning_rate": 0.0003808926615553122, "loss": 0.8563, "step": 4240 }, { "epoch": 0.28735252517553, "grad_norm": 3.469817638397217, "learning_rate": 0.00038088718510405264, "loss": 1.0601, "step": 4241 }, { "epoch": 0.28742028101735395, "grad_norm": 4.846755504608154, "learning_rate": 0.00038088170865279304, "loss": 0.9166, "step": 4242 }, { "epoch": 0.28748803685917795, "grad_norm": 3.974705934524536, "learning_rate": 0.00038087623220153344, "loss": 1.0069, "step": 4243 }, { "epoch": 0.28755579270100196, "grad_norm": 2.9341962337493896, "learning_rate": 0.00038087075575027384, "loss": 0.9444, "step": 4244 }, { "epoch": 0.2876235485428259, "grad_norm": 5.168619632720947, "learning_rate": 0.00038086527929901423, "loss": 0.8049, "step": 4245 }, { "epoch": 0.2876913043846499, "grad_norm": 3.5353424549102783, "learning_rate": 0.0003808598028477547, "loss": 1.017, "step": 4246 }, { "epoch": 0.2877590602264739, "grad_norm": 2.5154051780700684, "learning_rate": 0.0003808543263964951, "loss": 0.7073, "step": 4247 }, { "epoch": 0.2878268160682979, "grad_norm": 2.9759600162506104, "learning_rate": 0.0003808488499452355, "loss": 0.8158, "step": 4248 }, { "epoch": 0.28789457191012185, "grad_norm": 3.621062994003296, "learning_rate": 0.00038084337349397594, "loss": 0.8933, "step": 4249 }, { "epoch": 0.28796232775194586, "grad_norm": 3.113044500350952, "learning_rate": 0.00038083789704271634, "loss": 0.7788, "step": 4250 }, { "epoch": 0.28803008359376986, "grad_norm": 5.05565071105957, "learning_rate": 0.00038083242059145674, "loss": 0.935, "step": 4251 }, { "epoch": 0.28809783943559386, "grad_norm": 3.412968158721924, "learning_rate": 0.0003808269441401972, "loss": 1.0211, "step": 4252 }, { "epoch": 0.2881655952774178, "grad_norm": 2.8301756381988525, "learning_rate": 0.0003808214676889376, "loss": 0.8862, "step": 4253 }, { "epoch": 0.2882333511192418, "grad_norm": 3.542484998703003, "learning_rate": 0.000380815991237678, "loss": 1.0307, "step": 4254 }, { "epoch": 0.2883011069610658, "grad_norm": 3.7638983726501465, "learning_rate": 0.0003808105147864184, "loss": 1.057, "step": 4255 }, { "epoch": 0.2883688628028898, "grad_norm": 3.5850274562835693, "learning_rate": 0.0003808050383351588, "loss": 0.8637, "step": 4256 }, { "epoch": 0.28843661864471376, "grad_norm": 2.5674705505371094, "learning_rate": 0.0003807995618838993, "loss": 0.7942, "step": 4257 }, { "epoch": 0.28850437448653776, "grad_norm": 3.134467363357544, "learning_rate": 0.0003807940854326397, "loss": 0.8128, "step": 4258 }, { "epoch": 0.28857213032836176, "grad_norm": 2.7699155807495117, "learning_rate": 0.0003807886089813801, "loss": 0.7509, "step": 4259 }, { "epoch": 0.2886398861701857, "grad_norm": 3.2916338443756104, "learning_rate": 0.0003807831325301205, "loss": 0.8952, "step": 4260 }, { "epoch": 0.2887076420120097, "grad_norm": 2.7321741580963135, "learning_rate": 0.0003807776560788609, "loss": 0.7374, "step": 4261 }, { "epoch": 0.2887753978538337, "grad_norm": 3.2019588947296143, "learning_rate": 0.00038077217962760135, "loss": 0.8525, "step": 4262 }, { "epoch": 0.2888431536956577, "grad_norm": 3.3928987979888916, "learning_rate": 0.00038076670317634175, "loss": 0.9933, "step": 4263 }, { "epoch": 0.28891090953748166, "grad_norm": 3.99440598487854, "learning_rate": 0.00038076122672508214, "loss": 0.9219, "step": 4264 }, { "epoch": 0.28897866537930567, "grad_norm": 3.083383560180664, "learning_rate": 0.0003807557502738226, "loss": 0.7322, "step": 4265 }, { "epoch": 0.28904642122112967, "grad_norm": 3.8985190391540527, "learning_rate": 0.000380750273822563, "loss": 0.8686, "step": 4266 }, { "epoch": 0.28911417706295367, "grad_norm": 3.1525466442108154, "learning_rate": 0.0003807447973713034, "loss": 0.8842, "step": 4267 }, { "epoch": 0.2891819329047776, "grad_norm": 3.974451780319214, "learning_rate": 0.00038073932092004385, "loss": 0.9009, "step": 4268 }, { "epoch": 0.2892496887466016, "grad_norm": 3.2337396144866943, "learning_rate": 0.00038073384446878425, "loss": 0.821, "step": 4269 }, { "epoch": 0.2893174445884256, "grad_norm": 3.643644094467163, "learning_rate": 0.00038072836801752465, "loss": 0.9867, "step": 4270 }, { "epoch": 0.2893852004302496, "grad_norm": 3.3926374912261963, "learning_rate": 0.00038072289156626505, "loss": 1.0414, "step": 4271 }, { "epoch": 0.28945295627207357, "grad_norm": 4.29252815246582, "learning_rate": 0.0003807174151150055, "loss": 0.9851, "step": 4272 }, { "epoch": 0.28952071211389757, "grad_norm": 3.2176711559295654, "learning_rate": 0.0003807119386637459, "loss": 0.9531, "step": 4273 }, { "epoch": 0.2895884679557216, "grad_norm": 3.039893388748169, "learning_rate": 0.00038070646221248635, "loss": 1.0329, "step": 4274 }, { "epoch": 0.2896562237975455, "grad_norm": 3.1474177837371826, "learning_rate": 0.00038070098576122675, "loss": 0.9168, "step": 4275 }, { "epoch": 0.2897239796393695, "grad_norm": 3.3008651733398438, "learning_rate": 0.00038069550930996715, "loss": 0.8886, "step": 4276 }, { "epoch": 0.2897917354811935, "grad_norm": 3.9384498596191406, "learning_rate": 0.00038069003285870755, "loss": 0.8792, "step": 4277 }, { "epoch": 0.2898594913230175, "grad_norm": 2.6484317779541016, "learning_rate": 0.000380684556407448, "loss": 0.7738, "step": 4278 }, { "epoch": 0.2899272471648415, "grad_norm": 3.569589614868164, "learning_rate": 0.0003806790799561884, "loss": 1.0703, "step": 4279 }, { "epoch": 0.2899950030066655, "grad_norm": 2.9471025466918945, "learning_rate": 0.00038067360350492886, "loss": 0.8217, "step": 4280 }, { "epoch": 0.2900627588484895, "grad_norm": 9.082399368286133, "learning_rate": 0.00038066812705366926, "loss": 0.8689, "step": 4281 }, { "epoch": 0.2901305146903135, "grad_norm": 3.229081630706787, "learning_rate": 0.00038066265060240966, "loss": 0.8576, "step": 4282 }, { "epoch": 0.2901982705321374, "grad_norm": 3.3105978965759277, "learning_rate": 0.00038065717415115006, "loss": 0.9743, "step": 4283 }, { "epoch": 0.2902660263739614, "grad_norm": 4.002555847167969, "learning_rate": 0.0003806516976998905, "loss": 0.8978, "step": 4284 }, { "epoch": 0.29033378221578543, "grad_norm": 3.8652195930480957, "learning_rate": 0.0003806462212486309, "loss": 0.8953, "step": 4285 }, { "epoch": 0.29040153805760943, "grad_norm": 3.3684518337249756, "learning_rate": 0.0003806407447973713, "loss": 0.8815, "step": 4286 }, { "epoch": 0.2904692938994334, "grad_norm": 3.6892974376678467, "learning_rate": 0.0003806352683461117, "loss": 0.9018, "step": 4287 }, { "epoch": 0.2905370497412574, "grad_norm": 4.574551582336426, "learning_rate": 0.00038062979189485216, "loss": 0.7216, "step": 4288 }, { "epoch": 0.2906048055830814, "grad_norm": 3.9689059257507324, "learning_rate": 0.00038062431544359256, "loss": 1.0522, "step": 4289 }, { "epoch": 0.29067256142490533, "grad_norm": 3.5185515880584717, "learning_rate": 0.000380618838992333, "loss": 0.8815, "step": 4290 }, { "epoch": 0.29074031726672933, "grad_norm": 2.3992514610290527, "learning_rate": 0.0003806133625410734, "loss": 0.6638, "step": 4291 }, { "epoch": 0.29080807310855333, "grad_norm": 2.950704336166382, "learning_rate": 0.0003806078860898138, "loss": 1.0387, "step": 4292 }, { "epoch": 0.29087582895037734, "grad_norm": 3.1277101039886475, "learning_rate": 0.0003806024096385542, "loss": 1.0101, "step": 4293 }, { "epoch": 0.2909435847922013, "grad_norm": 3.926677942276001, "learning_rate": 0.0003805969331872946, "loss": 0.9978, "step": 4294 }, { "epoch": 0.2910113406340253, "grad_norm": 3.0831704139709473, "learning_rate": 0.00038059145673603506, "loss": 1.0105, "step": 4295 }, { "epoch": 0.2910790964758493, "grad_norm": 3.1600961685180664, "learning_rate": 0.0003805859802847755, "loss": 0.7138, "step": 4296 }, { "epoch": 0.2911468523176733, "grad_norm": 2.5180630683898926, "learning_rate": 0.0003805805038335159, "loss": 0.6925, "step": 4297 }, { "epoch": 0.29121460815949723, "grad_norm": 3.085513114929199, "learning_rate": 0.0003805750273822563, "loss": 0.9102, "step": 4298 }, { "epoch": 0.29128236400132124, "grad_norm": 3.205209732055664, "learning_rate": 0.0003805695509309967, "loss": 0.8193, "step": 4299 }, { "epoch": 0.29135011984314524, "grad_norm": 2.9040768146514893, "learning_rate": 0.00038056407447973717, "loss": 0.7397, "step": 4300 }, { "epoch": 0.29141787568496924, "grad_norm": 3.247999668121338, "learning_rate": 0.00038055859802847757, "loss": 1.0184, "step": 4301 }, { "epoch": 0.2914856315267932, "grad_norm": 3.8056952953338623, "learning_rate": 0.00038055312157721797, "loss": 1.2406, "step": 4302 }, { "epoch": 0.2915533873686172, "grad_norm": 4.596077919006348, "learning_rate": 0.0003805476451259584, "loss": 1.0072, "step": 4303 }, { "epoch": 0.2916211432104412, "grad_norm": 4.431901454925537, "learning_rate": 0.0003805421686746988, "loss": 0.9834, "step": 4304 }, { "epoch": 0.29168889905226514, "grad_norm": 4.988735198974609, "learning_rate": 0.0003805366922234392, "loss": 1.064, "step": 4305 }, { "epoch": 0.29175665489408914, "grad_norm": 2.8431379795074463, "learning_rate": 0.00038053121577217967, "loss": 0.8996, "step": 4306 }, { "epoch": 0.29182441073591314, "grad_norm": 2.74552059173584, "learning_rate": 0.00038052573932092007, "loss": 0.7339, "step": 4307 }, { "epoch": 0.29189216657773714, "grad_norm": 2.653697967529297, "learning_rate": 0.00038052026286966047, "loss": 0.6637, "step": 4308 }, { "epoch": 0.2919599224195611, "grad_norm": 4.0540385246276855, "learning_rate": 0.00038051478641840087, "loss": 0.8749, "step": 4309 }, { "epoch": 0.2920276782613851, "grad_norm": 2.830134153366089, "learning_rate": 0.00038050930996714127, "loss": 0.8039, "step": 4310 }, { "epoch": 0.2920954341032091, "grad_norm": 3.8398423194885254, "learning_rate": 0.0003805038335158817, "loss": 0.8932, "step": 4311 }, { "epoch": 0.2921631899450331, "grad_norm": 2.518918514251709, "learning_rate": 0.0003804983570646222, "loss": 0.7453, "step": 4312 }, { "epoch": 0.29223094578685704, "grad_norm": 4.985368728637695, "learning_rate": 0.0003804928806133626, "loss": 1.069, "step": 4313 }, { "epoch": 0.29229870162868105, "grad_norm": 2.992016315460205, "learning_rate": 0.000380487404162103, "loss": 0.8367, "step": 4314 }, { "epoch": 0.29236645747050505, "grad_norm": 3.2323968410491943, "learning_rate": 0.00038048192771084337, "loss": 0.7991, "step": 4315 }, { "epoch": 0.29243421331232905, "grad_norm": 3.117027997970581, "learning_rate": 0.0003804764512595838, "loss": 0.8026, "step": 4316 }, { "epoch": 0.292501969154153, "grad_norm": 4.022603511810303, "learning_rate": 0.0003804709748083242, "loss": 1.0356, "step": 4317 }, { "epoch": 0.292569724995977, "grad_norm": 3.9422216415405273, "learning_rate": 0.0003804654983570646, "loss": 1.1424, "step": 4318 }, { "epoch": 0.292637480837801, "grad_norm": 3.5007150173187256, "learning_rate": 0.0003804600219058051, "loss": 0.981, "step": 4319 }, { "epoch": 0.29270523667962495, "grad_norm": 3.4595749378204346, "learning_rate": 0.0003804545454545455, "loss": 0.8592, "step": 4320 }, { "epoch": 0.29277299252144895, "grad_norm": 3.7405474185943604, "learning_rate": 0.0003804490690032859, "loss": 0.8383, "step": 4321 }, { "epoch": 0.29284074836327295, "grad_norm": 2.9829776287078857, "learning_rate": 0.00038044359255202633, "loss": 0.938, "step": 4322 }, { "epoch": 0.29290850420509695, "grad_norm": 3.201439142227173, "learning_rate": 0.00038043811610076673, "loss": 0.8759, "step": 4323 }, { "epoch": 0.2929762600469209, "grad_norm": 2.9549875259399414, "learning_rate": 0.00038043263964950713, "loss": 0.798, "step": 4324 }, { "epoch": 0.2930440158887449, "grad_norm": 2.477733850479126, "learning_rate": 0.0003804271631982475, "loss": 0.7722, "step": 4325 }, { "epoch": 0.2931117717305689, "grad_norm": 3.18991756439209, "learning_rate": 0.0003804216867469879, "loss": 0.9344, "step": 4326 }, { "epoch": 0.2931795275723929, "grad_norm": 4.135895729064941, "learning_rate": 0.0003804162102957284, "loss": 0.9928, "step": 4327 }, { "epoch": 0.29324728341421685, "grad_norm": 4.411729335784912, "learning_rate": 0.00038041073384446883, "loss": 1.0569, "step": 4328 }, { "epoch": 0.29331503925604085, "grad_norm": 3.6488797664642334, "learning_rate": 0.00038040525739320923, "loss": 0.7272, "step": 4329 }, { "epoch": 0.29338279509786486, "grad_norm": 3.0957894325256348, "learning_rate": 0.00038039978094194963, "loss": 0.6943, "step": 4330 }, { "epoch": 0.29345055093968886, "grad_norm": 5.358152866363525, "learning_rate": 0.00038039430449069003, "loss": 1.0911, "step": 4331 }, { "epoch": 0.2935183067815128, "grad_norm": 4.135717868804932, "learning_rate": 0.00038038882803943043, "loss": 1.0927, "step": 4332 }, { "epoch": 0.2935860626233368, "grad_norm": 3.908083200454712, "learning_rate": 0.0003803833515881709, "loss": 1.014, "step": 4333 }, { "epoch": 0.2936538184651608, "grad_norm": 2.5688416957855225, "learning_rate": 0.00038037787513691134, "loss": 0.7724, "step": 4334 }, { "epoch": 0.29372157430698476, "grad_norm": 2.855346202850342, "learning_rate": 0.00038037239868565174, "loss": 0.8456, "step": 4335 }, { "epoch": 0.29378933014880876, "grad_norm": 3.7636311054229736, "learning_rate": 0.00038036692223439214, "loss": 0.9784, "step": 4336 }, { "epoch": 0.29385708599063276, "grad_norm": 2.7198569774627686, "learning_rate": 0.00038036144578313253, "loss": 0.8011, "step": 4337 }, { "epoch": 0.29392484183245676, "grad_norm": 3.7273168563842773, "learning_rate": 0.000380355969331873, "loss": 0.7948, "step": 4338 }, { "epoch": 0.2939925976742807, "grad_norm": 4.051516056060791, "learning_rate": 0.0003803504928806134, "loss": 1.0058, "step": 4339 }, { "epoch": 0.2940603535161047, "grad_norm": 3.832998752593994, "learning_rate": 0.0003803450164293538, "loss": 1.13, "step": 4340 }, { "epoch": 0.2941281093579287, "grad_norm": 4.376303672790527, "learning_rate": 0.0003803395399780942, "loss": 1.0352, "step": 4341 }, { "epoch": 0.2941958651997527, "grad_norm": 2.7881293296813965, "learning_rate": 0.00038033406352683464, "loss": 0.7934, "step": 4342 }, { "epoch": 0.29426362104157666, "grad_norm": 2.9450955390930176, "learning_rate": 0.00038032858707557504, "loss": 0.8149, "step": 4343 }, { "epoch": 0.29433137688340066, "grad_norm": 2.8366193771362305, "learning_rate": 0.0003803231106243155, "loss": 0.7986, "step": 4344 }, { "epoch": 0.29439913272522467, "grad_norm": 3.1993319988250732, "learning_rate": 0.0003803176341730559, "loss": 0.78, "step": 4345 }, { "epoch": 0.2944668885670486, "grad_norm": 3.7250428199768066, "learning_rate": 0.0003803121577217963, "loss": 0.9829, "step": 4346 }, { "epoch": 0.2945346444088726, "grad_norm": 3.4920644760131836, "learning_rate": 0.0003803066812705367, "loss": 0.9139, "step": 4347 }, { "epoch": 0.2946024002506966, "grad_norm": 4.434370040893555, "learning_rate": 0.0003803012048192771, "loss": 1.0057, "step": 4348 }, { "epoch": 0.2946701560925206, "grad_norm": 3.291375160217285, "learning_rate": 0.00038029572836801754, "loss": 0.9686, "step": 4349 }, { "epoch": 0.29473791193434457, "grad_norm": 4.215330600738525, "learning_rate": 0.000380290251916758, "loss": 0.8479, "step": 4350 }, { "epoch": 0.29480566777616857, "grad_norm": 4.709025859832764, "learning_rate": 0.0003802847754654984, "loss": 1.1157, "step": 4351 }, { "epoch": 0.29487342361799257, "grad_norm": 3.182203531265259, "learning_rate": 0.0003802792990142388, "loss": 0.7398, "step": 4352 }, { "epoch": 0.29494117945981657, "grad_norm": 3.556027412414551, "learning_rate": 0.0003802738225629792, "loss": 0.8623, "step": 4353 }, { "epoch": 0.2950089353016405, "grad_norm": 3.790976047515869, "learning_rate": 0.00038026834611171965, "loss": 1.1392, "step": 4354 }, { "epoch": 0.2950766911434645, "grad_norm": 3.453033208847046, "learning_rate": 0.00038026286966046005, "loss": 0.7597, "step": 4355 }, { "epoch": 0.2951444469852885, "grad_norm": 4.075577735900879, "learning_rate": 0.00038025739320920044, "loss": 0.9211, "step": 4356 }, { "epoch": 0.2952122028271125, "grad_norm": 4.265800952911377, "learning_rate": 0.00038025191675794084, "loss": 1.1545, "step": 4357 }, { "epoch": 0.29527995866893647, "grad_norm": 3.633449077606201, "learning_rate": 0.0003802464403066813, "loss": 0.9322, "step": 4358 }, { "epoch": 0.2953477145107605, "grad_norm": 3.01690673828125, "learning_rate": 0.0003802409638554217, "loss": 0.7897, "step": 4359 }, { "epoch": 0.2954154703525845, "grad_norm": 3.4099857807159424, "learning_rate": 0.00038023548740416215, "loss": 0.9133, "step": 4360 }, { "epoch": 0.2954832261944084, "grad_norm": 2.7241127490997314, "learning_rate": 0.00038023001095290255, "loss": 0.7261, "step": 4361 }, { "epoch": 0.2955509820362324, "grad_norm": 3.176345109939575, "learning_rate": 0.00038022453450164295, "loss": 0.8464, "step": 4362 }, { "epoch": 0.2956187378780564, "grad_norm": 3.7260196208953857, "learning_rate": 0.00038021905805038335, "loss": 0.8218, "step": 4363 }, { "epoch": 0.2956864937198804, "grad_norm": 3.823957920074463, "learning_rate": 0.00038021358159912375, "loss": 1.0976, "step": 4364 }, { "epoch": 0.2957542495617044, "grad_norm": 4.368053913116455, "learning_rate": 0.0003802081051478642, "loss": 1.1805, "step": 4365 }, { "epoch": 0.2958220054035284, "grad_norm": 4.897889614105225, "learning_rate": 0.00038020262869660465, "loss": 1.108, "step": 4366 }, { "epoch": 0.2958897612453524, "grad_norm": 2.964261054992676, "learning_rate": 0.00038019715224534505, "loss": 0.9646, "step": 4367 }, { "epoch": 0.2959575170871764, "grad_norm": 3.2946619987487793, "learning_rate": 0.00038019167579408545, "loss": 0.9927, "step": 4368 }, { "epoch": 0.2960252729290003, "grad_norm": 2.4871509075164795, "learning_rate": 0.00038018619934282585, "loss": 0.6857, "step": 4369 }, { "epoch": 0.29609302877082433, "grad_norm": 3.992846727371216, "learning_rate": 0.00038018072289156625, "loss": 1.0676, "step": 4370 }, { "epoch": 0.29616078461264833, "grad_norm": 2.812670946121216, "learning_rate": 0.0003801752464403067, "loss": 0.9887, "step": 4371 }, { "epoch": 0.29622854045447233, "grad_norm": 4.818793773651123, "learning_rate": 0.0003801697699890471, "loss": 0.9708, "step": 4372 }, { "epoch": 0.2962962962962963, "grad_norm": 3.4273343086242676, "learning_rate": 0.00038016429353778756, "loss": 0.9955, "step": 4373 }, { "epoch": 0.2963640521381203, "grad_norm": 3.4689981937408447, "learning_rate": 0.00038015881708652796, "loss": 0.9672, "step": 4374 }, { "epoch": 0.2964318079799443, "grad_norm": 5.151220798492432, "learning_rate": 0.00038015334063526836, "loss": 0.9725, "step": 4375 }, { "epoch": 0.29649956382176823, "grad_norm": 4.226764678955078, "learning_rate": 0.0003801478641840088, "loss": 0.967, "step": 4376 }, { "epoch": 0.29656731966359223, "grad_norm": 4.003239154815674, "learning_rate": 0.0003801423877327492, "loss": 0.863, "step": 4377 }, { "epoch": 0.29663507550541623, "grad_norm": 3.715407371520996, "learning_rate": 0.0003801369112814896, "loss": 0.954, "step": 4378 }, { "epoch": 0.29670283134724024, "grad_norm": 11.5781831741333, "learning_rate": 0.00038013143483023, "loss": 0.9465, "step": 4379 }, { "epoch": 0.2967705871890642, "grad_norm": 3.559690237045288, "learning_rate": 0.0003801259583789704, "loss": 1.0064, "step": 4380 }, { "epoch": 0.2968383430308882, "grad_norm": 3.3289124965667725, "learning_rate": 0.00038012048192771086, "loss": 1.0539, "step": 4381 }, { "epoch": 0.2969060988727122, "grad_norm": 3.177163600921631, "learning_rate": 0.0003801150054764513, "loss": 0.833, "step": 4382 }, { "epoch": 0.2969738547145362, "grad_norm": 3.3239693641662598, "learning_rate": 0.0003801095290251917, "loss": 0.8716, "step": 4383 }, { "epoch": 0.29704161055636014, "grad_norm": 3.148430347442627, "learning_rate": 0.0003801040525739321, "loss": 0.9429, "step": 4384 }, { "epoch": 0.29710936639818414, "grad_norm": 3.7606542110443115, "learning_rate": 0.0003800985761226725, "loss": 0.8017, "step": 4385 }, { "epoch": 0.29717712224000814, "grad_norm": 3.3007149696350098, "learning_rate": 0.0003800930996714129, "loss": 0.8909, "step": 4386 }, { "epoch": 0.29724487808183214, "grad_norm": 3.892944097518921, "learning_rate": 0.00038008762322015336, "loss": 0.9845, "step": 4387 }, { "epoch": 0.2973126339236561, "grad_norm": 3.0537233352661133, "learning_rate": 0.00038008214676889376, "loss": 0.855, "step": 4388 }, { "epoch": 0.2973803897654801, "grad_norm": 3.188215970993042, "learning_rate": 0.0003800766703176342, "loss": 0.8795, "step": 4389 }, { "epoch": 0.2974481456073041, "grad_norm": 3.0596938133239746, "learning_rate": 0.0003800711938663746, "loss": 0.7622, "step": 4390 }, { "epoch": 0.29751590144912804, "grad_norm": 2.6917057037353516, "learning_rate": 0.000380065717415115, "loss": 0.7589, "step": 4391 }, { "epoch": 0.29758365729095204, "grad_norm": 3.6473495960235596, "learning_rate": 0.00038006024096385547, "loss": 0.8854, "step": 4392 }, { "epoch": 0.29765141313277604, "grad_norm": 4.2144775390625, "learning_rate": 0.00038005476451259587, "loss": 0.9877, "step": 4393 }, { "epoch": 0.29771916897460005, "grad_norm": 3.3141822814941406, "learning_rate": 0.00038004928806133627, "loss": 0.7945, "step": 4394 }, { "epoch": 0.297786924816424, "grad_norm": 3.6599018573760986, "learning_rate": 0.00038004381161007666, "loss": 0.9207, "step": 4395 }, { "epoch": 0.297854680658248, "grad_norm": 3.877842903137207, "learning_rate": 0.00038003833515881706, "loss": 1.0039, "step": 4396 }, { "epoch": 0.297922436500072, "grad_norm": 3.4832518100738525, "learning_rate": 0.0003800328587075575, "loss": 0.9578, "step": 4397 }, { "epoch": 0.297990192341896, "grad_norm": 5.874382972717285, "learning_rate": 0.00038002738225629797, "loss": 1.0786, "step": 4398 }, { "epoch": 0.29805794818371995, "grad_norm": 5.890988349914551, "learning_rate": 0.00038002190580503837, "loss": 1.0781, "step": 4399 }, { "epoch": 0.29812570402554395, "grad_norm": 3.634049892425537, "learning_rate": 0.00038001642935377877, "loss": 0.8917, "step": 4400 }, { "epoch": 0.29819345986736795, "grad_norm": 3.591689348220825, "learning_rate": 0.00038001095290251917, "loss": 0.8511, "step": 4401 }, { "epoch": 0.29826121570919195, "grad_norm": 2.9758284091949463, "learning_rate": 0.00038000547645125957, "loss": 0.7485, "step": 4402 }, { "epoch": 0.2983289715510159, "grad_norm": 3.7794888019561768, "learning_rate": 0.00038, "loss": 0.8905, "step": 4403 }, { "epoch": 0.2983967273928399, "grad_norm": 3.780477285385132, "learning_rate": 0.0003799945235487405, "loss": 1.0766, "step": 4404 }, { "epoch": 0.2984644832346639, "grad_norm": 3.5104832649230957, "learning_rate": 0.0003799890470974809, "loss": 0.8216, "step": 4405 }, { "epoch": 0.29853223907648785, "grad_norm": 2.9930553436279297, "learning_rate": 0.00037998357064622127, "loss": 0.8322, "step": 4406 }, { "epoch": 0.29859999491831185, "grad_norm": 3.821732997894287, "learning_rate": 0.00037997809419496167, "loss": 0.8219, "step": 4407 }, { "epoch": 0.29866775076013585, "grad_norm": 3.52270770072937, "learning_rate": 0.00037997261774370207, "loss": 0.9795, "step": 4408 }, { "epoch": 0.29873550660195985, "grad_norm": 3.02400279045105, "learning_rate": 0.0003799671412924425, "loss": 0.785, "step": 4409 }, { "epoch": 0.2988032624437838, "grad_norm": 4.531204700469971, "learning_rate": 0.0003799616648411829, "loss": 0.9056, "step": 4410 }, { "epoch": 0.2988710182856078, "grad_norm": 4.234500885009766, "learning_rate": 0.0003799561883899233, "loss": 0.9984, "step": 4411 }, { "epoch": 0.2989387741274318, "grad_norm": 2.7530860900878906, "learning_rate": 0.0003799507119386638, "loss": 0.7368, "step": 4412 }, { "epoch": 0.2990065299692558, "grad_norm": 4.414149284362793, "learning_rate": 0.0003799452354874042, "loss": 0.9116, "step": 4413 }, { "epoch": 0.29907428581107975, "grad_norm": 4.961386680603027, "learning_rate": 0.00037993975903614463, "loss": 1.2861, "step": 4414 }, { "epoch": 0.29914204165290376, "grad_norm": 3.7531275749206543, "learning_rate": 0.00037993428258488503, "loss": 0.8906, "step": 4415 }, { "epoch": 0.29920979749472776, "grad_norm": 3.8779497146606445, "learning_rate": 0.00037992880613362543, "loss": 0.9586, "step": 4416 }, { "epoch": 0.29927755333655176, "grad_norm": 4.087754726409912, "learning_rate": 0.0003799233296823658, "loss": 0.8118, "step": 4417 }, { "epoch": 0.2993453091783757, "grad_norm": 4.045356273651123, "learning_rate": 0.0003799178532311062, "loss": 0.781, "step": 4418 }, { "epoch": 0.2994130650201997, "grad_norm": 3.581976890563965, "learning_rate": 0.0003799123767798467, "loss": 0.8975, "step": 4419 }, { "epoch": 0.2994808208620237, "grad_norm": 3.202779531478882, "learning_rate": 0.00037990690032858713, "loss": 0.9771, "step": 4420 }, { "epoch": 0.29954857670384766, "grad_norm": 3.313912868499756, "learning_rate": 0.00037990142387732753, "loss": 0.8832, "step": 4421 }, { "epoch": 0.29961633254567166, "grad_norm": 5.032614231109619, "learning_rate": 0.00037989594742606793, "loss": 1.1622, "step": 4422 }, { "epoch": 0.29968408838749566, "grad_norm": 3.6696817874908447, "learning_rate": 0.00037989047097480833, "loss": 0.9584, "step": 4423 }, { "epoch": 0.29975184422931966, "grad_norm": 2.768148183822632, "learning_rate": 0.00037988499452354873, "loss": 0.7099, "step": 4424 }, { "epoch": 0.2998196000711436, "grad_norm": 2.4647433757781982, "learning_rate": 0.0003798795180722892, "loss": 0.683, "step": 4425 }, { "epoch": 0.2998873559129676, "grad_norm": 3.742246150970459, "learning_rate": 0.0003798740416210296, "loss": 1.3981, "step": 4426 }, { "epoch": 0.2999551117547916, "grad_norm": 3.560584545135498, "learning_rate": 0.00037986856516977, "loss": 0.9353, "step": 4427 }, { "epoch": 0.3000228675966156, "grad_norm": 3.407031536102295, "learning_rate": 0.00037986308871851043, "loss": 0.8298, "step": 4428 }, { "epoch": 0.30009062343843956, "grad_norm": 2.922713041305542, "learning_rate": 0.00037985761226725083, "loss": 0.9429, "step": 4429 }, { "epoch": 0.30015837928026357, "grad_norm": 4.89544153213501, "learning_rate": 0.0003798521358159913, "loss": 1.0936, "step": 4430 }, { "epoch": 0.30022613512208757, "grad_norm": 4.240882873535156, "learning_rate": 0.0003798466593647317, "loss": 1.1234, "step": 4431 }, { "epoch": 0.30029389096391157, "grad_norm": 3.45776629447937, "learning_rate": 0.0003798411829134721, "loss": 0.9246, "step": 4432 }, { "epoch": 0.3003616468057355, "grad_norm": 4.292581558227539, "learning_rate": 0.0003798357064622125, "loss": 1.1641, "step": 4433 }, { "epoch": 0.3004294026475595, "grad_norm": 3.923431396484375, "learning_rate": 0.0003798302300109529, "loss": 0.9645, "step": 4434 }, { "epoch": 0.3004971584893835, "grad_norm": 3.4448068141937256, "learning_rate": 0.00037982475355969334, "loss": 0.8785, "step": 4435 }, { "epoch": 0.30056491433120747, "grad_norm": 3.1524956226348877, "learning_rate": 0.0003798192771084338, "loss": 0.7267, "step": 4436 }, { "epoch": 0.30063267017303147, "grad_norm": 4.642340660095215, "learning_rate": 0.0003798138006571742, "loss": 1.1826, "step": 4437 }, { "epoch": 0.30070042601485547, "grad_norm": 4.231451511383057, "learning_rate": 0.0003798083242059146, "loss": 0.804, "step": 4438 }, { "epoch": 0.3007681818566795, "grad_norm": 3.0743515491485596, "learning_rate": 0.000379802847754655, "loss": 0.9763, "step": 4439 }, { "epoch": 0.3008359376985034, "grad_norm": 3.738386631011963, "learning_rate": 0.0003797973713033954, "loss": 0.9925, "step": 4440 }, { "epoch": 0.3009036935403274, "grad_norm": 4.080605506896973, "learning_rate": 0.00037979189485213584, "loss": 1.3171, "step": 4441 }, { "epoch": 0.3009714493821514, "grad_norm": 3.880580425262451, "learning_rate": 0.00037978641840087624, "loss": 0.7246, "step": 4442 }, { "epoch": 0.3010392052239754, "grad_norm": 3.057377338409424, "learning_rate": 0.0003797809419496167, "loss": 0.8893, "step": 4443 }, { "epoch": 0.3011069610657994, "grad_norm": 3.557811737060547, "learning_rate": 0.0003797754654983571, "loss": 1.1083, "step": 4444 }, { "epoch": 0.3011747169076234, "grad_norm": 3.711604595184326, "learning_rate": 0.0003797699890470975, "loss": 0.7983, "step": 4445 }, { "epoch": 0.3012424727494474, "grad_norm": 2.7069451808929443, "learning_rate": 0.0003797645125958379, "loss": 0.8782, "step": 4446 }, { "epoch": 0.3013102285912714, "grad_norm": 3.267634153366089, "learning_rate": 0.00037975903614457835, "loss": 0.753, "step": 4447 }, { "epoch": 0.3013779844330953, "grad_norm": 3.3387465476989746, "learning_rate": 0.00037975355969331874, "loss": 1.0109, "step": 4448 }, { "epoch": 0.3014457402749193, "grad_norm": 3.2700443267822266, "learning_rate": 0.00037974808324205914, "loss": 0.9332, "step": 4449 }, { "epoch": 0.30151349611674333, "grad_norm": 4.567131042480469, "learning_rate": 0.00037974260679079954, "loss": 1.0524, "step": 4450 }, { "epoch": 0.3015812519585673, "grad_norm": 2.6771926879882812, "learning_rate": 0.00037973713033954, "loss": 0.7223, "step": 4451 }, { "epoch": 0.3016490078003913, "grad_norm": 3.1960675716400146, "learning_rate": 0.00037973165388828045, "loss": 0.8622, "step": 4452 }, { "epoch": 0.3017167636422153, "grad_norm": 3.5057806968688965, "learning_rate": 0.00037972617743702085, "loss": 0.8426, "step": 4453 }, { "epoch": 0.3017845194840393, "grad_norm": 3.1351516246795654, "learning_rate": 0.00037972070098576125, "loss": 0.7808, "step": 4454 }, { "epoch": 0.30185227532586323, "grad_norm": 4.103443145751953, "learning_rate": 0.00037971522453450165, "loss": 1.0708, "step": 4455 }, { "epoch": 0.30192003116768723, "grad_norm": 5.45054817199707, "learning_rate": 0.00037970974808324205, "loss": 1.1272, "step": 4456 }, { "epoch": 0.30198778700951123, "grad_norm": 3.9042348861694336, "learning_rate": 0.0003797042716319825, "loss": 0.7891, "step": 4457 }, { "epoch": 0.30205554285133523, "grad_norm": 2.997398853302002, "learning_rate": 0.0003796987951807229, "loss": 0.828, "step": 4458 }, { "epoch": 0.3021232986931592, "grad_norm": 3.768979787826538, "learning_rate": 0.00037969331872946335, "loss": 1.1547, "step": 4459 }, { "epoch": 0.3021910545349832, "grad_norm": 4.097525596618652, "learning_rate": 0.00037968784227820375, "loss": 1.0393, "step": 4460 }, { "epoch": 0.3022588103768072, "grad_norm": 3.500101089477539, "learning_rate": 0.00037968236582694415, "loss": 0.887, "step": 4461 }, { "epoch": 0.3023265662186312, "grad_norm": 4.243471622467041, "learning_rate": 0.00037967688937568455, "loss": 1.0061, "step": 4462 }, { "epoch": 0.30239432206045513, "grad_norm": 3.176670789718628, "learning_rate": 0.000379671412924425, "loss": 0.9531, "step": 4463 }, { "epoch": 0.30246207790227914, "grad_norm": 4.051902770996094, "learning_rate": 0.0003796659364731654, "loss": 1.0692, "step": 4464 }, { "epoch": 0.30252983374410314, "grad_norm": 2.909684419631958, "learning_rate": 0.0003796604600219058, "loss": 0.9729, "step": 4465 }, { "epoch": 0.3025975895859271, "grad_norm": 4.806419372558594, "learning_rate": 0.00037965498357064626, "loss": 0.9121, "step": 4466 }, { "epoch": 0.3026653454277511, "grad_norm": 3.4060628414154053, "learning_rate": 0.00037964950711938665, "loss": 0.8313, "step": 4467 }, { "epoch": 0.3027331012695751, "grad_norm": 3.161572217941284, "learning_rate": 0.0003796440306681271, "loss": 0.6922, "step": 4468 }, { "epoch": 0.3028008571113991, "grad_norm": 3.4175078868865967, "learning_rate": 0.0003796385542168675, "loss": 0.9403, "step": 4469 }, { "epoch": 0.30286861295322304, "grad_norm": 2.9648659229278564, "learning_rate": 0.0003796330777656079, "loss": 0.759, "step": 4470 }, { "epoch": 0.30293636879504704, "grad_norm": 4.5808024406433105, "learning_rate": 0.0003796276013143483, "loss": 1.0698, "step": 4471 }, { "epoch": 0.30300412463687104, "grad_norm": 2.79052734375, "learning_rate": 0.0003796221248630887, "loss": 0.7783, "step": 4472 }, { "epoch": 0.30307188047869504, "grad_norm": 3.9093217849731445, "learning_rate": 0.00037961664841182916, "loss": 0.911, "step": 4473 }, { "epoch": 0.303139636320519, "grad_norm": 4.116833209991455, "learning_rate": 0.0003796111719605696, "loss": 0.8192, "step": 4474 }, { "epoch": 0.303207392162343, "grad_norm": 3.0296576023101807, "learning_rate": 0.00037960569550931, "loss": 0.8881, "step": 4475 }, { "epoch": 0.303275148004167, "grad_norm": 4.223501205444336, "learning_rate": 0.0003796002190580504, "loss": 1.0161, "step": 4476 }, { "epoch": 0.303342903845991, "grad_norm": 2.8571929931640625, "learning_rate": 0.0003795947426067908, "loss": 0.726, "step": 4477 }, { "epoch": 0.30341065968781494, "grad_norm": 3.0375771522521973, "learning_rate": 0.0003795892661555312, "loss": 0.7534, "step": 4478 }, { "epoch": 0.30347841552963895, "grad_norm": 3.5569345951080322, "learning_rate": 0.00037958378970427166, "loss": 0.8345, "step": 4479 }, { "epoch": 0.30354617137146295, "grad_norm": 3.8938302993774414, "learning_rate": 0.00037957831325301206, "loss": 1.044, "step": 4480 }, { "epoch": 0.3036139272132869, "grad_norm": 4.29720401763916, "learning_rate": 0.00037957283680175246, "loss": 0.8777, "step": 4481 }, { "epoch": 0.3036816830551109, "grad_norm": 3.534524440765381, "learning_rate": 0.0003795673603504929, "loss": 0.832, "step": 4482 }, { "epoch": 0.3037494388969349, "grad_norm": 3.370337963104248, "learning_rate": 0.0003795618838992333, "loss": 0.8838, "step": 4483 }, { "epoch": 0.3038171947387589, "grad_norm": 3.978529691696167, "learning_rate": 0.0003795564074479737, "loss": 0.9639, "step": 4484 }, { "epoch": 0.30388495058058285, "grad_norm": 4.540329456329346, "learning_rate": 0.00037955093099671417, "loss": 1.0672, "step": 4485 }, { "epoch": 0.30395270642240685, "grad_norm": 4.023577690124512, "learning_rate": 0.00037954545454545457, "loss": 0.705, "step": 4486 }, { "epoch": 0.30402046226423085, "grad_norm": 3.2200231552124023, "learning_rate": 0.00037953997809419496, "loss": 0.7078, "step": 4487 }, { "epoch": 0.30408821810605485, "grad_norm": 2.5875887870788574, "learning_rate": 0.00037953450164293536, "loss": 0.7021, "step": 4488 }, { "epoch": 0.3041559739478788, "grad_norm": 4.6905741691589355, "learning_rate": 0.0003795290251916758, "loss": 1.0186, "step": 4489 }, { "epoch": 0.3042237297897028, "grad_norm": 2.8177239894866943, "learning_rate": 0.00037952354874041627, "loss": 0.7465, "step": 4490 }, { "epoch": 0.3042914856315268, "grad_norm": 4.139516353607178, "learning_rate": 0.00037951807228915667, "loss": 1.2017, "step": 4491 }, { "epoch": 0.3043592414733508, "grad_norm": 4.003684997558594, "learning_rate": 0.00037951259583789707, "loss": 0.9728, "step": 4492 }, { "epoch": 0.30442699731517475, "grad_norm": 3.696483612060547, "learning_rate": 0.00037950711938663747, "loss": 1.0453, "step": 4493 }, { "epoch": 0.30449475315699875, "grad_norm": 4.00968599319458, "learning_rate": 0.00037950164293537787, "loss": 0.808, "step": 4494 }, { "epoch": 0.30456250899882276, "grad_norm": 4.667564392089844, "learning_rate": 0.0003794961664841183, "loss": 0.9742, "step": 4495 }, { "epoch": 0.3046302648406467, "grad_norm": 3.1734423637390137, "learning_rate": 0.0003794906900328587, "loss": 0.8966, "step": 4496 }, { "epoch": 0.3046980206824707, "grad_norm": 3.902451992034912, "learning_rate": 0.0003794852135815992, "loss": 1.0063, "step": 4497 }, { "epoch": 0.3047657765242947, "grad_norm": 3.0951461791992188, "learning_rate": 0.00037947973713033957, "loss": 0.8427, "step": 4498 }, { "epoch": 0.3048335323661187, "grad_norm": 3.3436310291290283, "learning_rate": 0.00037947426067907997, "loss": 0.937, "step": 4499 }, { "epoch": 0.30490128820794266, "grad_norm": 4.164231777191162, "learning_rate": 0.00037946878422782037, "loss": 0.8722, "step": 4500 }, { "epoch": 0.30496904404976666, "grad_norm": 4.280994415283203, "learning_rate": 0.0003794633077765608, "loss": 0.8204, "step": 4501 }, { "epoch": 0.30503679989159066, "grad_norm": 3.2911837100982666, "learning_rate": 0.0003794578313253012, "loss": 0.9152, "step": 4502 }, { "epoch": 0.30510455573341466, "grad_norm": 2.5122861862182617, "learning_rate": 0.0003794523548740416, "loss": 0.7216, "step": 4503 }, { "epoch": 0.3051723115752386, "grad_norm": 3.355670213699341, "learning_rate": 0.000379446878422782, "loss": 0.9203, "step": 4504 }, { "epoch": 0.3052400674170626, "grad_norm": 4.242062091827393, "learning_rate": 0.0003794414019715225, "loss": 0.8833, "step": 4505 }, { "epoch": 0.3053078232588866, "grad_norm": 3.445676803588867, "learning_rate": 0.00037943592552026293, "loss": 0.8005, "step": 4506 }, { "epoch": 0.3053755791007106, "grad_norm": 3.3087706565856934, "learning_rate": 0.00037943044906900333, "loss": 0.8494, "step": 4507 }, { "epoch": 0.30544333494253456, "grad_norm": 4.040024280548096, "learning_rate": 0.00037942497261774373, "loss": 1.1455, "step": 4508 }, { "epoch": 0.30551109078435856, "grad_norm": 2.8252992630004883, "learning_rate": 0.0003794194961664841, "loss": 0.9267, "step": 4509 }, { "epoch": 0.30557884662618257, "grad_norm": 3.6444008350372314, "learning_rate": 0.0003794140197152245, "loss": 1.03, "step": 4510 }, { "epoch": 0.3056466024680065, "grad_norm": 3.083338975906372, "learning_rate": 0.000379408543263965, "loss": 0.8092, "step": 4511 }, { "epoch": 0.3057143583098305, "grad_norm": 4.379929065704346, "learning_rate": 0.0003794030668127054, "loss": 1.2829, "step": 4512 }, { "epoch": 0.3057821141516545, "grad_norm": 3.499091148376465, "learning_rate": 0.00037939759036144583, "loss": 0.9874, "step": 4513 }, { "epoch": 0.3058498699934785, "grad_norm": 3.471676826477051, "learning_rate": 0.00037939211391018623, "loss": 0.9353, "step": 4514 }, { "epoch": 0.30591762583530246, "grad_norm": 3.1136460304260254, "learning_rate": 0.00037938663745892663, "loss": 0.7933, "step": 4515 }, { "epoch": 0.30598538167712647, "grad_norm": 4.521962642669678, "learning_rate": 0.00037938116100766703, "loss": 0.9357, "step": 4516 }, { "epoch": 0.30605313751895047, "grad_norm": 4.902437686920166, "learning_rate": 0.0003793756845564075, "loss": 0.8488, "step": 4517 }, { "epoch": 0.30612089336077447, "grad_norm": 3.8929879665374756, "learning_rate": 0.0003793702081051479, "loss": 0.9405, "step": 4518 }, { "epoch": 0.3061886492025984, "grad_norm": 4.141440391540527, "learning_rate": 0.0003793647316538883, "loss": 1.1188, "step": 4519 }, { "epoch": 0.3062564050444224, "grad_norm": 2.6068155765533447, "learning_rate": 0.0003793592552026287, "loss": 0.83, "step": 4520 }, { "epoch": 0.3063241608862464, "grad_norm": 3.8935952186584473, "learning_rate": 0.00037935377875136913, "loss": 1.1156, "step": 4521 }, { "epoch": 0.3063919167280704, "grad_norm": 2.6715023517608643, "learning_rate": 0.00037934830230010953, "loss": 0.817, "step": 4522 }, { "epoch": 0.30645967256989437, "grad_norm": 4.171996593475342, "learning_rate": 0.00037934282584885, "loss": 0.9674, "step": 4523 }, { "epoch": 0.3065274284117184, "grad_norm": 3.3241946697235107, "learning_rate": 0.0003793373493975904, "loss": 0.9547, "step": 4524 }, { "epoch": 0.3065951842535424, "grad_norm": 3.4356980323791504, "learning_rate": 0.0003793318729463308, "loss": 0.8787, "step": 4525 }, { "epoch": 0.3066629400953663, "grad_norm": 2.711308717727661, "learning_rate": 0.0003793263964950712, "loss": 0.7506, "step": 4526 }, { "epoch": 0.3067306959371903, "grad_norm": 2.8993990421295166, "learning_rate": 0.00037932092004381164, "loss": 0.7413, "step": 4527 }, { "epoch": 0.3067984517790143, "grad_norm": 2.618000030517578, "learning_rate": 0.0003793154435925521, "loss": 0.8051, "step": 4528 }, { "epoch": 0.3068662076208383, "grad_norm": 3.1433002948760986, "learning_rate": 0.0003793099671412925, "loss": 0.7482, "step": 4529 }, { "epoch": 0.3069339634626623, "grad_norm": 3.4148519039154053, "learning_rate": 0.0003793044906900329, "loss": 0.5992, "step": 4530 }, { "epoch": 0.3070017193044863, "grad_norm": 3.388731002807617, "learning_rate": 0.0003792990142387733, "loss": 0.8927, "step": 4531 }, { "epoch": 0.3070694751463103, "grad_norm": 3.8736164569854736, "learning_rate": 0.0003792935377875137, "loss": 1.0028, "step": 4532 }, { "epoch": 0.3071372309881343, "grad_norm": 3.228879451751709, "learning_rate": 0.00037928806133625414, "loss": 0.914, "step": 4533 }, { "epoch": 0.3072049868299582, "grad_norm": 3.0897772312164307, "learning_rate": 0.00037928258488499454, "loss": 0.8088, "step": 4534 }, { "epoch": 0.30727274267178223, "grad_norm": 3.1566524505615234, "learning_rate": 0.00037927710843373494, "loss": 0.8058, "step": 4535 }, { "epoch": 0.30734049851360623, "grad_norm": 3.2445037364959717, "learning_rate": 0.0003792716319824754, "loss": 0.7743, "step": 4536 }, { "epoch": 0.30740825435543023, "grad_norm": 3.8021023273468018, "learning_rate": 0.0003792661555312158, "loss": 1.0253, "step": 4537 }, { "epoch": 0.3074760101972542, "grad_norm": 4.067466735839844, "learning_rate": 0.0003792606790799562, "loss": 0.8531, "step": 4538 }, { "epoch": 0.3075437660390782, "grad_norm": 3.9581139087677, "learning_rate": 0.00037925520262869665, "loss": 1.0856, "step": 4539 }, { "epoch": 0.3076115218809022, "grad_norm": 3.2924106121063232, "learning_rate": 0.00037924972617743704, "loss": 0.8255, "step": 4540 }, { "epoch": 0.30767927772272613, "grad_norm": 3.498828649520874, "learning_rate": 0.00037924424972617744, "loss": 0.8321, "step": 4541 }, { "epoch": 0.30774703356455013, "grad_norm": 2.811779737472534, "learning_rate": 0.00037923877327491784, "loss": 0.7523, "step": 4542 }, { "epoch": 0.30781478940637413, "grad_norm": 3.228208303451538, "learning_rate": 0.00037923329682365824, "loss": 0.7652, "step": 4543 }, { "epoch": 0.30788254524819814, "grad_norm": 2.99822998046875, "learning_rate": 0.00037922782037239875, "loss": 0.8523, "step": 4544 }, { "epoch": 0.3079503010900221, "grad_norm": 3.167470693588257, "learning_rate": 0.00037922234392113915, "loss": 0.8702, "step": 4545 }, { "epoch": 0.3080180569318461, "grad_norm": 2.238658905029297, "learning_rate": 0.00037921686746987955, "loss": 0.6953, "step": 4546 }, { "epoch": 0.3080858127736701, "grad_norm": 3.0333893299102783, "learning_rate": 0.00037921139101861995, "loss": 0.9545, "step": 4547 }, { "epoch": 0.3081535686154941, "grad_norm": 5.767490386962891, "learning_rate": 0.00037920591456736035, "loss": 1.0246, "step": 4548 }, { "epoch": 0.30822132445731804, "grad_norm": 3.0156989097595215, "learning_rate": 0.0003792004381161008, "loss": 0.8378, "step": 4549 }, { "epoch": 0.30828908029914204, "grad_norm": 3.5109500885009766, "learning_rate": 0.0003791949616648412, "loss": 0.9548, "step": 4550 }, { "epoch": 0.30835683614096604, "grad_norm": 2.7485909461975098, "learning_rate": 0.0003791894852135816, "loss": 0.8005, "step": 4551 }, { "epoch": 0.30842459198279004, "grad_norm": 2.906681537628174, "learning_rate": 0.00037918400876232205, "loss": 0.7289, "step": 4552 }, { "epoch": 0.308492347824614, "grad_norm": 2.776196241378784, "learning_rate": 0.00037917853231106245, "loss": 0.7407, "step": 4553 }, { "epoch": 0.308560103666438, "grad_norm": 2.824981451034546, "learning_rate": 0.00037917305585980285, "loss": 0.9009, "step": 4554 }, { "epoch": 0.308627859508262, "grad_norm": 2.9164481163024902, "learning_rate": 0.0003791675794085433, "loss": 0.8229, "step": 4555 }, { "epoch": 0.30869561535008594, "grad_norm": 3.773353099822998, "learning_rate": 0.0003791621029572837, "loss": 1.0639, "step": 4556 }, { "epoch": 0.30876337119190994, "grad_norm": 3.3177669048309326, "learning_rate": 0.0003791566265060241, "loss": 1.0204, "step": 4557 }, { "epoch": 0.30883112703373394, "grad_norm": 3.4843225479125977, "learning_rate": 0.0003791511500547645, "loss": 0.9328, "step": 4558 }, { "epoch": 0.30889888287555795, "grad_norm": 4.365907192230225, "learning_rate": 0.0003791456736035049, "loss": 1.2133, "step": 4559 }, { "epoch": 0.3089666387173819, "grad_norm": 2.8668618202209473, "learning_rate": 0.00037914019715224535, "loss": 0.8108, "step": 4560 }, { "epoch": 0.3090343945592059, "grad_norm": 3.049483299255371, "learning_rate": 0.0003791347207009858, "loss": 0.8461, "step": 4561 }, { "epoch": 0.3091021504010299, "grad_norm": 3.330443859100342, "learning_rate": 0.0003791292442497262, "loss": 0.9655, "step": 4562 }, { "epoch": 0.3091699062428539, "grad_norm": 3.4636497497558594, "learning_rate": 0.0003791237677984666, "loss": 0.8656, "step": 4563 }, { "epoch": 0.30923766208467784, "grad_norm": 4.817673683166504, "learning_rate": 0.000379118291347207, "loss": 0.8806, "step": 4564 }, { "epoch": 0.30930541792650185, "grad_norm": 3.603158950805664, "learning_rate": 0.00037911281489594746, "loss": 0.9683, "step": 4565 }, { "epoch": 0.30937317376832585, "grad_norm": 7.666048526763916, "learning_rate": 0.00037910733844468786, "loss": 0.9416, "step": 4566 }, { "epoch": 0.30944092961014985, "grad_norm": 4.162021160125732, "learning_rate": 0.0003791018619934283, "loss": 0.8062, "step": 4567 }, { "epoch": 0.3095086854519738, "grad_norm": 4.953823566436768, "learning_rate": 0.0003790963855421687, "loss": 1.0458, "step": 4568 }, { "epoch": 0.3095764412937978, "grad_norm": 3.1732187271118164, "learning_rate": 0.0003790909090909091, "loss": 0.7868, "step": 4569 }, { "epoch": 0.3096441971356218, "grad_norm": 4.341715335845947, "learning_rate": 0.0003790854326396495, "loss": 0.9908, "step": 4570 }, { "epoch": 0.30971195297744575, "grad_norm": 3.8978583812713623, "learning_rate": 0.00037907995618838996, "loss": 0.9023, "step": 4571 }, { "epoch": 0.30977970881926975, "grad_norm": 4.321927547454834, "learning_rate": 0.00037907447973713036, "loss": 1.5028, "step": 4572 }, { "epoch": 0.30984746466109375, "grad_norm": 3.139608860015869, "learning_rate": 0.00037906900328587076, "loss": 0.8185, "step": 4573 }, { "epoch": 0.30991522050291775, "grad_norm": 2.5016229152679443, "learning_rate": 0.00037906352683461116, "loss": 0.7993, "step": 4574 }, { "epoch": 0.3099829763447417, "grad_norm": 3.0935263633728027, "learning_rate": 0.0003790580503833516, "loss": 0.9485, "step": 4575 }, { "epoch": 0.3100507321865657, "grad_norm": 4.6556220054626465, "learning_rate": 0.000379052573932092, "loss": 0.9044, "step": 4576 }, { "epoch": 0.3101184880283897, "grad_norm": 4.109011173248291, "learning_rate": 0.00037904709748083247, "loss": 1.1298, "step": 4577 }, { "epoch": 0.3101862438702137, "grad_norm": 3.27964186668396, "learning_rate": 0.00037904162102957287, "loss": 0.7542, "step": 4578 }, { "epoch": 0.31025399971203765, "grad_norm": 2.718545436859131, "learning_rate": 0.00037903614457831326, "loss": 0.6935, "step": 4579 }, { "epoch": 0.31032175555386166, "grad_norm": 3.383640766143799, "learning_rate": 0.00037903066812705366, "loss": 0.9993, "step": 4580 }, { "epoch": 0.31038951139568566, "grad_norm": 3.862971782684326, "learning_rate": 0.00037902519167579406, "loss": 1.1462, "step": 4581 }, { "epoch": 0.31045726723750966, "grad_norm": 4.5790510177612305, "learning_rate": 0.0003790197152245345, "loss": 1.0718, "step": 4582 }, { "epoch": 0.3105250230793336, "grad_norm": 4.134291648864746, "learning_rate": 0.00037901423877327497, "loss": 1.12, "step": 4583 }, { "epoch": 0.3105927789211576, "grad_norm": 4.388884544372559, "learning_rate": 0.00037900876232201537, "loss": 1.1492, "step": 4584 }, { "epoch": 0.3106605347629816, "grad_norm": 2.931276559829712, "learning_rate": 0.00037900328587075577, "loss": 0.7821, "step": 4585 }, { "epoch": 0.31072829060480556, "grad_norm": 4.025268077850342, "learning_rate": 0.00037899780941949617, "loss": 1.0539, "step": 4586 }, { "epoch": 0.31079604644662956, "grad_norm": 3.256430149078369, "learning_rate": 0.0003789923329682366, "loss": 0.9294, "step": 4587 }, { "epoch": 0.31086380228845356, "grad_norm": 4.318219184875488, "learning_rate": 0.000378986856516977, "loss": 1.0911, "step": 4588 }, { "epoch": 0.31093155813027756, "grad_norm": 2.506279706954956, "learning_rate": 0.0003789813800657174, "loss": 0.6322, "step": 4589 }, { "epoch": 0.3109993139721015, "grad_norm": 2.8441672325134277, "learning_rate": 0.0003789759036144578, "loss": 0.8745, "step": 4590 }, { "epoch": 0.3110670698139255, "grad_norm": 3.8599133491516113, "learning_rate": 0.00037897042716319827, "loss": 0.8819, "step": 4591 }, { "epoch": 0.3111348256557495, "grad_norm": 4.410976886749268, "learning_rate": 0.00037896495071193867, "loss": 0.9654, "step": 4592 }, { "epoch": 0.3112025814975735, "grad_norm": 3.8373639583587646, "learning_rate": 0.0003789594742606791, "loss": 0.9569, "step": 4593 }, { "epoch": 0.31127033733939746, "grad_norm": 2.78944993019104, "learning_rate": 0.0003789539978094195, "loss": 0.8078, "step": 4594 }, { "epoch": 0.31133809318122146, "grad_norm": 3.001875877380371, "learning_rate": 0.0003789485213581599, "loss": 0.8723, "step": 4595 }, { "epoch": 0.31140584902304547, "grad_norm": 4.413790702819824, "learning_rate": 0.0003789430449069003, "loss": 0.9799, "step": 4596 }, { "epoch": 0.31147360486486947, "grad_norm": 2.7290873527526855, "learning_rate": 0.0003789375684556407, "loss": 0.7828, "step": 4597 }, { "epoch": 0.3115413607066934, "grad_norm": 3.1180129051208496, "learning_rate": 0.0003789320920043812, "loss": 0.8674, "step": 4598 }, { "epoch": 0.3116091165485174, "grad_norm": 3.158015251159668, "learning_rate": 0.00037892661555312163, "loss": 0.7737, "step": 4599 }, { "epoch": 0.3116768723903414, "grad_norm": 4.671678066253662, "learning_rate": 0.00037892113910186203, "loss": 1.1673, "step": 4600 }, { "epoch": 0.31174462823216537, "grad_norm": 3.0967347621917725, "learning_rate": 0.0003789156626506024, "loss": 0.6849, "step": 4601 }, { "epoch": 0.31181238407398937, "grad_norm": 3.6035404205322266, "learning_rate": 0.0003789101861993428, "loss": 0.8661, "step": 4602 }, { "epoch": 0.31188013991581337, "grad_norm": 6.099546909332275, "learning_rate": 0.0003789047097480833, "loss": 0.9663, "step": 4603 }, { "epoch": 0.3119478957576374, "grad_norm": 4.470375061035156, "learning_rate": 0.0003788992332968237, "loss": 1.1123, "step": 4604 }, { "epoch": 0.3120156515994613, "grad_norm": 4.220643997192383, "learning_rate": 0.0003788937568455641, "loss": 1.1166, "step": 4605 }, { "epoch": 0.3120834074412853, "grad_norm": 3.481326103210449, "learning_rate": 0.00037888828039430453, "loss": 0.89, "step": 4606 }, { "epoch": 0.3121511632831093, "grad_norm": 3.809579610824585, "learning_rate": 0.00037888280394304493, "loss": 0.8796, "step": 4607 }, { "epoch": 0.3122189191249333, "grad_norm": 3.099407911300659, "learning_rate": 0.00037887732749178533, "loss": 0.8136, "step": 4608 }, { "epoch": 0.31228667496675727, "grad_norm": 3.786062002182007, "learning_rate": 0.0003788718510405258, "loss": 0.9796, "step": 4609 }, { "epoch": 0.3123544308085813, "grad_norm": 3.3820106983184814, "learning_rate": 0.0003788663745892662, "loss": 0.9832, "step": 4610 }, { "epoch": 0.3124221866504053, "grad_norm": 4.085707664489746, "learning_rate": 0.0003788608981380066, "loss": 0.9759, "step": 4611 }, { "epoch": 0.3124899424922293, "grad_norm": 4.434978485107422, "learning_rate": 0.000378855421686747, "loss": 0.9126, "step": 4612 }, { "epoch": 0.3125576983340532, "grad_norm": 3.998748540878296, "learning_rate": 0.0003788499452354874, "loss": 0.7536, "step": 4613 }, { "epoch": 0.3126254541758772, "grad_norm": 3.5413758754730225, "learning_rate": 0.00037884446878422783, "loss": 1.1436, "step": 4614 }, { "epoch": 0.31269321001770123, "grad_norm": 4.227705478668213, "learning_rate": 0.0003788389923329683, "loss": 1.0661, "step": 4615 }, { "epoch": 0.3127609658595252, "grad_norm": 2.2903711795806885, "learning_rate": 0.0003788335158817087, "loss": 0.6982, "step": 4616 }, { "epoch": 0.3128287217013492, "grad_norm": 3.2300753593444824, "learning_rate": 0.0003788280394304491, "loss": 0.8518, "step": 4617 }, { "epoch": 0.3128964775431732, "grad_norm": 2.794217109680176, "learning_rate": 0.0003788225629791895, "loss": 0.7786, "step": 4618 }, { "epoch": 0.3129642333849972, "grad_norm": 3.6442604064941406, "learning_rate": 0.0003788170865279299, "loss": 0.9333, "step": 4619 }, { "epoch": 0.31303198922682113, "grad_norm": 3.184316873550415, "learning_rate": 0.00037881161007667034, "loss": 0.9023, "step": 4620 }, { "epoch": 0.31309974506864513, "grad_norm": 3.9806787967681885, "learning_rate": 0.00037880613362541074, "loss": 0.9762, "step": 4621 }, { "epoch": 0.31316750091046913, "grad_norm": 3.9702556133270264, "learning_rate": 0.0003788006571741512, "loss": 1.0699, "step": 4622 }, { "epoch": 0.31323525675229313, "grad_norm": 3.0698258876800537, "learning_rate": 0.0003787951807228916, "loss": 0.9036, "step": 4623 }, { "epoch": 0.3133030125941171, "grad_norm": 2.736729145050049, "learning_rate": 0.000378789704271632, "loss": 0.6791, "step": 4624 }, { "epoch": 0.3133707684359411, "grad_norm": 2.823978900909424, "learning_rate": 0.00037878422782037244, "loss": 0.8017, "step": 4625 }, { "epoch": 0.3134385242777651, "grad_norm": 2.8283133506774902, "learning_rate": 0.00037877875136911284, "loss": 0.8272, "step": 4626 }, { "epoch": 0.3135062801195891, "grad_norm": 3.2315690517425537, "learning_rate": 0.00037877327491785324, "loss": 0.9289, "step": 4627 }, { "epoch": 0.31357403596141303, "grad_norm": 3.1178383827209473, "learning_rate": 0.00037876779846659364, "loss": 1.0113, "step": 4628 }, { "epoch": 0.31364179180323704, "grad_norm": 3.502802610397339, "learning_rate": 0.0003787623220153341, "loss": 0.8226, "step": 4629 }, { "epoch": 0.31370954764506104, "grad_norm": 2.7579305171966553, "learning_rate": 0.0003787568455640745, "loss": 0.7943, "step": 4630 }, { "epoch": 0.313777303486885, "grad_norm": 3.079449415206909, "learning_rate": 0.00037875136911281494, "loss": 0.8419, "step": 4631 }, { "epoch": 0.313845059328709, "grad_norm": 3.1961159706115723, "learning_rate": 0.00037874589266155534, "loss": 0.7657, "step": 4632 }, { "epoch": 0.313912815170533, "grad_norm": 3.367253303527832, "learning_rate": 0.00037874041621029574, "loss": 0.805, "step": 4633 }, { "epoch": 0.313980571012357, "grad_norm": 4.040487766265869, "learning_rate": 0.00037873493975903614, "loss": 0.8753, "step": 4634 }, { "epoch": 0.31404832685418094, "grad_norm": 3.457158088684082, "learning_rate": 0.00037872946330777654, "loss": 0.8623, "step": 4635 }, { "epoch": 0.31411608269600494, "grad_norm": 3.4282166957855225, "learning_rate": 0.000378723986856517, "loss": 0.7817, "step": 4636 }, { "epoch": 0.31418383853782894, "grad_norm": 3.1567749977111816, "learning_rate": 0.00037871851040525745, "loss": 0.9189, "step": 4637 }, { "epoch": 0.31425159437965294, "grad_norm": 2.902388334274292, "learning_rate": 0.00037871303395399785, "loss": 0.7319, "step": 4638 }, { "epoch": 0.3143193502214769, "grad_norm": 3.429194688796997, "learning_rate": 0.00037870755750273825, "loss": 0.6947, "step": 4639 }, { "epoch": 0.3143871060633009, "grad_norm": 2.5025811195373535, "learning_rate": 0.00037870208105147865, "loss": 0.5711, "step": 4640 }, { "epoch": 0.3144548619051249, "grad_norm": 3.066978931427002, "learning_rate": 0.0003786966046002191, "loss": 0.675, "step": 4641 }, { "epoch": 0.3145226177469489, "grad_norm": 3.387676477432251, "learning_rate": 0.0003786911281489595, "loss": 1.0245, "step": 4642 }, { "epoch": 0.31459037358877284, "grad_norm": 3.4964938163757324, "learning_rate": 0.0003786856516976999, "loss": 0.7639, "step": 4643 }, { "epoch": 0.31465812943059684, "grad_norm": 3.586219310760498, "learning_rate": 0.0003786801752464403, "loss": 0.7405, "step": 4644 }, { "epoch": 0.31472588527242085, "grad_norm": 3.2271339893341064, "learning_rate": 0.00037867469879518075, "loss": 0.8906, "step": 4645 }, { "epoch": 0.3147936411142448, "grad_norm": 2.84842848777771, "learning_rate": 0.00037866922234392115, "loss": 0.6648, "step": 4646 }, { "epoch": 0.3148613969560688, "grad_norm": 2.8892788887023926, "learning_rate": 0.0003786637458926616, "loss": 0.7892, "step": 4647 }, { "epoch": 0.3149291527978928, "grad_norm": 2.915573835372925, "learning_rate": 0.000378658269441402, "loss": 0.7889, "step": 4648 }, { "epoch": 0.3149969086397168, "grad_norm": 4.047836780548096, "learning_rate": 0.0003786527929901424, "loss": 1.052, "step": 4649 }, { "epoch": 0.31506466448154075, "grad_norm": 3.420975685119629, "learning_rate": 0.0003786473165388828, "loss": 0.8062, "step": 4650 }, { "epoch": 0.31513242032336475, "grad_norm": 3.898582696914673, "learning_rate": 0.0003786418400876232, "loss": 1.0188, "step": 4651 }, { "epoch": 0.31520017616518875, "grad_norm": 3.4556851387023926, "learning_rate": 0.00037863636363636365, "loss": 0.8441, "step": 4652 }, { "epoch": 0.31526793200701275, "grad_norm": 3.2790021896362305, "learning_rate": 0.0003786308871851041, "loss": 0.7247, "step": 4653 }, { "epoch": 0.3153356878488367, "grad_norm": 3.8260836601257324, "learning_rate": 0.0003786254107338445, "loss": 0.9848, "step": 4654 }, { "epoch": 0.3154034436906607, "grad_norm": 4.868940830230713, "learning_rate": 0.0003786199342825849, "loss": 0.9228, "step": 4655 }, { "epoch": 0.3154711995324847, "grad_norm": 4.164638996124268, "learning_rate": 0.0003786144578313253, "loss": 1.0859, "step": 4656 }, { "epoch": 0.3155389553743087, "grad_norm": 3.9221911430358887, "learning_rate": 0.0003786089813800657, "loss": 1.0688, "step": 4657 }, { "epoch": 0.31560671121613265, "grad_norm": 4.283849716186523, "learning_rate": 0.00037860350492880616, "loss": 1.0418, "step": 4658 }, { "epoch": 0.31567446705795665, "grad_norm": 3.0096805095672607, "learning_rate": 0.00037859802847754656, "loss": 0.7325, "step": 4659 }, { "epoch": 0.31574222289978066, "grad_norm": 3.707134962081909, "learning_rate": 0.000378592552026287, "loss": 1.1385, "step": 4660 }, { "epoch": 0.3158099787416046, "grad_norm": 3.6009268760681152, "learning_rate": 0.0003785870755750274, "loss": 1.0908, "step": 4661 }, { "epoch": 0.3158777345834286, "grad_norm": 2.856957197189331, "learning_rate": 0.0003785815991237678, "loss": 0.8308, "step": 4662 }, { "epoch": 0.3159454904252526, "grad_norm": 2.894627094268799, "learning_rate": 0.00037857612267250826, "loss": 0.7617, "step": 4663 }, { "epoch": 0.3160132462670766, "grad_norm": 3.3188095092773438, "learning_rate": 0.00037857064622124866, "loss": 0.8026, "step": 4664 }, { "epoch": 0.31608100210890056, "grad_norm": 3.3723158836364746, "learning_rate": 0.00037856516976998906, "loss": 0.7513, "step": 4665 }, { "epoch": 0.31614875795072456, "grad_norm": 6.758175849914551, "learning_rate": 0.00037855969331872946, "loss": 1.0111, "step": 4666 }, { "epoch": 0.31621651379254856, "grad_norm": 2.6532793045043945, "learning_rate": 0.00037855421686746986, "loss": 0.5652, "step": 4667 }, { "epoch": 0.31628426963437256, "grad_norm": 3.087677240371704, "learning_rate": 0.0003785487404162103, "loss": 0.7973, "step": 4668 }, { "epoch": 0.3163520254761965, "grad_norm": 3.5102059841156006, "learning_rate": 0.00037854326396495077, "loss": 0.9356, "step": 4669 }, { "epoch": 0.3164197813180205, "grad_norm": 3.5487282276153564, "learning_rate": 0.00037853778751369116, "loss": 0.8853, "step": 4670 }, { "epoch": 0.3164875371598445, "grad_norm": 3.0964252948760986, "learning_rate": 0.00037853231106243156, "loss": 0.8361, "step": 4671 }, { "epoch": 0.3165552930016685, "grad_norm": 3.128133535385132, "learning_rate": 0.00037852683461117196, "loss": 0.7276, "step": 4672 }, { "epoch": 0.31662304884349246, "grad_norm": 2.7896862030029297, "learning_rate": 0.00037852135815991236, "loss": 0.8198, "step": 4673 }, { "epoch": 0.31669080468531646, "grad_norm": 3.495328426361084, "learning_rate": 0.0003785158817086528, "loss": 0.9238, "step": 4674 }, { "epoch": 0.31675856052714046, "grad_norm": 3.8337149620056152, "learning_rate": 0.0003785104052573932, "loss": 0.9606, "step": 4675 }, { "epoch": 0.3168263163689644, "grad_norm": 3.1337485313415527, "learning_rate": 0.00037850492880613367, "loss": 0.7651, "step": 4676 }, { "epoch": 0.3168940722107884, "grad_norm": 2.9257493019104004, "learning_rate": 0.00037849945235487407, "loss": 0.896, "step": 4677 }, { "epoch": 0.3169618280526124, "grad_norm": 3.8320834636688232, "learning_rate": 0.00037849397590361447, "loss": 1.0705, "step": 4678 }, { "epoch": 0.3170295838944364, "grad_norm": 2.895055055618286, "learning_rate": 0.0003784884994523549, "loss": 0.8283, "step": 4679 }, { "epoch": 0.31709733973626036, "grad_norm": 2.796659231185913, "learning_rate": 0.0003784830230010953, "loss": 0.8152, "step": 4680 }, { "epoch": 0.31716509557808437, "grad_norm": 4.82125186920166, "learning_rate": 0.0003784775465498357, "loss": 1.0351, "step": 4681 }, { "epoch": 0.31723285141990837, "grad_norm": 3.360107660293579, "learning_rate": 0.0003784720700985761, "loss": 1.168, "step": 4682 }, { "epoch": 0.31730060726173237, "grad_norm": 4.076196193695068, "learning_rate": 0.0003784665936473165, "loss": 0.8264, "step": 4683 }, { "epoch": 0.3173683631035563, "grad_norm": 3.519021987915039, "learning_rate": 0.00037846111719605697, "loss": 1.0775, "step": 4684 }, { "epoch": 0.3174361189453803, "grad_norm": 3.0912837982177734, "learning_rate": 0.0003784556407447974, "loss": 0.9267, "step": 4685 }, { "epoch": 0.3175038747872043, "grad_norm": 3.4666285514831543, "learning_rate": 0.0003784501642935378, "loss": 0.8538, "step": 4686 }, { "epoch": 0.3175716306290283, "grad_norm": 3.159575939178467, "learning_rate": 0.0003784446878422782, "loss": 0.8547, "step": 4687 }, { "epoch": 0.31763938647085227, "grad_norm": 2.9505927562713623, "learning_rate": 0.0003784392113910186, "loss": 0.8166, "step": 4688 }, { "epoch": 0.31770714231267627, "grad_norm": 3.6095385551452637, "learning_rate": 0.000378433734939759, "loss": 0.9674, "step": 4689 }, { "epoch": 0.3177748981545003, "grad_norm": 3.9311225414276123, "learning_rate": 0.0003784282584884995, "loss": 0.9102, "step": 4690 }, { "epoch": 0.3178426539963242, "grad_norm": 3.922656297683716, "learning_rate": 0.0003784227820372399, "loss": 0.976, "step": 4691 }, { "epoch": 0.3179104098381482, "grad_norm": 3.6320712566375732, "learning_rate": 0.0003784173055859803, "loss": 0.9917, "step": 4692 }, { "epoch": 0.3179781656799722, "grad_norm": 3.947233200073242, "learning_rate": 0.0003784118291347207, "loss": 0.992, "step": 4693 }, { "epoch": 0.3180459215217962, "grad_norm": 4.068166255950928, "learning_rate": 0.0003784063526834611, "loss": 1.0512, "step": 4694 }, { "epoch": 0.3181136773636202, "grad_norm": 3.2760400772094727, "learning_rate": 0.0003784008762322015, "loss": 0.8951, "step": 4695 }, { "epoch": 0.3181814332054442, "grad_norm": 3.3671886920928955, "learning_rate": 0.000378395399780942, "loss": 1.0541, "step": 4696 }, { "epoch": 0.3182491890472682, "grad_norm": 3.419539451599121, "learning_rate": 0.0003783899233296824, "loss": 0.8772, "step": 4697 }, { "epoch": 0.3183169448890922, "grad_norm": 3.2745048999786377, "learning_rate": 0.0003783844468784228, "loss": 0.872, "step": 4698 }, { "epoch": 0.3183847007309161, "grad_norm": 3.280949831008911, "learning_rate": 0.00037837897042716323, "loss": 0.6969, "step": 4699 }, { "epoch": 0.31845245657274013, "grad_norm": 2.972790241241455, "learning_rate": 0.00037837349397590363, "loss": 0.9331, "step": 4700 }, { "epoch": 0.31852021241456413, "grad_norm": 2.9714748859405518, "learning_rate": 0.0003783680175246441, "loss": 0.8667, "step": 4701 }, { "epoch": 0.31858796825638813, "grad_norm": 3.5762295722961426, "learning_rate": 0.0003783625410733845, "loss": 0.9229, "step": 4702 }, { "epoch": 0.3186557240982121, "grad_norm": 3.2809977531433105, "learning_rate": 0.0003783570646221249, "loss": 0.7859, "step": 4703 }, { "epoch": 0.3187234799400361, "grad_norm": 3.1284782886505127, "learning_rate": 0.0003783515881708653, "loss": 0.8405, "step": 4704 }, { "epoch": 0.3187912357818601, "grad_norm": 3.4072093963623047, "learning_rate": 0.0003783461117196057, "loss": 0.6454, "step": 4705 }, { "epoch": 0.31885899162368403, "grad_norm": 3.228945732116699, "learning_rate": 0.00037834063526834613, "loss": 0.8015, "step": 4706 }, { "epoch": 0.31892674746550803, "grad_norm": 3.010326623916626, "learning_rate": 0.0003783351588170866, "loss": 0.8102, "step": 4707 }, { "epoch": 0.31899450330733203, "grad_norm": 3.395674467086792, "learning_rate": 0.000378329682365827, "loss": 0.7361, "step": 4708 }, { "epoch": 0.31906225914915604, "grad_norm": 3.364664077758789, "learning_rate": 0.0003783242059145674, "loss": 0.7535, "step": 4709 }, { "epoch": 0.31913001499098, "grad_norm": 3.5595521926879883, "learning_rate": 0.0003783187294633078, "loss": 1.056, "step": 4710 }, { "epoch": 0.319197770832804, "grad_norm": 3.7385122776031494, "learning_rate": 0.0003783132530120482, "loss": 1.0873, "step": 4711 }, { "epoch": 0.319265526674628, "grad_norm": 3.320122241973877, "learning_rate": 0.00037830777656078864, "loss": 0.7781, "step": 4712 }, { "epoch": 0.319333282516452, "grad_norm": 3.610445261001587, "learning_rate": 0.00037830230010952904, "loss": 0.7441, "step": 4713 }, { "epoch": 0.31940103835827593, "grad_norm": 4.168842792510986, "learning_rate": 0.00037829682365826943, "loss": 0.9132, "step": 4714 }, { "epoch": 0.31946879420009994, "grad_norm": 3.9578146934509277, "learning_rate": 0.0003782913472070099, "loss": 0.9847, "step": 4715 }, { "epoch": 0.31953655004192394, "grad_norm": 2.828723430633545, "learning_rate": 0.0003782858707557503, "loss": 0.7718, "step": 4716 }, { "epoch": 0.31960430588374794, "grad_norm": 5.104121208190918, "learning_rate": 0.00037828039430449074, "loss": 0.8096, "step": 4717 }, { "epoch": 0.3196720617255719, "grad_norm": 3.587599277496338, "learning_rate": 0.00037827491785323114, "loss": 0.9577, "step": 4718 }, { "epoch": 0.3197398175673959, "grad_norm": 3.7260866165161133, "learning_rate": 0.00037826944140197154, "loss": 1.0058, "step": 4719 }, { "epoch": 0.3198075734092199, "grad_norm": 3.691438913345337, "learning_rate": 0.00037826396495071194, "loss": 0.9833, "step": 4720 }, { "epoch": 0.31987532925104384, "grad_norm": 5.268035411834717, "learning_rate": 0.00037825848849945234, "loss": 0.9236, "step": 4721 }, { "epoch": 0.31994308509286784, "grad_norm": 4.58246374130249, "learning_rate": 0.0003782530120481928, "loss": 1.0366, "step": 4722 }, { "epoch": 0.32001084093469184, "grad_norm": 5.0231404304504395, "learning_rate": 0.00037824753559693324, "loss": 0.9779, "step": 4723 }, { "epoch": 0.32007859677651584, "grad_norm": 3.745047092437744, "learning_rate": 0.00037824205914567364, "loss": 0.9819, "step": 4724 }, { "epoch": 0.3201463526183398, "grad_norm": 2.7917802333831787, "learning_rate": 0.00037823658269441404, "loss": 0.7236, "step": 4725 }, { "epoch": 0.3202141084601638, "grad_norm": 2.7867701053619385, "learning_rate": 0.00037823110624315444, "loss": 0.7858, "step": 4726 }, { "epoch": 0.3202818643019878, "grad_norm": 3.185622453689575, "learning_rate": 0.00037822562979189484, "loss": 0.8655, "step": 4727 }, { "epoch": 0.3203496201438118, "grad_norm": 3.3427796363830566, "learning_rate": 0.0003782201533406353, "loss": 0.8736, "step": 4728 }, { "epoch": 0.32041737598563574, "grad_norm": 3.3331212997436523, "learning_rate": 0.0003782146768893757, "loss": 0.9656, "step": 4729 }, { "epoch": 0.32048513182745975, "grad_norm": 2.427016258239746, "learning_rate": 0.00037820920043811615, "loss": 0.6119, "step": 4730 }, { "epoch": 0.32055288766928375, "grad_norm": 2.4421639442443848, "learning_rate": 0.00037820372398685655, "loss": 0.6599, "step": 4731 }, { "epoch": 0.32062064351110775, "grad_norm": 3.7084274291992188, "learning_rate": 0.00037819824753559695, "loss": 0.9631, "step": 4732 }, { "epoch": 0.3206883993529317, "grad_norm": 2.9902501106262207, "learning_rate": 0.00037819277108433735, "loss": 0.7197, "step": 4733 }, { "epoch": 0.3207561551947557, "grad_norm": 3.553417682647705, "learning_rate": 0.0003781872946330778, "loss": 0.862, "step": 4734 }, { "epoch": 0.3208239110365797, "grad_norm": 4.679892063140869, "learning_rate": 0.0003781818181818182, "loss": 1.0786, "step": 4735 }, { "epoch": 0.32089166687840365, "grad_norm": 5.547882080078125, "learning_rate": 0.0003781763417305586, "loss": 1.0849, "step": 4736 }, { "epoch": 0.32095942272022765, "grad_norm": 2.923304557800293, "learning_rate": 0.000378170865279299, "loss": 0.8679, "step": 4737 }, { "epoch": 0.32102717856205165, "grad_norm": 3.6654045581817627, "learning_rate": 0.00037816538882803945, "loss": 1.0165, "step": 4738 }, { "epoch": 0.32109493440387565, "grad_norm": 3.64471697807312, "learning_rate": 0.0003781599123767799, "loss": 0.7679, "step": 4739 }, { "epoch": 0.3211626902456996, "grad_norm": 2.9286139011383057, "learning_rate": 0.0003781544359255203, "loss": 0.9428, "step": 4740 }, { "epoch": 0.3212304460875236, "grad_norm": 3.1060848236083984, "learning_rate": 0.0003781489594742607, "loss": 0.9402, "step": 4741 }, { "epoch": 0.3212982019293476, "grad_norm": 3.7226762771606445, "learning_rate": 0.0003781434830230011, "loss": 0.8819, "step": 4742 }, { "epoch": 0.3213659577711716, "grad_norm": 3.4312050342559814, "learning_rate": 0.0003781380065717415, "loss": 1.0445, "step": 4743 }, { "epoch": 0.32143371361299555, "grad_norm": 4.8454270362854, "learning_rate": 0.00037813253012048195, "loss": 0.7426, "step": 4744 }, { "epoch": 0.32150146945481956, "grad_norm": 3.5073869228363037, "learning_rate": 0.00037812705366922235, "loss": 0.9175, "step": 4745 }, { "epoch": 0.32156922529664356, "grad_norm": 3.2707531452178955, "learning_rate": 0.0003781215772179628, "loss": 1.0419, "step": 4746 }, { "epoch": 0.32163698113846756, "grad_norm": 2.6057660579681396, "learning_rate": 0.0003781161007667032, "loss": 0.572, "step": 4747 }, { "epoch": 0.3217047369802915, "grad_norm": 3.949493885040283, "learning_rate": 0.0003781106243154436, "loss": 1.0414, "step": 4748 }, { "epoch": 0.3217724928221155, "grad_norm": 7.39580774307251, "learning_rate": 0.000378105147864184, "loss": 0.7601, "step": 4749 }, { "epoch": 0.3218402486639395, "grad_norm": 2.568756103515625, "learning_rate": 0.00037809967141292446, "loss": 0.8627, "step": 4750 }, { "epoch": 0.32190800450576346, "grad_norm": 3.54057240486145, "learning_rate": 0.00037809419496166486, "loss": 1.1164, "step": 4751 }, { "epoch": 0.32197576034758746, "grad_norm": 4.055639266967773, "learning_rate": 0.00037808871851040526, "loss": 1.0164, "step": 4752 }, { "epoch": 0.32204351618941146, "grad_norm": 3.347550630569458, "learning_rate": 0.00037808324205914565, "loss": 0.8285, "step": 4753 }, { "epoch": 0.32211127203123546, "grad_norm": 3.7531137466430664, "learning_rate": 0.0003780777656078861, "loss": 1.1067, "step": 4754 }, { "epoch": 0.3221790278730594, "grad_norm": 4.507245063781738, "learning_rate": 0.00037807228915662656, "loss": 1.1435, "step": 4755 }, { "epoch": 0.3222467837148834, "grad_norm": 3.171062707901001, "learning_rate": 0.00037806681270536696, "loss": 0.7836, "step": 4756 }, { "epoch": 0.3223145395567074, "grad_norm": 4.0265583992004395, "learning_rate": 0.00037806133625410736, "loss": 1.0246, "step": 4757 }, { "epoch": 0.3223822953985314, "grad_norm": 3.5723683834075928, "learning_rate": 0.00037805585980284776, "loss": 1.0613, "step": 4758 }, { "epoch": 0.32245005124035536, "grad_norm": 4.619985580444336, "learning_rate": 0.00037805038335158816, "loss": 0.8866, "step": 4759 }, { "epoch": 0.32251780708217936, "grad_norm": 3.7391250133514404, "learning_rate": 0.0003780449069003286, "loss": 0.7985, "step": 4760 }, { "epoch": 0.32258556292400337, "grad_norm": 3.505823850631714, "learning_rate": 0.00037803943044906907, "loss": 0.9755, "step": 4761 }, { "epoch": 0.32265331876582737, "grad_norm": 3.7636072635650635, "learning_rate": 0.00037803395399780946, "loss": 1.0963, "step": 4762 }, { "epoch": 0.3227210746076513, "grad_norm": 4.241861343383789, "learning_rate": 0.00037802847754654986, "loss": 1.0247, "step": 4763 }, { "epoch": 0.3227888304494753, "grad_norm": 3.9200100898742676, "learning_rate": 0.00037802300109529026, "loss": 0.8765, "step": 4764 }, { "epoch": 0.3228565862912993, "grad_norm": 4.593047142028809, "learning_rate": 0.00037801752464403066, "loss": 1.0083, "step": 4765 }, { "epoch": 0.32292434213312327, "grad_norm": 4.557302474975586, "learning_rate": 0.0003780120481927711, "loss": 0.9617, "step": 4766 }, { "epoch": 0.32299209797494727, "grad_norm": 3.522256851196289, "learning_rate": 0.0003780065717415115, "loss": 0.9679, "step": 4767 }, { "epoch": 0.32305985381677127, "grad_norm": 2.2135138511657715, "learning_rate": 0.0003780010952902519, "loss": 0.6338, "step": 4768 }, { "epoch": 0.32312760965859527, "grad_norm": 3.479118824005127, "learning_rate": 0.00037799561883899237, "loss": 1.0722, "step": 4769 }, { "epoch": 0.3231953655004192, "grad_norm": 3.3947670459747314, "learning_rate": 0.00037799014238773277, "loss": 0.9272, "step": 4770 }, { "epoch": 0.3232631213422432, "grad_norm": 4.017377853393555, "learning_rate": 0.00037798466593647317, "loss": 1.1883, "step": 4771 }, { "epoch": 0.3233308771840672, "grad_norm": 3.52482533454895, "learning_rate": 0.0003779791894852136, "loss": 0.6827, "step": 4772 }, { "epoch": 0.3233986330258912, "grad_norm": 2.975405216217041, "learning_rate": 0.000377973713033954, "loss": 0.9322, "step": 4773 }, { "epoch": 0.32346638886771517, "grad_norm": 3.0349233150482178, "learning_rate": 0.0003779682365826944, "loss": 0.9818, "step": 4774 }, { "epoch": 0.3235341447095392, "grad_norm": 2.7322332859039307, "learning_rate": 0.0003779627601314348, "loss": 0.6649, "step": 4775 }, { "epoch": 0.3236019005513632, "grad_norm": 3.0596487522125244, "learning_rate": 0.00037795728368017527, "loss": 0.9405, "step": 4776 }, { "epoch": 0.3236696563931872, "grad_norm": 3.9006717205047607, "learning_rate": 0.0003779518072289157, "loss": 1.1537, "step": 4777 }, { "epoch": 0.3237374122350111, "grad_norm": 3.5776913166046143, "learning_rate": 0.0003779463307776561, "loss": 0.9251, "step": 4778 }, { "epoch": 0.3238051680768351, "grad_norm": 2.804893732070923, "learning_rate": 0.0003779408543263965, "loss": 0.9237, "step": 4779 }, { "epoch": 0.32387292391865913, "grad_norm": 3.633993148803711, "learning_rate": 0.0003779353778751369, "loss": 1.079, "step": 4780 }, { "epoch": 0.3239406797604831, "grad_norm": 2.996457815170288, "learning_rate": 0.0003779299014238773, "loss": 0.7164, "step": 4781 }, { "epoch": 0.3240084356023071, "grad_norm": 3.516148567199707, "learning_rate": 0.0003779244249726178, "loss": 1.02, "step": 4782 }, { "epoch": 0.3240761914441311, "grad_norm": 3.919649124145508, "learning_rate": 0.0003779189485213582, "loss": 0.8689, "step": 4783 }, { "epoch": 0.3241439472859551, "grad_norm": 3.663322925567627, "learning_rate": 0.00037791347207009857, "loss": 0.9717, "step": 4784 }, { "epoch": 0.324211703127779, "grad_norm": 3.772228956222534, "learning_rate": 0.000377907995618839, "loss": 0.8056, "step": 4785 }, { "epoch": 0.32427945896960303, "grad_norm": 2.632607936859131, "learning_rate": 0.0003779025191675794, "loss": 0.6958, "step": 4786 }, { "epoch": 0.32434721481142703, "grad_norm": 3.077054023742676, "learning_rate": 0.0003778970427163198, "loss": 0.7921, "step": 4787 }, { "epoch": 0.32441497065325103, "grad_norm": 4.5157294273376465, "learning_rate": 0.0003778915662650603, "loss": 1.185, "step": 4788 }, { "epoch": 0.324482726495075, "grad_norm": 3.8090898990631104, "learning_rate": 0.0003778860898138007, "loss": 0.9372, "step": 4789 }, { "epoch": 0.324550482336899, "grad_norm": 4.700262546539307, "learning_rate": 0.0003778806133625411, "loss": 1.1177, "step": 4790 }, { "epoch": 0.324618238178723, "grad_norm": 3.431194543838501, "learning_rate": 0.0003778751369112815, "loss": 1.0546, "step": 4791 }, { "epoch": 0.324685994020547, "grad_norm": 3.102712631225586, "learning_rate": 0.00037786966046002193, "loss": 0.8971, "step": 4792 }, { "epoch": 0.32475374986237093, "grad_norm": 2.9600613117218018, "learning_rate": 0.0003778641840087624, "loss": 0.9304, "step": 4793 }, { "epoch": 0.32482150570419493, "grad_norm": 5.688563346862793, "learning_rate": 0.0003778587075575028, "loss": 0.6711, "step": 4794 }, { "epoch": 0.32488926154601894, "grad_norm": 2.691591739654541, "learning_rate": 0.0003778532311062432, "loss": 0.756, "step": 4795 }, { "epoch": 0.3249570173878429, "grad_norm": 2.701831579208374, "learning_rate": 0.0003778477546549836, "loss": 0.6863, "step": 4796 }, { "epoch": 0.3250247732296669, "grad_norm": 3.668613910675049, "learning_rate": 0.000377842278203724, "loss": 0.8213, "step": 4797 }, { "epoch": 0.3250925290714909, "grad_norm": 2.770779848098755, "learning_rate": 0.00037783680175246443, "loss": 0.7887, "step": 4798 }, { "epoch": 0.3251602849133149, "grad_norm": 9.102119445800781, "learning_rate": 0.00037783132530120483, "loss": 0.7578, "step": 4799 }, { "epoch": 0.32522804075513884, "grad_norm": 4.864066123962402, "learning_rate": 0.0003778258488499453, "loss": 0.7389, "step": 4800 }, { "epoch": 0.32529579659696284, "grad_norm": 3.7036781311035156, "learning_rate": 0.0003778203723986857, "loss": 0.8352, "step": 4801 }, { "epoch": 0.32536355243878684, "grad_norm": 4.885022163391113, "learning_rate": 0.0003778148959474261, "loss": 1.1091, "step": 4802 }, { "epoch": 0.32543130828061084, "grad_norm": 3.518411874771118, "learning_rate": 0.0003778094194961665, "loss": 0.9199, "step": 4803 }, { "epoch": 0.3254990641224348, "grad_norm": 3.9507288932800293, "learning_rate": 0.00037780394304490694, "loss": 0.8998, "step": 4804 }, { "epoch": 0.3255668199642588, "grad_norm": 4.199492931365967, "learning_rate": 0.00037779846659364734, "loss": 0.9523, "step": 4805 }, { "epoch": 0.3256345758060828, "grad_norm": 5.616729259490967, "learning_rate": 0.00037779299014238773, "loss": 0.9782, "step": 4806 }, { "epoch": 0.3257023316479068, "grad_norm": 3.623136043548584, "learning_rate": 0.00037778751369112813, "loss": 0.9725, "step": 4807 }, { "epoch": 0.32577008748973074, "grad_norm": 8.61198616027832, "learning_rate": 0.0003777820372398686, "loss": 0.8929, "step": 4808 }, { "epoch": 0.32583784333155474, "grad_norm": 3.3595643043518066, "learning_rate": 0.000377776560788609, "loss": 0.9456, "step": 4809 }, { "epoch": 0.32590559917337875, "grad_norm": 2.6960859298706055, "learning_rate": 0.00037777108433734944, "loss": 0.5942, "step": 4810 }, { "epoch": 0.3259733550152027, "grad_norm": 3.4749326705932617, "learning_rate": 0.00037776560788608984, "loss": 0.9446, "step": 4811 }, { "epoch": 0.3260411108570267, "grad_norm": 3.4385569095611572, "learning_rate": 0.00037776013143483024, "loss": 0.9821, "step": 4812 }, { "epoch": 0.3261088666988507, "grad_norm": 2.8618202209472656, "learning_rate": 0.00037775465498357064, "loss": 0.8202, "step": 4813 }, { "epoch": 0.3261766225406747, "grad_norm": 3.217937707901001, "learning_rate": 0.0003777491785323111, "loss": 1.0013, "step": 4814 }, { "epoch": 0.32624437838249865, "grad_norm": 3.612497091293335, "learning_rate": 0.0003777437020810515, "loss": 1.0733, "step": 4815 }, { "epoch": 0.32631213422432265, "grad_norm": 4.122450351715088, "learning_rate": 0.00037773822562979194, "loss": 0.8836, "step": 4816 }, { "epoch": 0.32637989006614665, "grad_norm": 3.932715892791748, "learning_rate": 0.00037773274917853234, "loss": 0.9751, "step": 4817 }, { "epoch": 0.32644764590797065, "grad_norm": 3.6524529457092285, "learning_rate": 0.00037772727272727274, "loss": 0.9565, "step": 4818 }, { "epoch": 0.3265154017497946, "grad_norm": 4.013326644897461, "learning_rate": 0.00037772179627601314, "loss": 0.967, "step": 4819 }, { "epoch": 0.3265831575916186, "grad_norm": 2.962691068649292, "learning_rate": 0.0003777163198247536, "loss": 0.6576, "step": 4820 }, { "epoch": 0.3266509134334426, "grad_norm": 3.015850067138672, "learning_rate": 0.000377710843373494, "loss": 0.8705, "step": 4821 }, { "epoch": 0.3267186692752666, "grad_norm": 2.7478840351104736, "learning_rate": 0.0003777053669222344, "loss": 0.8059, "step": 4822 }, { "epoch": 0.32678642511709055, "grad_norm": 4.876291275024414, "learning_rate": 0.0003776998904709748, "loss": 1.0818, "step": 4823 }, { "epoch": 0.32685418095891455, "grad_norm": 3.1532340049743652, "learning_rate": 0.00037769441401971525, "loss": 0.7537, "step": 4824 }, { "epoch": 0.32692193680073856, "grad_norm": 2.6689541339874268, "learning_rate": 0.00037768893756845565, "loss": 0.8487, "step": 4825 }, { "epoch": 0.3269896926425625, "grad_norm": 3.045830726623535, "learning_rate": 0.0003776834611171961, "loss": 0.678, "step": 4826 }, { "epoch": 0.3270574484843865, "grad_norm": 3.4502205848693848, "learning_rate": 0.0003776779846659365, "loss": 1.0227, "step": 4827 }, { "epoch": 0.3271252043262105, "grad_norm": 2.9441394805908203, "learning_rate": 0.0003776725082146769, "loss": 0.8509, "step": 4828 }, { "epoch": 0.3271929601680345, "grad_norm": 3.018313407897949, "learning_rate": 0.0003776670317634173, "loss": 0.7666, "step": 4829 }, { "epoch": 0.32726071600985845, "grad_norm": 5.296263694763184, "learning_rate": 0.0003776615553121577, "loss": 1.0057, "step": 4830 }, { "epoch": 0.32732847185168246, "grad_norm": 2.8210995197296143, "learning_rate": 0.0003776560788608982, "loss": 0.8043, "step": 4831 }, { "epoch": 0.32739622769350646, "grad_norm": 2.828364133834839, "learning_rate": 0.0003776506024096386, "loss": 0.8271, "step": 4832 }, { "epoch": 0.32746398353533046, "grad_norm": 3.079010009765625, "learning_rate": 0.000377645125958379, "loss": 0.7529, "step": 4833 }, { "epoch": 0.3275317393771544, "grad_norm": 2.7812650203704834, "learning_rate": 0.0003776396495071194, "loss": 0.6154, "step": 4834 }, { "epoch": 0.3275994952189784, "grad_norm": 3.1177875995635986, "learning_rate": 0.0003776341730558598, "loss": 0.7962, "step": 4835 }, { "epoch": 0.3276672510608024, "grad_norm": 3.2179830074310303, "learning_rate": 0.00037762869660460025, "loss": 0.8745, "step": 4836 }, { "epoch": 0.3277350069026264, "grad_norm": 2.9602255821228027, "learning_rate": 0.00037762322015334065, "loss": 0.9649, "step": 4837 }, { "epoch": 0.32780276274445036, "grad_norm": 3.8863937854766846, "learning_rate": 0.00037761774370208105, "loss": 0.955, "step": 4838 }, { "epoch": 0.32787051858627436, "grad_norm": 3.9536046981811523, "learning_rate": 0.0003776122672508215, "loss": 0.9546, "step": 4839 }, { "epoch": 0.32793827442809836, "grad_norm": 5.496060371398926, "learning_rate": 0.0003776067907995619, "loss": 0.7967, "step": 4840 }, { "epoch": 0.3280060302699223, "grad_norm": 2.5900092124938965, "learning_rate": 0.0003776013143483023, "loss": 0.714, "step": 4841 }, { "epoch": 0.3280737861117463, "grad_norm": 2.9863123893737793, "learning_rate": 0.00037759583789704276, "loss": 0.74, "step": 4842 }, { "epoch": 0.3281415419535703, "grad_norm": 3.8077919483184814, "learning_rate": 0.00037759036144578316, "loss": 1.0269, "step": 4843 }, { "epoch": 0.3282092977953943, "grad_norm": 4.101451873779297, "learning_rate": 0.00037758488499452356, "loss": 0.9343, "step": 4844 }, { "epoch": 0.32827705363721826, "grad_norm": 3.72536301612854, "learning_rate": 0.00037757940854326395, "loss": 0.9366, "step": 4845 }, { "epoch": 0.32834480947904227, "grad_norm": 3.406101942062378, "learning_rate": 0.00037757393209200435, "loss": 0.7827, "step": 4846 }, { "epoch": 0.32841256532086627, "grad_norm": 2.516517400741577, "learning_rate": 0.0003775684556407448, "loss": 0.8617, "step": 4847 }, { "epoch": 0.32848032116269027, "grad_norm": 3.460503578186035, "learning_rate": 0.00037756297918948526, "loss": 1.2011, "step": 4848 }, { "epoch": 0.3285480770045142, "grad_norm": 3.9432005882263184, "learning_rate": 0.00037755750273822566, "loss": 0.8443, "step": 4849 }, { "epoch": 0.3286158328463382, "grad_norm": 3.4059197902679443, "learning_rate": 0.00037755202628696606, "loss": 0.8206, "step": 4850 }, { "epoch": 0.3286835886881622, "grad_norm": 4.184998512268066, "learning_rate": 0.00037754654983570646, "loss": 0.9472, "step": 4851 }, { "epoch": 0.3287513445299862, "grad_norm": 4.208993434906006, "learning_rate": 0.0003775410733844469, "loss": 0.9259, "step": 4852 }, { "epoch": 0.32881910037181017, "grad_norm": 2.986640214920044, "learning_rate": 0.0003775355969331873, "loss": 0.8322, "step": 4853 }, { "epoch": 0.32888685621363417, "grad_norm": 4.245830535888672, "learning_rate": 0.0003775301204819277, "loss": 1.0934, "step": 4854 }, { "epoch": 0.3289546120554582, "grad_norm": 3.779360771179199, "learning_rate": 0.00037752464403066816, "loss": 0.8643, "step": 4855 }, { "epoch": 0.3290223678972821, "grad_norm": 3.116886615753174, "learning_rate": 0.00037751916757940856, "loss": 0.775, "step": 4856 }, { "epoch": 0.3290901237391061, "grad_norm": 3.145733594894409, "learning_rate": 0.00037751369112814896, "loss": 0.7979, "step": 4857 }, { "epoch": 0.3291578795809301, "grad_norm": 2.750140905380249, "learning_rate": 0.0003775082146768894, "loss": 0.8624, "step": 4858 }, { "epoch": 0.3292256354227541, "grad_norm": 3.660961866378784, "learning_rate": 0.0003775027382256298, "loss": 1.1117, "step": 4859 }, { "epoch": 0.3292933912645781, "grad_norm": 3.408566951751709, "learning_rate": 0.0003774972617743702, "loss": 0.9367, "step": 4860 }, { "epoch": 0.3293611471064021, "grad_norm": 3.0668740272521973, "learning_rate": 0.0003774917853231106, "loss": 0.8218, "step": 4861 }, { "epoch": 0.3294289029482261, "grad_norm": 3.3916473388671875, "learning_rate": 0.00037748630887185107, "loss": 0.7844, "step": 4862 }, { "epoch": 0.3294966587900501, "grad_norm": 3.6464576721191406, "learning_rate": 0.00037748083242059147, "loss": 1.067, "step": 4863 }, { "epoch": 0.329564414631874, "grad_norm": 3.3979837894439697, "learning_rate": 0.0003774753559693319, "loss": 0.6818, "step": 4864 }, { "epoch": 0.329632170473698, "grad_norm": 3.404534101486206, "learning_rate": 0.0003774698795180723, "loss": 0.9208, "step": 4865 }, { "epoch": 0.32969992631552203, "grad_norm": 2.754348039627075, "learning_rate": 0.0003774644030668127, "loss": 0.765, "step": 4866 }, { "epoch": 0.329767682157346, "grad_norm": 3.2564730644226074, "learning_rate": 0.0003774589266155531, "loss": 1.0966, "step": 4867 }, { "epoch": 0.32983543799917, "grad_norm": 3.589618682861328, "learning_rate": 0.0003774534501642935, "loss": 0.7026, "step": 4868 }, { "epoch": 0.329903193840994, "grad_norm": 2.9354960918426514, "learning_rate": 0.00037744797371303397, "loss": 0.8632, "step": 4869 }, { "epoch": 0.329970949682818, "grad_norm": 3.382918119430542, "learning_rate": 0.0003774424972617744, "loss": 0.6488, "step": 4870 }, { "epoch": 0.33003870552464193, "grad_norm": 3.1659481525421143, "learning_rate": 0.0003774370208105148, "loss": 0.8103, "step": 4871 }, { "epoch": 0.33010646136646593, "grad_norm": 2.3839051723480225, "learning_rate": 0.0003774315443592552, "loss": 0.7031, "step": 4872 }, { "epoch": 0.33017421720828993, "grad_norm": 3.3259668350219727, "learning_rate": 0.0003774260679079956, "loss": 0.9733, "step": 4873 }, { "epoch": 0.33024197305011393, "grad_norm": 3.475327968597412, "learning_rate": 0.0003774205914567361, "loss": 0.8235, "step": 4874 }, { "epoch": 0.3303097288919379, "grad_norm": 3.0242292881011963, "learning_rate": 0.0003774151150054765, "loss": 0.7111, "step": 4875 }, { "epoch": 0.3303774847337619, "grad_norm": 4.297941207885742, "learning_rate": 0.00037740963855421687, "loss": 1.1426, "step": 4876 }, { "epoch": 0.3304452405755859, "grad_norm": 3.0206961631774902, "learning_rate": 0.00037740416210295727, "loss": 0.7827, "step": 4877 }, { "epoch": 0.3305129964174099, "grad_norm": 2.5455715656280518, "learning_rate": 0.0003773986856516977, "loss": 0.8304, "step": 4878 }, { "epoch": 0.33058075225923383, "grad_norm": 2.7556354999542236, "learning_rate": 0.0003773932092004381, "loss": 0.6618, "step": 4879 }, { "epoch": 0.33064850810105784, "grad_norm": 3.9946417808532715, "learning_rate": 0.0003773877327491786, "loss": 0.9296, "step": 4880 }, { "epoch": 0.33071626394288184, "grad_norm": 3.0390727519989014, "learning_rate": 0.000377382256297919, "loss": 0.8344, "step": 4881 }, { "epoch": 0.3307840197847058, "grad_norm": 3.5239200592041016, "learning_rate": 0.0003773767798466594, "loss": 0.9187, "step": 4882 }, { "epoch": 0.3308517756265298, "grad_norm": 3.2823586463928223, "learning_rate": 0.0003773713033953998, "loss": 0.8864, "step": 4883 }, { "epoch": 0.3309195314683538, "grad_norm": 3.4736974239349365, "learning_rate": 0.0003773658269441402, "loss": 0.9454, "step": 4884 }, { "epoch": 0.3309872873101778, "grad_norm": 3.1953632831573486, "learning_rate": 0.00037736035049288063, "loss": 0.8901, "step": 4885 }, { "epoch": 0.33105504315200174, "grad_norm": 4.397337913513184, "learning_rate": 0.0003773548740416211, "loss": 1.0889, "step": 4886 }, { "epoch": 0.33112279899382574, "grad_norm": 3.2640557289123535, "learning_rate": 0.0003773493975903615, "loss": 0.7979, "step": 4887 }, { "epoch": 0.33119055483564974, "grad_norm": 3.109494924545288, "learning_rate": 0.0003773439211391019, "loss": 0.9413, "step": 4888 }, { "epoch": 0.33125831067747374, "grad_norm": 5.449960231781006, "learning_rate": 0.0003773384446878423, "loss": 0.9486, "step": 4889 }, { "epoch": 0.3313260665192977, "grad_norm": 4.07262659072876, "learning_rate": 0.00037733296823658273, "loss": 0.9672, "step": 4890 }, { "epoch": 0.3313938223611217, "grad_norm": 2.9764161109924316, "learning_rate": 0.00037732749178532313, "loss": 0.8449, "step": 4891 }, { "epoch": 0.3314615782029457, "grad_norm": 3.1681344509124756, "learning_rate": 0.00037732201533406353, "loss": 0.6899, "step": 4892 }, { "epoch": 0.3315293340447697, "grad_norm": 3.2738876342773438, "learning_rate": 0.000377316538882804, "loss": 0.8995, "step": 4893 }, { "epoch": 0.33159708988659364, "grad_norm": 3.743032693862915, "learning_rate": 0.0003773110624315444, "loss": 0.8186, "step": 4894 }, { "epoch": 0.33166484572841765, "grad_norm": 2.521437883377075, "learning_rate": 0.0003773055859802848, "loss": 0.7259, "step": 4895 }, { "epoch": 0.33173260157024165, "grad_norm": 3.1364316940307617, "learning_rate": 0.00037730010952902524, "loss": 0.8655, "step": 4896 }, { "epoch": 0.3318003574120656, "grad_norm": 4.003754138946533, "learning_rate": 0.00037729463307776564, "loss": 0.9612, "step": 4897 }, { "epoch": 0.3318681132538896, "grad_norm": 4.261973857879639, "learning_rate": 0.00037728915662650603, "loss": 1.0066, "step": 4898 }, { "epoch": 0.3319358690957136, "grad_norm": 2.6097159385681152, "learning_rate": 0.00037728368017524643, "loss": 0.7179, "step": 4899 }, { "epoch": 0.3320036249375376, "grad_norm": 3.6279969215393066, "learning_rate": 0.00037727820372398683, "loss": 1.1224, "step": 4900 }, { "epoch": 0.33207138077936155, "grad_norm": 3.572078227996826, "learning_rate": 0.0003772727272727273, "loss": 1.0143, "step": 4901 }, { "epoch": 0.33213913662118555, "grad_norm": 3.2363052368164062, "learning_rate": 0.00037726725082146774, "loss": 0.8427, "step": 4902 }, { "epoch": 0.33220689246300955, "grad_norm": 3.271697998046875, "learning_rate": 0.00037726177437020814, "loss": 0.983, "step": 4903 }, { "epoch": 0.33227464830483355, "grad_norm": 2.3740196228027344, "learning_rate": 0.00037725629791894854, "loss": 0.5712, "step": 4904 }, { "epoch": 0.3323424041466575, "grad_norm": 3.698887586593628, "learning_rate": 0.00037725082146768894, "loss": 0.9685, "step": 4905 }, { "epoch": 0.3324101599884815, "grad_norm": 3.8815503120422363, "learning_rate": 0.00037724534501642934, "loss": 1.0935, "step": 4906 }, { "epoch": 0.3324779158303055, "grad_norm": 3.1357977390289307, "learning_rate": 0.0003772398685651698, "loss": 1.0571, "step": 4907 }, { "epoch": 0.3325456716721295, "grad_norm": 2.962766408920288, "learning_rate": 0.0003772343921139102, "loss": 0.8506, "step": 4908 }, { "epoch": 0.33261342751395345, "grad_norm": 2.813995361328125, "learning_rate": 0.00037722891566265064, "loss": 0.7137, "step": 4909 }, { "epoch": 0.33268118335577745, "grad_norm": 3.2030067443847656, "learning_rate": 0.00037722343921139104, "loss": 0.8312, "step": 4910 }, { "epoch": 0.33274893919760146, "grad_norm": 2.8236184120178223, "learning_rate": 0.00037721796276013144, "loss": 0.9677, "step": 4911 }, { "epoch": 0.3328166950394254, "grad_norm": 4.116222381591797, "learning_rate": 0.0003772124863088719, "loss": 0.9846, "step": 4912 }, { "epoch": 0.3328844508812494, "grad_norm": 4.211592197418213, "learning_rate": 0.0003772070098576123, "loss": 1.0503, "step": 4913 }, { "epoch": 0.3329522067230734, "grad_norm": 3.6325056552886963, "learning_rate": 0.0003772015334063527, "loss": 0.8354, "step": 4914 }, { "epoch": 0.3330199625648974, "grad_norm": 3.6922388076782227, "learning_rate": 0.0003771960569550931, "loss": 1.1047, "step": 4915 }, { "epoch": 0.33308771840672136, "grad_norm": 4.251829624176025, "learning_rate": 0.0003771905805038335, "loss": 0.7942, "step": 4916 }, { "epoch": 0.33315547424854536, "grad_norm": 4.402500152587891, "learning_rate": 0.00037718510405257394, "loss": 0.9073, "step": 4917 }, { "epoch": 0.33322323009036936, "grad_norm": 4.449902057647705, "learning_rate": 0.0003771796276013144, "loss": 0.9141, "step": 4918 }, { "epoch": 0.33329098593219336, "grad_norm": 3.1582283973693848, "learning_rate": 0.0003771741511500548, "loss": 0.8192, "step": 4919 }, { "epoch": 0.3333587417740173, "grad_norm": 3.154041290283203, "learning_rate": 0.0003771686746987952, "loss": 0.8389, "step": 4920 }, { "epoch": 0.3334264976158413, "grad_norm": 3.63498592376709, "learning_rate": 0.0003771631982475356, "loss": 0.8713, "step": 4921 }, { "epoch": 0.3334942534576653, "grad_norm": 2.5749244689941406, "learning_rate": 0.000377157721796276, "loss": 0.6686, "step": 4922 }, { "epoch": 0.3335620092994893, "grad_norm": 3.299356460571289, "learning_rate": 0.00037715224534501645, "loss": 0.7708, "step": 4923 }, { "epoch": 0.33362976514131326, "grad_norm": 3.183009147644043, "learning_rate": 0.0003771467688937569, "loss": 0.7719, "step": 4924 }, { "epoch": 0.33369752098313726, "grad_norm": 4.467050552368164, "learning_rate": 0.0003771412924424973, "loss": 0.8717, "step": 4925 }, { "epoch": 0.33376527682496127, "grad_norm": 3.4509928226470947, "learning_rate": 0.0003771358159912377, "loss": 0.951, "step": 4926 }, { "epoch": 0.3338330326667852, "grad_norm": 3.6228761672973633, "learning_rate": 0.0003771303395399781, "loss": 0.8198, "step": 4927 }, { "epoch": 0.3339007885086092, "grad_norm": 2.890279769897461, "learning_rate": 0.00037712486308871855, "loss": 0.8933, "step": 4928 }, { "epoch": 0.3339685443504332, "grad_norm": 3.4607372283935547, "learning_rate": 0.00037711938663745895, "loss": 0.7968, "step": 4929 }, { "epoch": 0.3340363001922572, "grad_norm": 3.0653247833251953, "learning_rate": 0.00037711391018619935, "loss": 0.7197, "step": 4930 }, { "epoch": 0.33410405603408116, "grad_norm": 4.08336877822876, "learning_rate": 0.00037710843373493975, "loss": 0.9248, "step": 4931 }, { "epoch": 0.33417181187590517, "grad_norm": 3.624298095703125, "learning_rate": 0.0003771029572836802, "loss": 0.9032, "step": 4932 }, { "epoch": 0.33423956771772917, "grad_norm": 2.925844192504883, "learning_rate": 0.0003770974808324206, "loss": 0.6638, "step": 4933 }, { "epoch": 0.33430732355955317, "grad_norm": 3.0293521881103516, "learning_rate": 0.00037709200438116106, "loss": 0.8064, "step": 4934 }, { "epoch": 0.3343750794013771, "grad_norm": 2.684248924255371, "learning_rate": 0.00037708652792990146, "loss": 0.6148, "step": 4935 }, { "epoch": 0.3344428352432011, "grad_norm": 5.37571382522583, "learning_rate": 0.00037708105147864186, "loss": 1.0006, "step": 4936 }, { "epoch": 0.3345105910850251, "grad_norm": 3.6323859691619873, "learning_rate": 0.00037707557502738225, "loss": 1.1376, "step": 4937 }, { "epoch": 0.3345783469268491, "grad_norm": 2.8613483905792236, "learning_rate": 0.00037707009857612265, "loss": 0.8172, "step": 4938 }, { "epoch": 0.33464610276867307, "grad_norm": 3.509146213531494, "learning_rate": 0.0003770646221248631, "loss": 1.0899, "step": 4939 }, { "epoch": 0.3347138586104971, "grad_norm": 3.6138076782226562, "learning_rate": 0.00037705914567360356, "loss": 0.6413, "step": 4940 }, { "epoch": 0.3347816144523211, "grad_norm": 2.874725580215454, "learning_rate": 0.00037705366922234396, "loss": 0.7442, "step": 4941 }, { "epoch": 0.334849370294145, "grad_norm": 3.0771851539611816, "learning_rate": 0.00037704819277108436, "loss": 0.9413, "step": 4942 }, { "epoch": 0.334917126135969, "grad_norm": 4.43841028213501, "learning_rate": 0.00037704271631982476, "loss": 0.907, "step": 4943 }, { "epoch": 0.334984881977793, "grad_norm": 4.14668607711792, "learning_rate": 0.00037703723986856516, "loss": 0.813, "step": 4944 }, { "epoch": 0.335052637819617, "grad_norm": 3.2889535427093506, "learning_rate": 0.0003770317634173056, "loss": 0.9143, "step": 4945 }, { "epoch": 0.335120393661441, "grad_norm": 2.6484122276306152, "learning_rate": 0.000377026286966046, "loss": 0.7522, "step": 4946 }, { "epoch": 0.335188149503265, "grad_norm": 3.2083327770233154, "learning_rate": 0.0003770208105147864, "loss": 0.8619, "step": 4947 }, { "epoch": 0.335255905345089, "grad_norm": 3.151399612426758, "learning_rate": 0.00037701533406352686, "loss": 0.8951, "step": 4948 }, { "epoch": 0.335323661186913, "grad_norm": 3.2234737873077393, "learning_rate": 0.00037700985761226726, "loss": 0.8039, "step": 4949 }, { "epoch": 0.3353914170287369, "grad_norm": 2.9242804050445557, "learning_rate": 0.0003770043811610077, "loss": 0.8602, "step": 4950 }, { "epoch": 0.33545917287056093, "grad_norm": 4.96194314956665, "learning_rate": 0.0003769989047097481, "loss": 1.0264, "step": 4951 }, { "epoch": 0.33552692871238493, "grad_norm": 3.9680233001708984, "learning_rate": 0.0003769934282584885, "loss": 0.9281, "step": 4952 }, { "epoch": 0.33559468455420893, "grad_norm": 2.5851659774780273, "learning_rate": 0.0003769879518072289, "loss": 0.7025, "step": 4953 }, { "epoch": 0.3356624403960329, "grad_norm": 3.1484315395355225, "learning_rate": 0.0003769824753559693, "loss": 0.8692, "step": 4954 }, { "epoch": 0.3357301962378569, "grad_norm": 2.814706563949585, "learning_rate": 0.00037697699890470977, "loss": 0.9375, "step": 4955 }, { "epoch": 0.3357979520796809, "grad_norm": 3.356046676635742, "learning_rate": 0.0003769715224534502, "loss": 0.7986, "step": 4956 }, { "epoch": 0.33586570792150483, "grad_norm": 3.425145387649536, "learning_rate": 0.0003769660460021906, "loss": 1.1241, "step": 4957 }, { "epoch": 0.33593346376332883, "grad_norm": 2.4477193355560303, "learning_rate": 0.000376960569550931, "loss": 0.7478, "step": 4958 }, { "epoch": 0.33600121960515283, "grad_norm": 3.4577741622924805, "learning_rate": 0.0003769550930996714, "loss": 0.9163, "step": 4959 }, { "epoch": 0.33606897544697684, "grad_norm": 3.628051996231079, "learning_rate": 0.0003769496166484118, "loss": 1.0893, "step": 4960 }, { "epoch": 0.3361367312888008, "grad_norm": 3.4093761444091797, "learning_rate": 0.00037694414019715227, "loss": 0.9982, "step": 4961 }, { "epoch": 0.3362044871306248, "grad_norm": 2.949155807495117, "learning_rate": 0.00037693866374589267, "loss": 0.8863, "step": 4962 }, { "epoch": 0.3362722429724488, "grad_norm": 2.8342103958129883, "learning_rate": 0.0003769331872946331, "loss": 0.7635, "step": 4963 }, { "epoch": 0.3363399988142728, "grad_norm": 2.78885555267334, "learning_rate": 0.0003769277108433735, "loss": 0.8022, "step": 4964 }, { "epoch": 0.33640775465609674, "grad_norm": 2.8747434616088867, "learning_rate": 0.0003769222343921139, "loss": 0.7537, "step": 4965 }, { "epoch": 0.33647551049792074, "grad_norm": 3.63836407661438, "learning_rate": 0.0003769167579408544, "loss": 1.0179, "step": 4966 }, { "epoch": 0.33654326633974474, "grad_norm": 3.3180136680603027, "learning_rate": 0.0003769112814895948, "loss": 0.8333, "step": 4967 }, { "epoch": 0.33661102218156874, "grad_norm": 2.7614989280700684, "learning_rate": 0.00037690580503833517, "loss": 0.8496, "step": 4968 }, { "epoch": 0.3366787780233927, "grad_norm": 3.7802276611328125, "learning_rate": 0.00037690032858707557, "loss": 0.8481, "step": 4969 }, { "epoch": 0.3367465338652167, "grad_norm": 2.9744691848754883, "learning_rate": 0.00037689485213581597, "loss": 0.7717, "step": 4970 }, { "epoch": 0.3368142897070407, "grad_norm": 3.8168113231658936, "learning_rate": 0.0003768893756845564, "loss": 0.9738, "step": 4971 }, { "epoch": 0.33688204554886464, "grad_norm": 3.8291420936584473, "learning_rate": 0.0003768838992332969, "loss": 1.026, "step": 4972 }, { "epoch": 0.33694980139068864, "grad_norm": 3.1292333602905273, "learning_rate": 0.0003768784227820373, "loss": 0.7624, "step": 4973 }, { "epoch": 0.33701755723251264, "grad_norm": 2.608389377593994, "learning_rate": 0.0003768729463307777, "loss": 0.6576, "step": 4974 }, { "epoch": 0.33708531307433665, "grad_norm": 3.7811622619628906, "learning_rate": 0.0003768674698795181, "loss": 1.0889, "step": 4975 }, { "epoch": 0.3371530689161606, "grad_norm": 2.5749168395996094, "learning_rate": 0.0003768619934282585, "loss": 0.7419, "step": 4976 }, { "epoch": 0.3372208247579846, "grad_norm": 2.9783565998077393, "learning_rate": 0.00037685651697699893, "loss": 0.8177, "step": 4977 }, { "epoch": 0.3372885805998086, "grad_norm": 3.3722851276397705, "learning_rate": 0.0003768510405257393, "loss": 0.8865, "step": 4978 }, { "epoch": 0.3373563364416326, "grad_norm": 4.233557224273682, "learning_rate": 0.0003768455640744798, "loss": 1.2387, "step": 4979 }, { "epoch": 0.33742409228345654, "grad_norm": 3.0492982864379883, "learning_rate": 0.0003768400876232202, "loss": 0.6887, "step": 4980 }, { "epoch": 0.33749184812528055, "grad_norm": 3.1897687911987305, "learning_rate": 0.0003768346111719606, "loss": 0.6915, "step": 4981 }, { "epoch": 0.33755960396710455, "grad_norm": 4.039340496063232, "learning_rate": 0.000376829134720701, "loss": 0.9939, "step": 4982 }, { "epoch": 0.33762735980892855, "grad_norm": 4.101314067840576, "learning_rate": 0.00037682365826944143, "loss": 1.1014, "step": 4983 }, { "epoch": 0.3376951156507525, "grad_norm": 2.774752378463745, "learning_rate": 0.00037681818181818183, "loss": 0.8703, "step": 4984 }, { "epoch": 0.3377628714925765, "grad_norm": 3.36700177192688, "learning_rate": 0.00037681270536692223, "loss": 1.0307, "step": 4985 }, { "epoch": 0.3378306273344005, "grad_norm": 3.208885908126831, "learning_rate": 0.00037680722891566263, "loss": 0.6332, "step": 4986 }, { "epoch": 0.33789838317622445, "grad_norm": 3.533313274383545, "learning_rate": 0.0003768017524644031, "loss": 0.9042, "step": 4987 }, { "epoch": 0.33796613901804845, "grad_norm": 3.0991311073303223, "learning_rate": 0.00037679627601314354, "loss": 0.7472, "step": 4988 }, { "epoch": 0.33803389485987245, "grad_norm": 3.3074610233306885, "learning_rate": 0.00037679079956188394, "loss": 0.8751, "step": 4989 }, { "epoch": 0.33810165070169645, "grad_norm": 3.2287940979003906, "learning_rate": 0.00037678532311062433, "loss": 0.9195, "step": 4990 }, { "epoch": 0.3381694065435204, "grad_norm": 2.6190855503082275, "learning_rate": 0.00037677984665936473, "loss": 0.6556, "step": 4991 }, { "epoch": 0.3382371623853444, "grad_norm": 2.6581599712371826, "learning_rate": 0.00037677437020810513, "loss": 0.8328, "step": 4992 }, { "epoch": 0.3383049182271684, "grad_norm": 3.815898895263672, "learning_rate": 0.0003767688937568456, "loss": 1.0558, "step": 4993 }, { "epoch": 0.3383726740689924, "grad_norm": 4.121740341186523, "learning_rate": 0.00037676341730558604, "loss": 1.0761, "step": 4994 }, { "epoch": 0.33844042991081635, "grad_norm": 3.88034725189209, "learning_rate": 0.00037675794085432644, "loss": 1.0724, "step": 4995 }, { "epoch": 0.33850818575264036, "grad_norm": 3.204780340194702, "learning_rate": 0.00037675246440306684, "loss": 0.8049, "step": 4996 }, { "epoch": 0.33857594159446436, "grad_norm": 3.7630844116210938, "learning_rate": 0.00037674698795180724, "loss": 0.8334, "step": 4997 }, { "epoch": 0.33864369743628836, "grad_norm": 2.9404919147491455, "learning_rate": 0.00037674151150054764, "loss": 0.7854, "step": 4998 }, { "epoch": 0.3387114532781123, "grad_norm": 3.200589179992676, "learning_rate": 0.0003767360350492881, "loss": 0.8408, "step": 4999 }, { "epoch": 0.3387792091199363, "grad_norm": 3.2373056411743164, "learning_rate": 0.0003767305585980285, "loss": 0.8869, "step": 5000 }, { "epoch": 0.3388469649617603, "grad_norm": 2.5882556438446045, "learning_rate": 0.0003767250821467689, "loss": 0.7226, "step": 5001 }, { "epoch": 0.33891472080358426, "grad_norm": 2.9489247798919678, "learning_rate": 0.00037671960569550934, "loss": 0.6913, "step": 5002 }, { "epoch": 0.33898247664540826, "grad_norm": 4.458138465881348, "learning_rate": 0.00037671412924424974, "loss": 1.0558, "step": 5003 }, { "epoch": 0.33905023248723226, "grad_norm": 3.787205934524536, "learning_rate": 0.0003767086527929902, "loss": 0.7591, "step": 5004 }, { "epoch": 0.33911798832905626, "grad_norm": 2.838386297225952, "learning_rate": 0.0003767031763417306, "loss": 0.7466, "step": 5005 }, { "epoch": 0.3391857441708802, "grad_norm": 3.3596978187561035, "learning_rate": 0.000376697699890471, "loss": 0.9245, "step": 5006 }, { "epoch": 0.3392535000127042, "grad_norm": 4.177923679351807, "learning_rate": 0.0003766922234392114, "loss": 1.128, "step": 5007 }, { "epoch": 0.3393212558545282, "grad_norm": 4.2101664543151855, "learning_rate": 0.0003766867469879518, "loss": 1.0013, "step": 5008 }, { "epoch": 0.3393890116963522, "grad_norm": 3.90531587600708, "learning_rate": 0.00037668127053669224, "loss": 0.9946, "step": 5009 }, { "epoch": 0.33945676753817616, "grad_norm": 3.150461196899414, "learning_rate": 0.0003766757940854327, "loss": 0.8549, "step": 5010 }, { "epoch": 0.33952452338000016, "grad_norm": 2.855290412902832, "learning_rate": 0.0003766703176341731, "loss": 0.8064, "step": 5011 }, { "epoch": 0.33959227922182417, "grad_norm": 3.081005334854126, "learning_rate": 0.0003766648411829135, "loss": 0.8724, "step": 5012 }, { "epoch": 0.33966003506364817, "grad_norm": 3.574631452560425, "learning_rate": 0.0003766593647316539, "loss": 1.0038, "step": 5013 }, { "epoch": 0.3397277909054721, "grad_norm": 3.3850607872009277, "learning_rate": 0.0003766538882803943, "loss": 0.8771, "step": 5014 }, { "epoch": 0.3397955467472961, "grad_norm": 3.223717212677002, "learning_rate": 0.00037664841182913475, "loss": 0.8208, "step": 5015 }, { "epoch": 0.3398633025891201, "grad_norm": 3.189300775527954, "learning_rate": 0.00037664293537787515, "loss": 0.9421, "step": 5016 }, { "epoch": 0.33993105843094407, "grad_norm": 6.308364391326904, "learning_rate": 0.00037663745892661555, "loss": 0.8566, "step": 5017 }, { "epoch": 0.33999881427276807, "grad_norm": 2.880709648132324, "learning_rate": 0.000376631982475356, "loss": 0.8291, "step": 5018 }, { "epoch": 0.34006657011459207, "grad_norm": 3.6492741107940674, "learning_rate": 0.0003766265060240964, "loss": 0.8648, "step": 5019 }, { "epoch": 0.3401343259564161, "grad_norm": 3.625805377960205, "learning_rate": 0.0003766210295728368, "loss": 0.9856, "step": 5020 }, { "epoch": 0.34020208179824, "grad_norm": 3.3752975463867188, "learning_rate": 0.00037661555312157725, "loss": 1.0701, "step": 5021 }, { "epoch": 0.340269837640064, "grad_norm": 3.0131261348724365, "learning_rate": 0.00037661007667031765, "loss": 0.9259, "step": 5022 }, { "epoch": 0.340337593481888, "grad_norm": 4.169709205627441, "learning_rate": 0.00037660460021905805, "loss": 0.6726, "step": 5023 }, { "epoch": 0.340405349323712, "grad_norm": 2.847313165664673, "learning_rate": 0.00037659912376779845, "loss": 0.7648, "step": 5024 }, { "epoch": 0.34047310516553597, "grad_norm": 3.186001777648926, "learning_rate": 0.0003765936473165389, "loss": 1.0373, "step": 5025 }, { "epoch": 0.34054086100736, "grad_norm": 3.0262069702148438, "learning_rate": 0.00037658817086527936, "loss": 0.7806, "step": 5026 }, { "epoch": 0.340608616849184, "grad_norm": 3.0305471420288086, "learning_rate": 0.00037658269441401976, "loss": 0.9192, "step": 5027 }, { "epoch": 0.340676372691008, "grad_norm": 2.9327173233032227, "learning_rate": 0.00037657721796276016, "loss": 0.8636, "step": 5028 }, { "epoch": 0.3407441285328319, "grad_norm": 3.325068473815918, "learning_rate": 0.00037657174151150055, "loss": 0.8409, "step": 5029 }, { "epoch": 0.3408118843746559, "grad_norm": 2.486204147338867, "learning_rate": 0.00037656626506024095, "loss": 0.6565, "step": 5030 }, { "epoch": 0.34087964021647993, "grad_norm": 5.3322367668151855, "learning_rate": 0.0003765607886089814, "loss": 0.9732, "step": 5031 }, { "epoch": 0.3409473960583039, "grad_norm": 4.602205276489258, "learning_rate": 0.0003765553121577218, "loss": 0.783, "step": 5032 }, { "epoch": 0.3410151519001279, "grad_norm": 3.2608301639556885, "learning_rate": 0.00037654983570646226, "loss": 0.9534, "step": 5033 }, { "epoch": 0.3410829077419519, "grad_norm": 3.7370903491973877, "learning_rate": 0.00037654435925520266, "loss": 0.9895, "step": 5034 }, { "epoch": 0.3411506635837759, "grad_norm": 3.0668070316314697, "learning_rate": 0.00037653888280394306, "loss": 0.9221, "step": 5035 }, { "epoch": 0.34121841942559983, "grad_norm": 3.7982609272003174, "learning_rate": 0.00037653340635268346, "loss": 0.9238, "step": 5036 }, { "epoch": 0.34128617526742383, "grad_norm": 3.4727632999420166, "learning_rate": 0.0003765279299014239, "loss": 1.1049, "step": 5037 }, { "epoch": 0.34135393110924783, "grad_norm": 3.7480740547180176, "learning_rate": 0.0003765224534501643, "loss": 0.9414, "step": 5038 }, { "epoch": 0.34142168695107183, "grad_norm": 3.2118139266967773, "learning_rate": 0.0003765169769989047, "loss": 0.8155, "step": 5039 }, { "epoch": 0.3414894427928958, "grad_norm": 3.3508617877960205, "learning_rate": 0.0003765115005476451, "loss": 0.7883, "step": 5040 }, { "epoch": 0.3415571986347198, "grad_norm": 2.5680081844329834, "learning_rate": 0.00037650602409638556, "loss": 0.7672, "step": 5041 }, { "epoch": 0.3416249544765438, "grad_norm": 3.2609715461730957, "learning_rate": 0.000376500547645126, "loss": 0.8573, "step": 5042 }, { "epoch": 0.3416927103183678, "grad_norm": 3.0949490070343018, "learning_rate": 0.0003764950711938664, "loss": 0.9086, "step": 5043 }, { "epoch": 0.34176046616019173, "grad_norm": 2.8835599422454834, "learning_rate": 0.0003764895947426068, "loss": 0.7345, "step": 5044 }, { "epoch": 0.34182822200201574, "grad_norm": 4.4174604415893555, "learning_rate": 0.0003764841182913472, "loss": 1.1569, "step": 5045 }, { "epoch": 0.34189597784383974, "grad_norm": 2.763707160949707, "learning_rate": 0.0003764786418400876, "loss": 0.7759, "step": 5046 }, { "epoch": 0.3419637336856637, "grad_norm": 3.8473896980285645, "learning_rate": 0.00037647316538882807, "loss": 0.8258, "step": 5047 }, { "epoch": 0.3420314895274877, "grad_norm": 4.691788673400879, "learning_rate": 0.00037646768893756846, "loss": 1.198, "step": 5048 }, { "epoch": 0.3420992453693117, "grad_norm": 3.062039852142334, "learning_rate": 0.0003764622124863089, "loss": 0.8657, "step": 5049 }, { "epoch": 0.3421670012111357, "grad_norm": 3.384232759475708, "learning_rate": 0.0003764567360350493, "loss": 0.9562, "step": 5050 }, { "epoch": 0.34223475705295964, "grad_norm": 4.520860195159912, "learning_rate": 0.0003764512595837897, "loss": 1.0147, "step": 5051 }, { "epoch": 0.34230251289478364, "grad_norm": 3.494816780090332, "learning_rate": 0.0003764457831325301, "loss": 1.0771, "step": 5052 }, { "epoch": 0.34237026873660764, "grad_norm": 3.938711166381836, "learning_rate": 0.00037644030668127057, "loss": 0.903, "step": 5053 }, { "epoch": 0.34243802457843164, "grad_norm": 2.745666980743408, "learning_rate": 0.00037643483023001097, "loss": 0.822, "step": 5054 }, { "epoch": 0.3425057804202556, "grad_norm": 2.9111196994781494, "learning_rate": 0.00037642935377875137, "loss": 0.8944, "step": 5055 }, { "epoch": 0.3425735362620796, "grad_norm": 2.57936429977417, "learning_rate": 0.0003764238773274918, "loss": 0.6658, "step": 5056 }, { "epoch": 0.3426412921039036, "grad_norm": 3.146329641342163, "learning_rate": 0.0003764184008762322, "loss": 0.8714, "step": 5057 }, { "epoch": 0.3427090479457276, "grad_norm": 3.2245113849639893, "learning_rate": 0.0003764129244249726, "loss": 1.0093, "step": 5058 }, { "epoch": 0.34277680378755154, "grad_norm": 3.3798344135284424, "learning_rate": 0.00037640744797371307, "loss": 0.837, "step": 5059 }, { "epoch": 0.34284455962937554, "grad_norm": 5.309645652770996, "learning_rate": 0.00037640197152245347, "loss": 0.8361, "step": 5060 }, { "epoch": 0.34291231547119955, "grad_norm": 2.9010486602783203, "learning_rate": 0.00037639649507119387, "loss": 0.7548, "step": 5061 }, { "epoch": 0.3429800713130235, "grad_norm": 3.2986178398132324, "learning_rate": 0.00037639101861993427, "loss": 0.9129, "step": 5062 }, { "epoch": 0.3430478271548475, "grad_norm": 3.1198127269744873, "learning_rate": 0.0003763855421686747, "loss": 0.9482, "step": 5063 }, { "epoch": 0.3431155829966715, "grad_norm": 3.5641989707946777, "learning_rate": 0.0003763800657174152, "loss": 0.9903, "step": 5064 }, { "epoch": 0.3431833388384955, "grad_norm": 3.5956993103027344, "learning_rate": 0.0003763745892661556, "loss": 0.9612, "step": 5065 }, { "epoch": 0.34325109468031945, "grad_norm": 3.8420283794403076, "learning_rate": 0.000376369112814896, "loss": 0.9465, "step": 5066 }, { "epoch": 0.34331885052214345, "grad_norm": 3.9463605880737305, "learning_rate": 0.0003763636363636364, "loss": 1.0525, "step": 5067 }, { "epoch": 0.34338660636396745, "grad_norm": 2.7849409580230713, "learning_rate": 0.0003763581599123768, "loss": 0.7764, "step": 5068 }, { "epoch": 0.34345436220579145, "grad_norm": 4.1936798095703125, "learning_rate": 0.00037635268346111723, "loss": 1.1547, "step": 5069 }, { "epoch": 0.3435221180476154, "grad_norm": 3.146047592163086, "learning_rate": 0.0003763472070098576, "loss": 0.8928, "step": 5070 }, { "epoch": 0.3435898738894394, "grad_norm": 3.3086209297180176, "learning_rate": 0.000376341730558598, "loss": 0.9185, "step": 5071 }, { "epoch": 0.3436576297312634, "grad_norm": 2.8878378868103027, "learning_rate": 0.0003763362541073385, "loss": 0.6462, "step": 5072 }, { "epoch": 0.3437253855730874, "grad_norm": 3.6715173721313477, "learning_rate": 0.0003763307776560789, "loss": 1.1424, "step": 5073 }, { "epoch": 0.34379314141491135, "grad_norm": 3.3849387168884277, "learning_rate": 0.0003763253012048193, "loss": 0.6771, "step": 5074 }, { "epoch": 0.34386089725673535, "grad_norm": 3.3846840858459473, "learning_rate": 0.00037631982475355973, "loss": 0.8954, "step": 5075 }, { "epoch": 0.34392865309855936, "grad_norm": 3.469606876373291, "learning_rate": 0.00037631434830230013, "loss": 0.9519, "step": 5076 }, { "epoch": 0.3439964089403833, "grad_norm": 3.9002342224121094, "learning_rate": 0.00037630887185104053, "loss": 1.0706, "step": 5077 }, { "epoch": 0.3440641647822073, "grad_norm": 3.0695137977600098, "learning_rate": 0.00037630339539978093, "loss": 0.7999, "step": 5078 }, { "epoch": 0.3441319206240313, "grad_norm": 3.2045865058898926, "learning_rate": 0.00037629791894852133, "loss": 0.9878, "step": 5079 }, { "epoch": 0.3441996764658553, "grad_norm": 3.6769115924835205, "learning_rate": 0.00037629244249726184, "loss": 0.8011, "step": 5080 }, { "epoch": 0.34426743230767926, "grad_norm": 5.460148334503174, "learning_rate": 0.00037628696604600224, "loss": 0.9583, "step": 5081 }, { "epoch": 0.34433518814950326, "grad_norm": 3.3912148475646973, "learning_rate": 0.00037628148959474263, "loss": 0.7733, "step": 5082 }, { "epoch": 0.34440294399132726, "grad_norm": 4.164949417114258, "learning_rate": 0.00037627601314348303, "loss": 1.0038, "step": 5083 }, { "epoch": 0.34447069983315126, "grad_norm": 4.240345478057861, "learning_rate": 0.00037627053669222343, "loss": 1.1104, "step": 5084 }, { "epoch": 0.3445384556749752, "grad_norm": 3.6639785766601562, "learning_rate": 0.0003762650602409639, "loss": 0.9839, "step": 5085 }, { "epoch": 0.3446062115167992, "grad_norm": 2.934575080871582, "learning_rate": 0.0003762595837897043, "loss": 0.7992, "step": 5086 }, { "epoch": 0.3446739673586232, "grad_norm": 5.06642484664917, "learning_rate": 0.00037625410733844474, "loss": 0.7651, "step": 5087 }, { "epoch": 0.3447417232004472, "grad_norm": 2.8361568450927734, "learning_rate": 0.00037624863088718514, "loss": 0.8034, "step": 5088 }, { "epoch": 0.34480947904227116, "grad_norm": 2.4699809551239014, "learning_rate": 0.00037624315443592554, "loss": 0.7061, "step": 5089 }, { "epoch": 0.34487723488409516, "grad_norm": 3.6372711658477783, "learning_rate": 0.00037623767798466594, "loss": 1.0681, "step": 5090 }, { "epoch": 0.34494499072591916, "grad_norm": 3.2409114837646484, "learning_rate": 0.0003762322015334064, "loss": 0.7764, "step": 5091 }, { "epoch": 0.3450127465677431, "grad_norm": 3.9119350910186768, "learning_rate": 0.0003762267250821468, "loss": 1.0222, "step": 5092 }, { "epoch": 0.3450805024095671, "grad_norm": 3.032582998275757, "learning_rate": 0.0003762212486308872, "loss": 0.7903, "step": 5093 }, { "epoch": 0.3451482582513911, "grad_norm": 3.6312668323516846, "learning_rate": 0.0003762157721796276, "loss": 1.0294, "step": 5094 }, { "epoch": 0.3452160140932151, "grad_norm": 3.0966081619262695, "learning_rate": 0.00037621029572836804, "loss": 0.8433, "step": 5095 }, { "epoch": 0.34528376993503906, "grad_norm": 3.5048069953918457, "learning_rate": 0.00037620481927710844, "loss": 0.994, "step": 5096 }, { "epoch": 0.34535152577686307, "grad_norm": 4.069911003112793, "learning_rate": 0.0003761993428258489, "loss": 1.0562, "step": 5097 }, { "epoch": 0.34541928161868707, "grad_norm": 4.061741352081299, "learning_rate": 0.0003761938663745893, "loss": 1.1311, "step": 5098 }, { "epoch": 0.34548703746051107, "grad_norm": 3.220573663711548, "learning_rate": 0.0003761883899233297, "loss": 0.8681, "step": 5099 }, { "epoch": 0.345554793302335, "grad_norm": 3.1633524894714355, "learning_rate": 0.0003761829134720701, "loss": 0.8209, "step": 5100 }, { "epoch": 0.345622549144159, "grad_norm": 2.7386488914489746, "learning_rate": 0.00037617743702081054, "loss": 0.7962, "step": 5101 }, { "epoch": 0.345690304985983, "grad_norm": 4.328852653503418, "learning_rate": 0.00037617196056955094, "loss": 1.0804, "step": 5102 }, { "epoch": 0.345758060827807, "grad_norm": 3.8472342491149902, "learning_rate": 0.0003761664841182914, "loss": 0.9875, "step": 5103 }, { "epoch": 0.34582581666963097, "grad_norm": 3.265984296798706, "learning_rate": 0.0003761610076670318, "loss": 0.8384, "step": 5104 }, { "epoch": 0.34589357251145497, "grad_norm": 3.3709774017333984, "learning_rate": 0.0003761555312157722, "loss": 0.844, "step": 5105 }, { "epoch": 0.345961328353279, "grad_norm": 2.9741339683532715, "learning_rate": 0.0003761500547645126, "loss": 0.8416, "step": 5106 }, { "epoch": 0.3460290841951029, "grad_norm": 3.308405637741089, "learning_rate": 0.00037614457831325305, "loss": 0.8041, "step": 5107 }, { "epoch": 0.3460968400369269, "grad_norm": 4.0263752937316895, "learning_rate": 0.00037613910186199345, "loss": 0.9297, "step": 5108 }, { "epoch": 0.3461645958787509, "grad_norm": 3.775108575820923, "learning_rate": 0.00037613362541073385, "loss": 0.868, "step": 5109 }, { "epoch": 0.3462323517205749, "grad_norm": 3.1609115600585938, "learning_rate": 0.00037612814895947425, "loss": 0.9027, "step": 5110 }, { "epoch": 0.3463001075623989, "grad_norm": 3.612380027770996, "learning_rate": 0.0003761226725082147, "loss": 0.8938, "step": 5111 }, { "epoch": 0.3463678634042229, "grad_norm": 2.790661334991455, "learning_rate": 0.0003761171960569551, "loss": 0.8044, "step": 5112 }, { "epoch": 0.3464356192460469, "grad_norm": 2.885685443878174, "learning_rate": 0.00037611171960569555, "loss": 0.6516, "step": 5113 }, { "epoch": 0.3465033750878709, "grad_norm": 3.388699531555176, "learning_rate": 0.00037610624315443595, "loss": 0.8779, "step": 5114 }, { "epoch": 0.3465711309296948, "grad_norm": 2.7017433643341064, "learning_rate": 0.00037610076670317635, "loss": 0.629, "step": 5115 }, { "epoch": 0.34663888677151883, "grad_norm": 4.662996292114258, "learning_rate": 0.00037609529025191675, "loss": 1.0558, "step": 5116 }, { "epoch": 0.34670664261334283, "grad_norm": 2.6289291381835938, "learning_rate": 0.00037608981380065715, "loss": 0.6272, "step": 5117 }, { "epoch": 0.34677439845516683, "grad_norm": 3.6141810417175293, "learning_rate": 0.0003760843373493976, "loss": 0.8619, "step": 5118 }, { "epoch": 0.3468421542969908, "grad_norm": 5.7031755447387695, "learning_rate": 0.00037607886089813806, "loss": 1.0026, "step": 5119 }, { "epoch": 0.3469099101388148, "grad_norm": 5.12413215637207, "learning_rate": 0.00037607338444687845, "loss": 0.8573, "step": 5120 }, { "epoch": 0.3469776659806388, "grad_norm": 4.246881008148193, "learning_rate": 0.00037606790799561885, "loss": 1.0809, "step": 5121 }, { "epoch": 0.34704542182246273, "grad_norm": 3.905601739883423, "learning_rate": 0.00037606243154435925, "loss": 1.423, "step": 5122 }, { "epoch": 0.34711317766428673, "grad_norm": 4.553896903991699, "learning_rate": 0.0003760569550930997, "loss": 1.2415, "step": 5123 }, { "epoch": 0.34718093350611073, "grad_norm": 5.42608118057251, "learning_rate": 0.0003760514786418401, "loss": 0.8792, "step": 5124 }, { "epoch": 0.34724868934793474, "grad_norm": 2.718259811401367, "learning_rate": 0.0003760460021905805, "loss": 0.7587, "step": 5125 }, { "epoch": 0.3473164451897587, "grad_norm": 3.4320085048675537, "learning_rate": 0.00037604052573932096, "loss": 0.929, "step": 5126 }, { "epoch": 0.3473842010315827, "grad_norm": 3.6428029537200928, "learning_rate": 0.00037603504928806136, "loss": 1.0656, "step": 5127 }, { "epoch": 0.3474519568734067, "grad_norm": 2.5888748168945312, "learning_rate": 0.00037602957283680176, "loss": 0.5771, "step": 5128 }, { "epoch": 0.3475197127152307, "grad_norm": 2.826669216156006, "learning_rate": 0.0003760240963855422, "loss": 0.5769, "step": 5129 }, { "epoch": 0.34758746855705464, "grad_norm": 3.8632564544677734, "learning_rate": 0.0003760186199342826, "loss": 1.2827, "step": 5130 }, { "epoch": 0.34765522439887864, "grad_norm": 2.5670359134674072, "learning_rate": 0.000376013143483023, "loss": 0.8316, "step": 5131 }, { "epoch": 0.34772298024070264, "grad_norm": 3.326375961303711, "learning_rate": 0.0003760076670317634, "loss": 0.887, "step": 5132 }, { "epoch": 0.34779073608252664, "grad_norm": 2.8373517990112305, "learning_rate": 0.0003760021905805038, "loss": 0.8219, "step": 5133 }, { "epoch": 0.3478584919243506, "grad_norm": 3.8381571769714355, "learning_rate": 0.00037599671412924426, "loss": 1.0155, "step": 5134 }, { "epoch": 0.3479262477661746, "grad_norm": 3.233539342880249, "learning_rate": 0.0003759912376779847, "loss": 0.8988, "step": 5135 }, { "epoch": 0.3479940036079986, "grad_norm": 3.6156184673309326, "learning_rate": 0.0003759857612267251, "loss": 0.8384, "step": 5136 }, { "epoch": 0.34806175944982254, "grad_norm": 2.5259053707122803, "learning_rate": 0.0003759802847754655, "loss": 0.6199, "step": 5137 }, { "epoch": 0.34812951529164654, "grad_norm": 3.3385629653930664, "learning_rate": 0.0003759748083242059, "loss": 0.9242, "step": 5138 }, { "epoch": 0.34819727113347054, "grad_norm": 3.782566547393799, "learning_rate": 0.00037596933187294637, "loss": 1.0061, "step": 5139 }, { "epoch": 0.34826502697529454, "grad_norm": 3.0341527462005615, "learning_rate": 0.00037596385542168676, "loss": 0.847, "step": 5140 }, { "epoch": 0.3483327828171185, "grad_norm": 3.684995174407959, "learning_rate": 0.00037595837897042716, "loss": 1.1244, "step": 5141 }, { "epoch": 0.3484005386589425, "grad_norm": 3.1822807788848877, "learning_rate": 0.0003759529025191676, "loss": 0.7823, "step": 5142 }, { "epoch": 0.3484682945007665, "grad_norm": 3.6888821125030518, "learning_rate": 0.000375947426067908, "loss": 0.9563, "step": 5143 }, { "epoch": 0.3485360503425905, "grad_norm": 3.3095715045928955, "learning_rate": 0.0003759419496166484, "loss": 0.8292, "step": 5144 }, { "epoch": 0.34860380618441444, "grad_norm": 2.9738340377807617, "learning_rate": 0.00037593647316538887, "loss": 0.9071, "step": 5145 }, { "epoch": 0.34867156202623845, "grad_norm": 4.192621231079102, "learning_rate": 0.00037593099671412927, "loss": 0.8509, "step": 5146 }, { "epoch": 0.34873931786806245, "grad_norm": 2.9742395877838135, "learning_rate": 0.00037592552026286967, "loss": 0.815, "step": 5147 }, { "epoch": 0.34880707370988645, "grad_norm": 4.2247700691223145, "learning_rate": 0.00037592004381161007, "loss": 0.9858, "step": 5148 }, { "epoch": 0.3488748295517104, "grad_norm": 2.9334652423858643, "learning_rate": 0.00037591456736035047, "loss": 0.8041, "step": 5149 }, { "epoch": 0.3489425853935344, "grad_norm": 3.121922016143799, "learning_rate": 0.0003759090909090909, "loss": 0.8933, "step": 5150 }, { "epoch": 0.3490103412353584, "grad_norm": 2.7427825927734375, "learning_rate": 0.00037590361445783137, "loss": 0.6599, "step": 5151 }, { "epoch": 0.34907809707718235, "grad_norm": 3.913397789001465, "learning_rate": 0.00037589813800657177, "loss": 0.9081, "step": 5152 }, { "epoch": 0.34914585291900635, "grad_norm": 2.923035144805908, "learning_rate": 0.00037589266155531217, "loss": 0.883, "step": 5153 }, { "epoch": 0.34921360876083035, "grad_norm": 3.299661159515381, "learning_rate": 0.00037588718510405257, "loss": 0.8497, "step": 5154 }, { "epoch": 0.34928136460265435, "grad_norm": 3.561575174331665, "learning_rate": 0.00037588170865279297, "loss": 1.0524, "step": 5155 }, { "epoch": 0.3493491204444783, "grad_norm": 3.6553986072540283, "learning_rate": 0.0003758762322015334, "loss": 0.8221, "step": 5156 }, { "epoch": 0.3494168762863023, "grad_norm": 4.715927600860596, "learning_rate": 0.0003758707557502739, "loss": 1.201, "step": 5157 }, { "epoch": 0.3494846321281263, "grad_norm": 4.384438514709473, "learning_rate": 0.0003758652792990143, "loss": 0.9812, "step": 5158 }, { "epoch": 0.3495523879699503, "grad_norm": 2.7696688175201416, "learning_rate": 0.0003758598028477547, "loss": 0.7631, "step": 5159 }, { "epoch": 0.34962014381177425, "grad_norm": 4.916648864746094, "learning_rate": 0.0003758543263964951, "loss": 1.1322, "step": 5160 }, { "epoch": 0.34968789965359826, "grad_norm": 2.287231206893921, "learning_rate": 0.00037584884994523553, "loss": 0.6131, "step": 5161 }, { "epoch": 0.34975565549542226, "grad_norm": 3.435894012451172, "learning_rate": 0.0003758433734939759, "loss": 1.0281, "step": 5162 }, { "epoch": 0.34982341133724626, "grad_norm": 3.737528085708618, "learning_rate": 0.0003758378970427163, "loss": 0.9806, "step": 5163 }, { "epoch": 0.3498911671790702, "grad_norm": 3.0270915031433105, "learning_rate": 0.0003758324205914567, "loss": 0.975, "step": 5164 }, { "epoch": 0.3499589230208942, "grad_norm": 4.166554927825928, "learning_rate": 0.0003758269441401972, "loss": 0.972, "step": 5165 }, { "epoch": 0.3500266788627182, "grad_norm": 4.215291500091553, "learning_rate": 0.0003758214676889376, "loss": 0.911, "step": 5166 }, { "epoch": 0.35009443470454216, "grad_norm": 3.080124855041504, "learning_rate": 0.00037581599123767803, "loss": 0.8316, "step": 5167 }, { "epoch": 0.35016219054636616, "grad_norm": 2.670156478881836, "learning_rate": 0.00037581051478641843, "loss": 0.8159, "step": 5168 }, { "epoch": 0.35022994638819016, "grad_norm": 3.5733144283294678, "learning_rate": 0.00037580503833515883, "loss": 0.946, "step": 5169 }, { "epoch": 0.35029770223001416, "grad_norm": 4.061295509338379, "learning_rate": 0.00037579956188389923, "loss": 0.9805, "step": 5170 }, { "epoch": 0.3503654580718381, "grad_norm": 3.364845037460327, "learning_rate": 0.00037579408543263963, "loss": 0.762, "step": 5171 }, { "epoch": 0.3504332139136621, "grad_norm": 2.8903603553771973, "learning_rate": 0.0003757886089813801, "loss": 0.8226, "step": 5172 }, { "epoch": 0.3505009697554861, "grad_norm": 3.448245048522949, "learning_rate": 0.00037578313253012053, "loss": 1.0855, "step": 5173 }, { "epoch": 0.3505687255973101, "grad_norm": 3.483637809753418, "learning_rate": 0.00037577765607886093, "loss": 0.8091, "step": 5174 }, { "epoch": 0.35063648143913406, "grad_norm": 3.2031972408294678, "learning_rate": 0.00037577217962760133, "loss": 0.9688, "step": 5175 }, { "epoch": 0.35070423728095806, "grad_norm": 3.105860471725464, "learning_rate": 0.00037576670317634173, "loss": 1.0037, "step": 5176 }, { "epoch": 0.35077199312278207, "grad_norm": 4.112181186676025, "learning_rate": 0.0003757612267250822, "loss": 1.0412, "step": 5177 }, { "epoch": 0.35083974896460607, "grad_norm": 3.7044577598571777, "learning_rate": 0.0003757557502738226, "loss": 0.9592, "step": 5178 }, { "epoch": 0.35090750480643, "grad_norm": 3.226240396499634, "learning_rate": 0.000375750273822563, "loss": 0.9395, "step": 5179 }, { "epoch": 0.350975260648254, "grad_norm": 2.76747727394104, "learning_rate": 0.0003757447973713034, "loss": 0.8394, "step": 5180 }, { "epoch": 0.351043016490078, "grad_norm": 3.0998144149780273, "learning_rate": 0.00037573932092004384, "loss": 0.7586, "step": 5181 }, { "epoch": 0.35111077233190197, "grad_norm": 3.528352975845337, "learning_rate": 0.00037573384446878424, "loss": 1.2193, "step": 5182 }, { "epoch": 0.35117852817372597, "grad_norm": 3.1877050399780273, "learning_rate": 0.0003757283680175247, "loss": 0.7128, "step": 5183 }, { "epoch": 0.35124628401554997, "grad_norm": 4.352890491485596, "learning_rate": 0.0003757228915662651, "loss": 1.1263, "step": 5184 }, { "epoch": 0.35131403985737397, "grad_norm": 3.9518513679504395, "learning_rate": 0.0003757174151150055, "loss": 0.6712, "step": 5185 }, { "epoch": 0.3513817956991979, "grad_norm": 2.732499122619629, "learning_rate": 0.0003757119386637459, "loss": 0.733, "step": 5186 }, { "epoch": 0.3514495515410219, "grad_norm": 5.209923267364502, "learning_rate": 0.0003757064622124863, "loss": 1.053, "step": 5187 }, { "epoch": 0.3515173073828459, "grad_norm": 3.0385587215423584, "learning_rate": 0.00037570098576122674, "loss": 0.8511, "step": 5188 }, { "epoch": 0.3515850632246699, "grad_norm": 3.1799428462982178, "learning_rate": 0.0003756955093099672, "loss": 0.9203, "step": 5189 }, { "epoch": 0.35165281906649387, "grad_norm": 3.7628538608551025, "learning_rate": 0.0003756900328587076, "loss": 0.6712, "step": 5190 }, { "epoch": 0.3517205749083179, "grad_norm": 2.935734748840332, "learning_rate": 0.000375684556407448, "loss": 0.7065, "step": 5191 }, { "epoch": 0.3517883307501419, "grad_norm": 2.526853084564209, "learning_rate": 0.0003756790799561884, "loss": 0.7265, "step": 5192 }, { "epoch": 0.3518560865919659, "grad_norm": 3.2546043395996094, "learning_rate": 0.0003756736035049288, "loss": 1.0058, "step": 5193 }, { "epoch": 0.3519238424337898, "grad_norm": 2.640350818634033, "learning_rate": 0.00037566812705366924, "loss": 0.6127, "step": 5194 }, { "epoch": 0.3519915982756138, "grad_norm": 4.069823741912842, "learning_rate": 0.00037566265060240964, "loss": 1.0572, "step": 5195 }, { "epoch": 0.35205935411743783, "grad_norm": 2.839674711227417, "learning_rate": 0.0003756571741511501, "loss": 0.7615, "step": 5196 }, { "epoch": 0.3521271099592618, "grad_norm": 5.329535961151123, "learning_rate": 0.0003756516976998905, "loss": 1.2517, "step": 5197 }, { "epoch": 0.3521948658010858, "grad_norm": 4.562623500823975, "learning_rate": 0.0003756462212486309, "loss": 0.9415, "step": 5198 }, { "epoch": 0.3522626216429098, "grad_norm": 3.9002339839935303, "learning_rate": 0.00037564074479737135, "loss": 0.8645, "step": 5199 }, { "epoch": 0.3523303774847338, "grad_norm": 3.068230152130127, "learning_rate": 0.00037563526834611175, "loss": 0.7735, "step": 5200 }, { "epoch": 0.3523981333265577, "grad_norm": 3.5892891883850098, "learning_rate": 0.00037562979189485215, "loss": 0.8782, "step": 5201 }, { "epoch": 0.35246588916838173, "grad_norm": 2.525381565093994, "learning_rate": 0.00037562431544359255, "loss": 0.8089, "step": 5202 }, { "epoch": 0.35253364501020573, "grad_norm": 5.083377838134766, "learning_rate": 0.00037561883899233295, "loss": 0.7821, "step": 5203 }, { "epoch": 0.35260140085202973, "grad_norm": 4.012070178985596, "learning_rate": 0.0003756133625410734, "loss": 1.0821, "step": 5204 }, { "epoch": 0.3526691566938537, "grad_norm": 3.140345335006714, "learning_rate": 0.00037560788608981385, "loss": 0.741, "step": 5205 }, { "epoch": 0.3527369125356777, "grad_norm": 3.98285174369812, "learning_rate": 0.00037560240963855425, "loss": 1.0136, "step": 5206 }, { "epoch": 0.3528046683775017, "grad_norm": 3.117504358291626, "learning_rate": 0.00037559693318729465, "loss": 0.9353, "step": 5207 }, { "epoch": 0.3528724242193257, "grad_norm": 5.528158664703369, "learning_rate": 0.00037559145673603505, "loss": 0.8428, "step": 5208 }, { "epoch": 0.35294018006114963, "grad_norm": 6.372541427612305, "learning_rate": 0.00037558598028477545, "loss": 0.8021, "step": 5209 }, { "epoch": 0.35300793590297364, "grad_norm": 2.4689781665802, "learning_rate": 0.0003755805038335159, "loss": 0.6849, "step": 5210 }, { "epoch": 0.35307569174479764, "grad_norm": 4.135880947113037, "learning_rate": 0.0003755750273822563, "loss": 1.1235, "step": 5211 }, { "epoch": 0.3531434475866216, "grad_norm": 3.0400452613830566, "learning_rate": 0.00037556955093099675, "loss": 0.8061, "step": 5212 }, { "epoch": 0.3532112034284456, "grad_norm": 3.7942349910736084, "learning_rate": 0.00037556407447973715, "loss": 0.946, "step": 5213 }, { "epoch": 0.3532789592702696, "grad_norm": 3.703481912612915, "learning_rate": 0.00037555859802847755, "loss": 0.9957, "step": 5214 }, { "epoch": 0.3533467151120936, "grad_norm": 3.537858247756958, "learning_rate": 0.000375553121577218, "loss": 0.7405, "step": 5215 }, { "epoch": 0.35341447095391754, "grad_norm": 3.1363351345062256, "learning_rate": 0.0003755476451259584, "loss": 0.8286, "step": 5216 }, { "epoch": 0.35348222679574154, "grad_norm": 3.6820123195648193, "learning_rate": 0.0003755421686746988, "loss": 1.208, "step": 5217 }, { "epoch": 0.35354998263756554, "grad_norm": 3.400709390640259, "learning_rate": 0.0003755366922234392, "loss": 0.8608, "step": 5218 }, { "epoch": 0.35361773847938954, "grad_norm": 4.472260475158691, "learning_rate": 0.00037553121577217966, "loss": 0.9769, "step": 5219 }, { "epoch": 0.3536854943212135, "grad_norm": 3.7792160511016846, "learning_rate": 0.00037552573932092006, "loss": 0.986, "step": 5220 }, { "epoch": 0.3537532501630375, "grad_norm": 3.7401158809661865, "learning_rate": 0.0003755202628696605, "loss": 0.9221, "step": 5221 }, { "epoch": 0.3538210060048615, "grad_norm": 4.463958263397217, "learning_rate": 0.0003755147864184009, "loss": 1.0676, "step": 5222 }, { "epoch": 0.3538887618466855, "grad_norm": 3.7129576206207275, "learning_rate": 0.0003755093099671413, "loss": 0.8341, "step": 5223 }, { "epoch": 0.35395651768850944, "grad_norm": 3.505922555923462, "learning_rate": 0.0003755038335158817, "loss": 1.0176, "step": 5224 }, { "epoch": 0.35402427353033344, "grad_norm": 3.396984577178955, "learning_rate": 0.0003754983570646221, "loss": 0.8362, "step": 5225 }, { "epoch": 0.35409202937215745, "grad_norm": 3.0877010822296143, "learning_rate": 0.00037549288061336256, "loss": 0.7934, "step": 5226 }, { "epoch": 0.3541597852139814, "grad_norm": 4.263288974761963, "learning_rate": 0.000375487404162103, "loss": 0.9631, "step": 5227 }, { "epoch": 0.3542275410558054, "grad_norm": 2.5173354148864746, "learning_rate": 0.0003754819277108434, "loss": 0.6487, "step": 5228 }, { "epoch": 0.3542952968976294, "grad_norm": 2.871943473815918, "learning_rate": 0.0003754764512595838, "loss": 0.7732, "step": 5229 }, { "epoch": 0.3543630527394534, "grad_norm": 3.3010661602020264, "learning_rate": 0.0003754709748083242, "loss": 0.7927, "step": 5230 }, { "epoch": 0.35443080858127735, "grad_norm": 3.431598663330078, "learning_rate": 0.0003754654983570646, "loss": 0.8314, "step": 5231 }, { "epoch": 0.35449856442310135, "grad_norm": 3.57837176322937, "learning_rate": 0.00037546002190580506, "loss": 0.7858, "step": 5232 }, { "epoch": 0.35456632026492535, "grad_norm": 3.6178135871887207, "learning_rate": 0.00037545454545454546, "loss": 0.8568, "step": 5233 }, { "epoch": 0.35463407610674935, "grad_norm": 5.069169044494629, "learning_rate": 0.00037544906900328586, "loss": 0.8733, "step": 5234 }, { "epoch": 0.3547018319485733, "grad_norm": 3.037543296813965, "learning_rate": 0.0003754435925520263, "loss": 0.8555, "step": 5235 }, { "epoch": 0.3547695877903973, "grad_norm": 6.814340114593506, "learning_rate": 0.0003754381161007667, "loss": 0.9531, "step": 5236 }, { "epoch": 0.3548373436322213, "grad_norm": 4.747251987457275, "learning_rate": 0.00037543263964950717, "loss": 1.1139, "step": 5237 }, { "epoch": 0.3549050994740453, "grad_norm": 3.5612335205078125, "learning_rate": 0.00037542716319824757, "loss": 0.9444, "step": 5238 }, { "epoch": 0.35497285531586925, "grad_norm": 3.584312677383423, "learning_rate": 0.00037542168674698797, "loss": 0.9071, "step": 5239 }, { "epoch": 0.35504061115769325, "grad_norm": 3.902804374694824, "learning_rate": 0.00037541621029572837, "loss": 0.9278, "step": 5240 }, { "epoch": 0.35510836699951726, "grad_norm": 2.9638261795043945, "learning_rate": 0.00037541073384446877, "loss": 0.8368, "step": 5241 }, { "epoch": 0.3551761228413412, "grad_norm": 2.5804615020751953, "learning_rate": 0.0003754052573932092, "loss": 0.639, "step": 5242 }, { "epoch": 0.3552438786831652, "grad_norm": 3.0720252990722656, "learning_rate": 0.00037539978094194967, "loss": 0.9088, "step": 5243 }, { "epoch": 0.3553116345249892, "grad_norm": 3.661824941635132, "learning_rate": 0.00037539430449069007, "loss": 0.8979, "step": 5244 }, { "epoch": 0.3553793903668132, "grad_norm": 2.298072338104248, "learning_rate": 0.00037538882803943047, "loss": 0.7848, "step": 5245 }, { "epoch": 0.35544714620863715, "grad_norm": 2.6522982120513916, "learning_rate": 0.00037538335158817087, "loss": 0.8229, "step": 5246 }, { "epoch": 0.35551490205046116, "grad_norm": 3.5729618072509766, "learning_rate": 0.00037537787513691127, "loss": 0.9302, "step": 5247 }, { "epoch": 0.35558265789228516, "grad_norm": 2.9825613498687744, "learning_rate": 0.0003753723986856517, "loss": 0.9219, "step": 5248 }, { "epoch": 0.35565041373410916, "grad_norm": 2.965501070022583, "learning_rate": 0.0003753669222343921, "loss": 0.7003, "step": 5249 }, { "epoch": 0.3557181695759331, "grad_norm": 3.4753518104553223, "learning_rate": 0.0003753614457831325, "loss": 0.888, "step": 5250 }, { "epoch": 0.3557859254177571, "grad_norm": 4.103094100952148, "learning_rate": 0.000375355969331873, "loss": 1.0145, "step": 5251 }, { "epoch": 0.3558536812595811, "grad_norm": 4.622770309448242, "learning_rate": 0.0003753504928806134, "loss": 1.264, "step": 5252 }, { "epoch": 0.3559214371014051, "grad_norm": 5.3619384765625, "learning_rate": 0.00037534501642935383, "loss": 1.1948, "step": 5253 }, { "epoch": 0.35598919294322906, "grad_norm": 4.087440013885498, "learning_rate": 0.0003753395399780942, "loss": 0.7233, "step": 5254 }, { "epoch": 0.35605694878505306, "grad_norm": 4.5514678955078125, "learning_rate": 0.0003753340635268346, "loss": 1.2091, "step": 5255 }, { "epoch": 0.35612470462687706, "grad_norm": 4.482836723327637, "learning_rate": 0.000375328587075575, "loss": 0.8891, "step": 5256 }, { "epoch": 0.356192460468701, "grad_norm": 4.187050819396973, "learning_rate": 0.0003753231106243154, "loss": 0.8885, "step": 5257 }, { "epoch": 0.356260216310525, "grad_norm": 3.117799997329712, "learning_rate": 0.0003753176341730559, "loss": 0.8808, "step": 5258 }, { "epoch": 0.356327972152349, "grad_norm": 2.9851255416870117, "learning_rate": 0.00037531215772179633, "loss": 0.7723, "step": 5259 }, { "epoch": 0.356395727994173, "grad_norm": 3.2250404357910156, "learning_rate": 0.00037530668127053673, "loss": 0.8467, "step": 5260 }, { "epoch": 0.35646348383599696, "grad_norm": 5.4106035232543945, "learning_rate": 0.00037530120481927713, "loss": 0.7054, "step": 5261 }, { "epoch": 0.35653123967782097, "grad_norm": 3.099815607070923, "learning_rate": 0.00037529572836801753, "loss": 0.8568, "step": 5262 }, { "epoch": 0.35659899551964497, "grad_norm": 4.91948127746582, "learning_rate": 0.00037529025191675793, "loss": 1.2932, "step": 5263 }, { "epoch": 0.35666675136146897, "grad_norm": 3.2795543670654297, "learning_rate": 0.0003752847754654984, "loss": 0.9941, "step": 5264 }, { "epoch": 0.3567345072032929, "grad_norm": 3.040208339691162, "learning_rate": 0.0003752792990142388, "loss": 0.7272, "step": 5265 }, { "epoch": 0.3568022630451169, "grad_norm": 3.75807523727417, "learning_rate": 0.00037527382256297923, "loss": 0.9843, "step": 5266 }, { "epoch": 0.3568700188869409, "grad_norm": 2.5221774578094482, "learning_rate": 0.00037526834611171963, "loss": 0.7537, "step": 5267 }, { "epoch": 0.3569377747287649, "grad_norm": 2.9787867069244385, "learning_rate": 0.00037526286966046003, "loss": 0.8916, "step": 5268 }, { "epoch": 0.35700553057058887, "grad_norm": 4.7267961502075195, "learning_rate": 0.00037525739320920043, "loss": 0.8626, "step": 5269 }, { "epoch": 0.35707328641241287, "grad_norm": 4.687716484069824, "learning_rate": 0.0003752519167579409, "loss": 0.9299, "step": 5270 }, { "epoch": 0.3571410422542369, "grad_norm": 3.3784003257751465, "learning_rate": 0.0003752464403066813, "loss": 0.8396, "step": 5271 }, { "epoch": 0.3572087980960608, "grad_norm": 3.4913077354431152, "learning_rate": 0.0003752409638554217, "loss": 0.9507, "step": 5272 }, { "epoch": 0.3572765539378848, "grad_norm": 3.498615026473999, "learning_rate": 0.0003752354874041621, "loss": 0.8403, "step": 5273 }, { "epoch": 0.3573443097797088, "grad_norm": 9.602298736572266, "learning_rate": 0.00037523001095290254, "loss": 0.8762, "step": 5274 }, { "epoch": 0.3574120656215328, "grad_norm": 3.916182518005371, "learning_rate": 0.000375224534501643, "loss": 0.9652, "step": 5275 }, { "epoch": 0.3574798214633568, "grad_norm": 3.700988531112671, "learning_rate": 0.0003752190580503834, "loss": 0.9498, "step": 5276 }, { "epoch": 0.3575475773051808, "grad_norm": 3.8978474140167236, "learning_rate": 0.0003752135815991238, "loss": 0.9469, "step": 5277 }, { "epoch": 0.3576153331470048, "grad_norm": 2.9933013916015625, "learning_rate": 0.0003752081051478642, "loss": 0.7941, "step": 5278 }, { "epoch": 0.3576830889888288, "grad_norm": 3.3093342781066895, "learning_rate": 0.0003752026286966046, "loss": 0.87, "step": 5279 }, { "epoch": 0.3577508448306527, "grad_norm": 2.561608076095581, "learning_rate": 0.00037519715224534504, "loss": 0.797, "step": 5280 }, { "epoch": 0.3578186006724767, "grad_norm": 3.848283529281616, "learning_rate": 0.00037519167579408544, "loss": 0.9462, "step": 5281 }, { "epoch": 0.35788635651430073, "grad_norm": 3.5649309158325195, "learning_rate": 0.0003751861993428259, "loss": 0.8111, "step": 5282 }, { "epoch": 0.35795411235612473, "grad_norm": 3.1415412425994873, "learning_rate": 0.0003751807228915663, "loss": 1.0036, "step": 5283 }, { "epoch": 0.3580218681979487, "grad_norm": 3.8053858280181885, "learning_rate": 0.0003751752464403067, "loss": 0.909, "step": 5284 }, { "epoch": 0.3580896240397727, "grad_norm": 3.3017733097076416, "learning_rate": 0.0003751697699890471, "loss": 0.9742, "step": 5285 }, { "epoch": 0.3581573798815967, "grad_norm": 3.0735089778900146, "learning_rate": 0.00037516429353778754, "loss": 0.7757, "step": 5286 }, { "epoch": 0.35822513572342063, "grad_norm": 5.549263954162598, "learning_rate": 0.00037515881708652794, "loss": 0.7215, "step": 5287 }, { "epoch": 0.35829289156524463, "grad_norm": 4.021141529083252, "learning_rate": 0.00037515334063526834, "loss": 0.831, "step": 5288 }, { "epoch": 0.35836064740706863, "grad_norm": 4.32338809967041, "learning_rate": 0.0003751478641840088, "loss": 1.245, "step": 5289 }, { "epoch": 0.35842840324889264, "grad_norm": 3.167405843734741, "learning_rate": 0.0003751423877327492, "loss": 0.9715, "step": 5290 }, { "epoch": 0.3584961590907166, "grad_norm": 3.8975563049316406, "learning_rate": 0.00037513691128148965, "loss": 0.8192, "step": 5291 }, { "epoch": 0.3585639149325406, "grad_norm": 3.915180206298828, "learning_rate": 0.00037513143483023005, "loss": 1.0887, "step": 5292 }, { "epoch": 0.3586316707743646, "grad_norm": 3.1252009868621826, "learning_rate": 0.00037512595837897045, "loss": 0.8284, "step": 5293 }, { "epoch": 0.3586994266161886, "grad_norm": 2.9816744327545166, "learning_rate": 0.00037512048192771085, "loss": 0.738, "step": 5294 }, { "epoch": 0.35876718245801253, "grad_norm": 2.6410839557647705, "learning_rate": 0.00037511500547645124, "loss": 0.9273, "step": 5295 }, { "epoch": 0.35883493829983654, "grad_norm": 2.539437770843506, "learning_rate": 0.0003751095290251917, "loss": 0.6603, "step": 5296 }, { "epoch": 0.35890269414166054, "grad_norm": 3.3621127605438232, "learning_rate": 0.00037510405257393215, "loss": 0.7776, "step": 5297 }, { "epoch": 0.35897044998348454, "grad_norm": 3.1120567321777344, "learning_rate": 0.00037509857612267255, "loss": 0.9242, "step": 5298 }, { "epoch": 0.3590382058253085, "grad_norm": 3.358152389526367, "learning_rate": 0.00037509309967141295, "loss": 0.9384, "step": 5299 }, { "epoch": 0.3591059616671325, "grad_norm": 3.177192211151123, "learning_rate": 0.00037508762322015335, "loss": 0.8414, "step": 5300 }, { "epoch": 0.3591737175089565, "grad_norm": 4.123505592346191, "learning_rate": 0.00037508214676889375, "loss": 0.968, "step": 5301 }, { "epoch": 0.35924147335078044, "grad_norm": 3.382920980453491, "learning_rate": 0.0003750766703176342, "loss": 0.8461, "step": 5302 }, { "epoch": 0.35930922919260444, "grad_norm": 4.884204864501953, "learning_rate": 0.0003750711938663746, "loss": 1.0417, "step": 5303 }, { "epoch": 0.35937698503442844, "grad_norm": 3.033381462097168, "learning_rate": 0.000375065717415115, "loss": 0.7815, "step": 5304 }, { "epoch": 0.35944474087625244, "grad_norm": 3.228245973587036, "learning_rate": 0.00037506024096385545, "loss": 0.8155, "step": 5305 }, { "epoch": 0.3595124967180764, "grad_norm": 3.5197646617889404, "learning_rate": 0.00037505476451259585, "loss": 0.8599, "step": 5306 }, { "epoch": 0.3595802525599004, "grad_norm": 4.456432819366455, "learning_rate": 0.00037504928806133625, "loss": 0.8373, "step": 5307 }, { "epoch": 0.3596480084017244, "grad_norm": 4.820195198059082, "learning_rate": 0.0003750438116100767, "loss": 1.1995, "step": 5308 }, { "epoch": 0.3597157642435484, "grad_norm": 4.720876216888428, "learning_rate": 0.0003750383351588171, "loss": 1.1065, "step": 5309 }, { "epoch": 0.35978352008537234, "grad_norm": 3.1989428997039795, "learning_rate": 0.0003750328587075575, "loss": 0.7985, "step": 5310 }, { "epoch": 0.35985127592719635, "grad_norm": 3.1615402698516846, "learning_rate": 0.0003750273822562979, "loss": 0.7413, "step": 5311 }, { "epoch": 0.35991903176902035, "grad_norm": 3.2439658641815186, "learning_rate": 0.00037502190580503836, "loss": 0.8451, "step": 5312 }, { "epoch": 0.35998678761084435, "grad_norm": 6.153263092041016, "learning_rate": 0.0003750164293537788, "loss": 0.8092, "step": 5313 }, { "epoch": 0.3600545434526683, "grad_norm": 3.706691026687622, "learning_rate": 0.0003750109529025192, "loss": 0.857, "step": 5314 }, { "epoch": 0.3601222992944923, "grad_norm": 3.29728364944458, "learning_rate": 0.0003750054764512596, "loss": 0.7236, "step": 5315 }, { "epoch": 0.3601900551363163, "grad_norm": 4.237719535827637, "learning_rate": 0.000375, "loss": 1.1314, "step": 5316 }, { "epoch": 0.36025781097814025, "grad_norm": 3.347698450088501, "learning_rate": 0.0003749945235487404, "loss": 0.8853, "step": 5317 }, { "epoch": 0.36032556681996425, "grad_norm": 3.186636447906494, "learning_rate": 0.00037498904709748086, "loss": 0.7718, "step": 5318 }, { "epoch": 0.36039332266178825, "grad_norm": 3.954906940460205, "learning_rate": 0.00037498357064622126, "loss": 0.871, "step": 5319 }, { "epoch": 0.36046107850361225, "grad_norm": 2.848229169845581, "learning_rate": 0.0003749780941949617, "loss": 0.7957, "step": 5320 }, { "epoch": 0.3605288343454362, "grad_norm": 3.126863956451416, "learning_rate": 0.0003749726177437021, "loss": 0.7592, "step": 5321 }, { "epoch": 0.3605965901872602, "grad_norm": 2.6204352378845215, "learning_rate": 0.0003749671412924425, "loss": 0.6864, "step": 5322 }, { "epoch": 0.3606643460290842, "grad_norm": 4.353353977203369, "learning_rate": 0.0003749616648411829, "loss": 1.0232, "step": 5323 }, { "epoch": 0.3607321018709082, "grad_norm": 3.761713981628418, "learning_rate": 0.00037495618838992336, "loss": 0.8696, "step": 5324 }, { "epoch": 0.36079985771273215, "grad_norm": 3.640532970428467, "learning_rate": 0.00037495071193866376, "loss": 0.9105, "step": 5325 }, { "epoch": 0.36086761355455615, "grad_norm": 3.504530191421509, "learning_rate": 0.00037494523548740416, "loss": 1.0116, "step": 5326 }, { "epoch": 0.36093536939638016, "grad_norm": 2.9961044788360596, "learning_rate": 0.00037493975903614456, "loss": 0.8104, "step": 5327 }, { "epoch": 0.36100312523820416, "grad_norm": 3.504249095916748, "learning_rate": 0.000374934282584885, "loss": 0.9986, "step": 5328 }, { "epoch": 0.3610708810800281, "grad_norm": 3.450448751449585, "learning_rate": 0.00037492880613362547, "loss": 0.7597, "step": 5329 }, { "epoch": 0.3611386369218521, "grad_norm": 2.8367819786071777, "learning_rate": 0.00037492332968236587, "loss": 0.8387, "step": 5330 }, { "epoch": 0.3612063927636761, "grad_norm": 4.405371189117432, "learning_rate": 0.00037491785323110627, "loss": 1.1001, "step": 5331 }, { "epoch": 0.36127414860550006, "grad_norm": 3.1953725814819336, "learning_rate": 0.00037491237677984667, "loss": 0.9446, "step": 5332 }, { "epoch": 0.36134190444732406, "grad_norm": 5.582015037536621, "learning_rate": 0.00037490690032858707, "loss": 0.9018, "step": 5333 }, { "epoch": 0.36140966028914806, "grad_norm": 4.6093902587890625, "learning_rate": 0.0003749014238773275, "loss": 0.7225, "step": 5334 }, { "epoch": 0.36147741613097206, "grad_norm": 4.95910120010376, "learning_rate": 0.0003748959474260679, "loss": 1.0063, "step": 5335 }, { "epoch": 0.361545171972796, "grad_norm": 3.115673542022705, "learning_rate": 0.00037489047097480837, "loss": 0.8628, "step": 5336 }, { "epoch": 0.36161292781462, "grad_norm": 3.1270761489868164, "learning_rate": 0.00037488499452354877, "loss": 0.8159, "step": 5337 }, { "epoch": 0.361680683656444, "grad_norm": 3.9813075065612793, "learning_rate": 0.00037487951807228917, "loss": 1.0157, "step": 5338 }, { "epoch": 0.361748439498268, "grad_norm": 3.1259140968322754, "learning_rate": 0.00037487404162102957, "loss": 0.6732, "step": 5339 }, { "epoch": 0.36181619534009196, "grad_norm": 3.471842050552368, "learning_rate": 0.00037486856516977, "loss": 0.9661, "step": 5340 }, { "epoch": 0.36188395118191596, "grad_norm": 4.13902473449707, "learning_rate": 0.0003748630887185104, "loss": 1.0165, "step": 5341 }, { "epoch": 0.36195170702373997, "grad_norm": 4.526461124420166, "learning_rate": 0.0003748576122672508, "loss": 1.0564, "step": 5342 }, { "epoch": 0.36201946286556397, "grad_norm": 3.1764745712280273, "learning_rate": 0.0003748521358159912, "loss": 0.8757, "step": 5343 }, { "epoch": 0.3620872187073879, "grad_norm": 3.292126178741455, "learning_rate": 0.0003748466593647317, "loss": 0.9732, "step": 5344 }, { "epoch": 0.3621549745492119, "grad_norm": 4.996058464050293, "learning_rate": 0.0003748411829134721, "loss": 1.0605, "step": 5345 }, { "epoch": 0.3622227303910359, "grad_norm": 4.166463851928711, "learning_rate": 0.0003748357064622125, "loss": 1.0655, "step": 5346 }, { "epoch": 0.36229048623285987, "grad_norm": 4.191798686981201, "learning_rate": 0.0003748302300109529, "loss": 0.9102, "step": 5347 }, { "epoch": 0.36235824207468387, "grad_norm": 3.2994272708892822, "learning_rate": 0.0003748247535596933, "loss": 0.8686, "step": 5348 }, { "epoch": 0.36242599791650787, "grad_norm": 3.0593926906585693, "learning_rate": 0.0003748192771084337, "loss": 1.0556, "step": 5349 }, { "epoch": 0.36249375375833187, "grad_norm": 3.241879463195801, "learning_rate": 0.0003748138006571742, "loss": 0.8863, "step": 5350 }, { "epoch": 0.3625615096001558, "grad_norm": 2.744612455368042, "learning_rate": 0.00037480832420591463, "loss": 0.7116, "step": 5351 }, { "epoch": 0.3626292654419798, "grad_norm": 3.8527004718780518, "learning_rate": 0.00037480284775465503, "loss": 1.0755, "step": 5352 }, { "epoch": 0.3626970212838038, "grad_norm": 2.916987657546997, "learning_rate": 0.00037479737130339543, "loss": 0.969, "step": 5353 }, { "epoch": 0.3627647771256278, "grad_norm": 3.6509549617767334, "learning_rate": 0.00037479189485213583, "loss": 0.9319, "step": 5354 }, { "epoch": 0.36283253296745177, "grad_norm": 3.858877420425415, "learning_rate": 0.00037478641840087623, "loss": 1.0843, "step": 5355 }, { "epoch": 0.3629002888092758, "grad_norm": 3.1152849197387695, "learning_rate": 0.0003747809419496167, "loss": 0.8305, "step": 5356 }, { "epoch": 0.3629680446510998, "grad_norm": 4.071075916290283, "learning_rate": 0.0003747754654983571, "loss": 0.9655, "step": 5357 }, { "epoch": 0.3630358004929238, "grad_norm": 3.2037417888641357, "learning_rate": 0.0003747699890470975, "loss": 0.8046, "step": 5358 }, { "epoch": 0.3631035563347477, "grad_norm": 3.53132963180542, "learning_rate": 0.00037476451259583793, "loss": 0.9385, "step": 5359 }, { "epoch": 0.3631713121765717, "grad_norm": 3.0029678344726562, "learning_rate": 0.00037475903614457833, "loss": 0.912, "step": 5360 }, { "epoch": 0.3632390680183957, "grad_norm": 3.8646602630615234, "learning_rate": 0.00037475355969331873, "loss": 1.0578, "step": 5361 }, { "epoch": 0.3633068238602197, "grad_norm": 4.695596694946289, "learning_rate": 0.0003747480832420592, "loss": 1.1899, "step": 5362 }, { "epoch": 0.3633745797020437, "grad_norm": 2.862194538116455, "learning_rate": 0.0003747426067907996, "loss": 0.8132, "step": 5363 }, { "epoch": 0.3634423355438677, "grad_norm": 3.524240016937256, "learning_rate": 0.00037473713033954, "loss": 1.0246, "step": 5364 }, { "epoch": 0.3635100913856917, "grad_norm": 3.288257360458374, "learning_rate": 0.0003747316538882804, "loss": 0.8424, "step": 5365 }, { "epoch": 0.3635778472275156, "grad_norm": 3.5029993057250977, "learning_rate": 0.00037472617743702084, "loss": 0.9025, "step": 5366 }, { "epoch": 0.36364560306933963, "grad_norm": 3.1058220863342285, "learning_rate": 0.0003747207009857613, "loss": 0.8779, "step": 5367 }, { "epoch": 0.36371335891116363, "grad_norm": 3.462888479232788, "learning_rate": 0.0003747152245345017, "loss": 0.65, "step": 5368 }, { "epoch": 0.36378111475298763, "grad_norm": 3.495150327682495, "learning_rate": 0.0003747097480832421, "loss": 0.9757, "step": 5369 }, { "epoch": 0.3638488705948116, "grad_norm": 2.4618706703186035, "learning_rate": 0.0003747042716319825, "loss": 0.6794, "step": 5370 }, { "epoch": 0.3639166264366356, "grad_norm": 3.671041965484619, "learning_rate": 0.0003746987951807229, "loss": 0.8306, "step": 5371 }, { "epoch": 0.3639843822784596, "grad_norm": 3.169339656829834, "learning_rate": 0.00037469331872946334, "loss": 0.7189, "step": 5372 }, { "epoch": 0.36405213812028353, "grad_norm": 2.782782793045044, "learning_rate": 0.00037468784227820374, "loss": 0.8572, "step": 5373 }, { "epoch": 0.36411989396210753, "grad_norm": 2.6313319206237793, "learning_rate": 0.00037468236582694414, "loss": 0.647, "step": 5374 }, { "epoch": 0.36418764980393153, "grad_norm": 2.980165481567383, "learning_rate": 0.0003746768893756846, "loss": 0.8614, "step": 5375 }, { "epoch": 0.36425540564575554, "grad_norm": 3.2244374752044678, "learning_rate": 0.000374671412924425, "loss": 0.8935, "step": 5376 }, { "epoch": 0.3643231614875795, "grad_norm": 3.2004127502441406, "learning_rate": 0.0003746659364731654, "loss": 0.9251, "step": 5377 }, { "epoch": 0.3643909173294035, "grad_norm": 3.951181173324585, "learning_rate": 0.00037466046002190584, "loss": 0.8746, "step": 5378 }, { "epoch": 0.3644586731712275, "grad_norm": 3.1199169158935547, "learning_rate": 0.00037465498357064624, "loss": 0.7455, "step": 5379 }, { "epoch": 0.3645264290130515, "grad_norm": 3.6075358390808105, "learning_rate": 0.00037464950711938664, "loss": 1.0378, "step": 5380 }, { "epoch": 0.36459418485487544, "grad_norm": 2.8567612171173096, "learning_rate": 0.00037464403066812704, "loss": 0.9343, "step": 5381 }, { "epoch": 0.36466194069669944, "grad_norm": 3.363304615020752, "learning_rate": 0.0003746385542168675, "loss": 0.9005, "step": 5382 }, { "epoch": 0.36472969653852344, "grad_norm": 3.2922897338867188, "learning_rate": 0.0003746330777656079, "loss": 0.8917, "step": 5383 }, { "epoch": 0.36479745238034744, "grad_norm": 3.7587101459503174, "learning_rate": 0.00037462760131434835, "loss": 1.2059, "step": 5384 }, { "epoch": 0.3648652082221714, "grad_norm": 3.531836986541748, "learning_rate": 0.00037462212486308875, "loss": 0.9279, "step": 5385 }, { "epoch": 0.3649329640639954, "grad_norm": 3.788323402404785, "learning_rate": 0.00037461664841182915, "loss": 0.7052, "step": 5386 }, { "epoch": 0.3650007199058194, "grad_norm": 3.864682912826538, "learning_rate": 0.00037461117196056954, "loss": 1.2182, "step": 5387 }, { "epoch": 0.36506847574764334, "grad_norm": 4.187288284301758, "learning_rate": 0.00037460569550931, "loss": 0.8592, "step": 5388 }, { "epoch": 0.36513623158946734, "grad_norm": 3.2868242263793945, "learning_rate": 0.0003746002190580504, "loss": 1.0426, "step": 5389 }, { "epoch": 0.36520398743129134, "grad_norm": 4.037953853607178, "learning_rate": 0.00037459474260679085, "loss": 1.0541, "step": 5390 }, { "epoch": 0.36527174327311535, "grad_norm": 3.524847984313965, "learning_rate": 0.00037458926615553125, "loss": 0.965, "step": 5391 }, { "epoch": 0.3653394991149393, "grad_norm": 3.7108588218688965, "learning_rate": 0.00037458378970427165, "loss": 0.9804, "step": 5392 }, { "epoch": 0.3654072549567633, "grad_norm": 3.4689865112304688, "learning_rate": 0.00037457831325301205, "loss": 1.0011, "step": 5393 }, { "epoch": 0.3654750107985873, "grad_norm": 4.499992847442627, "learning_rate": 0.0003745728368017525, "loss": 0.8824, "step": 5394 }, { "epoch": 0.3655427666404113, "grad_norm": 3.5356943607330322, "learning_rate": 0.0003745673603504929, "loss": 0.9039, "step": 5395 }, { "epoch": 0.36561052248223525, "grad_norm": 2.9342541694641113, "learning_rate": 0.0003745618838992333, "loss": 0.9271, "step": 5396 }, { "epoch": 0.36567827832405925, "grad_norm": 4.110143661499023, "learning_rate": 0.0003745564074479737, "loss": 0.9639, "step": 5397 }, { "epoch": 0.36574603416588325, "grad_norm": 3.578934669494629, "learning_rate": 0.00037455093099671415, "loss": 0.8362, "step": 5398 }, { "epoch": 0.36581379000770725, "grad_norm": 3.171110153198242, "learning_rate": 0.00037454545454545455, "loss": 0.7895, "step": 5399 }, { "epoch": 0.3658815458495312, "grad_norm": 3.124711275100708, "learning_rate": 0.000374539978094195, "loss": 0.7781, "step": 5400 }, { "epoch": 0.3659493016913552, "grad_norm": 3.827589750289917, "learning_rate": 0.0003745345016429354, "loss": 1.0685, "step": 5401 }, { "epoch": 0.3660170575331792, "grad_norm": 4.412734508514404, "learning_rate": 0.0003745290251916758, "loss": 1.0027, "step": 5402 }, { "epoch": 0.36608481337500315, "grad_norm": 4.058601379394531, "learning_rate": 0.0003745235487404162, "loss": 1.2721, "step": 5403 }, { "epoch": 0.36615256921682715, "grad_norm": 3.881699562072754, "learning_rate": 0.00037451807228915666, "loss": 0.9844, "step": 5404 }, { "epoch": 0.36622032505865115, "grad_norm": 3.2257468700408936, "learning_rate": 0.00037451259583789706, "loss": 0.8235, "step": 5405 }, { "epoch": 0.36628808090047515, "grad_norm": 3.2964189052581787, "learning_rate": 0.0003745071193866375, "loss": 0.8881, "step": 5406 }, { "epoch": 0.3663558367422991, "grad_norm": 5.733243465423584, "learning_rate": 0.0003745016429353779, "loss": 1.1591, "step": 5407 }, { "epoch": 0.3664235925841231, "grad_norm": 2.9986631870269775, "learning_rate": 0.0003744961664841183, "loss": 0.9022, "step": 5408 }, { "epoch": 0.3664913484259471, "grad_norm": 3.4962732791900635, "learning_rate": 0.0003744906900328587, "loss": 1.1567, "step": 5409 }, { "epoch": 0.3665591042677711, "grad_norm": 3.5910472869873047, "learning_rate": 0.00037448521358159916, "loss": 0.9749, "step": 5410 }, { "epoch": 0.36662686010959505, "grad_norm": 2.6154730319976807, "learning_rate": 0.00037447973713033956, "loss": 0.6357, "step": 5411 }, { "epoch": 0.36669461595141906, "grad_norm": 2.9841790199279785, "learning_rate": 0.00037447426067907996, "loss": 0.7036, "step": 5412 }, { "epoch": 0.36676237179324306, "grad_norm": 2.4721925258636475, "learning_rate": 0.00037446878422782036, "loss": 0.7765, "step": 5413 }, { "epoch": 0.36683012763506706, "grad_norm": 3.285203456878662, "learning_rate": 0.0003744633077765608, "loss": 0.7789, "step": 5414 }, { "epoch": 0.366897883476891, "grad_norm": 2.7214183807373047, "learning_rate": 0.0003744578313253012, "loss": 0.7162, "step": 5415 }, { "epoch": 0.366965639318715, "grad_norm": 3.6289854049682617, "learning_rate": 0.00037445235487404166, "loss": 0.7316, "step": 5416 }, { "epoch": 0.367033395160539, "grad_norm": 2.9602062702178955, "learning_rate": 0.00037444687842278206, "loss": 0.6899, "step": 5417 }, { "epoch": 0.36710115100236296, "grad_norm": 4.808314800262451, "learning_rate": 0.00037444140197152246, "loss": 1.1222, "step": 5418 }, { "epoch": 0.36716890684418696, "grad_norm": 3.631767988204956, "learning_rate": 0.00037443592552026286, "loss": 0.9478, "step": 5419 }, { "epoch": 0.36723666268601096, "grad_norm": 2.967015027999878, "learning_rate": 0.00037443044906900326, "loss": 0.7626, "step": 5420 }, { "epoch": 0.36730441852783496, "grad_norm": 4.049998760223389, "learning_rate": 0.0003744249726177437, "loss": 1.0511, "step": 5421 }, { "epoch": 0.3673721743696589, "grad_norm": 2.9781908988952637, "learning_rate": 0.00037441949616648417, "loss": 0.8875, "step": 5422 }, { "epoch": 0.3674399302114829, "grad_norm": 3.3044538497924805, "learning_rate": 0.00037441401971522457, "loss": 0.8343, "step": 5423 }, { "epoch": 0.3675076860533069, "grad_norm": 3.731454849243164, "learning_rate": 0.00037440854326396497, "loss": 0.9613, "step": 5424 }, { "epoch": 0.3675754418951309, "grad_norm": 2.6973676681518555, "learning_rate": 0.00037440306681270537, "loss": 0.7769, "step": 5425 }, { "epoch": 0.36764319773695486, "grad_norm": 3.542736530303955, "learning_rate": 0.0003743975903614458, "loss": 1.0885, "step": 5426 }, { "epoch": 0.36771095357877887, "grad_norm": 3.442056179046631, "learning_rate": 0.0003743921139101862, "loss": 0.9477, "step": 5427 }, { "epoch": 0.36777870942060287, "grad_norm": 3.0729806423187256, "learning_rate": 0.0003743866374589266, "loss": 0.8693, "step": 5428 }, { "epoch": 0.36784646526242687, "grad_norm": 3.7244439125061035, "learning_rate": 0.00037438116100766707, "loss": 0.8599, "step": 5429 }, { "epoch": 0.3679142211042508, "grad_norm": 2.9524025917053223, "learning_rate": 0.00037437568455640747, "loss": 0.8364, "step": 5430 }, { "epoch": 0.3679819769460748, "grad_norm": 2.8142693042755127, "learning_rate": 0.00037437020810514787, "loss": 0.7366, "step": 5431 }, { "epoch": 0.3680497327878988, "grad_norm": 3.2113399505615234, "learning_rate": 0.0003743647316538883, "loss": 0.8578, "step": 5432 }, { "epoch": 0.36811748862972277, "grad_norm": 2.2345659732818604, "learning_rate": 0.0003743592552026287, "loss": 0.6139, "step": 5433 }, { "epoch": 0.36818524447154677, "grad_norm": 2.8539280891418457, "learning_rate": 0.0003743537787513691, "loss": 0.7332, "step": 5434 }, { "epoch": 0.36825300031337077, "grad_norm": 4.634270668029785, "learning_rate": 0.0003743483023001095, "loss": 0.9495, "step": 5435 }, { "epoch": 0.3683207561551948, "grad_norm": 2.6371827125549316, "learning_rate": 0.0003743428258488499, "loss": 0.6922, "step": 5436 }, { "epoch": 0.3683885119970187, "grad_norm": 4.219168186187744, "learning_rate": 0.00037433734939759037, "loss": 0.8702, "step": 5437 }, { "epoch": 0.3684562678388427, "grad_norm": 3.2009689807891846, "learning_rate": 0.0003743318729463308, "loss": 0.8081, "step": 5438 }, { "epoch": 0.3685240236806667, "grad_norm": 4.558162212371826, "learning_rate": 0.0003743263964950712, "loss": 0.7678, "step": 5439 }, { "epoch": 0.3685917795224907, "grad_norm": 3.5690677165985107, "learning_rate": 0.0003743209200438116, "loss": 0.9604, "step": 5440 }, { "epoch": 0.36865953536431467, "grad_norm": 3.1745691299438477, "learning_rate": 0.000374315443592552, "loss": 0.8717, "step": 5441 }, { "epoch": 0.3687272912061387, "grad_norm": 6.693586826324463, "learning_rate": 0.0003743099671412925, "loss": 1.1077, "step": 5442 }, { "epoch": 0.3687950470479627, "grad_norm": 4.210892200469971, "learning_rate": 0.0003743044906900329, "loss": 1.0107, "step": 5443 }, { "epoch": 0.3688628028897867, "grad_norm": 4.4334001541137695, "learning_rate": 0.0003742990142387733, "loss": 1.0449, "step": 5444 }, { "epoch": 0.3689305587316106, "grad_norm": 2.5816431045532227, "learning_rate": 0.00037429353778751373, "loss": 0.6099, "step": 5445 }, { "epoch": 0.3689983145734346, "grad_norm": 2.9959185123443604, "learning_rate": 0.00037428806133625413, "loss": 0.8546, "step": 5446 }, { "epoch": 0.36906607041525863, "grad_norm": 3.576401710510254, "learning_rate": 0.00037428258488499453, "loss": 0.883, "step": 5447 }, { "epoch": 0.3691338262570826, "grad_norm": 3.7672712802886963, "learning_rate": 0.000374277108433735, "loss": 0.8715, "step": 5448 }, { "epoch": 0.3692015820989066, "grad_norm": 2.8705081939697266, "learning_rate": 0.0003742716319824754, "loss": 0.7913, "step": 5449 }, { "epoch": 0.3692693379407306, "grad_norm": 4.1896071434021, "learning_rate": 0.0003742661555312158, "loss": 0.7753, "step": 5450 }, { "epoch": 0.3693370937825546, "grad_norm": 3.741680860519409, "learning_rate": 0.0003742606790799562, "loss": 1.0963, "step": 5451 }, { "epoch": 0.36940484962437853, "grad_norm": 3.2601795196533203, "learning_rate": 0.00037425520262869663, "loss": 1.0227, "step": 5452 }, { "epoch": 0.36947260546620253, "grad_norm": 3.7297489643096924, "learning_rate": 0.00037424972617743703, "loss": 1.0866, "step": 5453 }, { "epoch": 0.36954036130802653, "grad_norm": 3.126919746398926, "learning_rate": 0.0003742442497261775, "loss": 0.9734, "step": 5454 }, { "epoch": 0.36960811714985053, "grad_norm": 3.3928933143615723, "learning_rate": 0.0003742387732749179, "loss": 1.0356, "step": 5455 }, { "epoch": 0.3696758729916745, "grad_norm": 2.5734899044036865, "learning_rate": 0.0003742332968236583, "loss": 0.648, "step": 5456 }, { "epoch": 0.3697436288334985, "grad_norm": 4.490499019622803, "learning_rate": 0.0003742278203723987, "loss": 1.2074, "step": 5457 }, { "epoch": 0.3698113846753225, "grad_norm": 3.0614688396453857, "learning_rate": 0.0003742223439211391, "loss": 0.8477, "step": 5458 }, { "epoch": 0.3698791405171465, "grad_norm": 3.2148077487945557, "learning_rate": 0.00037421686746987953, "loss": 0.9235, "step": 5459 }, { "epoch": 0.36994689635897043, "grad_norm": 3.270489454269409, "learning_rate": 0.00037421139101862, "loss": 0.8491, "step": 5460 }, { "epoch": 0.37001465220079444, "grad_norm": 5.29260778427124, "learning_rate": 0.0003742059145673604, "loss": 0.8706, "step": 5461 }, { "epoch": 0.37008240804261844, "grad_norm": 3.857942819595337, "learning_rate": 0.0003742004381161008, "loss": 0.9493, "step": 5462 }, { "epoch": 0.3701501638844424, "grad_norm": 3.3467447757720947, "learning_rate": 0.0003741949616648412, "loss": 0.9389, "step": 5463 }, { "epoch": 0.3702179197262664, "grad_norm": 3.2844865322113037, "learning_rate": 0.00037418948521358164, "loss": 0.8593, "step": 5464 }, { "epoch": 0.3702856755680904, "grad_norm": 3.3368470668792725, "learning_rate": 0.00037418400876232204, "loss": 0.7478, "step": 5465 }, { "epoch": 0.3703534314099144, "grad_norm": 3.2852165699005127, "learning_rate": 0.00037417853231106244, "loss": 0.6113, "step": 5466 }, { "epoch": 0.37042118725173834, "grad_norm": 3.417721748352051, "learning_rate": 0.00037417305585980284, "loss": 0.958, "step": 5467 }, { "epoch": 0.37048894309356234, "grad_norm": 2.9814271926879883, "learning_rate": 0.0003741675794085433, "loss": 0.8396, "step": 5468 }, { "epoch": 0.37055669893538634, "grad_norm": 5.948635101318359, "learning_rate": 0.0003741621029572837, "loss": 0.7437, "step": 5469 }, { "epoch": 0.37062445477721034, "grad_norm": 2.884425401687622, "learning_rate": 0.00037415662650602414, "loss": 0.8083, "step": 5470 }, { "epoch": 0.3706922106190343, "grad_norm": 4.079087257385254, "learning_rate": 0.00037415115005476454, "loss": 0.99, "step": 5471 }, { "epoch": 0.3707599664608583, "grad_norm": 3.0150539875030518, "learning_rate": 0.00037414567360350494, "loss": 0.8883, "step": 5472 }, { "epoch": 0.3708277223026823, "grad_norm": 4.43287992477417, "learning_rate": 0.00037414019715224534, "loss": 1.1428, "step": 5473 }, { "epoch": 0.3708954781445063, "grad_norm": 3.885472536087036, "learning_rate": 0.00037413472070098574, "loss": 1.0521, "step": 5474 }, { "epoch": 0.37096323398633024, "grad_norm": 3.4181435108184814, "learning_rate": 0.0003741292442497262, "loss": 0.8354, "step": 5475 }, { "epoch": 0.37103098982815425, "grad_norm": 3.1993231773376465, "learning_rate": 0.00037412376779846665, "loss": 0.9168, "step": 5476 }, { "epoch": 0.37109874566997825, "grad_norm": 3.2113263607025146, "learning_rate": 0.00037411829134720705, "loss": 0.8941, "step": 5477 }, { "epoch": 0.3711665015118022, "grad_norm": 2.6420891284942627, "learning_rate": 0.00037411281489594745, "loss": 0.9716, "step": 5478 }, { "epoch": 0.3712342573536262, "grad_norm": 3.167240858078003, "learning_rate": 0.00037410733844468784, "loss": 0.8373, "step": 5479 }, { "epoch": 0.3713020131954502, "grad_norm": 2.518369197845459, "learning_rate": 0.0003741018619934283, "loss": 0.6072, "step": 5480 }, { "epoch": 0.3713697690372742, "grad_norm": 3.4090709686279297, "learning_rate": 0.0003740963855421687, "loss": 0.9963, "step": 5481 }, { "epoch": 0.37143752487909815, "grad_norm": 3.6266143321990967, "learning_rate": 0.0003740909090909091, "loss": 0.8417, "step": 5482 }, { "epoch": 0.37150528072092215, "grad_norm": 3.8296926021575928, "learning_rate": 0.00037408543263964955, "loss": 1.0946, "step": 5483 }, { "epoch": 0.37157303656274615, "grad_norm": 3.9235920906066895, "learning_rate": 0.00037407995618838995, "loss": 0.9707, "step": 5484 }, { "epoch": 0.37164079240457015, "grad_norm": 3.1368892192840576, "learning_rate": 0.00037407447973713035, "loss": 0.6871, "step": 5485 }, { "epoch": 0.3717085482463941, "grad_norm": 3.4654994010925293, "learning_rate": 0.0003740690032858708, "loss": 0.7636, "step": 5486 }, { "epoch": 0.3717763040882181, "grad_norm": 3.1460530757904053, "learning_rate": 0.0003740635268346112, "loss": 1.0312, "step": 5487 }, { "epoch": 0.3718440599300421, "grad_norm": 2.5835204124450684, "learning_rate": 0.0003740580503833516, "loss": 0.7095, "step": 5488 }, { "epoch": 0.3719118157718661, "grad_norm": 4.138077259063721, "learning_rate": 0.000374052573932092, "loss": 0.9388, "step": 5489 }, { "epoch": 0.37197957161369005, "grad_norm": 4.1887593269348145, "learning_rate": 0.0003740470974808324, "loss": 0.8457, "step": 5490 }, { "epoch": 0.37204732745551405, "grad_norm": 3.561706304550171, "learning_rate": 0.00037404162102957285, "loss": 1.0036, "step": 5491 }, { "epoch": 0.37211508329733806, "grad_norm": 2.733820915222168, "learning_rate": 0.0003740361445783133, "loss": 0.6091, "step": 5492 }, { "epoch": 0.372182839139162, "grad_norm": 4.387457370758057, "learning_rate": 0.0003740306681270537, "loss": 0.9384, "step": 5493 }, { "epoch": 0.372250594980986, "grad_norm": 3.014604330062866, "learning_rate": 0.0003740251916757941, "loss": 0.9135, "step": 5494 }, { "epoch": 0.37231835082281, "grad_norm": 3.102618455886841, "learning_rate": 0.0003740197152245345, "loss": 1.0101, "step": 5495 }, { "epoch": 0.372386106664634, "grad_norm": 3.0674660205841064, "learning_rate": 0.0003740142387732749, "loss": 0.8359, "step": 5496 }, { "epoch": 0.37245386250645796, "grad_norm": 3.7825260162353516, "learning_rate": 0.00037400876232201536, "loss": 0.8308, "step": 5497 }, { "epoch": 0.37252161834828196, "grad_norm": 3.262922525405884, "learning_rate": 0.00037400328587075575, "loss": 0.9257, "step": 5498 }, { "epoch": 0.37258937419010596, "grad_norm": 3.3529722690582275, "learning_rate": 0.0003739978094194962, "loss": 0.747, "step": 5499 }, { "epoch": 0.37265713003192996, "grad_norm": 2.788849115371704, "learning_rate": 0.0003739923329682366, "loss": 0.7855, "step": 5500 }, { "epoch": 0.3727248858737539, "grad_norm": 2.797544240951538, "learning_rate": 0.000373986856516977, "loss": 0.695, "step": 5501 }, { "epoch": 0.3727926417155779, "grad_norm": 2.7967660427093506, "learning_rate": 0.00037398138006571746, "loss": 0.9259, "step": 5502 }, { "epoch": 0.3728603975574019, "grad_norm": 3.873281478881836, "learning_rate": 0.00037397590361445786, "loss": 1.0934, "step": 5503 }, { "epoch": 0.3729281533992259, "grad_norm": 3.830362558364868, "learning_rate": 0.00037397042716319826, "loss": 0.9681, "step": 5504 }, { "epoch": 0.37299590924104986, "grad_norm": 3.5410654544830322, "learning_rate": 0.00037396495071193866, "loss": 0.9459, "step": 5505 }, { "epoch": 0.37306366508287386, "grad_norm": 3.721290349960327, "learning_rate": 0.00037395947426067906, "loss": 0.9818, "step": 5506 }, { "epoch": 0.37313142092469787, "grad_norm": 3.9920461177825928, "learning_rate": 0.0003739539978094195, "loss": 1.0779, "step": 5507 }, { "epoch": 0.3731991767665218, "grad_norm": 6.7537031173706055, "learning_rate": 0.00037394852135815996, "loss": 1.0013, "step": 5508 }, { "epoch": 0.3732669326083458, "grad_norm": 3.580101490020752, "learning_rate": 0.00037394304490690036, "loss": 0.8734, "step": 5509 }, { "epoch": 0.3733346884501698, "grad_norm": 4.863016128540039, "learning_rate": 0.00037393756845564076, "loss": 1.2528, "step": 5510 }, { "epoch": 0.3734024442919938, "grad_norm": 4.829370975494385, "learning_rate": 0.00037393209200438116, "loss": 1.1107, "step": 5511 }, { "epoch": 0.37347020013381776, "grad_norm": 3.791393280029297, "learning_rate": 0.00037392661555312156, "loss": 1.0733, "step": 5512 }, { "epoch": 0.37353795597564177, "grad_norm": 4.297725200653076, "learning_rate": 0.000373921139101862, "loss": 1.1601, "step": 5513 }, { "epoch": 0.37360571181746577, "grad_norm": 3.2719030380249023, "learning_rate": 0.00037391566265060247, "loss": 0.8056, "step": 5514 }, { "epoch": 0.37367346765928977, "grad_norm": 2.9165515899658203, "learning_rate": 0.00037391018619934287, "loss": 0.8524, "step": 5515 }, { "epoch": 0.3737412235011137, "grad_norm": 2.6887080669403076, "learning_rate": 0.00037390470974808327, "loss": 0.7772, "step": 5516 }, { "epoch": 0.3738089793429377, "grad_norm": 2.5748469829559326, "learning_rate": 0.00037389923329682367, "loss": 0.7391, "step": 5517 }, { "epoch": 0.3738767351847617, "grad_norm": 3.5153889656066895, "learning_rate": 0.0003738937568455641, "loss": 0.9017, "step": 5518 }, { "epoch": 0.3739444910265857, "grad_norm": 2.869889974594116, "learning_rate": 0.0003738882803943045, "loss": 0.7319, "step": 5519 }, { "epoch": 0.37401224686840967, "grad_norm": 4.706810474395752, "learning_rate": 0.0003738828039430449, "loss": 1.1458, "step": 5520 }, { "epoch": 0.37408000271023367, "grad_norm": 2.921612501144409, "learning_rate": 0.0003738773274917853, "loss": 0.6987, "step": 5521 }, { "epoch": 0.3741477585520577, "grad_norm": 3.7244222164154053, "learning_rate": 0.00037387185104052577, "loss": 1.0592, "step": 5522 }, { "epoch": 0.3742155143938816, "grad_norm": 4.219874382019043, "learning_rate": 0.00037386637458926617, "loss": 1.1761, "step": 5523 }, { "epoch": 0.3742832702357056, "grad_norm": 2.658328056335449, "learning_rate": 0.0003738608981380066, "loss": 0.6189, "step": 5524 }, { "epoch": 0.3743510260775296, "grad_norm": 3.1813385486602783, "learning_rate": 0.000373855421686747, "loss": 0.997, "step": 5525 }, { "epoch": 0.3744187819193536, "grad_norm": 3.6119306087493896, "learning_rate": 0.0003738499452354874, "loss": 0.9174, "step": 5526 }, { "epoch": 0.3744865377611776, "grad_norm": 3.692286968231201, "learning_rate": 0.0003738444687842278, "loss": 1.0469, "step": 5527 }, { "epoch": 0.3745542936030016, "grad_norm": 2.803877353668213, "learning_rate": 0.0003738389923329682, "loss": 0.9497, "step": 5528 }, { "epoch": 0.3746220494448256, "grad_norm": 4.18275785446167, "learning_rate": 0.00037383351588170867, "loss": 0.8474, "step": 5529 }, { "epoch": 0.3746898052866496, "grad_norm": 3.7972629070281982, "learning_rate": 0.0003738280394304491, "loss": 1.2151, "step": 5530 }, { "epoch": 0.3747575611284735, "grad_norm": 6.474215030670166, "learning_rate": 0.0003738225629791895, "loss": 0.8396, "step": 5531 }, { "epoch": 0.37482531697029753, "grad_norm": 3.6295552253723145, "learning_rate": 0.0003738170865279299, "loss": 0.97, "step": 5532 }, { "epoch": 0.37489307281212153, "grad_norm": 3.1849606037139893, "learning_rate": 0.0003738116100766703, "loss": 1.1236, "step": 5533 }, { "epoch": 0.37496082865394553, "grad_norm": 3.3738596439361572, "learning_rate": 0.0003738061336254107, "loss": 0.9585, "step": 5534 }, { "epoch": 0.3750285844957695, "grad_norm": 3.738487482070923, "learning_rate": 0.0003738006571741512, "loss": 0.9774, "step": 5535 }, { "epoch": 0.3750963403375935, "grad_norm": 2.547308921813965, "learning_rate": 0.0003737951807228916, "loss": 0.8569, "step": 5536 }, { "epoch": 0.3751640961794175, "grad_norm": 2.788638114929199, "learning_rate": 0.000373789704271632, "loss": 0.7547, "step": 5537 }, { "epoch": 0.37523185202124143, "grad_norm": 3.2794029712677, "learning_rate": 0.00037378422782037243, "loss": 1.0575, "step": 5538 }, { "epoch": 0.37529960786306543, "grad_norm": 2.6530280113220215, "learning_rate": 0.00037377875136911283, "loss": 0.7482, "step": 5539 }, { "epoch": 0.37536736370488943, "grad_norm": 3.719312906265259, "learning_rate": 0.0003737732749178533, "loss": 1.0847, "step": 5540 }, { "epoch": 0.37543511954671344, "grad_norm": 3.6388256549835205, "learning_rate": 0.0003737677984665937, "loss": 0.8735, "step": 5541 }, { "epoch": 0.3755028753885374, "grad_norm": 3.6545653343200684, "learning_rate": 0.0003737623220153341, "loss": 1.224, "step": 5542 }, { "epoch": 0.3755706312303614, "grad_norm": 3.2416560649871826, "learning_rate": 0.0003737568455640745, "loss": 0.8311, "step": 5543 }, { "epoch": 0.3756383870721854, "grad_norm": 3.089407205581665, "learning_rate": 0.0003737513691128149, "loss": 0.8562, "step": 5544 }, { "epoch": 0.3757061429140094, "grad_norm": 5.180947303771973, "learning_rate": 0.00037374589266155533, "loss": 0.7639, "step": 5545 }, { "epoch": 0.37577389875583334, "grad_norm": 3.296557903289795, "learning_rate": 0.0003737404162102958, "loss": 0.9206, "step": 5546 }, { "epoch": 0.37584165459765734, "grad_norm": 2.9241280555725098, "learning_rate": 0.0003737349397590362, "loss": 0.8554, "step": 5547 }, { "epoch": 0.37590941043948134, "grad_norm": 4.719516277313232, "learning_rate": 0.0003737294633077766, "loss": 0.9596, "step": 5548 }, { "epoch": 0.37597716628130534, "grad_norm": 2.9782066345214844, "learning_rate": 0.000373723986856517, "loss": 1.0355, "step": 5549 }, { "epoch": 0.3760449221231293, "grad_norm": 3.389206647872925, "learning_rate": 0.0003737185104052574, "loss": 0.9359, "step": 5550 }, { "epoch": 0.3761126779649533, "grad_norm": 3.7610385417938232, "learning_rate": 0.00037371303395399783, "loss": 1.0111, "step": 5551 }, { "epoch": 0.3761804338067773, "grad_norm": 2.892889976501465, "learning_rate": 0.00037370755750273823, "loss": 0.804, "step": 5552 }, { "epoch": 0.37624818964860124, "grad_norm": 2.940634250640869, "learning_rate": 0.0003737020810514787, "loss": 0.7339, "step": 5553 }, { "epoch": 0.37631594549042524, "grad_norm": 3.5232269763946533, "learning_rate": 0.0003736966046002191, "loss": 0.9974, "step": 5554 }, { "epoch": 0.37638370133224924, "grad_norm": 3.779869794845581, "learning_rate": 0.0003736911281489595, "loss": 1.0366, "step": 5555 }, { "epoch": 0.37645145717407325, "grad_norm": 3.4162049293518066, "learning_rate": 0.00037368565169769994, "loss": 0.8846, "step": 5556 }, { "epoch": 0.3765192130158972, "grad_norm": 3.6522111892700195, "learning_rate": 0.00037368017524644034, "loss": 0.7742, "step": 5557 }, { "epoch": 0.3765869688577212, "grad_norm": 3.7084481716156006, "learning_rate": 0.00037367469879518074, "loss": 0.8717, "step": 5558 }, { "epoch": 0.3766547246995452, "grad_norm": 3.307502269744873, "learning_rate": 0.00037366922234392114, "loss": 0.9685, "step": 5559 }, { "epoch": 0.3767224805413692, "grad_norm": 3.845003843307495, "learning_rate": 0.00037366374589266154, "loss": 1.1354, "step": 5560 }, { "epoch": 0.37679023638319314, "grad_norm": 3.344883441925049, "learning_rate": 0.000373658269441402, "loss": 0.7374, "step": 5561 }, { "epoch": 0.37685799222501715, "grad_norm": 4.12806510925293, "learning_rate": 0.00037365279299014244, "loss": 0.9341, "step": 5562 }, { "epoch": 0.37692574806684115, "grad_norm": 4.024311542510986, "learning_rate": 0.00037364731653888284, "loss": 0.8937, "step": 5563 }, { "epoch": 0.37699350390866515, "grad_norm": 3.3960378170013428, "learning_rate": 0.00037364184008762324, "loss": 1.0191, "step": 5564 }, { "epoch": 0.3770612597504891, "grad_norm": 3.6756842136383057, "learning_rate": 0.00037363636363636364, "loss": 0.888, "step": 5565 }, { "epoch": 0.3771290155923131, "grad_norm": 4.734492778778076, "learning_rate": 0.00037363088718510404, "loss": 1.046, "step": 5566 }, { "epoch": 0.3771967714341371, "grad_norm": 4.220418453216553, "learning_rate": 0.0003736254107338445, "loss": 0.9391, "step": 5567 }, { "epoch": 0.37726452727596105, "grad_norm": 3.47002911567688, "learning_rate": 0.0003736199342825849, "loss": 0.9805, "step": 5568 }, { "epoch": 0.37733228311778505, "grad_norm": 2.759300947189331, "learning_rate": 0.00037361445783132535, "loss": 0.6234, "step": 5569 }, { "epoch": 0.37740003895960905, "grad_norm": 3.5229220390319824, "learning_rate": 0.00037360898138006575, "loss": 0.8086, "step": 5570 }, { "epoch": 0.37746779480143305, "grad_norm": 3.5099215507507324, "learning_rate": 0.00037360350492880614, "loss": 0.933, "step": 5571 }, { "epoch": 0.377535550643257, "grad_norm": 3.489349603652954, "learning_rate": 0.00037359802847754654, "loss": 0.9085, "step": 5572 }, { "epoch": 0.377603306485081, "grad_norm": 3.5174615383148193, "learning_rate": 0.000373592552026287, "loss": 0.8382, "step": 5573 }, { "epoch": 0.377671062326905, "grad_norm": 3.366302013397217, "learning_rate": 0.0003735870755750274, "loss": 0.9052, "step": 5574 }, { "epoch": 0.377738818168729, "grad_norm": 2.774963140487671, "learning_rate": 0.0003735815991237678, "loss": 0.8397, "step": 5575 }, { "epoch": 0.37780657401055295, "grad_norm": 3.29779052734375, "learning_rate": 0.0003735761226725082, "loss": 0.8412, "step": 5576 }, { "epoch": 0.37787432985237696, "grad_norm": 3.1824560165405273, "learning_rate": 0.00037357064622124865, "loss": 0.8339, "step": 5577 }, { "epoch": 0.37794208569420096, "grad_norm": 3.428473949432373, "learning_rate": 0.0003735651697699891, "loss": 0.8713, "step": 5578 }, { "epoch": 0.37800984153602496, "grad_norm": 3.568931818008423, "learning_rate": 0.0003735596933187295, "loss": 0.9415, "step": 5579 }, { "epoch": 0.3780775973778489, "grad_norm": 3.965294599533081, "learning_rate": 0.0003735542168674699, "loss": 0.9982, "step": 5580 }, { "epoch": 0.3781453532196729, "grad_norm": 2.309150218963623, "learning_rate": 0.0003735487404162103, "loss": 0.6185, "step": 5581 }, { "epoch": 0.3782131090614969, "grad_norm": 3.1919145584106445, "learning_rate": 0.0003735432639649507, "loss": 0.6796, "step": 5582 }, { "epoch": 0.37828086490332086, "grad_norm": 5.1115899085998535, "learning_rate": 0.00037353778751369115, "loss": 0.8469, "step": 5583 }, { "epoch": 0.37834862074514486, "grad_norm": 3.6079094409942627, "learning_rate": 0.0003735323110624316, "loss": 0.8467, "step": 5584 }, { "epoch": 0.37841637658696886, "grad_norm": 4.780299663543701, "learning_rate": 0.000373526834611172, "loss": 0.9558, "step": 5585 }, { "epoch": 0.37848413242879286, "grad_norm": 3.5846643447875977, "learning_rate": 0.0003735213581599124, "loss": 0.7702, "step": 5586 }, { "epoch": 0.3785518882706168, "grad_norm": 4.912752151489258, "learning_rate": 0.0003735158817086528, "loss": 0.9127, "step": 5587 }, { "epoch": 0.3786196441124408, "grad_norm": 3.3448352813720703, "learning_rate": 0.0003735104052573932, "loss": 0.9817, "step": 5588 }, { "epoch": 0.3786873999542648, "grad_norm": 3.3351690769195557, "learning_rate": 0.00037350492880613366, "loss": 0.9062, "step": 5589 }, { "epoch": 0.3787551557960888, "grad_norm": 3.7072384357452393, "learning_rate": 0.00037349945235487405, "loss": 0.7023, "step": 5590 }, { "epoch": 0.37882291163791276, "grad_norm": 4.3971381187438965, "learning_rate": 0.00037349397590361445, "loss": 0.9517, "step": 5591 }, { "epoch": 0.37889066747973676, "grad_norm": 6.254388809204102, "learning_rate": 0.0003734884994523549, "loss": 0.8973, "step": 5592 }, { "epoch": 0.37895842332156077, "grad_norm": 3.522000551223755, "learning_rate": 0.0003734830230010953, "loss": 0.927, "step": 5593 }, { "epoch": 0.37902617916338477, "grad_norm": 2.875279664993286, "learning_rate": 0.00037347754654983576, "loss": 0.8155, "step": 5594 }, { "epoch": 0.3790939350052087, "grad_norm": 3.7094688415527344, "learning_rate": 0.00037347207009857616, "loss": 0.6219, "step": 5595 }, { "epoch": 0.3791616908470327, "grad_norm": 3.0379767417907715, "learning_rate": 0.00037346659364731656, "loss": 0.7695, "step": 5596 }, { "epoch": 0.3792294466888567, "grad_norm": 3.7309961318969727, "learning_rate": 0.00037346111719605696, "loss": 0.9306, "step": 5597 }, { "epoch": 0.37929720253068067, "grad_norm": 2.9712741374969482, "learning_rate": 0.00037345564074479736, "loss": 0.7869, "step": 5598 }, { "epoch": 0.37936495837250467, "grad_norm": 4.215527057647705, "learning_rate": 0.0003734501642935378, "loss": 0.6491, "step": 5599 }, { "epoch": 0.37943271421432867, "grad_norm": 3.4577810764312744, "learning_rate": 0.00037344468784227826, "loss": 0.9312, "step": 5600 }, { "epoch": 0.3795004700561527, "grad_norm": 3.542017936706543, "learning_rate": 0.00037343921139101866, "loss": 0.6716, "step": 5601 }, { "epoch": 0.3795682258979766, "grad_norm": 3.842251777648926, "learning_rate": 0.00037343373493975906, "loss": 0.7903, "step": 5602 }, { "epoch": 0.3796359817398006, "grad_norm": 3.128040313720703, "learning_rate": 0.00037342825848849946, "loss": 0.752, "step": 5603 }, { "epoch": 0.3797037375816246, "grad_norm": 3.016474723815918, "learning_rate": 0.00037342278203723986, "loss": 0.9372, "step": 5604 }, { "epoch": 0.3797714934234486, "grad_norm": 4.050227165222168, "learning_rate": 0.0003734173055859803, "loss": 0.7427, "step": 5605 }, { "epoch": 0.37983924926527257, "grad_norm": 3.008732795715332, "learning_rate": 0.0003734118291347207, "loss": 0.7857, "step": 5606 }, { "epoch": 0.3799070051070966, "grad_norm": 3.6325457096099854, "learning_rate": 0.0003734063526834611, "loss": 0.9834, "step": 5607 }, { "epoch": 0.3799747609489206, "grad_norm": 2.5838451385498047, "learning_rate": 0.00037340087623220157, "loss": 0.7176, "step": 5608 }, { "epoch": 0.3800425167907446, "grad_norm": 3.0087196826934814, "learning_rate": 0.00037339539978094196, "loss": 0.8014, "step": 5609 }, { "epoch": 0.3801102726325685, "grad_norm": 3.51365065574646, "learning_rate": 0.00037338992332968236, "loss": 1.0851, "step": 5610 }, { "epoch": 0.3801780284743925, "grad_norm": 2.4896225929260254, "learning_rate": 0.0003733844468784228, "loss": 0.6299, "step": 5611 }, { "epoch": 0.38024578431621653, "grad_norm": 3.380060911178589, "learning_rate": 0.0003733789704271632, "loss": 0.8686, "step": 5612 }, { "epoch": 0.3803135401580405, "grad_norm": 3.420835256576538, "learning_rate": 0.0003733734939759036, "loss": 0.9346, "step": 5613 }, { "epoch": 0.3803812959998645, "grad_norm": 3.3178863525390625, "learning_rate": 0.000373368017524644, "loss": 0.8398, "step": 5614 }, { "epoch": 0.3804490518416885, "grad_norm": 2.9064102172851562, "learning_rate": 0.00037336254107338447, "loss": 0.8714, "step": 5615 }, { "epoch": 0.3805168076835125, "grad_norm": 3.730821132659912, "learning_rate": 0.0003733570646221249, "loss": 0.7602, "step": 5616 }, { "epoch": 0.38058456352533643, "grad_norm": 3.483551263809204, "learning_rate": 0.0003733515881708653, "loss": 1.0404, "step": 5617 }, { "epoch": 0.38065231936716043, "grad_norm": 3.445470094680786, "learning_rate": 0.0003733461117196057, "loss": 0.8755, "step": 5618 }, { "epoch": 0.38072007520898443, "grad_norm": 4.128312587738037, "learning_rate": 0.0003733406352683461, "loss": 0.8568, "step": 5619 }, { "epoch": 0.38078783105080843, "grad_norm": 4.003019332885742, "learning_rate": 0.0003733351588170865, "loss": 1.0717, "step": 5620 }, { "epoch": 0.3808555868926324, "grad_norm": 6.26792573928833, "learning_rate": 0.00037332968236582697, "loss": 0.6769, "step": 5621 }, { "epoch": 0.3809233427344564, "grad_norm": 3.7117669582366943, "learning_rate": 0.00037332420591456737, "loss": 0.7317, "step": 5622 }, { "epoch": 0.3809910985762804, "grad_norm": 3.2926225662231445, "learning_rate": 0.0003733187294633078, "loss": 1.1271, "step": 5623 }, { "epoch": 0.3810588544181044, "grad_norm": 2.5908968448638916, "learning_rate": 0.0003733132530120482, "loss": 0.7378, "step": 5624 }, { "epoch": 0.38112661025992833, "grad_norm": 4.366575717926025, "learning_rate": 0.0003733077765607886, "loss": 0.8962, "step": 5625 }, { "epoch": 0.38119436610175234, "grad_norm": 3.64981746673584, "learning_rate": 0.000373302300109529, "loss": 0.9104, "step": 5626 }, { "epoch": 0.38126212194357634, "grad_norm": 2.8721959590911865, "learning_rate": 0.0003732968236582695, "loss": 0.837, "step": 5627 }, { "epoch": 0.3813298777854003, "grad_norm": 2.958799362182617, "learning_rate": 0.0003732913472070099, "loss": 0.8567, "step": 5628 }, { "epoch": 0.3813976336272243, "grad_norm": 3.253523349761963, "learning_rate": 0.0003732858707557503, "loss": 0.8906, "step": 5629 }, { "epoch": 0.3814653894690483, "grad_norm": 5.040193557739258, "learning_rate": 0.0003732803943044907, "loss": 0.9656, "step": 5630 }, { "epoch": 0.3815331453108723, "grad_norm": 3.2559962272644043, "learning_rate": 0.00037327491785323113, "loss": 0.8748, "step": 5631 }, { "epoch": 0.38160090115269624, "grad_norm": 3.49210524559021, "learning_rate": 0.0003732694414019716, "loss": 1.0954, "step": 5632 }, { "epoch": 0.38166865699452024, "grad_norm": 4.060378074645996, "learning_rate": 0.000373263964950712, "loss": 1.1031, "step": 5633 }, { "epoch": 0.38173641283634424, "grad_norm": 3.0602505207061768, "learning_rate": 0.0003732584884994524, "loss": 0.7949, "step": 5634 }, { "epoch": 0.38180416867816824, "grad_norm": 5.676823616027832, "learning_rate": 0.0003732530120481928, "loss": 1.0016, "step": 5635 }, { "epoch": 0.3818719245199922, "grad_norm": 3.5235233306884766, "learning_rate": 0.0003732475355969332, "loss": 1.0212, "step": 5636 }, { "epoch": 0.3819396803618162, "grad_norm": 3.4950430393218994, "learning_rate": 0.00037324205914567363, "loss": 0.883, "step": 5637 }, { "epoch": 0.3820074362036402, "grad_norm": 3.5332722663879395, "learning_rate": 0.00037323658269441403, "loss": 0.742, "step": 5638 }, { "epoch": 0.3820751920454642, "grad_norm": 4.886946678161621, "learning_rate": 0.0003732311062431545, "loss": 0.7946, "step": 5639 }, { "epoch": 0.38214294788728814, "grad_norm": 3.615126132965088, "learning_rate": 0.0003732256297918949, "loss": 1.1047, "step": 5640 }, { "epoch": 0.38221070372911214, "grad_norm": 6.098995685577393, "learning_rate": 0.0003732201533406353, "loss": 1.2313, "step": 5641 }, { "epoch": 0.38227845957093615, "grad_norm": 3.969189167022705, "learning_rate": 0.0003732146768893757, "loss": 1.0219, "step": 5642 }, { "epoch": 0.3823462154127601, "grad_norm": 6.572059631347656, "learning_rate": 0.00037320920043811613, "loss": 1.0149, "step": 5643 }, { "epoch": 0.3824139712545841, "grad_norm": 3.780245065689087, "learning_rate": 0.00037320372398685653, "loss": 1.0238, "step": 5644 }, { "epoch": 0.3824817270964081, "grad_norm": 5.2216386795043945, "learning_rate": 0.00037319824753559693, "loss": 1.0756, "step": 5645 }, { "epoch": 0.3825494829382321, "grad_norm": 4.310647487640381, "learning_rate": 0.0003731927710843374, "loss": 0.7134, "step": 5646 }, { "epoch": 0.38261723878005605, "grad_norm": 3.6789400577545166, "learning_rate": 0.0003731872946330778, "loss": 0.8476, "step": 5647 }, { "epoch": 0.38268499462188005, "grad_norm": 2.946244239807129, "learning_rate": 0.0003731818181818182, "loss": 0.7414, "step": 5648 }, { "epoch": 0.38275275046370405, "grad_norm": 4.926070690155029, "learning_rate": 0.00037317634173055864, "loss": 0.8437, "step": 5649 }, { "epoch": 0.38282050630552805, "grad_norm": 5.091270446777344, "learning_rate": 0.00037317086527929904, "loss": 1.1829, "step": 5650 }, { "epoch": 0.382888262147352, "grad_norm": 3.142005205154419, "learning_rate": 0.00037316538882803944, "loss": 0.8715, "step": 5651 }, { "epoch": 0.382956017989176, "grad_norm": 3.9134702682495117, "learning_rate": 0.00037315991237677984, "loss": 0.8303, "step": 5652 }, { "epoch": 0.383023773831, "grad_norm": 3.545004367828369, "learning_rate": 0.0003731544359255203, "loss": 0.8482, "step": 5653 }, { "epoch": 0.383091529672824, "grad_norm": 4.35734224319458, "learning_rate": 0.00037314895947426074, "loss": 0.8891, "step": 5654 }, { "epoch": 0.38315928551464795, "grad_norm": 3.227043628692627, "learning_rate": 0.00037314348302300114, "loss": 0.923, "step": 5655 }, { "epoch": 0.38322704135647195, "grad_norm": 4.997923851013184, "learning_rate": 0.00037313800657174154, "loss": 0.9333, "step": 5656 }, { "epoch": 0.38329479719829596, "grad_norm": 2.6297287940979004, "learning_rate": 0.00037313253012048194, "loss": 0.7711, "step": 5657 }, { "epoch": 0.3833625530401199, "grad_norm": 3.1763811111450195, "learning_rate": 0.00037312705366922234, "loss": 0.8466, "step": 5658 }, { "epoch": 0.3834303088819439, "grad_norm": 4.1992316246032715, "learning_rate": 0.0003731215772179628, "loss": 0.8186, "step": 5659 }, { "epoch": 0.3834980647237679, "grad_norm": 3.621764898300171, "learning_rate": 0.0003731161007667032, "loss": 0.8232, "step": 5660 }, { "epoch": 0.3835658205655919, "grad_norm": 3.205979585647583, "learning_rate": 0.0003731106243154436, "loss": 0.8545, "step": 5661 }, { "epoch": 0.38363357640741585, "grad_norm": 3.577003240585327, "learning_rate": 0.00037310514786418404, "loss": 0.7594, "step": 5662 }, { "epoch": 0.38370133224923986, "grad_norm": 4.670361518859863, "learning_rate": 0.00037309967141292444, "loss": 1.0836, "step": 5663 }, { "epoch": 0.38376908809106386, "grad_norm": 4.350311279296875, "learning_rate": 0.00037309419496166484, "loss": 1.0642, "step": 5664 }, { "epoch": 0.38383684393288786, "grad_norm": 3.1789393424987793, "learning_rate": 0.0003730887185104053, "loss": 0.782, "step": 5665 }, { "epoch": 0.3839045997747118, "grad_norm": 4.468333721160889, "learning_rate": 0.0003730832420591457, "loss": 1.1986, "step": 5666 }, { "epoch": 0.3839723556165358, "grad_norm": 3.1480653285980225, "learning_rate": 0.0003730777656078861, "loss": 0.8434, "step": 5667 }, { "epoch": 0.3840401114583598, "grad_norm": 5.948314666748047, "learning_rate": 0.0003730722891566265, "loss": 1.0219, "step": 5668 }, { "epoch": 0.3841078673001838, "grad_norm": 3.7166264057159424, "learning_rate": 0.0003730668127053669, "loss": 0.916, "step": 5669 }, { "epoch": 0.38417562314200776, "grad_norm": 2.8507208824157715, "learning_rate": 0.0003730613362541074, "loss": 0.8128, "step": 5670 }, { "epoch": 0.38424337898383176, "grad_norm": 4.266461372375488, "learning_rate": 0.0003730558598028478, "loss": 1.1186, "step": 5671 }, { "epoch": 0.38431113482565576, "grad_norm": 3.010840892791748, "learning_rate": 0.0003730503833515882, "loss": 0.7582, "step": 5672 }, { "epoch": 0.3843788906674797, "grad_norm": 4.067538738250732, "learning_rate": 0.0003730449069003286, "loss": 1.1703, "step": 5673 }, { "epoch": 0.3844466465093037, "grad_norm": 2.8814852237701416, "learning_rate": 0.000373039430449069, "loss": 0.907, "step": 5674 }, { "epoch": 0.3845144023511277, "grad_norm": 3.1572916507720947, "learning_rate": 0.00037303395399780945, "loss": 0.7624, "step": 5675 }, { "epoch": 0.3845821581929517, "grad_norm": 2.9414420127868652, "learning_rate": 0.00037302847754654985, "loss": 1.0256, "step": 5676 }, { "epoch": 0.38464991403477566, "grad_norm": 3.5407488346099854, "learning_rate": 0.00037302300109529025, "loss": 1.0413, "step": 5677 }, { "epoch": 0.38471766987659967, "grad_norm": 2.99794602394104, "learning_rate": 0.0003730175246440307, "loss": 0.954, "step": 5678 }, { "epoch": 0.38478542571842367, "grad_norm": 3.8192763328552246, "learning_rate": 0.0003730120481927711, "loss": 0.7777, "step": 5679 }, { "epoch": 0.38485318156024767, "grad_norm": 3.6395955085754395, "learning_rate": 0.0003730065717415115, "loss": 1.1653, "step": 5680 }, { "epoch": 0.3849209374020716, "grad_norm": 2.4381673336029053, "learning_rate": 0.00037300109529025196, "loss": 0.7761, "step": 5681 }, { "epoch": 0.3849886932438956, "grad_norm": 2.9364173412323, "learning_rate": 0.00037299561883899235, "loss": 0.8563, "step": 5682 }, { "epoch": 0.3850564490857196, "grad_norm": 3.56234073638916, "learning_rate": 0.00037299014238773275, "loss": 0.8346, "step": 5683 }, { "epoch": 0.3851242049275436, "grad_norm": 4.432115077972412, "learning_rate": 0.00037298466593647315, "loss": 0.8241, "step": 5684 }, { "epoch": 0.38519196076936757, "grad_norm": 3.4827826023101807, "learning_rate": 0.0003729791894852136, "loss": 1.1644, "step": 5685 }, { "epoch": 0.38525971661119157, "grad_norm": 2.5738203525543213, "learning_rate": 0.000372973713033954, "loss": 0.7677, "step": 5686 }, { "epoch": 0.3853274724530156, "grad_norm": 2.9560251235961914, "learning_rate": 0.00037296823658269446, "loss": 0.7703, "step": 5687 }, { "epoch": 0.3853952282948395, "grad_norm": 2.6398942470550537, "learning_rate": 0.00037296276013143486, "loss": 0.7792, "step": 5688 }, { "epoch": 0.3854629841366635, "grad_norm": 3.272855281829834, "learning_rate": 0.00037295728368017526, "loss": 0.8291, "step": 5689 }, { "epoch": 0.3855307399784875, "grad_norm": 3.286444664001465, "learning_rate": 0.00037295180722891566, "loss": 1.0518, "step": 5690 }, { "epoch": 0.3855984958203115, "grad_norm": 3.5332088470458984, "learning_rate": 0.0003729463307776561, "loss": 0.8747, "step": 5691 }, { "epoch": 0.3856662516621355, "grad_norm": 3.6044111251831055, "learning_rate": 0.0003729408543263965, "loss": 0.8392, "step": 5692 }, { "epoch": 0.3857340075039595, "grad_norm": 3.5607593059539795, "learning_rate": 0.00037293537787513696, "loss": 0.9251, "step": 5693 }, { "epoch": 0.3858017633457835, "grad_norm": 3.8145382404327393, "learning_rate": 0.00037292990142387736, "loss": 0.7078, "step": 5694 }, { "epoch": 0.3858695191876075, "grad_norm": 4.276382923126221, "learning_rate": 0.00037292442497261776, "loss": 1.0978, "step": 5695 }, { "epoch": 0.3859372750294314, "grad_norm": 3.030289888381958, "learning_rate": 0.00037291894852135816, "loss": 0.945, "step": 5696 }, { "epoch": 0.38600503087125543, "grad_norm": 3.5261635780334473, "learning_rate": 0.0003729134720700986, "loss": 1.0455, "step": 5697 }, { "epoch": 0.38607278671307943, "grad_norm": 6.240113735198975, "learning_rate": 0.000372907995618839, "loss": 0.8996, "step": 5698 }, { "epoch": 0.38614054255490343, "grad_norm": 3.4179487228393555, "learning_rate": 0.0003729025191675794, "loss": 1.0081, "step": 5699 }, { "epoch": 0.3862082983967274, "grad_norm": 5.238987922668457, "learning_rate": 0.0003728970427163198, "loss": 0.8719, "step": 5700 }, { "epoch": 0.3862760542385514, "grad_norm": 2.6136162281036377, "learning_rate": 0.00037289156626506026, "loss": 0.7753, "step": 5701 }, { "epoch": 0.3863438100803754, "grad_norm": 3.8824832439422607, "learning_rate": 0.00037288608981380066, "loss": 1.0596, "step": 5702 }, { "epoch": 0.38641156592219933, "grad_norm": 3.1032378673553467, "learning_rate": 0.0003728806133625411, "loss": 0.8171, "step": 5703 }, { "epoch": 0.38647932176402333, "grad_norm": 3.6826374530792236, "learning_rate": 0.0003728751369112815, "loss": 0.9321, "step": 5704 }, { "epoch": 0.38654707760584733, "grad_norm": 6.439670085906982, "learning_rate": 0.0003728696604600219, "loss": 0.943, "step": 5705 }, { "epoch": 0.38661483344767134, "grad_norm": 2.6833062171936035, "learning_rate": 0.0003728641840087623, "loss": 0.7379, "step": 5706 }, { "epoch": 0.3866825892894953, "grad_norm": 4.6848883628845215, "learning_rate": 0.0003728587075575027, "loss": 1.1416, "step": 5707 }, { "epoch": 0.3867503451313193, "grad_norm": 6.286462306976318, "learning_rate": 0.00037285323110624317, "loss": 0.9652, "step": 5708 }, { "epoch": 0.3868181009731433, "grad_norm": 4.6423563957214355, "learning_rate": 0.0003728477546549836, "loss": 1.1057, "step": 5709 }, { "epoch": 0.3868858568149673, "grad_norm": 3.2530059814453125, "learning_rate": 0.000372842278203724, "loss": 0.7107, "step": 5710 }, { "epoch": 0.38695361265679123, "grad_norm": 4.687932014465332, "learning_rate": 0.0003728368017524644, "loss": 0.7853, "step": 5711 }, { "epoch": 0.38702136849861524, "grad_norm": 6.128671646118164, "learning_rate": 0.0003728313253012048, "loss": 1.1379, "step": 5712 }, { "epoch": 0.38708912434043924, "grad_norm": 3.3019657135009766, "learning_rate": 0.00037282584884994527, "loss": 1.0823, "step": 5713 }, { "epoch": 0.38715688018226324, "grad_norm": 2.9050681591033936, "learning_rate": 0.00037282037239868567, "loss": 0.6091, "step": 5714 }, { "epoch": 0.3872246360240872, "grad_norm": 3.31951642036438, "learning_rate": 0.00037281489594742607, "loss": 0.9607, "step": 5715 }, { "epoch": 0.3872923918659112, "grad_norm": 3.4114198684692383, "learning_rate": 0.0003728094194961665, "loss": 0.8778, "step": 5716 }, { "epoch": 0.3873601477077352, "grad_norm": 5.346656322479248, "learning_rate": 0.0003728039430449069, "loss": 0.9134, "step": 5717 }, { "epoch": 0.38742790354955914, "grad_norm": 5.175989627838135, "learning_rate": 0.0003727984665936473, "loss": 0.8612, "step": 5718 }, { "epoch": 0.38749565939138314, "grad_norm": 4.005059242248535, "learning_rate": 0.0003727929901423878, "loss": 0.9094, "step": 5719 }, { "epoch": 0.38756341523320714, "grad_norm": 3.8251962661743164, "learning_rate": 0.0003727875136911282, "loss": 0.8752, "step": 5720 }, { "epoch": 0.38763117107503114, "grad_norm": 4.607692241668701, "learning_rate": 0.0003727820372398686, "loss": 1.0929, "step": 5721 }, { "epoch": 0.3876989269168551, "grad_norm": 3.626004695892334, "learning_rate": 0.000372776560788609, "loss": 1.1465, "step": 5722 }, { "epoch": 0.3877666827586791, "grad_norm": 2.946340799331665, "learning_rate": 0.00037277108433734937, "loss": 0.9964, "step": 5723 }, { "epoch": 0.3878344386005031, "grad_norm": 3.730943441390991, "learning_rate": 0.0003727656078860898, "loss": 0.851, "step": 5724 }, { "epoch": 0.3879021944423271, "grad_norm": 2.4083893299102783, "learning_rate": 0.0003727601314348303, "loss": 0.8373, "step": 5725 }, { "epoch": 0.38796995028415104, "grad_norm": 3.1129889488220215, "learning_rate": 0.0003727546549835707, "loss": 0.9795, "step": 5726 }, { "epoch": 0.38803770612597505, "grad_norm": 4.731565952301025, "learning_rate": 0.0003727491785323111, "loss": 0.8841, "step": 5727 }, { "epoch": 0.38810546196779905, "grad_norm": 3.382277727127075, "learning_rate": 0.0003727437020810515, "loss": 1.0514, "step": 5728 }, { "epoch": 0.38817321780962305, "grad_norm": 3.9093406200408936, "learning_rate": 0.00037273822562979193, "loss": 1.0243, "step": 5729 }, { "epoch": 0.388240973651447, "grad_norm": 3.464470863342285, "learning_rate": 0.00037273274917853233, "loss": 0.8781, "step": 5730 }, { "epoch": 0.388308729493271, "grad_norm": 3.3356125354766846, "learning_rate": 0.00037272727272727273, "loss": 0.6655, "step": 5731 }, { "epoch": 0.388376485335095, "grad_norm": 3.4475388526916504, "learning_rate": 0.0003727217962760132, "loss": 1.0045, "step": 5732 }, { "epoch": 0.38844424117691895, "grad_norm": 3.695841073989868, "learning_rate": 0.0003727163198247536, "loss": 0.9133, "step": 5733 }, { "epoch": 0.38851199701874295, "grad_norm": 3.021550416946411, "learning_rate": 0.000372710843373494, "loss": 0.8447, "step": 5734 }, { "epoch": 0.38857975286056695, "grad_norm": 5.364546298980713, "learning_rate": 0.00037270536692223443, "loss": 0.9165, "step": 5735 }, { "epoch": 0.38864750870239095, "grad_norm": 3.6096067428588867, "learning_rate": 0.00037269989047097483, "loss": 0.9122, "step": 5736 }, { "epoch": 0.3887152645442149, "grad_norm": 3.0058515071868896, "learning_rate": 0.00037269441401971523, "loss": 0.8739, "step": 5737 }, { "epoch": 0.3887830203860389, "grad_norm": 3.42826509475708, "learning_rate": 0.00037268893756845563, "loss": 0.8761, "step": 5738 }, { "epoch": 0.3888507762278629, "grad_norm": 3.665440797805786, "learning_rate": 0.00037268346111719603, "loss": 1.0653, "step": 5739 }, { "epoch": 0.3889185320696869, "grad_norm": 4.829284191131592, "learning_rate": 0.0003726779846659365, "loss": 0.921, "step": 5740 }, { "epoch": 0.38898628791151085, "grad_norm": 3.3632285594940186, "learning_rate": 0.00037267250821467694, "loss": 0.9021, "step": 5741 }, { "epoch": 0.38905404375333486, "grad_norm": 3.5224595069885254, "learning_rate": 0.00037266703176341734, "loss": 1.0803, "step": 5742 }, { "epoch": 0.38912179959515886, "grad_norm": 4.732120990753174, "learning_rate": 0.00037266155531215774, "loss": 0.8234, "step": 5743 }, { "epoch": 0.38918955543698286, "grad_norm": 2.923804998397827, "learning_rate": 0.00037265607886089814, "loss": 0.7575, "step": 5744 }, { "epoch": 0.3892573112788068, "grad_norm": 3.083683729171753, "learning_rate": 0.00037265060240963853, "loss": 0.8861, "step": 5745 }, { "epoch": 0.3893250671206308, "grad_norm": 3.137216806411743, "learning_rate": 0.000372645125958379, "loss": 0.9119, "step": 5746 }, { "epoch": 0.3893928229624548, "grad_norm": 2.498845100402832, "learning_rate": 0.00037263964950711944, "loss": 0.666, "step": 5747 }, { "epoch": 0.38946057880427876, "grad_norm": 3.21086049079895, "learning_rate": 0.00037263417305585984, "loss": 0.8355, "step": 5748 }, { "epoch": 0.38952833464610276, "grad_norm": 3.269085168838501, "learning_rate": 0.00037262869660460024, "loss": 0.8251, "step": 5749 }, { "epoch": 0.38959609048792676, "grad_norm": 2.502598762512207, "learning_rate": 0.00037262322015334064, "loss": 0.6415, "step": 5750 }, { "epoch": 0.38966384632975076, "grad_norm": 4.170949935913086, "learning_rate": 0.0003726177437020811, "loss": 1.0144, "step": 5751 }, { "epoch": 0.3897316021715747, "grad_norm": 4.71359920501709, "learning_rate": 0.0003726122672508215, "loss": 0.786, "step": 5752 }, { "epoch": 0.3897993580133987, "grad_norm": 4.1418633460998535, "learning_rate": 0.0003726067907995619, "loss": 0.8491, "step": 5753 }, { "epoch": 0.3898671138552227, "grad_norm": 3.5643043518066406, "learning_rate": 0.0003726013143483023, "loss": 0.7447, "step": 5754 }, { "epoch": 0.3899348696970467, "grad_norm": 3.5562713146209717, "learning_rate": 0.00037259583789704274, "loss": 0.9139, "step": 5755 }, { "epoch": 0.39000262553887066, "grad_norm": 3.483555793762207, "learning_rate": 0.00037259036144578314, "loss": 1.0152, "step": 5756 }, { "epoch": 0.39007038138069466, "grad_norm": 6.950501918792725, "learning_rate": 0.0003725848849945236, "loss": 0.9365, "step": 5757 }, { "epoch": 0.39013813722251867, "grad_norm": 3.7641103267669678, "learning_rate": 0.000372579408543264, "loss": 0.7158, "step": 5758 }, { "epoch": 0.39020589306434267, "grad_norm": 3.6401641368865967, "learning_rate": 0.0003725739320920044, "loss": 1.0876, "step": 5759 }, { "epoch": 0.3902736489061666, "grad_norm": 3.292019844055176, "learning_rate": 0.0003725684556407448, "loss": 0.6374, "step": 5760 }, { "epoch": 0.3903414047479906, "grad_norm": 3.414107322692871, "learning_rate": 0.0003725629791894852, "loss": 0.9099, "step": 5761 }, { "epoch": 0.3904091605898146, "grad_norm": 3.3732047080993652, "learning_rate": 0.00037255750273822565, "loss": 0.9779, "step": 5762 }, { "epoch": 0.39047691643163857, "grad_norm": 2.8073513507843018, "learning_rate": 0.0003725520262869661, "loss": 0.8567, "step": 5763 }, { "epoch": 0.39054467227346257, "grad_norm": 3.440371036529541, "learning_rate": 0.0003725465498357065, "loss": 0.8538, "step": 5764 }, { "epoch": 0.39061242811528657, "grad_norm": 3.347675323486328, "learning_rate": 0.0003725410733844469, "loss": 1.0258, "step": 5765 }, { "epoch": 0.39068018395711057, "grad_norm": 3.6461119651794434, "learning_rate": 0.0003725355969331873, "loss": 1.049, "step": 5766 }, { "epoch": 0.3907479397989345, "grad_norm": 2.7625184059143066, "learning_rate": 0.00037253012048192775, "loss": 0.8637, "step": 5767 }, { "epoch": 0.3908156956407585, "grad_norm": 2.6958138942718506, "learning_rate": 0.00037252464403066815, "loss": 0.5733, "step": 5768 }, { "epoch": 0.3908834514825825, "grad_norm": 3.241187572479248, "learning_rate": 0.00037251916757940855, "loss": 0.7696, "step": 5769 }, { "epoch": 0.3909512073244065, "grad_norm": 3.9826819896698, "learning_rate": 0.00037251369112814895, "loss": 1.0034, "step": 5770 }, { "epoch": 0.39101896316623047, "grad_norm": 3.437929630279541, "learning_rate": 0.0003725082146768894, "loss": 0.8461, "step": 5771 }, { "epoch": 0.3910867190080545, "grad_norm": 7.222502708435059, "learning_rate": 0.0003725027382256298, "loss": 0.9428, "step": 5772 }, { "epoch": 0.3911544748498785, "grad_norm": 4.613614559173584, "learning_rate": 0.00037249726177437026, "loss": 0.8806, "step": 5773 }, { "epoch": 0.3912222306917025, "grad_norm": 3.01949405670166, "learning_rate": 0.00037249178532311065, "loss": 0.7964, "step": 5774 }, { "epoch": 0.3912899865335264, "grad_norm": 3.584259033203125, "learning_rate": 0.00037248630887185105, "loss": 0.9209, "step": 5775 }, { "epoch": 0.3913577423753504, "grad_norm": 6.051064491271973, "learning_rate": 0.00037248083242059145, "loss": 0.865, "step": 5776 }, { "epoch": 0.39142549821717443, "grad_norm": 3.1361780166625977, "learning_rate": 0.00037247535596933185, "loss": 0.94, "step": 5777 }, { "epoch": 0.3914932540589984, "grad_norm": 2.962134838104248, "learning_rate": 0.0003724698795180723, "loss": 0.892, "step": 5778 }, { "epoch": 0.3915610099008224, "grad_norm": 2.6165618896484375, "learning_rate": 0.00037246440306681276, "loss": 0.8609, "step": 5779 }, { "epoch": 0.3916287657426464, "grad_norm": 3.622157096862793, "learning_rate": 0.00037245892661555316, "loss": 0.9011, "step": 5780 }, { "epoch": 0.3916965215844704, "grad_norm": 3.1355724334716797, "learning_rate": 0.00037245345016429356, "loss": 0.7099, "step": 5781 }, { "epoch": 0.3917642774262943, "grad_norm": 3.647145986557007, "learning_rate": 0.00037244797371303396, "loss": 0.9337, "step": 5782 }, { "epoch": 0.39183203326811833, "grad_norm": 3.07231068611145, "learning_rate": 0.00037244249726177436, "loss": 0.7047, "step": 5783 }, { "epoch": 0.39189978910994233, "grad_norm": 5.120461463928223, "learning_rate": 0.0003724370208105148, "loss": 1.0706, "step": 5784 }, { "epoch": 0.39196754495176633, "grad_norm": 2.861295700073242, "learning_rate": 0.0003724315443592552, "loss": 0.7372, "step": 5785 }, { "epoch": 0.3920353007935903, "grad_norm": 3.3383100032806396, "learning_rate": 0.00037242606790799566, "loss": 0.7289, "step": 5786 }, { "epoch": 0.3921030566354143, "grad_norm": 4.556958198547363, "learning_rate": 0.00037242059145673606, "loss": 1.1215, "step": 5787 }, { "epoch": 0.3921708124772383, "grad_norm": 3.831092119216919, "learning_rate": 0.00037241511500547646, "loss": 0.8175, "step": 5788 }, { "epoch": 0.3922385683190623, "grad_norm": 4.112597942352295, "learning_rate": 0.0003724096385542169, "loss": 1.0193, "step": 5789 }, { "epoch": 0.39230632416088623, "grad_norm": 3.521785259246826, "learning_rate": 0.0003724041621029573, "loss": 1.0168, "step": 5790 }, { "epoch": 0.39237408000271023, "grad_norm": 2.49566388130188, "learning_rate": 0.0003723986856516977, "loss": 0.7231, "step": 5791 }, { "epoch": 0.39244183584453424, "grad_norm": 2.9293577671051025, "learning_rate": 0.0003723932092004381, "loss": 0.7756, "step": 5792 }, { "epoch": 0.3925095916863582, "grad_norm": 3.8611841201782227, "learning_rate": 0.0003723877327491785, "loss": 0.9672, "step": 5793 }, { "epoch": 0.3925773475281822, "grad_norm": 4.040246963500977, "learning_rate": 0.00037238225629791896, "loss": 1.0196, "step": 5794 }, { "epoch": 0.3926451033700062, "grad_norm": 3.3568320274353027, "learning_rate": 0.0003723767798466594, "loss": 0.8957, "step": 5795 }, { "epoch": 0.3927128592118302, "grad_norm": 3.4666435718536377, "learning_rate": 0.0003723713033953998, "loss": 0.7032, "step": 5796 }, { "epoch": 0.39278061505365414, "grad_norm": 3.8620541095733643, "learning_rate": 0.0003723658269441402, "loss": 0.8548, "step": 5797 }, { "epoch": 0.39284837089547814, "grad_norm": 2.527400255203247, "learning_rate": 0.0003723603504928806, "loss": 0.6974, "step": 5798 }, { "epoch": 0.39291612673730214, "grad_norm": 2.742635726928711, "learning_rate": 0.000372354874041621, "loss": 1.0059, "step": 5799 }, { "epoch": 0.39298388257912614, "grad_norm": 3.49054217338562, "learning_rate": 0.00037234939759036147, "loss": 0.9216, "step": 5800 }, { "epoch": 0.3930516384209501, "grad_norm": 3.7595481872558594, "learning_rate": 0.00037234392113910187, "loss": 0.9818, "step": 5801 }, { "epoch": 0.3931193942627741, "grad_norm": 4.02399206161499, "learning_rate": 0.0003723384446878423, "loss": 0.9121, "step": 5802 }, { "epoch": 0.3931871501045981, "grad_norm": 3.6168291568756104, "learning_rate": 0.0003723329682365827, "loss": 0.9445, "step": 5803 }, { "epoch": 0.3932549059464221, "grad_norm": 3.194375991821289, "learning_rate": 0.0003723274917853231, "loss": 0.9036, "step": 5804 }, { "epoch": 0.39332266178824604, "grad_norm": 3.561800003051758, "learning_rate": 0.00037232201533406357, "loss": 1.0688, "step": 5805 }, { "epoch": 0.39339041763007004, "grad_norm": 3.579075336456299, "learning_rate": 0.00037231653888280397, "loss": 0.9971, "step": 5806 }, { "epoch": 0.39345817347189405, "grad_norm": 5.061302661895752, "learning_rate": 0.00037231106243154437, "loss": 0.8831, "step": 5807 }, { "epoch": 0.393525929313718, "grad_norm": 3.374065637588501, "learning_rate": 0.00037230558598028477, "loss": 1.0021, "step": 5808 }, { "epoch": 0.393593685155542, "grad_norm": 4.774482250213623, "learning_rate": 0.0003723001095290252, "loss": 1.1168, "step": 5809 }, { "epoch": 0.393661440997366, "grad_norm": 4.330333232879639, "learning_rate": 0.0003722946330777656, "loss": 0.932, "step": 5810 }, { "epoch": 0.39372919683919, "grad_norm": 5.402697563171387, "learning_rate": 0.0003722891566265061, "loss": 0.9078, "step": 5811 }, { "epoch": 0.39379695268101395, "grad_norm": 2.760050058364868, "learning_rate": 0.0003722836801752465, "loss": 0.7882, "step": 5812 }, { "epoch": 0.39386470852283795, "grad_norm": 5.676943302154541, "learning_rate": 0.0003722782037239869, "loss": 0.778, "step": 5813 }, { "epoch": 0.39393246436466195, "grad_norm": 3.2505764961242676, "learning_rate": 0.0003722727272727273, "loss": 1.0126, "step": 5814 }, { "epoch": 0.39400022020648595, "grad_norm": 2.5747766494750977, "learning_rate": 0.00037226725082146767, "loss": 0.6086, "step": 5815 }, { "epoch": 0.3940679760483099, "grad_norm": 4.712752342224121, "learning_rate": 0.0003722617743702081, "loss": 0.9526, "step": 5816 }, { "epoch": 0.3941357318901339, "grad_norm": 4.472970962524414, "learning_rate": 0.0003722562979189486, "loss": 1.1122, "step": 5817 }, { "epoch": 0.3942034877319579, "grad_norm": 3.3717803955078125, "learning_rate": 0.000372250821467689, "loss": 0.9568, "step": 5818 }, { "epoch": 0.3942712435737819, "grad_norm": 3.590395450592041, "learning_rate": 0.0003722453450164294, "loss": 0.885, "step": 5819 }, { "epoch": 0.39433899941560585, "grad_norm": 3.200228214263916, "learning_rate": 0.0003722398685651698, "loss": 0.7318, "step": 5820 }, { "epoch": 0.39440675525742985, "grad_norm": 2.8622822761535645, "learning_rate": 0.0003722343921139102, "loss": 0.8197, "step": 5821 }, { "epoch": 0.39447451109925386, "grad_norm": 3.3089959621429443, "learning_rate": 0.00037222891566265063, "loss": 0.7608, "step": 5822 }, { "epoch": 0.3945422669410778, "grad_norm": 3.651536226272583, "learning_rate": 0.00037222343921139103, "loss": 0.997, "step": 5823 }, { "epoch": 0.3946100227829018, "grad_norm": 4.164370059967041, "learning_rate": 0.00037221796276013143, "loss": 1.0397, "step": 5824 }, { "epoch": 0.3946777786247258, "grad_norm": 3.5936367511749268, "learning_rate": 0.0003722124863088719, "loss": 1.0303, "step": 5825 }, { "epoch": 0.3947455344665498, "grad_norm": 3.6184372901916504, "learning_rate": 0.0003722070098576123, "loss": 1.0407, "step": 5826 }, { "epoch": 0.39481329030837375, "grad_norm": 6.376008987426758, "learning_rate": 0.00037220153340635273, "loss": 0.7717, "step": 5827 }, { "epoch": 0.39488104615019776, "grad_norm": 5.782670974731445, "learning_rate": 0.00037219605695509313, "loss": 1.0921, "step": 5828 }, { "epoch": 0.39494880199202176, "grad_norm": 3.4445223808288574, "learning_rate": 0.00037219058050383353, "loss": 0.7572, "step": 5829 }, { "epoch": 0.39501655783384576, "grad_norm": 6.912054061889648, "learning_rate": 0.00037218510405257393, "loss": 1.0543, "step": 5830 }, { "epoch": 0.3950843136756697, "grad_norm": 4.027739524841309, "learning_rate": 0.00037217962760131433, "loss": 0.7836, "step": 5831 }, { "epoch": 0.3951520695174937, "grad_norm": 4.235279560089111, "learning_rate": 0.0003721741511500548, "loss": 1.115, "step": 5832 }, { "epoch": 0.3952198253593177, "grad_norm": 2.9453225135803223, "learning_rate": 0.00037216867469879524, "loss": 0.7444, "step": 5833 }, { "epoch": 0.3952875812011417, "grad_norm": 3.330876350402832, "learning_rate": 0.00037216319824753564, "loss": 0.8208, "step": 5834 }, { "epoch": 0.39535533704296566, "grad_norm": 2.922603130340576, "learning_rate": 0.00037215772179627604, "loss": 0.7614, "step": 5835 }, { "epoch": 0.39542309288478966, "grad_norm": 3.1447739601135254, "learning_rate": 0.00037215224534501644, "loss": 0.8531, "step": 5836 }, { "epoch": 0.39549084872661366, "grad_norm": 3.4216420650482178, "learning_rate": 0.00037214676889375683, "loss": 0.6681, "step": 5837 }, { "epoch": 0.3955586045684376, "grad_norm": 3.0554568767547607, "learning_rate": 0.0003721412924424973, "loss": 0.9342, "step": 5838 }, { "epoch": 0.3956263604102616, "grad_norm": 2.74393367767334, "learning_rate": 0.0003721358159912377, "loss": 0.9402, "step": 5839 }, { "epoch": 0.3956941162520856, "grad_norm": 2.7820587158203125, "learning_rate": 0.0003721303395399781, "loss": 0.7563, "step": 5840 }, { "epoch": 0.3957618720939096, "grad_norm": 3.779273271560669, "learning_rate": 0.00037212486308871854, "loss": 1.0643, "step": 5841 }, { "epoch": 0.39582962793573356, "grad_norm": 2.9592125415802, "learning_rate": 0.00037211938663745894, "loss": 0.6573, "step": 5842 }, { "epoch": 0.39589738377755757, "grad_norm": 3.1732852458953857, "learning_rate": 0.0003721139101861994, "loss": 0.8159, "step": 5843 }, { "epoch": 0.39596513961938157, "grad_norm": 4.506868362426758, "learning_rate": 0.0003721084337349398, "loss": 0.8866, "step": 5844 }, { "epoch": 0.39603289546120557, "grad_norm": 3.730112075805664, "learning_rate": 0.0003721029572836802, "loss": 0.7646, "step": 5845 }, { "epoch": 0.3961006513030295, "grad_norm": 3.805182933807373, "learning_rate": 0.0003720974808324206, "loss": 0.9267, "step": 5846 }, { "epoch": 0.3961684071448535, "grad_norm": 4.3987135887146, "learning_rate": 0.000372092004381161, "loss": 0.8203, "step": 5847 }, { "epoch": 0.3962361629866775, "grad_norm": 3.9069972038269043, "learning_rate": 0.00037208652792990144, "loss": 0.7608, "step": 5848 }, { "epoch": 0.3963039188285015, "grad_norm": 3.093172788619995, "learning_rate": 0.0003720810514786419, "loss": 0.6152, "step": 5849 }, { "epoch": 0.39637167467032547, "grad_norm": 4.5341339111328125, "learning_rate": 0.0003720755750273823, "loss": 0.7152, "step": 5850 }, { "epoch": 0.39643943051214947, "grad_norm": 5.283268451690674, "learning_rate": 0.0003720700985761227, "loss": 1.0482, "step": 5851 }, { "epoch": 0.3965071863539735, "grad_norm": 4.487185478210449, "learning_rate": 0.0003720646221248631, "loss": 0.7805, "step": 5852 }, { "epoch": 0.3965749421957974, "grad_norm": 4.511610984802246, "learning_rate": 0.0003720591456736035, "loss": 1.1518, "step": 5853 }, { "epoch": 0.3966426980376214, "grad_norm": 3.585480213165283, "learning_rate": 0.00037205366922234395, "loss": 0.6369, "step": 5854 }, { "epoch": 0.3967104538794454, "grad_norm": 3.2640535831451416, "learning_rate": 0.00037204819277108435, "loss": 0.9136, "step": 5855 }, { "epoch": 0.3967782097212694, "grad_norm": 3.1337380409240723, "learning_rate": 0.0003720427163198248, "loss": 0.8263, "step": 5856 }, { "epoch": 0.3968459655630934, "grad_norm": 3.9153671264648438, "learning_rate": 0.0003720372398685652, "loss": 0.8865, "step": 5857 }, { "epoch": 0.3969137214049174, "grad_norm": 4.064603805541992, "learning_rate": 0.0003720317634173056, "loss": 0.8944, "step": 5858 }, { "epoch": 0.3969814772467414, "grad_norm": 2.654696464538574, "learning_rate": 0.000372026286966046, "loss": 0.7513, "step": 5859 }, { "epoch": 0.3970492330885654, "grad_norm": 3.3164217472076416, "learning_rate": 0.00037202081051478645, "loss": 0.9064, "step": 5860 }, { "epoch": 0.3971169889303893, "grad_norm": 2.8612797260284424, "learning_rate": 0.00037201533406352685, "loss": 0.7955, "step": 5861 }, { "epoch": 0.3971847447722133, "grad_norm": 2.8935656547546387, "learning_rate": 0.00037200985761226725, "loss": 0.8153, "step": 5862 }, { "epoch": 0.39725250061403733, "grad_norm": 3.314621925354004, "learning_rate": 0.00037200438116100765, "loss": 0.8265, "step": 5863 }, { "epoch": 0.39732025645586133, "grad_norm": 3.328719139099121, "learning_rate": 0.0003719989047097481, "loss": 0.694, "step": 5864 }, { "epoch": 0.3973880122976853, "grad_norm": 3.2973179817199707, "learning_rate": 0.00037199342825848855, "loss": 0.896, "step": 5865 }, { "epoch": 0.3974557681395093, "grad_norm": 3.8667564392089844, "learning_rate": 0.00037198795180722895, "loss": 0.8472, "step": 5866 }, { "epoch": 0.3975235239813333, "grad_norm": 3.1432652473449707, "learning_rate": 0.00037198247535596935, "loss": 0.9463, "step": 5867 }, { "epoch": 0.39759127982315723, "grad_norm": 3.3278615474700928, "learning_rate": 0.00037197699890470975, "loss": 0.8195, "step": 5868 }, { "epoch": 0.39765903566498123, "grad_norm": 3.6191627979278564, "learning_rate": 0.00037197152245345015, "loss": 0.8601, "step": 5869 }, { "epoch": 0.39772679150680523, "grad_norm": 3.117333173751831, "learning_rate": 0.0003719660460021906, "loss": 0.8878, "step": 5870 }, { "epoch": 0.39779454734862923, "grad_norm": 3.9103646278381348, "learning_rate": 0.000371960569550931, "loss": 0.9177, "step": 5871 }, { "epoch": 0.3978623031904532, "grad_norm": 3.266974687576294, "learning_rate": 0.00037195509309967146, "loss": 0.7572, "step": 5872 }, { "epoch": 0.3979300590322772, "grad_norm": 3.0820319652557373, "learning_rate": 0.00037194961664841186, "loss": 0.914, "step": 5873 }, { "epoch": 0.3979978148741012, "grad_norm": 3.2103185653686523, "learning_rate": 0.00037194414019715226, "loss": 0.8404, "step": 5874 }, { "epoch": 0.3980655707159252, "grad_norm": 2.999655246734619, "learning_rate": 0.00037193866374589266, "loss": 0.8789, "step": 5875 }, { "epoch": 0.39813332655774913, "grad_norm": 3.2069571018218994, "learning_rate": 0.0003719331872946331, "loss": 0.7548, "step": 5876 }, { "epoch": 0.39820108239957314, "grad_norm": 3.6432600021362305, "learning_rate": 0.0003719277108433735, "loss": 1.0126, "step": 5877 }, { "epoch": 0.39826883824139714, "grad_norm": 4.392848491668701, "learning_rate": 0.0003719222343921139, "loss": 0.7085, "step": 5878 }, { "epoch": 0.3983365940832211, "grad_norm": 4.116287708282471, "learning_rate": 0.00037191675794085436, "loss": 1.1197, "step": 5879 }, { "epoch": 0.3984043499250451, "grad_norm": 3.109602451324463, "learning_rate": 0.00037191128148959476, "loss": 0.9561, "step": 5880 }, { "epoch": 0.3984721057668691, "grad_norm": 3.621999502182007, "learning_rate": 0.0003719058050383352, "loss": 1.0065, "step": 5881 }, { "epoch": 0.3985398616086931, "grad_norm": 3.654029130935669, "learning_rate": 0.0003719003285870756, "loss": 0.817, "step": 5882 }, { "epoch": 0.39860761745051704, "grad_norm": 3.081130266189575, "learning_rate": 0.000371894852135816, "loss": 0.6745, "step": 5883 }, { "epoch": 0.39867537329234104, "grad_norm": 3.1952476501464844, "learning_rate": 0.0003718893756845564, "loss": 0.9441, "step": 5884 }, { "epoch": 0.39874312913416504, "grad_norm": 3.105992555618286, "learning_rate": 0.0003718838992332968, "loss": 0.9629, "step": 5885 }, { "epoch": 0.39881088497598904, "grad_norm": 2.7267510890960693, "learning_rate": 0.00037187842278203726, "loss": 0.6799, "step": 5886 }, { "epoch": 0.398878640817813, "grad_norm": 3.8132717609405518, "learning_rate": 0.0003718729463307777, "loss": 0.873, "step": 5887 }, { "epoch": 0.398946396659637, "grad_norm": 3.463404417037964, "learning_rate": 0.0003718674698795181, "loss": 0.7539, "step": 5888 }, { "epoch": 0.399014152501461, "grad_norm": 8.739164352416992, "learning_rate": 0.0003718619934282585, "loss": 0.8578, "step": 5889 }, { "epoch": 0.399081908343285, "grad_norm": 4.800802707672119, "learning_rate": 0.0003718565169769989, "loss": 1.2418, "step": 5890 }, { "epoch": 0.39914966418510894, "grad_norm": 4.027976989746094, "learning_rate": 0.0003718510405257393, "loss": 1.0673, "step": 5891 }, { "epoch": 0.39921742002693295, "grad_norm": 2.7829198837280273, "learning_rate": 0.00037184556407447977, "loss": 0.8997, "step": 5892 }, { "epoch": 0.39928517586875695, "grad_norm": 4.058901309967041, "learning_rate": 0.00037184008762322017, "loss": 1.0136, "step": 5893 }, { "epoch": 0.3993529317105809, "grad_norm": 3.6752419471740723, "learning_rate": 0.00037183461117196057, "loss": 0.8392, "step": 5894 }, { "epoch": 0.3994206875524049, "grad_norm": 4.103543281555176, "learning_rate": 0.000371829134720701, "loss": 0.8543, "step": 5895 }, { "epoch": 0.3994884433942289, "grad_norm": 5.371847152709961, "learning_rate": 0.0003718236582694414, "loss": 1.0137, "step": 5896 }, { "epoch": 0.3995561992360529, "grad_norm": 2.943697214126587, "learning_rate": 0.0003718181818181818, "loss": 0.7723, "step": 5897 }, { "epoch": 0.39962395507787685, "grad_norm": 2.954071283340454, "learning_rate": 0.00037181270536692227, "loss": 0.8249, "step": 5898 }, { "epoch": 0.39969171091970085, "grad_norm": 3.4149231910705566, "learning_rate": 0.00037180722891566267, "loss": 0.8036, "step": 5899 }, { "epoch": 0.39975946676152485, "grad_norm": 3.5561885833740234, "learning_rate": 0.00037180175246440307, "loss": 0.7774, "step": 5900 }, { "epoch": 0.39982722260334885, "grad_norm": 2.624089479446411, "learning_rate": 0.00037179627601314347, "loss": 0.63, "step": 5901 }, { "epoch": 0.3998949784451728, "grad_norm": 3.0911900997161865, "learning_rate": 0.0003717907995618839, "loss": 0.8053, "step": 5902 }, { "epoch": 0.3999627342869968, "grad_norm": 4.026041030883789, "learning_rate": 0.0003717853231106244, "loss": 1.0525, "step": 5903 }, { "epoch": 0.4000304901288208, "grad_norm": 3.2021000385284424, "learning_rate": 0.0003717798466593648, "loss": 0.9783, "step": 5904 }, { "epoch": 0.4000982459706448, "grad_norm": 3.630492925643921, "learning_rate": 0.0003717743702081052, "loss": 0.8971, "step": 5905 }, { "epoch": 0.40016600181246875, "grad_norm": 3.789916753768921, "learning_rate": 0.0003717688937568456, "loss": 0.7086, "step": 5906 }, { "epoch": 0.40023375765429275, "grad_norm": 4.6678314208984375, "learning_rate": 0.00037176341730558597, "loss": 0.9332, "step": 5907 }, { "epoch": 0.40030151349611676, "grad_norm": 2.624103307723999, "learning_rate": 0.0003717579408543264, "loss": 0.7817, "step": 5908 }, { "epoch": 0.4003692693379407, "grad_norm": 3.000589370727539, "learning_rate": 0.0003717524644030668, "loss": 0.79, "step": 5909 }, { "epoch": 0.4004370251797647, "grad_norm": 2.965672016143799, "learning_rate": 0.0003717469879518073, "loss": 0.7408, "step": 5910 }, { "epoch": 0.4005047810215887, "grad_norm": 3.2524781227111816, "learning_rate": 0.0003717415115005477, "loss": 0.7736, "step": 5911 }, { "epoch": 0.4005725368634127, "grad_norm": 3.8696463108062744, "learning_rate": 0.0003717360350492881, "loss": 0.8597, "step": 5912 }, { "epoch": 0.40064029270523666, "grad_norm": 3.1021318435668945, "learning_rate": 0.0003717305585980285, "loss": 0.871, "step": 5913 }, { "epoch": 0.40070804854706066, "grad_norm": 2.8446717262268066, "learning_rate": 0.00037172508214676893, "loss": 0.695, "step": 5914 }, { "epoch": 0.40077580438888466, "grad_norm": 4.240732669830322, "learning_rate": 0.00037171960569550933, "loss": 0.7565, "step": 5915 }, { "epoch": 0.40084356023070866, "grad_norm": 3.607851266860962, "learning_rate": 0.00037171412924424973, "loss": 0.8905, "step": 5916 }, { "epoch": 0.4009113160725326, "grad_norm": 3.495004415512085, "learning_rate": 0.00037170865279299013, "loss": 0.7177, "step": 5917 }, { "epoch": 0.4009790719143566, "grad_norm": 4.531461715698242, "learning_rate": 0.0003717031763417306, "loss": 0.7992, "step": 5918 }, { "epoch": 0.4010468277561806, "grad_norm": 2.9029741287231445, "learning_rate": 0.00037169769989047103, "loss": 0.833, "step": 5919 }, { "epoch": 0.4011145835980046, "grad_norm": 3.630387544631958, "learning_rate": 0.00037169222343921143, "loss": 0.9952, "step": 5920 }, { "epoch": 0.40118233943982856, "grad_norm": 4.381357192993164, "learning_rate": 0.00037168674698795183, "loss": 0.9694, "step": 5921 }, { "epoch": 0.40125009528165256, "grad_norm": 4.04451322555542, "learning_rate": 0.00037168127053669223, "loss": 0.8167, "step": 5922 }, { "epoch": 0.40131785112347657, "grad_norm": 3.63335919380188, "learning_rate": 0.00037167579408543263, "loss": 0.9107, "step": 5923 }, { "epoch": 0.4013856069653005, "grad_norm": 3.193741798400879, "learning_rate": 0.0003716703176341731, "loss": 0.9547, "step": 5924 }, { "epoch": 0.4014533628071245, "grad_norm": 3.566638231277466, "learning_rate": 0.0003716648411829135, "loss": 0.8594, "step": 5925 }, { "epoch": 0.4015211186489485, "grad_norm": 2.741093635559082, "learning_rate": 0.00037165936473165394, "loss": 0.7136, "step": 5926 }, { "epoch": 0.4015888744907725, "grad_norm": 4.293385028839111, "learning_rate": 0.00037165388828039434, "loss": 0.8436, "step": 5927 }, { "epoch": 0.40165663033259646, "grad_norm": 3.8447885513305664, "learning_rate": 0.00037164841182913474, "loss": 1.2456, "step": 5928 }, { "epoch": 0.40172438617442047, "grad_norm": 4.023305416107178, "learning_rate": 0.00037164293537787513, "loss": 1.0018, "step": 5929 }, { "epoch": 0.40179214201624447, "grad_norm": 3.0256094932556152, "learning_rate": 0.0003716374589266156, "loss": 0.9334, "step": 5930 }, { "epoch": 0.40185989785806847, "grad_norm": 2.8122665882110596, "learning_rate": 0.000371631982475356, "loss": 0.7141, "step": 5931 }, { "epoch": 0.4019276536998924, "grad_norm": 2.4987576007843018, "learning_rate": 0.0003716265060240964, "loss": 0.57, "step": 5932 }, { "epoch": 0.4019954095417164, "grad_norm": 2.9414095878601074, "learning_rate": 0.0003716210295728368, "loss": 0.736, "step": 5933 }, { "epoch": 0.4020631653835404, "grad_norm": 3.1646952629089355, "learning_rate": 0.00037161555312157724, "loss": 0.9344, "step": 5934 }, { "epoch": 0.4021309212253644, "grad_norm": 3.1468257904052734, "learning_rate": 0.00037161007667031764, "loss": 1.035, "step": 5935 }, { "epoch": 0.40219867706718837, "grad_norm": 3.5826570987701416, "learning_rate": 0.0003716046002190581, "loss": 0.8337, "step": 5936 }, { "epoch": 0.4022664329090124, "grad_norm": 3.636890172958374, "learning_rate": 0.0003715991237677985, "loss": 0.8021, "step": 5937 }, { "epoch": 0.4023341887508364, "grad_norm": 2.9025254249572754, "learning_rate": 0.0003715936473165389, "loss": 0.8402, "step": 5938 }, { "epoch": 0.4024019445926603, "grad_norm": 3.17258358001709, "learning_rate": 0.0003715881708652793, "loss": 0.8926, "step": 5939 }, { "epoch": 0.4024697004344843, "grad_norm": 3.9675025939941406, "learning_rate": 0.00037158269441401974, "loss": 0.929, "step": 5940 }, { "epoch": 0.4025374562763083, "grad_norm": 4.643914222717285, "learning_rate": 0.0003715772179627602, "loss": 0.9264, "step": 5941 }, { "epoch": 0.4026052121181323, "grad_norm": 3.2560648918151855, "learning_rate": 0.0003715717415115006, "loss": 0.9041, "step": 5942 }, { "epoch": 0.4026729679599563, "grad_norm": 4.194705963134766, "learning_rate": 0.000371566265060241, "loss": 0.9865, "step": 5943 }, { "epoch": 0.4027407238017803, "grad_norm": 3.1267313957214355, "learning_rate": 0.0003715607886089814, "loss": 0.9362, "step": 5944 }, { "epoch": 0.4028084796436043, "grad_norm": 3.6758675575256348, "learning_rate": 0.0003715553121577218, "loss": 0.8261, "step": 5945 }, { "epoch": 0.4028762354854283, "grad_norm": 3.0016863346099854, "learning_rate": 0.00037154983570646225, "loss": 0.9033, "step": 5946 }, { "epoch": 0.4029439913272522, "grad_norm": 3.1406967639923096, "learning_rate": 0.00037154435925520265, "loss": 0.8036, "step": 5947 }, { "epoch": 0.40301174716907623, "grad_norm": 4.462352275848389, "learning_rate": 0.00037153888280394304, "loss": 1.285, "step": 5948 }, { "epoch": 0.40307950301090023, "grad_norm": 3.244993209838867, "learning_rate": 0.0003715334063526835, "loss": 0.9692, "step": 5949 }, { "epoch": 0.40314725885272423, "grad_norm": 3.444673538208008, "learning_rate": 0.0003715279299014239, "loss": 1.1392, "step": 5950 }, { "epoch": 0.4032150146945482, "grad_norm": 2.978667736053467, "learning_rate": 0.0003715224534501643, "loss": 0.9324, "step": 5951 }, { "epoch": 0.4032827705363722, "grad_norm": 3.4016616344451904, "learning_rate": 0.00037151697699890475, "loss": 1.0893, "step": 5952 }, { "epoch": 0.4033505263781962, "grad_norm": 2.4901795387268066, "learning_rate": 0.00037151150054764515, "loss": 0.5619, "step": 5953 }, { "epoch": 0.40341828222002013, "grad_norm": 2.467582941055298, "learning_rate": 0.00037150602409638555, "loss": 0.674, "step": 5954 }, { "epoch": 0.40348603806184413, "grad_norm": 3.1439976692199707, "learning_rate": 0.00037150054764512595, "loss": 0.8961, "step": 5955 }, { "epoch": 0.40355379390366813, "grad_norm": 6.339845180511475, "learning_rate": 0.00037149507119386635, "loss": 1.0456, "step": 5956 }, { "epoch": 0.40362154974549214, "grad_norm": 3.1283044815063477, "learning_rate": 0.00037148959474260685, "loss": 0.8527, "step": 5957 }, { "epoch": 0.4036893055873161, "grad_norm": 3.2099814414978027, "learning_rate": 0.00037148411829134725, "loss": 0.6726, "step": 5958 }, { "epoch": 0.4037570614291401, "grad_norm": 2.881326675415039, "learning_rate": 0.00037147864184008765, "loss": 0.8292, "step": 5959 }, { "epoch": 0.4038248172709641, "grad_norm": 2.9597489833831787, "learning_rate": 0.00037147316538882805, "loss": 0.77, "step": 5960 }, { "epoch": 0.4038925731127881, "grad_norm": 3.3758187294006348, "learning_rate": 0.00037146768893756845, "loss": 0.7842, "step": 5961 }, { "epoch": 0.40396032895461204, "grad_norm": 3.638763666152954, "learning_rate": 0.0003714622124863089, "loss": 0.7516, "step": 5962 }, { "epoch": 0.40402808479643604, "grad_norm": 3.318192958831787, "learning_rate": 0.0003714567360350493, "loss": 1.0513, "step": 5963 }, { "epoch": 0.40409584063826004, "grad_norm": 4.458415985107422, "learning_rate": 0.0003714512595837897, "loss": 0.9513, "step": 5964 }, { "epoch": 0.40416359648008404, "grad_norm": 2.811741352081299, "learning_rate": 0.00037144578313253016, "loss": 0.8505, "step": 5965 }, { "epoch": 0.404231352321908, "grad_norm": 4.0488810539245605, "learning_rate": 0.00037144030668127056, "loss": 1.1677, "step": 5966 }, { "epoch": 0.404299108163732, "grad_norm": 3.6150028705596924, "learning_rate": 0.00037143483023001096, "loss": 0.8254, "step": 5967 }, { "epoch": 0.404366864005556, "grad_norm": 3.7667441368103027, "learning_rate": 0.0003714293537787514, "loss": 0.8377, "step": 5968 }, { "epoch": 0.40443461984737994, "grad_norm": 7.2486252784729, "learning_rate": 0.0003714238773274918, "loss": 1.1357, "step": 5969 }, { "epoch": 0.40450237568920394, "grad_norm": 2.8473987579345703, "learning_rate": 0.0003714184008762322, "loss": 0.9338, "step": 5970 }, { "epoch": 0.40457013153102794, "grad_norm": 4.086564540863037, "learning_rate": 0.0003714129244249726, "loss": 1.1958, "step": 5971 }, { "epoch": 0.40463788737285195, "grad_norm": 3.5304882526397705, "learning_rate": 0.000371407447973713, "loss": 0.7755, "step": 5972 }, { "epoch": 0.4047056432146759, "grad_norm": 4.465012073516846, "learning_rate": 0.00037140197152245346, "loss": 0.9383, "step": 5973 }, { "epoch": 0.4047733990564999, "grad_norm": 3.8455686569213867, "learning_rate": 0.0003713964950711939, "loss": 1.0538, "step": 5974 }, { "epoch": 0.4048411548983239, "grad_norm": 3.6967718601226807, "learning_rate": 0.0003713910186199343, "loss": 0.9587, "step": 5975 }, { "epoch": 0.4049089107401479, "grad_norm": 3.3169424533843994, "learning_rate": 0.0003713855421686747, "loss": 0.9658, "step": 5976 }, { "epoch": 0.40497666658197184, "grad_norm": 3.8088719844818115, "learning_rate": 0.0003713800657174151, "loss": 0.6887, "step": 5977 }, { "epoch": 0.40504442242379585, "grad_norm": 3.4730722904205322, "learning_rate": 0.00037137458926615556, "loss": 0.7917, "step": 5978 }, { "epoch": 0.40511217826561985, "grad_norm": 4.576694488525391, "learning_rate": 0.00037136911281489596, "loss": 0.8876, "step": 5979 }, { "epoch": 0.40517993410744385, "grad_norm": 3.379927158355713, "learning_rate": 0.0003713636363636364, "loss": 0.9433, "step": 5980 }, { "epoch": 0.4052476899492678, "grad_norm": 4.211075782775879, "learning_rate": 0.0003713581599123768, "loss": 1.2516, "step": 5981 }, { "epoch": 0.4053154457910918, "grad_norm": 4.17887020111084, "learning_rate": 0.0003713526834611172, "loss": 0.863, "step": 5982 }, { "epoch": 0.4053832016329158, "grad_norm": 3.7063992023468018, "learning_rate": 0.0003713472070098576, "loss": 0.7312, "step": 5983 }, { "epoch": 0.40545095747473975, "grad_norm": 2.8870418071746826, "learning_rate": 0.00037134173055859807, "loss": 0.7214, "step": 5984 }, { "epoch": 0.40551871331656375, "grad_norm": 3.3999710083007812, "learning_rate": 0.00037133625410733847, "loss": 0.9927, "step": 5985 }, { "epoch": 0.40558646915838775, "grad_norm": 2.999285936355591, "learning_rate": 0.00037133077765607887, "loss": 0.8795, "step": 5986 }, { "epoch": 0.40565422500021175, "grad_norm": 3.3157548904418945, "learning_rate": 0.00037132530120481926, "loss": 0.9109, "step": 5987 }, { "epoch": 0.4057219808420357, "grad_norm": 3.954935312271118, "learning_rate": 0.0003713198247535597, "loss": 1.0942, "step": 5988 }, { "epoch": 0.4057897366838597, "grad_norm": 2.9029433727264404, "learning_rate": 0.0003713143483023001, "loss": 0.8642, "step": 5989 }, { "epoch": 0.4058574925256837, "grad_norm": 2.6748085021972656, "learning_rate": 0.00037130887185104057, "loss": 0.7833, "step": 5990 }, { "epoch": 0.4059252483675077, "grad_norm": 3.6855356693267822, "learning_rate": 0.00037130339539978097, "loss": 1.0079, "step": 5991 }, { "epoch": 0.40599300420933165, "grad_norm": 3.2469100952148438, "learning_rate": 0.00037129791894852137, "loss": 0.8678, "step": 5992 }, { "epoch": 0.40606076005115566, "grad_norm": 4.097865581512451, "learning_rate": 0.00037129244249726177, "loss": 1.1234, "step": 5993 }, { "epoch": 0.40612851589297966, "grad_norm": 3.694542407989502, "learning_rate": 0.00037128696604600217, "loss": 0.9702, "step": 5994 }, { "epoch": 0.40619627173480366, "grad_norm": 2.4767558574676514, "learning_rate": 0.0003712814895947426, "loss": 0.5677, "step": 5995 }, { "epoch": 0.4062640275766276, "grad_norm": 3.112030267715454, "learning_rate": 0.0003712760131434831, "loss": 0.7115, "step": 5996 }, { "epoch": 0.4063317834184516, "grad_norm": 3.1427721977233887, "learning_rate": 0.0003712705366922235, "loss": 0.7514, "step": 5997 }, { "epoch": 0.4063995392602756, "grad_norm": 3.0621776580810547, "learning_rate": 0.0003712650602409639, "loss": 0.8209, "step": 5998 }, { "epoch": 0.40646729510209956, "grad_norm": 2.698216199874878, "learning_rate": 0.00037125958378970427, "loss": 0.6634, "step": 5999 }, { "epoch": 0.40653505094392356, "grad_norm": 3.609895944595337, "learning_rate": 0.0003712541073384447, "loss": 0.8635, "step": 6000 }, { "epoch": 0.40660280678574756, "grad_norm": 4.036843776702881, "learning_rate": 0.0003712486308871851, "loss": 1.0607, "step": 6001 }, { "epoch": 0.40667056262757156, "grad_norm": 3.7841882705688477, "learning_rate": 0.0003712431544359255, "loss": 0.9054, "step": 6002 }, { "epoch": 0.4067383184693955, "grad_norm": 3.660630464553833, "learning_rate": 0.0003712376779846659, "loss": 0.9104, "step": 6003 }, { "epoch": 0.4068060743112195, "grad_norm": 2.992619276046753, "learning_rate": 0.0003712322015334064, "loss": 0.8369, "step": 6004 }, { "epoch": 0.4068738301530435, "grad_norm": 3.5021955966949463, "learning_rate": 0.0003712267250821468, "loss": 0.847, "step": 6005 }, { "epoch": 0.4069415859948675, "grad_norm": 3.3146812915802, "learning_rate": 0.00037122124863088723, "loss": 0.6451, "step": 6006 }, { "epoch": 0.40700934183669146, "grad_norm": 4.295387268066406, "learning_rate": 0.00037121577217962763, "loss": 0.8384, "step": 6007 }, { "epoch": 0.40707709767851546, "grad_norm": 4.785434246063232, "learning_rate": 0.00037121029572836803, "loss": 0.7708, "step": 6008 }, { "epoch": 0.40714485352033947, "grad_norm": 3.668070077896118, "learning_rate": 0.0003712048192771084, "loss": 1.0883, "step": 6009 }, { "epoch": 0.40721260936216347, "grad_norm": 3.831789970397949, "learning_rate": 0.0003711993428258488, "loss": 0.7658, "step": 6010 }, { "epoch": 0.4072803652039874, "grad_norm": 4.161891460418701, "learning_rate": 0.0003711938663745893, "loss": 1.2052, "step": 6011 }, { "epoch": 0.4073481210458114, "grad_norm": 3.4146525859832764, "learning_rate": 0.00037118838992332973, "loss": 0.9506, "step": 6012 }, { "epoch": 0.4074158768876354, "grad_norm": 3.3198506832122803, "learning_rate": 0.00037118291347207013, "loss": 1.1281, "step": 6013 }, { "epoch": 0.40748363272945937, "grad_norm": 3.0557281970977783, "learning_rate": 0.00037117743702081053, "loss": 0.7519, "step": 6014 }, { "epoch": 0.40755138857128337, "grad_norm": 3.5487380027770996, "learning_rate": 0.00037117196056955093, "loss": 0.908, "step": 6015 }, { "epoch": 0.40761914441310737, "grad_norm": 3.130018472671509, "learning_rate": 0.0003711664841182914, "loss": 0.8231, "step": 6016 }, { "epoch": 0.4076869002549314, "grad_norm": 3.7140414714813232, "learning_rate": 0.0003711610076670318, "loss": 0.8685, "step": 6017 }, { "epoch": 0.4077546560967553, "grad_norm": 3.4325106143951416, "learning_rate": 0.0003711555312157722, "loss": 0.9992, "step": 6018 }, { "epoch": 0.4078224119385793, "grad_norm": 3.0785744190216064, "learning_rate": 0.00037115005476451264, "loss": 0.9411, "step": 6019 }, { "epoch": 0.4078901677804033, "grad_norm": 3.891824722290039, "learning_rate": 0.00037114457831325304, "loss": 0.828, "step": 6020 }, { "epoch": 0.4079579236222273, "grad_norm": 2.9085335731506348, "learning_rate": 0.00037113910186199343, "loss": 0.7497, "step": 6021 }, { "epoch": 0.40802567946405127, "grad_norm": 2.7960293292999268, "learning_rate": 0.0003711336254107339, "loss": 0.7551, "step": 6022 }, { "epoch": 0.4080934353058753, "grad_norm": 4.01334285736084, "learning_rate": 0.0003711281489594743, "loss": 0.937, "step": 6023 }, { "epoch": 0.4081611911476993, "grad_norm": 3.410127878189087, "learning_rate": 0.0003711226725082147, "loss": 1.0221, "step": 6024 }, { "epoch": 0.4082289469895233, "grad_norm": 3.3562190532684326, "learning_rate": 0.0003711171960569551, "loss": 0.7944, "step": 6025 }, { "epoch": 0.4082967028313472, "grad_norm": 4.562429428100586, "learning_rate": 0.0003711117196056955, "loss": 0.8548, "step": 6026 }, { "epoch": 0.4083644586731712, "grad_norm": 3.0243585109710693, "learning_rate": 0.00037110624315443594, "loss": 0.6838, "step": 6027 }, { "epoch": 0.40843221451499523, "grad_norm": 5.551209926605225, "learning_rate": 0.0003711007667031764, "loss": 0.8889, "step": 6028 }, { "epoch": 0.4084999703568192, "grad_norm": 3.4398953914642334, "learning_rate": 0.0003710952902519168, "loss": 0.9756, "step": 6029 }, { "epoch": 0.4085677261986432, "grad_norm": 3.2508420944213867, "learning_rate": 0.0003710898138006572, "loss": 0.7805, "step": 6030 }, { "epoch": 0.4086354820404672, "grad_norm": 4.192214488983154, "learning_rate": 0.0003710843373493976, "loss": 1.0183, "step": 6031 }, { "epoch": 0.4087032378822912, "grad_norm": 3.908640146255493, "learning_rate": 0.000371078860898138, "loss": 0.9221, "step": 6032 }, { "epoch": 0.40877099372411513, "grad_norm": 5.154355049133301, "learning_rate": 0.00037107338444687844, "loss": 1.1323, "step": 6033 }, { "epoch": 0.40883874956593913, "grad_norm": 2.8071084022521973, "learning_rate": 0.00037106790799561884, "loss": 0.7911, "step": 6034 }, { "epoch": 0.40890650540776313, "grad_norm": 4.556288719177246, "learning_rate": 0.0003710624315443593, "loss": 1.1102, "step": 6035 }, { "epoch": 0.40897426124958713, "grad_norm": 3.2200303077697754, "learning_rate": 0.0003710569550930997, "loss": 0.7397, "step": 6036 }, { "epoch": 0.4090420170914111, "grad_norm": 3.8558897972106934, "learning_rate": 0.0003710514786418401, "loss": 0.9095, "step": 6037 }, { "epoch": 0.4091097729332351, "grad_norm": 3.6266441345214844, "learning_rate": 0.00037104600219058055, "loss": 0.9614, "step": 6038 }, { "epoch": 0.4091775287750591, "grad_norm": 3.0982096195220947, "learning_rate": 0.00037104052573932095, "loss": 0.8829, "step": 6039 }, { "epoch": 0.4092452846168831, "grad_norm": 3.4114956855773926, "learning_rate": 0.00037103504928806134, "loss": 0.9621, "step": 6040 }, { "epoch": 0.40931304045870703, "grad_norm": 3.1909968852996826, "learning_rate": 0.00037102957283680174, "loss": 0.9254, "step": 6041 }, { "epoch": 0.40938079630053104, "grad_norm": 3.1883256435394287, "learning_rate": 0.0003710240963855422, "loss": 0.9483, "step": 6042 }, { "epoch": 0.40944855214235504, "grad_norm": 2.955845832824707, "learning_rate": 0.0003710186199342826, "loss": 0.7913, "step": 6043 }, { "epoch": 0.409516307984179, "grad_norm": 3.78985595703125, "learning_rate": 0.00037101314348302305, "loss": 0.9422, "step": 6044 }, { "epoch": 0.409584063826003, "grad_norm": 3.125870704650879, "learning_rate": 0.00037100766703176345, "loss": 0.8437, "step": 6045 }, { "epoch": 0.409651819667827, "grad_norm": 3.7176554203033447, "learning_rate": 0.00037100219058050385, "loss": 0.9584, "step": 6046 }, { "epoch": 0.409719575509651, "grad_norm": 3.2223093509674072, "learning_rate": 0.00037099671412924425, "loss": 0.8557, "step": 6047 }, { "epoch": 0.40978733135147494, "grad_norm": 3.5592241287231445, "learning_rate": 0.00037099123767798465, "loss": 0.9008, "step": 6048 }, { "epoch": 0.40985508719329894, "grad_norm": 2.9492757320404053, "learning_rate": 0.0003709857612267251, "loss": 0.7627, "step": 6049 }, { "epoch": 0.40992284303512294, "grad_norm": 3.3168795108795166, "learning_rate": 0.00037098028477546555, "loss": 0.9957, "step": 6050 }, { "epoch": 0.40999059887694694, "grad_norm": 3.997814416885376, "learning_rate": 0.00037097480832420595, "loss": 1.0616, "step": 6051 }, { "epoch": 0.4100583547187709, "grad_norm": 3.703521251678467, "learning_rate": 0.00037096933187294635, "loss": 0.834, "step": 6052 }, { "epoch": 0.4101261105605949, "grad_norm": 4.459621906280518, "learning_rate": 0.00037096385542168675, "loss": 1.1405, "step": 6053 }, { "epoch": 0.4101938664024189, "grad_norm": 3.558887243270874, "learning_rate": 0.0003709583789704272, "loss": 0.9912, "step": 6054 }, { "epoch": 0.4102616222442429, "grad_norm": 3.3295536041259766, "learning_rate": 0.0003709529025191676, "loss": 0.843, "step": 6055 }, { "epoch": 0.41032937808606684, "grad_norm": 2.9329957962036133, "learning_rate": 0.000370947426067908, "loss": 0.8531, "step": 6056 }, { "epoch": 0.41039713392789084, "grad_norm": 3.024730682373047, "learning_rate": 0.0003709419496166484, "loss": 0.7965, "step": 6057 }, { "epoch": 0.41046488976971485, "grad_norm": 3.411458730697632, "learning_rate": 0.00037093647316538886, "loss": 0.9755, "step": 6058 }, { "epoch": 0.4105326456115388, "grad_norm": 3.7057018280029297, "learning_rate": 0.00037093099671412926, "loss": 0.9421, "step": 6059 }, { "epoch": 0.4106004014533628, "grad_norm": 4.960656642913818, "learning_rate": 0.0003709255202628697, "loss": 1.1573, "step": 6060 }, { "epoch": 0.4106681572951868, "grad_norm": 2.599731683731079, "learning_rate": 0.0003709200438116101, "loss": 0.7638, "step": 6061 }, { "epoch": 0.4107359131370108, "grad_norm": 4.875538349151611, "learning_rate": 0.0003709145673603505, "loss": 1.3594, "step": 6062 }, { "epoch": 0.41080366897883475, "grad_norm": 3.6525232791900635, "learning_rate": 0.0003709090909090909, "loss": 1.2116, "step": 6063 }, { "epoch": 0.41087142482065875, "grad_norm": 3.114755153656006, "learning_rate": 0.0003709036144578313, "loss": 0.9417, "step": 6064 }, { "epoch": 0.41093918066248275, "grad_norm": 3.221829414367676, "learning_rate": 0.00037089813800657176, "loss": 1.102, "step": 6065 }, { "epoch": 0.41100693650430675, "grad_norm": 4.004273414611816, "learning_rate": 0.0003708926615553122, "loss": 1.0515, "step": 6066 }, { "epoch": 0.4110746923461307, "grad_norm": 2.5917727947235107, "learning_rate": 0.0003708871851040526, "loss": 0.7086, "step": 6067 }, { "epoch": 0.4111424481879547, "grad_norm": 3.7074480056762695, "learning_rate": 0.000370881708652793, "loss": 0.8104, "step": 6068 }, { "epoch": 0.4112102040297787, "grad_norm": 4.804320812225342, "learning_rate": 0.0003708762322015334, "loss": 0.998, "step": 6069 }, { "epoch": 0.4112779598716027, "grad_norm": 4.032692909240723, "learning_rate": 0.0003708707557502738, "loss": 0.886, "step": 6070 }, { "epoch": 0.41134571571342665, "grad_norm": 4.0400309562683105, "learning_rate": 0.00037086527929901426, "loss": 0.9215, "step": 6071 }, { "epoch": 0.41141347155525065, "grad_norm": 3.8866360187530518, "learning_rate": 0.00037085980284775466, "loss": 0.9206, "step": 6072 }, { "epoch": 0.41148122739707466, "grad_norm": 3.103803873062134, "learning_rate": 0.0003708543263964951, "loss": 0.8326, "step": 6073 }, { "epoch": 0.4115489832388986, "grad_norm": 3.3380112648010254, "learning_rate": 0.0003708488499452355, "loss": 0.8885, "step": 6074 }, { "epoch": 0.4116167390807226, "grad_norm": 3.924215793609619, "learning_rate": 0.0003708433734939759, "loss": 1.1525, "step": 6075 }, { "epoch": 0.4116844949225466, "grad_norm": 2.5355336666107178, "learning_rate": 0.00037083789704271637, "loss": 0.7176, "step": 6076 }, { "epoch": 0.4117522507643706, "grad_norm": 6.266172885894775, "learning_rate": 0.00037083242059145677, "loss": 1.0664, "step": 6077 }, { "epoch": 0.41182000660619456, "grad_norm": 3.829345941543579, "learning_rate": 0.00037082694414019717, "loss": 0.8457, "step": 6078 }, { "epoch": 0.41188776244801856, "grad_norm": 3.712602376937866, "learning_rate": 0.00037082146768893756, "loss": 0.8681, "step": 6079 }, { "epoch": 0.41195551828984256, "grad_norm": 4.151834964752197, "learning_rate": 0.00037081599123767796, "loss": 1.1982, "step": 6080 }, { "epoch": 0.41202327413166656, "grad_norm": 4.010295391082764, "learning_rate": 0.0003708105147864184, "loss": 1.011, "step": 6081 }, { "epoch": 0.4120910299734905, "grad_norm": 3.6486284732818604, "learning_rate": 0.00037080503833515887, "loss": 0.8625, "step": 6082 }, { "epoch": 0.4121587858153145, "grad_norm": 3.5427520275115967, "learning_rate": 0.00037079956188389927, "loss": 1.0549, "step": 6083 }, { "epoch": 0.4122265416571385, "grad_norm": 3.396989583969116, "learning_rate": 0.00037079408543263967, "loss": 0.9626, "step": 6084 }, { "epoch": 0.4122942974989625, "grad_norm": 3.74003267288208, "learning_rate": 0.00037078860898138007, "loss": 1.0135, "step": 6085 }, { "epoch": 0.41236205334078646, "grad_norm": 4.897467613220215, "learning_rate": 0.00037078313253012047, "loss": 0.9915, "step": 6086 }, { "epoch": 0.41242980918261046, "grad_norm": 3.9507055282592773, "learning_rate": 0.0003707776560788609, "loss": 0.8965, "step": 6087 }, { "epoch": 0.41249756502443446, "grad_norm": 3.0722994804382324, "learning_rate": 0.0003707721796276013, "loss": 0.8248, "step": 6088 }, { "epoch": 0.4125653208662584, "grad_norm": 4.155635833740234, "learning_rate": 0.0003707667031763418, "loss": 0.9678, "step": 6089 }, { "epoch": 0.4126330767080824, "grad_norm": 2.7862792015075684, "learning_rate": 0.00037076122672508217, "loss": 0.7148, "step": 6090 }, { "epoch": 0.4127008325499064, "grad_norm": 3.1624417304992676, "learning_rate": 0.00037075575027382257, "loss": 0.8042, "step": 6091 }, { "epoch": 0.4127685883917304, "grad_norm": 2.94054913520813, "learning_rate": 0.000370750273822563, "loss": 0.8706, "step": 6092 }, { "epoch": 0.41283634423355436, "grad_norm": 4.207178115844727, "learning_rate": 0.0003707447973713034, "loss": 1.1075, "step": 6093 }, { "epoch": 0.41290410007537837, "grad_norm": 2.7412447929382324, "learning_rate": 0.0003707393209200438, "loss": 0.8548, "step": 6094 }, { "epoch": 0.41297185591720237, "grad_norm": 3.1036124229431152, "learning_rate": 0.0003707338444687842, "loss": 0.8478, "step": 6095 }, { "epoch": 0.41303961175902637, "grad_norm": 3.1289165019989014, "learning_rate": 0.0003707283680175246, "loss": 0.6297, "step": 6096 }, { "epoch": 0.4131073676008503, "grad_norm": 4.006036281585693, "learning_rate": 0.0003707228915662651, "loss": 1.0973, "step": 6097 }, { "epoch": 0.4131751234426743, "grad_norm": 4.145560264587402, "learning_rate": 0.00037071741511500553, "loss": 0.8361, "step": 6098 }, { "epoch": 0.4132428792844983, "grad_norm": 2.72009539604187, "learning_rate": 0.00037071193866374593, "loss": 0.6245, "step": 6099 }, { "epoch": 0.4133106351263223, "grad_norm": 4.36569881439209, "learning_rate": 0.00037070646221248633, "loss": 1.0204, "step": 6100 }, { "epoch": 0.41337839096814627, "grad_norm": 2.7295382022857666, "learning_rate": 0.0003707009857612267, "loss": 0.7969, "step": 6101 }, { "epoch": 0.41344614680997027, "grad_norm": 4.514069080352783, "learning_rate": 0.0003706955093099671, "loss": 0.9662, "step": 6102 }, { "epoch": 0.4135139026517943, "grad_norm": 3.3044259548187256, "learning_rate": 0.0003706900328587076, "loss": 0.7576, "step": 6103 }, { "epoch": 0.4135816584936182, "grad_norm": 4.668038845062256, "learning_rate": 0.000370684556407448, "loss": 1.1707, "step": 6104 }, { "epoch": 0.4136494143354422, "grad_norm": 3.361274242401123, "learning_rate": 0.00037067907995618843, "loss": 0.7444, "step": 6105 }, { "epoch": 0.4137171701772662, "grad_norm": 3.1252405643463135, "learning_rate": 0.00037067360350492883, "loss": 0.9104, "step": 6106 }, { "epoch": 0.4137849260190902, "grad_norm": 6.500646591186523, "learning_rate": 0.00037066812705366923, "loss": 1.0316, "step": 6107 }, { "epoch": 0.4138526818609142, "grad_norm": 5.252775192260742, "learning_rate": 0.00037066265060240963, "loss": 0.9169, "step": 6108 }, { "epoch": 0.4139204377027382, "grad_norm": 3.6348650455474854, "learning_rate": 0.0003706571741511501, "loss": 0.7263, "step": 6109 }, { "epoch": 0.4139881935445622, "grad_norm": 3.185882091522217, "learning_rate": 0.0003706516976998905, "loss": 0.8311, "step": 6110 }, { "epoch": 0.4140559493863862, "grad_norm": 3.5737063884735107, "learning_rate": 0.0003706462212486309, "loss": 0.9263, "step": 6111 }, { "epoch": 0.4141237052282101, "grad_norm": 3.0396697521209717, "learning_rate": 0.00037064074479737134, "loss": 0.842, "step": 6112 }, { "epoch": 0.41419146107003413, "grad_norm": 3.368103504180908, "learning_rate": 0.00037063526834611173, "loss": 0.7855, "step": 6113 }, { "epoch": 0.41425921691185813, "grad_norm": 4.269338607788086, "learning_rate": 0.0003706297918948522, "loss": 0.9983, "step": 6114 }, { "epoch": 0.41432697275368213, "grad_norm": 2.8505311012268066, "learning_rate": 0.0003706243154435926, "loss": 0.83, "step": 6115 }, { "epoch": 0.4143947285955061, "grad_norm": 4.870187759399414, "learning_rate": 0.000370618838992333, "loss": 1.0483, "step": 6116 }, { "epoch": 0.4144624844373301, "grad_norm": 3.900129795074463, "learning_rate": 0.0003706133625410734, "loss": 0.9453, "step": 6117 }, { "epoch": 0.4145302402791541, "grad_norm": 3.7271854877471924, "learning_rate": 0.0003706078860898138, "loss": 0.8122, "step": 6118 }, { "epoch": 0.41459799612097803, "grad_norm": 5.437984943389893, "learning_rate": 0.00037060240963855424, "loss": 0.7223, "step": 6119 }, { "epoch": 0.41466575196280203, "grad_norm": 3.4118454456329346, "learning_rate": 0.0003705969331872947, "loss": 0.8581, "step": 6120 }, { "epoch": 0.41473350780462603, "grad_norm": 5.739293098449707, "learning_rate": 0.0003705914567360351, "loss": 0.8696, "step": 6121 }, { "epoch": 0.41480126364645004, "grad_norm": 2.9224798679351807, "learning_rate": 0.0003705859802847755, "loss": 0.7493, "step": 6122 }, { "epoch": 0.414869019488274, "grad_norm": 4.693022727966309, "learning_rate": 0.0003705805038335159, "loss": 1.0194, "step": 6123 }, { "epoch": 0.414936775330098, "grad_norm": 8.475019454956055, "learning_rate": 0.0003705750273822563, "loss": 0.77, "step": 6124 }, { "epoch": 0.415004531171922, "grad_norm": 2.9382572174072266, "learning_rate": 0.00037056955093099674, "loss": 0.7335, "step": 6125 }, { "epoch": 0.415072287013746, "grad_norm": 4.4232683181762695, "learning_rate": 0.00037056407447973714, "loss": 1.0831, "step": 6126 }, { "epoch": 0.41514004285556994, "grad_norm": 4.470012187957764, "learning_rate": 0.00037055859802847754, "loss": 0.8785, "step": 6127 }, { "epoch": 0.41520779869739394, "grad_norm": 2.9399962425231934, "learning_rate": 0.000370553121577218, "loss": 0.7637, "step": 6128 }, { "epoch": 0.41527555453921794, "grad_norm": 3.813950300216675, "learning_rate": 0.0003705476451259584, "loss": 1.0586, "step": 6129 }, { "epoch": 0.41534331038104194, "grad_norm": 4.709053039550781, "learning_rate": 0.00037054216867469885, "loss": 1.0231, "step": 6130 }, { "epoch": 0.4154110662228659, "grad_norm": 2.8284759521484375, "learning_rate": 0.00037053669222343925, "loss": 0.8166, "step": 6131 }, { "epoch": 0.4154788220646899, "grad_norm": 3.6345653533935547, "learning_rate": 0.00037053121577217964, "loss": 1.0151, "step": 6132 }, { "epoch": 0.4155465779065139, "grad_norm": 3.0683374404907227, "learning_rate": 0.00037052573932092004, "loss": 0.8853, "step": 6133 }, { "epoch": 0.41561433374833784, "grad_norm": 2.624093770980835, "learning_rate": 0.00037052026286966044, "loss": 0.7759, "step": 6134 }, { "epoch": 0.41568208959016184, "grad_norm": 2.2613329887390137, "learning_rate": 0.0003705147864184009, "loss": 0.6382, "step": 6135 }, { "epoch": 0.41574984543198584, "grad_norm": 3.175206184387207, "learning_rate": 0.00037050930996714135, "loss": 0.854, "step": 6136 }, { "epoch": 0.41581760127380984, "grad_norm": 2.849184274673462, "learning_rate": 0.00037050383351588175, "loss": 0.7538, "step": 6137 }, { "epoch": 0.4158853571156338, "grad_norm": 3.4436376094818115, "learning_rate": 0.00037049835706462215, "loss": 1.0104, "step": 6138 }, { "epoch": 0.4159531129574578, "grad_norm": 2.4233052730560303, "learning_rate": 0.00037049288061336255, "loss": 0.6094, "step": 6139 }, { "epoch": 0.4160208687992818, "grad_norm": 3.1631572246551514, "learning_rate": 0.00037048740416210295, "loss": 0.9662, "step": 6140 }, { "epoch": 0.4160886246411058, "grad_norm": 3.4730257987976074, "learning_rate": 0.0003704819277108434, "loss": 0.669, "step": 6141 }, { "epoch": 0.41615638048292974, "grad_norm": 3.350109577178955, "learning_rate": 0.0003704764512595838, "loss": 0.811, "step": 6142 }, { "epoch": 0.41622413632475375, "grad_norm": 2.913679599761963, "learning_rate": 0.00037047097480832425, "loss": 0.8505, "step": 6143 }, { "epoch": 0.41629189216657775, "grad_norm": 4.117281913757324, "learning_rate": 0.00037046549835706465, "loss": 0.96, "step": 6144 }, { "epoch": 0.41635964800840175, "grad_norm": 2.5774874687194824, "learning_rate": 0.00037046002190580505, "loss": 0.5683, "step": 6145 }, { "epoch": 0.4164274038502257, "grad_norm": 3.4137587547302246, "learning_rate": 0.00037045454545454545, "loss": 0.8074, "step": 6146 }, { "epoch": 0.4164951596920497, "grad_norm": 4.6130595207214355, "learning_rate": 0.0003704490690032859, "loss": 1.1646, "step": 6147 }, { "epoch": 0.4165629155338737, "grad_norm": 2.8249013423919678, "learning_rate": 0.0003704435925520263, "loss": 0.6841, "step": 6148 }, { "epoch": 0.41663067137569765, "grad_norm": 3.1338231563568115, "learning_rate": 0.0003704381161007667, "loss": 0.6289, "step": 6149 }, { "epoch": 0.41669842721752165, "grad_norm": 4.570322513580322, "learning_rate": 0.0003704326396495071, "loss": 0.9571, "step": 6150 }, { "epoch": 0.41676618305934565, "grad_norm": 3.569533109664917, "learning_rate": 0.00037042716319824755, "loss": 0.9345, "step": 6151 }, { "epoch": 0.41683393890116965, "grad_norm": 3.564207077026367, "learning_rate": 0.000370421686746988, "loss": 0.9179, "step": 6152 }, { "epoch": 0.4169016947429936, "grad_norm": 3.674199104309082, "learning_rate": 0.0003704162102957284, "loss": 0.8616, "step": 6153 }, { "epoch": 0.4169694505848176, "grad_norm": 3.920586109161377, "learning_rate": 0.0003704107338444688, "loss": 0.9378, "step": 6154 }, { "epoch": 0.4170372064266416, "grad_norm": 4.686420440673828, "learning_rate": 0.0003704052573932092, "loss": 1.1516, "step": 6155 }, { "epoch": 0.4171049622684656, "grad_norm": 3.209467649459839, "learning_rate": 0.0003703997809419496, "loss": 0.6498, "step": 6156 }, { "epoch": 0.41717271811028955, "grad_norm": 3.0936646461486816, "learning_rate": 0.00037039430449069006, "loss": 0.7819, "step": 6157 }, { "epoch": 0.41724047395211356, "grad_norm": 3.954408645629883, "learning_rate": 0.00037038882803943046, "loss": 1.0145, "step": 6158 }, { "epoch": 0.41730822979393756, "grad_norm": 2.628511667251587, "learning_rate": 0.0003703833515881709, "loss": 0.6539, "step": 6159 }, { "epoch": 0.41737598563576156, "grad_norm": 3.0730738639831543, "learning_rate": 0.0003703778751369113, "loss": 0.7768, "step": 6160 }, { "epoch": 0.4174437414775855, "grad_norm": 3.7101423740386963, "learning_rate": 0.0003703723986856517, "loss": 1.0445, "step": 6161 }, { "epoch": 0.4175114973194095, "grad_norm": 2.8876140117645264, "learning_rate": 0.0003703669222343921, "loss": 0.7527, "step": 6162 }, { "epoch": 0.4175792531612335, "grad_norm": 3.739457130432129, "learning_rate": 0.00037036144578313256, "loss": 1.021, "step": 6163 }, { "epoch": 0.41764700900305746, "grad_norm": 3.15006685256958, "learning_rate": 0.00037035596933187296, "loss": 0.7096, "step": 6164 }, { "epoch": 0.41771476484488146, "grad_norm": 3.7074811458587646, "learning_rate": 0.00037035049288061336, "loss": 0.9648, "step": 6165 }, { "epoch": 0.41778252068670546, "grad_norm": 4.064733505249023, "learning_rate": 0.00037034501642935376, "loss": 1.0088, "step": 6166 }, { "epoch": 0.41785027652852946, "grad_norm": 4.01060152053833, "learning_rate": 0.0003703395399780942, "loss": 0.7688, "step": 6167 }, { "epoch": 0.4179180323703534, "grad_norm": 4.027651309967041, "learning_rate": 0.00037033406352683467, "loss": 1.072, "step": 6168 }, { "epoch": 0.4179857882121774, "grad_norm": 3.648397922515869, "learning_rate": 0.00037032858707557507, "loss": 0.8771, "step": 6169 }, { "epoch": 0.4180535440540014, "grad_norm": 3.491786241531372, "learning_rate": 0.00037032311062431547, "loss": 0.9279, "step": 6170 }, { "epoch": 0.4181212998958254, "grad_norm": 4.624340057373047, "learning_rate": 0.00037031763417305586, "loss": 1.0948, "step": 6171 }, { "epoch": 0.41818905573764936, "grad_norm": 3.116774797439575, "learning_rate": 0.00037031215772179626, "loss": 0.6476, "step": 6172 }, { "epoch": 0.41825681157947336, "grad_norm": 3.673370122909546, "learning_rate": 0.0003703066812705367, "loss": 0.657, "step": 6173 }, { "epoch": 0.41832456742129737, "grad_norm": 3.3255693912506104, "learning_rate": 0.00037030120481927717, "loss": 1.1002, "step": 6174 }, { "epoch": 0.41839232326312137, "grad_norm": 3.5820837020874023, "learning_rate": 0.00037029572836801757, "loss": 0.9113, "step": 6175 }, { "epoch": 0.4184600791049453, "grad_norm": 2.9466958045959473, "learning_rate": 0.00037029025191675797, "loss": 0.7842, "step": 6176 }, { "epoch": 0.4185278349467693, "grad_norm": 3.8720855712890625, "learning_rate": 0.00037028477546549837, "loss": 0.9034, "step": 6177 }, { "epoch": 0.4185955907885933, "grad_norm": 4.032097339630127, "learning_rate": 0.00037027929901423877, "loss": 1.117, "step": 6178 }, { "epoch": 0.41866334663041727, "grad_norm": 3.7864058017730713, "learning_rate": 0.0003702738225629792, "loss": 0.7947, "step": 6179 }, { "epoch": 0.41873110247224127, "grad_norm": 3.2456672191619873, "learning_rate": 0.0003702683461117196, "loss": 0.8826, "step": 6180 }, { "epoch": 0.41879885831406527, "grad_norm": 4.122774600982666, "learning_rate": 0.00037026286966046, "loss": 1.3751, "step": 6181 }, { "epoch": 0.41886661415588927, "grad_norm": 3.266185760498047, "learning_rate": 0.00037025739320920047, "loss": 0.8995, "step": 6182 }, { "epoch": 0.4189343699977132, "grad_norm": 2.954339027404785, "learning_rate": 0.00037025191675794087, "loss": 0.8859, "step": 6183 }, { "epoch": 0.4190021258395372, "grad_norm": 3.910494565963745, "learning_rate": 0.00037024644030668127, "loss": 0.9333, "step": 6184 }, { "epoch": 0.4190698816813612, "grad_norm": 3.966935873031616, "learning_rate": 0.0003702409638554217, "loss": 0.9849, "step": 6185 }, { "epoch": 0.4191376375231852, "grad_norm": 2.8647704124450684, "learning_rate": 0.0003702354874041621, "loss": 0.8636, "step": 6186 }, { "epoch": 0.41920539336500917, "grad_norm": 3.095559597015381, "learning_rate": 0.0003702300109529025, "loss": 0.8029, "step": 6187 }, { "epoch": 0.4192731492068332, "grad_norm": 2.461210012435913, "learning_rate": 0.0003702245345016429, "loss": 0.7502, "step": 6188 }, { "epoch": 0.4193409050486572, "grad_norm": 4.687287330627441, "learning_rate": 0.0003702190580503834, "loss": 1.1086, "step": 6189 }, { "epoch": 0.4194086608904812, "grad_norm": 3.8829033374786377, "learning_rate": 0.00037021358159912383, "loss": 1.0034, "step": 6190 }, { "epoch": 0.4194764167323051, "grad_norm": 2.6002354621887207, "learning_rate": 0.00037020810514786423, "loss": 0.6049, "step": 6191 }, { "epoch": 0.4195441725741291, "grad_norm": 3.675715684890747, "learning_rate": 0.00037020262869660463, "loss": 0.8392, "step": 6192 }, { "epoch": 0.41961192841595313, "grad_norm": 4.520908832550049, "learning_rate": 0.000370197152245345, "loss": 0.8376, "step": 6193 }, { "epoch": 0.4196796842577771, "grad_norm": 4.424047946929932, "learning_rate": 0.0003701916757940854, "loss": 0.8383, "step": 6194 }, { "epoch": 0.4197474400996011, "grad_norm": 3.2530438899993896, "learning_rate": 0.0003701861993428259, "loss": 0.7668, "step": 6195 }, { "epoch": 0.4198151959414251, "grad_norm": 4.868038177490234, "learning_rate": 0.0003701807228915663, "loss": 1.0001, "step": 6196 }, { "epoch": 0.4198829517832491, "grad_norm": 4.747625827789307, "learning_rate": 0.0003701752464403067, "loss": 0.9985, "step": 6197 }, { "epoch": 0.419950707625073, "grad_norm": 2.535489559173584, "learning_rate": 0.00037016976998904713, "loss": 0.6983, "step": 6198 }, { "epoch": 0.42001846346689703, "grad_norm": 2.455125331878662, "learning_rate": 0.00037016429353778753, "loss": 0.733, "step": 6199 }, { "epoch": 0.42008621930872103, "grad_norm": 3.3819751739501953, "learning_rate": 0.00037015881708652793, "loss": 0.9317, "step": 6200 }, { "epoch": 0.42015397515054503, "grad_norm": 2.1471362113952637, "learning_rate": 0.0003701533406352684, "loss": 0.6759, "step": 6201 }, { "epoch": 0.420221730992369, "grad_norm": 4.250528812408447, "learning_rate": 0.0003701478641840088, "loss": 0.7671, "step": 6202 }, { "epoch": 0.420289486834193, "grad_norm": 3.3704628944396973, "learning_rate": 0.0003701423877327492, "loss": 0.8101, "step": 6203 }, { "epoch": 0.420357242676017, "grad_norm": 2.7211146354675293, "learning_rate": 0.0003701369112814896, "loss": 0.739, "step": 6204 }, { "epoch": 0.420424998517841, "grad_norm": 2.8132147789001465, "learning_rate": 0.00037013143483023003, "loss": 0.7979, "step": 6205 }, { "epoch": 0.42049275435966493, "grad_norm": 4.355744361877441, "learning_rate": 0.0003701259583789705, "loss": 0.7744, "step": 6206 }, { "epoch": 0.42056051020148894, "grad_norm": 3.5817699432373047, "learning_rate": 0.0003701204819277109, "loss": 0.9073, "step": 6207 }, { "epoch": 0.42062826604331294, "grad_norm": 3.7320308685302734, "learning_rate": 0.0003701150054764513, "loss": 1.08, "step": 6208 }, { "epoch": 0.4206960218851369, "grad_norm": 5.014627456665039, "learning_rate": 0.0003701095290251917, "loss": 0.8656, "step": 6209 }, { "epoch": 0.4207637777269609, "grad_norm": 4.211979389190674, "learning_rate": 0.0003701040525739321, "loss": 1.2161, "step": 6210 }, { "epoch": 0.4208315335687849, "grad_norm": 3.3095107078552246, "learning_rate": 0.00037009857612267254, "loss": 1.0133, "step": 6211 }, { "epoch": 0.4208992894106089, "grad_norm": 3.772671699523926, "learning_rate": 0.00037009309967141294, "loss": 0.8917, "step": 6212 }, { "epoch": 0.42096704525243284, "grad_norm": 3.614650249481201, "learning_rate": 0.0003700876232201534, "loss": 0.7923, "step": 6213 }, { "epoch": 0.42103480109425684, "grad_norm": 3.3228232860565186, "learning_rate": 0.0003700821467688938, "loss": 0.916, "step": 6214 }, { "epoch": 0.42110255693608084, "grad_norm": 2.6083931922912598, "learning_rate": 0.0003700766703176342, "loss": 0.6582, "step": 6215 }, { "epoch": 0.42117031277790484, "grad_norm": 3.6740622520446777, "learning_rate": 0.0003700711938663746, "loss": 1.0176, "step": 6216 }, { "epoch": 0.4212380686197288, "grad_norm": 3.5848512649536133, "learning_rate": 0.00037006571741511504, "loss": 0.8277, "step": 6217 }, { "epoch": 0.4213058244615528, "grad_norm": 2.1016857624053955, "learning_rate": 0.00037006024096385544, "loss": 0.6415, "step": 6218 }, { "epoch": 0.4213735803033768, "grad_norm": 2.87442684173584, "learning_rate": 0.00037005476451259584, "loss": 0.7644, "step": 6219 }, { "epoch": 0.4214413361452008, "grad_norm": 4.019791126251221, "learning_rate": 0.00037004928806133624, "loss": 0.9969, "step": 6220 }, { "epoch": 0.42150909198702474, "grad_norm": 3.543807029724121, "learning_rate": 0.0003700438116100767, "loss": 1.0221, "step": 6221 }, { "epoch": 0.42157684782884874, "grad_norm": 2.7422337532043457, "learning_rate": 0.0003700383351588171, "loss": 0.6956, "step": 6222 }, { "epoch": 0.42164460367067275, "grad_norm": 2.425027847290039, "learning_rate": 0.00037003285870755755, "loss": 0.5717, "step": 6223 }, { "epoch": 0.4217123595124967, "grad_norm": 2.8827388286590576, "learning_rate": 0.00037002738225629794, "loss": 0.8287, "step": 6224 }, { "epoch": 0.4217801153543207, "grad_norm": 2.6585280895233154, "learning_rate": 0.00037002190580503834, "loss": 0.7266, "step": 6225 }, { "epoch": 0.4218478711961447, "grad_norm": 3.062368392944336, "learning_rate": 0.00037001642935377874, "loss": 0.8749, "step": 6226 }, { "epoch": 0.4219156270379687, "grad_norm": 3.5983870029449463, "learning_rate": 0.0003700109529025192, "loss": 0.8979, "step": 6227 }, { "epoch": 0.42198338287979265, "grad_norm": 3.822279214859009, "learning_rate": 0.0003700054764512596, "loss": 0.8351, "step": 6228 }, { "epoch": 0.42205113872161665, "grad_norm": 4.193334579467773, "learning_rate": 0.00037000000000000005, "loss": 0.8716, "step": 6229 }, { "epoch": 0.42211889456344065, "grad_norm": 3.1663506031036377, "learning_rate": 0.00036999452354874045, "loss": 0.8548, "step": 6230 }, { "epoch": 0.42218665040526465, "grad_norm": 3.5401456356048584, "learning_rate": 0.00036998904709748085, "loss": 0.7949, "step": 6231 }, { "epoch": 0.4222544062470886, "grad_norm": 4.418153762817383, "learning_rate": 0.00036998357064622125, "loss": 1.0412, "step": 6232 }, { "epoch": 0.4223221620889126, "grad_norm": 4.2316575050354, "learning_rate": 0.0003699780941949617, "loss": 1.0467, "step": 6233 }, { "epoch": 0.4223899179307366, "grad_norm": 2.870851516723633, "learning_rate": 0.0003699726177437021, "loss": 0.6088, "step": 6234 }, { "epoch": 0.4224576737725606, "grad_norm": 4.238577365875244, "learning_rate": 0.0003699671412924425, "loss": 1.1509, "step": 6235 }, { "epoch": 0.42252542961438455, "grad_norm": 4.858729839324951, "learning_rate": 0.00036996166484118295, "loss": 0.9678, "step": 6236 }, { "epoch": 0.42259318545620855, "grad_norm": 2.9878032207489014, "learning_rate": 0.00036995618838992335, "loss": 0.7988, "step": 6237 }, { "epoch": 0.42266094129803256, "grad_norm": 3.048920154571533, "learning_rate": 0.00036995071193866375, "loss": 0.8159, "step": 6238 }, { "epoch": 0.4227286971398565, "grad_norm": 2.2306630611419678, "learning_rate": 0.0003699452354874042, "loss": 0.6371, "step": 6239 }, { "epoch": 0.4227964529816805, "grad_norm": 2.873110294342041, "learning_rate": 0.0003699397590361446, "loss": 0.705, "step": 6240 }, { "epoch": 0.4228642088235045, "grad_norm": 3.3113081455230713, "learning_rate": 0.000369934282584885, "loss": 0.9736, "step": 6241 }, { "epoch": 0.4229319646653285, "grad_norm": 3.2594234943389893, "learning_rate": 0.0003699288061336254, "loss": 0.9121, "step": 6242 }, { "epoch": 0.42299972050715245, "grad_norm": 3.5728747844696045, "learning_rate": 0.0003699233296823658, "loss": 0.9968, "step": 6243 }, { "epoch": 0.42306747634897646, "grad_norm": 4.291263103485107, "learning_rate": 0.0003699178532311063, "loss": 1.0365, "step": 6244 }, { "epoch": 0.42313523219080046, "grad_norm": 3.6043386459350586, "learning_rate": 0.0003699123767798467, "loss": 0.8282, "step": 6245 }, { "epoch": 0.42320298803262446, "grad_norm": 3.1277477741241455, "learning_rate": 0.0003699069003285871, "loss": 0.9271, "step": 6246 }, { "epoch": 0.4232707438744484, "grad_norm": 2.9749057292938232, "learning_rate": 0.0003699014238773275, "loss": 0.7502, "step": 6247 }, { "epoch": 0.4233384997162724, "grad_norm": 2.86210036277771, "learning_rate": 0.0003698959474260679, "loss": 0.7504, "step": 6248 }, { "epoch": 0.4234062555580964, "grad_norm": 4.2416815757751465, "learning_rate": 0.00036989047097480836, "loss": 0.9574, "step": 6249 }, { "epoch": 0.4234740113999204, "grad_norm": 2.487778425216675, "learning_rate": 0.00036988499452354876, "loss": 0.6842, "step": 6250 }, { "epoch": 0.42354176724174436, "grad_norm": 3.39168119430542, "learning_rate": 0.00036987951807228916, "loss": 0.7759, "step": 6251 }, { "epoch": 0.42360952308356836, "grad_norm": 3.763737916946411, "learning_rate": 0.0003698740416210296, "loss": 0.8937, "step": 6252 }, { "epoch": 0.42367727892539236, "grad_norm": 3.005854845046997, "learning_rate": 0.00036986856516977, "loss": 0.739, "step": 6253 }, { "epoch": 0.4237450347672163, "grad_norm": 4.022545337677002, "learning_rate": 0.0003698630887185104, "loss": 1.1218, "step": 6254 }, { "epoch": 0.4238127906090403, "grad_norm": 2.9552979469299316, "learning_rate": 0.00036985761226725086, "loss": 0.9028, "step": 6255 }, { "epoch": 0.4238805464508643, "grad_norm": 4.670953273773193, "learning_rate": 0.00036985213581599126, "loss": 1.0622, "step": 6256 }, { "epoch": 0.4239483022926883, "grad_norm": 3.1552205085754395, "learning_rate": 0.00036984665936473166, "loss": 0.8587, "step": 6257 }, { "epoch": 0.42401605813451226, "grad_norm": 3.398550033569336, "learning_rate": 0.00036984118291347206, "loss": 1.1154, "step": 6258 }, { "epoch": 0.42408381397633627, "grad_norm": 3.5295748710632324, "learning_rate": 0.00036983570646221246, "loss": 0.8334, "step": 6259 }, { "epoch": 0.42415156981816027, "grad_norm": 3.1108057498931885, "learning_rate": 0.0003698302300109529, "loss": 0.7013, "step": 6260 }, { "epoch": 0.42421932565998427, "grad_norm": 3.6390902996063232, "learning_rate": 0.00036982475355969337, "loss": 0.851, "step": 6261 }, { "epoch": 0.4242870815018082, "grad_norm": 3.977881908416748, "learning_rate": 0.00036981927710843377, "loss": 0.8593, "step": 6262 }, { "epoch": 0.4243548373436322, "grad_norm": 3.0145297050476074, "learning_rate": 0.00036981380065717416, "loss": 0.7736, "step": 6263 }, { "epoch": 0.4244225931854562, "grad_norm": 3.285266637802124, "learning_rate": 0.00036980832420591456, "loss": 0.8248, "step": 6264 }, { "epoch": 0.4244903490272802, "grad_norm": 2.7737667560577393, "learning_rate": 0.000369802847754655, "loss": 0.7361, "step": 6265 }, { "epoch": 0.42455810486910417, "grad_norm": 2.960221529006958, "learning_rate": 0.0003697973713033954, "loss": 0.8857, "step": 6266 }, { "epoch": 0.42462586071092817, "grad_norm": 3.967716693878174, "learning_rate": 0.0003697918948521358, "loss": 1.0692, "step": 6267 }, { "epoch": 0.4246936165527522, "grad_norm": 3.3838980197906494, "learning_rate": 0.00036978641840087627, "loss": 0.7113, "step": 6268 }, { "epoch": 0.4247613723945761, "grad_norm": 3.3343701362609863, "learning_rate": 0.00036978094194961667, "loss": 0.8707, "step": 6269 }, { "epoch": 0.4248291282364001, "grad_norm": 2.8949599266052246, "learning_rate": 0.00036977546549835707, "loss": 0.7788, "step": 6270 }, { "epoch": 0.4248968840782241, "grad_norm": 4.640323162078857, "learning_rate": 0.0003697699890470975, "loss": 1.0005, "step": 6271 }, { "epoch": 0.4249646399200481, "grad_norm": 2.6837897300720215, "learning_rate": 0.0003697645125958379, "loss": 0.8732, "step": 6272 }, { "epoch": 0.4250323957618721, "grad_norm": 4.110270977020264, "learning_rate": 0.0003697590361445783, "loss": 0.9608, "step": 6273 }, { "epoch": 0.4251001516036961, "grad_norm": 2.396899700164795, "learning_rate": 0.0003697535596933187, "loss": 0.6338, "step": 6274 }, { "epoch": 0.4251679074455201, "grad_norm": 2.8783318996429443, "learning_rate": 0.00036974808324205917, "loss": 0.7734, "step": 6275 }, { "epoch": 0.4252356632873441, "grad_norm": 2.7166292667388916, "learning_rate": 0.00036974260679079957, "loss": 0.7357, "step": 6276 }, { "epoch": 0.425303419129168, "grad_norm": 3.3487653732299805, "learning_rate": 0.00036973713033954, "loss": 0.8222, "step": 6277 }, { "epoch": 0.425371174970992, "grad_norm": 4.046281337738037, "learning_rate": 0.0003697316538882804, "loss": 0.7705, "step": 6278 }, { "epoch": 0.42543893081281603, "grad_norm": 2.721919059753418, "learning_rate": 0.0003697261774370208, "loss": 0.8095, "step": 6279 }, { "epoch": 0.42550668665464003, "grad_norm": 4.340028762817383, "learning_rate": 0.0003697207009857612, "loss": 1.331, "step": 6280 }, { "epoch": 0.425574442496464, "grad_norm": 4.470715522766113, "learning_rate": 0.0003697152245345016, "loss": 0.9305, "step": 6281 }, { "epoch": 0.425642198338288, "grad_norm": 2.9356040954589844, "learning_rate": 0.0003697097480832421, "loss": 0.7771, "step": 6282 }, { "epoch": 0.425709954180112, "grad_norm": 4.247237682342529, "learning_rate": 0.00036970427163198253, "loss": 0.7456, "step": 6283 }, { "epoch": 0.42577771002193593, "grad_norm": 3.1693193912506104, "learning_rate": 0.00036969879518072293, "loss": 0.7478, "step": 6284 }, { "epoch": 0.42584546586375993, "grad_norm": 2.8572189807891846, "learning_rate": 0.0003696933187294633, "loss": 0.8036, "step": 6285 }, { "epoch": 0.42591322170558393, "grad_norm": 3.374826431274414, "learning_rate": 0.0003696878422782037, "loss": 0.7511, "step": 6286 }, { "epoch": 0.42598097754740794, "grad_norm": 3.4292900562286377, "learning_rate": 0.0003696823658269442, "loss": 0.9632, "step": 6287 }, { "epoch": 0.4260487333892319, "grad_norm": 2.551957607269287, "learning_rate": 0.0003696768893756846, "loss": 0.7148, "step": 6288 }, { "epoch": 0.4261164892310559, "grad_norm": 3.518465757369995, "learning_rate": 0.000369671412924425, "loss": 0.9183, "step": 6289 }, { "epoch": 0.4261842450728799, "grad_norm": 3.4415621757507324, "learning_rate": 0.0003696659364731654, "loss": 0.9597, "step": 6290 }, { "epoch": 0.4262520009147039, "grad_norm": 3.5428833961486816, "learning_rate": 0.00036966046002190583, "loss": 0.9688, "step": 6291 }, { "epoch": 0.42631975675652783, "grad_norm": 3.1086366176605225, "learning_rate": 0.00036965498357064623, "loss": 0.8561, "step": 6292 }, { "epoch": 0.42638751259835184, "grad_norm": 3.193326234817505, "learning_rate": 0.0003696495071193867, "loss": 0.9733, "step": 6293 }, { "epoch": 0.42645526844017584, "grad_norm": 3.107881784439087, "learning_rate": 0.0003696440306681271, "loss": 0.8897, "step": 6294 }, { "epoch": 0.42652302428199984, "grad_norm": 2.5665807723999023, "learning_rate": 0.0003696385542168675, "loss": 0.7154, "step": 6295 }, { "epoch": 0.4265907801238238, "grad_norm": 2.5737321376800537, "learning_rate": 0.0003696330777656079, "loss": 0.6748, "step": 6296 }, { "epoch": 0.4266585359656478, "grad_norm": 2.6794486045837402, "learning_rate": 0.0003696276013143483, "loss": 0.6782, "step": 6297 }, { "epoch": 0.4267262918074718, "grad_norm": 2.6599795818328857, "learning_rate": 0.00036962212486308873, "loss": 0.8371, "step": 6298 }, { "epoch": 0.42679404764929574, "grad_norm": 2.61659574508667, "learning_rate": 0.0003696166484118292, "loss": 0.726, "step": 6299 }, { "epoch": 0.42686180349111974, "grad_norm": 3.217559814453125, "learning_rate": 0.0003696111719605696, "loss": 0.7782, "step": 6300 }, { "epoch": 0.42692955933294374, "grad_norm": 4.028039932250977, "learning_rate": 0.00036960569550931, "loss": 1.1054, "step": 6301 }, { "epoch": 0.42699731517476774, "grad_norm": 5.199770450592041, "learning_rate": 0.0003696002190580504, "loss": 1.0663, "step": 6302 }, { "epoch": 0.4270650710165917, "grad_norm": 3.1791436672210693, "learning_rate": 0.00036959474260679084, "loss": 0.7912, "step": 6303 }, { "epoch": 0.4271328268584157, "grad_norm": 4.10256814956665, "learning_rate": 0.00036958926615553124, "loss": 0.8453, "step": 6304 }, { "epoch": 0.4272005827002397, "grad_norm": 2.922481060028076, "learning_rate": 0.00036958378970427164, "loss": 0.6775, "step": 6305 }, { "epoch": 0.4272683385420637, "grad_norm": 3.4723284244537354, "learning_rate": 0.0003695783132530121, "loss": 0.8286, "step": 6306 }, { "epoch": 0.42733609438388764, "grad_norm": 3.633917808532715, "learning_rate": 0.0003695728368017525, "loss": 0.9016, "step": 6307 }, { "epoch": 0.42740385022571165, "grad_norm": 3.986288070678711, "learning_rate": 0.0003695673603504929, "loss": 1.1119, "step": 6308 }, { "epoch": 0.42747160606753565, "grad_norm": 4.422970771789551, "learning_rate": 0.00036956188389923334, "loss": 0.8613, "step": 6309 }, { "epoch": 0.42753936190935965, "grad_norm": 4.631990432739258, "learning_rate": 0.00036955640744797374, "loss": 0.8641, "step": 6310 }, { "epoch": 0.4276071177511836, "grad_norm": 5.667660713195801, "learning_rate": 0.00036955093099671414, "loss": 0.89, "step": 6311 }, { "epoch": 0.4276748735930076, "grad_norm": 4.845278739929199, "learning_rate": 0.00036954545454545454, "loss": 1.2328, "step": 6312 }, { "epoch": 0.4277426294348316, "grad_norm": 3.712631940841675, "learning_rate": 0.00036953997809419494, "loss": 0.9715, "step": 6313 }, { "epoch": 0.42781038527665555, "grad_norm": 4.212069034576416, "learning_rate": 0.0003695345016429354, "loss": 0.644, "step": 6314 }, { "epoch": 0.42787814111847955, "grad_norm": 4.3607001304626465, "learning_rate": 0.00036952902519167585, "loss": 1.068, "step": 6315 }, { "epoch": 0.42794589696030355, "grad_norm": 2.706895589828491, "learning_rate": 0.00036952354874041624, "loss": 0.6091, "step": 6316 }, { "epoch": 0.42801365280212755, "grad_norm": 3.6120285987854004, "learning_rate": 0.00036951807228915664, "loss": 1.0565, "step": 6317 }, { "epoch": 0.4280814086439515, "grad_norm": 3.1597917079925537, "learning_rate": 0.00036951259583789704, "loss": 0.8243, "step": 6318 }, { "epoch": 0.4281491644857755, "grad_norm": 3.230849266052246, "learning_rate": 0.00036950711938663744, "loss": 0.9919, "step": 6319 }, { "epoch": 0.4282169203275995, "grad_norm": 3.244753837585449, "learning_rate": 0.0003695016429353779, "loss": 1.0274, "step": 6320 }, { "epoch": 0.4282846761694235, "grad_norm": 3.652761459350586, "learning_rate": 0.0003694961664841183, "loss": 0.9547, "step": 6321 }, { "epoch": 0.42835243201124745, "grad_norm": 4.18459415435791, "learning_rate": 0.00036949069003285875, "loss": 0.8724, "step": 6322 }, { "epoch": 0.42842018785307145, "grad_norm": 3.803577423095703, "learning_rate": 0.00036948521358159915, "loss": 0.92, "step": 6323 }, { "epoch": 0.42848794369489546, "grad_norm": 3.9907596111297607, "learning_rate": 0.00036947973713033955, "loss": 0.6945, "step": 6324 }, { "epoch": 0.42855569953671946, "grad_norm": 3.5603039264678955, "learning_rate": 0.00036947426067908, "loss": 0.904, "step": 6325 }, { "epoch": 0.4286234553785434, "grad_norm": 3.2467405796051025, "learning_rate": 0.0003694687842278204, "loss": 0.9901, "step": 6326 }, { "epoch": 0.4286912112203674, "grad_norm": 4.434312343597412, "learning_rate": 0.0003694633077765608, "loss": 1.0098, "step": 6327 }, { "epoch": 0.4287589670621914, "grad_norm": 3.5260140895843506, "learning_rate": 0.0003694578313253012, "loss": 0.6998, "step": 6328 }, { "epoch": 0.42882672290401536, "grad_norm": 4.022239685058594, "learning_rate": 0.0003694523548740416, "loss": 1.0871, "step": 6329 }, { "epoch": 0.42889447874583936, "grad_norm": 4.145698547363281, "learning_rate": 0.00036944687842278205, "loss": 1.1056, "step": 6330 }, { "epoch": 0.42896223458766336, "grad_norm": 3.535860300064087, "learning_rate": 0.0003694414019715225, "loss": 0.7968, "step": 6331 }, { "epoch": 0.42902999042948736, "grad_norm": 5.948753833770752, "learning_rate": 0.0003694359255202629, "loss": 1.1582, "step": 6332 }, { "epoch": 0.4290977462713113, "grad_norm": 3.2723734378814697, "learning_rate": 0.0003694304490690033, "loss": 0.7093, "step": 6333 }, { "epoch": 0.4291655021131353, "grad_norm": 3.322157144546509, "learning_rate": 0.0003694249726177437, "loss": 0.8741, "step": 6334 }, { "epoch": 0.4292332579549593, "grad_norm": 2.447329044342041, "learning_rate": 0.0003694194961664841, "loss": 0.8106, "step": 6335 }, { "epoch": 0.4293010137967833, "grad_norm": 3.5247979164123535, "learning_rate": 0.00036941401971522455, "loss": 0.8561, "step": 6336 }, { "epoch": 0.42936876963860726, "grad_norm": 2.7535293102264404, "learning_rate": 0.000369408543263965, "loss": 0.879, "step": 6337 }, { "epoch": 0.42943652548043126, "grad_norm": 3.910912275314331, "learning_rate": 0.0003694030668127054, "loss": 0.9347, "step": 6338 }, { "epoch": 0.42950428132225527, "grad_norm": 3.605294942855835, "learning_rate": 0.0003693975903614458, "loss": 0.7748, "step": 6339 }, { "epoch": 0.42957203716407927, "grad_norm": 3.562045097351074, "learning_rate": 0.0003693921139101862, "loss": 0.9727, "step": 6340 }, { "epoch": 0.4296397930059032, "grad_norm": 4.9539594650268555, "learning_rate": 0.00036938663745892666, "loss": 0.839, "step": 6341 }, { "epoch": 0.4297075488477272, "grad_norm": 3.089661121368408, "learning_rate": 0.00036938116100766706, "loss": 0.7785, "step": 6342 }, { "epoch": 0.4297753046895512, "grad_norm": 5.418990135192871, "learning_rate": 0.00036937568455640746, "loss": 0.7306, "step": 6343 }, { "epoch": 0.42984306053137517, "grad_norm": 3.431201457977295, "learning_rate": 0.00036937020810514786, "loss": 0.9657, "step": 6344 }, { "epoch": 0.42991081637319917, "grad_norm": 4.060028076171875, "learning_rate": 0.0003693647316538883, "loss": 0.9316, "step": 6345 }, { "epoch": 0.42997857221502317, "grad_norm": 3.276207447052002, "learning_rate": 0.0003693592552026287, "loss": 0.915, "step": 6346 }, { "epoch": 0.43004632805684717, "grad_norm": 4.945779323577881, "learning_rate": 0.00036935377875136916, "loss": 0.9503, "step": 6347 }, { "epoch": 0.4301140838986711, "grad_norm": 3.6668591499328613, "learning_rate": 0.00036934830230010956, "loss": 1.0945, "step": 6348 }, { "epoch": 0.4301818397404951, "grad_norm": 3.2330877780914307, "learning_rate": 0.00036934282584884996, "loss": 0.8425, "step": 6349 }, { "epoch": 0.4302495955823191, "grad_norm": 2.840606212615967, "learning_rate": 0.00036933734939759036, "loss": 0.8596, "step": 6350 }, { "epoch": 0.4303173514241431, "grad_norm": 4.570615291595459, "learning_rate": 0.00036933187294633076, "loss": 1.0667, "step": 6351 }, { "epoch": 0.43038510726596707, "grad_norm": 3.218773603439331, "learning_rate": 0.0003693263964950712, "loss": 0.809, "step": 6352 }, { "epoch": 0.4304528631077911, "grad_norm": 4.196386337280273, "learning_rate": 0.00036932092004381167, "loss": 1.1393, "step": 6353 }, { "epoch": 0.4305206189496151, "grad_norm": 2.651813268661499, "learning_rate": 0.00036931544359255206, "loss": 0.7174, "step": 6354 }, { "epoch": 0.4305883747914391, "grad_norm": 3.9410364627838135, "learning_rate": 0.00036930996714129246, "loss": 0.8696, "step": 6355 }, { "epoch": 0.430656130633263, "grad_norm": 3.962414264678955, "learning_rate": 0.00036930449069003286, "loss": 0.9269, "step": 6356 }, { "epoch": 0.430723886475087, "grad_norm": 3.3762874603271484, "learning_rate": 0.00036929901423877326, "loss": 0.7182, "step": 6357 }, { "epoch": 0.430791642316911, "grad_norm": 2.7965123653411865, "learning_rate": 0.0003692935377875137, "loss": 0.7127, "step": 6358 }, { "epoch": 0.430859398158735, "grad_norm": 3.9923713207244873, "learning_rate": 0.0003692880613362541, "loss": 1.0028, "step": 6359 }, { "epoch": 0.430927154000559, "grad_norm": 3.7321741580963135, "learning_rate": 0.0003692825848849945, "loss": 0.9499, "step": 6360 }, { "epoch": 0.430994909842383, "grad_norm": 5.394587993621826, "learning_rate": 0.00036927710843373497, "loss": 0.8899, "step": 6361 }, { "epoch": 0.431062665684207, "grad_norm": 3.4604806900024414, "learning_rate": 0.00036927163198247537, "loss": 0.9159, "step": 6362 }, { "epoch": 0.4311304215260309, "grad_norm": 3.2396180629730225, "learning_rate": 0.0003692661555312158, "loss": 0.87, "step": 6363 }, { "epoch": 0.43119817736785493, "grad_norm": 2.662715196609497, "learning_rate": 0.0003692606790799562, "loss": 0.7373, "step": 6364 }, { "epoch": 0.43126593320967893, "grad_norm": 3.6289896965026855, "learning_rate": 0.0003692552026286966, "loss": 0.9007, "step": 6365 }, { "epoch": 0.43133368905150293, "grad_norm": 5.851082801818848, "learning_rate": 0.000369249726177437, "loss": 0.9034, "step": 6366 }, { "epoch": 0.4314014448933269, "grad_norm": 4.435021877288818, "learning_rate": 0.0003692442497261774, "loss": 0.9074, "step": 6367 }, { "epoch": 0.4314692007351509, "grad_norm": 3.3232030868530273, "learning_rate": 0.00036923877327491787, "loss": 0.71, "step": 6368 }, { "epoch": 0.4315369565769749, "grad_norm": 3.4685187339782715, "learning_rate": 0.0003692332968236583, "loss": 0.6744, "step": 6369 }, { "epoch": 0.4316047124187989, "grad_norm": 3.925930976867676, "learning_rate": 0.0003692278203723987, "loss": 0.8717, "step": 6370 }, { "epoch": 0.43167246826062283, "grad_norm": 3.6178760528564453, "learning_rate": 0.0003692223439211391, "loss": 0.7661, "step": 6371 }, { "epoch": 0.43174022410244683, "grad_norm": 3.2185375690460205, "learning_rate": 0.0003692168674698795, "loss": 0.9917, "step": 6372 }, { "epoch": 0.43180797994427084, "grad_norm": 3.4683454036712646, "learning_rate": 0.0003692113910186199, "loss": 0.8117, "step": 6373 }, { "epoch": 0.4318757357860948, "grad_norm": 3.648073673248291, "learning_rate": 0.0003692059145673604, "loss": 0.8777, "step": 6374 }, { "epoch": 0.4319434916279188, "grad_norm": 3.439605236053467, "learning_rate": 0.0003692004381161008, "loss": 0.9459, "step": 6375 }, { "epoch": 0.4320112474697428, "grad_norm": 3.1711983680725098, "learning_rate": 0.0003691949616648412, "loss": 0.7894, "step": 6376 }, { "epoch": 0.4320790033115668, "grad_norm": 3.6149139404296875, "learning_rate": 0.0003691894852135816, "loss": 0.865, "step": 6377 }, { "epoch": 0.43214675915339074, "grad_norm": 2.7666499614715576, "learning_rate": 0.000369184008762322, "loss": 0.7178, "step": 6378 }, { "epoch": 0.43221451499521474, "grad_norm": 4.228898048400879, "learning_rate": 0.0003691785323110625, "loss": 0.9062, "step": 6379 }, { "epoch": 0.43228227083703874, "grad_norm": 3.1968986988067627, "learning_rate": 0.0003691730558598029, "loss": 0.6311, "step": 6380 }, { "epoch": 0.43235002667886274, "grad_norm": 2.9367904663085938, "learning_rate": 0.0003691675794085433, "loss": 0.8718, "step": 6381 }, { "epoch": 0.4324177825206867, "grad_norm": 2.967865467071533, "learning_rate": 0.0003691621029572837, "loss": 0.6497, "step": 6382 }, { "epoch": 0.4324855383625107, "grad_norm": 3.6863110065460205, "learning_rate": 0.0003691566265060241, "loss": 0.8094, "step": 6383 }, { "epoch": 0.4325532942043347, "grad_norm": 2.6155757904052734, "learning_rate": 0.00036915115005476453, "loss": 0.7596, "step": 6384 }, { "epoch": 0.4326210500461587, "grad_norm": 2.5703186988830566, "learning_rate": 0.000369145673603505, "loss": 0.6712, "step": 6385 }, { "epoch": 0.43268880588798264, "grad_norm": 3.064748764038086, "learning_rate": 0.0003691401971522454, "loss": 0.7744, "step": 6386 }, { "epoch": 0.43275656172980664, "grad_norm": 3.4838368892669678, "learning_rate": 0.0003691347207009858, "loss": 0.9509, "step": 6387 }, { "epoch": 0.43282431757163065, "grad_norm": 3.5276894569396973, "learning_rate": 0.0003691292442497262, "loss": 0.7774, "step": 6388 }, { "epoch": 0.4328920734134546, "grad_norm": 4.952828884124756, "learning_rate": 0.0003691237677984666, "loss": 1.0576, "step": 6389 }, { "epoch": 0.4329598292552786, "grad_norm": 3.433044910430908, "learning_rate": 0.00036911829134720703, "loss": 0.9533, "step": 6390 }, { "epoch": 0.4330275850971026, "grad_norm": 3.928586006164551, "learning_rate": 0.00036911281489594743, "loss": 0.9413, "step": 6391 }, { "epoch": 0.4330953409389266, "grad_norm": 3.032905101776123, "learning_rate": 0.0003691073384446879, "loss": 0.8326, "step": 6392 }, { "epoch": 0.43316309678075055, "grad_norm": 3.236323118209839, "learning_rate": 0.0003691018619934283, "loss": 0.7844, "step": 6393 }, { "epoch": 0.43323085262257455, "grad_norm": 2.665644645690918, "learning_rate": 0.0003690963855421687, "loss": 0.8506, "step": 6394 }, { "epoch": 0.43329860846439855, "grad_norm": 3.841647148132324, "learning_rate": 0.0003690909090909091, "loss": 0.8845, "step": 6395 }, { "epoch": 0.43336636430622255, "grad_norm": 3.316899299621582, "learning_rate": 0.00036908543263964954, "loss": 0.8186, "step": 6396 }, { "epoch": 0.4334341201480465, "grad_norm": 2.7390894889831543, "learning_rate": 0.00036907995618838994, "loss": 0.7887, "step": 6397 }, { "epoch": 0.4335018759898705, "grad_norm": 4.263178825378418, "learning_rate": 0.00036907447973713034, "loss": 1.1251, "step": 6398 }, { "epoch": 0.4335696318316945, "grad_norm": 4.338013172149658, "learning_rate": 0.00036906900328587073, "loss": 1.0152, "step": 6399 }, { "epoch": 0.43363738767351845, "grad_norm": 3.4951589107513428, "learning_rate": 0.0003690635268346112, "loss": 0.9751, "step": 6400 }, { "epoch": 0.43370514351534245, "grad_norm": 3.0352649688720703, "learning_rate": 0.00036905805038335164, "loss": 0.7971, "step": 6401 }, { "epoch": 0.43377289935716645, "grad_norm": 5.451804161071777, "learning_rate": 0.00036905257393209204, "loss": 1.0288, "step": 6402 }, { "epoch": 0.43384065519899045, "grad_norm": 4.270747184753418, "learning_rate": 0.00036904709748083244, "loss": 0.9167, "step": 6403 }, { "epoch": 0.4339084110408144, "grad_norm": 3.071958065032959, "learning_rate": 0.00036904162102957284, "loss": 0.8569, "step": 6404 }, { "epoch": 0.4339761668826384, "grad_norm": 3.8479325771331787, "learning_rate": 0.00036903614457831324, "loss": 1.0492, "step": 6405 }, { "epoch": 0.4340439227244624, "grad_norm": 3.2558488845825195, "learning_rate": 0.0003690306681270537, "loss": 0.6719, "step": 6406 }, { "epoch": 0.4341116785662864, "grad_norm": 3.8224434852600098, "learning_rate": 0.00036902519167579414, "loss": 1.1607, "step": 6407 }, { "epoch": 0.43417943440811035, "grad_norm": 4.575911521911621, "learning_rate": 0.00036901971522453454, "loss": 1.0672, "step": 6408 }, { "epoch": 0.43424719024993436, "grad_norm": 2.7880589962005615, "learning_rate": 0.00036901423877327494, "loss": 0.773, "step": 6409 }, { "epoch": 0.43431494609175836, "grad_norm": 2.6831464767456055, "learning_rate": 0.00036900876232201534, "loss": 0.8182, "step": 6410 }, { "epoch": 0.43438270193358236, "grad_norm": 4.622739791870117, "learning_rate": 0.00036900328587075574, "loss": 1.0571, "step": 6411 }, { "epoch": 0.4344504577754063, "grad_norm": 2.732297420501709, "learning_rate": 0.0003689978094194962, "loss": 0.7635, "step": 6412 }, { "epoch": 0.4345182136172303, "grad_norm": 2.8800840377807617, "learning_rate": 0.0003689923329682366, "loss": 0.8104, "step": 6413 }, { "epoch": 0.4345859694590543, "grad_norm": 4.376806735992432, "learning_rate": 0.000368986856516977, "loss": 0.9072, "step": 6414 }, { "epoch": 0.43465372530087826, "grad_norm": 3.1020851135253906, "learning_rate": 0.00036898138006571745, "loss": 0.906, "step": 6415 }, { "epoch": 0.43472148114270226, "grad_norm": 3.482807159423828, "learning_rate": 0.00036897590361445785, "loss": 0.993, "step": 6416 }, { "epoch": 0.43478923698452626, "grad_norm": 3.4399778842926025, "learning_rate": 0.0003689704271631983, "loss": 0.9887, "step": 6417 }, { "epoch": 0.43485699282635026, "grad_norm": 3.5763754844665527, "learning_rate": 0.0003689649507119387, "loss": 0.7502, "step": 6418 }, { "epoch": 0.4349247486681742, "grad_norm": 3.7276906967163086, "learning_rate": 0.0003689594742606791, "loss": 0.8054, "step": 6419 }, { "epoch": 0.4349925045099982, "grad_norm": 4.25399923324585, "learning_rate": 0.0003689539978094195, "loss": 0.9709, "step": 6420 }, { "epoch": 0.4350602603518222, "grad_norm": 3.461509943008423, "learning_rate": 0.0003689485213581599, "loss": 0.8785, "step": 6421 }, { "epoch": 0.4351280161936462, "grad_norm": 3.4107680320739746, "learning_rate": 0.00036894304490690035, "loss": 0.8226, "step": 6422 }, { "epoch": 0.43519577203547016, "grad_norm": 3.0504024028778076, "learning_rate": 0.0003689375684556408, "loss": 0.7504, "step": 6423 }, { "epoch": 0.43526352787729417, "grad_norm": 3.77532958984375, "learning_rate": 0.0003689320920043812, "loss": 0.9607, "step": 6424 }, { "epoch": 0.43533128371911817, "grad_norm": 3.5234289169311523, "learning_rate": 0.0003689266155531216, "loss": 0.7659, "step": 6425 }, { "epoch": 0.43539903956094217, "grad_norm": 3.5607590675354004, "learning_rate": 0.000368921139101862, "loss": 0.9839, "step": 6426 }, { "epoch": 0.4354667954027661, "grad_norm": 3.7073447704315186, "learning_rate": 0.0003689156626506024, "loss": 0.8055, "step": 6427 }, { "epoch": 0.4355345512445901, "grad_norm": 3.352524995803833, "learning_rate": 0.00036891018619934285, "loss": 1.2006, "step": 6428 }, { "epoch": 0.4356023070864141, "grad_norm": 3.957312822341919, "learning_rate": 0.00036890470974808325, "loss": 0.9059, "step": 6429 }, { "epoch": 0.43567006292823807, "grad_norm": 3.5661518573760986, "learning_rate": 0.00036889923329682365, "loss": 1.0713, "step": 6430 }, { "epoch": 0.43573781877006207, "grad_norm": 3.2303407192230225, "learning_rate": 0.0003688937568455641, "loss": 1.0364, "step": 6431 }, { "epoch": 0.43580557461188607, "grad_norm": 3.3034658432006836, "learning_rate": 0.0003688882803943045, "loss": 0.8578, "step": 6432 }, { "epoch": 0.4358733304537101, "grad_norm": 3.247990608215332, "learning_rate": 0.0003688828039430449, "loss": 0.9244, "step": 6433 }, { "epoch": 0.435941086295534, "grad_norm": 3.166553020477295, "learning_rate": 0.00036887732749178536, "loss": 0.9105, "step": 6434 }, { "epoch": 0.436008842137358, "grad_norm": 3.755772590637207, "learning_rate": 0.00036887185104052576, "loss": 0.9954, "step": 6435 }, { "epoch": 0.436076597979182, "grad_norm": 3.011124849319458, "learning_rate": 0.00036886637458926616, "loss": 1.1139, "step": 6436 }, { "epoch": 0.436144353821006, "grad_norm": 3.11771821975708, "learning_rate": 0.00036886089813800655, "loss": 0.8186, "step": 6437 }, { "epoch": 0.43621210966282997, "grad_norm": 4.253913402557373, "learning_rate": 0.000368855421686747, "loss": 0.8777, "step": 6438 }, { "epoch": 0.436279865504654, "grad_norm": 3.4229865074157715, "learning_rate": 0.00036884994523548746, "loss": 0.9297, "step": 6439 }, { "epoch": 0.436347621346478, "grad_norm": 3.4427995681762695, "learning_rate": 0.00036884446878422786, "loss": 0.8923, "step": 6440 }, { "epoch": 0.436415377188302, "grad_norm": 4.07667350769043, "learning_rate": 0.00036883899233296826, "loss": 0.7508, "step": 6441 }, { "epoch": 0.4364831330301259, "grad_norm": 5.215429306030273, "learning_rate": 0.00036883351588170866, "loss": 1.1685, "step": 6442 }, { "epoch": 0.4365508888719499, "grad_norm": 3.1781487464904785, "learning_rate": 0.00036882803943044906, "loss": 0.7363, "step": 6443 }, { "epoch": 0.43661864471377393, "grad_norm": 4.110567569732666, "learning_rate": 0.0003688225629791895, "loss": 0.8651, "step": 6444 }, { "epoch": 0.4366864005555979, "grad_norm": 5.254804611206055, "learning_rate": 0.0003688170865279299, "loss": 0.9323, "step": 6445 }, { "epoch": 0.4367541563974219, "grad_norm": 3.999006748199463, "learning_rate": 0.00036881161007667036, "loss": 0.9625, "step": 6446 }, { "epoch": 0.4368219122392459, "grad_norm": 4.045620918273926, "learning_rate": 0.00036880613362541076, "loss": 0.7214, "step": 6447 }, { "epoch": 0.4368896680810699, "grad_norm": 3.224346160888672, "learning_rate": 0.00036880065717415116, "loss": 0.6312, "step": 6448 }, { "epoch": 0.43695742392289383, "grad_norm": 4.967013835906982, "learning_rate": 0.00036879518072289156, "loss": 1.132, "step": 6449 }, { "epoch": 0.43702517976471783, "grad_norm": 3.3109216690063477, "learning_rate": 0.000368789704271632, "loss": 0.883, "step": 6450 }, { "epoch": 0.43709293560654183, "grad_norm": 3.6048665046691895, "learning_rate": 0.0003687842278203724, "loss": 1.1071, "step": 6451 }, { "epoch": 0.43716069144836583, "grad_norm": 3.6038169860839844, "learning_rate": 0.0003687787513691128, "loss": 1.0103, "step": 6452 }, { "epoch": 0.4372284472901898, "grad_norm": 3.9584147930145264, "learning_rate": 0.0003687732749178532, "loss": 0.8876, "step": 6453 }, { "epoch": 0.4372962031320138, "grad_norm": 3.0588903427124023, "learning_rate": 0.00036876779846659367, "loss": 0.7736, "step": 6454 }, { "epoch": 0.4373639589738378, "grad_norm": 2.948774814605713, "learning_rate": 0.0003687623220153341, "loss": 0.569, "step": 6455 }, { "epoch": 0.4374317148156618, "grad_norm": 3.045630693435669, "learning_rate": 0.0003687568455640745, "loss": 0.9276, "step": 6456 }, { "epoch": 0.43749947065748573, "grad_norm": 3.530590057373047, "learning_rate": 0.0003687513691128149, "loss": 1.0481, "step": 6457 }, { "epoch": 0.43756722649930974, "grad_norm": 4.0725884437561035, "learning_rate": 0.0003687458926615553, "loss": 1.0659, "step": 6458 }, { "epoch": 0.43763498234113374, "grad_norm": 2.8836610317230225, "learning_rate": 0.0003687404162102957, "loss": 0.7675, "step": 6459 }, { "epoch": 0.4377027381829577, "grad_norm": 3.132260799407959, "learning_rate": 0.00036873493975903617, "loss": 1.0386, "step": 6460 }, { "epoch": 0.4377704940247817, "grad_norm": 3.085855722427368, "learning_rate": 0.00036872946330777657, "loss": 0.9737, "step": 6461 }, { "epoch": 0.4378382498666057, "grad_norm": 3.0709495544433594, "learning_rate": 0.000368723986856517, "loss": 0.8462, "step": 6462 }, { "epoch": 0.4379060057084297, "grad_norm": 4.0219268798828125, "learning_rate": 0.0003687185104052574, "loss": 1.187, "step": 6463 }, { "epoch": 0.43797376155025364, "grad_norm": 3.509974718093872, "learning_rate": 0.0003687130339539978, "loss": 1.0872, "step": 6464 }, { "epoch": 0.43804151739207764, "grad_norm": 3.72304105758667, "learning_rate": 0.0003687075575027382, "loss": 0.8225, "step": 6465 }, { "epoch": 0.43810927323390164, "grad_norm": 2.5657875537872314, "learning_rate": 0.0003687020810514787, "loss": 0.6466, "step": 6466 }, { "epoch": 0.43817702907572564, "grad_norm": 3.663769483566284, "learning_rate": 0.0003686966046002191, "loss": 1.0481, "step": 6467 }, { "epoch": 0.4382447849175496, "grad_norm": 2.705408811569214, "learning_rate": 0.00036869112814895947, "loss": 0.6328, "step": 6468 }, { "epoch": 0.4383125407593736, "grad_norm": 3.0543930530548096, "learning_rate": 0.0003686856516976999, "loss": 0.9048, "step": 6469 }, { "epoch": 0.4383802966011976, "grad_norm": 2.823850154876709, "learning_rate": 0.0003686801752464403, "loss": 0.7256, "step": 6470 }, { "epoch": 0.4384480524430216, "grad_norm": 3.2431604862213135, "learning_rate": 0.0003686746987951807, "loss": 0.836, "step": 6471 }, { "epoch": 0.43851580828484554, "grad_norm": 2.4106810092926025, "learning_rate": 0.0003686692223439212, "loss": 0.5725, "step": 6472 }, { "epoch": 0.43858356412666955, "grad_norm": 3.2592759132385254, "learning_rate": 0.0003686637458926616, "loss": 0.958, "step": 6473 }, { "epoch": 0.43865131996849355, "grad_norm": 3.1344335079193115, "learning_rate": 0.000368658269441402, "loss": 0.8399, "step": 6474 }, { "epoch": 0.4387190758103175, "grad_norm": 3.1409285068511963, "learning_rate": 0.0003686527929901424, "loss": 0.7657, "step": 6475 }, { "epoch": 0.4387868316521415, "grad_norm": 3.3407764434814453, "learning_rate": 0.00036864731653888283, "loss": 0.8239, "step": 6476 }, { "epoch": 0.4388545874939655, "grad_norm": 5.22526741027832, "learning_rate": 0.0003686418400876233, "loss": 0.9147, "step": 6477 }, { "epoch": 0.4389223433357895, "grad_norm": 4.100168228149414, "learning_rate": 0.0003686363636363637, "loss": 1.2402, "step": 6478 }, { "epoch": 0.43899009917761345, "grad_norm": 2.857405424118042, "learning_rate": 0.0003686308871851041, "loss": 0.7548, "step": 6479 }, { "epoch": 0.43905785501943745, "grad_norm": 3.409290075302124, "learning_rate": 0.0003686254107338445, "loss": 0.9403, "step": 6480 }, { "epoch": 0.43912561086126145, "grad_norm": 3.635226011276245, "learning_rate": 0.0003686199342825849, "loss": 0.9574, "step": 6481 }, { "epoch": 0.43919336670308545, "grad_norm": 3.602391481399536, "learning_rate": 0.00036861445783132533, "loss": 0.8503, "step": 6482 }, { "epoch": 0.4392611225449094, "grad_norm": 3.9866127967834473, "learning_rate": 0.00036860898138006573, "loss": 0.9343, "step": 6483 }, { "epoch": 0.4393288783867334, "grad_norm": 3.038677215576172, "learning_rate": 0.00036860350492880613, "loss": 0.956, "step": 6484 }, { "epoch": 0.4393966342285574, "grad_norm": 2.6686954498291016, "learning_rate": 0.0003685980284775466, "loss": 0.6265, "step": 6485 }, { "epoch": 0.4394643900703814, "grad_norm": 4.260356426239014, "learning_rate": 0.000368592552026287, "loss": 1.2772, "step": 6486 }, { "epoch": 0.43953214591220535, "grad_norm": 3.2503671646118164, "learning_rate": 0.0003685870755750274, "loss": 0.7278, "step": 6487 }, { "epoch": 0.43959990175402935, "grad_norm": 3.4344663619995117, "learning_rate": 0.00036858159912376784, "loss": 0.8537, "step": 6488 }, { "epoch": 0.43966765759585336, "grad_norm": 4.4622321128845215, "learning_rate": 0.00036857612267250824, "loss": 0.693, "step": 6489 }, { "epoch": 0.4397354134376773, "grad_norm": 3.621267318725586, "learning_rate": 0.00036857064622124863, "loss": 1.2116, "step": 6490 }, { "epoch": 0.4398031692795013, "grad_norm": 3.276299238204956, "learning_rate": 0.00036856516976998903, "loss": 0.7946, "step": 6491 }, { "epoch": 0.4398709251213253, "grad_norm": 3.2066473960876465, "learning_rate": 0.00036855969331872943, "loss": 0.7881, "step": 6492 }, { "epoch": 0.4399386809631493, "grad_norm": 4.951755046844482, "learning_rate": 0.00036855421686746994, "loss": 1.4676, "step": 6493 }, { "epoch": 0.44000643680497326, "grad_norm": 4.703289985656738, "learning_rate": 0.00036854874041621034, "loss": 0.9681, "step": 6494 }, { "epoch": 0.44007419264679726, "grad_norm": 3.5397257804870605, "learning_rate": 0.00036854326396495074, "loss": 1.0075, "step": 6495 }, { "epoch": 0.44014194848862126, "grad_norm": 3.088146686553955, "learning_rate": 0.00036853778751369114, "loss": 0.7231, "step": 6496 }, { "epoch": 0.44020970433044526, "grad_norm": 3.5653629302978516, "learning_rate": 0.00036853231106243154, "loss": 0.6556, "step": 6497 }, { "epoch": 0.4402774601722692, "grad_norm": 4.319859504699707, "learning_rate": 0.000368526834611172, "loss": 1.0397, "step": 6498 }, { "epoch": 0.4403452160140932, "grad_norm": 3.47214412689209, "learning_rate": 0.0003685213581599124, "loss": 0.8525, "step": 6499 }, { "epoch": 0.4404129718559172, "grad_norm": 2.8462467193603516, "learning_rate": 0.00036851588170865284, "loss": 0.6245, "step": 6500 }, { "epoch": 0.4404807276977412, "grad_norm": 2.659900188446045, "learning_rate": 0.00036851040525739324, "loss": 0.6845, "step": 6501 }, { "epoch": 0.44054848353956516, "grad_norm": 4.891154766082764, "learning_rate": 0.00036850492880613364, "loss": 0.8643, "step": 6502 }, { "epoch": 0.44061623938138916, "grad_norm": 3.3436338901519775, "learning_rate": 0.00036849945235487404, "loss": 0.8406, "step": 6503 }, { "epoch": 0.44068399522321317, "grad_norm": 4.260509967803955, "learning_rate": 0.0003684939759036145, "loss": 1.1578, "step": 6504 }, { "epoch": 0.4407517510650371, "grad_norm": 3.491396903991699, "learning_rate": 0.0003684884994523549, "loss": 0.8398, "step": 6505 }, { "epoch": 0.4408195069068611, "grad_norm": 2.5010712146759033, "learning_rate": 0.0003684830230010953, "loss": 0.62, "step": 6506 }, { "epoch": 0.4408872627486851, "grad_norm": 3.253035306930542, "learning_rate": 0.0003684775465498357, "loss": 0.8889, "step": 6507 }, { "epoch": 0.4409550185905091, "grad_norm": 4.751802921295166, "learning_rate": 0.00036847207009857615, "loss": 0.9988, "step": 6508 }, { "epoch": 0.44102277443233306, "grad_norm": 3.310966968536377, "learning_rate": 0.00036846659364731655, "loss": 1.0503, "step": 6509 }, { "epoch": 0.44109053027415707, "grad_norm": 3.9141273498535156, "learning_rate": 0.000368461117196057, "loss": 1.0135, "step": 6510 }, { "epoch": 0.44115828611598107, "grad_norm": 3.2146761417388916, "learning_rate": 0.0003684556407447974, "loss": 0.8126, "step": 6511 }, { "epoch": 0.44122604195780507, "grad_norm": 3.6401798725128174, "learning_rate": 0.0003684501642935378, "loss": 1.0278, "step": 6512 }, { "epoch": 0.441293797799629, "grad_norm": 3.432103395462036, "learning_rate": 0.0003684446878422782, "loss": 0.9223, "step": 6513 }, { "epoch": 0.441361553641453, "grad_norm": 4.2941436767578125, "learning_rate": 0.00036843921139101865, "loss": 0.9862, "step": 6514 }, { "epoch": 0.441429309483277, "grad_norm": 3.293790817260742, "learning_rate": 0.00036843373493975905, "loss": 0.8908, "step": 6515 }, { "epoch": 0.441497065325101, "grad_norm": 3.803004026412964, "learning_rate": 0.0003684282584884995, "loss": 1.0581, "step": 6516 }, { "epoch": 0.44156482116692497, "grad_norm": 4.3799028396606445, "learning_rate": 0.0003684227820372399, "loss": 0.9003, "step": 6517 }, { "epoch": 0.44163257700874897, "grad_norm": 2.965482234954834, "learning_rate": 0.0003684173055859803, "loss": 0.7128, "step": 6518 }, { "epoch": 0.441700332850573, "grad_norm": 3.914576292037964, "learning_rate": 0.0003684118291347207, "loss": 0.9363, "step": 6519 }, { "epoch": 0.4417680886923969, "grad_norm": 5.1546783447265625, "learning_rate": 0.00036840635268346115, "loss": 0.8281, "step": 6520 }, { "epoch": 0.4418358445342209, "grad_norm": 2.9731457233428955, "learning_rate": 0.00036840087623220155, "loss": 0.7252, "step": 6521 }, { "epoch": 0.4419036003760449, "grad_norm": 3.2732949256896973, "learning_rate": 0.00036839539978094195, "loss": 0.8858, "step": 6522 }, { "epoch": 0.4419713562178689, "grad_norm": 3.1607766151428223, "learning_rate": 0.00036838992332968235, "loss": 0.896, "step": 6523 }, { "epoch": 0.4420391120596929, "grad_norm": 3.610090970993042, "learning_rate": 0.0003683844468784228, "loss": 1.1003, "step": 6524 }, { "epoch": 0.4421068679015169, "grad_norm": 3.349433183670044, "learning_rate": 0.0003683789704271632, "loss": 0.8562, "step": 6525 }, { "epoch": 0.4421746237433409, "grad_norm": 3.335765838623047, "learning_rate": 0.00036837349397590366, "loss": 0.8957, "step": 6526 }, { "epoch": 0.4422423795851649, "grad_norm": 3.300808906555176, "learning_rate": 0.00036836801752464406, "loss": 0.9747, "step": 6527 }, { "epoch": 0.4423101354269888, "grad_norm": 2.663215398788452, "learning_rate": 0.00036836254107338446, "loss": 0.8626, "step": 6528 }, { "epoch": 0.44237789126881283, "grad_norm": 2.7080183029174805, "learning_rate": 0.00036835706462212485, "loss": 0.8458, "step": 6529 }, { "epoch": 0.44244564711063683, "grad_norm": 3.0153160095214844, "learning_rate": 0.00036835158817086525, "loss": 0.864, "step": 6530 }, { "epoch": 0.44251340295246083, "grad_norm": 2.886664628982544, "learning_rate": 0.0003683461117196057, "loss": 0.7103, "step": 6531 }, { "epoch": 0.4425811587942848, "grad_norm": 3.7536299228668213, "learning_rate": 0.00036834063526834616, "loss": 0.8936, "step": 6532 }, { "epoch": 0.4426489146361088, "grad_norm": 3.1245455741882324, "learning_rate": 0.00036833515881708656, "loss": 0.6998, "step": 6533 }, { "epoch": 0.4427166704779328, "grad_norm": 17.655183792114258, "learning_rate": 0.00036832968236582696, "loss": 0.8947, "step": 6534 }, { "epoch": 0.44278442631975673, "grad_norm": 4.58343505859375, "learning_rate": 0.00036832420591456736, "loss": 0.8492, "step": 6535 }, { "epoch": 0.44285218216158073, "grad_norm": 3.236346483230591, "learning_rate": 0.0003683187294633078, "loss": 0.6378, "step": 6536 }, { "epoch": 0.44291993800340473, "grad_norm": 5.015084743499756, "learning_rate": 0.0003683132530120482, "loss": 0.9763, "step": 6537 }, { "epoch": 0.44298769384522874, "grad_norm": 4.515886306762695, "learning_rate": 0.0003683077765607886, "loss": 1.0005, "step": 6538 }, { "epoch": 0.4430554496870527, "grad_norm": 5.959397792816162, "learning_rate": 0.00036830230010952906, "loss": 0.8445, "step": 6539 }, { "epoch": 0.4431232055288767, "grad_norm": 3.009166717529297, "learning_rate": 0.00036829682365826946, "loss": 0.6422, "step": 6540 }, { "epoch": 0.4431909613707007, "grad_norm": 2.6195313930511475, "learning_rate": 0.00036829134720700986, "loss": 0.596, "step": 6541 }, { "epoch": 0.4432587172125247, "grad_norm": 3.6156516075134277, "learning_rate": 0.0003682858707557503, "loss": 0.8847, "step": 6542 }, { "epoch": 0.44332647305434864, "grad_norm": 3.2397103309631348, "learning_rate": 0.0003682803943044907, "loss": 0.9052, "step": 6543 }, { "epoch": 0.44339422889617264, "grad_norm": 3.2153878211975098, "learning_rate": 0.0003682749178532311, "loss": 0.6229, "step": 6544 }, { "epoch": 0.44346198473799664, "grad_norm": 3.775496006011963, "learning_rate": 0.0003682694414019715, "loss": 1.0613, "step": 6545 }, { "epoch": 0.44352974057982064, "grad_norm": 3.270073175430298, "learning_rate": 0.0003682639649507119, "loss": 0.8818, "step": 6546 }, { "epoch": 0.4435974964216446, "grad_norm": 3.6078624725341797, "learning_rate": 0.00036825848849945237, "loss": 0.8938, "step": 6547 }, { "epoch": 0.4436652522634686, "grad_norm": 2.975132465362549, "learning_rate": 0.0003682530120481928, "loss": 0.8076, "step": 6548 }, { "epoch": 0.4437330081052926, "grad_norm": 4.0195159912109375, "learning_rate": 0.0003682475355969332, "loss": 0.9355, "step": 6549 }, { "epoch": 0.44380076394711654, "grad_norm": 4.419267654418945, "learning_rate": 0.0003682420591456736, "loss": 1.0221, "step": 6550 }, { "epoch": 0.44386851978894054, "grad_norm": 3.6448726654052734, "learning_rate": 0.000368236582694414, "loss": 0.8718, "step": 6551 }, { "epoch": 0.44393627563076454, "grad_norm": 2.8546831607818604, "learning_rate": 0.00036823110624315447, "loss": 0.8306, "step": 6552 }, { "epoch": 0.44400403147258855, "grad_norm": 2.508455991744995, "learning_rate": 0.00036822562979189487, "loss": 0.649, "step": 6553 }, { "epoch": 0.4440717873144125, "grad_norm": 2.9486446380615234, "learning_rate": 0.00036822015334063527, "loss": 0.8681, "step": 6554 }, { "epoch": 0.4441395431562365, "grad_norm": 3.3726966381073, "learning_rate": 0.0003682146768893757, "loss": 0.8574, "step": 6555 }, { "epoch": 0.4442072989980605, "grad_norm": 3.380622625350952, "learning_rate": 0.0003682092004381161, "loss": 0.9855, "step": 6556 }, { "epoch": 0.4442750548398845, "grad_norm": 4.414060115814209, "learning_rate": 0.0003682037239868565, "loss": 1.1054, "step": 6557 }, { "epoch": 0.44434281068170844, "grad_norm": 6.830251216888428, "learning_rate": 0.000368198247535597, "loss": 0.9652, "step": 6558 }, { "epoch": 0.44441056652353245, "grad_norm": 3.6912944316864014, "learning_rate": 0.0003681927710843374, "loss": 1.0386, "step": 6559 }, { "epoch": 0.44447832236535645, "grad_norm": 2.7640342712402344, "learning_rate": 0.00036818729463307777, "loss": 0.7818, "step": 6560 }, { "epoch": 0.44454607820718045, "grad_norm": 2.9807262420654297, "learning_rate": 0.00036818181818181817, "loss": 0.8416, "step": 6561 }, { "epoch": 0.4446138340490044, "grad_norm": 2.963192939758301, "learning_rate": 0.00036817634173055857, "loss": 0.7593, "step": 6562 }, { "epoch": 0.4446815898908284, "grad_norm": 3.156198263168335, "learning_rate": 0.000368170865279299, "loss": 0.8491, "step": 6563 }, { "epoch": 0.4447493457326524, "grad_norm": 3.8442161083221436, "learning_rate": 0.0003681653888280395, "loss": 0.9212, "step": 6564 }, { "epoch": 0.44481710157447635, "grad_norm": 2.8533594608306885, "learning_rate": 0.0003681599123767799, "loss": 0.8152, "step": 6565 }, { "epoch": 0.44488485741630035, "grad_norm": 3.0193450450897217, "learning_rate": 0.0003681544359255203, "loss": 0.8255, "step": 6566 }, { "epoch": 0.44495261325812435, "grad_norm": 3.4161226749420166, "learning_rate": 0.0003681489594742607, "loss": 0.6955, "step": 6567 }, { "epoch": 0.44502036909994835, "grad_norm": 3.4605252742767334, "learning_rate": 0.0003681434830230011, "loss": 0.9087, "step": 6568 }, { "epoch": 0.4450881249417723, "grad_norm": 2.821627140045166, "learning_rate": 0.00036813800657174153, "loss": 0.7775, "step": 6569 }, { "epoch": 0.4451558807835963, "grad_norm": 3.5993270874023438, "learning_rate": 0.000368132530120482, "loss": 0.7311, "step": 6570 }, { "epoch": 0.4452236366254203, "grad_norm": 2.839766502380371, "learning_rate": 0.0003681270536692224, "loss": 0.7816, "step": 6571 }, { "epoch": 0.4452913924672443, "grad_norm": 3.844440221786499, "learning_rate": 0.0003681215772179628, "loss": 0.9757, "step": 6572 }, { "epoch": 0.44535914830906825, "grad_norm": 3.5649819374084473, "learning_rate": 0.0003681161007667032, "loss": 1.0716, "step": 6573 }, { "epoch": 0.44542690415089226, "grad_norm": 3.093223810195923, "learning_rate": 0.00036811062431544363, "loss": 0.7903, "step": 6574 }, { "epoch": 0.44549465999271626, "grad_norm": 2.9783992767333984, "learning_rate": 0.00036810514786418403, "loss": 0.8231, "step": 6575 }, { "epoch": 0.44556241583454026, "grad_norm": 3.2626657485961914, "learning_rate": 0.00036809967141292443, "loss": 0.8497, "step": 6576 }, { "epoch": 0.4456301716763642, "grad_norm": 4.365062713623047, "learning_rate": 0.00036809419496166483, "loss": 1.0658, "step": 6577 }, { "epoch": 0.4456979275181882, "grad_norm": 3.1774580478668213, "learning_rate": 0.0003680887185104053, "loss": 0.8548, "step": 6578 }, { "epoch": 0.4457656833600122, "grad_norm": 3.26467227935791, "learning_rate": 0.0003680832420591457, "loss": 1.0568, "step": 6579 }, { "epoch": 0.44583343920183616, "grad_norm": 2.8372318744659424, "learning_rate": 0.00036807776560788614, "loss": 0.7779, "step": 6580 }, { "epoch": 0.44590119504366016, "grad_norm": 3.1397173404693604, "learning_rate": 0.00036807228915662654, "loss": 0.8354, "step": 6581 }, { "epoch": 0.44596895088548416, "grad_norm": 4.023374557495117, "learning_rate": 0.00036806681270536693, "loss": 1.0398, "step": 6582 }, { "epoch": 0.44603670672730816, "grad_norm": 3.6403520107269287, "learning_rate": 0.00036806133625410733, "loss": 0.9607, "step": 6583 }, { "epoch": 0.4461044625691321, "grad_norm": 3.5833537578582764, "learning_rate": 0.00036805585980284773, "loss": 0.9033, "step": 6584 }, { "epoch": 0.4461722184109561, "grad_norm": 2.8995893001556396, "learning_rate": 0.0003680503833515882, "loss": 0.727, "step": 6585 }, { "epoch": 0.4462399742527801, "grad_norm": 2.4225871562957764, "learning_rate": 0.00036804490690032864, "loss": 0.5968, "step": 6586 }, { "epoch": 0.4463077300946041, "grad_norm": 3.201181650161743, "learning_rate": 0.00036803943044906904, "loss": 0.6553, "step": 6587 }, { "epoch": 0.44637548593642806, "grad_norm": 3.4142463207244873, "learning_rate": 0.00036803395399780944, "loss": 0.8416, "step": 6588 }, { "epoch": 0.44644324177825206, "grad_norm": 3.259758234024048, "learning_rate": 0.00036802847754654984, "loss": 0.7767, "step": 6589 }, { "epoch": 0.44651099762007607, "grad_norm": 3.7707202434539795, "learning_rate": 0.0003680230010952903, "loss": 0.9819, "step": 6590 }, { "epoch": 0.44657875346190007, "grad_norm": 4.538395404815674, "learning_rate": 0.0003680175246440307, "loss": 1.0028, "step": 6591 }, { "epoch": 0.446646509303724, "grad_norm": 2.8972203731536865, "learning_rate": 0.0003680120481927711, "loss": 0.8215, "step": 6592 }, { "epoch": 0.446714265145548, "grad_norm": 3.877552032470703, "learning_rate": 0.0003680065717415115, "loss": 0.7716, "step": 6593 }, { "epoch": 0.446782020987372, "grad_norm": 3.019200563430786, "learning_rate": 0.00036800109529025194, "loss": 0.6624, "step": 6594 }, { "epoch": 0.44684977682919597, "grad_norm": 4.166679859161377, "learning_rate": 0.00036799561883899234, "loss": 0.8365, "step": 6595 }, { "epoch": 0.44691753267101997, "grad_norm": 3.5631144046783447, "learning_rate": 0.0003679901423877328, "loss": 0.9124, "step": 6596 }, { "epoch": 0.44698528851284397, "grad_norm": 3.479762315750122, "learning_rate": 0.0003679846659364732, "loss": 0.8637, "step": 6597 }, { "epoch": 0.44705304435466797, "grad_norm": 3.6393232345581055, "learning_rate": 0.0003679791894852136, "loss": 0.9146, "step": 6598 }, { "epoch": 0.4471208001964919, "grad_norm": 3.4695568084716797, "learning_rate": 0.000367973713033954, "loss": 0.8695, "step": 6599 }, { "epoch": 0.4471885560383159, "grad_norm": 4.830214977264404, "learning_rate": 0.0003679682365826944, "loss": 1.1416, "step": 6600 }, { "epoch": 0.4472563118801399, "grad_norm": 3.480717182159424, "learning_rate": 0.00036796276013143485, "loss": 0.8978, "step": 6601 }, { "epoch": 0.4473240677219639, "grad_norm": 3.9682681560516357, "learning_rate": 0.0003679572836801753, "loss": 1.0269, "step": 6602 }, { "epoch": 0.44739182356378787, "grad_norm": 4.404621124267578, "learning_rate": 0.0003679518072289157, "loss": 0.8837, "step": 6603 }, { "epoch": 0.4474595794056119, "grad_norm": 3.5607800483703613, "learning_rate": 0.0003679463307776561, "loss": 1.0002, "step": 6604 }, { "epoch": 0.4475273352474359, "grad_norm": 3.1084532737731934, "learning_rate": 0.0003679408543263965, "loss": 0.8831, "step": 6605 }, { "epoch": 0.4475950910892599, "grad_norm": 3.1765453815460205, "learning_rate": 0.0003679353778751369, "loss": 0.7673, "step": 6606 }, { "epoch": 0.4476628469310838, "grad_norm": 3.410824775695801, "learning_rate": 0.00036792990142387735, "loss": 0.7701, "step": 6607 }, { "epoch": 0.4477306027729078, "grad_norm": 3.546560049057007, "learning_rate": 0.00036792442497261775, "loss": 0.9606, "step": 6608 }, { "epoch": 0.44779835861473183, "grad_norm": 3.5562584400177, "learning_rate": 0.0003679189485213582, "loss": 0.8583, "step": 6609 }, { "epoch": 0.4478661144565558, "grad_norm": 2.987919330596924, "learning_rate": 0.0003679134720700986, "loss": 0.8703, "step": 6610 }, { "epoch": 0.4479338702983798, "grad_norm": 3.2035911083221436, "learning_rate": 0.000367907995618839, "loss": 0.7334, "step": 6611 }, { "epoch": 0.4480016261402038, "grad_norm": 3.6076273918151855, "learning_rate": 0.00036790251916757945, "loss": 0.8631, "step": 6612 }, { "epoch": 0.4480693819820278, "grad_norm": 2.7905380725860596, "learning_rate": 0.00036789704271631985, "loss": 0.6757, "step": 6613 }, { "epoch": 0.44813713782385173, "grad_norm": 5.861031532287598, "learning_rate": 0.00036789156626506025, "loss": 1.0349, "step": 6614 }, { "epoch": 0.44820489366567573, "grad_norm": 4.044051647186279, "learning_rate": 0.00036788608981380065, "loss": 1.0325, "step": 6615 }, { "epoch": 0.44827264950749973, "grad_norm": 5.875138759613037, "learning_rate": 0.00036788061336254105, "loss": 1.0239, "step": 6616 }, { "epoch": 0.44834040534932373, "grad_norm": 4.003438949584961, "learning_rate": 0.0003678751369112815, "loss": 0.8721, "step": 6617 }, { "epoch": 0.4484081611911477, "grad_norm": 3.0285847187042236, "learning_rate": 0.00036786966046002196, "loss": 0.912, "step": 6618 }, { "epoch": 0.4484759170329717, "grad_norm": 4.008516788482666, "learning_rate": 0.00036786418400876236, "loss": 0.8605, "step": 6619 }, { "epoch": 0.4485436728747957, "grad_norm": 3.20580792427063, "learning_rate": 0.00036785870755750276, "loss": 0.8201, "step": 6620 }, { "epoch": 0.4486114287166197, "grad_norm": 3.934664011001587, "learning_rate": 0.00036785323110624315, "loss": 1.049, "step": 6621 }, { "epoch": 0.44867918455844363, "grad_norm": 4.749670505523682, "learning_rate": 0.00036784775465498355, "loss": 0.8235, "step": 6622 }, { "epoch": 0.44874694040026764, "grad_norm": 15.148301124572754, "learning_rate": 0.000367842278203724, "loss": 0.7173, "step": 6623 }, { "epoch": 0.44881469624209164, "grad_norm": 4.415149688720703, "learning_rate": 0.0003678368017524644, "loss": 0.9656, "step": 6624 }, { "epoch": 0.4488824520839156, "grad_norm": 4.555980205535889, "learning_rate": 0.00036783132530120486, "loss": 1.0373, "step": 6625 }, { "epoch": 0.4489502079257396, "grad_norm": 3.6277670860290527, "learning_rate": 0.00036782584884994526, "loss": 0.8583, "step": 6626 }, { "epoch": 0.4490179637675636, "grad_norm": 3.029195547103882, "learning_rate": 0.00036782037239868566, "loss": 0.746, "step": 6627 }, { "epoch": 0.4490857196093876, "grad_norm": 3.605330228805542, "learning_rate": 0.0003678148959474261, "loss": 1.2138, "step": 6628 }, { "epoch": 0.44915347545121154, "grad_norm": 3.151146650314331, "learning_rate": 0.0003678094194961665, "loss": 0.8781, "step": 6629 }, { "epoch": 0.44922123129303554, "grad_norm": 3.1878087520599365, "learning_rate": 0.0003678039430449069, "loss": 0.9928, "step": 6630 }, { "epoch": 0.44928898713485954, "grad_norm": 2.1682798862457275, "learning_rate": 0.0003677984665936473, "loss": 0.5435, "step": 6631 }, { "epoch": 0.44935674297668354, "grad_norm": 3.127149820327759, "learning_rate": 0.00036779299014238776, "loss": 0.7519, "step": 6632 }, { "epoch": 0.4494244988185075, "grad_norm": 3.4527814388275146, "learning_rate": 0.00036778751369112816, "loss": 0.9296, "step": 6633 }, { "epoch": 0.4494922546603315, "grad_norm": 3.1184945106506348, "learning_rate": 0.0003677820372398686, "loss": 0.8455, "step": 6634 }, { "epoch": 0.4495600105021555, "grad_norm": 3.2198891639709473, "learning_rate": 0.000367776560788609, "loss": 0.8255, "step": 6635 }, { "epoch": 0.4496277663439795, "grad_norm": 3.91862416267395, "learning_rate": 0.0003677710843373494, "loss": 0.9955, "step": 6636 }, { "epoch": 0.44969552218580344, "grad_norm": 3.643195629119873, "learning_rate": 0.0003677656078860898, "loss": 1.0776, "step": 6637 }, { "epoch": 0.44976327802762744, "grad_norm": 2.9081060886383057, "learning_rate": 0.0003677601314348302, "loss": 0.8753, "step": 6638 }, { "epoch": 0.44983103386945145, "grad_norm": 3.095890760421753, "learning_rate": 0.00036775465498357067, "loss": 1.0349, "step": 6639 }, { "epoch": 0.4498987897112754, "grad_norm": 3.476618528366089, "learning_rate": 0.0003677491785323111, "loss": 0.9101, "step": 6640 }, { "epoch": 0.4499665455530994, "grad_norm": 2.685668706893921, "learning_rate": 0.0003677437020810515, "loss": 0.7243, "step": 6641 }, { "epoch": 0.4500343013949234, "grad_norm": 3.003943920135498, "learning_rate": 0.0003677382256297919, "loss": 0.776, "step": 6642 }, { "epoch": 0.4501020572367474, "grad_norm": 3.8749020099639893, "learning_rate": 0.0003677327491785323, "loss": 0.9133, "step": 6643 }, { "epoch": 0.45016981307857135, "grad_norm": 3.827873706817627, "learning_rate": 0.0003677272727272727, "loss": 0.868, "step": 6644 }, { "epoch": 0.45023756892039535, "grad_norm": 2.7337422370910645, "learning_rate": 0.00036772179627601317, "loss": 0.7073, "step": 6645 }, { "epoch": 0.45030532476221935, "grad_norm": 3.6587579250335693, "learning_rate": 0.00036771631982475357, "loss": 1.0992, "step": 6646 }, { "epoch": 0.45037308060404335, "grad_norm": 2.280932903289795, "learning_rate": 0.00036771084337349397, "loss": 0.6104, "step": 6647 }, { "epoch": 0.4504408364458673, "grad_norm": 3.914771556854248, "learning_rate": 0.0003677053669222344, "loss": 0.9306, "step": 6648 }, { "epoch": 0.4505085922876913, "grad_norm": 5.850506782531738, "learning_rate": 0.0003676998904709748, "loss": 1.1088, "step": 6649 }, { "epoch": 0.4505763481295153, "grad_norm": 3.4800350666046143, "learning_rate": 0.0003676944140197153, "loss": 0.9601, "step": 6650 }, { "epoch": 0.4506441039713393, "grad_norm": 3.202361583709717, "learning_rate": 0.0003676889375684557, "loss": 0.8781, "step": 6651 }, { "epoch": 0.45071185981316325, "grad_norm": 3.5124053955078125, "learning_rate": 0.00036768346111719607, "loss": 1.082, "step": 6652 }, { "epoch": 0.45077961565498725, "grad_norm": 2.703045606613159, "learning_rate": 0.00036767798466593647, "loss": 0.79, "step": 6653 }, { "epoch": 0.45084737149681126, "grad_norm": 2.1702945232391357, "learning_rate": 0.00036767250821467687, "loss": 0.6568, "step": 6654 }, { "epoch": 0.4509151273386352, "grad_norm": 2.6123619079589844, "learning_rate": 0.0003676670317634173, "loss": 0.7728, "step": 6655 }, { "epoch": 0.4509828831804592, "grad_norm": 3.39876127243042, "learning_rate": 0.0003676615553121578, "loss": 0.8011, "step": 6656 }, { "epoch": 0.4510506390222832, "grad_norm": 4.075593948364258, "learning_rate": 0.0003676560788608982, "loss": 0.9171, "step": 6657 }, { "epoch": 0.4511183948641072, "grad_norm": 3.9166312217712402, "learning_rate": 0.0003676506024096386, "loss": 1.0366, "step": 6658 }, { "epoch": 0.45118615070593115, "grad_norm": 3.241339683532715, "learning_rate": 0.000367645125958379, "loss": 0.8743, "step": 6659 }, { "epoch": 0.45125390654775516, "grad_norm": 3.4779741764068604, "learning_rate": 0.0003676396495071194, "loss": 0.954, "step": 6660 }, { "epoch": 0.45132166238957916, "grad_norm": 2.9141013622283936, "learning_rate": 0.00036763417305585983, "loss": 1.0501, "step": 6661 }, { "epoch": 0.45138941823140316, "grad_norm": 3.1671226024627686, "learning_rate": 0.00036762869660460023, "loss": 0.8787, "step": 6662 }, { "epoch": 0.4514571740732271, "grad_norm": 3.6223084926605225, "learning_rate": 0.0003676232201533407, "loss": 1.1968, "step": 6663 }, { "epoch": 0.4515249299150511, "grad_norm": 2.6938719749450684, "learning_rate": 0.0003676177437020811, "loss": 0.8098, "step": 6664 }, { "epoch": 0.4515926857568751, "grad_norm": 2.852415084838867, "learning_rate": 0.0003676122672508215, "loss": 0.6586, "step": 6665 }, { "epoch": 0.4516604415986991, "grad_norm": 2.5442450046539307, "learning_rate": 0.00036760679079956193, "loss": 0.8995, "step": 6666 }, { "epoch": 0.45172819744052306, "grad_norm": 3.1210172176361084, "learning_rate": 0.00036760131434830233, "loss": 0.9907, "step": 6667 }, { "epoch": 0.45179595328234706, "grad_norm": 3.2614986896514893, "learning_rate": 0.00036759583789704273, "loss": 0.9179, "step": 6668 }, { "epoch": 0.45186370912417106, "grad_norm": 3.107819080352783, "learning_rate": 0.00036759036144578313, "loss": 0.8304, "step": 6669 }, { "epoch": 0.451931464965995, "grad_norm": 3.6040990352630615, "learning_rate": 0.00036758488499452353, "loss": 0.9526, "step": 6670 }, { "epoch": 0.451999220807819, "grad_norm": 2.8831000328063965, "learning_rate": 0.000367579408543264, "loss": 0.7705, "step": 6671 }, { "epoch": 0.452066976649643, "grad_norm": 2.385265350341797, "learning_rate": 0.00036757393209200444, "loss": 0.5889, "step": 6672 }, { "epoch": 0.452134732491467, "grad_norm": 3.003070592880249, "learning_rate": 0.00036756845564074484, "loss": 0.8762, "step": 6673 }, { "epoch": 0.45220248833329096, "grad_norm": 2.6604363918304443, "learning_rate": 0.00036756297918948523, "loss": 0.8474, "step": 6674 }, { "epoch": 0.45227024417511497, "grad_norm": 3.214559316635132, "learning_rate": 0.00036755750273822563, "loss": 0.9653, "step": 6675 }, { "epoch": 0.45233800001693897, "grad_norm": 3.7457571029663086, "learning_rate": 0.00036755202628696603, "loss": 0.933, "step": 6676 }, { "epoch": 0.45240575585876297, "grad_norm": 3.1652328968048096, "learning_rate": 0.0003675465498357065, "loss": 0.6116, "step": 6677 }, { "epoch": 0.4524735117005869, "grad_norm": 4.432468891143799, "learning_rate": 0.0003675410733844469, "loss": 1.0877, "step": 6678 }, { "epoch": 0.4525412675424109, "grad_norm": 3.4490833282470703, "learning_rate": 0.00036753559693318734, "loss": 1.0244, "step": 6679 }, { "epoch": 0.4526090233842349, "grad_norm": 5.689603328704834, "learning_rate": 0.00036753012048192774, "loss": 1.3669, "step": 6680 }, { "epoch": 0.4526767792260589, "grad_norm": 4.019708156585693, "learning_rate": 0.00036752464403066814, "loss": 1.1064, "step": 6681 }, { "epoch": 0.45274453506788287, "grad_norm": 4.241431713104248, "learning_rate": 0.00036751916757940854, "loss": 0.9555, "step": 6682 }, { "epoch": 0.45281229090970687, "grad_norm": 3.7472922801971436, "learning_rate": 0.000367513691128149, "loss": 0.9871, "step": 6683 }, { "epoch": 0.4528800467515309, "grad_norm": 4.003868103027344, "learning_rate": 0.0003675082146768894, "loss": 1.1268, "step": 6684 }, { "epoch": 0.4529478025933548, "grad_norm": 3.2392945289611816, "learning_rate": 0.0003675027382256298, "loss": 0.8479, "step": 6685 }, { "epoch": 0.4530155584351788, "grad_norm": 3.3898777961730957, "learning_rate": 0.0003674972617743702, "loss": 0.6946, "step": 6686 }, { "epoch": 0.4530833142770028, "grad_norm": 3.1725070476531982, "learning_rate": 0.00036749178532311064, "loss": 0.8442, "step": 6687 }, { "epoch": 0.4531510701188268, "grad_norm": 4.6193766593933105, "learning_rate": 0.0003674863088718511, "loss": 0.8861, "step": 6688 }, { "epoch": 0.4532188259606508, "grad_norm": 3.3585994243621826, "learning_rate": 0.0003674808324205915, "loss": 0.9751, "step": 6689 }, { "epoch": 0.4532865818024748, "grad_norm": 4.431196212768555, "learning_rate": 0.0003674753559693319, "loss": 1.0905, "step": 6690 }, { "epoch": 0.4533543376442988, "grad_norm": 3.221606731414795, "learning_rate": 0.0003674698795180723, "loss": 0.8323, "step": 6691 }, { "epoch": 0.4534220934861228, "grad_norm": 3.2835049629211426, "learning_rate": 0.0003674644030668127, "loss": 0.744, "step": 6692 }, { "epoch": 0.4534898493279467, "grad_norm": 2.762197256088257, "learning_rate": 0.00036745892661555314, "loss": 0.808, "step": 6693 }, { "epoch": 0.45355760516977073, "grad_norm": 7.627099990844727, "learning_rate": 0.00036745345016429354, "loss": 0.9913, "step": 6694 }, { "epoch": 0.45362536101159473, "grad_norm": 3.174818277359009, "learning_rate": 0.000367447973713034, "loss": 0.6659, "step": 6695 }, { "epoch": 0.45369311685341873, "grad_norm": 4.055415153503418, "learning_rate": 0.0003674424972617744, "loss": 0.7436, "step": 6696 }, { "epoch": 0.4537608726952427, "grad_norm": 3.6209418773651123, "learning_rate": 0.0003674370208105148, "loss": 0.8028, "step": 6697 }, { "epoch": 0.4538286285370667, "grad_norm": 3.664872646331787, "learning_rate": 0.0003674315443592552, "loss": 0.9715, "step": 6698 }, { "epoch": 0.4538963843788907, "grad_norm": 2.5106072425842285, "learning_rate": 0.00036742606790799565, "loss": 0.6917, "step": 6699 }, { "epoch": 0.45396414022071463, "grad_norm": 4.146648406982422, "learning_rate": 0.00036742059145673605, "loss": 0.7459, "step": 6700 }, { "epoch": 0.45403189606253863, "grad_norm": 3.3551318645477295, "learning_rate": 0.00036741511500547645, "loss": 0.8303, "step": 6701 }, { "epoch": 0.45409965190436263, "grad_norm": 2.791722059249878, "learning_rate": 0.0003674096385542169, "loss": 0.7429, "step": 6702 }, { "epoch": 0.45416740774618664, "grad_norm": 3.415062189102173, "learning_rate": 0.0003674041621029573, "loss": 0.8446, "step": 6703 }, { "epoch": 0.4542351635880106, "grad_norm": 4.859399318695068, "learning_rate": 0.00036739868565169775, "loss": 1.0237, "step": 6704 }, { "epoch": 0.4543029194298346, "grad_norm": 3.067272901535034, "learning_rate": 0.00036739320920043815, "loss": 0.7093, "step": 6705 }, { "epoch": 0.4543706752716586, "grad_norm": 3.007246732711792, "learning_rate": 0.00036738773274917855, "loss": 0.8466, "step": 6706 }, { "epoch": 0.4544384311134826, "grad_norm": 2.972806930541992, "learning_rate": 0.00036738225629791895, "loss": 0.8545, "step": 6707 }, { "epoch": 0.45450618695530653, "grad_norm": 3.236111640930176, "learning_rate": 0.00036737677984665935, "loss": 0.7981, "step": 6708 }, { "epoch": 0.45457394279713054, "grad_norm": 2.7540013790130615, "learning_rate": 0.0003673713033953998, "loss": 0.8206, "step": 6709 }, { "epoch": 0.45464169863895454, "grad_norm": 3.6158430576324463, "learning_rate": 0.00036736582694414026, "loss": 0.8257, "step": 6710 }, { "epoch": 0.45470945448077854, "grad_norm": 3.5469272136688232, "learning_rate": 0.00036736035049288066, "loss": 0.9401, "step": 6711 }, { "epoch": 0.4547772103226025, "grad_norm": 3.113218069076538, "learning_rate": 0.00036735487404162106, "loss": 0.8052, "step": 6712 }, { "epoch": 0.4548449661644265, "grad_norm": 3.5164742469787598, "learning_rate": 0.00036734939759036145, "loss": 0.9453, "step": 6713 }, { "epoch": 0.4549127220062505, "grad_norm": 4.344560623168945, "learning_rate": 0.00036734392113910185, "loss": 1.0853, "step": 6714 }, { "epoch": 0.45498047784807444, "grad_norm": 3.0442094802856445, "learning_rate": 0.0003673384446878423, "loss": 0.9048, "step": 6715 }, { "epoch": 0.45504823368989844, "grad_norm": 3.639571189880371, "learning_rate": 0.0003673329682365827, "loss": 0.9694, "step": 6716 }, { "epoch": 0.45511598953172244, "grad_norm": 3.424673318862915, "learning_rate": 0.0003673274917853231, "loss": 0.8212, "step": 6717 }, { "epoch": 0.45518374537354644, "grad_norm": 4.170720100402832, "learning_rate": 0.00036732201533406356, "loss": 0.922, "step": 6718 }, { "epoch": 0.4552515012153704, "grad_norm": 5.301748752593994, "learning_rate": 0.00036731653888280396, "loss": 1.3042, "step": 6719 }, { "epoch": 0.4553192570571944, "grad_norm": 4.586108207702637, "learning_rate": 0.00036731106243154436, "loss": 0.9665, "step": 6720 }, { "epoch": 0.4553870128990184, "grad_norm": 2.8867948055267334, "learning_rate": 0.0003673055859802848, "loss": 0.7391, "step": 6721 }, { "epoch": 0.4554547687408424, "grad_norm": 3.6852924823760986, "learning_rate": 0.0003673001095290252, "loss": 0.9929, "step": 6722 }, { "epoch": 0.45552252458266634, "grad_norm": 3.1234452724456787, "learning_rate": 0.0003672946330777656, "loss": 0.8108, "step": 6723 }, { "epoch": 0.45559028042449035, "grad_norm": 2.7990963459014893, "learning_rate": 0.000367289156626506, "loss": 0.8171, "step": 6724 }, { "epoch": 0.45565803626631435, "grad_norm": 5.326529026031494, "learning_rate": 0.00036728368017524646, "loss": 0.991, "step": 6725 }, { "epoch": 0.45572579210813835, "grad_norm": 3.352977752685547, "learning_rate": 0.0003672782037239869, "loss": 0.9047, "step": 6726 }, { "epoch": 0.4557935479499623, "grad_norm": 2.776648759841919, "learning_rate": 0.0003672727272727273, "loss": 0.9105, "step": 6727 }, { "epoch": 0.4558613037917863, "grad_norm": 3.610609292984009, "learning_rate": 0.0003672672508214677, "loss": 0.8497, "step": 6728 }, { "epoch": 0.4559290596336103, "grad_norm": 2.8792080879211426, "learning_rate": 0.0003672617743702081, "loss": 0.7035, "step": 6729 }, { "epoch": 0.45599681547543425, "grad_norm": 3.51960825920105, "learning_rate": 0.0003672562979189485, "loss": 0.9907, "step": 6730 }, { "epoch": 0.45606457131725825, "grad_norm": 3.308777093887329, "learning_rate": 0.00036725082146768897, "loss": 0.765, "step": 6731 }, { "epoch": 0.45613232715908225, "grad_norm": 3.9748308658599854, "learning_rate": 0.00036724534501642936, "loss": 0.9889, "step": 6732 }, { "epoch": 0.45620008300090625, "grad_norm": 3.9783642292022705, "learning_rate": 0.0003672398685651698, "loss": 0.9724, "step": 6733 }, { "epoch": 0.4562678388427302, "grad_norm": 4.660617828369141, "learning_rate": 0.0003672343921139102, "loss": 0.9647, "step": 6734 }, { "epoch": 0.4563355946845542, "grad_norm": 3.2996280193328857, "learning_rate": 0.0003672289156626506, "loss": 0.7457, "step": 6735 }, { "epoch": 0.4564033505263782, "grad_norm": 3.263307571411133, "learning_rate": 0.000367223439211391, "loss": 0.886, "step": 6736 }, { "epoch": 0.4564711063682022, "grad_norm": 3.318488597869873, "learning_rate": 0.00036721796276013147, "loss": 0.9337, "step": 6737 }, { "epoch": 0.45653886221002615, "grad_norm": 3.502129077911377, "learning_rate": 0.00036721248630887187, "loss": 0.7579, "step": 6738 }, { "epoch": 0.45660661805185015, "grad_norm": 3.0296366214752197, "learning_rate": 0.00036720700985761227, "loss": 0.7728, "step": 6739 }, { "epoch": 0.45667437389367416, "grad_norm": 4.176602363586426, "learning_rate": 0.00036720153340635267, "loss": 0.8176, "step": 6740 }, { "epoch": 0.45674212973549816, "grad_norm": 4.097350120544434, "learning_rate": 0.0003671960569550931, "loss": 0.6125, "step": 6741 }, { "epoch": 0.4568098855773221, "grad_norm": 2.7023661136627197, "learning_rate": 0.0003671905805038336, "loss": 0.7429, "step": 6742 }, { "epoch": 0.4568776414191461, "grad_norm": 4.511232852935791, "learning_rate": 0.000367185104052574, "loss": 0.841, "step": 6743 }, { "epoch": 0.4569453972609701, "grad_norm": 3.9234304428100586, "learning_rate": 0.00036717962760131437, "loss": 0.7982, "step": 6744 }, { "epoch": 0.45701315310279406, "grad_norm": 3.5279650688171387, "learning_rate": 0.00036717415115005477, "loss": 0.7903, "step": 6745 }, { "epoch": 0.45708090894461806, "grad_norm": 2.8566458225250244, "learning_rate": 0.00036716867469879517, "loss": 0.932, "step": 6746 }, { "epoch": 0.45714866478644206, "grad_norm": 4.1728949546813965, "learning_rate": 0.0003671631982475356, "loss": 1.0369, "step": 6747 }, { "epoch": 0.45721642062826606, "grad_norm": 4.744039535522461, "learning_rate": 0.000367157721796276, "loss": 0.8759, "step": 6748 }, { "epoch": 0.45728417647009, "grad_norm": 2.8540353775024414, "learning_rate": 0.0003671522453450165, "loss": 0.6685, "step": 6749 }, { "epoch": 0.457351932311914, "grad_norm": 3.6030352115631104, "learning_rate": 0.0003671467688937569, "loss": 0.8779, "step": 6750 }, { "epoch": 0.457419688153738, "grad_norm": 3.9240729808807373, "learning_rate": 0.0003671412924424973, "loss": 0.887, "step": 6751 }, { "epoch": 0.457487443995562, "grad_norm": 2.9366960525512695, "learning_rate": 0.0003671358159912377, "loss": 0.8131, "step": 6752 }, { "epoch": 0.45755519983738596, "grad_norm": 3.2934110164642334, "learning_rate": 0.00036713033953997813, "loss": 0.8867, "step": 6753 }, { "epoch": 0.45762295567920996, "grad_norm": 3.7788610458374023, "learning_rate": 0.0003671248630887185, "loss": 1.057, "step": 6754 }, { "epoch": 0.45769071152103397, "grad_norm": 2.447281837463379, "learning_rate": 0.0003671193866374589, "loss": 0.7407, "step": 6755 }, { "epoch": 0.45775846736285797, "grad_norm": 2.981074094772339, "learning_rate": 0.0003671139101861993, "loss": 0.8668, "step": 6756 }, { "epoch": 0.4578262232046819, "grad_norm": 4.289218902587891, "learning_rate": 0.0003671084337349398, "loss": 0.9796, "step": 6757 }, { "epoch": 0.4578939790465059, "grad_norm": 2.9780287742614746, "learning_rate": 0.0003671029572836802, "loss": 0.6865, "step": 6758 }, { "epoch": 0.4579617348883299, "grad_norm": 3.1109778881073, "learning_rate": 0.00036709748083242063, "loss": 0.8523, "step": 6759 }, { "epoch": 0.45802949073015387, "grad_norm": 3.2356812953948975, "learning_rate": 0.00036709200438116103, "loss": 0.9094, "step": 6760 }, { "epoch": 0.45809724657197787, "grad_norm": 4.056556224822998, "learning_rate": 0.00036708652792990143, "loss": 0.9592, "step": 6761 }, { "epoch": 0.45816500241380187, "grad_norm": 3.62164306640625, "learning_rate": 0.00036708105147864183, "loss": 1.0596, "step": 6762 }, { "epoch": 0.45823275825562587, "grad_norm": 2.6277976036071777, "learning_rate": 0.0003670755750273823, "loss": 0.774, "step": 6763 }, { "epoch": 0.4583005140974498, "grad_norm": 3.862790107727051, "learning_rate": 0.00036707009857612274, "loss": 0.9569, "step": 6764 }, { "epoch": 0.4583682699392738, "grad_norm": 3.4714248180389404, "learning_rate": 0.00036706462212486314, "loss": 0.9182, "step": 6765 }, { "epoch": 0.4584360257810978, "grad_norm": 2.6082961559295654, "learning_rate": 0.00036705914567360353, "loss": 0.742, "step": 6766 }, { "epoch": 0.4585037816229218, "grad_norm": 4.401329040527344, "learning_rate": 0.00036705366922234393, "loss": 0.8953, "step": 6767 }, { "epoch": 0.45857153746474577, "grad_norm": 2.4915878772735596, "learning_rate": 0.00036704819277108433, "loss": 0.7858, "step": 6768 }, { "epoch": 0.4586392933065698, "grad_norm": 3.2066338062286377, "learning_rate": 0.0003670427163198248, "loss": 0.8725, "step": 6769 }, { "epoch": 0.4587070491483938, "grad_norm": 2.664397716522217, "learning_rate": 0.0003670372398685652, "loss": 0.6812, "step": 6770 }, { "epoch": 0.4587748049902178, "grad_norm": 2.6875791549682617, "learning_rate": 0.0003670317634173056, "loss": 0.8331, "step": 6771 }, { "epoch": 0.4588425608320417, "grad_norm": 2.723428726196289, "learning_rate": 0.00036702628696604604, "loss": 0.8084, "step": 6772 }, { "epoch": 0.4589103166738657, "grad_norm": 3.308978319168091, "learning_rate": 0.00036702081051478644, "loss": 0.9181, "step": 6773 }, { "epoch": 0.45897807251568973, "grad_norm": 3.4426748752593994, "learning_rate": 0.00036701533406352684, "loss": 1.0331, "step": 6774 }, { "epoch": 0.4590458283575137, "grad_norm": 3.9764232635498047, "learning_rate": 0.0003670098576122673, "loss": 1.0057, "step": 6775 }, { "epoch": 0.4591135841993377, "grad_norm": 3.137744426727295, "learning_rate": 0.0003670043811610077, "loss": 0.7458, "step": 6776 }, { "epoch": 0.4591813400411617, "grad_norm": 4.490634441375732, "learning_rate": 0.0003669989047097481, "loss": 0.915, "step": 6777 }, { "epoch": 0.4592490958829857, "grad_norm": 3.3506228923797607, "learning_rate": 0.0003669934282584885, "loss": 0.8472, "step": 6778 }, { "epoch": 0.4593168517248096, "grad_norm": 3.4911375045776367, "learning_rate": 0.0003669879518072289, "loss": 0.885, "step": 6779 }, { "epoch": 0.45938460756663363, "grad_norm": 3.9891161918640137, "learning_rate": 0.0003669824753559694, "loss": 1.0805, "step": 6780 }, { "epoch": 0.45945236340845763, "grad_norm": 3.3216400146484375, "learning_rate": 0.0003669769989047098, "loss": 0.9943, "step": 6781 }, { "epoch": 0.45952011925028163, "grad_norm": 5.305748462677002, "learning_rate": 0.0003669715224534502, "loss": 0.8441, "step": 6782 }, { "epoch": 0.4595878750921056, "grad_norm": 3.3164525032043457, "learning_rate": 0.0003669660460021906, "loss": 0.8482, "step": 6783 }, { "epoch": 0.4596556309339296, "grad_norm": 2.998810291290283, "learning_rate": 0.000366960569550931, "loss": 0.8575, "step": 6784 }, { "epoch": 0.4597233867757536, "grad_norm": 4.6275224685668945, "learning_rate": 0.00036695509309967144, "loss": 0.9303, "step": 6785 }, { "epoch": 0.4597911426175776, "grad_norm": 3.8493552207946777, "learning_rate": 0.00036694961664841184, "loss": 0.9015, "step": 6786 }, { "epoch": 0.45985889845940153, "grad_norm": 3.116118907928467, "learning_rate": 0.00036694414019715224, "loss": 0.739, "step": 6787 }, { "epoch": 0.45992665430122553, "grad_norm": 3.628740072250366, "learning_rate": 0.0003669386637458927, "loss": 0.8539, "step": 6788 }, { "epoch": 0.45999441014304954, "grad_norm": 3.5999186038970947, "learning_rate": 0.0003669331872946331, "loss": 0.9361, "step": 6789 }, { "epoch": 0.4600621659848735, "grad_norm": 2.9682037830352783, "learning_rate": 0.0003669277108433735, "loss": 0.6842, "step": 6790 }, { "epoch": 0.4601299218266975, "grad_norm": 2.495215892791748, "learning_rate": 0.00036692223439211395, "loss": 0.6259, "step": 6791 }, { "epoch": 0.4601976776685215, "grad_norm": 3.3530232906341553, "learning_rate": 0.00036691675794085435, "loss": 0.9877, "step": 6792 }, { "epoch": 0.4602654335103455, "grad_norm": 4.018329620361328, "learning_rate": 0.00036691128148959475, "loss": 0.9866, "step": 6793 }, { "epoch": 0.46033318935216944, "grad_norm": 2.872091293334961, "learning_rate": 0.00036690580503833515, "loss": 0.7512, "step": 6794 }, { "epoch": 0.46040094519399344, "grad_norm": 3.5528547763824463, "learning_rate": 0.0003669003285870756, "loss": 0.8031, "step": 6795 }, { "epoch": 0.46046870103581744, "grad_norm": 3.029777765274048, "learning_rate": 0.000366894852135816, "loss": 0.7731, "step": 6796 }, { "epoch": 0.46053645687764144, "grad_norm": 4.326792240142822, "learning_rate": 0.00036688937568455645, "loss": 0.9055, "step": 6797 }, { "epoch": 0.4606042127194654, "grad_norm": 3.1317410469055176, "learning_rate": 0.00036688389923329685, "loss": 0.5626, "step": 6798 }, { "epoch": 0.4606719685612894, "grad_norm": 3.5356338024139404, "learning_rate": 0.00036687842278203725, "loss": 0.853, "step": 6799 }, { "epoch": 0.4607397244031134, "grad_norm": 2.8553287982940674, "learning_rate": 0.00036687294633077765, "loss": 0.6426, "step": 6800 }, { "epoch": 0.4608074802449374, "grad_norm": 3.874406099319458, "learning_rate": 0.0003668674698795181, "loss": 1.0255, "step": 6801 }, { "epoch": 0.46087523608676134, "grad_norm": 3.871143102645874, "learning_rate": 0.0003668619934282585, "loss": 0.9882, "step": 6802 }, { "epoch": 0.46094299192858534, "grad_norm": 3.0434324741363525, "learning_rate": 0.00036685651697699896, "loss": 0.7476, "step": 6803 }, { "epoch": 0.46101074777040935, "grad_norm": 3.435837507247925, "learning_rate": 0.00036685104052573936, "loss": 0.8971, "step": 6804 }, { "epoch": 0.4610785036122333, "grad_norm": 4.109983444213867, "learning_rate": 0.00036684556407447975, "loss": 0.7613, "step": 6805 }, { "epoch": 0.4611462594540573, "grad_norm": 9.24293327331543, "learning_rate": 0.00036684008762322015, "loss": 0.8614, "step": 6806 }, { "epoch": 0.4612140152958813, "grad_norm": 4.3038129806518555, "learning_rate": 0.0003668346111719606, "loss": 0.7105, "step": 6807 }, { "epoch": 0.4612817711377053, "grad_norm": 4.742788791656494, "learning_rate": 0.000366829134720701, "loss": 0.8352, "step": 6808 }, { "epoch": 0.46134952697952925, "grad_norm": 4.531766414642334, "learning_rate": 0.0003668236582694414, "loss": 1.0594, "step": 6809 }, { "epoch": 0.46141728282135325, "grad_norm": 3.222515106201172, "learning_rate": 0.0003668181818181818, "loss": 1.0649, "step": 6810 }, { "epoch": 0.46148503866317725, "grad_norm": 3.3346643447875977, "learning_rate": 0.00036681270536692226, "loss": 0.8002, "step": 6811 }, { "epoch": 0.46155279450500125, "grad_norm": 2.9966189861297607, "learning_rate": 0.00036680722891566266, "loss": 0.7326, "step": 6812 }, { "epoch": 0.4616205503468252, "grad_norm": 3.414384126663208, "learning_rate": 0.0003668017524644031, "loss": 0.7097, "step": 6813 }, { "epoch": 0.4616883061886492, "grad_norm": 3.037707805633545, "learning_rate": 0.0003667962760131435, "loss": 0.8322, "step": 6814 }, { "epoch": 0.4617560620304732, "grad_norm": 3.5692899227142334, "learning_rate": 0.0003667907995618839, "loss": 0.8434, "step": 6815 }, { "epoch": 0.4618238178722972, "grad_norm": 3.2966744899749756, "learning_rate": 0.0003667853231106243, "loss": 0.793, "step": 6816 }, { "epoch": 0.46189157371412115, "grad_norm": 4.1009111404418945, "learning_rate": 0.0003667798466593647, "loss": 0.9847, "step": 6817 }, { "epoch": 0.46195932955594515, "grad_norm": 3.4343984127044678, "learning_rate": 0.00036677437020810516, "loss": 0.8418, "step": 6818 }, { "epoch": 0.46202708539776915, "grad_norm": 3.6435389518737793, "learning_rate": 0.0003667688937568456, "loss": 0.7757, "step": 6819 }, { "epoch": 0.4620948412395931, "grad_norm": 3.3289976119995117, "learning_rate": 0.000366763417305586, "loss": 0.898, "step": 6820 }, { "epoch": 0.4621625970814171, "grad_norm": 10.199219703674316, "learning_rate": 0.0003667579408543264, "loss": 0.9081, "step": 6821 }, { "epoch": 0.4622303529232411, "grad_norm": 3.56518816947937, "learning_rate": 0.0003667524644030668, "loss": 0.732, "step": 6822 }, { "epoch": 0.4622981087650651, "grad_norm": 3.043318033218384, "learning_rate": 0.00036674698795180727, "loss": 0.7001, "step": 6823 }, { "epoch": 0.46236586460688905, "grad_norm": 5.187203407287598, "learning_rate": 0.00036674151150054766, "loss": 0.9504, "step": 6824 }, { "epoch": 0.46243362044871306, "grad_norm": 3.7006444931030273, "learning_rate": 0.00036673603504928806, "loss": 0.9257, "step": 6825 }, { "epoch": 0.46250137629053706, "grad_norm": 4.485423564910889, "learning_rate": 0.00036673055859802846, "loss": 1.0144, "step": 6826 }, { "epoch": 0.46256913213236106, "grad_norm": 3.4752562046051025, "learning_rate": 0.0003667250821467689, "loss": 0.8789, "step": 6827 }, { "epoch": 0.462636887974185, "grad_norm": 3.595877170562744, "learning_rate": 0.0003667196056955093, "loss": 0.9403, "step": 6828 }, { "epoch": 0.462704643816009, "grad_norm": 3.21968150138855, "learning_rate": 0.00036671412924424977, "loss": 0.8695, "step": 6829 }, { "epoch": 0.462772399657833, "grad_norm": 3.835871934890747, "learning_rate": 0.00036670865279299017, "loss": 0.8625, "step": 6830 }, { "epoch": 0.462840155499657, "grad_norm": 3.790015697479248, "learning_rate": 0.00036670317634173057, "loss": 0.7606, "step": 6831 }, { "epoch": 0.46290791134148096, "grad_norm": 7.378017425537109, "learning_rate": 0.00036669769989047097, "loss": 0.8836, "step": 6832 }, { "epoch": 0.46297566718330496, "grad_norm": 5.778117656707764, "learning_rate": 0.00036669222343921137, "loss": 1.2985, "step": 6833 }, { "epoch": 0.46304342302512896, "grad_norm": 2.7424163818359375, "learning_rate": 0.0003666867469879518, "loss": 0.6265, "step": 6834 }, { "epoch": 0.4631111788669529, "grad_norm": 3.1722123622894287, "learning_rate": 0.00036668127053669227, "loss": 0.7284, "step": 6835 }, { "epoch": 0.4631789347087769, "grad_norm": 2.423151731491089, "learning_rate": 0.00036667579408543267, "loss": 0.5896, "step": 6836 }, { "epoch": 0.4632466905506009, "grad_norm": 5.908917427062988, "learning_rate": 0.00036667031763417307, "loss": 0.8054, "step": 6837 }, { "epoch": 0.4633144463924249, "grad_norm": 4.044280529022217, "learning_rate": 0.00036666484118291347, "loss": 0.8194, "step": 6838 }, { "epoch": 0.46338220223424886, "grad_norm": 3.1080880165100098, "learning_rate": 0.0003666593647316539, "loss": 0.7458, "step": 6839 }, { "epoch": 0.46344995807607287, "grad_norm": 4.515875816345215, "learning_rate": 0.0003666538882803943, "loss": 0.8171, "step": 6840 }, { "epoch": 0.46351771391789687, "grad_norm": 3.5499284267425537, "learning_rate": 0.0003666484118291347, "loss": 1.0558, "step": 6841 }, { "epoch": 0.46358546975972087, "grad_norm": 3.763370990753174, "learning_rate": 0.0003666429353778752, "loss": 0.836, "step": 6842 }, { "epoch": 0.4636532256015448, "grad_norm": 3.991297960281372, "learning_rate": 0.0003666374589266156, "loss": 0.8979, "step": 6843 }, { "epoch": 0.4637209814433688, "grad_norm": 4.383556842803955, "learning_rate": 0.000366631982475356, "loss": 0.9745, "step": 6844 }, { "epoch": 0.4637887372851928, "grad_norm": 3.8411073684692383, "learning_rate": 0.00036662650602409643, "loss": 0.8923, "step": 6845 }, { "epoch": 0.4638564931270168, "grad_norm": 4.401012897491455, "learning_rate": 0.0003666210295728368, "loss": 0.9201, "step": 6846 }, { "epoch": 0.46392424896884077, "grad_norm": 5.080502033233643, "learning_rate": 0.0003666155531215772, "loss": 0.924, "step": 6847 }, { "epoch": 0.46399200481066477, "grad_norm": 3.867905378341675, "learning_rate": 0.0003666100766703176, "loss": 1.1402, "step": 6848 }, { "epoch": 0.4640597606524888, "grad_norm": 3.503359079360962, "learning_rate": 0.000366604600219058, "loss": 0.9091, "step": 6849 }, { "epoch": 0.4641275164943127, "grad_norm": 5.008991241455078, "learning_rate": 0.0003665991237677985, "loss": 0.775, "step": 6850 }, { "epoch": 0.4641952723361367, "grad_norm": 3.844327926635742, "learning_rate": 0.00036659364731653893, "loss": 0.8922, "step": 6851 }, { "epoch": 0.4642630281779607, "grad_norm": 3.2999629974365234, "learning_rate": 0.00036658817086527933, "loss": 0.7665, "step": 6852 }, { "epoch": 0.4643307840197847, "grad_norm": 3.3090641498565674, "learning_rate": 0.00036658269441401973, "loss": 0.9329, "step": 6853 }, { "epoch": 0.4643985398616087, "grad_norm": 3.007995843887329, "learning_rate": 0.00036657721796276013, "loss": 0.6145, "step": 6854 }, { "epoch": 0.4644662957034327, "grad_norm": 3.746373176574707, "learning_rate": 0.00036657174151150053, "loss": 0.8855, "step": 6855 }, { "epoch": 0.4645340515452567, "grad_norm": 2.691002607345581, "learning_rate": 0.000366566265060241, "loss": 0.659, "step": 6856 }, { "epoch": 0.4646018073870807, "grad_norm": 3.478188991546631, "learning_rate": 0.0003665607886089814, "loss": 0.8481, "step": 6857 }, { "epoch": 0.4646695632289046, "grad_norm": 3.255591869354248, "learning_rate": 0.00036655531215772183, "loss": 0.8078, "step": 6858 }, { "epoch": 0.4647373190707286, "grad_norm": 3.323719024658203, "learning_rate": 0.00036654983570646223, "loss": 0.9387, "step": 6859 }, { "epoch": 0.46480507491255263, "grad_norm": 5.026325702667236, "learning_rate": 0.00036654435925520263, "loss": 0.7528, "step": 6860 }, { "epoch": 0.46487283075437663, "grad_norm": 3.5956881046295166, "learning_rate": 0.0003665388828039431, "loss": 0.7365, "step": 6861 }, { "epoch": 0.4649405865962006, "grad_norm": 3.429459810256958, "learning_rate": 0.0003665334063526835, "loss": 0.8695, "step": 6862 }, { "epoch": 0.4650083424380246, "grad_norm": 2.7215240001678467, "learning_rate": 0.0003665279299014239, "loss": 0.6631, "step": 6863 }, { "epoch": 0.4650760982798486, "grad_norm": 4.061739444732666, "learning_rate": 0.0003665224534501643, "loss": 0.946, "step": 6864 }, { "epoch": 0.46514385412167253, "grad_norm": 3.123628616333008, "learning_rate": 0.00036651697699890474, "loss": 0.6169, "step": 6865 }, { "epoch": 0.46521160996349653, "grad_norm": 3.497309684753418, "learning_rate": 0.00036651150054764514, "loss": 0.9352, "step": 6866 }, { "epoch": 0.46527936580532053, "grad_norm": 5.638576507568359, "learning_rate": 0.0003665060240963856, "loss": 1.29, "step": 6867 }, { "epoch": 0.46534712164714453, "grad_norm": 3.8688902854919434, "learning_rate": 0.000366500547645126, "loss": 0.9257, "step": 6868 }, { "epoch": 0.4654148774889685, "grad_norm": 5.78640604019165, "learning_rate": 0.0003664950711938664, "loss": 1.1024, "step": 6869 }, { "epoch": 0.4654826333307925, "grad_norm": 3.0715813636779785, "learning_rate": 0.0003664895947426068, "loss": 0.8419, "step": 6870 }, { "epoch": 0.4655503891726165, "grad_norm": 4.469788551330566, "learning_rate": 0.0003664841182913472, "loss": 0.8239, "step": 6871 }, { "epoch": 0.4656181450144405, "grad_norm": 2.9056529998779297, "learning_rate": 0.00036647864184008764, "loss": 0.7786, "step": 6872 }, { "epoch": 0.46568590085626443, "grad_norm": 7.599416255950928, "learning_rate": 0.0003664731653888281, "loss": 0.8218, "step": 6873 }, { "epoch": 0.46575365669808844, "grad_norm": 3.157972812652588, "learning_rate": 0.0003664676889375685, "loss": 0.7576, "step": 6874 }, { "epoch": 0.46582141253991244, "grad_norm": 2.9815900325775146, "learning_rate": 0.0003664622124863089, "loss": 0.9713, "step": 6875 }, { "epoch": 0.46588916838173644, "grad_norm": 3.4726619720458984, "learning_rate": 0.0003664567360350493, "loss": 0.8562, "step": 6876 }, { "epoch": 0.4659569242235604, "grad_norm": 2.718519687652588, "learning_rate": 0.00036645125958378974, "loss": 0.7088, "step": 6877 }, { "epoch": 0.4660246800653844, "grad_norm": 2.8942787647247314, "learning_rate": 0.00036644578313253014, "loss": 0.7409, "step": 6878 }, { "epoch": 0.4660924359072084, "grad_norm": 3.9164581298828125, "learning_rate": 0.00036644030668127054, "loss": 1.101, "step": 6879 }, { "epoch": 0.46616019174903234, "grad_norm": 6.2570343017578125, "learning_rate": 0.00036643483023001094, "loss": 1.0047, "step": 6880 }, { "epoch": 0.46622794759085634, "grad_norm": 5.917110443115234, "learning_rate": 0.0003664293537787514, "loss": 1.0477, "step": 6881 }, { "epoch": 0.46629570343268034, "grad_norm": 3.7524573802948, "learning_rate": 0.0003664238773274918, "loss": 0.9979, "step": 6882 }, { "epoch": 0.46636345927450434, "grad_norm": 3.964839220046997, "learning_rate": 0.00036641840087623225, "loss": 0.9507, "step": 6883 }, { "epoch": 0.4664312151163283, "grad_norm": 3.8402013778686523, "learning_rate": 0.00036641292442497265, "loss": 0.8889, "step": 6884 }, { "epoch": 0.4664989709581523, "grad_norm": 3.116070508956909, "learning_rate": 0.00036640744797371305, "loss": 0.6783, "step": 6885 }, { "epoch": 0.4665667267999763, "grad_norm": 5.888935089111328, "learning_rate": 0.00036640197152245345, "loss": 0.9641, "step": 6886 }, { "epoch": 0.4666344826418003, "grad_norm": 3.2979023456573486, "learning_rate": 0.00036639649507119385, "loss": 0.8938, "step": 6887 }, { "epoch": 0.46670223848362424, "grad_norm": 3.619004726409912, "learning_rate": 0.0003663910186199343, "loss": 0.8621, "step": 6888 }, { "epoch": 0.46676999432544825, "grad_norm": 4.62145471572876, "learning_rate": 0.00036638554216867475, "loss": 0.9994, "step": 6889 }, { "epoch": 0.46683775016727225, "grad_norm": 3.494187831878662, "learning_rate": 0.00036638006571741515, "loss": 0.7495, "step": 6890 }, { "epoch": 0.46690550600909625, "grad_norm": 3.5156383514404297, "learning_rate": 0.00036637458926615555, "loss": 0.6182, "step": 6891 }, { "epoch": 0.4669732618509202, "grad_norm": 4.095824718475342, "learning_rate": 0.00036636911281489595, "loss": 0.9749, "step": 6892 }, { "epoch": 0.4670410176927442, "grad_norm": 5.314517498016357, "learning_rate": 0.00036636363636363635, "loss": 0.9656, "step": 6893 }, { "epoch": 0.4671087735345682, "grad_norm": 3.8324997425079346, "learning_rate": 0.0003663581599123768, "loss": 0.9091, "step": 6894 }, { "epoch": 0.46717652937639215, "grad_norm": 3.7757444381713867, "learning_rate": 0.0003663526834611172, "loss": 0.8631, "step": 6895 }, { "epoch": 0.46724428521821615, "grad_norm": 6.544497966766357, "learning_rate": 0.00036634720700985765, "loss": 0.8976, "step": 6896 }, { "epoch": 0.46731204106004015, "grad_norm": 3.6879751682281494, "learning_rate": 0.00036634173055859805, "loss": 0.7519, "step": 6897 }, { "epoch": 0.46737979690186415, "grad_norm": 3.650407075881958, "learning_rate": 0.00036633625410733845, "loss": 0.6644, "step": 6898 }, { "epoch": 0.4674475527436881, "grad_norm": 3.979417324066162, "learning_rate": 0.0003663307776560789, "loss": 1.0424, "step": 6899 }, { "epoch": 0.4675153085855121, "grad_norm": 3.8443737030029297, "learning_rate": 0.0003663253012048193, "loss": 0.8689, "step": 6900 }, { "epoch": 0.4675830644273361, "grad_norm": 3.0854873657226562, "learning_rate": 0.0003663198247535597, "loss": 0.6758, "step": 6901 }, { "epoch": 0.4676508202691601, "grad_norm": 4.0183868408203125, "learning_rate": 0.0003663143483023001, "loss": 0.9024, "step": 6902 }, { "epoch": 0.46771857611098405, "grad_norm": 3.3322348594665527, "learning_rate": 0.0003663088718510405, "loss": 0.8962, "step": 6903 }, { "epoch": 0.46778633195280805, "grad_norm": 3.3771893978118896, "learning_rate": 0.00036630339539978096, "loss": 0.7827, "step": 6904 }, { "epoch": 0.46785408779463206, "grad_norm": 3.741180181503296, "learning_rate": 0.0003662979189485214, "loss": 0.9019, "step": 6905 }, { "epoch": 0.467921843636456, "grad_norm": 2.977891445159912, "learning_rate": 0.0003662924424972618, "loss": 0.8792, "step": 6906 }, { "epoch": 0.46798959947828, "grad_norm": 3.030447244644165, "learning_rate": 0.0003662869660460022, "loss": 0.8086, "step": 6907 }, { "epoch": 0.468057355320104, "grad_norm": 3.952136993408203, "learning_rate": 0.0003662814895947426, "loss": 0.9528, "step": 6908 }, { "epoch": 0.468125111161928, "grad_norm": 2.476191997528076, "learning_rate": 0.000366276013143483, "loss": 0.6911, "step": 6909 }, { "epoch": 0.46819286700375196, "grad_norm": 5.128059387207031, "learning_rate": 0.00036627053669222346, "loss": 0.9254, "step": 6910 }, { "epoch": 0.46826062284557596, "grad_norm": 3.1387081146240234, "learning_rate": 0.00036626506024096386, "loss": 0.945, "step": 6911 }, { "epoch": 0.46832837868739996, "grad_norm": 3.2760894298553467, "learning_rate": 0.0003662595837897043, "loss": 0.9888, "step": 6912 }, { "epoch": 0.46839613452922396, "grad_norm": 3.1780893802642822, "learning_rate": 0.0003662541073384447, "loss": 0.8445, "step": 6913 }, { "epoch": 0.4684638903710479, "grad_norm": 3.604297161102295, "learning_rate": 0.0003662486308871851, "loss": 0.8436, "step": 6914 }, { "epoch": 0.4685316462128719, "grad_norm": 3.6089799404144287, "learning_rate": 0.00036624315443592557, "loss": 1.1871, "step": 6915 }, { "epoch": 0.4685994020546959, "grad_norm": 4.356726169586182, "learning_rate": 0.00036623767798466596, "loss": 0.9694, "step": 6916 }, { "epoch": 0.4686671578965199, "grad_norm": 3.4637291431427, "learning_rate": 0.00036623220153340636, "loss": 0.8196, "step": 6917 }, { "epoch": 0.46873491373834386, "grad_norm": 3.341883659362793, "learning_rate": 0.00036622672508214676, "loss": 0.7967, "step": 6918 }, { "epoch": 0.46880266958016786, "grad_norm": 3.023284912109375, "learning_rate": 0.00036622124863088716, "loss": 0.8515, "step": 6919 }, { "epoch": 0.46887042542199187, "grad_norm": 5.419569969177246, "learning_rate": 0.0003662157721796276, "loss": 1.0778, "step": 6920 }, { "epoch": 0.4689381812638158, "grad_norm": 3.9745099544525146, "learning_rate": 0.00036621029572836807, "loss": 0.8249, "step": 6921 }, { "epoch": 0.4690059371056398, "grad_norm": 3.102757692337036, "learning_rate": 0.00036620481927710847, "loss": 0.8829, "step": 6922 }, { "epoch": 0.4690736929474638, "grad_norm": 3.1213366985321045, "learning_rate": 0.00036619934282584887, "loss": 0.8187, "step": 6923 }, { "epoch": 0.4691414487892878, "grad_norm": 4.114562034606934, "learning_rate": 0.00036619386637458927, "loss": 1.1409, "step": 6924 }, { "epoch": 0.46920920463111176, "grad_norm": 4.076782703399658, "learning_rate": 0.00036618838992332967, "loss": 0.7574, "step": 6925 }, { "epoch": 0.46927696047293577, "grad_norm": 3.5207550525665283, "learning_rate": 0.0003661829134720701, "loss": 0.655, "step": 6926 }, { "epoch": 0.46934471631475977, "grad_norm": 2.673473596572876, "learning_rate": 0.00036617743702081057, "loss": 0.6973, "step": 6927 }, { "epoch": 0.46941247215658377, "grad_norm": 4.50869083404541, "learning_rate": 0.00036617196056955097, "loss": 0.8993, "step": 6928 }, { "epoch": 0.4694802279984077, "grad_norm": 4.767192363739014, "learning_rate": 0.00036616648411829137, "loss": 0.8648, "step": 6929 }, { "epoch": 0.4695479838402317, "grad_norm": 2.449878454208374, "learning_rate": 0.00036616100766703177, "loss": 0.7982, "step": 6930 }, { "epoch": 0.4696157396820557, "grad_norm": 8.032842636108398, "learning_rate": 0.00036615553121577217, "loss": 1.24, "step": 6931 }, { "epoch": 0.4696834955238797, "grad_norm": 3.022331953048706, "learning_rate": 0.0003661500547645126, "loss": 0.7937, "step": 6932 }, { "epoch": 0.46975125136570367, "grad_norm": 3.966614007949829, "learning_rate": 0.000366144578313253, "loss": 0.998, "step": 6933 }, { "epoch": 0.4698190072075277, "grad_norm": 2.9773447513580322, "learning_rate": 0.0003661391018619934, "loss": 0.7625, "step": 6934 }, { "epoch": 0.4698867630493517, "grad_norm": 3.1794166564941406, "learning_rate": 0.0003661336254107339, "loss": 0.8113, "step": 6935 }, { "epoch": 0.4699545188911756, "grad_norm": 3.9735448360443115, "learning_rate": 0.0003661281489594743, "loss": 1.0469, "step": 6936 }, { "epoch": 0.4700222747329996, "grad_norm": 3.1360580921173096, "learning_rate": 0.00036612267250821473, "loss": 0.7612, "step": 6937 }, { "epoch": 0.4700900305748236, "grad_norm": 4.010094165802002, "learning_rate": 0.0003661171960569551, "loss": 1.1591, "step": 6938 }, { "epoch": 0.4701577864166476, "grad_norm": 3.858873128890991, "learning_rate": 0.0003661117196056955, "loss": 1.0644, "step": 6939 }, { "epoch": 0.4702255422584716, "grad_norm": 4.409232139587402, "learning_rate": 0.0003661062431544359, "loss": 0.9288, "step": 6940 }, { "epoch": 0.4702932981002956, "grad_norm": 2.6737492084503174, "learning_rate": 0.0003661007667031763, "loss": 0.7498, "step": 6941 }, { "epoch": 0.4703610539421196, "grad_norm": 3.284297227859497, "learning_rate": 0.0003660952902519168, "loss": 0.8842, "step": 6942 }, { "epoch": 0.4704288097839436, "grad_norm": 3.2821011543273926, "learning_rate": 0.00036608981380065723, "loss": 0.8974, "step": 6943 }, { "epoch": 0.4704965656257675, "grad_norm": 4.192963123321533, "learning_rate": 0.00036608433734939763, "loss": 1.0426, "step": 6944 }, { "epoch": 0.47056432146759153, "grad_norm": 3.1401970386505127, "learning_rate": 0.00036607886089813803, "loss": 0.9265, "step": 6945 }, { "epoch": 0.47063207730941553, "grad_norm": 3.7897911071777344, "learning_rate": 0.00036607338444687843, "loss": 0.9217, "step": 6946 }, { "epoch": 0.47069983315123953, "grad_norm": 2.9557628631591797, "learning_rate": 0.00036606790799561883, "loss": 0.8174, "step": 6947 }, { "epoch": 0.4707675889930635, "grad_norm": 3.0121445655822754, "learning_rate": 0.0003660624315443593, "loss": 0.8854, "step": 6948 }, { "epoch": 0.4708353448348875, "grad_norm": 12.207379341125488, "learning_rate": 0.0003660569550930997, "loss": 0.8858, "step": 6949 }, { "epoch": 0.4709031006767115, "grad_norm": 2.6678264141082764, "learning_rate": 0.0003660514786418401, "loss": 0.6724, "step": 6950 }, { "epoch": 0.47097085651853543, "grad_norm": 3.0960118770599365, "learning_rate": 0.00036604600219058053, "loss": 0.7807, "step": 6951 }, { "epoch": 0.47103861236035943, "grad_norm": 3.5533461570739746, "learning_rate": 0.00036604052573932093, "loss": 0.8438, "step": 6952 }, { "epoch": 0.47110636820218343, "grad_norm": 3.35984468460083, "learning_rate": 0.0003660350492880614, "loss": 0.9822, "step": 6953 }, { "epoch": 0.47117412404400744, "grad_norm": 3.2131290435791016, "learning_rate": 0.0003660295728368018, "loss": 0.8044, "step": 6954 }, { "epoch": 0.4712418798858314, "grad_norm": 3.089059591293335, "learning_rate": 0.0003660240963855422, "loss": 0.6885, "step": 6955 }, { "epoch": 0.4713096357276554, "grad_norm": 2.815614938735962, "learning_rate": 0.0003660186199342826, "loss": 0.8631, "step": 6956 }, { "epoch": 0.4713773915694794, "grad_norm": 3.5746452808380127, "learning_rate": 0.000366013143483023, "loss": 0.9102, "step": 6957 }, { "epoch": 0.4714451474113034, "grad_norm": 3.2329461574554443, "learning_rate": 0.00036600766703176344, "loss": 0.8479, "step": 6958 }, { "epoch": 0.47151290325312734, "grad_norm": 2.9882423877716064, "learning_rate": 0.0003660021905805039, "loss": 0.848, "step": 6959 }, { "epoch": 0.47158065909495134, "grad_norm": 3.7672841548919678, "learning_rate": 0.0003659967141292443, "loss": 1.0059, "step": 6960 }, { "epoch": 0.47164841493677534, "grad_norm": 3.0679314136505127, "learning_rate": 0.0003659912376779847, "loss": 0.8082, "step": 6961 }, { "epoch": 0.47171617077859934, "grad_norm": 3.973259210586548, "learning_rate": 0.0003659857612267251, "loss": 1.0344, "step": 6962 }, { "epoch": 0.4717839266204233, "grad_norm": 3.4887943267822266, "learning_rate": 0.0003659802847754655, "loss": 1.0379, "step": 6963 }, { "epoch": 0.4718516824622473, "grad_norm": 4.421141147613525, "learning_rate": 0.00036597480832420594, "loss": 1.079, "step": 6964 }, { "epoch": 0.4719194383040713, "grad_norm": 3.239165782928467, "learning_rate": 0.00036596933187294634, "loss": 0.984, "step": 6965 }, { "epoch": 0.47198719414589524, "grad_norm": 2.4411447048187256, "learning_rate": 0.0003659638554216868, "loss": 0.609, "step": 6966 }, { "epoch": 0.47205494998771924, "grad_norm": 3.2318875789642334, "learning_rate": 0.0003659583789704272, "loss": 0.7866, "step": 6967 }, { "epoch": 0.47212270582954324, "grad_norm": 2.9246389865875244, "learning_rate": 0.0003659529025191676, "loss": 0.8373, "step": 6968 }, { "epoch": 0.47219046167136725, "grad_norm": 3.421640396118164, "learning_rate": 0.000365947426067908, "loss": 0.9237, "step": 6969 }, { "epoch": 0.4722582175131912, "grad_norm": 2.4820520877838135, "learning_rate": 0.00036594194961664844, "loss": 0.6321, "step": 6970 }, { "epoch": 0.4723259733550152, "grad_norm": 3.0315701961517334, "learning_rate": 0.00036593647316538884, "loss": 0.8349, "step": 6971 }, { "epoch": 0.4723937291968392, "grad_norm": 3.5067570209503174, "learning_rate": 0.00036593099671412924, "loss": 0.9877, "step": 6972 }, { "epoch": 0.4724614850386632, "grad_norm": 3.218188524246216, "learning_rate": 0.00036592552026286964, "loss": 0.9253, "step": 6973 }, { "epoch": 0.47252924088048714, "grad_norm": 2.806748151779175, "learning_rate": 0.0003659200438116101, "loss": 0.7986, "step": 6974 }, { "epoch": 0.47259699672231115, "grad_norm": 3.639439821243286, "learning_rate": 0.00036591456736035055, "loss": 0.9353, "step": 6975 }, { "epoch": 0.47266475256413515, "grad_norm": 2.663243293762207, "learning_rate": 0.00036590909090909095, "loss": 0.7807, "step": 6976 }, { "epoch": 0.47273250840595915, "grad_norm": 2.8811564445495605, "learning_rate": 0.00036590361445783135, "loss": 0.8371, "step": 6977 }, { "epoch": 0.4728002642477831, "grad_norm": 2.428194999694824, "learning_rate": 0.00036589813800657175, "loss": 0.7108, "step": 6978 }, { "epoch": 0.4728680200896071, "grad_norm": 3.1685309410095215, "learning_rate": 0.00036589266155531214, "loss": 0.9306, "step": 6979 }, { "epoch": 0.4729357759314311, "grad_norm": 4.389512062072754, "learning_rate": 0.0003658871851040526, "loss": 1.0503, "step": 6980 }, { "epoch": 0.47300353177325505, "grad_norm": 4.455149173736572, "learning_rate": 0.000365881708652793, "loss": 0.9788, "step": 6981 }, { "epoch": 0.47307128761507905, "grad_norm": 16.27849578857422, "learning_rate": 0.00036587623220153345, "loss": 0.9313, "step": 6982 }, { "epoch": 0.47313904345690305, "grad_norm": 3.4676337242126465, "learning_rate": 0.00036587075575027385, "loss": 0.8816, "step": 6983 }, { "epoch": 0.47320679929872705, "grad_norm": 3.4668054580688477, "learning_rate": 0.00036586527929901425, "loss": 0.7203, "step": 6984 }, { "epoch": 0.473274555140551, "grad_norm": 3.4497933387756348, "learning_rate": 0.00036585980284775465, "loss": 1.0012, "step": 6985 }, { "epoch": 0.473342310982375, "grad_norm": 5.555417060852051, "learning_rate": 0.0003658543263964951, "loss": 1.2974, "step": 6986 }, { "epoch": 0.473410066824199, "grad_norm": 4.148613452911377, "learning_rate": 0.0003658488499452355, "loss": 1.0836, "step": 6987 }, { "epoch": 0.473477822666023, "grad_norm": 3.5745673179626465, "learning_rate": 0.0003658433734939759, "loss": 0.7677, "step": 6988 }, { "epoch": 0.47354557850784695, "grad_norm": 5.7792181968688965, "learning_rate": 0.0003658378970427163, "loss": 1.1919, "step": 6989 }, { "epoch": 0.47361333434967096, "grad_norm": 3.771982431411743, "learning_rate": 0.00036583242059145675, "loss": 0.946, "step": 6990 }, { "epoch": 0.47368109019149496, "grad_norm": 3.61588191986084, "learning_rate": 0.0003658269441401972, "loss": 0.9939, "step": 6991 }, { "epoch": 0.47374884603331896, "grad_norm": 3.271522045135498, "learning_rate": 0.0003658214676889376, "loss": 0.7589, "step": 6992 }, { "epoch": 0.4738166018751429, "grad_norm": 2.7743799686431885, "learning_rate": 0.000365815991237678, "loss": 0.7557, "step": 6993 }, { "epoch": 0.4738843577169669, "grad_norm": 3.985610008239746, "learning_rate": 0.0003658105147864184, "loss": 0.8632, "step": 6994 }, { "epoch": 0.4739521135587909, "grad_norm": 3.173464775085449, "learning_rate": 0.0003658050383351588, "loss": 0.8487, "step": 6995 }, { "epoch": 0.47401986940061486, "grad_norm": 3.816845417022705, "learning_rate": 0.00036579956188389926, "loss": 0.8224, "step": 6996 }, { "epoch": 0.47408762524243886, "grad_norm": 5.161347389221191, "learning_rate": 0.0003657940854326397, "loss": 1.0825, "step": 6997 }, { "epoch": 0.47415538108426286, "grad_norm": 3.250669479370117, "learning_rate": 0.0003657886089813801, "loss": 0.8315, "step": 6998 }, { "epoch": 0.47422313692608686, "grad_norm": 3.5537071228027344, "learning_rate": 0.0003657831325301205, "loss": 0.8912, "step": 6999 }, { "epoch": 0.4742908927679108, "grad_norm": 5.161866188049316, "learning_rate": 0.0003657776560788609, "loss": 0.8229, "step": 7000 }, { "epoch": 0.4743586486097348, "grad_norm": 2.772449016571045, "learning_rate": 0.0003657721796276013, "loss": 0.6658, "step": 7001 }, { "epoch": 0.4744264044515588, "grad_norm": 3.452122449874878, "learning_rate": 0.00036576670317634176, "loss": 1.0359, "step": 7002 }, { "epoch": 0.4744941602933828, "grad_norm": 3.271793842315674, "learning_rate": 0.00036576122672508216, "loss": 0.7493, "step": 7003 }, { "epoch": 0.47456191613520676, "grad_norm": 4.461458206176758, "learning_rate": 0.00036575575027382256, "loss": 1.0327, "step": 7004 }, { "epoch": 0.47462967197703076, "grad_norm": 3.1749188899993896, "learning_rate": 0.000365750273822563, "loss": 0.7017, "step": 7005 }, { "epoch": 0.47469742781885477, "grad_norm": 4.022637844085693, "learning_rate": 0.0003657447973713034, "loss": 0.9998, "step": 7006 }, { "epoch": 0.47476518366067877, "grad_norm": 4.608244895935059, "learning_rate": 0.0003657393209200438, "loss": 0.9491, "step": 7007 }, { "epoch": 0.4748329395025027, "grad_norm": 4.1265387535095215, "learning_rate": 0.00036573384446878426, "loss": 0.9801, "step": 7008 }, { "epoch": 0.4749006953443267, "grad_norm": 3.126647710800171, "learning_rate": 0.00036572836801752466, "loss": 0.8472, "step": 7009 }, { "epoch": 0.4749684511861507, "grad_norm": 3.9151270389556885, "learning_rate": 0.00036572289156626506, "loss": 0.8419, "step": 7010 }, { "epoch": 0.47503620702797467, "grad_norm": 3.9314513206481934, "learning_rate": 0.00036571741511500546, "loss": 1.0649, "step": 7011 }, { "epoch": 0.47510396286979867, "grad_norm": 2.8826382160186768, "learning_rate": 0.0003657119386637459, "loss": 0.8215, "step": 7012 }, { "epoch": 0.47517171871162267, "grad_norm": 4.035910129547119, "learning_rate": 0.00036570646221248637, "loss": 0.947, "step": 7013 }, { "epoch": 0.4752394745534467, "grad_norm": 4.314208030700684, "learning_rate": 0.00036570098576122677, "loss": 0.7441, "step": 7014 }, { "epoch": 0.4753072303952706, "grad_norm": 3.9626882076263428, "learning_rate": 0.00036569550930996717, "loss": 0.803, "step": 7015 }, { "epoch": 0.4753749862370946, "grad_norm": 3.7505524158477783, "learning_rate": 0.00036569003285870757, "loss": 0.9136, "step": 7016 }, { "epoch": 0.4754427420789186, "grad_norm": 2.8343615531921387, "learning_rate": 0.00036568455640744797, "loss": 0.8121, "step": 7017 }, { "epoch": 0.4755104979207426, "grad_norm": 3.3346915245056152, "learning_rate": 0.0003656790799561884, "loss": 0.8321, "step": 7018 }, { "epoch": 0.47557825376256657, "grad_norm": 3.793213367462158, "learning_rate": 0.0003656736035049288, "loss": 0.7769, "step": 7019 }, { "epoch": 0.4756460096043906, "grad_norm": 3.6610019207000732, "learning_rate": 0.0003656681270536692, "loss": 1.0599, "step": 7020 }, { "epoch": 0.4757137654462146, "grad_norm": 3.3080999851226807, "learning_rate": 0.00036566265060240967, "loss": 0.923, "step": 7021 }, { "epoch": 0.4757815212880386, "grad_norm": 3.423093318939209, "learning_rate": 0.00036565717415115007, "loss": 1.0008, "step": 7022 }, { "epoch": 0.4758492771298625, "grad_norm": 3.085655450820923, "learning_rate": 0.00036565169769989047, "loss": 0.8721, "step": 7023 }, { "epoch": 0.4759170329716865, "grad_norm": 3.466634511947632, "learning_rate": 0.0003656462212486309, "loss": 0.9821, "step": 7024 }, { "epoch": 0.47598478881351053, "grad_norm": 3.055826425552368, "learning_rate": 0.0003656407447973713, "loss": 0.8331, "step": 7025 }, { "epoch": 0.4760525446553345, "grad_norm": 3.2738735675811768, "learning_rate": 0.0003656352683461117, "loss": 0.9977, "step": 7026 }, { "epoch": 0.4761203004971585, "grad_norm": 2.746328592300415, "learning_rate": 0.0003656297918948521, "loss": 0.7755, "step": 7027 }, { "epoch": 0.4761880563389825, "grad_norm": 3.1996796131134033, "learning_rate": 0.0003656243154435926, "loss": 0.8926, "step": 7028 }, { "epoch": 0.4762558121808065, "grad_norm": 3.0982675552368164, "learning_rate": 0.00036561883899233303, "loss": 0.8818, "step": 7029 }, { "epoch": 0.47632356802263043, "grad_norm": 2.9161229133605957, "learning_rate": 0.0003656133625410734, "loss": 0.6707, "step": 7030 }, { "epoch": 0.47639132386445443, "grad_norm": 3.1463730335235596, "learning_rate": 0.0003656078860898138, "loss": 1.0249, "step": 7031 }, { "epoch": 0.47645907970627843, "grad_norm": 2.980497121810913, "learning_rate": 0.0003656024096385542, "loss": 0.8362, "step": 7032 }, { "epoch": 0.47652683554810243, "grad_norm": 4.893301010131836, "learning_rate": 0.0003655969331872946, "loss": 0.7375, "step": 7033 }, { "epoch": 0.4765945913899264, "grad_norm": 3.25658917427063, "learning_rate": 0.0003655914567360351, "loss": 0.8783, "step": 7034 }, { "epoch": 0.4766623472317504, "grad_norm": 3.71366286277771, "learning_rate": 0.0003655859802847755, "loss": 1.0476, "step": 7035 }, { "epoch": 0.4767301030735744, "grad_norm": 3.779766082763672, "learning_rate": 0.00036558050383351593, "loss": 0.8555, "step": 7036 }, { "epoch": 0.4767978589153984, "grad_norm": 3.2486913204193115, "learning_rate": 0.00036557502738225633, "loss": 0.8306, "step": 7037 }, { "epoch": 0.47686561475722233, "grad_norm": 3.9256961345672607, "learning_rate": 0.00036556955093099673, "loss": 0.9648, "step": 7038 }, { "epoch": 0.47693337059904634, "grad_norm": 4.156589508056641, "learning_rate": 0.00036556407447973713, "loss": 0.8858, "step": 7039 }, { "epoch": 0.47700112644087034, "grad_norm": 4.325308322906494, "learning_rate": 0.0003655585980284776, "loss": 0.8432, "step": 7040 }, { "epoch": 0.4770688822826943, "grad_norm": 3.2549500465393066, "learning_rate": 0.000365553121577218, "loss": 0.9023, "step": 7041 }, { "epoch": 0.4771366381245183, "grad_norm": 3.8657357692718506, "learning_rate": 0.0003655476451259584, "loss": 0.8929, "step": 7042 }, { "epoch": 0.4772043939663423, "grad_norm": 4.100378513336182, "learning_rate": 0.0003655421686746988, "loss": 1.11, "step": 7043 }, { "epoch": 0.4772721498081663, "grad_norm": 4.510182857513428, "learning_rate": 0.00036553669222343923, "loss": 1.0536, "step": 7044 }, { "epoch": 0.47733990564999024, "grad_norm": 2.7603743076324463, "learning_rate": 0.00036553121577217963, "loss": 0.6272, "step": 7045 }, { "epoch": 0.47740766149181424, "grad_norm": 5.411044120788574, "learning_rate": 0.0003655257393209201, "loss": 1.0136, "step": 7046 }, { "epoch": 0.47747541733363824, "grad_norm": 2.8770270347595215, "learning_rate": 0.0003655202628696605, "loss": 0.8777, "step": 7047 }, { "epoch": 0.47754317317546224, "grad_norm": 4.0049729347229, "learning_rate": 0.0003655147864184009, "loss": 0.8581, "step": 7048 }, { "epoch": 0.4776109290172862, "grad_norm": 3.4769012928009033, "learning_rate": 0.0003655093099671413, "loss": 0.8718, "step": 7049 }, { "epoch": 0.4776786848591102, "grad_norm": 3.089939832687378, "learning_rate": 0.00036550383351588174, "loss": 0.8572, "step": 7050 }, { "epoch": 0.4777464407009342, "grad_norm": 2.7561824321746826, "learning_rate": 0.00036549835706462214, "loss": 0.7296, "step": 7051 }, { "epoch": 0.4778141965427582, "grad_norm": 3.5617809295654297, "learning_rate": 0.0003654928806133626, "loss": 1.0087, "step": 7052 }, { "epoch": 0.47788195238458214, "grad_norm": 3.13749098777771, "learning_rate": 0.000365487404162103, "loss": 0.7899, "step": 7053 }, { "epoch": 0.47794970822640614, "grad_norm": 3.4210963249206543, "learning_rate": 0.0003654819277108434, "loss": 0.9197, "step": 7054 }, { "epoch": 0.47801746406823015, "grad_norm": 4.553922176361084, "learning_rate": 0.0003654764512595838, "loss": 1.3402, "step": 7055 }, { "epoch": 0.4780852199100541, "grad_norm": 3.3222172260284424, "learning_rate": 0.00036547097480832424, "loss": 0.8878, "step": 7056 }, { "epoch": 0.4781529757518781, "grad_norm": 4.563650131225586, "learning_rate": 0.00036546549835706464, "loss": 0.9309, "step": 7057 }, { "epoch": 0.4782207315937021, "grad_norm": 2.821500778198242, "learning_rate": 0.00036546002190580504, "loss": 0.7737, "step": 7058 }, { "epoch": 0.4782884874355261, "grad_norm": 3.146010637283325, "learning_rate": 0.0003654545454545455, "loss": 0.8593, "step": 7059 }, { "epoch": 0.47835624327735005, "grad_norm": 3.8430845737457275, "learning_rate": 0.0003654490690032859, "loss": 1.0944, "step": 7060 }, { "epoch": 0.47842399911917405, "grad_norm": 4.156015872955322, "learning_rate": 0.0003654435925520263, "loss": 1.1063, "step": 7061 }, { "epoch": 0.47849175496099805, "grad_norm": 2.6701478958129883, "learning_rate": 0.00036543811610076674, "loss": 0.738, "step": 7062 }, { "epoch": 0.47855951080282205, "grad_norm": 3.3557567596435547, "learning_rate": 0.00036543263964950714, "loss": 0.9686, "step": 7063 }, { "epoch": 0.478627266644646, "grad_norm": 3.320216178894043, "learning_rate": 0.00036542716319824754, "loss": 0.8458, "step": 7064 }, { "epoch": 0.47869502248647, "grad_norm": 3.1010162830352783, "learning_rate": 0.00036542168674698794, "loss": 0.8541, "step": 7065 }, { "epoch": 0.478762778328294, "grad_norm": 3.3832695484161377, "learning_rate": 0.00036541621029572834, "loss": 1.0554, "step": 7066 }, { "epoch": 0.478830534170118, "grad_norm": 3.4255588054656982, "learning_rate": 0.00036541073384446885, "loss": 0.8683, "step": 7067 }, { "epoch": 0.47889829001194195, "grad_norm": 3.5920116901397705, "learning_rate": 0.00036540525739320925, "loss": 0.8276, "step": 7068 }, { "epoch": 0.47896604585376595, "grad_norm": 3.140841007232666, "learning_rate": 0.00036539978094194965, "loss": 0.9274, "step": 7069 }, { "epoch": 0.47903380169558996, "grad_norm": 4.105048656463623, "learning_rate": 0.00036539430449069005, "loss": 1.0483, "step": 7070 }, { "epoch": 0.4791015575374139, "grad_norm": 3.4043891429901123, "learning_rate": 0.00036538882803943044, "loss": 0.811, "step": 7071 }, { "epoch": 0.4791693133792379, "grad_norm": 7.139101028442383, "learning_rate": 0.0003653833515881709, "loss": 1.0361, "step": 7072 }, { "epoch": 0.4792370692210619, "grad_norm": 4.112957000732422, "learning_rate": 0.0003653778751369113, "loss": 1.1551, "step": 7073 }, { "epoch": 0.4793048250628859, "grad_norm": 3.147721767425537, "learning_rate": 0.0003653723986856517, "loss": 0.8365, "step": 7074 }, { "epoch": 0.47937258090470986, "grad_norm": 2.424685478210449, "learning_rate": 0.00036536692223439215, "loss": 0.7275, "step": 7075 }, { "epoch": 0.47944033674653386, "grad_norm": 13.267829895019531, "learning_rate": 0.00036536144578313255, "loss": 1.0718, "step": 7076 }, { "epoch": 0.47950809258835786, "grad_norm": 3.5957093238830566, "learning_rate": 0.00036535596933187295, "loss": 0.9365, "step": 7077 }, { "epoch": 0.47957584843018186, "grad_norm": 3.159766912460327, "learning_rate": 0.0003653504928806134, "loss": 0.8365, "step": 7078 }, { "epoch": 0.4796436042720058, "grad_norm": 4.329627513885498, "learning_rate": 0.0003653450164293538, "loss": 1.0137, "step": 7079 }, { "epoch": 0.4797113601138298, "grad_norm": 3.289166212081909, "learning_rate": 0.0003653395399780942, "loss": 0.6609, "step": 7080 }, { "epoch": 0.4797791159556538, "grad_norm": 2.8836162090301514, "learning_rate": 0.0003653340635268346, "loss": 0.9122, "step": 7081 }, { "epoch": 0.4798468717974778, "grad_norm": 4.472630977630615, "learning_rate": 0.000365328587075575, "loss": 0.9543, "step": 7082 }, { "epoch": 0.47991462763930176, "grad_norm": 4.552314281463623, "learning_rate": 0.00036532311062431545, "loss": 0.9072, "step": 7083 }, { "epoch": 0.47998238348112576, "grad_norm": 4.209261417388916, "learning_rate": 0.0003653176341730559, "loss": 0.8918, "step": 7084 }, { "epoch": 0.48005013932294976, "grad_norm": 3.4839067459106445, "learning_rate": 0.0003653121577217963, "loss": 0.8509, "step": 7085 }, { "epoch": 0.4801178951647737, "grad_norm": 3.296630620956421, "learning_rate": 0.0003653066812705367, "loss": 0.9678, "step": 7086 }, { "epoch": 0.4801856510065977, "grad_norm": 3.5066168308258057, "learning_rate": 0.0003653012048192771, "loss": 0.9769, "step": 7087 }, { "epoch": 0.4802534068484217, "grad_norm": 3.481354236602783, "learning_rate": 0.00036529572836801756, "loss": 0.9075, "step": 7088 }, { "epoch": 0.4803211626902457, "grad_norm": 3.689138650894165, "learning_rate": 0.00036529025191675796, "loss": 0.788, "step": 7089 }, { "epoch": 0.48038891853206966, "grad_norm": 3.30409574508667, "learning_rate": 0.0003652847754654984, "loss": 0.9533, "step": 7090 }, { "epoch": 0.48045667437389367, "grad_norm": 3.4780139923095703, "learning_rate": 0.0003652792990142388, "loss": 0.677, "step": 7091 }, { "epoch": 0.48052443021571767, "grad_norm": 2.803194761276245, "learning_rate": 0.0003652738225629792, "loss": 0.8808, "step": 7092 }, { "epoch": 0.48059218605754167, "grad_norm": 3.5123276710510254, "learning_rate": 0.0003652683461117196, "loss": 0.8405, "step": 7093 }, { "epoch": 0.4806599418993656, "grad_norm": 3.232330083847046, "learning_rate": 0.00036526286966046006, "loss": 0.8695, "step": 7094 }, { "epoch": 0.4807276977411896, "grad_norm": 3.2097840309143066, "learning_rate": 0.00036525739320920046, "loss": 0.9425, "step": 7095 }, { "epoch": 0.4807954535830136, "grad_norm": 2.415560483932495, "learning_rate": 0.00036525191675794086, "loss": 0.8282, "step": 7096 }, { "epoch": 0.4808632094248376, "grad_norm": 5.051825046539307, "learning_rate": 0.00036524644030668126, "loss": 0.9208, "step": 7097 }, { "epoch": 0.48093096526666157, "grad_norm": 3.5623295307159424, "learning_rate": 0.0003652409638554217, "loss": 1.0554, "step": 7098 }, { "epoch": 0.48099872110848557, "grad_norm": 2.834681987762451, "learning_rate": 0.0003652354874041621, "loss": 0.8731, "step": 7099 }, { "epoch": 0.4810664769503096, "grad_norm": 2.9720146656036377, "learning_rate": 0.00036523001095290256, "loss": 0.7322, "step": 7100 }, { "epoch": 0.4811342327921335, "grad_norm": 3.2971222400665283, "learning_rate": 0.00036522453450164296, "loss": 0.9088, "step": 7101 }, { "epoch": 0.4812019886339575, "grad_norm": 4.506463050842285, "learning_rate": 0.00036521905805038336, "loss": 0.8585, "step": 7102 }, { "epoch": 0.4812697444757815, "grad_norm": 3.0528645515441895, "learning_rate": 0.00036521358159912376, "loss": 0.7498, "step": 7103 }, { "epoch": 0.4813375003176055, "grad_norm": 4.038626670837402, "learning_rate": 0.00036520810514786416, "loss": 0.9752, "step": 7104 }, { "epoch": 0.4814052561594295, "grad_norm": 4.3903703689575195, "learning_rate": 0.0003652026286966046, "loss": 0.549, "step": 7105 }, { "epoch": 0.4814730120012535, "grad_norm": 2.9904773235321045, "learning_rate": 0.00036519715224534507, "loss": 0.7406, "step": 7106 }, { "epoch": 0.4815407678430775, "grad_norm": 3.0249788761138916, "learning_rate": 0.00036519167579408547, "loss": 0.9516, "step": 7107 }, { "epoch": 0.4816085236849015, "grad_norm": 3.8840136528015137, "learning_rate": 0.00036518619934282587, "loss": 0.8772, "step": 7108 }, { "epoch": 0.4816762795267254, "grad_norm": 4.370389461517334, "learning_rate": 0.00036518072289156627, "loss": 0.9199, "step": 7109 }, { "epoch": 0.48174403536854943, "grad_norm": 3.2719006538391113, "learning_rate": 0.0003651752464403067, "loss": 0.8319, "step": 7110 }, { "epoch": 0.48181179121037343, "grad_norm": 3.5225675106048584, "learning_rate": 0.0003651697699890471, "loss": 0.9515, "step": 7111 }, { "epoch": 0.48187954705219743, "grad_norm": 4.293737888336182, "learning_rate": 0.0003651642935377875, "loss": 0.9931, "step": 7112 }, { "epoch": 0.4819473028940214, "grad_norm": 4.4228081703186035, "learning_rate": 0.0003651588170865279, "loss": 0.6454, "step": 7113 }, { "epoch": 0.4820150587358454, "grad_norm": 3.9917142391204834, "learning_rate": 0.00036515334063526837, "loss": 0.9599, "step": 7114 }, { "epoch": 0.4820828145776694, "grad_norm": 3.7871861457824707, "learning_rate": 0.00036514786418400877, "loss": 0.843, "step": 7115 }, { "epoch": 0.48215057041949333, "grad_norm": 4.02907657623291, "learning_rate": 0.0003651423877327492, "loss": 0.867, "step": 7116 }, { "epoch": 0.48221832626131733, "grad_norm": 2.9835171699523926, "learning_rate": 0.0003651369112814896, "loss": 0.7628, "step": 7117 }, { "epoch": 0.48228608210314133, "grad_norm": 3.240896701812744, "learning_rate": 0.00036513143483023, "loss": 0.7861, "step": 7118 }, { "epoch": 0.48235383794496534, "grad_norm": 3.6979970932006836, "learning_rate": 0.0003651259583789704, "loss": 0.9943, "step": 7119 }, { "epoch": 0.4824215937867893, "grad_norm": 3.548574924468994, "learning_rate": 0.0003651204819277108, "loss": 0.9175, "step": 7120 }, { "epoch": 0.4824893496286133, "grad_norm": 3.562084913253784, "learning_rate": 0.00036511500547645127, "loss": 0.9229, "step": 7121 }, { "epoch": 0.4825571054704373, "grad_norm": 3.261357307434082, "learning_rate": 0.0003651095290251917, "loss": 0.9845, "step": 7122 }, { "epoch": 0.4826248613122613, "grad_norm": 6.5938239097595215, "learning_rate": 0.0003651040525739321, "loss": 1.0733, "step": 7123 }, { "epoch": 0.48269261715408524, "grad_norm": 3.627697467803955, "learning_rate": 0.0003650985761226725, "loss": 0.7509, "step": 7124 }, { "epoch": 0.48276037299590924, "grad_norm": 3.180096387863159, "learning_rate": 0.0003650930996714129, "loss": 0.8708, "step": 7125 }, { "epoch": 0.48282812883773324, "grad_norm": 3.4338533878326416, "learning_rate": 0.0003650876232201534, "loss": 0.8672, "step": 7126 }, { "epoch": 0.48289588467955724, "grad_norm": 3.016592502593994, "learning_rate": 0.0003650821467688938, "loss": 0.7978, "step": 7127 }, { "epoch": 0.4829636405213812, "grad_norm": 4.170135974884033, "learning_rate": 0.0003650766703176342, "loss": 1.0262, "step": 7128 }, { "epoch": 0.4830313963632052, "grad_norm": 2.348719358444214, "learning_rate": 0.00036507119386637463, "loss": 0.6891, "step": 7129 }, { "epoch": 0.4830991522050292, "grad_norm": 3.5059854984283447, "learning_rate": 0.00036506571741511503, "loss": 1.2394, "step": 7130 }, { "epoch": 0.48316690804685314, "grad_norm": 3.5658209323883057, "learning_rate": 0.00036506024096385543, "loss": 0.903, "step": 7131 }, { "epoch": 0.48323466388867714, "grad_norm": 4.500290870666504, "learning_rate": 0.0003650547645125959, "loss": 0.9241, "step": 7132 }, { "epoch": 0.48330241973050114, "grad_norm": 3.2235348224639893, "learning_rate": 0.0003650492880613363, "loss": 0.888, "step": 7133 }, { "epoch": 0.48337017557232514, "grad_norm": 3.3148717880249023, "learning_rate": 0.0003650438116100767, "loss": 0.8846, "step": 7134 }, { "epoch": 0.4834379314141491, "grad_norm": 2.3654325008392334, "learning_rate": 0.0003650383351588171, "loss": 0.5808, "step": 7135 }, { "epoch": 0.4835056872559731, "grad_norm": 4.137966156005859, "learning_rate": 0.0003650328587075575, "loss": 0.9064, "step": 7136 }, { "epoch": 0.4835734430977971, "grad_norm": 2.8993146419525146, "learning_rate": 0.00036502738225629793, "loss": 0.7764, "step": 7137 }, { "epoch": 0.4836411989396211, "grad_norm": 4.070688247680664, "learning_rate": 0.0003650219058050384, "loss": 1.1644, "step": 7138 }, { "epoch": 0.48370895478144504, "grad_norm": 3.4477105140686035, "learning_rate": 0.0003650164293537788, "loss": 0.9385, "step": 7139 }, { "epoch": 0.48377671062326905, "grad_norm": 3.2917897701263428, "learning_rate": 0.0003650109529025192, "loss": 0.9221, "step": 7140 }, { "epoch": 0.48384446646509305, "grad_norm": 4.0656328201293945, "learning_rate": 0.0003650054764512596, "loss": 1.0095, "step": 7141 }, { "epoch": 0.48391222230691705, "grad_norm": 2.8387787342071533, "learning_rate": 0.000365, "loss": 0.6955, "step": 7142 }, { "epoch": 0.483979978148741, "grad_norm": 2.958615779876709, "learning_rate": 0.00036499452354874044, "loss": 0.7257, "step": 7143 }, { "epoch": 0.484047733990565, "grad_norm": 2.971607208251953, "learning_rate": 0.00036498904709748083, "loss": 0.9944, "step": 7144 }, { "epoch": 0.484115489832389, "grad_norm": 5.910160541534424, "learning_rate": 0.0003649835706462213, "loss": 1.0089, "step": 7145 }, { "epoch": 0.48418324567421295, "grad_norm": 3.5335710048675537, "learning_rate": 0.0003649780941949617, "loss": 0.8118, "step": 7146 }, { "epoch": 0.48425100151603695, "grad_norm": 3.706664562225342, "learning_rate": 0.0003649726177437021, "loss": 0.7985, "step": 7147 }, { "epoch": 0.48431875735786095, "grad_norm": 3.7340774536132812, "learning_rate": 0.00036496714129244254, "loss": 0.8803, "step": 7148 }, { "epoch": 0.48438651319968495, "grad_norm": 3.5569095611572266, "learning_rate": 0.00036496166484118294, "loss": 1.054, "step": 7149 }, { "epoch": 0.4844542690415089, "grad_norm": 4.600679397583008, "learning_rate": 0.00036495618838992334, "loss": 0.8797, "step": 7150 }, { "epoch": 0.4845220248833329, "grad_norm": 3.6183366775512695, "learning_rate": 0.00036495071193866374, "loss": 1.1226, "step": 7151 }, { "epoch": 0.4845897807251569, "grad_norm": 2.7923455238342285, "learning_rate": 0.00036494523548740414, "loss": 0.7181, "step": 7152 }, { "epoch": 0.4846575365669809, "grad_norm": 4.835546493530273, "learning_rate": 0.0003649397590361446, "loss": 1.1671, "step": 7153 }, { "epoch": 0.48472529240880485, "grad_norm": 3.459317207336426, "learning_rate": 0.00036493428258488504, "loss": 0.8644, "step": 7154 }, { "epoch": 0.48479304825062886, "grad_norm": 3.739410877227783, "learning_rate": 0.00036492880613362544, "loss": 1.0839, "step": 7155 }, { "epoch": 0.48486080409245286, "grad_norm": 4.921090602874756, "learning_rate": 0.00036492332968236584, "loss": 0.7488, "step": 7156 }, { "epoch": 0.48492855993427686, "grad_norm": 2.932035446166992, "learning_rate": 0.00036491785323110624, "loss": 0.8031, "step": 7157 }, { "epoch": 0.4849963157761008, "grad_norm": 2.6947333812713623, "learning_rate": 0.00036491237677984664, "loss": 0.7467, "step": 7158 }, { "epoch": 0.4850640716179248, "grad_norm": 3.794215440750122, "learning_rate": 0.0003649069003285871, "loss": 1.1493, "step": 7159 }, { "epoch": 0.4851318274597488, "grad_norm": 3.6926331520080566, "learning_rate": 0.00036490142387732755, "loss": 0.8572, "step": 7160 }, { "epoch": 0.48519958330157276, "grad_norm": 3.1793718338012695, "learning_rate": 0.00036489594742606795, "loss": 0.8059, "step": 7161 }, { "epoch": 0.48526733914339676, "grad_norm": 2.9512412548065186, "learning_rate": 0.00036489047097480835, "loss": 0.7331, "step": 7162 }, { "epoch": 0.48533509498522076, "grad_norm": 3.587698459625244, "learning_rate": 0.00036488499452354874, "loss": 0.8368, "step": 7163 }, { "epoch": 0.48540285082704476, "grad_norm": 3.3382303714752197, "learning_rate": 0.0003648795180722892, "loss": 0.8784, "step": 7164 }, { "epoch": 0.4854706066688687, "grad_norm": 3.2260773181915283, "learning_rate": 0.0003648740416210296, "loss": 0.8711, "step": 7165 }, { "epoch": 0.4855383625106927, "grad_norm": 3.3344130516052246, "learning_rate": 0.00036486856516977, "loss": 0.9361, "step": 7166 }, { "epoch": 0.4856061183525167, "grad_norm": 3.3074846267700195, "learning_rate": 0.0003648630887185104, "loss": 1.0998, "step": 7167 }, { "epoch": 0.4856738741943407, "grad_norm": 5.344295024871826, "learning_rate": 0.00036485761226725085, "loss": 0.8425, "step": 7168 }, { "epoch": 0.48574163003616466, "grad_norm": 3.133944511413574, "learning_rate": 0.00036485213581599125, "loss": 0.8786, "step": 7169 }, { "epoch": 0.48580938587798866, "grad_norm": 3.6706655025482178, "learning_rate": 0.0003648466593647317, "loss": 1.072, "step": 7170 }, { "epoch": 0.48587714171981267, "grad_norm": 2.887371063232422, "learning_rate": 0.0003648411829134721, "loss": 0.9716, "step": 7171 }, { "epoch": 0.48594489756163667, "grad_norm": 2.860393524169922, "learning_rate": 0.0003648357064622125, "loss": 0.7751, "step": 7172 }, { "epoch": 0.4860126534034606, "grad_norm": 3.2582790851593018, "learning_rate": 0.0003648302300109529, "loss": 0.8447, "step": 7173 }, { "epoch": 0.4860804092452846, "grad_norm": 3.4169981479644775, "learning_rate": 0.0003648247535596933, "loss": 1.0744, "step": 7174 }, { "epoch": 0.4861481650871086, "grad_norm": 4.018370628356934, "learning_rate": 0.00036481927710843375, "loss": 0.9718, "step": 7175 }, { "epoch": 0.48621592092893257, "grad_norm": 3.739687442779541, "learning_rate": 0.0003648138006571742, "loss": 1.0595, "step": 7176 }, { "epoch": 0.48628367677075657, "grad_norm": 4.021834850311279, "learning_rate": 0.0003648083242059146, "loss": 0.7883, "step": 7177 }, { "epoch": 0.48635143261258057, "grad_norm": 3.863464117050171, "learning_rate": 0.000364802847754655, "loss": 0.9137, "step": 7178 }, { "epoch": 0.48641918845440457, "grad_norm": 3.238478660583496, "learning_rate": 0.0003647973713033954, "loss": 0.8528, "step": 7179 }, { "epoch": 0.4864869442962285, "grad_norm": 2.9305453300476074, "learning_rate": 0.0003647918948521358, "loss": 0.6888, "step": 7180 }, { "epoch": 0.4865547001380525, "grad_norm": 3.171278476715088, "learning_rate": 0.00036478641840087626, "loss": 0.9499, "step": 7181 }, { "epoch": 0.4866224559798765, "grad_norm": 3.5946550369262695, "learning_rate": 0.00036478094194961665, "loss": 0.8316, "step": 7182 }, { "epoch": 0.4866902118217005, "grad_norm": 2.5757858753204346, "learning_rate": 0.00036477546549835705, "loss": 0.6794, "step": 7183 }, { "epoch": 0.48675796766352447, "grad_norm": 3.524747610092163, "learning_rate": 0.0003647699890470975, "loss": 0.8973, "step": 7184 }, { "epoch": 0.4868257235053485, "grad_norm": 2.89768648147583, "learning_rate": 0.0003647645125958379, "loss": 0.7138, "step": 7185 }, { "epoch": 0.4868934793471725, "grad_norm": 3.884584903717041, "learning_rate": 0.00036475903614457836, "loss": 0.8778, "step": 7186 }, { "epoch": 0.4869612351889965, "grad_norm": 2.974853277206421, "learning_rate": 0.00036475355969331876, "loss": 0.527, "step": 7187 }, { "epoch": 0.4870289910308204, "grad_norm": 3.414931297302246, "learning_rate": 0.00036474808324205916, "loss": 1.0347, "step": 7188 }, { "epoch": 0.4870967468726444, "grad_norm": 3.6420962810516357, "learning_rate": 0.00036474260679079956, "loss": 0.8451, "step": 7189 }, { "epoch": 0.48716450271446843, "grad_norm": 5.097309589385986, "learning_rate": 0.00036473713033953996, "loss": 1.088, "step": 7190 }, { "epoch": 0.4872322585562924, "grad_norm": 4.49112606048584, "learning_rate": 0.0003647316538882804, "loss": 0.9722, "step": 7191 }, { "epoch": 0.4873000143981164, "grad_norm": 3.708585739135742, "learning_rate": 0.00036472617743702086, "loss": 0.9977, "step": 7192 }, { "epoch": 0.4873677702399404, "grad_norm": 3.6707522869110107, "learning_rate": 0.00036472070098576126, "loss": 1.0152, "step": 7193 }, { "epoch": 0.4874355260817644, "grad_norm": 3.5268642902374268, "learning_rate": 0.00036471522453450166, "loss": 0.8177, "step": 7194 }, { "epoch": 0.4875032819235883, "grad_norm": 2.9669647216796875, "learning_rate": 0.00036470974808324206, "loss": 0.7518, "step": 7195 }, { "epoch": 0.48757103776541233, "grad_norm": 3.615640163421631, "learning_rate": 0.00036470427163198246, "loss": 0.9294, "step": 7196 }, { "epoch": 0.48763879360723633, "grad_norm": 2.382514476776123, "learning_rate": 0.0003646987951807229, "loss": 0.6658, "step": 7197 }, { "epoch": 0.48770654944906033, "grad_norm": 3.800990581512451, "learning_rate": 0.0003646933187294633, "loss": 0.8933, "step": 7198 }, { "epoch": 0.4877743052908843, "grad_norm": 3.771376848220825, "learning_rate": 0.00036468784227820377, "loss": 0.9655, "step": 7199 }, { "epoch": 0.4878420611327083, "grad_norm": 4.421073913574219, "learning_rate": 0.00036468236582694417, "loss": 1.1454, "step": 7200 }, { "epoch": 0.4879098169745323, "grad_norm": 3.0794713497161865, "learning_rate": 0.00036467688937568457, "loss": 0.8761, "step": 7201 }, { "epoch": 0.4879775728163563, "grad_norm": 3.9848155975341797, "learning_rate": 0.000364671412924425, "loss": 1.1567, "step": 7202 }, { "epoch": 0.48804532865818023, "grad_norm": 3.933046579360962, "learning_rate": 0.0003646659364731654, "loss": 0.9133, "step": 7203 }, { "epoch": 0.48811308450000424, "grad_norm": 3.2673115730285645, "learning_rate": 0.0003646604600219058, "loss": 0.8498, "step": 7204 }, { "epoch": 0.48818084034182824, "grad_norm": 3.232728958129883, "learning_rate": 0.0003646549835706462, "loss": 0.7661, "step": 7205 }, { "epoch": 0.4882485961836522, "grad_norm": 3.2415552139282227, "learning_rate": 0.0003646495071193866, "loss": 0.8817, "step": 7206 }, { "epoch": 0.4883163520254762, "grad_norm": 2.229717254638672, "learning_rate": 0.00036464403066812707, "loss": 0.5357, "step": 7207 }, { "epoch": 0.4883841078673002, "grad_norm": 4.530365943908691, "learning_rate": 0.0003646385542168675, "loss": 0.934, "step": 7208 }, { "epoch": 0.4884518637091242, "grad_norm": 4.7035746574401855, "learning_rate": 0.0003646330777656079, "loss": 0.8951, "step": 7209 }, { "epoch": 0.48851961955094814, "grad_norm": 3.412269353866577, "learning_rate": 0.0003646276013143483, "loss": 0.8264, "step": 7210 }, { "epoch": 0.48858737539277214, "grad_norm": 4.032763481140137, "learning_rate": 0.0003646221248630887, "loss": 0.8776, "step": 7211 }, { "epoch": 0.48865513123459614, "grad_norm": 3.1023101806640625, "learning_rate": 0.0003646166484118291, "loss": 0.8691, "step": 7212 }, { "epoch": 0.48872288707642014, "grad_norm": 3.4920554161071777, "learning_rate": 0.00036461117196056957, "loss": 0.8909, "step": 7213 }, { "epoch": 0.4887906429182441, "grad_norm": 2.6165611743927, "learning_rate": 0.00036460569550930997, "loss": 0.6993, "step": 7214 }, { "epoch": 0.4888583987600681, "grad_norm": 2.4840362071990967, "learning_rate": 0.0003646002190580504, "loss": 0.7234, "step": 7215 }, { "epoch": 0.4889261546018921, "grad_norm": 3.1709091663360596, "learning_rate": 0.0003645947426067908, "loss": 0.8188, "step": 7216 }, { "epoch": 0.4889939104437161, "grad_norm": 10.934248924255371, "learning_rate": 0.0003645892661555312, "loss": 1.0186, "step": 7217 }, { "epoch": 0.48906166628554004, "grad_norm": 2.937351942062378, "learning_rate": 0.0003645837897042716, "loss": 0.6541, "step": 7218 }, { "epoch": 0.48912942212736404, "grad_norm": 2.8898909091949463, "learning_rate": 0.0003645783132530121, "loss": 0.9276, "step": 7219 }, { "epoch": 0.48919717796918805, "grad_norm": 2.9785335063934326, "learning_rate": 0.0003645728368017525, "loss": 0.7996, "step": 7220 }, { "epoch": 0.489264933811012, "grad_norm": 3.4694976806640625, "learning_rate": 0.0003645673603504929, "loss": 0.9759, "step": 7221 }, { "epoch": 0.489332689652836, "grad_norm": 3.291332960128784, "learning_rate": 0.00036456188389923333, "loss": 0.9663, "step": 7222 }, { "epoch": 0.48940044549466, "grad_norm": 4.180492401123047, "learning_rate": 0.00036455640744797373, "loss": 0.7784, "step": 7223 }, { "epoch": 0.489468201336484, "grad_norm": 3.5983517169952393, "learning_rate": 0.0003645509309967142, "loss": 0.7253, "step": 7224 }, { "epoch": 0.48953595717830795, "grad_norm": 4.264759540557861, "learning_rate": 0.0003645454545454546, "loss": 0.9521, "step": 7225 }, { "epoch": 0.48960371302013195, "grad_norm": 8.38875961303711, "learning_rate": 0.000364539978094195, "loss": 0.9189, "step": 7226 }, { "epoch": 0.48967146886195595, "grad_norm": 3.1842362880706787, "learning_rate": 0.0003645345016429354, "loss": 0.8336, "step": 7227 }, { "epoch": 0.48973922470377995, "grad_norm": 3.109042167663574, "learning_rate": 0.0003645290251916758, "loss": 0.925, "step": 7228 }, { "epoch": 0.4898069805456039, "grad_norm": 3.2520341873168945, "learning_rate": 0.00036452354874041623, "loss": 0.8339, "step": 7229 }, { "epoch": 0.4898747363874279, "grad_norm": 3.471632480621338, "learning_rate": 0.0003645180722891567, "loss": 0.8057, "step": 7230 }, { "epoch": 0.4899424922292519, "grad_norm": 2.5190813541412354, "learning_rate": 0.0003645125958378971, "loss": 0.754, "step": 7231 }, { "epoch": 0.4900102480710759, "grad_norm": 4.7783002853393555, "learning_rate": 0.0003645071193866375, "loss": 1.0364, "step": 7232 }, { "epoch": 0.49007800391289985, "grad_norm": 3.3371405601501465, "learning_rate": 0.0003645016429353779, "loss": 1.0256, "step": 7233 }, { "epoch": 0.49014575975472385, "grad_norm": 3.2697694301605225, "learning_rate": 0.0003644961664841183, "loss": 0.7596, "step": 7234 }, { "epoch": 0.49021351559654786, "grad_norm": 2.931368589401245, "learning_rate": 0.00036449069003285873, "loss": 0.8249, "step": 7235 }, { "epoch": 0.4902812714383718, "grad_norm": 3.9645628929138184, "learning_rate": 0.00036448521358159913, "loss": 0.8639, "step": 7236 }, { "epoch": 0.4903490272801958, "grad_norm": 3.9362282752990723, "learning_rate": 0.00036447973713033953, "loss": 0.8872, "step": 7237 }, { "epoch": 0.4904167831220198, "grad_norm": 3.378046989440918, "learning_rate": 0.00036447426067908, "loss": 0.9045, "step": 7238 }, { "epoch": 0.4904845389638438, "grad_norm": 3.370192050933838, "learning_rate": 0.0003644687842278204, "loss": 0.9858, "step": 7239 }, { "epoch": 0.49055229480566775, "grad_norm": 3.3162295818328857, "learning_rate": 0.00036446330777656084, "loss": 0.8435, "step": 7240 }, { "epoch": 0.49062005064749176, "grad_norm": 3.1406302452087402, "learning_rate": 0.00036445783132530124, "loss": 0.7646, "step": 7241 }, { "epoch": 0.49068780648931576, "grad_norm": 4.087240695953369, "learning_rate": 0.00036445235487404164, "loss": 0.9621, "step": 7242 }, { "epoch": 0.49075556233113976, "grad_norm": 4.539140224456787, "learning_rate": 0.00036444687842278204, "loss": 1.236, "step": 7243 }, { "epoch": 0.4908233181729637, "grad_norm": 4.190532207489014, "learning_rate": 0.00036444140197152244, "loss": 0.9701, "step": 7244 }, { "epoch": 0.4908910740147877, "grad_norm": 3.229532241821289, "learning_rate": 0.0003644359255202629, "loss": 0.9897, "step": 7245 }, { "epoch": 0.4909588298566117, "grad_norm": 3.3712518215179443, "learning_rate": 0.00036443044906900334, "loss": 0.9206, "step": 7246 }, { "epoch": 0.4910265856984357, "grad_norm": 2.782008409500122, "learning_rate": 0.00036442497261774374, "loss": 0.7643, "step": 7247 }, { "epoch": 0.49109434154025966, "grad_norm": 3.3744187355041504, "learning_rate": 0.00036441949616648414, "loss": 0.9643, "step": 7248 }, { "epoch": 0.49116209738208366, "grad_norm": 3.407055616378784, "learning_rate": 0.00036441401971522454, "loss": 0.9662, "step": 7249 }, { "epoch": 0.49122985322390766, "grad_norm": 3.43367600440979, "learning_rate": 0.00036440854326396494, "loss": 0.8881, "step": 7250 }, { "epoch": 0.4912976090657316, "grad_norm": 2.2906720638275146, "learning_rate": 0.0003644030668127054, "loss": 0.6432, "step": 7251 }, { "epoch": 0.4913653649075556, "grad_norm": 4.662262916564941, "learning_rate": 0.0003643975903614458, "loss": 1.5047, "step": 7252 }, { "epoch": 0.4914331207493796, "grad_norm": 3.315358877182007, "learning_rate": 0.0003643921139101862, "loss": 0.8749, "step": 7253 }, { "epoch": 0.4915008765912036, "grad_norm": 2.8205385208129883, "learning_rate": 0.00036438663745892665, "loss": 0.6296, "step": 7254 }, { "epoch": 0.49156863243302756, "grad_norm": 3.7904951572418213, "learning_rate": 0.00036438116100766704, "loss": 0.8804, "step": 7255 }, { "epoch": 0.49163638827485157, "grad_norm": 9.018576622009277, "learning_rate": 0.00036437568455640744, "loss": 0.735, "step": 7256 }, { "epoch": 0.49170414411667557, "grad_norm": 4.087843418121338, "learning_rate": 0.0003643702081051479, "loss": 0.8482, "step": 7257 }, { "epoch": 0.49177189995849957, "grad_norm": 3.2447502613067627, "learning_rate": 0.0003643647316538883, "loss": 0.9147, "step": 7258 }, { "epoch": 0.4918396558003235, "grad_norm": 3.5178072452545166, "learning_rate": 0.0003643592552026287, "loss": 0.8921, "step": 7259 }, { "epoch": 0.4919074116421475, "grad_norm": 3.39060378074646, "learning_rate": 0.0003643537787513691, "loss": 0.8947, "step": 7260 }, { "epoch": 0.4919751674839715, "grad_norm": 2.9089810848236084, "learning_rate": 0.00036434830230010955, "loss": 0.7456, "step": 7261 }, { "epoch": 0.4920429233257955, "grad_norm": 3.7920877933502197, "learning_rate": 0.00036434282584885, "loss": 0.8884, "step": 7262 }, { "epoch": 0.49211067916761947, "grad_norm": 4.007812976837158, "learning_rate": 0.0003643373493975904, "loss": 1.1416, "step": 7263 }, { "epoch": 0.49217843500944347, "grad_norm": 5.4365129470825195, "learning_rate": 0.0003643318729463308, "loss": 1.1935, "step": 7264 }, { "epoch": 0.4922461908512675, "grad_norm": 4.489016056060791, "learning_rate": 0.0003643263964950712, "loss": 0.8299, "step": 7265 }, { "epoch": 0.4923139466930914, "grad_norm": 3.6594879627227783, "learning_rate": 0.0003643209200438116, "loss": 0.8437, "step": 7266 }, { "epoch": 0.4923817025349154, "grad_norm": 3.3304755687713623, "learning_rate": 0.00036431544359255205, "loss": 0.8218, "step": 7267 }, { "epoch": 0.4924494583767394, "grad_norm": 4.625750541687012, "learning_rate": 0.00036430996714129245, "loss": 0.9721, "step": 7268 }, { "epoch": 0.4925172142185634, "grad_norm": 2.6974358558654785, "learning_rate": 0.0003643044906900329, "loss": 0.8001, "step": 7269 }, { "epoch": 0.4925849700603874, "grad_norm": 3.0046520233154297, "learning_rate": 0.0003642990142387733, "loss": 0.8644, "step": 7270 }, { "epoch": 0.4926527259022114, "grad_norm": 3.293609380722046, "learning_rate": 0.0003642935377875137, "loss": 0.6781, "step": 7271 }, { "epoch": 0.4927204817440354, "grad_norm": 3.4145655632019043, "learning_rate": 0.0003642880613362541, "loss": 1.0674, "step": 7272 }, { "epoch": 0.4927882375858594, "grad_norm": 2.6530654430389404, "learning_rate": 0.00036428258488499456, "loss": 0.7093, "step": 7273 }, { "epoch": 0.4928559934276833, "grad_norm": 3.6220695972442627, "learning_rate": 0.00036427710843373495, "loss": 0.8404, "step": 7274 }, { "epoch": 0.4929237492695073, "grad_norm": 3.131326913833618, "learning_rate": 0.00036427163198247535, "loss": 0.8618, "step": 7275 }, { "epoch": 0.49299150511133133, "grad_norm": 3.1955060958862305, "learning_rate": 0.00036426615553121575, "loss": 0.836, "step": 7276 }, { "epoch": 0.49305926095315533, "grad_norm": 3.7483677864074707, "learning_rate": 0.0003642606790799562, "loss": 1.2106, "step": 7277 }, { "epoch": 0.4931270167949793, "grad_norm": 3.673755407333374, "learning_rate": 0.00036425520262869666, "loss": 0.6951, "step": 7278 }, { "epoch": 0.4931947726368033, "grad_norm": 2.7614810466766357, "learning_rate": 0.00036424972617743706, "loss": 0.7113, "step": 7279 }, { "epoch": 0.4932625284786273, "grad_norm": 3.8135998249053955, "learning_rate": 0.00036424424972617746, "loss": 0.79, "step": 7280 }, { "epoch": 0.49333028432045123, "grad_norm": 3.258105754852295, "learning_rate": 0.00036423877327491786, "loss": 0.96, "step": 7281 }, { "epoch": 0.49339804016227523, "grad_norm": 3.130053997039795, "learning_rate": 0.00036423329682365826, "loss": 0.9152, "step": 7282 }, { "epoch": 0.49346579600409923, "grad_norm": 4.0675435066223145, "learning_rate": 0.0003642278203723987, "loss": 0.9672, "step": 7283 }, { "epoch": 0.49353355184592324, "grad_norm": 4.331455707550049, "learning_rate": 0.0003642223439211391, "loss": 1.1157, "step": 7284 }, { "epoch": 0.4936013076877472, "grad_norm": 3.0658717155456543, "learning_rate": 0.00036421686746987956, "loss": 0.7615, "step": 7285 }, { "epoch": 0.4936690635295712, "grad_norm": 2.994619846343994, "learning_rate": 0.00036421139101861996, "loss": 0.7751, "step": 7286 }, { "epoch": 0.4937368193713952, "grad_norm": 3.3711981773376465, "learning_rate": 0.00036420591456736036, "loss": 0.8386, "step": 7287 }, { "epoch": 0.4938045752132192, "grad_norm": 3.219965696334839, "learning_rate": 0.00036420043811610076, "loss": 0.8252, "step": 7288 }, { "epoch": 0.49387233105504313, "grad_norm": 3.915893793106079, "learning_rate": 0.0003641949616648412, "loss": 1.0292, "step": 7289 }, { "epoch": 0.49394008689686714, "grad_norm": 4.231311321258545, "learning_rate": 0.0003641894852135816, "loss": 1.0432, "step": 7290 }, { "epoch": 0.49400784273869114, "grad_norm": 2.913677930831909, "learning_rate": 0.000364184008762322, "loss": 0.8436, "step": 7291 }, { "epoch": 0.49407559858051514, "grad_norm": 3.7086596488952637, "learning_rate": 0.00036417853231106247, "loss": 0.9634, "step": 7292 }, { "epoch": 0.4941433544223391, "grad_norm": 3.7232983112335205, "learning_rate": 0.00036417305585980287, "loss": 0.7121, "step": 7293 }, { "epoch": 0.4942111102641631, "grad_norm": 4.259644031524658, "learning_rate": 0.00036416757940854326, "loss": 0.9817, "step": 7294 }, { "epoch": 0.4942788661059871, "grad_norm": 3.6866815090179443, "learning_rate": 0.0003641621029572837, "loss": 0.665, "step": 7295 }, { "epoch": 0.49434662194781104, "grad_norm": 3.939566135406494, "learning_rate": 0.0003641566265060241, "loss": 0.8889, "step": 7296 }, { "epoch": 0.49441437778963504, "grad_norm": 2.9838743209838867, "learning_rate": 0.0003641511500547645, "loss": 0.7578, "step": 7297 }, { "epoch": 0.49448213363145904, "grad_norm": 3.318920135498047, "learning_rate": 0.0003641456736035049, "loss": 0.819, "step": 7298 }, { "epoch": 0.49454988947328304, "grad_norm": 3.753159761428833, "learning_rate": 0.00036414019715224537, "loss": 1.041, "step": 7299 }, { "epoch": 0.494617645315107, "grad_norm": 3.6245944499969482, "learning_rate": 0.0003641347207009858, "loss": 0.9652, "step": 7300 }, { "epoch": 0.494685401156931, "grad_norm": 3.8581042289733887, "learning_rate": 0.0003641292442497262, "loss": 0.881, "step": 7301 }, { "epoch": 0.494753156998755, "grad_norm": 3.682664394378662, "learning_rate": 0.0003641237677984666, "loss": 1.0145, "step": 7302 }, { "epoch": 0.494820912840579, "grad_norm": 3.119403123855591, "learning_rate": 0.000364118291347207, "loss": 0.669, "step": 7303 }, { "epoch": 0.49488866868240294, "grad_norm": 3.01467227935791, "learning_rate": 0.0003641128148959474, "loss": 0.8858, "step": 7304 }, { "epoch": 0.49495642452422695, "grad_norm": 4.004265308380127, "learning_rate": 0.00036410733844468787, "loss": 0.8418, "step": 7305 }, { "epoch": 0.49502418036605095, "grad_norm": 2.823183298110962, "learning_rate": 0.00036410186199342827, "loss": 0.7566, "step": 7306 }, { "epoch": 0.49509193620787495, "grad_norm": 3.2066235542297363, "learning_rate": 0.00036409638554216867, "loss": 0.9143, "step": 7307 }, { "epoch": 0.4951596920496989, "grad_norm": 3.393353223800659, "learning_rate": 0.0003640909090909091, "loss": 0.8075, "step": 7308 }, { "epoch": 0.4952274478915229, "grad_norm": 3.9225432872772217, "learning_rate": 0.0003640854326396495, "loss": 1.0779, "step": 7309 }, { "epoch": 0.4952952037333469, "grad_norm": 3.267596960067749, "learning_rate": 0.0003640799561883899, "loss": 0.9933, "step": 7310 }, { "epoch": 0.49536295957517085, "grad_norm": 3.8255724906921387, "learning_rate": 0.0003640744797371304, "loss": 0.9242, "step": 7311 }, { "epoch": 0.49543071541699485, "grad_norm": 3.095795154571533, "learning_rate": 0.0003640690032858708, "loss": 0.7917, "step": 7312 }, { "epoch": 0.49549847125881885, "grad_norm": 2.893620014190674, "learning_rate": 0.0003640635268346112, "loss": 0.6388, "step": 7313 }, { "epoch": 0.49556622710064285, "grad_norm": 4.9673171043396, "learning_rate": 0.0003640580503833516, "loss": 1.2023, "step": 7314 }, { "epoch": 0.4956339829424668, "grad_norm": 3.5821421146392822, "learning_rate": 0.00036405257393209203, "loss": 0.7259, "step": 7315 }, { "epoch": 0.4957017387842908, "grad_norm": 4.861643314361572, "learning_rate": 0.0003640470974808325, "loss": 0.8984, "step": 7316 }, { "epoch": 0.4957694946261148, "grad_norm": 4.105581760406494, "learning_rate": 0.0003640416210295729, "loss": 1.0408, "step": 7317 }, { "epoch": 0.4958372504679388, "grad_norm": 3.4522600173950195, "learning_rate": 0.0003640361445783133, "loss": 0.8443, "step": 7318 }, { "epoch": 0.49590500630976275, "grad_norm": 4.436029434204102, "learning_rate": 0.0003640306681270537, "loss": 1.1108, "step": 7319 }, { "epoch": 0.49597276215158675, "grad_norm": 3.0460426807403564, "learning_rate": 0.0003640251916757941, "loss": 0.8682, "step": 7320 }, { "epoch": 0.49604051799341076, "grad_norm": 3.021231174468994, "learning_rate": 0.00036401971522453453, "loss": 0.8655, "step": 7321 }, { "epoch": 0.49610827383523476, "grad_norm": 3.4510080814361572, "learning_rate": 0.00036401423877327493, "loss": 0.846, "step": 7322 }, { "epoch": 0.4961760296770587, "grad_norm": 3.700927734375, "learning_rate": 0.0003640087623220154, "loss": 0.9023, "step": 7323 }, { "epoch": 0.4962437855188827, "grad_norm": 3.831176280975342, "learning_rate": 0.0003640032858707558, "loss": 0.7953, "step": 7324 }, { "epoch": 0.4963115413607067, "grad_norm": 3.258108615875244, "learning_rate": 0.0003639978094194962, "loss": 0.9805, "step": 7325 }, { "epoch": 0.49637929720253066, "grad_norm": 4.314726829528809, "learning_rate": 0.0003639923329682366, "loss": 0.8834, "step": 7326 }, { "epoch": 0.49644705304435466, "grad_norm": 3.4996511936187744, "learning_rate": 0.00036398685651697703, "loss": 0.8478, "step": 7327 }, { "epoch": 0.49651480888617866, "grad_norm": 4.065398693084717, "learning_rate": 0.00036398138006571743, "loss": 0.7719, "step": 7328 }, { "epoch": 0.49658256472800266, "grad_norm": 3.087994337081909, "learning_rate": 0.00036397590361445783, "loss": 0.8997, "step": 7329 }, { "epoch": 0.4966503205698266, "grad_norm": 3.3154287338256836, "learning_rate": 0.00036397042716319823, "loss": 0.8203, "step": 7330 }, { "epoch": 0.4967180764116506, "grad_norm": 3.5430290699005127, "learning_rate": 0.0003639649507119387, "loss": 0.9387, "step": 7331 }, { "epoch": 0.4967858322534746, "grad_norm": 4.101946830749512, "learning_rate": 0.0003639594742606791, "loss": 1.0105, "step": 7332 }, { "epoch": 0.4968535880952986, "grad_norm": 2.884880542755127, "learning_rate": 0.00036395399780941954, "loss": 0.6436, "step": 7333 }, { "epoch": 0.49692134393712256, "grad_norm": 3.2377781867980957, "learning_rate": 0.00036394852135815994, "loss": 0.7383, "step": 7334 }, { "epoch": 0.49698909977894656, "grad_norm": 2.770453453063965, "learning_rate": 0.00036394304490690034, "loss": 0.6723, "step": 7335 }, { "epoch": 0.49705685562077057, "grad_norm": 2.929874897003174, "learning_rate": 0.00036393756845564074, "loss": 0.8881, "step": 7336 }, { "epoch": 0.49712461146259457, "grad_norm": 2.9658660888671875, "learning_rate": 0.0003639320920043812, "loss": 0.8111, "step": 7337 }, { "epoch": 0.4971923673044185, "grad_norm": 2.7275726795196533, "learning_rate": 0.0003639266155531216, "loss": 0.7272, "step": 7338 }, { "epoch": 0.4972601231462425, "grad_norm": 4.1924943923950195, "learning_rate": 0.00036392113910186204, "loss": 1.2095, "step": 7339 }, { "epoch": 0.4973278789880665, "grad_norm": 2.998042106628418, "learning_rate": 0.00036391566265060244, "loss": 0.7589, "step": 7340 }, { "epoch": 0.49739563482989047, "grad_norm": 4.445460319519043, "learning_rate": 0.00036391018619934284, "loss": 0.8799, "step": 7341 }, { "epoch": 0.49746339067171447, "grad_norm": 3.805979013442993, "learning_rate": 0.00036390470974808324, "loss": 1.0957, "step": 7342 }, { "epoch": 0.49753114651353847, "grad_norm": 3.867902994155884, "learning_rate": 0.0003638992332968237, "loss": 0.9994, "step": 7343 }, { "epoch": 0.49759890235536247, "grad_norm": 2.8922431468963623, "learning_rate": 0.0003638937568455641, "loss": 0.9314, "step": 7344 }, { "epoch": 0.4976666581971864, "grad_norm": 3.387134075164795, "learning_rate": 0.0003638882803943045, "loss": 0.9598, "step": 7345 }, { "epoch": 0.4977344140390104, "grad_norm": 4.9702534675598145, "learning_rate": 0.0003638828039430449, "loss": 1.153, "step": 7346 }, { "epoch": 0.4978021698808344, "grad_norm": 2.896029472351074, "learning_rate": 0.00036387732749178534, "loss": 0.7887, "step": 7347 }, { "epoch": 0.4978699257226584, "grad_norm": 3.407331705093384, "learning_rate": 0.00036387185104052574, "loss": 0.9314, "step": 7348 }, { "epoch": 0.49793768156448237, "grad_norm": 3.8971898555755615, "learning_rate": 0.0003638663745892662, "loss": 0.9183, "step": 7349 }, { "epoch": 0.4980054374063064, "grad_norm": 3.2036612033843994, "learning_rate": 0.0003638608981380066, "loss": 0.8062, "step": 7350 }, { "epoch": 0.4980731932481304, "grad_norm": 2.159801483154297, "learning_rate": 0.000363855421686747, "loss": 0.6307, "step": 7351 }, { "epoch": 0.4981409490899544, "grad_norm": 3.5817506313323975, "learning_rate": 0.0003638499452354874, "loss": 0.9433, "step": 7352 }, { "epoch": 0.4982087049317783, "grad_norm": 2.938889741897583, "learning_rate": 0.00036384446878422785, "loss": 0.9006, "step": 7353 }, { "epoch": 0.4982764607736023, "grad_norm": 3.109180212020874, "learning_rate": 0.0003638389923329683, "loss": 0.9472, "step": 7354 }, { "epoch": 0.4983442166154263, "grad_norm": 2.50216007232666, "learning_rate": 0.0003638335158817087, "loss": 0.6677, "step": 7355 }, { "epoch": 0.4984119724572503, "grad_norm": 4.055174827575684, "learning_rate": 0.0003638280394304491, "loss": 0.8789, "step": 7356 }, { "epoch": 0.4984797282990743, "grad_norm": 3.6125152111053467, "learning_rate": 0.0003638225629791895, "loss": 0.8447, "step": 7357 }, { "epoch": 0.4985474841408983, "grad_norm": 3.1893670558929443, "learning_rate": 0.0003638170865279299, "loss": 0.8582, "step": 7358 }, { "epoch": 0.4986152399827223, "grad_norm": 3.9131522178649902, "learning_rate": 0.00036381161007667035, "loss": 1.1259, "step": 7359 }, { "epoch": 0.4986829958245462, "grad_norm": 3.2104737758636475, "learning_rate": 0.00036380613362541075, "loss": 0.7097, "step": 7360 }, { "epoch": 0.49875075166637023, "grad_norm": 2.8251423835754395, "learning_rate": 0.00036380065717415115, "loss": 0.728, "step": 7361 }, { "epoch": 0.49881850750819423, "grad_norm": 4.3711957931518555, "learning_rate": 0.0003637951807228916, "loss": 1.0424, "step": 7362 }, { "epoch": 0.49888626335001823, "grad_norm": 2.8683319091796875, "learning_rate": 0.000363789704271632, "loss": 0.7675, "step": 7363 }, { "epoch": 0.4989540191918422, "grad_norm": 3.573287010192871, "learning_rate": 0.0003637842278203724, "loss": 0.9163, "step": 7364 }, { "epoch": 0.4990217750336662, "grad_norm": 5.193227291107178, "learning_rate": 0.00036377875136911286, "loss": 0.9935, "step": 7365 }, { "epoch": 0.4990895308754902, "grad_norm": 3.1652491092681885, "learning_rate": 0.00036377327491785325, "loss": 0.955, "step": 7366 }, { "epoch": 0.4991572867173142, "grad_norm": 3.6080520153045654, "learning_rate": 0.00036376779846659365, "loss": 0.8582, "step": 7367 }, { "epoch": 0.49922504255913813, "grad_norm": 3.823575258255005, "learning_rate": 0.00036376232201533405, "loss": 1.0244, "step": 7368 }, { "epoch": 0.49929279840096213, "grad_norm": 3.814385414123535, "learning_rate": 0.00036375684556407445, "loss": 0.9597, "step": 7369 }, { "epoch": 0.49936055424278614, "grad_norm": 3.384467124938965, "learning_rate": 0.0003637513691128149, "loss": 0.7457, "step": 7370 }, { "epoch": 0.4994283100846101, "grad_norm": 2.923259973526001, "learning_rate": 0.00036374589266155536, "loss": 0.9238, "step": 7371 }, { "epoch": 0.4994960659264341, "grad_norm": 3.350383996963501, "learning_rate": 0.00036374041621029576, "loss": 0.9378, "step": 7372 }, { "epoch": 0.4995638217682581, "grad_norm": 2.574327230453491, "learning_rate": 0.00036373493975903616, "loss": 0.6841, "step": 7373 }, { "epoch": 0.4996315776100821, "grad_norm": 3.1296095848083496, "learning_rate": 0.00036372946330777656, "loss": 0.7949, "step": 7374 }, { "epoch": 0.49969933345190604, "grad_norm": 3.1888763904571533, "learning_rate": 0.000363723986856517, "loss": 0.8442, "step": 7375 }, { "epoch": 0.49976708929373004, "grad_norm": 3.1941256523132324, "learning_rate": 0.0003637185104052574, "loss": 0.8005, "step": 7376 }, { "epoch": 0.49983484513555404, "grad_norm": 3.4514129161834717, "learning_rate": 0.0003637130339539978, "loss": 0.7951, "step": 7377 }, { "epoch": 0.49990260097737804, "grad_norm": 3.17055344581604, "learning_rate": 0.00036370755750273826, "loss": 0.7866, "step": 7378 }, { "epoch": 0.49990260097737804, "eval_loss": 0.8217099905014038, "eval_noise_accuracy": 0.0, "eval_runtime": 16233.3955, "eval_samples_per_second": 0.317, "eval_steps_per_second": 0.079, "eval_wer": 69.54266530915496, "step": 7378 } ], "logging_steps": 1, "max_steps": 73790, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 3689, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.2208458672635904e+20, "train_batch_size": 2, "trial_name": null, "trial_params": null }