{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 2.0, "eval_steps": 500, "global_step": 7212, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0, "eval_loss": 2.829824447631836, "eval_runtime": 408.1353, "eval_samples_per_second": 100.489, "eval_steps_per_second": 1.571, "step": 0 }, { "epoch": 0.00027731558513588466, "grad_norm": 46.86890411376953, "learning_rate": 0.0, "loss": 2.4417, "step": 1 }, { "epoch": 0.0005546311702717693, "grad_norm": 69.00373077392578, "learning_rate": 2.5000000000000004e-07, "loss": 2.3396, "step": 2 }, { "epoch": 0.0008319467554076539, "grad_norm": 53.255943298339844, "learning_rate": 5.000000000000001e-07, "loss": 2.3484, "step": 3 }, { "epoch": 0.0011092623405435386, "grad_norm": 21.859601974487305, "learning_rate": 7.5e-07, "loss": 2.3949, "step": 4 }, { "epoch": 0.0013865779256794233, "grad_norm": 18.474227905273438, "learning_rate": 1.0000000000000002e-06, "loss": 2.3011, "step": 5 }, { "epoch": 0.0016638935108153079, "grad_norm": 18.92083740234375, "learning_rate": 1.25e-06, "loss": 2.1921, "step": 6 }, { "epoch": 0.0019412090959511925, "grad_norm": 17.222856521606445, "learning_rate": 1.5e-06, "loss": 2.2153, "step": 7 }, { "epoch": 0.0022185246810870773, "grad_norm": 31.592514038085938, "learning_rate": 1.7500000000000002e-06, "loss": 2.0413, "step": 8 }, { "epoch": 0.0024958402662229617, "grad_norm": 17.17295265197754, "learning_rate": 2.0000000000000003e-06, "loss": 2.0861, "step": 9 }, { "epoch": 0.0027731558513588465, "grad_norm": 17.179834365844727, "learning_rate": 2.25e-06, "loss": 2.0175, "step": 10 }, { "epoch": 0.003050471436494731, "grad_norm": 17.518646240234375, "learning_rate": 2.5e-06, "loss": 1.9135, "step": 11 }, { "epoch": 0.0033277870216306157, "grad_norm": 14.855606079101562, "learning_rate": 2.7500000000000004e-06, "loss": 1.7351, "step": 12 }, { "epoch": 0.0036051026067665, "grad_norm": 13.514114379882812, "learning_rate": 3e-06, "loss": 1.5465, "step": 13 }, { "epoch": 0.003882418191902385, "grad_norm": 12.117414474487305, "learning_rate": 3.2500000000000002e-06, "loss": 1.4642, "step": 14 }, { "epoch": 0.004159733777038269, "grad_norm": 10.935081481933594, "learning_rate": 3.5000000000000004e-06, "loss": 1.3851, "step": 15 }, { "epoch": 0.004437049362174155, "grad_norm": 7.275962829589844, "learning_rate": 3.75e-06, "loss": 1.1289, "step": 16 }, { "epoch": 0.004714364947310039, "grad_norm": 6.399021148681641, "learning_rate": 4.000000000000001e-06, "loss": 1.1277, "step": 17 }, { "epoch": 0.004991680532445923, "grad_norm": 6.132956027984619, "learning_rate": 4.250000000000001e-06, "loss": 1.1483, "step": 18 }, { "epoch": 0.005268996117581808, "grad_norm": 5.525564670562744, "learning_rate": 4.5e-06, "loss": 1.0578, "step": 19 }, { "epoch": 0.005546311702717693, "grad_norm": 5.441694259643555, "learning_rate": 4.75e-06, "loss": 1.0647, "step": 20 }, { "epoch": 0.005823627287853577, "grad_norm": 5.160792827606201, "learning_rate": 5e-06, "loss": 0.9961, "step": 21 }, { "epoch": 0.006100942872989462, "grad_norm": 5.569485664367676, "learning_rate": 5.25e-06, "loss": 1.0063, "step": 22 }, { "epoch": 0.006378258458125347, "grad_norm": 4.869104385375977, "learning_rate": 5.500000000000001e-06, "loss": 0.9848, "step": 23 }, { "epoch": 0.0066555740432612314, "grad_norm": 3.172858238220215, "learning_rate": 5.750000000000001e-06, "loss": 0.9317, "step": 24 }, { "epoch": 0.006932889628397116, "grad_norm": 2.5935134887695312, "learning_rate": 6e-06, "loss": 0.9086, "step": 25 }, { "epoch": 0.007210205213533, "grad_norm": 1.7386329174041748, "learning_rate": 6.25e-06, "loss": 0.8976, "step": 26 }, { "epoch": 0.0074875207986688855, "grad_norm": 1.3860479593276978, "learning_rate": 6.5000000000000004e-06, "loss": 0.8668, "step": 27 }, { "epoch": 0.00776483638380477, "grad_norm": 1.3284790515899658, "learning_rate": 6.750000000000001e-06, "loss": 0.8438, "step": 28 }, { "epoch": 0.008042151968940654, "grad_norm": 1.208060383796692, "learning_rate": 7.000000000000001e-06, "loss": 0.8601, "step": 29 }, { "epoch": 0.008319467554076539, "grad_norm": 0.99210125207901, "learning_rate": 7.25e-06, "loss": 0.836, "step": 30 }, { "epoch": 0.008596783139212423, "grad_norm": 0.7937288284301758, "learning_rate": 7.5e-06, "loss": 0.829, "step": 31 }, { "epoch": 0.00887409872434831, "grad_norm": 0.706200361251831, "learning_rate": 7.75e-06, "loss": 0.8006, "step": 32 }, { "epoch": 0.009151414309484194, "grad_norm": 0.9658659100532532, "learning_rate": 8.000000000000001e-06, "loss": 0.7989, "step": 33 }, { "epoch": 0.009428729894620078, "grad_norm": 1.174869418144226, "learning_rate": 8.25e-06, "loss": 0.8159, "step": 34 }, { "epoch": 0.009706045479755962, "grad_norm": 0.5839990973472595, "learning_rate": 8.500000000000002e-06, "loss": 0.7837, "step": 35 }, { "epoch": 0.009983361064891847, "grad_norm": 0.6130610704421997, "learning_rate": 8.75e-06, "loss": 0.7799, "step": 36 }, { "epoch": 0.010260676650027731, "grad_norm": 0.7108742594718933, "learning_rate": 9e-06, "loss": 0.7611, "step": 37 }, { "epoch": 0.010537992235163616, "grad_norm": 0.7302682995796204, "learning_rate": 9.25e-06, "loss": 0.7942, "step": 38 }, { "epoch": 0.010815307820299502, "grad_norm": 0.5843620896339417, "learning_rate": 9.5e-06, "loss": 0.765, "step": 39 }, { "epoch": 0.011092623405435386, "grad_norm": 0.41768163442611694, "learning_rate": 9.750000000000002e-06, "loss": 0.7112, "step": 40 }, { "epoch": 0.01136993899057127, "grad_norm": 0.5103988647460938, "learning_rate": 1e-05, "loss": 0.762, "step": 41 }, { "epoch": 0.011647254575707155, "grad_norm": 0.47128552198410034, "learning_rate": 1.025e-05, "loss": 0.7509, "step": 42 }, { "epoch": 0.01192457016084304, "grad_norm": 1.0201480388641357, "learning_rate": 1.05e-05, "loss": 0.7507, "step": 43 }, { "epoch": 0.012201885745978924, "grad_norm": 0.3908264935016632, "learning_rate": 1.075e-05, "loss": 0.7629, "step": 44 }, { "epoch": 0.012479201331114808, "grad_norm": 0.4154920279979706, "learning_rate": 1.1000000000000001e-05, "loss": 0.7531, "step": 45 }, { "epoch": 0.012756516916250694, "grad_norm": 0.4213290512561798, "learning_rate": 1.125e-05, "loss": 0.7472, "step": 46 }, { "epoch": 0.013033832501386578, "grad_norm": 0.6245641112327576, "learning_rate": 1.1500000000000002e-05, "loss": 0.7404, "step": 47 }, { "epoch": 0.013311148086522463, "grad_norm": 0.44496941566467285, "learning_rate": 1.175e-05, "loss": 0.7427, "step": 48 }, { "epoch": 0.013588463671658347, "grad_norm": 0.4155629575252533, "learning_rate": 1.2e-05, "loss": 0.7147, "step": 49 }, { "epoch": 0.013865779256794232, "grad_norm": 0.37920621037483215, "learning_rate": 1.225e-05, "loss": 0.7426, "step": 50 }, { "epoch": 0.014143094841930116, "grad_norm": 0.3893055319786072, "learning_rate": 1.25e-05, "loss": 0.7176, "step": 51 }, { "epoch": 0.014420410427066, "grad_norm": 0.3363882005214691, "learning_rate": 1.2750000000000002e-05, "loss": 0.7063, "step": 52 }, { "epoch": 0.014697726012201887, "grad_norm": 0.4444830119609833, "learning_rate": 1.3000000000000001e-05, "loss": 0.6945, "step": 53 }, { "epoch": 0.014975041597337771, "grad_norm": 0.3413512706756592, "learning_rate": 1.3250000000000002e-05, "loss": 0.7288, "step": 54 }, { "epoch": 0.015252357182473655, "grad_norm": 0.4114389717578888, "learning_rate": 1.3500000000000001e-05, "loss": 0.7575, "step": 55 }, { "epoch": 0.01552967276760954, "grad_norm": 0.36049914360046387, "learning_rate": 1.3750000000000002e-05, "loss": 0.7217, "step": 56 }, { "epoch": 0.015806988352745424, "grad_norm": 0.41267284750938416, "learning_rate": 1.4000000000000001e-05, "loss": 0.7166, "step": 57 }, { "epoch": 0.01608430393788131, "grad_norm": 0.4639422297477722, "learning_rate": 1.4249999999999999e-05, "loss": 0.7069, "step": 58 }, { "epoch": 0.016361619523017193, "grad_norm": 0.36772483587265015, "learning_rate": 1.45e-05, "loss": 0.7247, "step": 59 }, { "epoch": 0.016638935108153077, "grad_norm": 0.3546575903892517, "learning_rate": 1.475e-05, "loss": 0.7128, "step": 60 }, { "epoch": 0.01691625069328896, "grad_norm": 0.31919416785240173, "learning_rate": 1.5e-05, "loss": 0.7207, "step": 61 }, { "epoch": 0.017193566278424846, "grad_norm": 0.3498699367046356, "learning_rate": 1.525e-05, "loss": 0.7065, "step": 62 }, { "epoch": 0.01747088186356073, "grad_norm": 0.35648590326309204, "learning_rate": 1.55e-05, "loss": 0.7146, "step": 63 }, { "epoch": 0.01774819744869662, "grad_norm": 0.30697041749954224, "learning_rate": 1.575e-05, "loss": 0.6805, "step": 64 }, { "epoch": 0.018025513033832503, "grad_norm": 0.5759001970291138, "learning_rate": 1.6000000000000003e-05, "loss": 0.7267, "step": 65 }, { "epoch": 0.018302828618968387, "grad_norm": 0.263336718082428, "learning_rate": 1.6250000000000002e-05, "loss": 0.6936, "step": 66 }, { "epoch": 0.01858014420410427, "grad_norm": 0.2977915108203888, "learning_rate": 1.65e-05, "loss": 0.6685, "step": 67 }, { "epoch": 0.018857459789240156, "grad_norm": 0.3028334081172943, "learning_rate": 1.675e-05, "loss": 0.7109, "step": 68 }, { "epoch": 0.01913477537437604, "grad_norm": 0.3265489935874939, "learning_rate": 1.7000000000000003e-05, "loss": 0.7023, "step": 69 }, { "epoch": 0.019412090959511925, "grad_norm": 0.2899531126022339, "learning_rate": 1.725e-05, "loss": 0.6985, "step": 70 }, { "epoch": 0.01968940654464781, "grad_norm": 0.29272034764289856, "learning_rate": 1.75e-05, "loss": 0.7125, "step": 71 }, { "epoch": 0.019966722129783693, "grad_norm": 0.3114602863788605, "learning_rate": 1.775e-05, "loss": 0.6836, "step": 72 }, { "epoch": 0.020244037714919578, "grad_norm": 0.28768229484558105, "learning_rate": 1.8e-05, "loss": 0.6809, "step": 73 }, { "epoch": 0.020521353300055462, "grad_norm": 0.270345002412796, "learning_rate": 1.825e-05, "loss": 0.6776, "step": 74 }, { "epoch": 0.020798668885191347, "grad_norm": 0.2635841369628906, "learning_rate": 1.85e-05, "loss": 0.6645, "step": 75 }, { "epoch": 0.02107598447032723, "grad_norm": 0.3204723000526428, "learning_rate": 1.8750000000000002e-05, "loss": 0.6631, "step": 76 }, { "epoch": 0.021353300055463115, "grad_norm": 0.27179455757141113, "learning_rate": 1.9e-05, "loss": 0.693, "step": 77 }, { "epoch": 0.021630615640599003, "grad_norm": 0.26558464765548706, "learning_rate": 1.925e-05, "loss": 0.6715, "step": 78 }, { "epoch": 0.021907931225734888, "grad_norm": 0.3682558834552765, "learning_rate": 1.9500000000000003e-05, "loss": 0.6826, "step": 79 }, { "epoch": 0.022185246810870772, "grad_norm": 0.281429648399353, "learning_rate": 1.9750000000000002e-05, "loss": 0.6599, "step": 80 }, { "epoch": 0.022462562396006656, "grad_norm": 0.28537672758102417, "learning_rate": 2e-05, "loss": 0.6695, "step": 81 }, { "epoch": 0.02273987798114254, "grad_norm": 0.274913489818573, "learning_rate": 2.025e-05, "loss": 0.678, "step": 82 }, { "epoch": 0.023017193566278425, "grad_norm": 0.28847208619117737, "learning_rate": 2.05e-05, "loss": 0.669, "step": 83 }, { "epoch": 0.02329450915141431, "grad_norm": 0.30678853392601013, "learning_rate": 2.075e-05, "loss": 0.6647, "step": 84 }, { "epoch": 0.023571824736550194, "grad_norm": 0.28266021609306335, "learning_rate": 2.1e-05, "loss": 0.671, "step": 85 }, { "epoch": 0.02384914032168608, "grad_norm": 0.2712315320968628, "learning_rate": 2.125e-05, "loss": 0.6579, "step": 86 }, { "epoch": 0.024126455906821963, "grad_norm": 0.30666086077690125, "learning_rate": 2.15e-05, "loss": 0.6598, "step": 87 }, { "epoch": 0.024403771491957847, "grad_norm": 0.257932186126709, "learning_rate": 2.175e-05, "loss": 0.6348, "step": 88 }, { "epoch": 0.02468108707709373, "grad_norm": 0.3133629560470581, "learning_rate": 2.2000000000000003e-05, "loss": 0.6611, "step": 89 }, { "epoch": 0.024958402662229616, "grad_norm": 0.27258774638175964, "learning_rate": 2.2250000000000002e-05, "loss": 0.6736, "step": 90 }, { "epoch": 0.0252357182473655, "grad_norm": 0.3201597034931183, "learning_rate": 2.25e-05, "loss": 0.6918, "step": 91 }, { "epoch": 0.025513033832501388, "grad_norm": 0.26909735798835754, "learning_rate": 2.275e-05, "loss": 0.6738, "step": 92 }, { "epoch": 0.025790349417637273, "grad_norm": 0.40945449471473694, "learning_rate": 2.3000000000000003e-05, "loss": 0.6615, "step": 93 }, { "epoch": 0.026067665002773157, "grad_norm": 0.3059796392917633, "learning_rate": 2.3250000000000003e-05, "loss": 0.6677, "step": 94 }, { "epoch": 0.02634498058790904, "grad_norm": 0.2737233638763428, "learning_rate": 2.35e-05, "loss": 0.6436, "step": 95 }, { "epoch": 0.026622296173044926, "grad_norm": 0.3231774866580963, "learning_rate": 2.375e-05, "loss": 0.6735, "step": 96 }, { "epoch": 0.02689961175818081, "grad_norm": 0.28404903411865234, "learning_rate": 2.4e-05, "loss": 0.6688, "step": 97 }, { "epoch": 0.027176927343316695, "grad_norm": 0.3392276465892792, "learning_rate": 2.425e-05, "loss": 0.684, "step": 98 }, { "epoch": 0.02745424292845258, "grad_norm": 0.3021562099456787, "learning_rate": 2.45e-05, "loss": 0.6886, "step": 99 }, { "epoch": 0.027731558513588463, "grad_norm": 0.2843964099884033, "learning_rate": 2.4750000000000002e-05, "loss": 0.6358, "step": 100 }, { "epoch": 0.028008874098724348, "grad_norm": 0.3120342195034027, "learning_rate": 2.5e-05, "loss": 0.669, "step": 101 }, { "epoch": 0.028286189683860232, "grad_norm": 0.2752966284751892, "learning_rate": 2.499999878045941e-05, "loss": 0.6231, "step": 102 }, { "epoch": 0.028563505268996116, "grad_norm": 0.28501492738723755, "learning_rate": 2.4999995121837877e-05, "loss": 0.6205, "step": 103 }, { "epoch": 0.028840820854132, "grad_norm": 0.2797880172729492, "learning_rate": 2.4999989024136113e-05, "loss": 0.6697, "step": 104 }, { "epoch": 0.029118136439267885, "grad_norm": 0.2929855287075043, "learning_rate": 2.4999980487355314e-05, "loss": 0.642, "step": 105 }, { "epoch": 0.029395452024403773, "grad_norm": 0.264911025762558, "learning_rate": 2.4999969511497135e-05, "loss": 0.6575, "step": 106 }, { "epoch": 0.029672767609539658, "grad_norm": 0.2797522246837616, "learning_rate": 2.4999956096563725e-05, "loss": 0.6566, "step": 107 }, { "epoch": 0.029950083194675542, "grad_norm": 0.24777652323246002, "learning_rate": 2.49999402425577e-05, "loss": 0.6288, "step": 108 }, { "epoch": 0.030227398779811426, "grad_norm": 0.34936287999153137, "learning_rate": 2.4999921949482157e-05, "loss": 0.6506, "step": 109 }, { "epoch": 0.03050471436494731, "grad_norm": 0.30599579215049744, "learning_rate": 2.499990121734066e-05, "loss": 0.6554, "step": 110 }, { "epoch": 0.030782029950083195, "grad_norm": 0.29751792550086975, "learning_rate": 2.499987804613726e-05, "loss": 0.6698, "step": 111 }, { "epoch": 0.03105934553521908, "grad_norm": 0.2642778754234314, "learning_rate": 2.4999852435876473e-05, "loss": 0.6337, "step": 112 }, { "epoch": 0.031336661120354964, "grad_norm": 0.2584931552410126, "learning_rate": 2.49998243865633e-05, "loss": 0.661, "step": 113 }, { "epoch": 0.03161397670549085, "grad_norm": 0.266797810792923, "learning_rate": 2.4999793898203212e-05, "loss": 0.6368, "step": 114 }, { "epoch": 0.03189129229062673, "grad_norm": 0.35552042722702026, "learning_rate": 2.4999760970802155e-05, "loss": 0.6364, "step": 115 }, { "epoch": 0.03216860787576262, "grad_norm": 0.28450194001197815, "learning_rate": 2.4999725604366562e-05, "loss": 0.661, "step": 116 }, { "epoch": 0.0324459234608985, "grad_norm": 0.3352636694908142, "learning_rate": 2.4999687798903327e-05, "loss": 0.6439, "step": 117 }, { "epoch": 0.032723239046034386, "grad_norm": 0.2475953847169876, "learning_rate": 2.499964755441983e-05, "loss": 0.6344, "step": 118 }, { "epoch": 0.03300055463117027, "grad_norm": 0.30431386828422546, "learning_rate": 2.4999604870923926e-05, "loss": 0.6459, "step": 119 }, { "epoch": 0.033277870216306155, "grad_norm": 0.2653152644634247, "learning_rate": 2.499955974842394e-05, "loss": 0.6425, "step": 120 }, { "epoch": 0.03355518580144204, "grad_norm": 0.29490575194358826, "learning_rate": 2.4999512186928675e-05, "loss": 0.6427, "step": 121 }, { "epoch": 0.03383250138657792, "grad_norm": 0.2630308270454407, "learning_rate": 2.4999462186447415e-05, "loss": 0.6597, "step": 122 }, { "epoch": 0.03410981697171381, "grad_norm": 0.26787513494491577, "learning_rate": 2.4999409746989914e-05, "loss": 0.6622, "step": 123 }, { "epoch": 0.03438713255684969, "grad_norm": 0.25667890906333923, "learning_rate": 2.499935486856641e-05, "loss": 0.6335, "step": 124 }, { "epoch": 0.03466444814198558, "grad_norm": 0.26751402020454407, "learning_rate": 2.4999297551187603e-05, "loss": 0.6358, "step": 125 }, { "epoch": 0.03494176372712146, "grad_norm": 0.2815951108932495, "learning_rate": 2.4999237794864683e-05, "loss": 0.6615, "step": 126 }, { "epoch": 0.03521907931225735, "grad_norm": 0.2573346793651581, "learning_rate": 2.499917559960931e-05, "loss": 0.6463, "step": 127 }, { "epoch": 0.03549639489739324, "grad_norm": 0.26202693581581116, "learning_rate": 2.4999110965433615e-05, "loss": 0.6436, "step": 128 }, { "epoch": 0.03577371048252912, "grad_norm": 0.267046719789505, "learning_rate": 2.4999043892350213e-05, "loss": 0.6433, "step": 129 }, { "epoch": 0.036051026067665005, "grad_norm": 0.2713761329650879, "learning_rate": 2.499897438037219e-05, "loss": 0.6314, "step": 130 }, { "epoch": 0.03632834165280089, "grad_norm": 0.2704955041408539, "learning_rate": 2.4998902429513115e-05, "loss": 0.6471, "step": 131 }, { "epoch": 0.036605657237936774, "grad_norm": 0.25811654329299927, "learning_rate": 2.4998828039787027e-05, "loss": 0.6346, "step": 132 }, { "epoch": 0.03688297282307266, "grad_norm": 0.2768125832080841, "learning_rate": 2.4998751211208432e-05, "loss": 0.6327, "step": 133 }, { "epoch": 0.03716028840820854, "grad_norm": 0.29026105999946594, "learning_rate": 2.499867194379233e-05, "loss": 0.6632, "step": 134 }, { "epoch": 0.03743760399334443, "grad_norm": 0.26648250222206116, "learning_rate": 2.4998590237554182e-05, "loss": 0.6414, "step": 135 }, { "epoch": 0.03771491957848031, "grad_norm": 0.2578074336051941, "learning_rate": 2.4998506092509938e-05, "loss": 0.6459, "step": 136 }, { "epoch": 0.037992235163616196, "grad_norm": 0.2555679678916931, "learning_rate": 2.4998419508676014e-05, "loss": 0.6561, "step": 137 }, { "epoch": 0.03826955074875208, "grad_norm": 0.25471994280815125, "learning_rate": 2.4998330486069304e-05, "loss": 0.6616, "step": 138 }, { "epoch": 0.038546866333887965, "grad_norm": 0.2434554398059845, "learning_rate": 2.4998239024707183e-05, "loss": 0.6423, "step": 139 }, { "epoch": 0.03882418191902385, "grad_norm": 0.23697395622730255, "learning_rate": 2.4998145124607485e-05, "loss": 0.629, "step": 140 }, { "epoch": 0.039101497504159734, "grad_norm": 0.2652537226676941, "learning_rate": 2.4998048785788547e-05, "loss": 0.6558, "step": 141 }, { "epoch": 0.03937881308929562, "grad_norm": 0.2602185606956482, "learning_rate": 2.499795000826916e-05, "loss": 0.6427, "step": 142 }, { "epoch": 0.0396561286744315, "grad_norm": 0.23875969648361206, "learning_rate": 2.49978487920686e-05, "loss": 0.6097, "step": 143 }, { "epoch": 0.03993344425956739, "grad_norm": 0.2549594044685364, "learning_rate": 2.4997745137206618e-05, "loss": 0.6477, "step": 144 }, { "epoch": 0.04021075984470327, "grad_norm": 0.2528778910636902, "learning_rate": 2.4997639043703437e-05, "loss": 0.6028, "step": 145 }, { "epoch": 0.040488075429839156, "grad_norm": 0.252888023853302, "learning_rate": 2.499753051157976e-05, "loss": 0.6547, "step": 146 }, { "epoch": 0.04076539101497504, "grad_norm": 0.289661705493927, "learning_rate": 2.4997419540856762e-05, "loss": 0.6604, "step": 147 }, { "epoch": 0.041042706600110924, "grad_norm": 0.27772676944732666, "learning_rate": 2.49973061315561e-05, "loss": 0.6321, "step": 148 }, { "epoch": 0.04132002218524681, "grad_norm": 0.29435357451438904, "learning_rate": 2.4997190283699904e-05, "loss": 0.6539, "step": 149 }, { "epoch": 0.04159733777038269, "grad_norm": 0.2796315848827362, "learning_rate": 2.4997071997310774e-05, "loss": 0.6816, "step": 150 }, { "epoch": 0.04187465335551858, "grad_norm": 0.2854909598827362, "learning_rate": 2.4996951272411794e-05, "loss": 0.621, "step": 151 }, { "epoch": 0.04215196894065446, "grad_norm": 0.3513517677783966, "learning_rate": 2.499682810902652e-05, "loss": 0.6462, "step": 152 }, { "epoch": 0.042429284525790346, "grad_norm": 0.2492416799068451, "learning_rate": 2.4996702507178988e-05, "loss": 0.6455, "step": 153 }, { "epoch": 0.04270660011092623, "grad_norm": 0.2352532148361206, "learning_rate": 2.49965744668937e-05, "loss": 0.6168, "step": 154 }, { "epoch": 0.04298391569606212, "grad_norm": 0.25850751996040344, "learning_rate": 2.4996443988195644e-05, "loss": 0.6452, "step": 155 }, { "epoch": 0.04326123128119801, "grad_norm": 0.23972827196121216, "learning_rate": 2.499631107111028e-05, "loss": 0.6447, "step": 156 }, { "epoch": 0.04353854686633389, "grad_norm": 0.24847468733787537, "learning_rate": 2.499617571566354e-05, "loss": 0.6386, "step": 157 }, { "epoch": 0.043815862451469775, "grad_norm": 0.25739696621894836, "learning_rate": 2.4996037921881837e-05, "loss": 0.6417, "step": 158 }, { "epoch": 0.04409317803660566, "grad_norm": 0.23640736937522888, "learning_rate": 2.4995897689792062e-05, "loss": 0.6451, "step": 159 }, { "epoch": 0.044370493621741544, "grad_norm": 0.25362861156463623, "learning_rate": 2.4995755019421577e-05, "loss": 0.6525, "step": 160 }, { "epoch": 0.04464780920687743, "grad_norm": 0.2607216536998749, "learning_rate": 2.4995609910798214e-05, "loss": 0.6276, "step": 161 }, { "epoch": 0.04492512479201331, "grad_norm": 0.2426438182592392, "learning_rate": 2.4995462363950295e-05, "loss": 0.6375, "step": 162 }, { "epoch": 0.0452024403771492, "grad_norm": 0.35536178946495056, "learning_rate": 2.499531237890661e-05, "loss": 0.6502, "step": 163 }, { "epoch": 0.04547975596228508, "grad_norm": 0.2616370618343353, "learning_rate": 2.4995159955696417e-05, "loss": 0.6422, "step": 164 }, { "epoch": 0.045757071547420966, "grad_norm": 0.2493521124124527, "learning_rate": 2.4995005094349473e-05, "loss": 0.6314, "step": 165 }, { "epoch": 0.04603438713255685, "grad_norm": 0.25228554010391235, "learning_rate": 2.4994847794895977e-05, "loss": 0.6154, "step": 166 }, { "epoch": 0.046311702717692735, "grad_norm": 0.24656261503696442, "learning_rate": 2.4994688057366635e-05, "loss": 0.6241, "step": 167 }, { "epoch": 0.04658901830282862, "grad_norm": 0.27083900570869446, "learning_rate": 2.4994525881792612e-05, "loss": 0.627, "step": 168 }, { "epoch": 0.046866333887964504, "grad_norm": 0.24816080927848816, "learning_rate": 2.499436126820555e-05, "loss": 0.6237, "step": 169 }, { "epoch": 0.04714364947310039, "grad_norm": 0.2589535415172577, "learning_rate": 2.499419421663758e-05, "loss": 0.6139, "step": 170 }, { "epoch": 0.04742096505823627, "grad_norm": 0.2533140182495117, "learning_rate": 2.499402472712129e-05, "loss": 0.6533, "step": 171 }, { "epoch": 0.04769828064337216, "grad_norm": 0.23976951837539673, "learning_rate": 2.499385279968975e-05, "loss": 0.645, "step": 172 }, { "epoch": 0.04797559622850804, "grad_norm": 0.24593569338321686, "learning_rate": 2.4993678434376507e-05, "loss": 0.5958, "step": 173 }, { "epoch": 0.048252911813643926, "grad_norm": 0.283794641494751, "learning_rate": 2.4993501631215593e-05, "loss": 0.6565, "step": 174 }, { "epoch": 0.04853022739877981, "grad_norm": 0.23555535078048706, "learning_rate": 2.4993322390241496e-05, "loss": 0.6077, "step": 175 }, { "epoch": 0.048807542983915694, "grad_norm": 0.24843829870224, "learning_rate": 2.4993140711489203e-05, "loss": 0.6001, "step": 176 }, { "epoch": 0.04908485856905158, "grad_norm": 0.24111098051071167, "learning_rate": 2.4992956594994156e-05, "loss": 0.6445, "step": 177 }, { "epoch": 0.04936217415418746, "grad_norm": 0.2340569943189621, "learning_rate": 2.499277004079228e-05, "loss": 0.6174, "step": 178 }, { "epoch": 0.04963948973932335, "grad_norm": 0.26766470074653625, "learning_rate": 2.499258104891998e-05, "loss": 0.6167, "step": 179 }, { "epoch": 0.04991680532445923, "grad_norm": 0.2567934989929199, "learning_rate": 2.499238961941413e-05, "loss": 0.6394, "step": 180 }, { "epoch": 0.050194120909595116, "grad_norm": 0.27323460578918457, "learning_rate": 2.4992195752312093e-05, "loss": 0.6337, "step": 181 }, { "epoch": 0.050471436494731, "grad_norm": 0.3052992522716522, "learning_rate": 2.4991999447651686e-05, "loss": 0.6389, "step": 182 }, { "epoch": 0.050748752079866885, "grad_norm": 0.24447570741176605, "learning_rate": 2.4991800705471218e-05, "loss": 0.6165, "step": 183 }, { "epoch": 0.051026067665002776, "grad_norm": 0.319871187210083, "learning_rate": 2.499159952580947e-05, "loss": 0.5816, "step": 184 }, { "epoch": 0.05130338325013866, "grad_norm": 0.2554628551006317, "learning_rate": 2.4991395908705693e-05, "loss": 0.646, "step": 185 }, { "epoch": 0.051580698835274545, "grad_norm": 0.2671261727809906, "learning_rate": 2.499118985419962e-05, "loss": 0.6592, "step": 186 }, { "epoch": 0.05185801442041043, "grad_norm": 0.2524307370185852, "learning_rate": 2.4990981362331462e-05, "loss": 0.6178, "step": 187 }, { "epoch": 0.052135330005546314, "grad_norm": 0.2809012532234192, "learning_rate": 2.4990770433141898e-05, "loss": 0.6234, "step": 188 }, { "epoch": 0.0524126455906822, "grad_norm": 0.23483595252037048, "learning_rate": 2.499055706667208e-05, "loss": 0.6209, "step": 189 }, { "epoch": 0.05268996117581808, "grad_norm": 0.2775763273239136, "learning_rate": 2.4990341262963654e-05, "loss": 0.618, "step": 190 }, { "epoch": 0.05296727676095397, "grad_norm": 0.27081090211868286, "learning_rate": 2.499012302205872e-05, "loss": 0.6377, "step": 191 }, { "epoch": 0.05324459234608985, "grad_norm": 0.24989834427833557, "learning_rate": 2.4989902343999865e-05, "loss": 0.6179, "step": 192 }, { "epoch": 0.053521907931225736, "grad_norm": 0.27669793367385864, "learning_rate": 2.498967922883015e-05, "loss": 0.6298, "step": 193 }, { "epoch": 0.05379922351636162, "grad_norm": 0.2672564387321472, "learning_rate": 2.4989453676593106e-05, "loss": 0.6536, "step": 194 }, { "epoch": 0.054076539101497505, "grad_norm": 0.24099047482013702, "learning_rate": 2.4989225687332752e-05, "loss": 0.61, "step": 195 }, { "epoch": 0.05435385468663339, "grad_norm": 0.235582634806633, "learning_rate": 2.4988995261093566e-05, "loss": 0.654, "step": 196 }, { "epoch": 0.054631170271769273, "grad_norm": 0.2795652747154236, "learning_rate": 2.4988762397920517e-05, "loss": 0.6224, "step": 197 }, { "epoch": 0.05490848585690516, "grad_norm": 0.22800379991531372, "learning_rate": 2.4988527097859045e-05, "loss": 0.6186, "step": 198 }, { "epoch": 0.05518580144204104, "grad_norm": 0.24810528755187988, "learning_rate": 2.4988289360955053e-05, "loss": 0.6286, "step": 199 }, { "epoch": 0.05546311702717693, "grad_norm": 0.21688294410705566, "learning_rate": 2.4988049187254935e-05, "loss": 0.598, "step": 200 }, { "epoch": 0.05574043261231281, "grad_norm": 0.23709554970264435, "learning_rate": 2.4987806576805562e-05, "loss": 0.6598, "step": 201 }, { "epoch": 0.056017748197448695, "grad_norm": 0.24982847273349762, "learning_rate": 2.4987561529654263e-05, "loss": 0.6342, "step": 202 }, { "epoch": 0.05629506378258458, "grad_norm": 0.2215258628129959, "learning_rate": 2.498731404584886e-05, "loss": 0.6159, "step": 203 }, { "epoch": 0.056572379367720464, "grad_norm": 0.25177863240242004, "learning_rate": 2.4987064125437643e-05, "loss": 0.6289, "step": 204 }, { "epoch": 0.05684969495285635, "grad_norm": 0.24716275930404663, "learning_rate": 2.498681176846937e-05, "loss": 0.628, "step": 205 }, { "epoch": 0.05712701053799223, "grad_norm": 0.2888506054878235, "learning_rate": 2.49865569749933e-05, "loss": 0.6216, "step": 206 }, { "epoch": 0.05740432612312812, "grad_norm": 0.24658267199993134, "learning_rate": 2.4986299745059127e-05, "loss": 0.6132, "step": 207 }, { "epoch": 0.057681641708264, "grad_norm": 0.24297240376472473, "learning_rate": 2.4986040078717063e-05, "loss": 0.6201, "step": 208 }, { "epoch": 0.057958957293399886, "grad_norm": 0.2425074279308319, "learning_rate": 2.4985777976017767e-05, "loss": 0.5997, "step": 209 }, { "epoch": 0.05823627287853577, "grad_norm": 0.25336262583732605, "learning_rate": 2.498551343701238e-05, "loss": 0.6489, "step": 210 }, { "epoch": 0.058513588463671655, "grad_norm": 0.23498830199241638, "learning_rate": 2.498524646175253e-05, "loss": 0.6341, "step": 211 }, { "epoch": 0.058790904048807546, "grad_norm": 0.24754488468170166, "learning_rate": 2.49849770502903e-05, "loss": 0.6538, "step": 212 }, { "epoch": 0.05906821963394343, "grad_norm": 0.24818097054958344, "learning_rate": 2.4984705202678266e-05, "loss": 0.6098, "step": 213 }, { "epoch": 0.059345535219079315, "grad_norm": 0.22981123626232147, "learning_rate": 2.498443091896947e-05, "loss": 0.6072, "step": 214 }, { "epoch": 0.0596228508042152, "grad_norm": 0.2612292766571045, "learning_rate": 2.4984154199217434e-05, "loss": 0.626, "step": 215 }, { "epoch": 0.059900166389351084, "grad_norm": 0.26491644978523254, "learning_rate": 2.4983875043476153e-05, "loss": 0.6495, "step": 216 }, { "epoch": 0.06017748197448697, "grad_norm": 0.22399267554283142, "learning_rate": 2.4983593451800096e-05, "loss": 0.6341, "step": 217 }, { "epoch": 0.06045479755962285, "grad_norm": 0.2318061739206314, "learning_rate": 2.498330942424421e-05, "loss": 0.5787, "step": 218 }, { "epoch": 0.06073211314475874, "grad_norm": 0.2700578272342682, "learning_rate": 2.498302296086392e-05, "loss": 0.6314, "step": 219 }, { "epoch": 0.06100942872989462, "grad_norm": 0.22675910592079163, "learning_rate": 2.4982734061715112e-05, "loss": 0.5714, "step": 220 }, { "epoch": 0.061286744315030506, "grad_norm": 0.2522087097167969, "learning_rate": 2.4982442726854173e-05, "loss": 0.6053, "step": 221 }, { "epoch": 0.06156405990016639, "grad_norm": 0.22665978968143463, "learning_rate": 2.4982148956337935e-05, "loss": 0.6156, "step": 222 }, { "epoch": 0.061841375485302275, "grad_norm": 0.24832209944725037, "learning_rate": 2.4981852750223726e-05, "loss": 0.6406, "step": 223 }, { "epoch": 0.06211869107043816, "grad_norm": 0.2526067793369293, "learning_rate": 2.498155410856935e-05, "loss": 0.6312, "step": 224 }, { "epoch": 0.06239600665557404, "grad_norm": 0.24257095158100128, "learning_rate": 2.4981253031433076e-05, "loss": 0.6456, "step": 225 }, { "epoch": 0.06267332224070993, "grad_norm": 0.25497207045555115, "learning_rate": 2.4980949518873648e-05, "loss": 0.6047, "step": 226 }, { "epoch": 0.06295063782584581, "grad_norm": 0.23874424397945404, "learning_rate": 2.498064357095029e-05, "loss": 0.614, "step": 227 }, { "epoch": 0.0632279534109817, "grad_norm": 0.24398040771484375, "learning_rate": 2.498033518772271e-05, "loss": 0.6212, "step": 228 }, { "epoch": 0.06350526899611758, "grad_norm": 0.27126333117485046, "learning_rate": 2.498002436925107e-05, "loss": 0.6051, "step": 229 }, { "epoch": 0.06378258458125347, "grad_norm": 0.22852414846420288, "learning_rate": 2.497971111559602e-05, "loss": 0.6133, "step": 230 }, { "epoch": 0.06405990016638935, "grad_norm": 0.22752775251865387, "learning_rate": 2.4979395426818696e-05, "loss": 0.5893, "step": 231 }, { "epoch": 0.06433721575152523, "grad_norm": 0.27361559867858887, "learning_rate": 2.4979077302980683e-05, "loss": 0.6431, "step": 232 }, { "epoch": 0.06461453133666112, "grad_norm": 0.3071225881576538, "learning_rate": 2.497875674414406e-05, "loss": 0.617, "step": 233 }, { "epoch": 0.064891846921797, "grad_norm": 0.3025614023208618, "learning_rate": 2.4978433750371382e-05, "loss": 0.6294, "step": 234 }, { "epoch": 0.06516916250693289, "grad_norm": 0.21824614703655243, "learning_rate": 2.4978108321725667e-05, "loss": 0.6189, "step": 235 }, { "epoch": 0.06544647809206877, "grad_norm": 0.23781217634677887, "learning_rate": 2.497778045827042e-05, "loss": 0.6115, "step": 236 }, { "epoch": 0.06572379367720466, "grad_norm": 0.3532952666282654, "learning_rate": 2.497745016006961e-05, "loss": 0.6091, "step": 237 }, { "epoch": 0.06600110926234054, "grad_norm": 0.26994985342025757, "learning_rate": 2.4977117427187692e-05, "loss": 0.6078, "step": 238 }, { "epoch": 0.06627842484747642, "grad_norm": 0.2395590841770172, "learning_rate": 2.4976782259689587e-05, "loss": 0.6437, "step": 239 }, { "epoch": 0.06655574043261231, "grad_norm": 0.2347521334886551, "learning_rate": 2.49764446576407e-05, "loss": 0.6015, "step": 240 }, { "epoch": 0.0668330560177482, "grad_norm": 0.2603704333305359, "learning_rate": 2.49761046211069e-05, "loss": 0.6441, "step": 241 }, { "epoch": 0.06711037160288408, "grad_norm": 0.21548427641391754, "learning_rate": 2.4975762150154542e-05, "loss": 0.6059, "step": 242 }, { "epoch": 0.06738768718801996, "grad_norm": 0.2270384132862091, "learning_rate": 2.497541724485045e-05, "loss": 0.6148, "step": 243 }, { "epoch": 0.06766500277315585, "grad_norm": 0.23716577887535095, "learning_rate": 2.497506990526192e-05, "loss": 0.6135, "step": 244 }, { "epoch": 0.06794231835829173, "grad_norm": 0.24414962530136108, "learning_rate": 2.4974720131456736e-05, "loss": 0.6363, "step": 245 }, { "epoch": 0.06821963394342762, "grad_norm": 0.24200287461280823, "learning_rate": 2.497436792350314e-05, "loss": 0.6278, "step": 246 }, { "epoch": 0.0684969495285635, "grad_norm": 0.25273025035858154, "learning_rate": 2.497401328146986e-05, "loss": 0.6012, "step": 247 }, { "epoch": 0.06877426511369938, "grad_norm": 0.2444678694009781, "learning_rate": 2.4973656205426094e-05, "loss": 0.6218, "step": 248 }, { "epoch": 0.06905158069883527, "grad_norm": 0.25649040937423706, "learning_rate": 2.4973296695441523e-05, "loss": 0.6678, "step": 249 }, { "epoch": 0.06932889628397115, "grad_norm": 0.2729082703590393, "learning_rate": 2.4972934751586292e-05, "loss": 0.6018, "step": 250 }, { "epoch": 0.06960621186910704, "grad_norm": 0.23950397968292236, "learning_rate": 2.4972570373931026e-05, "loss": 0.6342, "step": 251 }, { "epoch": 0.06988352745424292, "grad_norm": 0.27791211009025574, "learning_rate": 2.4972203562546825e-05, "loss": 0.5948, "step": 252 }, { "epoch": 0.0701608430393788, "grad_norm": 0.24157491326332092, "learning_rate": 2.4971834317505266e-05, "loss": 0.6346, "step": 253 }, { "epoch": 0.0704381586245147, "grad_norm": 0.23233160376548767, "learning_rate": 2.4971462638878394e-05, "loss": 0.6023, "step": 254 }, { "epoch": 0.07071547420965059, "grad_norm": 0.22705158591270447, "learning_rate": 2.4971088526738737e-05, "loss": 0.6314, "step": 255 }, { "epoch": 0.07099278979478647, "grad_norm": 0.2364932894706726, "learning_rate": 2.4970711981159294e-05, "loss": 0.6239, "step": 256 }, { "epoch": 0.07127010537992236, "grad_norm": 0.2369173765182495, "learning_rate": 2.4970333002213535e-05, "loss": 0.6056, "step": 257 }, { "epoch": 0.07154742096505824, "grad_norm": 0.23944415152072906, "learning_rate": 2.4969951589975415e-05, "loss": 0.6188, "step": 258 }, { "epoch": 0.07182473655019413, "grad_norm": 0.24431641399860382, "learning_rate": 2.4969567744519357e-05, "loss": 0.6393, "step": 259 }, { "epoch": 0.07210205213533001, "grad_norm": 0.23058977723121643, "learning_rate": 2.4969181465920254e-05, "loss": 0.623, "step": 260 }, { "epoch": 0.0723793677204659, "grad_norm": 0.2396584004163742, "learning_rate": 2.4968792754253483e-05, "loss": 0.6085, "step": 261 }, { "epoch": 0.07265668330560178, "grad_norm": 0.28117048740386963, "learning_rate": 2.496840160959489e-05, "loss": 0.612, "step": 262 }, { "epoch": 0.07293399889073766, "grad_norm": 0.22773273289203644, "learning_rate": 2.49680080320208e-05, "loss": 0.6215, "step": 263 }, { "epoch": 0.07321131447587355, "grad_norm": 0.25395745038986206, "learning_rate": 2.496761202160801e-05, "loss": 0.6197, "step": 264 }, { "epoch": 0.07348863006100943, "grad_norm": 0.2748405337333679, "learning_rate": 2.496721357843379e-05, "loss": 0.5983, "step": 265 }, { "epoch": 0.07376594564614532, "grad_norm": 0.37056779861450195, "learning_rate": 2.496681270257589e-05, "loss": 0.5974, "step": 266 }, { "epoch": 0.0740432612312812, "grad_norm": 0.2506537139415741, "learning_rate": 2.4966409394112528e-05, "loss": 0.6279, "step": 267 }, { "epoch": 0.07432057681641709, "grad_norm": 0.3248004615306854, "learning_rate": 2.4966003653122406e-05, "loss": 0.5968, "step": 268 }, { "epoch": 0.07459789240155297, "grad_norm": 0.27633965015411377, "learning_rate": 2.4965595479684685e-05, "loss": 0.6207, "step": 269 }, { "epoch": 0.07487520798668885, "grad_norm": 0.2533873915672302, "learning_rate": 2.4965184873879015e-05, "loss": 0.6428, "step": 270 }, { "epoch": 0.07515252357182474, "grad_norm": 0.24860017001628876, "learning_rate": 2.496477183578552e-05, "loss": 0.5717, "step": 271 }, { "epoch": 0.07542983915696062, "grad_norm": 0.2541423738002777, "learning_rate": 2.4964356365484797e-05, "loss": 0.6331, "step": 272 }, { "epoch": 0.07570715474209651, "grad_norm": 0.21915870904922485, "learning_rate": 2.4963938463057907e-05, "loss": 0.6167, "step": 273 }, { "epoch": 0.07598447032723239, "grad_norm": 0.21635891497135162, "learning_rate": 2.4963518128586393e-05, "loss": 0.602, "step": 274 }, { "epoch": 0.07626178591236828, "grad_norm": 0.23918819427490234, "learning_rate": 2.4963095362152282e-05, "loss": 0.5869, "step": 275 }, { "epoch": 0.07653910149750416, "grad_norm": 0.22294320166110992, "learning_rate": 2.496267016383806e-05, "loss": 0.6256, "step": 276 }, { "epoch": 0.07681641708264005, "grad_norm": 0.2167348712682724, "learning_rate": 2.49622425337267e-05, "loss": 0.5936, "step": 277 }, { "epoch": 0.07709373266777593, "grad_norm": 0.21333451569080353, "learning_rate": 2.496181247190164e-05, "loss": 0.6221, "step": 278 }, { "epoch": 0.07737104825291181, "grad_norm": 0.22917450964450836, "learning_rate": 2.4961379978446793e-05, "loss": 0.6132, "step": 279 }, { "epoch": 0.0776483638380477, "grad_norm": 0.228413388133049, "learning_rate": 2.496094505344656e-05, "loss": 0.5953, "step": 280 }, { "epoch": 0.07792567942318358, "grad_norm": 0.20908214151859283, "learning_rate": 2.4960507696985796e-05, "loss": 0.6081, "step": 281 }, { "epoch": 0.07820299500831947, "grad_norm": 0.2270585596561432, "learning_rate": 2.4960067909149846e-05, "loss": 0.5915, "step": 282 }, { "epoch": 0.07848031059345535, "grad_norm": 0.23176094889640808, "learning_rate": 2.4959625690024524e-05, "loss": 0.6126, "step": 283 }, { "epoch": 0.07875762617859124, "grad_norm": 0.24496515095233917, "learning_rate": 2.495918103969612e-05, "loss": 0.6245, "step": 284 }, { "epoch": 0.07903494176372712, "grad_norm": 0.21696555614471436, "learning_rate": 2.4958733958251394e-05, "loss": 0.6104, "step": 285 }, { "epoch": 0.079312257348863, "grad_norm": 0.22292231023311615, "learning_rate": 2.4958284445777584e-05, "loss": 0.6164, "step": 286 }, { "epoch": 0.07958957293399889, "grad_norm": 0.24104639887809753, "learning_rate": 2.4957832502362404e-05, "loss": 0.6002, "step": 287 }, { "epoch": 0.07986688851913477, "grad_norm": 0.22866299748420715, "learning_rate": 2.495737812809404e-05, "loss": 0.6143, "step": 288 }, { "epoch": 0.08014420410427066, "grad_norm": 0.23108075559139252, "learning_rate": 2.495692132306115e-05, "loss": 0.6591, "step": 289 }, { "epoch": 0.08042151968940654, "grad_norm": 0.24755387008190155, "learning_rate": 2.4956462087352868e-05, "loss": 0.5883, "step": 290 }, { "epoch": 0.08069883527454243, "grad_norm": 0.23270832002162933, "learning_rate": 2.4956000421058807e-05, "loss": 0.5727, "step": 291 }, { "epoch": 0.08097615085967831, "grad_norm": 0.23588238656520844, "learning_rate": 2.4955536324269048e-05, "loss": 0.6178, "step": 292 }, { "epoch": 0.0812534664448142, "grad_norm": 0.26772478222846985, "learning_rate": 2.4955069797074147e-05, "loss": 0.6214, "step": 293 }, { "epoch": 0.08153078202995008, "grad_norm": 0.22470323741436005, "learning_rate": 2.495460083956514e-05, "loss": 0.61, "step": 294 }, { "epoch": 0.08180809761508596, "grad_norm": 0.22901985049247742, "learning_rate": 2.495412945183353e-05, "loss": 0.5965, "step": 295 }, { "epoch": 0.08208541320022185, "grad_norm": 0.23927484452724457, "learning_rate": 2.49536556339713e-05, "loss": 0.6229, "step": 296 }, { "epoch": 0.08236272878535773, "grad_norm": 0.2505173087120056, "learning_rate": 2.49531793860709e-05, "loss": 0.5974, "step": 297 }, { "epoch": 0.08264004437049362, "grad_norm": 0.7846159934997559, "learning_rate": 2.4952700708225263e-05, "loss": 0.6097, "step": 298 }, { "epoch": 0.0829173599556295, "grad_norm": 0.23284678161144257, "learning_rate": 2.4952219600527786e-05, "loss": 0.6161, "step": 299 }, { "epoch": 0.08319467554076539, "grad_norm": 0.22659331560134888, "learning_rate": 2.4951736063072356e-05, "loss": 0.5917, "step": 300 }, { "epoch": 0.08347199112590127, "grad_norm": 0.24401767551898956, "learning_rate": 2.4951250095953315e-05, "loss": 0.6163, "step": 301 }, { "epoch": 0.08374930671103716, "grad_norm": 0.23994800448417664, "learning_rate": 2.4950761699265487e-05, "loss": 0.6035, "step": 302 }, { "epoch": 0.08402662229617304, "grad_norm": 0.293527752161026, "learning_rate": 2.495027087310418e-05, "loss": 0.6148, "step": 303 }, { "epoch": 0.08430393788130892, "grad_norm": 0.2797812819480896, "learning_rate": 2.4949777617565156e-05, "loss": 0.6249, "step": 304 }, { "epoch": 0.08458125346644481, "grad_norm": 0.2422715574502945, "learning_rate": 2.4949281932744672e-05, "loss": 0.6064, "step": 305 }, { "epoch": 0.08485856905158069, "grad_norm": 0.2489105761051178, "learning_rate": 2.4948783818739446e-05, "loss": 0.6176, "step": 306 }, { "epoch": 0.08513588463671658, "grad_norm": 0.23189565539360046, "learning_rate": 2.4948283275646672e-05, "loss": 0.6172, "step": 307 }, { "epoch": 0.08541320022185246, "grad_norm": 0.21257632970809937, "learning_rate": 2.4947780303564015e-05, "loss": 0.6132, "step": 308 }, { "epoch": 0.08569051580698835, "grad_norm": 0.23266074061393738, "learning_rate": 2.4947274902589628e-05, "loss": 0.6001, "step": 309 }, { "epoch": 0.08596783139212424, "grad_norm": 0.21587929129600525, "learning_rate": 2.4946767072822126e-05, "loss": 0.6381, "step": 310 }, { "epoch": 0.08624514697726013, "grad_norm": 0.23052595555782318, "learning_rate": 2.4946256814360594e-05, "loss": 0.6643, "step": 311 }, { "epoch": 0.08652246256239601, "grad_norm": 0.2822146713733673, "learning_rate": 2.4945744127304598e-05, "loss": 0.6331, "step": 312 }, { "epoch": 0.0867997781475319, "grad_norm": 0.22692646086215973, "learning_rate": 2.4945229011754184e-05, "loss": 0.6126, "step": 313 }, { "epoch": 0.08707709373266778, "grad_norm": 0.2250347137451172, "learning_rate": 2.4944711467809855e-05, "loss": 0.6308, "step": 314 }, { "epoch": 0.08735440931780367, "grad_norm": 0.21644283831119537, "learning_rate": 2.4944191495572604e-05, "loss": 0.587, "step": 315 }, { "epoch": 0.08763172490293955, "grad_norm": 0.22959665954113007, "learning_rate": 2.494366909514389e-05, "loss": 0.6138, "step": 316 }, { "epoch": 0.08790904048807544, "grad_norm": 0.24681390821933746, "learning_rate": 2.4943144266625645e-05, "loss": 0.6309, "step": 317 }, { "epoch": 0.08818635607321132, "grad_norm": 0.22859139740467072, "learning_rate": 2.4942617010120282e-05, "loss": 0.5937, "step": 318 }, { "epoch": 0.0884636716583472, "grad_norm": 0.20714016258716583, "learning_rate": 2.4942087325730678e-05, "loss": 0.5925, "step": 319 }, { "epoch": 0.08874098724348309, "grad_norm": 0.2056405246257782, "learning_rate": 2.494155521356019e-05, "loss": 0.5922, "step": 320 }, { "epoch": 0.08901830282861897, "grad_norm": 0.22429367899894714, "learning_rate": 2.4941020673712644e-05, "loss": 0.6141, "step": 321 }, { "epoch": 0.08929561841375486, "grad_norm": 0.2454768568277359, "learning_rate": 2.494048370629235e-05, "loss": 0.6221, "step": 322 }, { "epoch": 0.08957293399889074, "grad_norm": 0.21887235343456268, "learning_rate": 2.493994431140408e-05, "loss": 0.6249, "step": 323 }, { "epoch": 0.08985024958402663, "grad_norm": 0.23439091444015503, "learning_rate": 2.493940248915308e-05, "loss": 0.6145, "step": 324 }, { "epoch": 0.09012756516916251, "grad_norm": 0.21770575642585754, "learning_rate": 2.4938858239645087e-05, "loss": 0.6123, "step": 325 }, { "epoch": 0.0904048807542984, "grad_norm": 0.24734006822109222, "learning_rate": 2.4938311562986284e-05, "loss": 0.6223, "step": 326 }, { "epoch": 0.09068219633943428, "grad_norm": 0.22917009890079498, "learning_rate": 2.4937762459283348e-05, "loss": 0.6041, "step": 327 }, { "epoch": 0.09095951192457016, "grad_norm": 0.22535157203674316, "learning_rate": 2.4937210928643423e-05, "loss": 0.6449, "step": 328 }, { "epoch": 0.09123682750970605, "grad_norm": 0.21863703429698944, "learning_rate": 2.4936656971174134e-05, "loss": 0.6144, "step": 329 }, { "epoch": 0.09151414309484193, "grad_norm": 0.24071593582630157, "learning_rate": 2.4936100586983563e-05, "loss": 0.6391, "step": 330 }, { "epoch": 0.09179145867997782, "grad_norm": 0.21045182645320892, "learning_rate": 2.4935541776180275e-05, "loss": 0.613, "step": 331 }, { "epoch": 0.0920687742651137, "grad_norm": 0.250699520111084, "learning_rate": 2.493498053887332e-05, "loss": 0.6155, "step": 332 }, { "epoch": 0.09234608985024959, "grad_norm": 0.22076334059238434, "learning_rate": 2.4934416875172202e-05, "loss": 0.6184, "step": 333 }, { "epoch": 0.09262340543538547, "grad_norm": 0.22932595014572144, "learning_rate": 2.4933850785186906e-05, "loss": 0.6234, "step": 334 }, { "epoch": 0.09290072102052135, "grad_norm": 0.2126377820968628, "learning_rate": 2.4933282269027898e-05, "loss": 0.5768, "step": 335 }, { "epoch": 0.09317803660565724, "grad_norm": 0.21872107684612274, "learning_rate": 2.49327113268061e-05, "loss": 0.5847, "step": 336 }, { "epoch": 0.09345535219079312, "grad_norm": 0.22751103341579437, "learning_rate": 2.4932137958632922e-05, "loss": 0.6241, "step": 337 }, { "epoch": 0.09373266777592901, "grad_norm": 0.24364197254180908, "learning_rate": 2.493156216462025e-05, "loss": 0.5956, "step": 338 }, { "epoch": 0.09400998336106489, "grad_norm": 0.2077159285545349, "learning_rate": 2.493098394488043e-05, "loss": 0.5911, "step": 339 }, { "epoch": 0.09428729894620078, "grad_norm": 0.24238905310630798, "learning_rate": 2.4930403299526292e-05, "loss": 0.629, "step": 340 }, { "epoch": 0.09456461453133666, "grad_norm": 0.22944410145282745, "learning_rate": 2.492982022867113e-05, "loss": 0.596, "step": 341 }, { "epoch": 0.09484193011647254, "grad_norm": 0.22006259858608246, "learning_rate": 2.492923473242872e-05, "loss": 0.581, "step": 342 }, { "epoch": 0.09511924570160843, "grad_norm": 0.2297179251909256, "learning_rate": 2.4928646810913307e-05, "loss": 0.6107, "step": 343 }, { "epoch": 0.09539656128674431, "grad_norm": 0.21393971145153046, "learning_rate": 2.4928056464239614e-05, "loss": 0.5773, "step": 344 }, { "epoch": 0.0956738768718802, "grad_norm": 0.23898737132549286, "learning_rate": 2.4927463692522825e-05, "loss": 0.6119, "step": 345 }, { "epoch": 0.09595119245701608, "grad_norm": 0.22290126979351044, "learning_rate": 2.4926868495878613e-05, "loss": 0.5721, "step": 346 }, { "epoch": 0.09622850804215197, "grad_norm": 0.23102609813213348, "learning_rate": 2.4926270874423113e-05, "loss": 0.5735, "step": 347 }, { "epoch": 0.09650582362728785, "grad_norm": 0.22276602685451508, "learning_rate": 2.4925670828272935e-05, "loss": 0.5799, "step": 348 }, { "epoch": 0.09678313921242374, "grad_norm": 0.229088693857193, "learning_rate": 2.492506835754517e-05, "loss": 0.6191, "step": 349 }, { "epoch": 0.09706045479755962, "grad_norm": 0.22365529835224152, "learning_rate": 2.4924463462357373e-05, "loss": 0.5932, "step": 350 }, { "epoch": 0.0973377703826955, "grad_norm": 0.21552829444408417, "learning_rate": 2.492385614282757e-05, "loss": 0.6091, "step": 351 }, { "epoch": 0.09761508596783139, "grad_norm": 0.2322327196598053, "learning_rate": 2.4923246399074272e-05, "loss": 0.6216, "step": 352 }, { "epoch": 0.09789240155296727, "grad_norm": 0.27425798773765564, "learning_rate": 2.4922634231216458e-05, "loss": 0.5915, "step": 353 }, { "epoch": 0.09816971713810316, "grad_norm": 0.21547527611255646, "learning_rate": 2.492201963937357e-05, "loss": 0.6003, "step": 354 }, { "epoch": 0.09844703272323904, "grad_norm": 0.24001803994178772, "learning_rate": 2.4921402623665535e-05, "loss": 0.5879, "step": 355 }, { "epoch": 0.09872434830837493, "grad_norm": 0.2128361016511917, "learning_rate": 2.492078318421275e-05, "loss": 0.6192, "step": 356 }, { "epoch": 0.09900166389351081, "grad_norm": 0.23891720175743103, "learning_rate": 2.492016132113608e-05, "loss": 0.6237, "step": 357 }, { "epoch": 0.0992789794786467, "grad_norm": 0.2530137300491333, "learning_rate": 2.4919537034556876e-05, "loss": 0.5975, "step": 358 }, { "epoch": 0.09955629506378258, "grad_norm": 0.22864577174186707, "learning_rate": 2.4918910324596944e-05, "loss": 0.6085, "step": 359 }, { "epoch": 0.09983361064891846, "grad_norm": 0.21646267175674438, "learning_rate": 2.4918281191378573e-05, "loss": 0.5734, "step": 360 }, { "epoch": 0.10011092623405435, "grad_norm": 0.21921531856060028, "learning_rate": 2.491764963502453e-05, "loss": 0.6003, "step": 361 }, { "epoch": 0.10038824181919023, "grad_norm": 0.22741259634494781, "learning_rate": 2.491701565565804e-05, "loss": 0.59, "step": 362 }, { "epoch": 0.10066555740432612, "grad_norm": 0.2382003366947174, "learning_rate": 2.4916379253402815e-05, "loss": 0.6021, "step": 363 }, { "epoch": 0.100942872989462, "grad_norm": 0.20885150134563446, "learning_rate": 2.4915740428383032e-05, "loss": 0.5973, "step": 364 }, { "epoch": 0.10122018857459789, "grad_norm": 0.1941784769296646, "learning_rate": 2.491509918072334e-05, "loss": 0.591, "step": 365 }, { "epoch": 0.10149750415973377, "grad_norm": 0.21724678575992584, "learning_rate": 2.491445551054887e-05, "loss": 0.6212, "step": 366 }, { "epoch": 0.10177481974486967, "grad_norm": 0.29596778750419617, "learning_rate": 2.4913809417985213e-05, "loss": 0.6241, "step": 367 }, { "epoch": 0.10205213533000555, "grad_norm": 0.23365665972232819, "learning_rate": 2.4913160903158443e-05, "loss": 0.6243, "step": 368 }, { "epoch": 0.10232945091514144, "grad_norm": 0.23263859748840332, "learning_rate": 2.4912509966195098e-05, "loss": 0.5946, "step": 369 }, { "epoch": 0.10260676650027732, "grad_norm": 0.22337226569652557, "learning_rate": 2.4911856607222196e-05, "loss": 0.6287, "step": 370 }, { "epoch": 0.1028840820854132, "grad_norm": 0.23225417733192444, "learning_rate": 2.491120082636722e-05, "loss": 0.5927, "step": 371 }, { "epoch": 0.10316139767054909, "grad_norm": 0.22292552888393402, "learning_rate": 2.4910542623758142e-05, "loss": 0.6208, "step": 372 }, { "epoch": 0.10343871325568497, "grad_norm": 0.21180188655853271, "learning_rate": 2.4909881999523382e-05, "loss": 0.5652, "step": 373 }, { "epoch": 0.10371602884082086, "grad_norm": 0.2395281195640564, "learning_rate": 2.4909218953791853e-05, "loss": 0.5922, "step": 374 }, { "epoch": 0.10399334442595674, "grad_norm": 0.2313883900642395, "learning_rate": 2.4908553486692926e-05, "loss": 0.6083, "step": 375 }, { "epoch": 0.10427066001109263, "grad_norm": 0.21677231788635254, "learning_rate": 2.4907885598356456e-05, "loss": 0.6115, "step": 376 }, { "epoch": 0.10454797559622851, "grad_norm": 0.21811628341674805, "learning_rate": 2.4907215288912766e-05, "loss": 0.5815, "step": 377 }, { "epoch": 0.1048252911813644, "grad_norm": 0.22422359883785248, "learning_rate": 2.4906542558492652e-05, "loss": 0.6161, "step": 378 }, { "epoch": 0.10510260676650028, "grad_norm": 0.2190743088722229, "learning_rate": 2.4905867407227377e-05, "loss": 0.5554, "step": 379 }, { "epoch": 0.10537992235163617, "grad_norm": 0.25590968132019043, "learning_rate": 2.490518983524869e-05, "loss": 0.5856, "step": 380 }, { "epoch": 0.10565723793677205, "grad_norm": 0.26324909925460815, "learning_rate": 2.490450984268879e-05, "loss": 0.6057, "step": 381 }, { "epoch": 0.10593455352190793, "grad_norm": 0.2394174039363861, "learning_rate": 2.490382742968037e-05, "loss": 0.6045, "step": 382 }, { "epoch": 0.10621186910704382, "grad_norm": 0.23230458796024323, "learning_rate": 2.4903142596356586e-05, "loss": 0.6188, "step": 383 }, { "epoch": 0.1064891846921797, "grad_norm": 0.21763205528259277, "learning_rate": 2.4902455342851067e-05, "loss": 0.5626, "step": 384 }, { "epoch": 0.10676650027731559, "grad_norm": 0.469051718711853, "learning_rate": 2.490176566929791e-05, "loss": 0.5909, "step": 385 }, { "epoch": 0.10704381586245147, "grad_norm": 0.24806742370128632, "learning_rate": 2.4901073575831697e-05, "loss": 0.6215, "step": 386 }, { "epoch": 0.10732113144758736, "grad_norm": 0.22851231694221497, "learning_rate": 2.4900379062587463e-05, "loss": 0.593, "step": 387 }, { "epoch": 0.10759844703272324, "grad_norm": 0.24515169858932495, "learning_rate": 2.489968212970074e-05, "loss": 0.6036, "step": 388 }, { "epoch": 0.10787576261785913, "grad_norm": 0.24662603437900543, "learning_rate": 2.4898982777307506e-05, "loss": 0.6153, "step": 389 }, { "epoch": 0.10815307820299501, "grad_norm": 0.2459113895893097, "learning_rate": 2.4898281005544227e-05, "loss": 0.5771, "step": 390 }, { "epoch": 0.1084303937881309, "grad_norm": 0.23075874149799347, "learning_rate": 2.489757681454784e-05, "loss": 0.6297, "step": 391 }, { "epoch": 0.10870770937326678, "grad_norm": 0.24344393610954285, "learning_rate": 2.4896870204455746e-05, "loss": 0.5993, "step": 392 }, { "epoch": 0.10898502495840266, "grad_norm": 0.2444470226764679, "learning_rate": 2.4896161175405826e-05, "loss": 0.6159, "step": 393 }, { "epoch": 0.10926234054353855, "grad_norm": 0.24199549853801727, "learning_rate": 2.4895449727536435e-05, "loss": 0.6177, "step": 394 }, { "epoch": 0.10953965612867443, "grad_norm": 0.20678602159023285, "learning_rate": 2.4894735860986385e-05, "loss": 0.5894, "step": 395 }, { "epoch": 0.10981697171381032, "grad_norm": 0.25881609320640564, "learning_rate": 2.489401957589498e-05, "loss": 0.631, "step": 396 }, { "epoch": 0.1100942872989462, "grad_norm": 0.2568078637123108, "learning_rate": 2.489330087240198e-05, "loss": 0.5902, "step": 397 }, { "epoch": 0.11037160288408208, "grad_norm": 0.2495458871126175, "learning_rate": 2.489257975064763e-05, "loss": 0.6141, "step": 398 }, { "epoch": 0.11064891846921797, "grad_norm": 0.6050971150398254, "learning_rate": 2.489185621077263e-05, "loss": 0.597, "step": 399 }, { "epoch": 0.11092623405435385, "grad_norm": 0.22337263822555542, "learning_rate": 2.489113025291817e-05, "loss": 0.58, "step": 400 }, { "epoch": 0.11120354963948974, "grad_norm": 0.20583049952983856, "learning_rate": 2.4890401877225898e-05, "loss": 0.5751, "step": 401 }, { "epoch": 0.11148086522462562, "grad_norm": 0.2487124800682068, "learning_rate": 2.488967108383795e-05, "loss": 0.6009, "step": 402 }, { "epoch": 0.1117581808097615, "grad_norm": 0.24986512959003448, "learning_rate": 2.4888937872896908e-05, "loss": 0.6203, "step": 403 }, { "epoch": 0.11203549639489739, "grad_norm": 0.28655165433883667, "learning_rate": 2.488820224454585e-05, "loss": 0.6037, "step": 404 }, { "epoch": 0.11231281198003328, "grad_norm": 0.24651272594928741, "learning_rate": 2.4887464198928317e-05, "loss": 0.5853, "step": 405 }, { "epoch": 0.11259012756516916, "grad_norm": 0.1938582807779312, "learning_rate": 2.4886723736188318e-05, "loss": 0.5888, "step": 406 }, { "epoch": 0.11286744315030504, "grad_norm": 0.22223535180091858, "learning_rate": 2.4885980856470338e-05, "loss": 0.627, "step": 407 }, { "epoch": 0.11314475873544093, "grad_norm": 0.24378454685211182, "learning_rate": 2.4885235559919328e-05, "loss": 0.5827, "step": 408 }, { "epoch": 0.11342207432057681, "grad_norm": 0.2019236534833908, "learning_rate": 2.4884487846680727e-05, "loss": 0.5976, "step": 409 }, { "epoch": 0.1136993899057127, "grad_norm": 0.21661922335624695, "learning_rate": 2.4883737716900424e-05, "loss": 0.6013, "step": 410 }, { "epoch": 0.11397670549084858, "grad_norm": 0.26957792043685913, "learning_rate": 2.4882985170724787e-05, "loss": 0.63, "step": 411 }, { "epoch": 0.11425402107598447, "grad_norm": 0.21899108588695526, "learning_rate": 2.4882230208300668e-05, "loss": 0.5935, "step": 412 }, { "epoch": 0.11453133666112035, "grad_norm": 0.2505897879600525, "learning_rate": 2.488147282977537e-05, "loss": 0.5689, "step": 413 }, { "epoch": 0.11480865224625623, "grad_norm": 0.20966675877571106, "learning_rate": 2.4880713035296686e-05, "loss": 0.5893, "step": 414 }, { "epoch": 0.11508596783139212, "grad_norm": 0.26599064469337463, "learning_rate": 2.4879950825012864e-05, "loss": 0.5912, "step": 415 }, { "epoch": 0.115363283416528, "grad_norm": 0.22095918655395508, "learning_rate": 2.487918619907264e-05, "loss": 0.6068, "step": 416 }, { "epoch": 0.11564059900166389, "grad_norm": 0.20822377502918243, "learning_rate": 2.4878419157625206e-05, "loss": 0.5783, "step": 417 }, { "epoch": 0.11591791458679977, "grad_norm": 0.20983396470546722, "learning_rate": 2.4877649700820232e-05, "loss": 0.6258, "step": 418 }, { "epoch": 0.11619523017193566, "grad_norm": 0.2288864701986313, "learning_rate": 2.4876877828807864e-05, "loss": 0.6196, "step": 419 }, { "epoch": 0.11647254575707154, "grad_norm": 0.20762163400650024, "learning_rate": 2.4876103541738714e-05, "loss": 0.5674, "step": 420 }, { "epoch": 0.11674986134220743, "grad_norm": 0.2152256816625595, "learning_rate": 2.4875326839763863e-05, "loss": 0.5681, "step": 421 }, { "epoch": 0.11702717692734331, "grad_norm": 0.25224751234054565, "learning_rate": 2.4874547723034865e-05, "loss": 0.5948, "step": 422 }, { "epoch": 0.11730449251247921, "grad_norm": 0.21316662430763245, "learning_rate": 2.4873766191703752e-05, "loss": 0.5757, "step": 423 }, { "epoch": 0.11758180809761509, "grad_norm": 0.20757247507572174, "learning_rate": 2.4872982245923014e-05, "loss": 0.5903, "step": 424 }, { "epoch": 0.11785912368275098, "grad_norm": 0.23846663534641266, "learning_rate": 2.487219588584563e-05, "loss": 0.5735, "step": 425 }, { "epoch": 0.11813643926788686, "grad_norm": 0.21389099955558777, "learning_rate": 2.4871407111625027e-05, "loss": 0.5998, "step": 426 }, { "epoch": 0.11841375485302275, "grad_norm": 0.21840502321720123, "learning_rate": 2.487061592341513e-05, "loss": 0.5854, "step": 427 }, { "epoch": 0.11869107043815863, "grad_norm": 0.23358672857284546, "learning_rate": 2.4869822321370308e-05, "loss": 0.6212, "step": 428 }, { "epoch": 0.11896838602329451, "grad_norm": 0.24467387795448303, "learning_rate": 2.4869026305645418e-05, "loss": 0.5937, "step": 429 }, { "epoch": 0.1192457016084304, "grad_norm": 0.24679329991340637, "learning_rate": 2.486822787639579e-05, "loss": 0.6027, "step": 430 }, { "epoch": 0.11952301719356628, "grad_norm": 0.22588002681732178, "learning_rate": 2.4867427033777206e-05, "loss": 0.5707, "step": 431 }, { "epoch": 0.11980033277870217, "grad_norm": 0.20728443562984467, "learning_rate": 2.486662377794594e-05, "loss": 0.5857, "step": 432 }, { "epoch": 0.12007764836383805, "grad_norm": 0.2292574942111969, "learning_rate": 2.4865818109058732e-05, "loss": 0.6288, "step": 433 }, { "epoch": 0.12035496394897394, "grad_norm": 0.22358085215091705, "learning_rate": 2.4865010027272784e-05, "loss": 0.6043, "step": 434 }, { "epoch": 0.12063227953410982, "grad_norm": 0.21650134027004242, "learning_rate": 2.4864199532745776e-05, "loss": 0.5772, "step": 435 }, { "epoch": 0.1209095951192457, "grad_norm": 0.21783700585365295, "learning_rate": 2.486338662563585e-05, "loss": 0.608, "step": 436 }, { "epoch": 0.12118691070438159, "grad_norm": 0.2252453863620758, "learning_rate": 2.4862571306101633e-05, "loss": 0.5783, "step": 437 }, { "epoch": 0.12146422628951747, "grad_norm": 0.22224466502666473, "learning_rate": 2.4861753574302217e-05, "loss": 0.5823, "step": 438 }, { "epoch": 0.12174154187465336, "grad_norm": 0.24375957250595093, "learning_rate": 2.486093343039716e-05, "loss": 0.5872, "step": 439 }, { "epoch": 0.12201885745978924, "grad_norm": 0.20903299748897552, "learning_rate": 2.4860110874546495e-05, "loss": 0.6237, "step": 440 }, { "epoch": 0.12229617304492513, "grad_norm": 0.23007185757160187, "learning_rate": 2.485928590691072e-05, "loss": 0.6188, "step": 441 }, { "epoch": 0.12257348863006101, "grad_norm": 0.23085376620292664, "learning_rate": 2.4858458527650814e-05, "loss": 0.5693, "step": 442 }, { "epoch": 0.1228508042151969, "grad_norm": 0.2241743952035904, "learning_rate": 2.485762873692822e-05, "loss": 0.6294, "step": 443 }, { "epoch": 0.12312811980033278, "grad_norm": 0.20904746651649475, "learning_rate": 2.4856796534904845e-05, "loss": 0.6301, "step": 444 }, { "epoch": 0.12340543538546866, "grad_norm": 0.6742352843284607, "learning_rate": 2.4855961921743083e-05, "loss": 0.5524, "step": 445 }, { "epoch": 0.12368275097060455, "grad_norm": 0.20682546496391296, "learning_rate": 2.4855124897605782e-05, "loss": 0.5907, "step": 446 }, { "epoch": 0.12396006655574043, "grad_norm": 0.2383589744567871, "learning_rate": 2.485428546265627e-05, "loss": 0.5865, "step": 447 }, { "epoch": 0.12423738214087632, "grad_norm": 0.2051754891872406, "learning_rate": 2.4853443617058348e-05, "loss": 0.6112, "step": 448 }, { "epoch": 0.1245146977260122, "grad_norm": 0.2156454175710678, "learning_rate": 2.4852599360976274e-05, "loss": 0.5913, "step": 449 }, { "epoch": 0.12479201331114809, "grad_norm": 0.22987020015716553, "learning_rate": 2.485175269457479e-05, "loss": 0.5873, "step": 450 }, { "epoch": 0.12506932889628397, "grad_norm": 0.20809032022953033, "learning_rate": 2.4850903618019102e-05, "loss": 0.582, "step": 451 }, { "epoch": 0.12534664448141986, "grad_norm": 0.2254360467195511, "learning_rate": 2.485005213147489e-05, "loss": 0.5998, "step": 452 }, { "epoch": 0.12562396006655574, "grad_norm": 0.214163139462471, "learning_rate": 2.4849198235108296e-05, "loss": 0.5884, "step": 453 }, { "epoch": 0.12590127565169162, "grad_norm": 0.21463198959827423, "learning_rate": 2.484834192908594e-05, "loss": 0.606, "step": 454 }, { "epoch": 0.1261785912368275, "grad_norm": 0.20102332532405853, "learning_rate": 2.4847483213574908e-05, "loss": 0.6012, "step": 455 }, { "epoch": 0.1264559068219634, "grad_norm": 0.19328515231609344, "learning_rate": 2.4846622088742765e-05, "loss": 0.5749, "step": 456 }, { "epoch": 0.12673322240709928, "grad_norm": 0.20251993834972382, "learning_rate": 2.484575855475753e-05, "loss": 0.6121, "step": 457 }, { "epoch": 0.12701053799223516, "grad_norm": 0.21547801792621613, "learning_rate": 2.484489261178771e-05, "loss": 0.6019, "step": 458 }, { "epoch": 0.12728785357737105, "grad_norm": 0.21968044340610504, "learning_rate": 2.4844024260002276e-05, "loss": 0.5863, "step": 459 }, { "epoch": 0.12756516916250693, "grad_norm": 0.21164929866790771, "learning_rate": 2.4843153499570648e-05, "loss": 0.5995, "step": 460 }, { "epoch": 0.12784248474764282, "grad_norm": 0.2152341902256012, "learning_rate": 2.4842280330662753e-05, "loss": 0.6374, "step": 461 }, { "epoch": 0.1281198003327787, "grad_norm": 0.19914227724075317, "learning_rate": 2.4841404753448963e-05, "loss": 0.5919, "step": 462 }, { "epoch": 0.12839711591791458, "grad_norm": 0.2268274873495102, "learning_rate": 2.4840526768100124e-05, "loss": 0.5913, "step": 463 }, { "epoch": 0.12867443150305047, "grad_norm": 0.21451812982559204, "learning_rate": 2.483964637478756e-05, "loss": 0.6146, "step": 464 }, { "epoch": 0.12895174708818635, "grad_norm": 0.1978655308485031, "learning_rate": 2.483876357368305e-05, "loss": 0.5938, "step": 465 }, { "epoch": 0.12922906267332224, "grad_norm": 0.20545656979084015, "learning_rate": 2.4837878364958865e-05, "loss": 0.6172, "step": 466 }, { "epoch": 0.12950637825845812, "grad_norm": 0.21529193222522736, "learning_rate": 2.483699074878772e-05, "loss": 0.5794, "step": 467 }, { "epoch": 0.129783693843594, "grad_norm": 0.2971234917640686, "learning_rate": 2.4836100725342818e-05, "loss": 0.6166, "step": 468 }, { "epoch": 0.1300610094287299, "grad_norm": 0.1968923807144165, "learning_rate": 2.4835208294797824e-05, "loss": 0.5898, "step": 469 }, { "epoch": 0.13033832501386577, "grad_norm": 0.2248852252960205, "learning_rate": 2.483431345732688e-05, "loss": 0.5984, "step": 470 }, { "epoch": 0.13061564059900166, "grad_norm": 0.21942903101444244, "learning_rate": 2.4833416213104588e-05, "loss": 0.5984, "step": 471 }, { "epoch": 0.13089295618413754, "grad_norm": 0.22266723215579987, "learning_rate": 2.4832516562306024e-05, "loss": 0.5858, "step": 472 }, { "epoch": 0.13117027176927343, "grad_norm": 0.21460357308387756, "learning_rate": 2.483161450510674e-05, "loss": 0.5763, "step": 473 }, { "epoch": 0.1314475873544093, "grad_norm": 0.333474725484848, "learning_rate": 2.4830710041682735e-05, "loss": 0.6024, "step": 474 }, { "epoch": 0.1317249029395452, "grad_norm": 0.1983480155467987, "learning_rate": 2.4829803172210515e-05, "loss": 0.5898, "step": 475 }, { "epoch": 0.13200221852468108, "grad_norm": 0.2835070788860321, "learning_rate": 2.482889389686702e-05, "loss": 0.571, "step": 476 }, { "epoch": 0.13227953410981697, "grad_norm": 0.2176080197095871, "learning_rate": 2.4827982215829674e-05, "loss": 0.5875, "step": 477 }, { "epoch": 0.13255684969495285, "grad_norm": 0.2436138391494751, "learning_rate": 2.482706812927638e-05, "loss": 0.5965, "step": 478 }, { "epoch": 0.13283416528008873, "grad_norm": 0.21060815453529358, "learning_rate": 2.4826151637385495e-05, "loss": 0.5881, "step": 479 }, { "epoch": 0.13311148086522462, "grad_norm": 0.49135246872901917, "learning_rate": 2.4825232740335847e-05, "loss": 0.5742, "step": 480 }, { "epoch": 0.1333887964503605, "grad_norm": 0.20535485446453094, "learning_rate": 2.4824311438306742e-05, "loss": 0.5877, "step": 481 }, { "epoch": 0.1336661120354964, "grad_norm": 0.20854201912879944, "learning_rate": 2.482338773147795e-05, "loss": 0.6065, "step": 482 }, { "epoch": 0.13394342762063227, "grad_norm": 0.20914287865161896, "learning_rate": 2.4822461620029708e-05, "loss": 0.5919, "step": 483 }, { "epoch": 0.13422074320576816, "grad_norm": 0.20028036832809448, "learning_rate": 2.4821533104142724e-05, "loss": 0.5707, "step": 484 }, { "epoch": 0.13449805879090404, "grad_norm": 0.22616969048976898, "learning_rate": 2.4820602183998185e-05, "loss": 0.5896, "step": 485 }, { "epoch": 0.13477537437603992, "grad_norm": 0.2049257457256317, "learning_rate": 2.4819668859777728e-05, "loss": 0.5693, "step": 486 }, { "epoch": 0.1350526899611758, "grad_norm": 0.21746453642845154, "learning_rate": 2.4818733131663473e-05, "loss": 0.6177, "step": 487 }, { "epoch": 0.1353300055463117, "grad_norm": 0.20084752142429352, "learning_rate": 2.4817794999838004e-05, "loss": 0.5871, "step": 488 }, { "epoch": 0.13560732113144758, "grad_norm": 0.2062511444091797, "learning_rate": 2.4816854464484378e-05, "loss": 0.5975, "step": 489 }, { "epoch": 0.13588463671658346, "grad_norm": 0.2201562523841858, "learning_rate": 2.4815911525786118e-05, "loss": 0.5683, "step": 490 }, { "epoch": 0.13616195230171935, "grad_norm": 0.22616079449653625, "learning_rate": 2.4814966183927213e-05, "loss": 0.6306, "step": 491 }, { "epoch": 0.13643926788685523, "grad_norm": 0.21003180742263794, "learning_rate": 2.4814018439092128e-05, "loss": 0.6064, "step": 492 }, { "epoch": 0.13671658347199112, "grad_norm": 0.2046622782945633, "learning_rate": 2.481306829146579e-05, "loss": 0.6107, "step": 493 }, { "epoch": 0.136993899057127, "grad_norm": 0.2102370411157608, "learning_rate": 2.4812115741233606e-05, "loss": 0.596, "step": 494 }, { "epoch": 0.13727121464226288, "grad_norm": 0.20774902403354645, "learning_rate": 2.4811160788581434e-05, "loss": 0.6111, "step": 495 }, { "epoch": 0.13754853022739877, "grad_norm": 0.20868700742721558, "learning_rate": 2.481020343369561e-05, "loss": 0.604, "step": 496 }, { "epoch": 0.13782584581253465, "grad_norm": 0.20590144395828247, "learning_rate": 2.4809243676762947e-05, "loss": 0.606, "step": 497 }, { "epoch": 0.13810316139767054, "grad_norm": 0.2019280344247818, "learning_rate": 2.4808281517970716e-05, "loss": 0.6034, "step": 498 }, { "epoch": 0.13838047698280642, "grad_norm": 0.22689440846443176, "learning_rate": 2.4807316957506656e-05, "loss": 0.5715, "step": 499 }, { "epoch": 0.1386577925679423, "grad_norm": 0.2134653776884079, "learning_rate": 2.4806349995558986e-05, "loss": 0.6184, "step": 500 }, { "epoch": 0.1389351081530782, "grad_norm": 0.20334339141845703, "learning_rate": 2.4805380632316377e-05, "loss": 0.5804, "step": 501 }, { "epoch": 0.13921242373821408, "grad_norm": 0.20713390409946442, "learning_rate": 2.4804408867967984e-05, "loss": 0.5898, "step": 502 }, { "epoch": 0.13948973932334996, "grad_norm": 0.21584905683994293, "learning_rate": 2.4803434702703422e-05, "loss": 0.5957, "step": 503 }, { "epoch": 0.13976705490848584, "grad_norm": 0.21197180449962616, "learning_rate": 2.4802458136712775e-05, "loss": 0.5981, "step": 504 }, { "epoch": 0.14004437049362173, "grad_norm": 0.19864031672477722, "learning_rate": 2.4801479170186597e-05, "loss": 0.6027, "step": 505 }, { "epoch": 0.1403216860787576, "grad_norm": 0.21110500395298004, "learning_rate": 2.4800497803315913e-05, "loss": 0.5882, "step": 506 }, { "epoch": 0.1405990016638935, "grad_norm": 0.20834285020828247, "learning_rate": 2.4799514036292215e-05, "loss": 0.5935, "step": 507 }, { "epoch": 0.1408763172490294, "grad_norm": 0.22122903168201447, "learning_rate": 2.4798527869307454e-05, "loss": 0.6011, "step": 508 }, { "epoch": 0.1411536328341653, "grad_norm": 0.21510954201221466, "learning_rate": 2.4797539302554064e-05, "loss": 0.6266, "step": 509 }, { "epoch": 0.14143094841930118, "grad_norm": 0.20589859783649445, "learning_rate": 2.479654833622494e-05, "loss": 0.5858, "step": 510 }, { "epoch": 0.14170826400443706, "grad_norm": 0.20928624272346497, "learning_rate": 2.4795554970513445e-05, "loss": 0.6006, "step": 511 }, { "epoch": 0.14198557958957295, "grad_norm": 0.2174837589263916, "learning_rate": 2.4794559205613412e-05, "loss": 0.5792, "step": 512 }, { "epoch": 0.14226289517470883, "grad_norm": 0.20877033472061157, "learning_rate": 2.4793561041719137e-05, "loss": 0.5662, "step": 513 }, { "epoch": 0.14254021075984472, "grad_norm": 0.240639790892601, "learning_rate": 2.479256047902539e-05, "loss": 0.5824, "step": 514 }, { "epoch": 0.1428175263449806, "grad_norm": 0.21567635238170624, "learning_rate": 2.479155751772741e-05, "loss": 0.5833, "step": 515 }, { "epoch": 0.14309484193011648, "grad_norm": 0.2284121960401535, "learning_rate": 2.4790552158020896e-05, "loss": 0.6057, "step": 516 }, { "epoch": 0.14337215751525237, "grad_norm": 0.19480617344379425, "learning_rate": 2.478954440010203e-05, "loss": 0.5972, "step": 517 }, { "epoch": 0.14364947310038825, "grad_norm": 0.20838883519172668, "learning_rate": 2.4788534244167443e-05, "loss": 0.6373, "step": 518 }, { "epoch": 0.14392678868552414, "grad_norm": 0.21365465223789215, "learning_rate": 2.4787521690414245e-05, "loss": 0.5796, "step": 519 }, { "epoch": 0.14420410427066002, "grad_norm": 2.2805471420288086, "learning_rate": 2.4786506739040018e-05, "loss": 0.5915, "step": 520 }, { "epoch": 0.1444814198557959, "grad_norm": 0.34635624289512634, "learning_rate": 2.47854893902428e-05, "loss": 0.6325, "step": 521 }, { "epoch": 0.1447587354409318, "grad_norm": 0.39266762137413025, "learning_rate": 2.47844696442211e-05, "loss": 0.5756, "step": 522 }, { "epoch": 0.14503605102606767, "grad_norm": 0.31766456365585327, "learning_rate": 2.4783447501173907e-05, "loss": 0.5703, "step": 523 }, { "epoch": 0.14531336661120356, "grad_norm": 0.24752533435821533, "learning_rate": 2.478242296130066e-05, "loss": 0.5878, "step": 524 }, { "epoch": 0.14559068219633944, "grad_norm": 0.24595655500888824, "learning_rate": 2.4781396024801272e-05, "loss": 0.5819, "step": 525 }, { "epoch": 0.14586799778147533, "grad_norm": 0.2457636296749115, "learning_rate": 2.478036669187614e-05, "loss": 0.599, "step": 526 }, { "epoch": 0.1461453133666112, "grad_norm": 0.244289368391037, "learning_rate": 2.4779334962726096e-05, "loss": 0.5922, "step": 527 }, { "epoch": 0.1464226289517471, "grad_norm": 0.23528233170509338, "learning_rate": 2.477830083755247e-05, "loss": 0.6032, "step": 528 }, { "epoch": 0.14669994453688298, "grad_norm": 0.2198038101196289, "learning_rate": 2.477726431655704e-05, "loss": 0.5954, "step": 529 }, { "epoch": 0.14697726012201887, "grad_norm": 0.23673711717128754, "learning_rate": 2.4776225399942066e-05, "loss": 0.5938, "step": 530 }, { "epoch": 0.14725457570715475, "grad_norm": 0.2085774540901184, "learning_rate": 2.4775184087910262e-05, "loss": 0.5856, "step": 531 }, { "epoch": 0.14753189129229063, "grad_norm": 0.21415582299232483, "learning_rate": 2.4774140380664816e-05, "loss": 0.5751, "step": 532 }, { "epoch": 0.14780920687742652, "grad_norm": 0.2082296758890152, "learning_rate": 2.4773094278409388e-05, "loss": 0.5573, "step": 533 }, { "epoch": 0.1480865224625624, "grad_norm": 0.20202411711215973, "learning_rate": 2.4772045781348093e-05, "loss": 0.5883, "step": 534 }, { "epoch": 0.1483638380476983, "grad_norm": 0.20766015350818634, "learning_rate": 2.477099488968553e-05, "loss": 0.6066, "step": 535 }, { "epoch": 0.14864115363283417, "grad_norm": 0.2137647122144699, "learning_rate": 2.4769941603626744e-05, "loss": 0.597, "step": 536 }, { "epoch": 0.14891846921797006, "grad_norm": 0.23699134588241577, "learning_rate": 2.4768885923377265e-05, "loss": 0.587, "step": 537 }, { "epoch": 0.14919578480310594, "grad_norm": 0.21466752886772156, "learning_rate": 2.4767827849143087e-05, "loss": 0.5725, "step": 538 }, { "epoch": 0.14947310038824183, "grad_norm": 0.20940807461738586, "learning_rate": 2.476676738113067e-05, "loss": 0.5807, "step": 539 }, { "epoch": 0.1497504159733777, "grad_norm": 0.22769619524478912, "learning_rate": 2.476570451954693e-05, "loss": 0.6089, "step": 540 }, { "epoch": 0.1500277315585136, "grad_norm": 0.20399393141269684, "learning_rate": 2.4764639264599266e-05, "loss": 0.5705, "step": 541 }, { "epoch": 0.15030504714364948, "grad_norm": 0.2241872102022171, "learning_rate": 2.4763571616495535e-05, "loss": 0.5731, "step": 542 }, { "epoch": 0.15058236272878536, "grad_norm": 0.20283614099025726, "learning_rate": 2.4762501575444062e-05, "loss": 0.6051, "step": 543 }, { "epoch": 0.15085967831392125, "grad_norm": 0.2145642638206482, "learning_rate": 2.4761429141653646e-05, "loss": 0.6069, "step": 544 }, { "epoch": 0.15113699389905713, "grad_norm": 0.2139946073293686, "learning_rate": 2.4760354315333546e-05, "loss": 0.6055, "step": 545 }, { "epoch": 0.15141430948419302, "grad_norm": 0.22807584702968597, "learning_rate": 2.4759277096693486e-05, "loss": 0.5945, "step": 546 }, { "epoch": 0.1516916250693289, "grad_norm": 0.2132754623889923, "learning_rate": 2.4758197485943657e-05, "loss": 0.5975, "step": 547 }, { "epoch": 0.15196894065446478, "grad_norm": 0.2016879767179489, "learning_rate": 2.4757115483294724e-05, "loss": 0.5863, "step": 548 }, { "epoch": 0.15224625623960067, "grad_norm": 0.227370485663414, "learning_rate": 2.475603108895782e-05, "loss": 0.583, "step": 549 }, { "epoch": 0.15252357182473655, "grad_norm": 0.22234570980072021, "learning_rate": 2.475494430314453e-05, "loss": 0.5962, "step": 550 }, { "epoch": 0.15280088740987244, "grad_norm": 0.20360559225082397, "learning_rate": 2.4753855126066916e-05, "loss": 0.587, "step": 551 }, { "epoch": 0.15307820299500832, "grad_norm": 0.23359502851963043, "learning_rate": 2.475276355793751e-05, "loss": 0.5967, "step": 552 }, { "epoch": 0.1533555185801442, "grad_norm": 0.23216257989406586, "learning_rate": 2.47516695989693e-05, "loss": 0.5845, "step": 553 }, { "epoch": 0.1536328341652801, "grad_norm": 0.21213343739509583, "learning_rate": 2.475057324937575e-05, "loss": 0.5821, "step": 554 }, { "epoch": 0.15391014975041598, "grad_norm": 0.21203738451004028, "learning_rate": 2.4749474509370784e-05, "loss": 0.5792, "step": 555 }, { "epoch": 0.15418746533555186, "grad_norm": 0.21234023571014404, "learning_rate": 2.4748373379168805e-05, "loss": 0.5985, "step": 556 }, { "epoch": 0.15446478092068774, "grad_norm": 0.20847538113594055, "learning_rate": 2.4747269858984658e-05, "loss": 0.595, "step": 557 }, { "epoch": 0.15474209650582363, "grad_norm": 0.20475518703460693, "learning_rate": 2.474616394903368e-05, "loss": 0.5821, "step": 558 }, { "epoch": 0.1550194120909595, "grad_norm": 0.211504727602005, "learning_rate": 2.474505564953166e-05, "loss": 0.572, "step": 559 }, { "epoch": 0.1552967276760954, "grad_norm": 0.21250484883785248, "learning_rate": 2.4743944960694854e-05, "loss": 0.5748, "step": 560 }, { "epoch": 0.15557404326123128, "grad_norm": 0.2148432582616806, "learning_rate": 2.4742831882739988e-05, "loss": 0.5881, "step": 561 }, { "epoch": 0.15585135884636717, "grad_norm": 0.19098572432994843, "learning_rate": 2.4741716415884257e-05, "loss": 0.5989, "step": 562 }, { "epoch": 0.15612867443150305, "grad_norm": 0.20260894298553467, "learning_rate": 2.474059856034531e-05, "loss": 0.567, "step": 563 }, { "epoch": 0.15640599001663893, "grad_norm": 0.21840746700763702, "learning_rate": 2.4739478316341282e-05, "loss": 0.6054, "step": 564 }, { "epoch": 0.15668330560177482, "grad_norm": 0.2050980031490326, "learning_rate": 2.473835568409075e-05, "loss": 0.5842, "step": 565 }, { "epoch": 0.1569606211869107, "grad_norm": 0.20163971185684204, "learning_rate": 2.473723066381278e-05, "loss": 0.5823, "step": 566 }, { "epoch": 0.1572379367720466, "grad_norm": 0.2088451236486435, "learning_rate": 2.473610325572689e-05, "loss": 0.5995, "step": 567 }, { "epoch": 0.15751525235718247, "grad_norm": 0.20921272039413452, "learning_rate": 2.4734973460053056e-05, "loss": 0.585, "step": 568 }, { "epoch": 0.15779256794231836, "grad_norm": 0.22330057621002197, "learning_rate": 2.473384127701175e-05, "loss": 0.5888, "step": 569 }, { "epoch": 0.15806988352745424, "grad_norm": 0.2152683287858963, "learning_rate": 2.4732706706823876e-05, "loss": 0.5942, "step": 570 }, { "epoch": 0.15834719911259013, "grad_norm": 0.20223170518875122, "learning_rate": 2.4731569749710824e-05, "loss": 0.5781, "step": 571 }, { "epoch": 0.158624514697726, "grad_norm": 0.20824022591114044, "learning_rate": 2.4730430405894446e-05, "loss": 0.6404, "step": 572 }, { "epoch": 0.1589018302828619, "grad_norm": 0.19907240569591522, "learning_rate": 2.4729288675597058e-05, "loss": 0.5983, "step": 573 }, { "epoch": 0.15917914586799778, "grad_norm": 0.20674046874046326, "learning_rate": 2.472814455904144e-05, "loss": 0.5595, "step": 574 }, { "epoch": 0.15945646145313366, "grad_norm": 0.19486385583877563, "learning_rate": 2.4726998056450833e-05, "loss": 0.5783, "step": 575 }, { "epoch": 0.15973377703826955, "grad_norm": 0.2102123498916626, "learning_rate": 2.4725849168048965e-05, "loss": 0.5809, "step": 576 }, { "epoch": 0.16001109262340543, "grad_norm": 0.21006052196025848, "learning_rate": 2.4724697894060005e-05, "loss": 0.5882, "step": 577 }, { "epoch": 0.16028840820854132, "grad_norm": 0.22287555038928986, "learning_rate": 2.47235442347086e-05, "loss": 0.6012, "step": 578 }, { "epoch": 0.1605657237936772, "grad_norm": 0.20599472522735596, "learning_rate": 2.4722388190219852e-05, "loss": 0.5971, "step": 579 }, { "epoch": 0.16084303937881309, "grad_norm": 0.21176591515541077, "learning_rate": 2.4721229760819348e-05, "loss": 0.5954, "step": 580 }, { "epoch": 0.16112035496394897, "grad_norm": 0.24732773005962372, "learning_rate": 2.4720068946733123e-05, "loss": 0.5818, "step": 581 }, { "epoch": 0.16139767054908485, "grad_norm": 0.20434054732322693, "learning_rate": 2.4718905748187677e-05, "loss": 0.5745, "step": 582 }, { "epoch": 0.16167498613422074, "grad_norm": 0.20684310793876648, "learning_rate": 2.4717740165409988e-05, "loss": 0.5663, "step": 583 }, { "epoch": 0.16195230171935662, "grad_norm": 0.2029474377632141, "learning_rate": 2.471657219862749e-05, "loss": 0.5855, "step": 584 }, { "epoch": 0.1622296173044925, "grad_norm": 0.2033785730600357, "learning_rate": 2.4715401848068086e-05, "loss": 0.6119, "step": 585 }, { "epoch": 0.1625069328896284, "grad_norm": 0.21371322870254517, "learning_rate": 2.4714229113960135e-05, "loss": 0.6022, "step": 586 }, { "epoch": 0.16278424847476428, "grad_norm": 0.20918406546115875, "learning_rate": 2.4713053996532477e-05, "loss": 0.569, "step": 587 }, { "epoch": 0.16306156405990016, "grad_norm": 0.2060522437095642, "learning_rate": 2.4711876496014407e-05, "loss": 0.5982, "step": 588 }, { "epoch": 0.16333887964503604, "grad_norm": 0.20782527327537537, "learning_rate": 2.4710696612635688e-05, "loss": 0.6015, "step": 589 }, { "epoch": 0.16361619523017193, "grad_norm": 0.20826764404773712, "learning_rate": 2.4709514346626536e-05, "loss": 0.6094, "step": 590 }, { "epoch": 0.1638935108153078, "grad_norm": 0.20720824599266052, "learning_rate": 2.4708329698217652e-05, "loss": 0.6054, "step": 591 }, { "epoch": 0.1641708264004437, "grad_norm": 0.19394385814666748, "learning_rate": 2.4707142667640193e-05, "loss": 0.5812, "step": 592 }, { "epoch": 0.16444814198557958, "grad_norm": 0.2022271454334259, "learning_rate": 2.4705953255125777e-05, "loss": 0.6084, "step": 593 }, { "epoch": 0.16472545757071547, "grad_norm": 0.21304059028625488, "learning_rate": 2.4704761460906488e-05, "loss": 0.5673, "step": 594 }, { "epoch": 0.16500277315585135, "grad_norm": 0.20137831568717957, "learning_rate": 2.470356728521488e-05, "loss": 0.5945, "step": 595 }, { "epoch": 0.16528008874098724, "grad_norm": 0.20188415050506592, "learning_rate": 2.470237072828397e-05, "loss": 0.5849, "step": 596 }, { "epoch": 0.16555740432612312, "grad_norm": 0.206806018948555, "learning_rate": 2.4701171790347233e-05, "loss": 0.5863, "step": 597 }, { "epoch": 0.165834719911259, "grad_norm": 0.2093089371919632, "learning_rate": 2.4699970471638613e-05, "loss": 0.601, "step": 598 }, { "epoch": 0.1661120354963949, "grad_norm": 0.19595085084438324, "learning_rate": 2.4698766772392524e-05, "loss": 0.5993, "step": 599 }, { "epoch": 0.16638935108153077, "grad_norm": 0.20450963079929352, "learning_rate": 2.469756069284384e-05, "loss": 0.5875, "step": 600 }, { "epoch": 0.16666666666666666, "grad_norm": 0.18902625143527985, "learning_rate": 2.4696352233227894e-05, "loss": 0.5943, "step": 601 }, { "epoch": 0.16694398225180254, "grad_norm": 0.19143831729888916, "learning_rate": 2.469514139378049e-05, "loss": 0.5925, "step": 602 }, { "epoch": 0.16722129783693843, "grad_norm": 0.20280803740024567, "learning_rate": 2.46939281747379e-05, "loss": 0.596, "step": 603 }, { "epoch": 0.1674986134220743, "grad_norm": 0.20762760937213898, "learning_rate": 2.4692712576336848e-05, "loss": 0.5951, "step": 604 }, { "epoch": 0.1677759290072102, "grad_norm": 0.209476500749588, "learning_rate": 2.4691494598814536e-05, "loss": 0.5988, "step": 605 }, { "epoch": 0.16805324459234608, "grad_norm": 0.23190903663635254, "learning_rate": 2.4690274242408617e-05, "loss": 0.5928, "step": 606 }, { "epoch": 0.16833056017748196, "grad_norm": 0.20941099524497986, "learning_rate": 2.4689051507357218e-05, "loss": 0.6001, "step": 607 }, { "epoch": 0.16860787576261785, "grad_norm": 0.20067985355854034, "learning_rate": 2.468782639389893e-05, "loss": 0.572, "step": 608 }, { "epoch": 0.16888519134775373, "grad_norm": 0.20099548995494843, "learning_rate": 2.4686598902272793e-05, "loss": 0.5603, "step": 609 }, { "epoch": 0.16916250693288962, "grad_norm": 0.20298384130001068, "learning_rate": 2.4685369032718343e-05, "loss": 0.5657, "step": 610 }, { "epoch": 0.1694398225180255, "grad_norm": 0.19231431186199188, "learning_rate": 2.4684136785475544e-05, "loss": 0.5628, "step": 611 }, { "epoch": 0.16971713810316139, "grad_norm": 0.20296776294708252, "learning_rate": 2.468290216078485e-05, "loss": 0.6011, "step": 612 }, { "epoch": 0.16999445368829727, "grad_norm": 0.19989420473575592, "learning_rate": 2.468166515888716e-05, "loss": 0.5876, "step": 613 }, { "epoch": 0.17027176927343315, "grad_norm": 0.19636170566082, "learning_rate": 2.4680425780023852e-05, "loss": 0.5852, "step": 614 }, { "epoch": 0.17054908485856904, "grad_norm": 0.21776345372200012, "learning_rate": 2.4679184024436757e-05, "loss": 0.5988, "step": 615 }, { "epoch": 0.17082640044370492, "grad_norm": 0.2017216682434082, "learning_rate": 2.4677939892368183e-05, "loss": 0.6135, "step": 616 }, { "epoch": 0.1711037160288408, "grad_norm": 0.218113973736763, "learning_rate": 2.4676693384060884e-05, "loss": 0.5727, "step": 617 }, { "epoch": 0.1713810316139767, "grad_norm": 0.2070799022912979, "learning_rate": 2.4675444499758093e-05, "loss": 0.6229, "step": 618 }, { "epoch": 0.17165834719911258, "grad_norm": 0.211971253156662, "learning_rate": 2.4674193239703496e-05, "loss": 0.5909, "step": 619 }, { "epoch": 0.1719356627842485, "grad_norm": 0.21103401482105255, "learning_rate": 2.4672939604141248e-05, "loss": 0.5805, "step": 620 }, { "epoch": 0.17221297836938437, "grad_norm": 0.22362381219863892, "learning_rate": 2.467168359331597e-05, "loss": 0.6006, "step": 621 }, { "epoch": 0.17249029395452026, "grad_norm": 0.1989423632621765, "learning_rate": 2.4670425207472737e-05, "loss": 0.5895, "step": 622 }, { "epoch": 0.17276760953965614, "grad_norm": 0.20032650232315063, "learning_rate": 2.46691644468571e-05, "loss": 0.5856, "step": 623 }, { "epoch": 0.17304492512479203, "grad_norm": 0.21420548856258392, "learning_rate": 2.466790131171506e-05, "loss": 0.5835, "step": 624 }, { "epoch": 0.1733222407099279, "grad_norm": 0.2181633710861206, "learning_rate": 2.466663580229309e-05, "loss": 0.5608, "step": 625 }, { "epoch": 0.1735995562950638, "grad_norm": 0.19224753975868225, "learning_rate": 2.4665367918838135e-05, "loss": 0.5826, "step": 626 }, { "epoch": 0.17387687188019968, "grad_norm": 0.20331954956054688, "learning_rate": 2.4664097661597576e-05, "loss": 0.5948, "step": 627 }, { "epoch": 0.17415418746533556, "grad_norm": 0.20249582827091217, "learning_rate": 2.4662825030819282e-05, "loss": 0.5894, "step": 628 }, { "epoch": 0.17443150305047145, "grad_norm": 0.19642974436283112, "learning_rate": 2.466155002675158e-05, "loss": 0.5938, "step": 629 }, { "epoch": 0.17470881863560733, "grad_norm": 0.20837126672267914, "learning_rate": 2.466027264964325e-05, "loss": 0.589, "step": 630 }, { "epoch": 0.17498613422074322, "grad_norm": 0.1986762434244156, "learning_rate": 2.465899289974355e-05, "loss": 0.5677, "step": 631 }, { "epoch": 0.1752634498058791, "grad_norm": 0.47144341468811035, "learning_rate": 2.4657710777302183e-05, "loss": 0.6075, "step": 632 }, { "epoch": 0.17554076539101499, "grad_norm": 0.2005637288093567, "learning_rate": 2.465642628256934e-05, "loss": 0.5863, "step": 633 }, { "epoch": 0.17581808097615087, "grad_norm": 0.2160159796476364, "learning_rate": 2.465513941579564e-05, "loss": 0.5661, "step": 634 }, { "epoch": 0.17609539656128675, "grad_norm": 0.22309495508670807, "learning_rate": 2.4653850177232203e-05, "loss": 0.6029, "step": 635 }, { "epoch": 0.17637271214642264, "grad_norm": 0.20880426466464996, "learning_rate": 2.4652558567130585e-05, "loss": 0.6039, "step": 636 }, { "epoch": 0.17665002773155852, "grad_norm": 0.20371706783771515, "learning_rate": 2.4651264585742813e-05, "loss": 0.5974, "step": 637 }, { "epoch": 0.1769273433166944, "grad_norm": 0.19604718685150146, "learning_rate": 2.464996823332138e-05, "loss": 0.6012, "step": 638 }, { "epoch": 0.1772046589018303, "grad_norm": 0.2000109851360321, "learning_rate": 2.4648669510119235e-05, "loss": 0.6038, "step": 639 }, { "epoch": 0.17748197448696618, "grad_norm": 0.19588269293308258, "learning_rate": 2.46473684163898e-05, "loss": 0.5949, "step": 640 }, { "epoch": 0.17775929007210206, "grad_norm": 0.2040427029132843, "learning_rate": 2.4646064952386945e-05, "loss": 0.5616, "step": 641 }, { "epoch": 0.17803660565723795, "grad_norm": 0.2059299200773239, "learning_rate": 2.4644759118365014e-05, "loss": 0.5785, "step": 642 }, { "epoch": 0.17831392124237383, "grad_norm": 0.1959904134273529, "learning_rate": 2.464345091457881e-05, "loss": 0.5691, "step": 643 }, { "epoch": 0.17859123682750971, "grad_norm": 0.20045484602451324, "learning_rate": 2.46421403412836e-05, "loss": 0.5831, "step": 644 }, { "epoch": 0.1788685524126456, "grad_norm": 0.18542559444904327, "learning_rate": 2.4640827398735105e-05, "loss": 0.5666, "step": 645 }, { "epoch": 0.17914586799778148, "grad_norm": 0.29157590866088867, "learning_rate": 2.463951208718952e-05, "loss": 0.5841, "step": 646 }, { "epoch": 0.17942318358291737, "grad_norm": 0.20582380890846252, "learning_rate": 2.46381944069035e-05, "loss": 0.618, "step": 647 }, { "epoch": 0.17970049916805325, "grad_norm": 0.20922648906707764, "learning_rate": 2.4636874358134153e-05, "loss": 0.5831, "step": 648 }, { "epoch": 0.17997781475318914, "grad_norm": 0.19867978990077972, "learning_rate": 2.463555194113906e-05, "loss": 0.5734, "step": 649 }, { "epoch": 0.18025513033832502, "grad_norm": 0.19810400903224945, "learning_rate": 2.463422715617626e-05, "loss": 0.6086, "step": 650 }, { "epoch": 0.1805324459234609, "grad_norm": 0.22697949409484863, "learning_rate": 2.4632900003504246e-05, "loss": 0.5942, "step": 651 }, { "epoch": 0.1808097615085968, "grad_norm": 0.21418651938438416, "learning_rate": 2.4631570483381992e-05, "loss": 0.5793, "step": 652 }, { "epoch": 0.18108707709373267, "grad_norm": 0.22276844084262848, "learning_rate": 2.4630238596068914e-05, "loss": 0.5998, "step": 653 }, { "epoch": 0.18136439267886856, "grad_norm": 0.237818643450737, "learning_rate": 2.4628904341824898e-05, "loss": 0.5787, "step": 654 }, { "epoch": 0.18164170826400444, "grad_norm": 0.20757392048835754, "learning_rate": 2.46275677209103e-05, "loss": 0.5603, "step": 655 }, { "epoch": 0.18191902384914033, "grad_norm": 0.20871873199939728, "learning_rate": 2.4626228733585926e-05, "loss": 0.5689, "step": 656 }, { "epoch": 0.1821963394342762, "grad_norm": 0.2344467043876648, "learning_rate": 2.4624887380113048e-05, "loss": 0.5887, "step": 657 }, { "epoch": 0.1824736550194121, "grad_norm": 0.20889438688755035, "learning_rate": 2.4623543660753397e-05, "loss": 0.5699, "step": 658 }, { "epoch": 0.18275097060454798, "grad_norm": 0.23723891377449036, "learning_rate": 2.4622197575769173e-05, "loss": 0.5691, "step": 659 }, { "epoch": 0.18302828618968386, "grad_norm": 0.20685730874538422, "learning_rate": 2.462084912542303e-05, "loss": 0.585, "step": 660 }, { "epoch": 0.18330560177481975, "grad_norm": 0.18916456401348114, "learning_rate": 2.4619498309978085e-05, "loss": 0.5785, "step": 661 }, { "epoch": 0.18358291735995563, "grad_norm": 0.19421158730983734, "learning_rate": 2.4618145129697916e-05, "loss": 0.5742, "step": 662 }, { "epoch": 0.18386023294509152, "grad_norm": 0.19799606502056122, "learning_rate": 2.4616789584846575e-05, "loss": 0.5642, "step": 663 }, { "epoch": 0.1841375485302274, "grad_norm": 0.20754088461399078, "learning_rate": 2.4615431675688556e-05, "loss": 0.5793, "step": 664 }, { "epoch": 0.1844148641153633, "grad_norm": 0.20479615032672882, "learning_rate": 2.4614071402488822e-05, "loss": 0.6009, "step": 665 }, { "epoch": 0.18469217970049917, "grad_norm": 0.20695596933364868, "learning_rate": 2.4612708765512803e-05, "loss": 0.607, "step": 666 }, { "epoch": 0.18496949528563505, "grad_norm": 0.21166419982910156, "learning_rate": 2.4611343765026385e-05, "loss": 0.5889, "step": 667 }, { "epoch": 0.18524681087077094, "grad_norm": 0.19035880267620087, "learning_rate": 2.4609976401295914e-05, "loss": 0.5596, "step": 668 }, { "epoch": 0.18552412645590682, "grad_norm": 0.2189519852399826, "learning_rate": 2.4608606674588196e-05, "loss": 0.595, "step": 669 }, { "epoch": 0.1858014420410427, "grad_norm": 0.20535226166248322, "learning_rate": 2.4607234585170506e-05, "loss": 0.5785, "step": 670 }, { "epoch": 0.1860787576261786, "grad_norm": 0.20723526179790497, "learning_rate": 2.4605860133310577e-05, "loss": 0.6205, "step": 671 }, { "epoch": 0.18635607321131448, "grad_norm": 0.22765034437179565, "learning_rate": 2.4604483319276596e-05, "loss": 0.5739, "step": 672 }, { "epoch": 0.18663338879645036, "grad_norm": 0.19783975183963776, "learning_rate": 2.4603104143337212e-05, "loss": 0.6001, "step": 673 }, { "epoch": 0.18691070438158625, "grad_norm": 0.21098697185516357, "learning_rate": 2.4601722605761547e-05, "loss": 0.5636, "step": 674 }, { "epoch": 0.18718801996672213, "grad_norm": 0.20571714639663696, "learning_rate": 2.4600338706819175e-05, "loss": 0.6031, "step": 675 }, { "epoch": 0.18746533555185801, "grad_norm": 0.2101418673992157, "learning_rate": 2.4598952446780127e-05, "loss": 0.5854, "step": 676 }, { "epoch": 0.1877426511369939, "grad_norm": 0.20562447607517242, "learning_rate": 2.45975638259149e-05, "loss": 0.6007, "step": 677 }, { "epoch": 0.18801996672212978, "grad_norm": 0.20215946435928345, "learning_rate": 2.4596172844494454e-05, "loss": 0.601, "step": 678 }, { "epoch": 0.18829728230726567, "grad_norm": 0.2149016410112381, "learning_rate": 2.45947795027902e-05, "loss": 0.5965, "step": 679 }, { "epoch": 0.18857459789240155, "grad_norm": 0.1985412836074829, "learning_rate": 2.4593383801074025e-05, "loss": 0.563, "step": 680 }, { "epoch": 0.18885191347753744, "grad_norm": 0.20387370884418488, "learning_rate": 2.459198573961826e-05, "loss": 0.5816, "step": 681 }, { "epoch": 0.18912922906267332, "grad_norm": 0.19687046110630035, "learning_rate": 2.4590585318695703e-05, "loss": 0.5761, "step": 682 }, { "epoch": 0.1894065446478092, "grad_norm": 0.2053958773612976, "learning_rate": 2.458918253857962e-05, "loss": 0.5931, "step": 683 }, { "epoch": 0.1896838602329451, "grad_norm": 0.20455330610275269, "learning_rate": 2.4587777399543726e-05, "loss": 0.5739, "step": 684 }, { "epoch": 0.18996117581808097, "grad_norm": 0.2084437757730484, "learning_rate": 2.4586369901862204e-05, "loss": 0.5659, "step": 685 }, { "epoch": 0.19023849140321686, "grad_norm": 0.20842646062374115, "learning_rate": 2.4584960045809686e-05, "loss": 0.5863, "step": 686 }, { "epoch": 0.19051580698835274, "grad_norm": 0.19156675040721893, "learning_rate": 2.4583547831661283e-05, "loss": 0.5738, "step": 687 }, { "epoch": 0.19079312257348863, "grad_norm": 0.19893507659435272, "learning_rate": 2.4582133259692546e-05, "loss": 0.5739, "step": 688 }, { "epoch": 0.1910704381586245, "grad_norm": 0.20070527493953705, "learning_rate": 2.4580716330179505e-05, "loss": 0.5703, "step": 689 }, { "epoch": 0.1913477537437604, "grad_norm": 0.20454630255699158, "learning_rate": 2.4579297043398636e-05, "loss": 0.5735, "step": 690 }, { "epoch": 0.19162506932889628, "grad_norm": 0.21206796169281006, "learning_rate": 2.4577875399626877e-05, "loss": 0.5852, "step": 691 }, { "epoch": 0.19190238491403216, "grad_norm": 0.20151346921920776, "learning_rate": 2.4576451399141627e-05, "loss": 0.6033, "step": 692 }, { "epoch": 0.19217970049916805, "grad_norm": 0.1981053203344345, "learning_rate": 2.457502504222075e-05, "loss": 0.5874, "step": 693 }, { "epoch": 0.19245701608430393, "grad_norm": 0.197993203997612, "learning_rate": 2.457359632914257e-05, "loss": 0.5918, "step": 694 }, { "epoch": 0.19273433166943982, "grad_norm": 0.2022867351770401, "learning_rate": 2.4572165260185857e-05, "loss": 0.5786, "step": 695 }, { "epoch": 0.1930116472545757, "grad_norm": 0.20423941314220428, "learning_rate": 2.457073183562986e-05, "loss": 0.5781, "step": 696 }, { "epoch": 0.1932889628397116, "grad_norm": 0.2390977442264557, "learning_rate": 2.4569296055754275e-05, "loss": 0.5727, "step": 697 }, { "epoch": 0.19356627842484747, "grad_norm": 0.2169281542301178, "learning_rate": 2.4567857920839256e-05, "loss": 0.5638, "step": 698 }, { "epoch": 0.19384359400998336, "grad_norm": 0.18818534910678864, "learning_rate": 2.4566417431165427e-05, "loss": 0.5722, "step": 699 }, { "epoch": 0.19412090959511924, "grad_norm": 0.1905328780412674, "learning_rate": 2.456497458701386e-05, "loss": 0.5632, "step": 700 }, { "epoch": 0.19439822518025512, "grad_norm": 0.19498996436595917, "learning_rate": 2.45635293886661e-05, "loss": 0.5953, "step": 701 }, { "epoch": 0.194675540765391, "grad_norm": 0.19959990680217743, "learning_rate": 2.456208183640414e-05, "loss": 0.583, "step": 702 }, { "epoch": 0.1949528563505269, "grad_norm": 0.2034797966480255, "learning_rate": 2.456063193051043e-05, "loss": 0.5883, "step": 703 }, { "epoch": 0.19523017193566278, "grad_norm": 0.20221780240535736, "learning_rate": 2.455917967126789e-05, "loss": 0.5928, "step": 704 }, { "epoch": 0.19550748752079866, "grad_norm": 0.21273115277290344, "learning_rate": 2.4557725058959895e-05, "loss": 0.5719, "step": 705 }, { "epoch": 0.19578480310593455, "grad_norm": 0.1873910129070282, "learning_rate": 2.455626809387028e-05, "loss": 0.5642, "step": 706 }, { "epoch": 0.19606211869107043, "grad_norm": 0.19268092513084412, "learning_rate": 2.4554808776283334e-05, "loss": 0.5555, "step": 707 }, { "epoch": 0.19633943427620631, "grad_norm": 0.20540937781333923, "learning_rate": 2.4553347106483808e-05, "loss": 0.6076, "step": 708 }, { "epoch": 0.1966167498613422, "grad_norm": 0.19650278985500336, "learning_rate": 2.4551883084756917e-05, "loss": 0.5866, "step": 709 }, { "epoch": 0.19689406544647808, "grad_norm": 0.221206396818161, "learning_rate": 2.4550416711388327e-05, "loss": 0.581, "step": 710 }, { "epoch": 0.19717138103161397, "grad_norm": 0.19788506627082825, "learning_rate": 2.4548947986664167e-05, "loss": 0.5667, "step": 711 }, { "epoch": 0.19744869661674985, "grad_norm": 0.22713138163089752, "learning_rate": 2.454747691087102e-05, "loss": 0.5732, "step": 712 }, { "epoch": 0.19772601220188574, "grad_norm": 0.19035859405994415, "learning_rate": 2.454600348429594e-05, "loss": 0.6072, "step": 713 }, { "epoch": 0.19800332778702162, "grad_norm": 0.19724468886852264, "learning_rate": 2.4544527707226428e-05, "loss": 0.5958, "step": 714 }, { "epoch": 0.1982806433721575, "grad_norm": 0.20074382424354553, "learning_rate": 2.4543049579950445e-05, "loss": 0.6006, "step": 715 }, { "epoch": 0.1985579589572934, "grad_norm": 0.19603832066059113, "learning_rate": 2.4541569102756414e-05, "loss": 0.5901, "step": 716 }, { "epoch": 0.19883527454242927, "grad_norm": 0.1956927478313446, "learning_rate": 2.4540086275933215e-05, "loss": 0.5731, "step": 717 }, { "epoch": 0.19911259012756516, "grad_norm": 0.19697944819927216, "learning_rate": 2.4538601099770187e-05, "loss": 0.5778, "step": 718 }, { "epoch": 0.19938990571270104, "grad_norm": 0.19818982481956482, "learning_rate": 2.453711357455713e-05, "loss": 0.5588, "step": 719 }, { "epoch": 0.19966722129783693, "grad_norm": 0.18997104465961456, "learning_rate": 2.4535623700584297e-05, "loss": 0.5789, "step": 720 }, { "epoch": 0.1999445368829728, "grad_norm": 0.21167294681072235, "learning_rate": 2.4534131478142402e-05, "loss": 0.5804, "step": 721 }, { "epoch": 0.2002218524681087, "grad_norm": 0.21374750137329102, "learning_rate": 2.4532636907522617e-05, "loss": 0.5727, "step": 722 }, { "epoch": 0.20049916805324458, "grad_norm": 0.20970353484153748, "learning_rate": 2.453113998901657e-05, "loss": 0.5766, "step": 723 }, { "epoch": 0.20077648363838047, "grad_norm": 0.19419021904468536, "learning_rate": 2.4529640722916355e-05, "loss": 0.5755, "step": 724 }, { "epoch": 0.20105379922351635, "grad_norm": 0.20143002271652222, "learning_rate": 2.4528139109514513e-05, "loss": 0.5627, "step": 725 }, { "epoch": 0.20133111480865223, "grad_norm": 0.20216137170791626, "learning_rate": 2.4526635149104056e-05, "loss": 0.5771, "step": 726 }, { "epoch": 0.20160843039378812, "grad_norm": 0.20338623225688934, "learning_rate": 2.452512884197844e-05, "loss": 0.605, "step": 727 }, { "epoch": 0.201885745978924, "grad_norm": 0.20078277587890625, "learning_rate": 2.4523620188431585e-05, "loss": 0.5954, "step": 728 }, { "epoch": 0.2021630615640599, "grad_norm": 0.2046244889497757, "learning_rate": 2.4522109188757875e-05, "loss": 0.5945, "step": 729 }, { "epoch": 0.20244037714919577, "grad_norm": 0.20462092757225037, "learning_rate": 2.4520595843252138e-05, "loss": 0.5762, "step": 730 }, { "epoch": 0.20271769273433166, "grad_norm": 0.20749370753765106, "learning_rate": 2.4519080152209675e-05, "loss": 0.5435, "step": 731 }, { "epoch": 0.20299500831946754, "grad_norm": 0.21734094619750977, "learning_rate": 2.4517562115926233e-05, "loss": 0.5961, "step": 732 }, { "epoch": 0.20327232390460345, "grad_norm": 0.20754100382328033, "learning_rate": 2.4516041734698024e-05, "loss": 0.5548, "step": 733 }, { "epoch": 0.20354963948973934, "grad_norm": 0.20619900524616241, "learning_rate": 2.451451900882172e-05, "loss": 0.5897, "step": 734 }, { "epoch": 0.20382695507487522, "grad_norm": 0.28535279631614685, "learning_rate": 2.451299393859443e-05, "loss": 0.5815, "step": 735 }, { "epoch": 0.2041042706600111, "grad_norm": 0.19404634833335876, "learning_rate": 2.4511466524313748e-05, "loss": 0.5743, "step": 736 }, { "epoch": 0.204381586245147, "grad_norm": 0.18895412981510162, "learning_rate": 2.4509936766277706e-05, "loss": 0.5876, "step": 737 }, { "epoch": 0.20465890183028287, "grad_norm": 0.22120583057403564, "learning_rate": 2.4508404664784808e-05, "loss": 0.5873, "step": 738 }, { "epoch": 0.20493621741541876, "grad_norm": 0.21361954510211945, "learning_rate": 2.4506870220134e-05, "loss": 0.6002, "step": 739 }, { "epoch": 0.20521353300055464, "grad_norm": 0.19417433440685272, "learning_rate": 2.4505333432624694e-05, "loss": 0.5673, "step": 740 }, { "epoch": 0.20549084858569053, "grad_norm": 0.19543422758579254, "learning_rate": 2.4503794302556765e-05, "loss": 0.5628, "step": 741 }, { "epoch": 0.2057681641708264, "grad_norm": 0.1954490691423416, "learning_rate": 2.450225283023053e-05, "loss": 0.6246, "step": 742 }, { "epoch": 0.2060454797559623, "grad_norm": 0.1868993192911148, "learning_rate": 2.4500709015946776e-05, "loss": 0.5858, "step": 743 }, { "epoch": 0.20632279534109818, "grad_norm": 0.2035941481590271, "learning_rate": 2.449916286000674e-05, "loss": 0.5974, "step": 744 }, { "epoch": 0.20660011092623407, "grad_norm": 0.1918855458498001, "learning_rate": 2.4497614362712118e-05, "loss": 0.5843, "step": 745 }, { "epoch": 0.20687742651136995, "grad_norm": 0.19244706630706787, "learning_rate": 2.4496063524365063e-05, "loss": 0.5535, "step": 746 }, { "epoch": 0.20715474209650583, "grad_norm": 0.20424753427505493, "learning_rate": 2.4494510345268185e-05, "loss": 0.5835, "step": 747 }, { "epoch": 0.20743205768164172, "grad_norm": 0.19604821503162384, "learning_rate": 2.4492954825724544e-05, "loss": 0.5701, "step": 748 }, { "epoch": 0.2077093732667776, "grad_norm": 0.19546863436698914, "learning_rate": 2.4491396966037678e-05, "loss": 0.5898, "step": 749 }, { "epoch": 0.2079866888519135, "grad_norm": 0.19861635565757751, "learning_rate": 2.4489836766511555e-05, "loss": 0.587, "step": 750 }, { "epoch": 0.20826400443704937, "grad_norm": 0.19369752705097198, "learning_rate": 2.4488274227450613e-05, "loss": 0.6027, "step": 751 }, { "epoch": 0.20854132002218526, "grad_norm": 0.4158318340778351, "learning_rate": 2.448670934915975e-05, "loss": 0.602, "step": 752 }, { "epoch": 0.20881863560732114, "grad_norm": 0.20547251403331757, "learning_rate": 2.4485142131944306e-05, "loss": 0.5949, "step": 753 }, { "epoch": 0.20909595119245702, "grad_norm": 0.21317002177238464, "learning_rate": 2.4483572576110093e-05, "loss": 0.5862, "step": 754 }, { "epoch": 0.2093732667775929, "grad_norm": 0.19712896645069122, "learning_rate": 2.448200068196337e-05, "loss": 0.5983, "step": 755 }, { "epoch": 0.2096505823627288, "grad_norm": 0.2054811269044876, "learning_rate": 2.448042644981086e-05, "loss": 0.5983, "step": 756 }, { "epoch": 0.20992789794786468, "grad_norm": 0.19795221090316772, "learning_rate": 2.447884987995973e-05, "loss": 0.6208, "step": 757 }, { "epoch": 0.21020521353300056, "grad_norm": 0.21395504474639893, "learning_rate": 2.447727097271762e-05, "loss": 0.5983, "step": 758 }, { "epoch": 0.21048252911813645, "grad_norm": 0.19311439990997314, "learning_rate": 2.447568972839261e-05, "loss": 0.592, "step": 759 }, { "epoch": 0.21075984470327233, "grad_norm": 0.21382609009742737, "learning_rate": 2.4474106147293242e-05, "loss": 0.5752, "step": 760 }, { "epoch": 0.21103716028840822, "grad_norm": 0.19354097545146942, "learning_rate": 2.447252022972852e-05, "loss": 0.5911, "step": 761 }, { "epoch": 0.2113144758735441, "grad_norm": 0.19984754920005798, "learning_rate": 2.4470931976007894e-05, "loss": 0.5995, "step": 762 }, { "epoch": 0.21159179145867998, "grad_norm": 0.20404407382011414, "learning_rate": 2.4469341386441274e-05, "loss": 0.5551, "step": 763 }, { "epoch": 0.21186910704381587, "grad_norm": 0.2025006264448166, "learning_rate": 2.446774846133903e-05, "loss": 0.6105, "step": 764 }, { "epoch": 0.21214642262895175, "grad_norm": 0.20010975003242493, "learning_rate": 2.446615320101198e-05, "loss": 0.5788, "step": 765 }, { "epoch": 0.21242373821408764, "grad_norm": 0.20225434005260468, "learning_rate": 2.4464555605771404e-05, "loss": 0.5636, "step": 766 }, { "epoch": 0.21270105379922352, "grad_norm": 0.19845524430274963, "learning_rate": 2.4462955675929032e-05, "loss": 0.5758, "step": 767 }, { "epoch": 0.2129783693843594, "grad_norm": 0.19598202407360077, "learning_rate": 2.446135341179706e-05, "loss": 0.5456, "step": 768 }, { "epoch": 0.2132556849694953, "grad_norm": 0.2050497829914093, "learning_rate": 2.445974881368812e-05, "loss": 0.5912, "step": 769 }, { "epoch": 0.21353300055463117, "grad_norm": 0.19924525916576385, "learning_rate": 2.4458141881915324e-05, "loss": 0.5479, "step": 770 }, { "epoch": 0.21381031613976706, "grad_norm": 0.20329277217388153, "learning_rate": 2.445653261679222e-05, "loss": 0.6006, "step": 771 }, { "epoch": 0.21408763172490294, "grad_norm": 0.19327110052108765, "learning_rate": 2.4454921018632827e-05, "loss": 0.5739, "step": 772 }, { "epoch": 0.21436494731003883, "grad_norm": 0.19316452741622925, "learning_rate": 2.4453307087751594e-05, "loss": 0.5953, "step": 773 }, { "epoch": 0.2146422628951747, "grad_norm": 0.19617030024528503, "learning_rate": 2.4451690824463457e-05, "loss": 0.5686, "step": 774 }, { "epoch": 0.2149195784803106, "grad_norm": 0.20528316497802734, "learning_rate": 2.4450072229083786e-05, "loss": 0.5691, "step": 775 }, { "epoch": 0.21519689406544648, "grad_norm": 0.20544420182704926, "learning_rate": 2.4448451301928408e-05, "loss": 0.5776, "step": 776 }, { "epoch": 0.21547420965058237, "grad_norm": 0.21979959309101105, "learning_rate": 2.4446828043313614e-05, "loss": 0.5947, "step": 777 }, { "epoch": 0.21575152523571825, "grad_norm": 0.2081802487373352, "learning_rate": 2.4445202453556145e-05, "loss": 0.5752, "step": 778 }, { "epoch": 0.21602884082085413, "grad_norm": 0.2012367993593216, "learning_rate": 2.4443574532973195e-05, "loss": 0.5671, "step": 779 }, { "epoch": 0.21630615640599002, "grad_norm": 0.1998508721590042, "learning_rate": 2.4441944281882415e-05, "loss": 0.6154, "step": 780 }, { "epoch": 0.2165834719911259, "grad_norm": 0.20325055718421936, "learning_rate": 2.444031170060191e-05, "loss": 0.5743, "step": 781 }, { "epoch": 0.2168607875762618, "grad_norm": 0.20255804061889648, "learning_rate": 2.443867678945024e-05, "loss": 0.5748, "step": 782 }, { "epoch": 0.21713810316139767, "grad_norm": 0.1919908970594406, "learning_rate": 2.4437039548746415e-05, "loss": 0.5709, "step": 783 }, { "epoch": 0.21741541874653356, "grad_norm": 0.20014292001724243, "learning_rate": 2.443539997880991e-05, "loss": 0.5636, "step": 784 }, { "epoch": 0.21769273433166944, "grad_norm": 0.19818776845932007, "learning_rate": 2.4433758079960647e-05, "loss": 0.5649, "step": 785 }, { "epoch": 0.21797004991680533, "grad_norm": 0.18718703091144562, "learning_rate": 2.4432113852519005e-05, "loss": 0.5917, "step": 786 }, { "epoch": 0.2182473655019412, "grad_norm": 0.32280299067497253, "learning_rate": 2.4430467296805816e-05, "loss": 0.5864, "step": 787 }, { "epoch": 0.2185246810870771, "grad_norm": 0.20851466059684753, "learning_rate": 2.442881841314236e-05, "loss": 0.5837, "step": 788 }, { "epoch": 0.21880199667221298, "grad_norm": 0.1917923539876938, "learning_rate": 2.442716720185039e-05, "loss": 0.6032, "step": 789 }, { "epoch": 0.21907931225734886, "grad_norm": 0.25185203552246094, "learning_rate": 2.442551366325209e-05, "loss": 0.5873, "step": 790 }, { "epoch": 0.21935662784248475, "grad_norm": 0.1962638646364212, "learning_rate": 2.4423857797670118e-05, "loss": 0.577, "step": 791 }, { "epoch": 0.21963394342762063, "grad_norm": 0.19475746154785156, "learning_rate": 2.4422199605427572e-05, "loss": 0.5677, "step": 792 }, { "epoch": 0.21991125901275652, "grad_norm": 0.517663836479187, "learning_rate": 2.4420539086848007e-05, "loss": 0.5718, "step": 793 }, { "epoch": 0.2201885745978924, "grad_norm": 0.1952415555715561, "learning_rate": 2.441887624225544e-05, "loss": 0.5801, "step": 794 }, { "epoch": 0.22046589018302828, "grad_norm": 0.20376408100128174, "learning_rate": 2.441721107197433e-05, "loss": 0.6038, "step": 795 }, { "epoch": 0.22074320576816417, "grad_norm": 0.21492497622966766, "learning_rate": 2.4415543576329604e-05, "loss": 0.5626, "step": 796 }, { "epoch": 0.22102052135330005, "grad_norm": 0.21810825169086456, "learning_rate": 2.4413873755646627e-05, "loss": 0.566, "step": 797 }, { "epoch": 0.22129783693843594, "grad_norm": 0.2002691626548767, "learning_rate": 2.4412201610251232e-05, "loss": 0.5706, "step": 798 }, { "epoch": 0.22157515252357182, "grad_norm": 0.24929803609848022, "learning_rate": 2.441052714046969e-05, "loss": 0.5878, "step": 799 }, { "epoch": 0.2218524681087077, "grad_norm": 0.20125854015350342, "learning_rate": 2.440885034662874e-05, "loss": 0.5869, "step": 800 }, { "epoch": 0.2221297836938436, "grad_norm": 0.19206437468528748, "learning_rate": 2.4407171229055574e-05, "loss": 0.5911, "step": 801 }, { "epoch": 0.22240709927897948, "grad_norm": 0.22323836386203766, "learning_rate": 2.4405489788077823e-05, "loss": 0.5725, "step": 802 }, { "epoch": 0.22268441486411536, "grad_norm": 0.2044333517551422, "learning_rate": 2.4403806024023584e-05, "loss": 0.6243, "step": 803 }, { "epoch": 0.22296173044925124, "grad_norm": 0.20750725269317627, "learning_rate": 2.44021199372214e-05, "loss": 0.6002, "step": 804 }, { "epoch": 0.22323904603438713, "grad_norm": 0.2218470573425293, "learning_rate": 2.4400431528000284e-05, "loss": 0.5886, "step": 805 }, { "epoch": 0.223516361619523, "grad_norm": 0.20135878026485443, "learning_rate": 2.4398740796689676e-05, "loss": 0.5711, "step": 806 }, { "epoch": 0.2237936772046589, "grad_norm": 0.19776999950408936, "learning_rate": 2.439704774361949e-05, "loss": 0.5706, "step": 807 }, { "epoch": 0.22407099278979478, "grad_norm": 0.22162839770317078, "learning_rate": 2.4395352369120078e-05, "loss": 0.593, "step": 808 }, { "epoch": 0.22434830837493067, "grad_norm": 0.1974382847547531, "learning_rate": 2.4393654673522264e-05, "loss": 0.5657, "step": 809 }, { "epoch": 0.22462562396006655, "grad_norm": 0.2362552434206009, "learning_rate": 2.4391954657157302e-05, "loss": 0.5844, "step": 810 }, { "epoch": 0.22490293954520243, "grad_norm": 0.2056739628314972, "learning_rate": 2.4390252320356915e-05, "loss": 0.5959, "step": 811 }, { "epoch": 0.22518025513033832, "grad_norm": 0.19037006795406342, "learning_rate": 2.4388547663453275e-05, "loss": 0.5638, "step": 812 }, { "epoch": 0.2254575707154742, "grad_norm": 0.21129223704338074, "learning_rate": 2.4386840686779004e-05, "loss": 0.5954, "step": 813 }, { "epoch": 0.2257348863006101, "grad_norm": 0.2718643546104431, "learning_rate": 2.4385131390667184e-05, "loss": 0.5881, "step": 814 }, { "epoch": 0.22601220188574597, "grad_norm": 0.2181466519832611, "learning_rate": 2.4383419775451334e-05, "loss": 0.5858, "step": 815 }, { "epoch": 0.22628951747088186, "grad_norm": 0.22737659513950348, "learning_rate": 2.438170584146544e-05, "loss": 0.5732, "step": 816 }, { "epoch": 0.22656683305601774, "grad_norm": 0.20686711370944977, "learning_rate": 2.437998958904394e-05, "loss": 0.5524, "step": 817 }, { "epoch": 0.22684414864115363, "grad_norm": 0.2140977680683136, "learning_rate": 2.4378271018521714e-05, "loss": 0.5628, "step": 818 }, { "epoch": 0.2271214642262895, "grad_norm": 0.19793452322483063, "learning_rate": 2.4376550130234104e-05, "loss": 0.5796, "step": 819 }, { "epoch": 0.2273987798114254, "grad_norm": 0.20289914309978485, "learning_rate": 2.4374826924516903e-05, "loss": 0.5848, "step": 820 }, { "epoch": 0.22767609539656128, "grad_norm": 0.19081373512744904, "learning_rate": 2.437310140170635e-05, "loss": 0.6011, "step": 821 }, { "epoch": 0.22795341098169716, "grad_norm": 0.20546457171440125, "learning_rate": 2.437137356213914e-05, "loss": 0.5476, "step": 822 }, { "epoch": 0.22823072656683305, "grad_norm": 0.22531366348266602, "learning_rate": 2.4369643406152422e-05, "loss": 0.577, "step": 823 }, { "epoch": 0.22850804215196893, "grad_norm": 0.1964918076992035, "learning_rate": 2.4367910934083795e-05, "loss": 0.5733, "step": 824 }, { "epoch": 0.22878535773710482, "grad_norm": 0.1976742297410965, "learning_rate": 2.4366176146271313e-05, "loss": 0.576, "step": 825 }, { "epoch": 0.2290626733222407, "grad_norm": 0.20134706795215607, "learning_rate": 2.4364439043053475e-05, "loss": 0.5715, "step": 826 }, { "epoch": 0.22933998890737659, "grad_norm": 0.28538307547569275, "learning_rate": 2.4362699624769236e-05, "loss": 0.6009, "step": 827 }, { "epoch": 0.22961730449251247, "grad_norm": 0.21845568716526031, "learning_rate": 2.4360957891758006e-05, "loss": 0.5936, "step": 828 }, { "epoch": 0.22989462007764835, "grad_norm": 0.1977756768465042, "learning_rate": 2.435921384435964e-05, "loss": 0.5651, "step": 829 }, { "epoch": 0.23017193566278424, "grad_norm": 0.20483648777008057, "learning_rate": 2.4357467482914447e-05, "loss": 0.5861, "step": 830 }, { "epoch": 0.23044925124792012, "grad_norm": 0.191145658493042, "learning_rate": 2.4355718807763196e-05, "loss": 0.5951, "step": 831 }, { "epoch": 0.230726566833056, "grad_norm": 0.20611602067947388, "learning_rate": 2.4353967819247093e-05, "loss": 0.5762, "step": 832 }, { "epoch": 0.2310038824181919, "grad_norm": 0.34163740277290344, "learning_rate": 2.43522145177078e-05, "loss": 0.5573, "step": 833 }, { "epoch": 0.23128119800332778, "grad_norm": 0.1989511400461197, "learning_rate": 2.4350458903487438e-05, "loss": 0.5799, "step": 834 }, { "epoch": 0.23155851358846366, "grad_norm": 0.1951713114976883, "learning_rate": 2.434870097692857e-05, "loss": 0.5763, "step": 835 }, { "epoch": 0.23183582917359954, "grad_norm": 0.19492702186107635, "learning_rate": 2.4346940738374217e-05, "loss": 0.5751, "step": 836 }, { "epoch": 0.23211314475873543, "grad_norm": 0.20524460077285767, "learning_rate": 2.434517818816785e-05, "loss": 0.5959, "step": 837 }, { "epoch": 0.2323904603438713, "grad_norm": 0.19833968579769135, "learning_rate": 2.4343413326653384e-05, "loss": 0.5881, "step": 838 }, { "epoch": 0.2326677759290072, "grad_norm": 0.19568949937820435, "learning_rate": 2.4341646154175192e-05, "loss": 0.5449, "step": 839 }, { "epoch": 0.23294509151414308, "grad_norm": 0.19156986474990845, "learning_rate": 2.43398766710781e-05, "loss": 0.5703, "step": 840 }, { "epoch": 0.23322240709927897, "grad_norm": 0.20440654456615448, "learning_rate": 2.4338104877707372e-05, "loss": 0.5783, "step": 841 }, { "epoch": 0.23349972268441485, "grad_norm": 0.20396758615970612, "learning_rate": 2.4336330774408744e-05, "loss": 0.5911, "step": 842 }, { "epoch": 0.23377703826955074, "grad_norm": 0.19637946784496307, "learning_rate": 2.4334554361528376e-05, "loss": 0.5708, "step": 843 }, { "epoch": 0.23405435385468662, "grad_norm": 0.21237438917160034, "learning_rate": 2.433277563941291e-05, "loss": 0.6104, "step": 844 }, { "epoch": 0.2343316694398225, "grad_norm": 0.18440033495426178, "learning_rate": 2.433099460840941e-05, "loss": 0.5745, "step": 845 }, { "epoch": 0.23460898502495842, "grad_norm": 0.19301645457744598, "learning_rate": 2.4329211268865406e-05, "loss": 0.5621, "step": 846 }, { "epoch": 0.2348863006100943, "grad_norm": 0.2056163102388382, "learning_rate": 2.4327425621128873e-05, "loss": 0.5973, "step": 847 }, { "epoch": 0.23516361619523019, "grad_norm": 0.20398737490177155, "learning_rate": 2.432563766554824e-05, "loss": 0.5795, "step": 848 }, { "epoch": 0.23544093178036607, "grad_norm": 0.19015717506408691, "learning_rate": 2.432384740247239e-05, "loss": 0.5563, "step": 849 }, { "epoch": 0.23571824736550195, "grad_norm": 0.19576147198677063, "learning_rate": 2.4322054832250636e-05, "loss": 0.5757, "step": 850 }, { "epoch": 0.23599556295063784, "grad_norm": 0.1978127360343933, "learning_rate": 2.4320259955232773e-05, "loss": 0.5832, "step": 851 }, { "epoch": 0.23627287853577372, "grad_norm": 0.19191214442253113, "learning_rate": 2.4318462771769012e-05, "loss": 0.5812, "step": 852 }, { "epoch": 0.2365501941209096, "grad_norm": 0.18717870116233826, "learning_rate": 2.4316663282210046e-05, "loss": 0.5753, "step": 853 }, { "epoch": 0.2368275097060455, "grad_norm": 0.18739578127861023, "learning_rate": 2.4314861486906996e-05, "loss": 0.5665, "step": 854 }, { "epoch": 0.23710482529118138, "grad_norm": 0.1928299367427826, "learning_rate": 2.431305738621144e-05, "loss": 0.5836, "step": 855 }, { "epoch": 0.23738214087631726, "grad_norm": 0.20048747956752777, "learning_rate": 2.4311250980475408e-05, "loss": 0.5673, "step": 856 }, { "epoch": 0.23765945646145314, "grad_norm": 0.2141515463590622, "learning_rate": 2.4309442270051376e-05, "loss": 0.5426, "step": 857 }, { "epoch": 0.23793677204658903, "grad_norm": 0.19637706875801086, "learning_rate": 2.4307631255292273e-05, "loss": 0.5542, "step": 858 }, { "epoch": 0.2382140876317249, "grad_norm": 0.21245527267456055, "learning_rate": 2.4305817936551472e-05, "loss": 0.5867, "step": 859 }, { "epoch": 0.2384914032168608, "grad_norm": 0.19474704563617706, "learning_rate": 2.4304002314182804e-05, "loss": 0.5716, "step": 860 }, { "epoch": 0.23876871880199668, "grad_norm": 0.19590826332569122, "learning_rate": 2.4302184388540544e-05, "loss": 0.5746, "step": 861 }, { "epoch": 0.23904603438713257, "grad_norm": 0.21298062801361084, "learning_rate": 2.4300364159979418e-05, "loss": 0.5788, "step": 862 }, { "epoch": 0.23932334997226845, "grad_norm": 0.21597878634929657, "learning_rate": 2.4298541628854597e-05, "loss": 0.5868, "step": 863 }, { "epoch": 0.23960066555740434, "grad_norm": 0.2077784389257431, "learning_rate": 2.4296716795521707e-05, "loss": 0.5878, "step": 864 }, { "epoch": 0.23987798114254022, "grad_norm": 0.1982557773590088, "learning_rate": 2.4294889660336823e-05, "loss": 0.5734, "step": 865 }, { "epoch": 0.2401552967276761, "grad_norm": 0.19443267583847046, "learning_rate": 2.4293060223656465e-05, "loss": 0.5845, "step": 866 }, { "epoch": 0.240432612312812, "grad_norm": 0.2007235586643219, "learning_rate": 2.4291228485837613e-05, "loss": 0.5829, "step": 867 }, { "epoch": 0.24070992789794787, "grad_norm": 0.18996436893939972, "learning_rate": 2.4289394447237674e-05, "loss": 0.5918, "step": 868 }, { "epoch": 0.24098724348308376, "grad_norm": 0.19309580326080322, "learning_rate": 2.4287558108214527e-05, "loss": 0.5962, "step": 869 }, { "epoch": 0.24126455906821964, "grad_norm": 0.19195586442947388, "learning_rate": 2.428571946912649e-05, "loss": 0.5738, "step": 870 }, { "epoch": 0.24154187465335553, "grad_norm": 0.19671426713466644, "learning_rate": 2.4283878530332322e-05, "loss": 0.5551, "step": 871 }, { "epoch": 0.2418191902384914, "grad_norm": 0.19544430077075958, "learning_rate": 2.4282035292191247e-05, "loss": 0.5876, "step": 872 }, { "epoch": 0.2420965058236273, "grad_norm": 0.19258378446102142, "learning_rate": 2.4280189755062928e-05, "loss": 0.559, "step": 873 }, { "epoch": 0.24237382140876318, "grad_norm": 0.21361422538757324, "learning_rate": 2.427834191930748e-05, "loss": 0.5753, "step": 874 }, { "epoch": 0.24265113699389906, "grad_norm": 0.20279040932655334, "learning_rate": 2.4276491785285457e-05, "loss": 0.5724, "step": 875 }, { "epoch": 0.24292845257903495, "grad_norm": 0.20467157661914825, "learning_rate": 2.427463935335788e-05, "loss": 0.5989, "step": 876 }, { "epoch": 0.24320576816417083, "grad_norm": 0.20144453644752502, "learning_rate": 2.4272784623886195e-05, "loss": 0.5943, "step": 877 }, { "epoch": 0.24348308374930672, "grad_norm": 0.19686299562454224, "learning_rate": 2.4270927597232325e-05, "loss": 0.5692, "step": 878 }, { "epoch": 0.2437603993344426, "grad_norm": 0.22722774744033813, "learning_rate": 2.426906827375861e-05, "loss": 0.566, "step": 879 }, { "epoch": 0.24403771491957849, "grad_norm": 0.19703295826911926, "learning_rate": 2.4267206653827856e-05, "loss": 0.5627, "step": 880 }, { "epoch": 0.24431503050471437, "grad_norm": 0.2020971179008484, "learning_rate": 2.4265342737803327e-05, "loss": 0.5836, "step": 881 }, { "epoch": 0.24459234608985025, "grad_norm": 0.1921062171459198, "learning_rate": 2.4263476526048707e-05, "loss": 0.5651, "step": 882 }, { "epoch": 0.24486966167498614, "grad_norm": 0.20176348090171814, "learning_rate": 2.4261608018928147e-05, "loss": 0.5883, "step": 883 }, { "epoch": 0.24514697726012202, "grad_norm": 0.19450893998146057, "learning_rate": 2.425973721680625e-05, "loss": 0.5591, "step": 884 }, { "epoch": 0.2454242928452579, "grad_norm": 0.8580565452575684, "learning_rate": 2.425786412004805e-05, "loss": 0.5592, "step": 885 }, { "epoch": 0.2457016084303938, "grad_norm": 0.24260735511779785, "learning_rate": 2.4255988729019042e-05, "loss": 0.5902, "step": 886 }, { "epoch": 0.24597892401552968, "grad_norm": 0.19789321720600128, "learning_rate": 2.4254111044085163e-05, "loss": 0.5745, "step": 887 }, { "epoch": 0.24625623960066556, "grad_norm": 0.21979939937591553, "learning_rate": 2.4252231065612805e-05, "loss": 0.5551, "step": 888 }, { "epoch": 0.24653355518580145, "grad_norm": 0.20352937281131744, "learning_rate": 2.425034879396879e-05, "loss": 0.5591, "step": 889 }, { "epoch": 0.24681087077093733, "grad_norm": 0.2068743109703064, "learning_rate": 2.424846422952041e-05, "loss": 0.5848, "step": 890 }, { "epoch": 0.24708818635607321, "grad_norm": 0.19195155799388885, "learning_rate": 2.4246577372635387e-05, "loss": 0.5621, "step": 891 }, { "epoch": 0.2473655019412091, "grad_norm": 0.2014138102531433, "learning_rate": 2.42446882236819e-05, "loss": 0.5689, "step": 892 }, { "epoch": 0.24764281752634498, "grad_norm": 0.2104417085647583, "learning_rate": 2.4242796783028573e-05, "loss": 0.5898, "step": 893 }, { "epoch": 0.24792013311148087, "grad_norm": 0.22085507214069366, "learning_rate": 2.4240903051044474e-05, "loss": 0.5762, "step": 894 }, { "epoch": 0.24819744869661675, "grad_norm": 0.20655465126037598, "learning_rate": 2.4239007028099117e-05, "loss": 0.5654, "step": 895 }, { "epoch": 0.24847476428175264, "grad_norm": 0.2050492912530899, "learning_rate": 2.4237108714562474e-05, "loss": 0.5859, "step": 896 }, { "epoch": 0.24875207986688852, "grad_norm": 0.21355165541172028, "learning_rate": 2.4235208110804947e-05, "loss": 0.5695, "step": 897 }, { "epoch": 0.2490293954520244, "grad_norm": 0.20924112200737, "learning_rate": 2.42333052171974e-05, "loss": 0.5593, "step": 898 }, { "epoch": 0.2493067110371603, "grad_norm": 0.22572918236255646, "learning_rate": 2.423140003411114e-05, "loss": 0.6217, "step": 899 }, { "epoch": 0.24958402662229617, "grad_norm": 0.2063211351633072, "learning_rate": 2.4229492561917914e-05, "loss": 0.5765, "step": 900 }, { "epoch": 0.24986134220743206, "grad_norm": 0.21796129643917084, "learning_rate": 2.4227582800989923e-05, "loss": 0.5932, "step": 901 }, { "epoch": 0.25013865779256794, "grad_norm": 0.20169825851917267, "learning_rate": 2.4225670751699808e-05, "loss": 0.5858, "step": 902 }, { "epoch": 0.2504159733777038, "grad_norm": 0.2097984254360199, "learning_rate": 2.4223756414420668e-05, "loss": 0.5888, "step": 903 }, { "epoch": 0.2506932889628397, "grad_norm": 0.2516496479511261, "learning_rate": 2.4221839789526033e-05, "loss": 0.5812, "step": 904 }, { "epoch": 0.2509706045479756, "grad_norm": 0.2110574096441269, "learning_rate": 2.421992087738989e-05, "loss": 0.5859, "step": 905 }, { "epoch": 0.2512479201331115, "grad_norm": 0.1935090869665146, "learning_rate": 2.4217999678386673e-05, "loss": 0.5768, "step": 906 }, { "epoch": 0.25152523571824736, "grad_norm": 0.20760700106620789, "learning_rate": 2.4216076192891257e-05, "loss": 0.5856, "step": 907 }, { "epoch": 0.25180255130338325, "grad_norm": 0.20197226107120514, "learning_rate": 2.4214150421278964e-05, "loss": 0.6041, "step": 908 }, { "epoch": 0.25207986688851913, "grad_norm": 0.20894859731197357, "learning_rate": 2.4212222363925563e-05, "loss": 0.5821, "step": 909 }, { "epoch": 0.252357182473655, "grad_norm": 0.20673821866512299, "learning_rate": 2.4210292021207268e-05, "loss": 0.5813, "step": 910 }, { "epoch": 0.2526344980587909, "grad_norm": 0.23159608244895935, "learning_rate": 2.420835939350074e-05, "loss": 0.5803, "step": 911 }, { "epoch": 0.2529118136439268, "grad_norm": 0.19740775227546692, "learning_rate": 2.420642448118309e-05, "loss": 0.5721, "step": 912 }, { "epoch": 0.25318912922906267, "grad_norm": 0.20442472398281097, "learning_rate": 2.4204487284631866e-05, "loss": 0.5681, "step": 913 }, { "epoch": 0.25346644481419855, "grad_norm": 0.19745062291622162, "learning_rate": 2.420254780422507e-05, "loss": 0.573, "step": 914 }, { "epoch": 0.25374376039933444, "grad_norm": 0.19819265604019165, "learning_rate": 2.4200606040341147e-05, "loss": 0.6015, "step": 915 }, { "epoch": 0.2540210759844703, "grad_norm": 0.20158332586288452, "learning_rate": 2.4198661993358976e-05, "loss": 0.5557, "step": 916 }, { "epoch": 0.2542983915696062, "grad_norm": 0.19300688803195953, "learning_rate": 2.4196715663657903e-05, "loss": 0.5691, "step": 917 }, { "epoch": 0.2545757071547421, "grad_norm": 0.19178220629692078, "learning_rate": 2.4194767051617707e-05, "loss": 0.5877, "step": 918 }, { "epoch": 0.254853022739878, "grad_norm": 0.19551022350788116, "learning_rate": 2.4192816157618615e-05, "loss": 0.5719, "step": 919 }, { "epoch": 0.25513033832501386, "grad_norm": 0.2008076012134552, "learning_rate": 2.419086298204129e-05, "loss": 0.579, "step": 920 }, { "epoch": 0.25540765391014975, "grad_norm": 0.19526442885398865, "learning_rate": 2.4188907525266856e-05, "loss": 0.546, "step": 921 }, { "epoch": 0.25568496949528563, "grad_norm": 0.1844739466905594, "learning_rate": 2.418694978767687e-05, "loss": 0.5759, "step": 922 }, { "epoch": 0.2559622850804215, "grad_norm": 0.20165039598941803, "learning_rate": 2.4184989769653343e-05, "loss": 0.5618, "step": 923 }, { "epoch": 0.2562396006655574, "grad_norm": 0.19225285947322845, "learning_rate": 2.418302747157872e-05, "loss": 0.5627, "step": 924 }, { "epoch": 0.2565169162506933, "grad_norm": 0.19688788056373596, "learning_rate": 2.418106289383591e-05, "loss": 0.5714, "step": 925 }, { "epoch": 0.25679423183582917, "grad_norm": 0.2039179801940918, "learning_rate": 2.417909603680824e-05, "loss": 0.5975, "step": 926 }, { "epoch": 0.25707154742096505, "grad_norm": 0.21120522916316986, "learning_rate": 2.41771269008795e-05, "loss": 0.6026, "step": 927 }, { "epoch": 0.25734886300610094, "grad_norm": 0.21704575419425964, "learning_rate": 2.4175155486433927e-05, "loss": 0.5798, "step": 928 }, { "epoch": 0.2576261785912368, "grad_norm": 0.18682295083999634, "learning_rate": 2.4173181793856187e-05, "loss": 0.5872, "step": 929 }, { "epoch": 0.2579034941763727, "grad_norm": 0.200609490275383, "learning_rate": 2.4171205823531402e-05, "loss": 0.5568, "step": 930 }, { "epoch": 0.2581808097615086, "grad_norm": 0.1996905356645584, "learning_rate": 2.416922757584514e-05, "loss": 0.5965, "step": 931 }, { "epoch": 0.2584581253466445, "grad_norm": 0.1988278329372406, "learning_rate": 2.4167247051183412e-05, "loss": 0.5819, "step": 932 }, { "epoch": 0.25873544093178036, "grad_norm": 0.20553693175315857, "learning_rate": 2.4165264249932662e-05, "loss": 0.591, "step": 933 }, { "epoch": 0.25901275651691624, "grad_norm": 0.19097572565078735, "learning_rate": 2.416327917247979e-05, "loss": 0.5517, "step": 934 }, { "epoch": 0.2592900721020521, "grad_norm": 0.191832035779953, "learning_rate": 2.4161291819212144e-05, "loss": 0.5811, "step": 935 }, { "epoch": 0.259567387687188, "grad_norm": 0.19829009473323822, "learning_rate": 2.4159302190517496e-05, "loss": 0.5966, "step": 936 }, { "epoch": 0.2598447032723239, "grad_norm": 0.1965586543083191, "learning_rate": 2.415731028678409e-05, "loss": 0.5458, "step": 937 }, { "epoch": 0.2601220188574598, "grad_norm": 0.20748619735240936, "learning_rate": 2.4155316108400593e-05, "loss": 0.5843, "step": 938 }, { "epoch": 0.26039933444259566, "grad_norm": 0.20656223595142365, "learning_rate": 2.415331965575612e-05, "loss": 0.5749, "step": 939 }, { "epoch": 0.26067665002773155, "grad_norm": 0.186055988073349, "learning_rate": 2.4151320929240227e-05, "loss": 0.5749, "step": 940 }, { "epoch": 0.26095396561286743, "grad_norm": 0.21665525436401367, "learning_rate": 2.4149319929242934e-05, "loss": 0.5825, "step": 941 }, { "epoch": 0.2612312811980033, "grad_norm": 0.2007439136505127, "learning_rate": 2.4147316656154674e-05, "loss": 0.5904, "step": 942 }, { "epoch": 0.2615085967831392, "grad_norm": 0.20290826261043549, "learning_rate": 2.4145311110366347e-05, "loss": 0.6019, "step": 943 }, { "epoch": 0.2617859123682751, "grad_norm": 0.20062971115112305, "learning_rate": 2.4143303292269286e-05, "loss": 0.5571, "step": 944 }, { "epoch": 0.26206322795341097, "grad_norm": 0.2056947946548462, "learning_rate": 2.414129320225527e-05, "loss": 0.5683, "step": 945 }, { "epoch": 0.26234054353854686, "grad_norm": 0.18966248631477356, "learning_rate": 2.4139280840716517e-05, "loss": 0.5962, "step": 946 }, { "epoch": 0.26261785912368274, "grad_norm": 0.20114421844482422, "learning_rate": 2.41372662080457e-05, "loss": 0.5528, "step": 947 }, { "epoch": 0.2628951747088186, "grad_norm": 0.1974896341562271, "learning_rate": 2.4135249304635914e-05, "loss": 0.5802, "step": 948 }, { "epoch": 0.2631724902939545, "grad_norm": 0.1997508406639099, "learning_rate": 2.4133230130880726e-05, "loss": 0.6122, "step": 949 }, { "epoch": 0.2634498058790904, "grad_norm": 0.18641312420368195, "learning_rate": 2.413120868717412e-05, "loss": 0.5705, "step": 950 }, { "epoch": 0.2637271214642263, "grad_norm": 0.19226068258285522, "learning_rate": 2.4129184973910533e-05, "loss": 0.5737, "step": 951 }, { "epoch": 0.26400443704936216, "grad_norm": 0.18802867829799652, "learning_rate": 2.4127158991484855e-05, "loss": 0.562, "step": 952 }, { "epoch": 0.26428175263449805, "grad_norm": 0.19342157244682312, "learning_rate": 2.41251307402924e-05, "loss": 0.5483, "step": 953 }, { "epoch": 0.26455906821963393, "grad_norm": 0.1865961253643036, "learning_rate": 2.4123100220728935e-05, "loss": 0.5638, "step": 954 }, { "epoch": 0.2648363838047698, "grad_norm": 0.22456099092960358, "learning_rate": 2.4121067433190666e-05, "loss": 0.5918, "step": 955 }, { "epoch": 0.2651136993899057, "grad_norm": 0.24317651987075806, "learning_rate": 2.4119032378074245e-05, "loss": 0.5477, "step": 956 }, { "epoch": 0.2653910149750416, "grad_norm": 0.19641970098018646, "learning_rate": 2.411699505577677e-05, "loss": 0.5554, "step": 957 }, { "epoch": 0.26566833056017747, "grad_norm": 0.20111410319805145, "learning_rate": 2.4114955466695773e-05, "loss": 0.562, "step": 958 }, { "epoch": 0.26594564614531335, "grad_norm": 0.19789332151412964, "learning_rate": 2.411291361122923e-05, "loss": 0.5919, "step": 959 }, { "epoch": 0.26622296173044924, "grad_norm": 0.20293276011943817, "learning_rate": 2.4110869489775567e-05, "loss": 0.5839, "step": 960 }, { "epoch": 0.2665002773155851, "grad_norm": 0.18560869991779327, "learning_rate": 2.410882310273364e-05, "loss": 0.5476, "step": 961 }, { "epoch": 0.266777592900721, "grad_norm": 0.1932571828365326, "learning_rate": 2.410677445050276e-05, "loss": 0.5736, "step": 962 }, { "epoch": 0.2670549084858569, "grad_norm": 0.21673519909381866, "learning_rate": 2.4104723533482664e-05, "loss": 0.5558, "step": 963 }, { "epoch": 0.2673322240709928, "grad_norm": 0.21258150041103363, "learning_rate": 2.4102670352073548e-05, "loss": 0.591, "step": 964 }, { "epoch": 0.26760953965612866, "grad_norm": 0.20553314685821533, "learning_rate": 2.4100614906676036e-05, "loss": 0.5901, "step": 965 }, { "epoch": 0.26788685524126454, "grad_norm": 0.23216207325458527, "learning_rate": 2.4098557197691204e-05, "loss": 0.5677, "step": 966 }, { "epoch": 0.2681641708264004, "grad_norm": 0.20041371881961823, "learning_rate": 2.4096497225520564e-05, "loss": 0.566, "step": 967 }, { "epoch": 0.2684414864115363, "grad_norm": 0.20803777873516083, "learning_rate": 2.4094434990566076e-05, "loss": 0.5714, "step": 968 }, { "epoch": 0.2687188019966722, "grad_norm": 0.19973017275333405, "learning_rate": 2.409237049323013e-05, "loss": 0.5805, "step": 969 }, { "epoch": 0.2689961175818081, "grad_norm": 0.20199733972549438, "learning_rate": 2.4090303733915567e-05, "loss": 0.5475, "step": 970 }, { "epoch": 0.26927343316694397, "grad_norm": 0.2112300544977188, "learning_rate": 2.4088234713025664e-05, "loss": 0.5821, "step": 971 }, { "epoch": 0.26955074875207985, "grad_norm": 0.18687258660793304, "learning_rate": 2.408616343096415e-05, "loss": 0.5369, "step": 972 }, { "epoch": 0.26982806433721573, "grad_norm": 0.20027992129325867, "learning_rate": 2.4084089888135176e-05, "loss": 0.551, "step": 973 }, { "epoch": 0.2701053799223516, "grad_norm": 0.1915608048439026, "learning_rate": 2.408201408494335e-05, "loss": 0.5653, "step": 974 }, { "epoch": 0.2703826955074875, "grad_norm": 0.2044133096933365, "learning_rate": 2.407993602179372e-05, "loss": 0.597, "step": 975 }, { "epoch": 0.2706600110926234, "grad_norm": 0.20056426525115967, "learning_rate": 2.4077855699091764e-05, "loss": 0.5864, "step": 976 }, { "epoch": 0.27093732667775927, "grad_norm": 0.19527383148670197, "learning_rate": 2.407577311724341e-05, "loss": 0.565, "step": 977 }, { "epoch": 0.27121464226289516, "grad_norm": 0.2120949625968933, "learning_rate": 2.407368827665503e-05, "loss": 0.5621, "step": 978 }, { "epoch": 0.27149195784803104, "grad_norm": 0.18631702661514282, "learning_rate": 2.407160117773343e-05, "loss": 0.5657, "step": 979 }, { "epoch": 0.2717692734331669, "grad_norm": 0.19784550368785858, "learning_rate": 2.4069511820885854e-05, "loss": 0.5547, "step": 980 }, { "epoch": 0.2720465890183028, "grad_norm": 0.19714047014713287, "learning_rate": 2.4067420206519993e-05, "loss": 0.5793, "step": 981 }, { "epoch": 0.2723239046034387, "grad_norm": 0.2170424610376358, "learning_rate": 2.4065326335043976e-05, "loss": 0.5745, "step": 982 }, { "epoch": 0.2726012201885746, "grad_norm": 0.20346680283546448, "learning_rate": 2.4063230206866377e-05, "loss": 0.5828, "step": 983 }, { "epoch": 0.27287853577371046, "grad_norm": 0.2216998040676117, "learning_rate": 2.40611318223962e-05, "loss": 0.5762, "step": 984 }, { "epoch": 0.27315585135884635, "grad_norm": 0.20975996553897858, "learning_rate": 2.4059031182042897e-05, "loss": 0.5442, "step": 985 }, { "epoch": 0.27343316694398223, "grad_norm": 0.19896754622459412, "learning_rate": 2.405692828621636e-05, "loss": 0.5977, "step": 986 }, { "epoch": 0.2737104825291181, "grad_norm": 0.18813903629779816, "learning_rate": 2.4054823135326922e-05, "loss": 0.5641, "step": 987 }, { "epoch": 0.273987798114254, "grad_norm": 0.19095094501972198, "learning_rate": 2.4052715729785348e-05, "loss": 0.5427, "step": 988 }, { "epoch": 0.2742651136993899, "grad_norm": 0.19185671210289001, "learning_rate": 2.405060607000285e-05, "loss": 0.557, "step": 989 }, { "epoch": 0.27454242928452577, "grad_norm": 0.19244584441184998, "learning_rate": 2.4048494156391087e-05, "loss": 0.5558, "step": 990 }, { "epoch": 0.27481974486966165, "grad_norm": 0.20083992183208466, "learning_rate": 2.404637998936214e-05, "loss": 0.5635, "step": 991 }, { "epoch": 0.27509706045479754, "grad_norm": 0.19767695665359497, "learning_rate": 2.404426356932854e-05, "loss": 0.5814, "step": 992 }, { "epoch": 0.2753743760399334, "grad_norm": 0.19133426249027252, "learning_rate": 2.4042144896703256e-05, "loss": 0.5951, "step": 993 }, { "epoch": 0.2756516916250693, "grad_norm": 0.19364149868488312, "learning_rate": 2.40400239718997e-05, "loss": 0.5695, "step": 994 }, { "epoch": 0.2759290072102052, "grad_norm": 0.19669091701507568, "learning_rate": 2.4037900795331722e-05, "loss": 0.5801, "step": 995 }, { "epoch": 0.2762063227953411, "grad_norm": 0.2011607140302658, "learning_rate": 2.403577536741361e-05, "loss": 0.5736, "step": 996 }, { "epoch": 0.27648363838047696, "grad_norm": 0.19536298513412476, "learning_rate": 2.4033647688560084e-05, "loss": 0.5404, "step": 997 }, { "epoch": 0.27676095396561284, "grad_norm": 0.1903197318315506, "learning_rate": 2.403151775918632e-05, "loss": 0.5939, "step": 998 }, { "epoch": 0.27703826955074873, "grad_norm": 0.19172310829162598, "learning_rate": 2.4029385579707916e-05, "loss": 0.5688, "step": 999 }, { "epoch": 0.2773155851358846, "grad_norm": 0.22239771485328674, "learning_rate": 2.402725115054092e-05, "loss": 0.5754, "step": 1000 }, { "epoch": 0.2775929007210205, "grad_norm": 0.18384471535682678, "learning_rate": 2.402511447210182e-05, "loss": 0.5668, "step": 1001 }, { "epoch": 0.2778702163061564, "grad_norm": 0.2017565667629242, "learning_rate": 2.402297554480753e-05, "loss": 0.5711, "step": 1002 }, { "epoch": 0.27814753189129227, "grad_norm": 0.1890055239200592, "learning_rate": 2.402083436907542e-05, "loss": 0.5552, "step": 1003 }, { "epoch": 0.27842484747642815, "grad_norm": 0.1961050033569336, "learning_rate": 2.4018690945323284e-05, "loss": 0.5744, "step": 1004 }, { "epoch": 0.27870216306156403, "grad_norm": 0.2047930508852005, "learning_rate": 2.401654527396936e-05, "loss": 0.5794, "step": 1005 }, { "epoch": 0.2789794786466999, "grad_norm": 0.19990523159503937, "learning_rate": 2.4014397355432335e-05, "loss": 0.6046, "step": 1006 }, { "epoch": 0.2792567942318358, "grad_norm": 0.19778995215892792, "learning_rate": 2.401224719013131e-05, "loss": 0.5621, "step": 1007 }, { "epoch": 0.2795341098169717, "grad_norm": 0.1909160017967224, "learning_rate": 2.4010094778485846e-05, "loss": 0.5943, "step": 1008 }, { "epoch": 0.27981142540210757, "grad_norm": 0.20408110320568085, "learning_rate": 2.4007940120915946e-05, "loss": 0.5991, "step": 1009 }, { "epoch": 0.28008874098724346, "grad_norm": 0.20102624595165253, "learning_rate": 2.4005783217842024e-05, "loss": 0.6022, "step": 1010 }, { "epoch": 0.28036605657237934, "grad_norm": 0.1910308301448822, "learning_rate": 2.4003624069684957e-05, "loss": 0.5874, "step": 1011 }, { "epoch": 0.2806433721575152, "grad_norm": 0.189175084233284, "learning_rate": 2.4001462676866054e-05, "loss": 0.5698, "step": 1012 }, { "epoch": 0.2809206877426511, "grad_norm": 0.19003014266490936, "learning_rate": 2.3999299039807055e-05, "loss": 0.5819, "step": 1013 }, { "epoch": 0.281198003327787, "grad_norm": 0.2033187299966812, "learning_rate": 2.3997133158930145e-05, "loss": 0.5979, "step": 1014 }, { "epoch": 0.28147531891292293, "grad_norm": 0.1880473643541336, "learning_rate": 2.3994965034657946e-05, "loss": 0.5472, "step": 1015 }, { "epoch": 0.2817526344980588, "grad_norm": 0.19091346859931946, "learning_rate": 2.3992794667413514e-05, "loss": 0.5698, "step": 1016 }, { "epoch": 0.2820299500831947, "grad_norm": 0.19986368715763092, "learning_rate": 2.399062205762035e-05, "loss": 0.5956, "step": 1017 }, { "epoch": 0.2823072656683306, "grad_norm": 0.19207067787647247, "learning_rate": 2.398844720570238e-05, "loss": 0.549, "step": 1018 }, { "epoch": 0.28258458125346647, "grad_norm": 0.19408905506134033, "learning_rate": 2.398627011208398e-05, "loss": 0.5938, "step": 1019 }, { "epoch": 0.28286189683860236, "grad_norm": 0.20354047417640686, "learning_rate": 2.398409077718996e-05, "loss": 0.5488, "step": 1020 }, { "epoch": 0.28313921242373824, "grad_norm": 0.19870953261852264, "learning_rate": 2.3981909201445563e-05, "loss": 0.565, "step": 1021 }, { "epoch": 0.2834165280088741, "grad_norm": 0.19812439382076263, "learning_rate": 2.3979725385276475e-05, "loss": 0.5455, "step": 1022 }, { "epoch": 0.28369384359401, "grad_norm": 0.1886581927537918, "learning_rate": 2.3977539329108813e-05, "loss": 0.5458, "step": 1023 }, { "epoch": 0.2839711591791459, "grad_norm": 0.1987125426530838, "learning_rate": 2.3975351033369138e-05, "loss": 0.5669, "step": 1024 }, { "epoch": 0.2842484747642818, "grad_norm": 0.22528746724128723, "learning_rate": 2.397316049848444e-05, "loss": 0.579, "step": 1025 }, { "epoch": 0.28452579034941766, "grad_norm": 0.20103305578231812, "learning_rate": 2.3970967724882154e-05, "loss": 0.5843, "step": 1026 }, { "epoch": 0.28480310593455355, "grad_norm": 0.21639437973499298, "learning_rate": 2.396877271299015e-05, "loss": 0.5709, "step": 1027 }, { "epoch": 0.28508042151968943, "grad_norm": 0.24263976514339447, "learning_rate": 2.3966575463236725e-05, "loss": 0.5926, "step": 1028 }, { "epoch": 0.2853577371048253, "grad_norm": 0.19815106689929962, "learning_rate": 2.396437597605063e-05, "loss": 0.5722, "step": 1029 }, { "epoch": 0.2856350526899612, "grad_norm": 0.19260184466838837, "learning_rate": 2.396217425186104e-05, "loss": 0.5621, "step": 1030 }, { "epoch": 0.2859123682750971, "grad_norm": 0.20668724179267883, "learning_rate": 2.3959970291097566e-05, "loss": 0.5924, "step": 1031 }, { "epoch": 0.28618968386023297, "grad_norm": 0.2019844353199005, "learning_rate": 2.3957764094190265e-05, "loss": 0.5821, "step": 1032 }, { "epoch": 0.28646699944536885, "grad_norm": 0.1919315904378891, "learning_rate": 2.3955555661569617e-05, "loss": 0.5985, "step": 1033 }, { "epoch": 0.28674431503050474, "grad_norm": 0.19516989588737488, "learning_rate": 2.3953344993666555e-05, "loss": 0.5993, "step": 1034 }, { "epoch": 0.2870216306156406, "grad_norm": 0.18881164491176605, "learning_rate": 2.3951132090912432e-05, "loss": 0.5911, "step": 1035 }, { "epoch": 0.2872989462007765, "grad_norm": 0.20282314717769623, "learning_rate": 2.3948916953739045e-05, "loss": 0.5939, "step": 1036 }, { "epoch": 0.2875762617859124, "grad_norm": 0.19400392472743988, "learning_rate": 2.394669958257863e-05, "loss": 0.5993, "step": 1037 }, { "epoch": 0.2878535773710483, "grad_norm": 0.21767151355743408, "learning_rate": 2.3944479977863847e-05, "loss": 0.5796, "step": 1038 }, { "epoch": 0.28813089295618416, "grad_norm": 0.21109527349472046, "learning_rate": 2.3942258140027805e-05, "loss": 0.5678, "step": 1039 }, { "epoch": 0.28840820854132004, "grad_norm": 0.19805195927619934, "learning_rate": 2.3940034069504048e-05, "loss": 0.5645, "step": 1040 }, { "epoch": 0.28868552412645593, "grad_norm": 0.18621553480625153, "learning_rate": 2.3937807766726545e-05, "loss": 0.5639, "step": 1041 }, { "epoch": 0.2889628397115918, "grad_norm": 0.19867920875549316, "learning_rate": 2.3935579232129705e-05, "loss": 0.578, "step": 1042 }, { "epoch": 0.2892401552967277, "grad_norm": 0.1965349018573761, "learning_rate": 2.393334846614838e-05, "loss": 0.5895, "step": 1043 }, { "epoch": 0.2895174708818636, "grad_norm": 0.21346546709537506, "learning_rate": 2.3931115469217848e-05, "loss": 0.5592, "step": 1044 }, { "epoch": 0.28979478646699947, "grad_norm": 0.19675497710704803, "learning_rate": 2.392888024177382e-05, "loss": 0.5709, "step": 1045 }, { "epoch": 0.29007210205213535, "grad_norm": 0.18906576931476593, "learning_rate": 2.392664278425246e-05, "loss": 0.5721, "step": 1046 }, { "epoch": 0.29034941763727123, "grad_norm": 0.1987171620130539, "learning_rate": 2.3924403097090348e-05, "loss": 0.5796, "step": 1047 }, { "epoch": 0.2906267332224071, "grad_norm": 0.20017191767692566, "learning_rate": 2.392216118072451e-05, "loss": 0.5803, "step": 1048 }, { "epoch": 0.290904048807543, "grad_norm": 0.1904933750629425, "learning_rate": 2.39199170355924e-05, "loss": 0.5847, "step": 1049 }, { "epoch": 0.2911813643926789, "grad_norm": 0.20761634409427643, "learning_rate": 2.3917670662131914e-05, "loss": 0.5678, "step": 1050 }, { "epoch": 0.29145867997781477, "grad_norm": 0.1986733376979828, "learning_rate": 2.391542206078137e-05, "loss": 0.5886, "step": 1051 }, { "epoch": 0.29173599556295066, "grad_norm": 0.2128080427646637, "learning_rate": 2.3913171231979543e-05, "loss": 0.5649, "step": 1052 }, { "epoch": 0.29201331114808654, "grad_norm": 0.1908857524394989, "learning_rate": 2.391091817616562e-05, "loss": 0.5951, "step": 1053 }, { "epoch": 0.2922906267332224, "grad_norm": 0.19354763627052307, "learning_rate": 2.3908662893779228e-05, "loss": 0.5878, "step": 1054 }, { "epoch": 0.2925679423183583, "grad_norm": 0.19191789627075195, "learning_rate": 2.3906405385260443e-05, "loss": 0.5842, "step": 1055 }, { "epoch": 0.2928452579034942, "grad_norm": 0.19070293009281158, "learning_rate": 2.3904145651049764e-05, "loss": 0.5707, "step": 1056 }, { "epoch": 0.2931225734886301, "grad_norm": 0.20022979378700256, "learning_rate": 2.3901883691588116e-05, "loss": 0.5838, "step": 1057 }, { "epoch": 0.29339988907376596, "grad_norm": 0.18544460833072662, "learning_rate": 2.3899619507316878e-05, "loss": 0.5732, "step": 1058 }, { "epoch": 0.29367720465890185, "grad_norm": 0.19487732648849487, "learning_rate": 2.3897353098677845e-05, "loss": 0.5681, "step": 1059 }, { "epoch": 0.29395452024403773, "grad_norm": 0.19073253870010376, "learning_rate": 2.3895084466113253e-05, "loss": 0.5638, "step": 1060 }, { "epoch": 0.2942318358291736, "grad_norm": 0.19357110559940338, "learning_rate": 2.3892813610065778e-05, "loss": 0.5617, "step": 1061 }, { "epoch": 0.2945091514143095, "grad_norm": 0.19819645583629608, "learning_rate": 2.3890540530978518e-05, "loss": 0.5601, "step": 1062 }, { "epoch": 0.2947864669994454, "grad_norm": 0.20150421559810638, "learning_rate": 2.3888265229295014e-05, "loss": 0.5675, "step": 1063 }, { "epoch": 0.29506378258458127, "grad_norm": 0.19517284631729126, "learning_rate": 2.388598770545924e-05, "loss": 0.5516, "step": 1064 }, { "epoch": 0.29534109816971715, "grad_norm": 0.2073058784008026, "learning_rate": 2.3883707959915594e-05, "loss": 0.5729, "step": 1065 }, { "epoch": 0.29561841375485304, "grad_norm": 0.190653994679451, "learning_rate": 2.3881425993108922e-05, "loss": 0.5932, "step": 1066 }, { "epoch": 0.2958957293399889, "grad_norm": 0.19685585796833038, "learning_rate": 2.3879141805484492e-05, "loss": 0.5579, "step": 1067 }, { "epoch": 0.2961730449251248, "grad_norm": 0.19154155254364014, "learning_rate": 2.3876855397488014e-05, "loss": 0.5622, "step": 1068 }, { "epoch": 0.2964503605102607, "grad_norm": 0.23048245906829834, "learning_rate": 2.387456676956562e-05, "loss": 0.5951, "step": 1069 }, { "epoch": 0.2967276760953966, "grad_norm": 0.2001733034849167, "learning_rate": 2.387227592216389e-05, "loss": 0.5723, "step": 1070 }, { "epoch": 0.29700499168053246, "grad_norm": 0.20289377868175507, "learning_rate": 2.3869982855729822e-05, "loss": 0.6023, "step": 1071 }, { "epoch": 0.29728230726566834, "grad_norm": 0.18517981469631195, "learning_rate": 2.386768757071086e-05, "loss": 0.6075, "step": 1072 }, { "epoch": 0.29755962285080423, "grad_norm": 0.18674753606319427, "learning_rate": 2.3865390067554865e-05, "loss": 0.5725, "step": 1073 }, { "epoch": 0.2978369384359401, "grad_norm": 0.19436419010162354, "learning_rate": 2.3863090346710153e-05, "loss": 0.595, "step": 1074 }, { "epoch": 0.298114254021076, "grad_norm": 0.1953345537185669, "learning_rate": 2.3860788408625456e-05, "loss": 0.5845, "step": 1075 }, { "epoch": 0.2983915696062119, "grad_norm": 0.23673701286315918, "learning_rate": 2.385848425374994e-05, "loss": 0.5716, "step": 1076 }, { "epoch": 0.29866888519134777, "grad_norm": 0.20462685823440552, "learning_rate": 2.385617788253321e-05, "loss": 0.5486, "step": 1077 }, { "epoch": 0.29894620077648365, "grad_norm": 0.19857102632522583, "learning_rate": 2.3853869295425296e-05, "loss": 0.5744, "step": 1078 }, { "epoch": 0.29922351636161953, "grad_norm": 0.21950219571590424, "learning_rate": 2.385155849287667e-05, "loss": 0.5644, "step": 1079 }, { "epoch": 0.2995008319467554, "grad_norm": 0.25962188839912415, "learning_rate": 2.384924547533823e-05, "loss": 0.5826, "step": 1080 }, { "epoch": 0.2997781475318913, "grad_norm": 0.21206709742546082, "learning_rate": 2.3846930243261302e-05, "loss": 0.5726, "step": 1081 }, { "epoch": 0.3000554631170272, "grad_norm": 0.20144885778427124, "learning_rate": 2.384461279709765e-05, "loss": 0.5736, "step": 1082 }, { "epoch": 0.3003327787021631, "grad_norm": 0.19098758697509766, "learning_rate": 2.3842293137299475e-05, "loss": 0.57, "step": 1083 }, { "epoch": 0.30061009428729896, "grad_norm": 0.20457082986831665, "learning_rate": 2.38399712643194e-05, "loss": 0.5527, "step": 1084 }, { "epoch": 0.30088740987243484, "grad_norm": 0.22459912300109863, "learning_rate": 2.3837647178610482e-05, "loss": 0.5907, "step": 1085 }, { "epoch": 0.3011647254575707, "grad_norm": 0.2092091143131256, "learning_rate": 2.3835320880626216e-05, "loss": 0.5796, "step": 1086 }, { "epoch": 0.3014420410427066, "grad_norm": 0.18621404469013214, "learning_rate": 2.3832992370820523e-05, "loss": 0.6085, "step": 1087 }, { "epoch": 0.3017193566278425, "grad_norm": 0.1938057541847229, "learning_rate": 2.3830661649647757e-05, "loss": 0.5642, "step": 1088 }, { "epoch": 0.3019966722129784, "grad_norm": 0.2136821299791336, "learning_rate": 2.3828328717562704e-05, "loss": 0.5621, "step": 1089 }, { "epoch": 0.30227398779811426, "grad_norm": 0.200357124209404, "learning_rate": 2.3825993575020577e-05, "loss": 0.5728, "step": 1090 }, { "epoch": 0.30255130338325015, "grad_norm": 0.19953665137290955, "learning_rate": 2.382365622247703e-05, "loss": 0.5677, "step": 1091 }, { "epoch": 0.30282861896838603, "grad_norm": 0.19972378015518188, "learning_rate": 2.382131666038814e-05, "loss": 0.5623, "step": 1092 }, { "epoch": 0.3031059345535219, "grad_norm": 0.21744661033153534, "learning_rate": 2.381897488921041e-05, "loss": 0.5508, "step": 1093 }, { "epoch": 0.3033832501386578, "grad_norm": 0.18918602168560028, "learning_rate": 2.3816630909400793e-05, "loss": 0.5805, "step": 1094 }, { "epoch": 0.3036605657237937, "grad_norm": 0.1894434541463852, "learning_rate": 2.3814284721416656e-05, "loss": 0.563, "step": 1095 }, { "epoch": 0.30393788130892957, "grad_norm": 0.19141651690006256, "learning_rate": 2.3811936325715807e-05, "loss": 0.5839, "step": 1096 }, { "epoch": 0.30421519689406545, "grad_norm": 0.1895507425069809, "learning_rate": 2.3809585722756472e-05, "loss": 0.5864, "step": 1097 }, { "epoch": 0.30449251247920134, "grad_norm": 0.21037045121192932, "learning_rate": 2.3807232912997324e-05, "loss": 0.5806, "step": 1098 }, { "epoch": 0.3047698280643372, "grad_norm": 0.20355623960494995, "learning_rate": 2.3804877896897455e-05, "loss": 0.5677, "step": 1099 }, { "epoch": 0.3050471436494731, "grad_norm": 0.20115728676319122, "learning_rate": 2.380252067491639e-05, "loss": 0.5635, "step": 1100 }, { "epoch": 0.305324459234609, "grad_norm": 0.19460982084274292, "learning_rate": 2.3800161247514086e-05, "loss": 0.5774, "step": 1101 }, { "epoch": 0.3056017748197449, "grad_norm": 0.18929602205753326, "learning_rate": 2.3797799615150934e-05, "loss": 0.5708, "step": 1102 }, { "epoch": 0.30587909040488076, "grad_norm": 0.20288972556591034, "learning_rate": 2.3795435778287745e-05, "loss": 0.5852, "step": 1103 }, { "epoch": 0.30615640599001664, "grad_norm": 0.20964893698692322, "learning_rate": 2.379306973738577e-05, "loss": 0.5731, "step": 1104 }, { "epoch": 0.30643372157515253, "grad_norm": 0.2250620722770691, "learning_rate": 2.379070149290668e-05, "loss": 0.5741, "step": 1105 }, { "epoch": 0.3067110371602884, "grad_norm": 0.19434000551700592, "learning_rate": 2.3788331045312592e-05, "loss": 0.5949, "step": 1106 }, { "epoch": 0.3069883527454243, "grad_norm": 0.1980692446231842, "learning_rate": 2.3785958395066037e-05, "loss": 0.5591, "step": 1107 }, { "epoch": 0.3072656683305602, "grad_norm": 0.20279406011104584, "learning_rate": 2.3783583542629984e-05, "loss": 0.5925, "step": 1108 }, { "epoch": 0.30754298391569607, "grad_norm": 0.19802772998809814, "learning_rate": 2.378120648846783e-05, "loss": 0.5756, "step": 1109 }, { "epoch": 0.30782029950083195, "grad_norm": 0.19455523788928986, "learning_rate": 2.37788272330434e-05, "loss": 0.5846, "step": 1110 }, { "epoch": 0.30809761508596784, "grad_norm": 0.19493591785430908, "learning_rate": 2.3776445776820948e-05, "loss": 0.5788, "step": 1111 }, { "epoch": 0.3083749306711037, "grad_norm": 0.19869014620780945, "learning_rate": 2.3774062120265163e-05, "loss": 0.5836, "step": 1112 }, { "epoch": 0.3086522462562396, "grad_norm": 0.19301962852478027, "learning_rate": 2.3771676263841157e-05, "loss": 0.5689, "step": 1113 }, { "epoch": 0.3089295618413755, "grad_norm": 0.2125353217124939, "learning_rate": 2.3769288208014473e-05, "loss": 0.579, "step": 1114 }, { "epoch": 0.3092068774265114, "grad_norm": 0.20093408226966858, "learning_rate": 2.376689795325109e-05, "loss": 0.5489, "step": 1115 }, { "epoch": 0.30948419301164726, "grad_norm": 0.20457975566387177, "learning_rate": 2.37645055000174e-05, "loss": 0.5734, "step": 1116 }, { "epoch": 0.30976150859678314, "grad_norm": 0.2070866972208023, "learning_rate": 2.376211084878024e-05, "loss": 0.5711, "step": 1117 }, { "epoch": 0.310038824181919, "grad_norm": 0.19698989391326904, "learning_rate": 2.375971400000687e-05, "loss": 0.5781, "step": 1118 }, { "epoch": 0.3103161397670549, "grad_norm": 0.19639791548252106, "learning_rate": 2.3757314954164982e-05, "loss": 0.5496, "step": 1119 }, { "epoch": 0.3105934553521908, "grad_norm": 0.19890196621418, "learning_rate": 2.3754913711722687e-05, "loss": 0.5658, "step": 1120 }, { "epoch": 0.3108707709373267, "grad_norm": 0.2007942795753479, "learning_rate": 2.3752510273148533e-05, "loss": 0.5679, "step": 1121 }, { "epoch": 0.31114808652246256, "grad_norm": 0.21742717921733856, "learning_rate": 2.3750104638911493e-05, "loss": 0.5603, "step": 1122 }, { "epoch": 0.31142540210759845, "grad_norm": 0.19708824157714844, "learning_rate": 2.3747696809480974e-05, "loss": 0.561, "step": 1123 }, { "epoch": 0.31170271769273433, "grad_norm": 0.20064733922481537, "learning_rate": 2.374528678532681e-05, "loss": 0.5535, "step": 1124 }, { "epoch": 0.3119800332778702, "grad_norm": 0.19142742455005646, "learning_rate": 2.3742874566919248e-05, "loss": 0.5621, "step": 1125 }, { "epoch": 0.3122573488630061, "grad_norm": 0.20165249705314636, "learning_rate": 2.3740460154728987e-05, "loss": 0.5862, "step": 1126 }, { "epoch": 0.312534664448142, "grad_norm": 0.19451411068439484, "learning_rate": 2.373804354922714e-05, "loss": 0.5611, "step": 1127 }, { "epoch": 0.31281198003327787, "grad_norm": 0.20111672580242157, "learning_rate": 2.373562475088525e-05, "loss": 0.5629, "step": 1128 }, { "epoch": 0.31308929561841375, "grad_norm": 0.21810267865657806, "learning_rate": 2.3733203760175292e-05, "loss": 0.5746, "step": 1129 }, { "epoch": 0.31336661120354964, "grad_norm": 0.20461341738700867, "learning_rate": 2.3730780577569654e-05, "loss": 0.5429, "step": 1130 }, { "epoch": 0.3136439267886855, "grad_norm": 0.18515266478061676, "learning_rate": 2.3728355203541182e-05, "loss": 0.5547, "step": 1131 }, { "epoch": 0.3139212423738214, "grad_norm": 0.1941545456647873, "learning_rate": 2.3725927638563112e-05, "loss": 0.5566, "step": 1132 }, { "epoch": 0.3141985579589573, "grad_norm": 0.20392583310604095, "learning_rate": 2.3723497883109137e-05, "loss": 0.5506, "step": 1133 }, { "epoch": 0.3144758735440932, "grad_norm": 0.19422155618667603, "learning_rate": 2.3721065937653363e-05, "loss": 0.5826, "step": 1134 }, { "epoch": 0.31475318912922906, "grad_norm": 0.19887231290340424, "learning_rate": 2.3718631802670334e-05, "loss": 0.5651, "step": 1135 }, { "epoch": 0.31503050471436495, "grad_norm": 0.20396895706653595, "learning_rate": 2.3716195478635e-05, "loss": 0.5396, "step": 1136 }, { "epoch": 0.31530782029950083, "grad_norm": 0.2106340378522873, "learning_rate": 2.3713756966022766e-05, "loss": 0.552, "step": 1137 }, { "epoch": 0.3155851358846367, "grad_norm": 0.20762008428573608, "learning_rate": 2.371131626530944e-05, "loss": 0.5942, "step": 1138 }, { "epoch": 0.3158624514697726, "grad_norm": 0.19259460270404816, "learning_rate": 2.3708873376971277e-05, "loss": 0.5364, "step": 1139 }, { "epoch": 0.3161397670549085, "grad_norm": 0.19473238289356232, "learning_rate": 2.3706428301484946e-05, "loss": 0.5862, "step": 1140 }, { "epoch": 0.31641708264004437, "grad_norm": 0.20146487653255463, "learning_rate": 2.370398103932754e-05, "loss": 0.5924, "step": 1141 }, { "epoch": 0.31669439822518025, "grad_norm": 0.19463180005550385, "learning_rate": 2.370153159097659e-05, "loss": 0.554, "step": 1142 }, { "epoch": 0.31697171381031614, "grad_norm": 0.22867369651794434, "learning_rate": 2.3699079956910052e-05, "loss": 0.58, "step": 1143 }, { "epoch": 0.317249029395452, "grad_norm": 0.1938031017780304, "learning_rate": 2.3696626137606297e-05, "loss": 0.5998, "step": 1144 }, { "epoch": 0.3175263449805879, "grad_norm": 0.1907264143228531, "learning_rate": 2.369417013354413e-05, "loss": 0.5601, "step": 1145 }, { "epoch": 0.3178036605657238, "grad_norm": 0.21385140717029572, "learning_rate": 2.369171194520279e-05, "loss": 0.5919, "step": 1146 }, { "epoch": 0.3180809761508597, "grad_norm": 0.23154176771640778, "learning_rate": 2.3689251573061932e-05, "loss": 0.5901, "step": 1147 }, { "epoch": 0.31835829173599556, "grad_norm": 0.19392211735248566, "learning_rate": 2.3686789017601634e-05, "loss": 0.5514, "step": 1148 }, { "epoch": 0.31863560732113144, "grad_norm": 0.19515374302864075, "learning_rate": 2.3684324279302418e-05, "loss": 0.558, "step": 1149 }, { "epoch": 0.3189129229062673, "grad_norm": 0.18750956654548645, "learning_rate": 2.3681857358645205e-05, "loss": 0.5544, "step": 1150 }, { "epoch": 0.3191902384914032, "grad_norm": 0.20247051119804382, "learning_rate": 2.3679388256111368e-05, "loss": 0.5724, "step": 1151 }, { "epoch": 0.3194675540765391, "grad_norm": 0.19525887072086334, "learning_rate": 2.3676916972182686e-05, "loss": 0.583, "step": 1152 }, { "epoch": 0.319744869661675, "grad_norm": 0.20457209646701813, "learning_rate": 2.3674443507341377e-05, "loss": 0.5592, "step": 1153 }, { "epoch": 0.32002218524681086, "grad_norm": 0.19369947910308838, "learning_rate": 2.367196786207008e-05, "loss": 0.5889, "step": 1154 }, { "epoch": 0.32029950083194675, "grad_norm": 0.18851204216480255, "learning_rate": 2.3669490036851856e-05, "loss": 0.5804, "step": 1155 }, { "epoch": 0.32057681641708263, "grad_norm": 0.1838391125202179, "learning_rate": 2.3667010032170196e-05, "loss": 0.5251, "step": 1156 }, { "epoch": 0.3208541320022185, "grad_norm": 0.1949407011270523, "learning_rate": 2.3664527848509015e-05, "loss": 0.5719, "step": 1157 }, { "epoch": 0.3211314475873544, "grad_norm": 0.2245536744594574, "learning_rate": 2.3662043486352653e-05, "loss": 0.5699, "step": 1158 }, { "epoch": 0.3214087631724903, "grad_norm": 0.18889588117599487, "learning_rate": 2.3659556946185875e-05, "loss": 0.5449, "step": 1159 }, { "epoch": 0.32168607875762617, "grad_norm": 0.18529140949249268, "learning_rate": 2.3657068228493863e-05, "loss": 0.5469, "step": 1160 }, { "epoch": 0.32196339434276205, "grad_norm": 0.19042176008224487, "learning_rate": 2.3654577333762246e-05, "loss": 0.5662, "step": 1161 }, { "epoch": 0.32224070992789794, "grad_norm": 0.22052356600761414, "learning_rate": 2.3652084262477055e-05, "loss": 0.5603, "step": 1162 }, { "epoch": 0.3225180255130338, "grad_norm": 0.19664537906646729, "learning_rate": 2.364958901512475e-05, "loss": 0.5837, "step": 1163 }, { "epoch": 0.3227953410981697, "grad_norm": 0.1836601048707962, "learning_rate": 2.3647091592192234e-05, "loss": 0.5626, "step": 1164 }, { "epoch": 0.3230726566833056, "grad_norm": 0.19754944741725922, "learning_rate": 2.3644591994166805e-05, "loss": 0.5867, "step": 1165 }, { "epoch": 0.3233499722684415, "grad_norm": 0.18801425397396088, "learning_rate": 2.364209022153621e-05, "loss": 0.5447, "step": 1166 }, { "epoch": 0.32362728785357736, "grad_norm": 0.19709810614585876, "learning_rate": 2.363958627478861e-05, "loss": 0.5953, "step": 1167 }, { "epoch": 0.32390460343871325, "grad_norm": 0.19867432117462158, "learning_rate": 2.3637080154412588e-05, "loss": 0.6041, "step": 1168 }, { "epoch": 0.32418191902384913, "grad_norm": 0.19217659533023834, "learning_rate": 2.363457186089716e-05, "loss": 0.5693, "step": 1169 }, { "epoch": 0.324459234608985, "grad_norm": 0.20366674661636353, "learning_rate": 2.3632061394731753e-05, "loss": 0.5957, "step": 1170 }, { "epoch": 0.3247365501941209, "grad_norm": 0.19761385023593903, "learning_rate": 2.362954875640623e-05, "loss": 0.5722, "step": 1171 }, { "epoch": 0.3250138657792568, "grad_norm": 0.20664581656455994, "learning_rate": 2.362703394641087e-05, "loss": 0.5763, "step": 1172 }, { "epoch": 0.32529118136439267, "grad_norm": 0.18691927194595337, "learning_rate": 2.3624516965236386e-05, "loss": 0.5707, "step": 1173 }, { "epoch": 0.32556849694952855, "grad_norm": 0.18426918983459473, "learning_rate": 2.36219978133739e-05, "loss": 0.5337, "step": 1174 }, { "epoch": 0.32584581253466444, "grad_norm": 0.19540777802467346, "learning_rate": 2.3619476491314977e-05, "loss": 0.5549, "step": 1175 }, { "epoch": 0.3261231281198003, "grad_norm": 0.2000686079263687, "learning_rate": 2.3616952999551576e-05, "loss": 0.5765, "step": 1176 }, { "epoch": 0.3264004437049362, "grad_norm": 0.19267399609088898, "learning_rate": 2.3614427338576114e-05, "loss": 0.5585, "step": 1177 }, { "epoch": 0.3266777592900721, "grad_norm": 0.21964752674102783, "learning_rate": 2.3611899508881403e-05, "loss": 0.5742, "step": 1178 }, { "epoch": 0.326955074875208, "grad_norm": 0.21694490313529968, "learning_rate": 2.3609369510960696e-05, "loss": 0.5586, "step": 1179 }, { "epoch": 0.32723239046034386, "grad_norm": 0.2035355418920517, "learning_rate": 2.360683734530766e-05, "loss": 0.5712, "step": 1180 }, { "epoch": 0.32750970604547974, "grad_norm": 0.1984809935092926, "learning_rate": 2.3604303012416383e-05, "loss": 0.5846, "step": 1181 }, { "epoch": 0.3277870216306156, "grad_norm": 0.19246408343315125, "learning_rate": 2.360176651278139e-05, "loss": 0.5931, "step": 1182 }, { "epoch": 0.3280643372157515, "grad_norm": 0.18938469886779785, "learning_rate": 2.3599227846897615e-05, "loss": 0.5722, "step": 1183 }, { "epoch": 0.3283416528008874, "grad_norm": 0.1882466822862625, "learning_rate": 2.359668701526042e-05, "loss": 0.5693, "step": 1184 }, { "epoch": 0.3286189683860233, "grad_norm": 0.18115603923797607, "learning_rate": 2.3594144018365584e-05, "loss": 0.5694, "step": 1185 }, { "epoch": 0.32889628397115916, "grad_norm": 0.21563617885112762, "learning_rate": 2.3591598856709317e-05, "loss": 0.5541, "step": 1186 }, { "epoch": 0.32917359955629505, "grad_norm": 0.19328701496124268, "learning_rate": 2.3589051530788246e-05, "loss": 0.5469, "step": 1187 }, { "epoch": 0.32945091514143093, "grad_norm": 0.19828177988529205, "learning_rate": 2.358650204109942e-05, "loss": 0.5785, "step": 1188 }, { "epoch": 0.3297282307265668, "grad_norm": 0.1821170300245285, "learning_rate": 2.358395038814032e-05, "loss": 0.5727, "step": 1189 }, { "epoch": 0.3300055463117027, "grad_norm": 0.19181084632873535, "learning_rate": 2.3581396572408833e-05, "loss": 0.5699, "step": 1190 }, { "epoch": 0.3302828618968386, "grad_norm": 0.18721237778663635, "learning_rate": 2.3578840594403275e-05, "loss": 0.5786, "step": 1191 }, { "epoch": 0.33056017748197447, "grad_norm": 0.2080426812171936, "learning_rate": 2.3576282454622394e-05, "loss": 0.6082, "step": 1192 }, { "epoch": 0.33083749306711036, "grad_norm": 0.19874081015586853, "learning_rate": 2.3573722153565343e-05, "loss": 0.5802, "step": 1193 }, { "epoch": 0.33111480865224624, "grad_norm": 0.20292651653289795, "learning_rate": 2.357115969173171e-05, "loss": 0.5562, "step": 1194 }, { "epoch": 0.3313921242373821, "grad_norm": 0.1992533951997757, "learning_rate": 2.356859506962149e-05, "loss": 0.5491, "step": 1195 }, { "epoch": 0.331669439822518, "grad_norm": 0.19271281361579895, "learning_rate": 2.356602828773512e-05, "loss": 0.5713, "step": 1196 }, { "epoch": 0.3319467554076539, "grad_norm": 0.19548431038856506, "learning_rate": 2.356345934657344e-05, "loss": 0.5455, "step": 1197 }, { "epoch": 0.3322240709927898, "grad_norm": 0.19355355203151703, "learning_rate": 2.3560888246637726e-05, "loss": 0.576, "step": 1198 }, { "epoch": 0.33250138657792566, "grad_norm": 0.19047684967517853, "learning_rate": 2.3558314988429657e-05, "loss": 0.5569, "step": 1199 }, { "epoch": 0.33277870216306155, "grad_norm": 0.19171833992004395, "learning_rate": 2.3555739572451353e-05, "loss": 0.5974, "step": 1200 }, { "epoch": 0.33305601774819743, "grad_norm": 0.18777206540107727, "learning_rate": 2.3553161999205337e-05, "loss": 0.5614, "step": 1201 }, { "epoch": 0.3333333333333333, "grad_norm": 0.1937248855829239, "learning_rate": 2.355058226919457e-05, "loss": 0.5725, "step": 1202 }, { "epoch": 0.3336106489184692, "grad_norm": 0.19912956655025482, "learning_rate": 2.3548000382922422e-05, "loss": 0.5595, "step": 1203 }, { "epoch": 0.3338879645036051, "grad_norm": 0.19780032336711884, "learning_rate": 2.354541634089269e-05, "loss": 0.5766, "step": 1204 }, { "epoch": 0.33416528008874097, "grad_norm": 0.19286835193634033, "learning_rate": 2.3542830143609584e-05, "loss": 0.5529, "step": 1205 }, { "epoch": 0.33444259567387685, "grad_norm": 0.19875440001487732, "learning_rate": 2.3540241791577745e-05, "loss": 0.5616, "step": 1206 }, { "epoch": 0.33471991125901274, "grad_norm": 0.20458170771598816, "learning_rate": 2.3537651285302224e-05, "loss": 0.5754, "step": 1207 }, { "epoch": 0.3349972268441486, "grad_norm": 0.19896847009658813, "learning_rate": 2.3535058625288503e-05, "loss": 0.586, "step": 1208 }, { "epoch": 0.3352745424292845, "grad_norm": 0.2005191296339035, "learning_rate": 2.3532463812042478e-05, "loss": 0.57, "step": 1209 }, { "epoch": 0.3355518580144204, "grad_norm": 0.19629941880702972, "learning_rate": 2.3529866846070457e-05, "loss": 0.5632, "step": 1210 }, { "epoch": 0.3358291735995563, "grad_norm": 0.2009962797164917, "learning_rate": 2.3527267727879187e-05, "loss": 0.5926, "step": 1211 }, { "epoch": 0.33610648918469216, "grad_norm": 0.18961815536022186, "learning_rate": 2.3524666457975826e-05, "loss": 0.566, "step": 1212 }, { "epoch": 0.33638380476982804, "grad_norm": 0.19457431137561798, "learning_rate": 2.3522063036867938e-05, "loss": 0.5302, "step": 1213 }, { "epoch": 0.3366611203549639, "grad_norm": 0.20213200151920319, "learning_rate": 2.351945746506353e-05, "loss": 0.5726, "step": 1214 }, { "epoch": 0.3369384359400998, "grad_norm": 0.1976713389158249, "learning_rate": 2.351684974307102e-05, "loss": 0.5425, "step": 1215 }, { "epoch": 0.3372157515252357, "grad_norm": 0.19058452546596527, "learning_rate": 2.3514239871399235e-05, "loss": 0.5695, "step": 1216 }, { "epoch": 0.3374930671103716, "grad_norm": 0.18397174775600433, "learning_rate": 2.3511627850557432e-05, "loss": 0.535, "step": 1217 }, { "epoch": 0.33777038269550747, "grad_norm": 0.2004639059305191, "learning_rate": 2.3509013681055293e-05, "loss": 0.5698, "step": 1218 }, { "epoch": 0.33804769828064335, "grad_norm": 0.1970200389623642, "learning_rate": 2.3506397363402905e-05, "loss": 0.5831, "step": 1219 }, { "epoch": 0.33832501386577923, "grad_norm": 0.24607634544372559, "learning_rate": 2.3503778898110782e-05, "loss": 0.6011, "step": 1220 }, { "epoch": 0.3386023294509151, "grad_norm": 0.1974974125623703, "learning_rate": 2.3501158285689857e-05, "loss": 0.5402, "step": 1221 }, { "epoch": 0.338879645036051, "grad_norm": 0.20804527401924133, "learning_rate": 2.3498535526651478e-05, "loss": 0.5804, "step": 1222 }, { "epoch": 0.3391569606211869, "grad_norm": 0.21283218264579773, "learning_rate": 2.349591062150742e-05, "loss": 0.5498, "step": 1223 }, { "epoch": 0.33943427620632277, "grad_norm": 0.20082467794418335, "learning_rate": 2.3493283570769863e-05, "loss": 0.5589, "step": 1224 }, { "epoch": 0.33971159179145866, "grad_norm": 0.1998477727174759, "learning_rate": 2.3490654374951426e-05, "loss": 0.5496, "step": 1225 }, { "epoch": 0.33998890737659454, "grad_norm": 0.18160581588745117, "learning_rate": 2.3488023034565127e-05, "loss": 0.569, "step": 1226 }, { "epoch": 0.3402662229617304, "grad_norm": 0.18750114738941193, "learning_rate": 2.3485389550124413e-05, "loss": 0.5768, "step": 1227 }, { "epoch": 0.3405435385468663, "grad_norm": 0.19347476959228516, "learning_rate": 2.3482753922143143e-05, "loss": 0.5354, "step": 1228 }, { "epoch": 0.3408208541320022, "grad_norm": 0.19742092490196228, "learning_rate": 2.34801161511356e-05, "loss": 0.5656, "step": 1229 }, { "epoch": 0.3410981697171381, "grad_norm": 0.19319242238998413, "learning_rate": 2.3477476237616487e-05, "loss": 0.5654, "step": 1230 }, { "epoch": 0.34137548530227396, "grad_norm": 0.18535441160202026, "learning_rate": 2.3474834182100914e-05, "loss": 0.5419, "step": 1231 }, { "epoch": 0.34165280088740985, "grad_norm": 0.2077123373746872, "learning_rate": 2.347218998510442e-05, "loss": 0.5719, "step": 1232 }, { "epoch": 0.34193011647254573, "grad_norm": 0.1924944818019867, "learning_rate": 2.3469543647142954e-05, "loss": 0.5493, "step": 1233 }, { "epoch": 0.3422074320576816, "grad_norm": 0.1943088173866272, "learning_rate": 2.3466895168732894e-05, "loss": 0.5418, "step": 1234 }, { "epoch": 0.3424847476428175, "grad_norm": 0.19199106097221375, "learning_rate": 2.3464244550391023e-05, "loss": 0.5463, "step": 1235 }, { "epoch": 0.3427620632279534, "grad_norm": 0.19151833653450012, "learning_rate": 2.3461591792634548e-05, "loss": 0.5595, "step": 1236 }, { "epoch": 0.34303937881308927, "grad_norm": 0.20270255208015442, "learning_rate": 2.3458936895981093e-05, "loss": 0.5963, "step": 1237 }, { "epoch": 0.34331669439822515, "grad_norm": 0.1867821365594864, "learning_rate": 2.3456279860948696e-05, "loss": 0.5716, "step": 1238 }, { "epoch": 0.34359400998336104, "grad_norm": 0.20384635031223297, "learning_rate": 2.3453620688055817e-05, "loss": 0.5726, "step": 1239 }, { "epoch": 0.343871325568497, "grad_norm": 0.19507844746112823, "learning_rate": 2.3450959377821334e-05, "loss": 0.5461, "step": 1240 }, { "epoch": 0.34414864115363286, "grad_norm": 0.18854713439941406, "learning_rate": 2.3448295930764536e-05, "loss": 0.5617, "step": 1241 }, { "epoch": 0.34442595673876875, "grad_norm": 0.1924966722726822, "learning_rate": 2.344563034740513e-05, "loss": 0.5716, "step": 1242 }, { "epoch": 0.34470327232390463, "grad_norm": 0.1878724843263626, "learning_rate": 2.3442962628263245e-05, "loss": 0.5588, "step": 1243 }, { "epoch": 0.3449805879090405, "grad_norm": 0.20984706282615662, "learning_rate": 2.3440292773859422e-05, "loss": 0.5623, "step": 1244 }, { "epoch": 0.3452579034941764, "grad_norm": 0.19068847596645355, "learning_rate": 2.343762078471462e-05, "loss": 0.5573, "step": 1245 }, { "epoch": 0.3455352190793123, "grad_norm": 0.1986820548772812, "learning_rate": 2.343494666135022e-05, "loss": 0.5799, "step": 1246 }, { "epoch": 0.34581253466444817, "grad_norm": 0.21772533655166626, "learning_rate": 2.343227040428801e-05, "loss": 0.5674, "step": 1247 }, { "epoch": 0.34608985024958405, "grad_norm": 0.20012469589710236, "learning_rate": 2.3429592014050198e-05, "loss": 0.5713, "step": 1248 }, { "epoch": 0.34636716583471994, "grad_norm": 0.21233108639717102, "learning_rate": 2.3426911491159408e-05, "loss": 0.5232, "step": 1249 }, { "epoch": 0.3466444814198558, "grad_norm": 0.19460223615169525, "learning_rate": 2.3424228836138686e-05, "loss": 0.5547, "step": 1250 }, { "epoch": 0.3469217970049917, "grad_norm": 0.18516409397125244, "learning_rate": 2.3421544049511484e-05, "loss": 0.5445, "step": 1251 }, { "epoch": 0.3471991125901276, "grad_norm": 0.20658938586711884, "learning_rate": 2.341885713180168e-05, "loss": 0.5551, "step": 1252 }, { "epoch": 0.3474764281752635, "grad_norm": 0.19541315734386444, "learning_rate": 2.3416168083533556e-05, "loss": 0.5608, "step": 1253 }, { "epoch": 0.34775374376039936, "grad_norm": 0.22885319590568542, "learning_rate": 2.3413476905231825e-05, "loss": 0.5836, "step": 1254 }, { "epoch": 0.34803105934553524, "grad_norm": 0.2285439521074295, "learning_rate": 2.3410783597421597e-05, "loss": 0.5444, "step": 1255 }, { "epoch": 0.3483083749306711, "grad_norm": 0.200783833861351, "learning_rate": 2.3408088160628422e-05, "loss": 0.5601, "step": 1256 }, { "epoch": 0.348585690515807, "grad_norm": 0.19225940108299255, "learning_rate": 2.3405390595378236e-05, "loss": 0.5602, "step": 1257 }, { "epoch": 0.3488630061009429, "grad_norm": 0.19936451315879822, "learning_rate": 2.340269090219741e-05, "loss": 0.5593, "step": 1258 }, { "epoch": 0.3491403216860788, "grad_norm": 0.20055221021175385, "learning_rate": 2.3399989081612732e-05, "loss": 0.568, "step": 1259 }, { "epoch": 0.34941763727121466, "grad_norm": 0.1941661238670349, "learning_rate": 2.3397285134151394e-05, "loss": 0.5688, "step": 1260 }, { "epoch": 0.34969495285635055, "grad_norm": 0.19209027290344238, "learning_rate": 2.3394579060341008e-05, "loss": 0.5635, "step": 1261 }, { "epoch": 0.34997226844148643, "grad_norm": 0.20127955079078674, "learning_rate": 2.33918708607096e-05, "loss": 0.5833, "step": 1262 }, { "epoch": 0.3502495840266223, "grad_norm": 0.20718038082122803, "learning_rate": 2.3389160535785612e-05, "loss": 0.5883, "step": 1263 }, { "epoch": 0.3505268996117582, "grad_norm": 0.19489611685276031, "learning_rate": 2.3386448086097902e-05, "loss": 0.5618, "step": 1264 }, { "epoch": 0.3508042151968941, "grad_norm": 0.19366618990898132, "learning_rate": 2.338373351217574e-05, "loss": 0.5813, "step": 1265 }, { "epoch": 0.35108153078202997, "grad_norm": 0.19742882251739502, "learning_rate": 2.3381016814548806e-05, "loss": 0.5866, "step": 1266 }, { "epoch": 0.35135884636716586, "grad_norm": 0.2014235556125641, "learning_rate": 2.337829799374721e-05, "loss": 0.5503, "step": 1267 }, { "epoch": 0.35163616195230174, "grad_norm": 0.19284577667713165, "learning_rate": 2.337557705030146e-05, "loss": 0.5683, "step": 1268 }, { "epoch": 0.3519134775374376, "grad_norm": 0.19096529483795166, "learning_rate": 2.3372853984742482e-05, "loss": 0.5535, "step": 1269 }, { "epoch": 0.3521907931225735, "grad_norm": 0.19602730870246887, "learning_rate": 2.337012879760162e-05, "loss": 0.5717, "step": 1270 }, { "epoch": 0.3524681087077094, "grad_norm": 0.19658134877681732, "learning_rate": 2.3367401489410635e-05, "loss": 0.5945, "step": 1271 }, { "epoch": 0.3527454242928453, "grad_norm": 0.20233921706676483, "learning_rate": 2.3364672060701688e-05, "loss": 0.5887, "step": 1272 }, { "epoch": 0.35302273987798116, "grad_norm": 0.20138201117515564, "learning_rate": 2.3361940512007368e-05, "loss": 0.5424, "step": 1273 }, { "epoch": 0.35330005546311705, "grad_norm": 0.1879206895828247, "learning_rate": 2.3359206843860675e-05, "loss": 0.546, "step": 1274 }, { "epoch": 0.35357737104825293, "grad_norm": 0.19074855744838715, "learning_rate": 2.335647105679502e-05, "loss": 0.5486, "step": 1275 }, { "epoch": 0.3538546866333888, "grad_norm": 0.2010781466960907, "learning_rate": 2.335373315134422e-05, "loss": 0.6002, "step": 1276 }, { "epoch": 0.3541320022185247, "grad_norm": 0.19462116062641144, "learning_rate": 2.3350993128042523e-05, "loss": 0.5937, "step": 1277 }, { "epoch": 0.3544093178036606, "grad_norm": 0.21550050377845764, "learning_rate": 2.3348250987424573e-05, "loss": 0.5783, "step": 1278 }, { "epoch": 0.35468663338879647, "grad_norm": 0.20210538804531097, "learning_rate": 2.3345506730025434e-05, "loss": 0.5784, "step": 1279 }, { "epoch": 0.35496394897393235, "grad_norm": 0.19225560128688812, "learning_rate": 2.3342760356380588e-05, "loss": 0.566, "step": 1280 }, { "epoch": 0.35524126455906824, "grad_norm": 0.2126346081495285, "learning_rate": 2.3340011867025924e-05, "loss": 0.5369, "step": 1281 }, { "epoch": 0.3555185801442041, "grad_norm": 0.19172579050064087, "learning_rate": 2.333726126249774e-05, "loss": 0.5736, "step": 1282 }, { "epoch": 0.35579589572934, "grad_norm": 0.20066869258880615, "learning_rate": 2.333450854333276e-05, "loss": 0.5538, "step": 1283 }, { "epoch": 0.3560732113144759, "grad_norm": 0.2043704241514206, "learning_rate": 2.3331753710068106e-05, "loss": 0.5682, "step": 1284 }, { "epoch": 0.3563505268996118, "grad_norm": 0.1963237076997757, "learning_rate": 2.3328996763241323e-05, "loss": 0.5342, "step": 1285 }, { "epoch": 0.35662784248474766, "grad_norm": 0.18448245525360107, "learning_rate": 2.332623770339036e-05, "loss": 0.5717, "step": 1286 }, { "epoch": 0.35690515806988354, "grad_norm": 0.1940702348947525, "learning_rate": 2.3323476531053587e-05, "loss": 0.5491, "step": 1287 }, { "epoch": 0.35718247365501943, "grad_norm": 0.1945616453886032, "learning_rate": 2.3320713246769782e-05, "loss": 0.5464, "step": 1288 }, { "epoch": 0.3574597892401553, "grad_norm": 0.19436432421207428, "learning_rate": 2.331794785107813e-05, "loss": 0.5475, "step": 1289 }, { "epoch": 0.3577371048252912, "grad_norm": 0.19309785962104797, "learning_rate": 2.3315180344518236e-05, "loss": 0.5413, "step": 1290 }, { "epoch": 0.3580144204104271, "grad_norm": 0.19348269701004028, "learning_rate": 2.331241072763012e-05, "loss": 0.5713, "step": 1291 }, { "epoch": 0.35829173599556297, "grad_norm": 0.18745654821395874, "learning_rate": 2.330963900095419e-05, "loss": 0.5661, "step": 1292 }, { "epoch": 0.35856905158069885, "grad_norm": 0.20293056964874268, "learning_rate": 2.3306865165031305e-05, "loss": 0.5855, "step": 1293 }, { "epoch": 0.35884636716583473, "grad_norm": 0.202471524477005, "learning_rate": 2.3304089220402702e-05, "loss": 0.5905, "step": 1294 }, { "epoch": 0.3591236827509706, "grad_norm": 0.18649210035800934, "learning_rate": 2.330131116761004e-05, "loss": 0.5768, "step": 1295 }, { "epoch": 0.3594009983361065, "grad_norm": 0.18877407908439636, "learning_rate": 2.3298531007195398e-05, "loss": 0.576, "step": 1296 }, { "epoch": 0.3596783139212424, "grad_norm": 0.18760234117507935, "learning_rate": 2.329574873970125e-05, "loss": 0.54, "step": 1297 }, { "epoch": 0.35995562950637827, "grad_norm": 0.2042498141527176, "learning_rate": 2.32929643656705e-05, "loss": 0.5595, "step": 1298 }, { "epoch": 0.36023294509151416, "grad_norm": 0.19519071280956268, "learning_rate": 2.3290177885646448e-05, "loss": 0.5446, "step": 1299 }, { "epoch": 0.36051026067665004, "grad_norm": 0.19533094763755798, "learning_rate": 2.3287389300172806e-05, "loss": 0.619, "step": 1300 }, { "epoch": 0.3607875762617859, "grad_norm": 0.18442007899284363, "learning_rate": 2.3284598609793705e-05, "loss": 0.5651, "step": 1301 }, { "epoch": 0.3610648918469218, "grad_norm": 0.1934802085161209, "learning_rate": 2.3281805815053688e-05, "loss": 0.5894, "step": 1302 }, { "epoch": 0.3613422074320577, "grad_norm": 0.20552469789981842, "learning_rate": 2.327901091649769e-05, "loss": 0.5943, "step": 1303 }, { "epoch": 0.3616195230171936, "grad_norm": 0.19476066529750824, "learning_rate": 2.3276213914671084e-05, "loss": 0.5729, "step": 1304 }, { "epoch": 0.36189683860232946, "grad_norm": 0.19621802866458893, "learning_rate": 2.3273414810119632e-05, "loss": 0.5616, "step": 1305 }, { "epoch": 0.36217415418746535, "grad_norm": 0.192935511469841, "learning_rate": 2.3270613603389513e-05, "loss": 0.5507, "step": 1306 }, { "epoch": 0.36245146977260123, "grad_norm": 0.18750174343585968, "learning_rate": 2.3267810295027317e-05, "loss": 0.5866, "step": 1307 }, { "epoch": 0.3627287853577371, "grad_norm": 0.18611189723014832, "learning_rate": 2.3265004885580047e-05, "loss": 0.5638, "step": 1308 }, { "epoch": 0.363006100942873, "grad_norm": 0.205692857503891, "learning_rate": 2.3262197375595108e-05, "loss": 0.5628, "step": 1309 }, { "epoch": 0.3632834165280089, "grad_norm": 0.19303978979587555, "learning_rate": 2.3259387765620322e-05, "loss": 0.5785, "step": 1310 }, { "epoch": 0.36356073211314477, "grad_norm": 0.19016319513320923, "learning_rate": 2.325657605620392e-05, "loss": 0.5637, "step": 1311 }, { "epoch": 0.36383804769828065, "grad_norm": 0.18235303461551666, "learning_rate": 2.325376224789454e-05, "loss": 0.5518, "step": 1312 }, { "epoch": 0.36411536328341654, "grad_norm": 0.1889420747756958, "learning_rate": 2.325094634124123e-05, "loss": 0.5539, "step": 1313 }, { "epoch": 0.3643926788685524, "grad_norm": 0.1905851662158966, "learning_rate": 2.3248128336793444e-05, "loss": 0.5892, "step": 1314 }, { "epoch": 0.3646699944536883, "grad_norm": 0.18385791778564453, "learning_rate": 2.324530823510106e-05, "loss": 0.5746, "step": 1315 }, { "epoch": 0.3649473100388242, "grad_norm": 0.18778762221336365, "learning_rate": 2.3242486036714343e-05, "loss": 0.5578, "step": 1316 }, { "epoch": 0.3652246256239601, "grad_norm": 0.18907684087753296, "learning_rate": 2.3239661742183984e-05, "loss": 0.5912, "step": 1317 }, { "epoch": 0.36550194120909596, "grad_norm": 0.19764885306358337, "learning_rate": 2.3236835352061076e-05, "loss": 0.5719, "step": 1318 }, { "epoch": 0.36577925679423184, "grad_norm": 0.1823858916759491, "learning_rate": 2.3234006866897125e-05, "loss": 0.5619, "step": 1319 }, { "epoch": 0.36605657237936773, "grad_norm": 0.31975796818733215, "learning_rate": 2.3231176287244044e-05, "loss": 0.5935, "step": 1320 }, { "epoch": 0.3663338879645036, "grad_norm": 0.17738473415374756, "learning_rate": 2.322834361365415e-05, "loss": 0.56, "step": 1321 }, { "epoch": 0.3666112035496395, "grad_norm": 0.19397957623004913, "learning_rate": 2.3225508846680173e-05, "loss": 0.5767, "step": 1322 }, { "epoch": 0.3668885191347754, "grad_norm": 0.1985812485218048, "learning_rate": 2.3222671986875255e-05, "loss": 0.5322, "step": 1323 }, { "epoch": 0.36716583471991127, "grad_norm": 0.19608476758003235, "learning_rate": 2.3219833034792943e-05, "loss": 0.5758, "step": 1324 }, { "epoch": 0.36744315030504715, "grad_norm": 0.196056067943573, "learning_rate": 2.3216991990987186e-05, "loss": 0.5658, "step": 1325 }, { "epoch": 0.36772046589018303, "grad_norm": 0.18968339264392853, "learning_rate": 2.3214148856012354e-05, "loss": 0.5472, "step": 1326 }, { "epoch": 0.3679977814753189, "grad_norm": 0.19283819198608398, "learning_rate": 2.3211303630423208e-05, "loss": 0.5554, "step": 1327 }, { "epoch": 0.3682750970604548, "grad_norm": 0.19521844387054443, "learning_rate": 2.320845631477494e-05, "loss": 0.5252, "step": 1328 }, { "epoch": 0.3685524126455907, "grad_norm": 0.19548355042934418, "learning_rate": 2.3205606909623122e-05, "loss": 0.569, "step": 1329 }, { "epoch": 0.3688297282307266, "grad_norm": 0.18432483077049255, "learning_rate": 2.3202755415523763e-05, "loss": 0.5612, "step": 1330 }, { "epoch": 0.36910704381586246, "grad_norm": 0.19396983087062836, "learning_rate": 2.3199901833033255e-05, "loss": 0.586, "step": 1331 }, { "epoch": 0.36938435940099834, "grad_norm": 0.18808341026306152, "learning_rate": 2.3197046162708413e-05, "loss": 0.566, "step": 1332 }, { "epoch": 0.3696616749861342, "grad_norm": 0.19177477061748505, "learning_rate": 2.3194188405106453e-05, "loss": 0.5673, "step": 1333 }, { "epoch": 0.3699389905712701, "grad_norm": 0.19362227618694305, "learning_rate": 2.3191328560784992e-05, "loss": 0.5367, "step": 1334 }, { "epoch": 0.370216306156406, "grad_norm": 0.1922491490840912, "learning_rate": 2.3188466630302072e-05, "loss": 0.5466, "step": 1335 }, { "epoch": 0.3704936217415419, "grad_norm": 0.19545325636863708, "learning_rate": 2.3185602614216125e-05, "loss": 0.5861, "step": 1336 }, { "epoch": 0.37077093732667776, "grad_norm": 0.1878175288438797, "learning_rate": 2.3182736513086002e-05, "loss": 0.5429, "step": 1337 }, { "epoch": 0.37104825291181365, "grad_norm": 0.20909211039543152, "learning_rate": 2.3179868327470948e-05, "loss": 0.579, "step": 1338 }, { "epoch": 0.37132556849694953, "grad_norm": 0.2049614042043686, "learning_rate": 2.3176998057930626e-05, "loss": 0.5877, "step": 1339 }, { "epoch": 0.3716028840820854, "grad_norm": 0.20033860206604004, "learning_rate": 2.3174125705025103e-05, "loss": 0.5576, "step": 1340 }, { "epoch": 0.3718801996672213, "grad_norm": 0.19559535384178162, "learning_rate": 2.3171251269314846e-05, "loss": 0.581, "step": 1341 }, { "epoch": 0.3721575152523572, "grad_norm": 0.19223865866661072, "learning_rate": 2.3168374751360737e-05, "loss": 0.5742, "step": 1342 }, { "epoch": 0.37243483083749307, "grad_norm": 0.18235072493553162, "learning_rate": 2.316549615172406e-05, "loss": 0.5839, "step": 1343 }, { "epoch": 0.37271214642262895, "grad_norm": 0.1987343281507492, "learning_rate": 2.3162615470966512e-05, "loss": 0.5531, "step": 1344 }, { "epoch": 0.37298946200776484, "grad_norm": 0.20500993728637695, "learning_rate": 2.3159732709650182e-05, "loss": 0.5849, "step": 1345 }, { "epoch": 0.3732667775929007, "grad_norm": 0.19410103559494019, "learning_rate": 2.3156847868337574e-05, "loss": 0.602, "step": 1346 }, { "epoch": 0.3735440931780366, "grad_norm": 0.19419234991073608, "learning_rate": 2.31539609475916e-05, "loss": 0.5639, "step": 1347 }, { "epoch": 0.3738214087631725, "grad_norm": 0.19289056956768036, "learning_rate": 2.3151071947975578e-05, "loss": 0.5833, "step": 1348 }, { "epoch": 0.3740987243483084, "grad_norm": 0.1862688809633255, "learning_rate": 2.314818087005322e-05, "loss": 0.5587, "step": 1349 }, { "epoch": 0.37437603993344426, "grad_norm": 0.1773182898759842, "learning_rate": 2.314528771438866e-05, "loss": 0.5297, "step": 1350 }, { "epoch": 0.37465335551858014, "grad_norm": 0.18808980286121368, "learning_rate": 2.314239248154642e-05, "loss": 0.5478, "step": 1351 }, { "epoch": 0.37493067110371603, "grad_norm": 0.1850731521844864, "learning_rate": 2.3139495172091447e-05, "loss": 0.5631, "step": 1352 }, { "epoch": 0.3752079866888519, "grad_norm": 0.233089417219162, "learning_rate": 2.313659578658907e-05, "loss": 0.5688, "step": 1353 }, { "epoch": 0.3754853022739878, "grad_norm": 0.19679111242294312, "learning_rate": 2.313369432560505e-05, "loss": 0.5713, "step": 1354 }, { "epoch": 0.3757626178591237, "grad_norm": 0.22906848788261414, "learning_rate": 2.3130790789705535e-05, "loss": 0.5727, "step": 1355 }, { "epoch": 0.37603993344425957, "grad_norm": 0.20593827962875366, "learning_rate": 2.3127885179457077e-05, "loss": 0.5382, "step": 1356 }, { "epoch": 0.37631724902939545, "grad_norm": 0.18781960010528564, "learning_rate": 2.3124977495426637e-05, "loss": 0.5746, "step": 1357 }, { "epoch": 0.37659456461453134, "grad_norm": 0.2294550985097885, "learning_rate": 2.3122067738181587e-05, "loss": 0.5783, "step": 1358 }, { "epoch": 0.3768718801996672, "grad_norm": 0.2007582187652588, "learning_rate": 2.311915590828969e-05, "loss": 0.5586, "step": 1359 }, { "epoch": 0.3771491957848031, "grad_norm": 0.20126573741436005, "learning_rate": 2.3116242006319132e-05, "loss": 0.5621, "step": 1360 }, { "epoch": 0.377426511369939, "grad_norm": 0.19607853889465332, "learning_rate": 2.3113326032838487e-05, "loss": 0.5305, "step": 1361 }, { "epoch": 0.3777038269550749, "grad_norm": 0.193894624710083, "learning_rate": 2.3110407988416736e-05, "loss": 0.5578, "step": 1362 }, { "epoch": 0.37798114254021076, "grad_norm": 0.1813306212425232, "learning_rate": 2.310748787362327e-05, "loss": 0.5787, "step": 1363 }, { "epoch": 0.37825845812534664, "grad_norm": 0.19636552035808563, "learning_rate": 2.3104565689027875e-05, "loss": 0.5615, "step": 1364 }, { "epoch": 0.3785357737104825, "grad_norm": 0.1957857310771942, "learning_rate": 2.3101641435200756e-05, "loss": 0.5821, "step": 1365 }, { "epoch": 0.3788130892956184, "grad_norm": 0.18413352966308594, "learning_rate": 2.3098715112712507e-05, "loss": 0.5388, "step": 1366 }, { "epoch": 0.3790904048807543, "grad_norm": 0.20153377950191498, "learning_rate": 2.3095786722134133e-05, "loss": 0.5748, "step": 1367 }, { "epoch": 0.3793677204658902, "grad_norm": 0.22044618427753448, "learning_rate": 2.309285626403704e-05, "loss": 0.5668, "step": 1368 }, { "epoch": 0.37964503605102606, "grad_norm": 0.19130001962184906, "learning_rate": 2.3089923738993034e-05, "loss": 0.5752, "step": 1369 }, { "epoch": 0.37992235163616195, "grad_norm": 0.18104785680770874, "learning_rate": 2.3086989147574333e-05, "loss": 0.5577, "step": 1370 }, { "epoch": 0.38019966722129783, "grad_norm": 0.18427520990371704, "learning_rate": 2.3084052490353553e-05, "loss": 0.5537, "step": 1371 }, { "epoch": 0.3804769828064337, "grad_norm": 0.1886986941099167, "learning_rate": 2.3081113767903713e-05, "loss": 0.5646, "step": 1372 }, { "epoch": 0.3807542983915696, "grad_norm": 0.19212138652801514, "learning_rate": 2.3078172980798236e-05, "loss": 0.5415, "step": 1373 }, { "epoch": 0.3810316139767055, "grad_norm": 0.18980136513710022, "learning_rate": 2.3075230129610946e-05, "loss": 0.5725, "step": 1374 }, { "epoch": 0.38130892956184137, "grad_norm": 0.1848769187927246, "learning_rate": 2.3072285214916072e-05, "loss": 0.536, "step": 1375 }, { "epoch": 0.38158624514697725, "grad_norm": 0.18508492410182953, "learning_rate": 2.3069338237288247e-05, "loss": 0.5753, "step": 1376 }, { "epoch": 0.38186356073211314, "grad_norm": 0.1909710019826889, "learning_rate": 2.30663891973025e-05, "loss": 0.563, "step": 1377 }, { "epoch": 0.382140876317249, "grad_norm": 0.20639832317829132, "learning_rate": 2.3063438095534272e-05, "loss": 0.5713, "step": 1378 }, { "epoch": 0.3824181919023849, "grad_norm": 0.1900801658630371, "learning_rate": 2.3060484932559395e-05, "loss": 0.5579, "step": 1379 }, { "epoch": 0.3826955074875208, "grad_norm": 0.19208571314811707, "learning_rate": 2.305752970895412e-05, "loss": 0.5571, "step": 1380 }, { "epoch": 0.3829728230726567, "grad_norm": 0.1912533938884735, "learning_rate": 2.3054572425295075e-05, "loss": 0.5452, "step": 1381 }, { "epoch": 0.38325013865779256, "grad_norm": 0.19920608401298523, "learning_rate": 2.3051613082159313e-05, "loss": 0.5799, "step": 1382 }, { "epoch": 0.38352745424292845, "grad_norm": 0.19880720973014832, "learning_rate": 2.3048651680124283e-05, "loss": 0.5504, "step": 1383 }, { "epoch": 0.38380476982806433, "grad_norm": 0.1889009028673172, "learning_rate": 2.3045688219767824e-05, "loss": 0.5751, "step": 1384 }, { "epoch": 0.3840820854132002, "grad_norm": 0.20132310688495636, "learning_rate": 2.3042722701668194e-05, "loss": 0.5723, "step": 1385 }, { "epoch": 0.3843594009983361, "grad_norm": 0.20646221935749054, "learning_rate": 2.3039755126404037e-05, "loss": 0.581, "step": 1386 }, { "epoch": 0.384636716583472, "grad_norm": 0.19051332771778107, "learning_rate": 2.3036785494554415e-05, "loss": 0.5609, "step": 1387 }, { "epoch": 0.38491403216860787, "grad_norm": 0.1971728354692459, "learning_rate": 2.303381380669877e-05, "loss": 0.563, "step": 1388 }, { "epoch": 0.38519134775374375, "grad_norm": 0.19891361892223358, "learning_rate": 2.303084006341697e-05, "loss": 0.571, "step": 1389 }, { "epoch": 0.38546866333887964, "grad_norm": 0.18801699578762054, "learning_rate": 2.302786426528926e-05, "loss": 0.5772, "step": 1390 }, { "epoch": 0.3857459789240155, "grad_norm": 0.1934468299150467, "learning_rate": 2.3024886412896302e-05, "loss": 0.5426, "step": 1391 }, { "epoch": 0.3860232945091514, "grad_norm": 0.20585696399211884, "learning_rate": 2.3021906506819152e-05, "loss": 0.5521, "step": 1392 }, { "epoch": 0.3863006100942873, "grad_norm": 0.19114629924297333, "learning_rate": 2.3018924547639272e-05, "loss": 0.597, "step": 1393 }, { "epoch": 0.3865779256794232, "grad_norm": 0.18134154379367828, "learning_rate": 2.301594053593852e-05, "loss": 0.5473, "step": 1394 }, { "epoch": 0.38685524126455906, "grad_norm": 0.18706971406936646, "learning_rate": 2.301295447229915e-05, "loss": 0.5843, "step": 1395 }, { "epoch": 0.38713255684969494, "grad_norm": 0.183674156665802, "learning_rate": 2.300996635730383e-05, "loss": 0.5268, "step": 1396 }, { "epoch": 0.3874098724348308, "grad_norm": 0.18946042656898499, "learning_rate": 2.3006976191535616e-05, "loss": 0.5529, "step": 1397 }, { "epoch": 0.3876871880199667, "grad_norm": 0.20170167088508606, "learning_rate": 2.3003983975577975e-05, "loss": 0.5781, "step": 1398 }, { "epoch": 0.3879645036051026, "grad_norm": 0.1854201704263687, "learning_rate": 2.300098971001476e-05, "loss": 0.536, "step": 1399 }, { "epoch": 0.3882418191902385, "grad_norm": 0.19159002602100372, "learning_rate": 2.299799339543023e-05, "loss": 0.584, "step": 1400 }, { "epoch": 0.38851913477537436, "grad_norm": 0.18878300487995148, "learning_rate": 2.299499503240905e-05, "loss": 0.5613, "step": 1401 }, { "epoch": 0.38879645036051025, "grad_norm": 0.2083284854888916, "learning_rate": 2.2991994621536283e-05, "loss": 0.5607, "step": 1402 }, { "epoch": 0.38907376594564613, "grad_norm": 0.19140368700027466, "learning_rate": 2.2988992163397386e-05, "loss": 0.5723, "step": 1403 }, { "epoch": 0.389351081530782, "grad_norm": 0.19407185912132263, "learning_rate": 2.2985987658578217e-05, "loss": 0.5592, "step": 1404 }, { "epoch": 0.3896283971159179, "grad_norm": 0.202662855386734, "learning_rate": 2.298298110766503e-05, "loss": 0.5682, "step": 1405 }, { "epoch": 0.3899057127010538, "grad_norm": 0.1902836710214615, "learning_rate": 2.2979972511244493e-05, "loss": 0.5626, "step": 1406 }, { "epoch": 0.39018302828618967, "grad_norm": 0.19113852083683014, "learning_rate": 2.2976961869903657e-05, "loss": 0.5713, "step": 1407 }, { "epoch": 0.39046034387132555, "grad_norm": 0.19765017926692963, "learning_rate": 2.2973949184229975e-05, "loss": 0.5811, "step": 1408 }, { "epoch": 0.39073765945646144, "grad_norm": 0.1867290586233139, "learning_rate": 2.2970934454811306e-05, "loss": 0.5094, "step": 1409 }, { "epoch": 0.3910149750415973, "grad_norm": 0.19188763201236725, "learning_rate": 2.2967917682235905e-05, "loss": 0.5736, "step": 1410 }, { "epoch": 0.3912922906267332, "grad_norm": 0.19784973561763763, "learning_rate": 2.296489886709242e-05, "loss": 0.5739, "step": 1411 }, { "epoch": 0.3915696062118691, "grad_norm": 0.19761349260807037, "learning_rate": 2.2961878009969904e-05, "loss": 0.5702, "step": 1412 }, { "epoch": 0.391846921797005, "grad_norm": 0.19187502562999725, "learning_rate": 2.2958855111457804e-05, "loss": 0.5575, "step": 1413 }, { "epoch": 0.39212423738214086, "grad_norm": 0.19723129272460938, "learning_rate": 2.2955830172145975e-05, "loss": 0.5757, "step": 1414 }, { "epoch": 0.39240155296727675, "grad_norm": 0.1946565806865692, "learning_rate": 2.2952803192624653e-05, "loss": 0.5696, "step": 1415 }, { "epoch": 0.39267886855241263, "grad_norm": 0.1714453399181366, "learning_rate": 2.2949774173484488e-05, "loss": 0.5783, "step": 1416 }, { "epoch": 0.3929561841375485, "grad_norm": 0.1907140165567398, "learning_rate": 2.2946743115316518e-05, "loss": 0.5642, "step": 1417 }, { "epoch": 0.3932334997226844, "grad_norm": 0.18922662734985352, "learning_rate": 2.294371001871219e-05, "loss": 0.5403, "step": 1418 }, { "epoch": 0.3935108153078203, "grad_norm": 0.18016541004180908, "learning_rate": 2.294067488426333e-05, "loss": 0.5428, "step": 1419 }, { "epoch": 0.39378813089295617, "grad_norm": 0.18581193685531616, "learning_rate": 2.293763771256218e-05, "loss": 0.5671, "step": 1420 }, { "epoch": 0.39406544647809205, "grad_norm": 0.19479569792747498, "learning_rate": 2.293459850420138e-05, "loss": 0.5471, "step": 1421 }, { "epoch": 0.39434276206322794, "grad_norm": 0.19193242490291595, "learning_rate": 2.2931557259773944e-05, "loss": 0.5422, "step": 1422 }, { "epoch": 0.3946200776483638, "grad_norm": 0.1958051174879074, "learning_rate": 2.2928513979873312e-05, "loss": 0.5605, "step": 1423 }, { "epoch": 0.3948973932334997, "grad_norm": 0.1831037700176239, "learning_rate": 2.2925468665093304e-05, "loss": 0.5632, "step": 1424 }, { "epoch": 0.3951747088186356, "grad_norm": 0.19089363515377045, "learning_rate": 2.2922421316028142e-05, "loss": 0.5534, "step": 1425 }, { "epoch": 0.3954520244037715, "grad_norm": 0.18920765817165375, "learning_rate": 2.2919371933272445e-05, "loss": 0.5708, "step": 1426 }, { "epoch": 0.39572933998890736, "grad_norm": 0.1790907382965088, "learning_rate": 2.2916320517421224e-05, "loss": 0.54, "step": 1427 }, { "epoch": 0.39600665557404324, "grad_norm": 0.19448032975196838, "learning_rate": 2.29132670690699e-05, "loss": 0.5713, "step": 1428 }, { "epoch": 0.3962839711591791, "grad_norm": 0.18597151339054108, "learning_rate": 2.2910211588814272e-05, "loss": 0.5619, "step": 1429 }, { "epoch": 0.396561286744315, "grad_norm": 0.19088095426559448, "learning_rate": 2.2907154077250554e-05, "loss": 0.5567, "step": 1430 }, { "epoch": 0.3968386023294509, "grad_norm": 0.2470846325159073, "learning_rate": 2.290409453497534e-05, "loss": 0.5567, "step": 1431 }, { "epoch": 0.3971159179145868, "grad_norm": 0.189782977104187, "learning_rate": 2.2901032962585633e-05, "loss": 0.5689, "step": 1432 }, { "epoch": 0.39739323349972266, "grad_norm": 0.18323121964931488, "learning_rate": 2.289796936067882e-05, "loss": 0.5695, "step": 1433 }, { "epoch": 0.39767054908485855, "grad_norm": 0.19183968007564545, "learning_rate": 2.28949037298527e-05, "loss": 0.5527, "step": 1434 }, { "epoch": 0.39794786466999443, "grad_norm": 0.2490936517715454, "learning_rate": 2.2891836070705454e-05, "loss": 0.5705, "step": 1435 }, { "epoch": 0.3982251802551303, "grad_norm": 0.18432928621768951, "learning_rate": 2.2888766383835664e-05, "loss": 0.5542, "step": 1436 }, { "epoch": 0.3985024958402662, "grad_norm": 0.18222655355930328, "learning_rate": 2.2885694669842305e-05, "loss": 0.5693, "step": 1437 }, { "epoch": 0.3987798114254021, "grad_norm": 0.18472431600093842, "learning_rate": 2.2882620929324758e-05, "loss": 0.5664, "step": 1438 }, { "epoch": 0.39905712701053797, "grad_norm": 0.19013690948486328, "learning_rate": 2.2879545162882782e-05, "loss": 0.534, "step": 1439 }, { "epoch": 0.39933444259567386, "grad_norm": 0.1890943944454193, "learning_rate": 2.2876467371116546e-05, "loss": 0.5537, "step": 1440 }, { "epoch": 0.39961175818080974, "grad_norm": 0.19494980573654175, "learning_rate": 2.28733875546266e-05, "loss": 0.5502, "step": 1441 }, { "epoch": 0.3998890737659456, "grad_norm": 0.215170755982399, "learning_rate": 2.2870305714013908e-05, "loss": 0.57, "step": 1442 }, { "epoch": 0.4001663893510815, "grad_norm": 0.20500238239765167, "learning_rate": 2.2867221849879816e-05, "loss": 0.5648, "step": 1443 }, { "epoch": 0.4004437049362174, "grad_norm": 0.1898437738418579, "learning_rate": 2.2864135962826067e-05, "loss": 0.5685, "step": 1444 }, { "epoch": 0.4007210205213533, "grad_norm": 0.1926756501197815, "learning_rate": 2.2861048053454797e-05, "loss": 0.5514, "step": 1445 }, { "epoch": 0.40099833610648916, "grad_norm": 0.19258753955364227, "learning_rate": 2.2857958122368545e-05, "loss": 0.5397, "step": 1446 }, { "epoch": 0.40127565169162505, "grad_norm": 0.18636788427829742, "learning_rate": 2.285486617017023e-05, "loss": 0.5421, "step": 1447 }, { "epoch": 0.40155296727676093, "grad_norm": 0.20043087005615234, "learning_rate": 2.2851772197463184e-05, "loss": 0.5241, "step": 1448 }, { "epoch": 0.4018302828618968, "grad_norm": 0.2062230408191681, "learning_rate": 2.284867620485111e-05, "loss": 0.5702, "step": 1449 }, { "epoch": 0.4021075984470327, "grad_norm": 0.1930972784757614, "learning_rate": 2.284557819293813e-05, "loss": 0.5533, "step": 1450 }, { "epoch": 0.4023849140321686, "grad_norm": 0.20658205449581146, "learning_rate": 2.284247816232874e-05, "loss": 0.5613, "step": 1451 }, { "epoch": 0.40266222961730447, "grad_norm": 0.2103574126958847, "learning_rate": 2.2839376113627848e-05, "loss": 0.5668, "step": 1452 }, { "epoch": 0.40293954520244035, "grad_norm": 0.19861139357089996, "learning_rate": 2.2836272047440733e-05, "loss": 0.5549, "step": 1453 }, { "epoch": 0.40321686078757624, "grad_norm": 0.1885387897491455, "learning_rate": 2.2833165964373093e-05, "loss": 0.565, "step": 1454 }, { "epoch": 0.4034941763727121, "grad_norm": 0.18594177067279816, "learning_rate": 2.2830057865030997e-05, "loss": 0.5129, "step": 1455 }, { "epoch": 0.403771491957848, "grad_norm": 0.19150151312351227, "learning_rate": 2.282694775002092e-05, "loss": 0.5591, "step": 1456 }, { "epoch": 0.4040488075429839, "grad_norm": 0.1973596066236496, "learning_rate": 2.2823835619949735e-05, "loss": 0.5795, "step": 1457 }, { "epoch": 0.4043261231281198, "grad_norm": 0.21641992032527924, "learning_rate": 2.2820721475424693e-05, "loss": 0.5286, "step": 1458 }, { "epoch": 0.40460343871325566, "grad_norm": 0.20730352401733398, "learning_rate": 2.281760531705345e-05, "loss": 0.578, "step": 1459 }, { "epoch": 0.40488075429839154, "grad_norm": 0.19733300805091858, "learning_rate": 2.281448714544405e-05, "loss": 0.6048, "step": 1460 }, { "epoch": 0.4051580698835274, "grad_norm": 0.2007417231798172, "learning_rate": 2.281136696120493e-05, "loss": 0.588, "step": 1461 }, { "epoch": 0.4054353854686633, "grad_norm": 0.1785038709640503, "learning_rate": 2.280824476494492e-05, "loss": 0.5505, "step": 1462 }, { "epoch": 0.4057127010537992, "grad_norm": 0.18670029938220978, "learning_rate": 2.2805120557273246e-05, "loss": 0.5533, "step": 1463 }, { "epoch": 0.4059900166389351, "grad_norm": 0.18775032460689545, "learning_rate": 2.2801994338799525e-05, "loss": 0.5457, "step": 1464 }, { "epoch": 0.406267332224071, "grad_norm": 0.18645282089710236, "learning_rate": 2.2798866110133758e-05, "loss": 0.5595, "step": 1465 }, { "epoch": 0.4065446478092069, "grad_norm": 0.2067873477935791, "learning_rate": 2.279573587188635e-05, "loss": 0.5438, "step": 1466 }, { "epoch": 0.4068219633943428, "grad_norm": 0.18860718607902527, "learning_rate": 2.2792603624668097e-05, "loss": 0.541, "step": 1467 }, { "epoch": 0.4070992789794787, "grad_norm": 0.18750540912151337, "learning_rate": 2.2789469369090173e-05, "loss": 0.5567, "step": 1468 }, { "epoch": 0.40737659456461456, "grad_norm": 0.18958985805511475, "learning_rate": 2.2786333105764162e-05, "loss": 0.5413, "step": 1469 }, { "epoch": 0.40765391014975044, "grad_norm": 0.27570971846580505, "learning_rate": 2.2783194835302035e-05, "loss": 0.5548, "step": 1470 }, { "epoch": 0.4079312257348863, "grad_norm": 0.1889680027961731, "learning_rate": 2.2780054558316146e-05, "loss": 0.5507, "step": 1471 }, { "epoch": 0.4082085413200222, "grad_norm": 0.18410347402095795, "learning_rate": 2.277691227541925e-05, "loss": 0.5689, "step": 1472 }, { "epoch": 0.4084858569051581, "grad_norm": 0.18935447931289673, "learning_rate": 2.277376798722448e-05, "loss": 0.5631, "step": 1473 }, { "epoch": 0.408763172490294, "grad_norm": 0.19542036950588226, "learning_rate": 2.2770621694345385e-05, "loss": 0.5614, "step": 1474 }, { "epoch": 0.40904048807542986, "grad_norm": 0.2004593461751938, "learning_rate": 2.2767473397395876e-05, "loss": 0.5707, "step": 1475 }, { "epoch": 0.40931780366056575, "grad_norm": 0.19551704823970795, "learning_rate": 2.276432309699028e-05, "loss": 0.5656, "step": 1476 }, { "epoch": 0.40959511924570163, "grad_norm": 0.18432258069515228, "learning_rate": 2.27611707937433e-05, "loss": 0.5696, "step": 1477 }, { "epoch": 0.4098724348308375, "grad_norm": 0.20155014097690582, "learning_rate": 2.2758016488270033e-05, "loss": 0.581, "step": 1478 }, { "epoch": 0.4101497504159734, "grad_norm": 0.1944228559732437, "learning_rate": 2.2754860181185967e-05, "loss": 0.565, "step": 1479 }, { "epoch": 0.4104270660011093, "grad_norm": 0.20183973014354706, "learning_rate": 2.2751701873106983e-05, "loss": 0.5743, "step": 1480 }, { "epoch": 0.41070438158624517, "grad_norm": 0.18106213212013245, "learning_rate": 2.274854156464935e-05, "loss": 0.5726, "step": 1481 }, { "epoch": 0.41098169717138106, "grad_norm": 0.19121475517749786, "learning_rate": 2.2745379256429728e-05, "loss": 0.5856, "step": 1482 }, { "epoch": 0.41125901275651694, "grad_norm": 0.1916564702987671, "learning_rate": 2.2742214949065166e-05, "loss": 0.5494, "step": 1483 }, { "epoch": 0.4115363283416528, "grad_norm": 0.2309359312057495, "learning_rate": 2.2739048643173105e-05, "loss": 0.575, "step": 1484 }, { "epoch": 0.4118136439267887, "grad_norm": 0.3025970757007599, "learning_rate": 2.2735880339371373e-05, "loss": 0.5717, "step": 1485 }, { "epoch": 0.4120909595119246, "grad_norm": 0.19141024351119995, "learning_rate": 2.27327100382782e-05, "loss": 0.5743, "step": 1486 }, { "epoch": 0.4123682750970605, "grad_norm": 0.19072705507278442, "learning_rate": 2.272953774051218e-05, "loss": 0.5498, "step": 1487 }, { "epoch": 0.41264559068219636, "grad_norm": 0.23282243311405182, "learning_rate": 2.2726363446692324e-05, "loss": 0.5653, "step": 1488 }, { "epoch": 0.41292290626733225, "grad_norm": 0.1904718577861786, "learning_rate": 2.2723187157438015e-05, "loss": 0.5557, "step": 1489 }, { "epoch": 0.41320022185246813, "grad_norm": 0.18210548162460327, "learning_rate": 2.2720008873369036e-05, "loss": 0.5648, "step": 1490 }, { "epoch": 0.413477537437604, "grad_norm": 0.1940220296382904, "learning_rate": 2.271682859510555e-05, "loss": 0.5824, "step": 1491 }, { "epoch": 0.4137548530227399, "grad_norm": 0.18632103502750397, "learning_rate": 2.2713646323268113e-05, "loss": 0.5694, "step": 1492 }, { "epoch": 0.4140321686078758, "grad_norm": 0.19417639076709747, "learning_rate": 2.2710462058477676e-05, "loss": 0.5385, "step": 1493 }, { "epoch": 0.41430948419301167, "grad_norm": 0.1876698136329651, "learning_rate": 2.270727580135557e-05, "loss": 0.5441, "step": 1494 }, { "epoch": 0.41458679977814755, "grad_norm": 0.19295859336853027, "learning_rate": 2.270408755252352e-05, "loss": 0.5587, "step": 1495 }, { "epoch": 0.41486411536328344, "grad_norm": 0.1876155138015747, "learning_rate": 2.2700897312603635e-05, "loss": 0.5597, "step": 1496 }, { "epoch": 0.4151414309484193, "grad_norm": 0.18888817727565765, "learning_rate": 2.2697705082218417e-05, "loss": 0.58, "step": 1497 }, { "epoch": 0.4154187465335552, "grad_norm": 0.1931556612253189, "learning_rate": 2.2694510861990755e-05, "loss": 0.5195, "step": 1498 }, { "epoch": 0.4156960621186911, "grad_norm": 0.20737329125404358, "learning_rate": 2.2691314652543922e-05, "loss": 0.5742, "step": 1499 }, { "epoch": 0.415973377703827, "grad_norm": 0.18405389785766602, "learning_rate": 2.268811645450159e-05, "loss": 0.5572, "step": 1500 }, { "epoch": 0.41625069328896286, "grad_norm": 0.2047012746334076, "learning_rate": 2.2684916268487805e-05, "loss": 0.5682, "step": 1501 }, { "epoch": 0.41652800887409874, "grad_norm": 0.19159288704395294, "learning_rate": 2.2681714095127016e-05, "loss": 0.5642, "step": 1502 }, { "epoch": 0.4168053244592346, "grad_norm": 0.19872340559959412, "learning_rate": 2.2678509935044046e-05, "loss": 0.5801, "step": 1503 }, { "epoch": 0.4170826400443705, "grad_norm": 0.22284448146820068, "learning_rate": 2.267530378886411e-05, "loss": 0.5468, "step": 1504 }, { "epoch": 0.4173599556295064, "grad_norm": 0.18655410408973694, "learning_rate": 2.2672095657212822e-05, "loss": 0.5557, "step": 1505 }, { "epoch": 0.4176372712146423, "grad_norm": 0.1884729266166687, "learning_rate": 2.266888554071616e-05, "loss": 0.5641, "step": 1506 }, { "epoch": 0.41791458679977816, "grad_norm": 0.1828029453754425, "learning_rate": 2.2665673440000512e-05, "loss": 0.5295, "step": 1507 }, { "epoch": 0.41819190238491405, "grad_norm": 0.19014927744865417, "learning_rate": 2.2662459355692645e-05, "loss": 0.5385, "step": 1508 }, { "epoch": 0.41846921797004993, "grad_norm": 0.20038922131061554, "learning_rate": 2.26592432884197e-05, "loss": 0.5763, "step": 1509 }, { "epoch": 0.4187465335551858, "grad_norm": 0.19587905704975128, "learning_rate": 2.2656025238809233e-05, "loss": 0.5642, "step": 1510 }, { "epoch": 0.4190238491403217, "grad_norm": 0.18529456853866577, "learning_rate": 2.265280520748916e-05, "loss": 0.5467, "step": 1511 }, { "epoch": 0.4193011647254576, "grad_norm": 0.19396060705184937, "learning_rate": 2.26495831950878e-05, "loss": 0.5756, "step": 1512 }, { "epoch": 0.41957848031059347, "grad_norm": 0.19712162017822266, "learning_rate": 2.2646359202233848e-05, "loss": 0.5856, "step": 1513 }, { "epoch": 0.41985579589572936, "grad_norm": 0.1929578334093094, "learning_rate": 2.264313322955639e-05, "loss": 0.5677, "step": 1514 }, { "epoch": 0.42013311148086524, "grad_norm": 0.19393184781074524, "learning_rate": 2.263990527768491e-05, "loss": 0.5409, "step": 1515 }, { "epoch": 0.4204104270660011, "grad_norm": 0.19869175553321838, "learning_rate": 2.2636675347249252e-05, "loss": 0.5344, "step": 1516 }, { "epoch": 0.420687742651137, "grad_norm": 0.19600443542003632, "learning_rate": 2.263344343887967e-05, "loss": 0.5627, "step": 1517 }, { "epoch": 0.4209650582362729, "grad_norm": 0.19098101556301117, "learning_rate": 2.263020955320679e-05, "loss": 0.5704, "step": 1518 }, { "epoch": 0.4212423738214088, "grad_norm": 0.20919351279735565, "learning_rate": 2.2626973690861635e-05, "loss": 0.55, "step": 1519 }, { "epoch": 0.42151968940654466, "grad_norm": 0.21177563071250916, "learning_rate": 2.2623735852475602e-05, "loss": 0.5723, "step": 1520 }, { "epoch": 0.42179700499168055, "grad_norm": 0.19463296234607697, "learning_rate": 2.262049603868048e-05, "loss": 0.5601, "step": 1521 }, { "epoch": 0.42207432057681643, "grad_norm": 0.20798444747924805, "learning_rate": 2.2617254250108445e-05, "loss": 0.5606, "step": 1522 }, { "epoch": 0.4223516361619523, "grad_norm": 0.20048515498638153, "learning_rate": 2.2614010487392053e-05, "loss": 0.5628, "step": 1523 }, { "epoch": 0.4226289517470882, "grad_norm": 0.1931045949459076, "learning_rate": 2.2610764751164253e-05, "loss": 0.5662, "step": 1524 }, { "epoch": 0.4229062673322241, "grad_norm": 0.19520334899425507, "learning_rate": 2.2607517042058367e-05, "loss": 0.5552, "step": 1525 }, { "epoch": 0.42318358291735997, "grad_norm": 0.1984749138355255, "learning_rate": 2.2604267360708113e-05, "loss": 0.5672, "step": 1526 }, { "epoch": 0.42346089850249585, "grad_norm": 0.19392019510269165, "learning_rate": 2.2601015707747585e-05, "loss": 0.5689, "step": 1527 }, { "epoch": 0.42373821408763174, "grad_norm": 0.1880037933588028, "learning_rate": 2.2597762083811276e-05, "loss": 0.5606, "step": 1528 }, { "epoch": 0.4240155296727676, "grad_norm": 0.19556698203086853, "learning_rate": 2.259450648953405e-05, "loss": 0.5626, "step": 1529 }, { "epoch": 0.4242928452579035, "grad_norm": 0.18473385274410248, "learning_rate": 2.2591248925551156e-05, "loss": 0.541, "step": 1530 }, { "epoch": 0.4245701608430394, "grad_norm": 0.2530158460140228, "learning_rate": 2.2587989392498237e-05, "loss": 0.5429, "step": 1531 }, { "epoch": 0.4248474764281753, "grad_norm": 0.22607286274433136, "learning_rate": 2.258472789101131e-05, "loss": 0.5578, "step": 1532 }, { "epoch": 0.42512479201331116, "grad_norm": 0.19067604839801788, "learning_rate": 2.258146442172678e-05, "loss": 0.5474, "step": 1533 }, { "epoch": 0.42540210759844704, "grad_norm": 0.18085655570030212, "learning_rate": 2.257819898528144e-05, "loss": 0.5506, "step": 1534 }, { "epoch": 0.42567942318358293, "grad_norm": 0.18807153403759003, "learning_rate": 2.257493158231246e-05, "loss": 0.5461, "step": 1535 }, { "epoch": 0.4259567387687188, "grad_norm": 0.18786919116973877, "learning_rate": 2.25716622134574e-05, "loss": 0.5781, "step": 1536 }, { "epoch": 0.4262340543538547, "grad_norm": 0.19066756963729858, "learning_rate": 2.2568390879354195e-05, "loss": 0.5378, "step": 1537 }, { "epoch": 0.4265113699389906, "grad_norm": 0.1895960569381714, "learning_rate": 2.2565117580641175e-05, "loss": 0.5661, "step": 1538 }, { "epoch": 0.42678868552412647, "grad_norm": 0.21506737172603607, "learning_rate": 2.2561842317957045e-05, "loss": 0.661, "step": 1539 }, { "epoch": 0.42706600110926235, "grad_norm": 0.1904166042804718, "learning_rate": 2.2558565091940895e-05, "loss": 0.5643, "step": 1540 }, { "epoch": 0.42734331669439823, "grad_norm": 0.18517723679542542, "learning_rate": 2.2555285903232197e-05, "loss": 0.5509, "step": 1541 }, { "epoch": 0.4276206322795341, "grad_norm": 0.2003047913312912, "learning_rate": 2.2552004752470814e-05, "loss": 0.5487, "step": 1542 }, { "epoch": 0.42789794786467, "grad_norm": 0.19767479598522186, "learning_rate": 2.2548721640296976e-05, "loss": 0.5534, "step": 1543 }, { "epoch": 0.4281752634498059, "grad_norm": 0.1890031397342682, "learning_rate": 2.2545436567351312e-05, "loss": 0.5762, "step": 1544 }, { "epoch": 0.42845257903494177, "grad_norm": 0.22215117514133453, "learning_rate": 2.2542149534274827e-05, "loss": 0.5433, "step": 1545 }, { "epoch": 0.42872989462007766, "grad_norm": 0.17825603485107422, "learning_rate": 2.2538860541708902e-05, "loss": 0.5724, "step": 1546 }, { "epoch": 0.42900721020521354, "grad_norm": 0.1967187076807022, "learning_rate": 2.2535569590295313e-05, "loss": 0.5632, "step": 1547 }, { "epoch": 0.4292845257903494, "grad_norm": 0.18680186569690704, "learning_rate": 2.253227668067621e-05, "loss": 0.5704, "step": 1548 }, { "epoch": 0.4295618413754853, "grad_norm": 0.17610576748847961, "learning_rate": 2.2528981813494127e-05, "loss": 0.5295, "step": 1549 }, { "epoch": 0.4298391569606212, "grad_norm": 0.19055548310279846, "learning_rate": 2.2525684989391975e-05, "loss": 0.5651, "step": 1550 }, { "epoch": 0.4301164725457571, "grad_norm": 0.19191017746925354, "learning_rate": 2.2522386209013062e-05, "loss": 0.5366, "step": 1551 }, { "epoch": 0.43039378813089296, "grad_norm": 0.20109041035175323, "learning_rate": 2.2519085473001055e-05, "loss": 0.5508, "step": 1552 }, { "epoch": 0.43067110371602885, "grad_norm": 0.18693628907203674, "learning_rate": 2.2515782782000027e-05, "loss": 0.5603, "step": 1553 }, { "epoch": 0.43094841930116473, "grad_norm": 0.18805643916130066, "learning_rate": 2.2512478136654412e-05, "loss": 0.5197, "step": 1554 }, { "epoch": 0.4312257348863006, "grad_norm": 0.19016428291797638, "learning_rate": 2.2509171537609042e-05, "loss": 0.5719, "step": 1555 }, { "epoch": 0.4315030504714365, "grad_norm": 0.18083550035953522, "learning_rate": 2.2505862985509112e-05, "loss": 0.5502, "step": 1556 }, { "epoch": 0.4317803660565724, "grad_norm": 0.18382135033607483, "learning_rate": 2.2502552481000218e-05, "loss": 0.5437, "step": 1557 }, { "epoch": 0.43205768164170827, "grad_norm": 0.20305176079273224, "learning_rate": 2.2499240024728316e-05, "loss": 0.5559, "step": 1558 }, { "epoch": 0.43233499722684415, "grad_norm": 0.19593636691570282, "learning_rate": 2.2495925617339765e-05, "loss": 0.5695, "step": 1559 }, { "epoch": 0.43261231281198004, "grad_norm": 0.18708528578281403, "learning_rate": 2.2492609259481283e-05, "loss": 0.5828, "step": 1560 }, { "epoch": 0.4328896283971159, "grad_norm": 0.20516738295555115, "learning_rate": 2.248929095179999e-05, "loss": 0.5691, "step": 1561 }, { "epoch": 0.4331669439822518, "grad_norm": 0.19292466342449188, "learning_rate": 2.248597069494337e-05, "loss": 0.5567, "step": 1562 }, { "epoch": 0.4334442595673877, "grad_norm": 0.18612238764762878, "learning_rate": 2.2482648489559296e-05, "loss": 0.5497, "step": 1563 }, { "epoch": 0.4337215751525236, "grad_norm": 0.18284805119037628, "learning_rate": 2.2479324336296016e-05, "loss": 0.5305, "step": 1564 }, { "epoch": 0.43399889073765946, "grad_norm": 0.19658204913139343, "learning_rate": 2.247599823580216e-05, "loss": 0.5631, "step": 1565 }, { "epoch": 0.43427620632279534, "grad_norm": 0.19752554595470428, "learning_rate": 2.2472670188726737e-05, "loss": 0.5647, "step": 1566 }, { "epoch": 0.43455352190793123, "grad_norm": 0.18531207740306854, "learning_rate": 2.246934019571914e-05, "loss": 0.5667, "step": 1567 }, { "epoch": 0.4348308374930671, "grad_norm": 0.18967878818511963, "learning_rate": 2.2466008257429142e-05, "loss": 0.5572, "step": 1568 }, { "epoch": 0.435108153078203, "grad_norm": 0.20551453530788422, "learning_rate": 2.2462674374506886e-05, "loss": 0.5554, "step": 1569 }, { "epoch": 0.4353854686633389, "grad_norm": 0.1888595074415207, "learning_rate": 2.2459338547602905e-05, "loss": 0.5986, "step": 1570 }, { "epoch": 0.43566278424847477, "grad_norm": 0.1824692189693451, "learning_rate": 2.2456000777368102e-05, "loss": 0.5223, "step": 1571 }, { "epoch": 0.43594009983361065, "grad_norm": 0.189620703458786, "learning_rate": 2.245266106445377e-05, "loss": 0.5465, "step": 1572 }, { "epoch": 0.43621741541874653, "grad_norm": 0.20219853520393372, "learning_rate": 2.2449319409511574e-05, "loss": 0.5655, "step": 1573 }, { "epoch": 0.4364947310038824, "grad_norm": 0.18667539954185486, "learning_rate": 2.244597581319356e-05, "loss": 0.5307, "step": 1574 }, { "epoch": 0.4367720465890183, "grad_norm": 0.19490988552570343, "learning_rate": 2.2442630276152148e-05, "loss": 0.5666, "step": 1575 }, { "epoch": 0.4370493621741542, "grad_norm": 0.18786631524562836, "learning_rate": 2.2439282799040146e-05, "loss": 0.535, "step": 1576 }, { "epoch": 0.4373266777592901, "grad_norm": 0.1902604103088379, "learning_rate": 2.2435933382510735e-05, "loss": 0.5362, "step": 1577 }, { "epoch": 0.43760399334442596, "grad_norm": 0.17974701523780823, "learning_rate": 2.2432582027217473e-05, "loss": 0.5538, "step": 1578 }, { "epoch": 0.43788130892956184, "grad_norm": 0.1953812539577484, "learning_rate": 2.2429228733814294e-05, "loss": 0.5584, "step": 1579 }, { "epoch": 0.4381586245146977, "grad_norm": 0.17894363403320312, "learning_rate": 2.2425873502955524e-05, "loss": 0.5667, "step": 1580 }, { "epoch": 0.4384359400998336, "grad_norm": 0.2096855342388153, "learning_rate": 2.2422516335295852e-05, "loss": 0.5634, "step": 1581 }, { "epoch": 0.4387132556849695, "grad_norm": 0.1870642900466919, "learning_rate": 2.241915723149035e-05, "loss": 0.5691, "step": 1582 }, { "epoch": 0.4389905712701054, "grad_norm": 0.2048119157552719, "learning_rate": 2.241579619219447e-05, "loss": 0.5645, "step": 1583 }, { "epoch": 0.43926788685524126, "grad_norm": 0.19508272409439087, "learning_rate": 2.2412433218064037e-05, "loss": 0.5543, "step": 1584 }, { "epoch": 0.43954520244037715, "grad_norm": 0.1895490139722824, "learning_rate": 2.240906830975526e-05, "loss": 0.5522, "step": 1585 }, { "epoch": 0.43982251802551303, "grad_norm": 0.18243685364723206, "learning_rate": 2.240570146792472e-05, "loss": 0.5695, "step": 1586 }, { "epoch": 0.4400998336106489, "grad_norm": 0.18209905922412872, "learning_rate": 2.2402332693229377e-05, "loss": 0.5447, "step": 1587 }, { "epoch": 0.4403771491957848, "grad_norm": 0.1850498467683792, "learning_rate": 2.2398961986326567e-05, "loss": 0.5289, "step": 1588 }, { "epoch": 0.4406544647809207, "grad_norm": 0.19210520386695862, "learning_rate": 2.2395589347874005e-05, "loss": 0.5792, "step": 1589 }, { "epoch": 0.44093178036605657, "grad_norm": 0.19306769967079163, "learning_rate": 2.239221477852978e-05, "loss": 0.5771, "step": 1590 }, { "epoch": 0.44120909595119245, "grad_norm": 0.18809685111045837, "learning_rate": 2.2388838278952367e-05, "loss": 0.5648, "step": 1591 }, { "epoch": 0.44148641153632834, "grad_norm": 0.19211184978485107, "learning_rate": 2.2385459849800606e-05, "loss": 0.5867, "step": 1592 }, { "epoch": 0.4417637271214642, "grad_norm": 0.2126152366399765, "learning_rate": 2.2382079491733715e-05, "loss": 0.5705, "step": 1593 }, { "epoch": 0.4420410427066001, "grad_norm": 0.18234777450561523, "learning_rate": 2.23786972054113e-05, "loss": 0.5373, "step": 1594 }, { "epoch": 0.442318358291736, "grad_norm": 0.18619130551815033, "learning_rate": 2.2375312991493324e-05, "loss": 0.5525, "step": 1595 }, { "epoch": 0.4425956738768719, "grad_norm": 0.19054090976715088, "learning_rate": 2.237192685064014e-05, "loss": 0.5606, "step": 1596 }, { "epoch": 0.44287298946200776, "grad_norm": 0.19798876345157623, "learning_rate": 2.236853878351248e-05, "loss": 0.5389, "step": 1597 }, { "epoch": 0.44315030504714364, "grad_norm": 0.20198500156402588, "learning_rate": 2.2365148790771442e-05, "loss": 0.577, "step": 1598 }, { "epoch": 0.44342762063227953, "grad_norm": 0.22898751497268677, "learning_rate": 2.2361756873078502e-05, "loss": 0.5516, "step": 1599 }, { "epoch": 0.4437049362174154, "grad_norm": 0.1942586600780487, "learning_rate": 2.2358363031095513e-05, "loss": 0.5583, "step": 1600 }, { "epoch": 0.4439822518025513, "grad_norm": 0.18665020167827606, "learning_rate": 2.23549672654847e-05, "loss": 0.562, "step": 1601 }, { "epoch": 0.4442595673876872, "grad_norm": 0.20378893613815308, "learning_rate": 2.2351569576908675e-05, "loss": 0.5783, "step": 1602 }, { "epoch": 0.44453688297282307, "grad_norm": 0.21167699992656708, "learning_rate": 2.2348169966030416e-05, "loss": 0.5453, "step": 1603 }, { "epoch": 0.44481419855795895, "grad_norm": 0.19410103559494019, "learning_rate": 2.234476843351327e-05, "loss": 0.5527, "step": 1604 }, { "epoch": 0.44509151414309484, "grad_norm": 0.17932045459747314, "learning_rate": 2.2341364980020973e-05, "loss": 0.568, "step": 1605 }, { "epoch": 0.4453688297282307, "grad_norm": 0.2007569819688797, "learning_rate": 2.2337959606217624e-05, "loss": 0.5699, "step": 1606 }, { "epoch": 0.4456461453133666, "grad_norm": 0.18869513273239136, "learning_rate": 2.2334552312767705e-05, "loss": 0.5506, "step": 1607 }, { "epoch": 0.4459234608985025, "grad_norm": 0.1867865025997162, "learning_rate": 2.2331143100336072e-05, "loss": 0.5758, "step": 1608 }, { "epoch": 0.4462007764836384, "grad_norm": 0.19275221228599548, "learning_rate": 2.2327731969587947e-05, "loss": 0.5408, "step": 1609 }, { "epoch": 0.44647809206877426, "grad_norm": 0.19611743092536926, "learning_rate": 2.2324318921188932e-05, "loss": 0.5974, "step": 1610 }, { "epoch": 0.44675540765391014, "grad_norm": 0.20041632652282715, "learning_rate": 2.2320903955805e-05, "loss": 0.5598, "step": 1611 }, { "epoch": 0.447032723239046, "grad_norm": 0.18512395024299622, "learning_rate": 2.2317487074102514e-05, "loss": 0.5661, "step": 1612 }, { "epoch": 0.4473100388241819, "grad_norm": 0.1952909678220749, "learning_rate": 2.2314068276748188e-05, "loss": 0.5635, "step": 1613 }, { "epoch": 0.4475873544093178, "grad_norm": 0.1981881707906723, "learning_rate": 2.231064756440912e-05, "loss": 0.5601, "step": 1614 }, { "epoch": 0.4478646699944537, "grad_norm": 0.1989242285490036, "learning_rate": 2.230722493775279e-05, "loss": 0.5635, "step": 1615 }, { "epoch": 0.44814198557958956, "grad_norm": 0.18769480288028717, "learning_rate": 2.2303800397447034e-05, "loss": 0.5589, "step": 1616 }, { "epoch": 0.44841930116472545, "grad_norm": 0.19833675026893616, "learning_rate": 2.230037394416007e-05, "loss": 0.5622, "step": 1617 }, { "epoch": 0.44869661674986133, "grad_norm": 0.19801415503025055, "learning_rate": 2.2296945578560498e-05, "loss": 0.5862, "step": 1618 }, { "epoch": 0.4489739323349972, "grad_norm": 0.1936810463666916, "learning_rate": 2.2293515301317274e-05, "loss": 0.5452, "step": 1619 }, { "epoch": 0.4492512479201331, "grad_norm": 0.20558279752731323, "learning_rate": 2.2290083113099748e-05, "loss": 0.5573, "step": 1620 }, { "epoch": 0.449528563505269, "grad_norm": 0.2063780575990677, "learning_rate": 2.2286649014577615e-05, "loss": 0.6017, "step": 1621 }, { "epoch": 0.44980587909040487, "grad_norm": 0.18592418730258942, "learning_rate": 2.2283213006420973e-05, "loss": 0.5421, "step": 1622 }, { "epoch": 0.45008319467554075, "grad_norm": 0.19346946477890015, "learning_rate": 2.227977508930027e-05, "loss": 0.53, "step": 1623 }, { "epoch": 0.45036051026067664, "grad_norm": 0.2029414027929306, "learning_rate": 2.2276335263886336e-05, "loss": 0.5808, "step": 1624 }, { "epoch": 0.4506378258458125, "grad_norm": 0.21214988827705383, "learning_rate": 2.2272893530850373e-05, "loss": 0.564, "step": 1625 }, { "epoch": 0.4509151414309484, "grad_norm": 0.19748808443546295, "learning_rate": 2.2269449890863956e-05, "loss": 0.5562, "step": 1626 }, { "epoch": 0.4511924570160843, "grad_norm": 0.19283835589885712, "learning_rate": 2.2266004344599028e-05, "loss": 0.5511, "step": 1627 }, { "epoch": 0.4514697726012202, "grad_norm": 0.1917610466480255, "learning_rate": 2.2262556892727904e-05, "loss": 0.5744, "step": 1628 }, { "epoch": 0.45174708818635606, "grad_norm": 0.20705603063106537, "learning_rate": 2.225910753592328e-05, "loss": 0.5786, "step": 1629 }, { "epoch": 0.45202440377149194, "grad_norm": 0.19348299503326416, "learning_rate": 2.225565627485821e-05, "loss": 0.5899, "step": 1630 }, { "epoch": 0.45230171935662783, "grad_norm": 0.19649489223957062, "learning_rate": 2.2252203110206134e-05, "loss": 0.5317, "step": 1631 }, { "epoch": 0.4525790349417637, "grad_norm": 0.18169252574443817, "learning_rate": 2.224874804264085e-05, "loss": 0.5243, "step": 1632 }, { "epoch": 0.4528563505268996, "grad_norm": 0.188733771443367, "learning_rate": 2.224529107283653e-05, "loss": 0.5376, "step": 1633 }, { "epoch": 0.4531336661120355, "grad_norm": 0.18059036135673523, "learning_rate": 2.2241832201467727e-05, "loss": 0.5603, "step": 1634 }, { "epoch": 0.45341098169717137, "grad_norm": 0.19139643013477325, "learning_rate": 2.223837142920936e-05, "loss": 0.5603, "step": 1635 }, { "epoch": 0.45368829728230725, "grad_norm": 0.19182981550693512, "learning_rate": 2.2234908756736712e-05, "loss": 0.5805, "step": 1636 }, { "epoch": 0.45396561286744314, "grad_norm": 0.187539204955101, "learning_rate": 2.223144418472544e-05, "loss": 0.5546, "step": 1637 }, { "epoch": 0.454242928452579, "grad_norm": 0.19611942768096924, "learning_rate": 2.2227977713851587e-05, "loss": 0.5349, "step": 1638 }, { "epoch": 0.4545202440377149, "grad_norm": 0.19672465324401855, "learning_rate": 2.2224509344791536e-05, "loss": 0.5342, "step": 1639 }, { "epoch": 0.4547975596228508, "grad_norm": 0.17679478228092194, "learning_rate": 2.222103907822207e-05, "loss": 0.5473, "step": 1640 }, { "epoch": 0.4550748752079867, "grad_norm": 0.1936340481042862, "learning_rate": 2.2217566914820322e-05, "loss": 0.5543, "step": 1641 }, { "epoch": 0.45535219079312256, "grad_norm": 0.18610352277755737, "learning_rate": 2.2214092855263813e-05, "loss": 0.5412, "step": 1642 }, { "epoch": 0.45562950637825844, "grad_norm": 0.18969598412513733, "learning_rate": 2.2210616900230412e-05, "loss": 0.5707, "step": 1643 }, { "epoch": 0.4559068219633943, "grad_norm": 0.18808507919311523, "learning_rate": 2.220713905039838e-05, "loss": 0.5288, "step": 1644 }, { "epoch": 0.4561841375485302, "grad_norm": 0.19103705883026123, "learning_rate": 2.220365930644633e-05, "loss": 0.5925, "step": 1645 }, { "epoch": 0.4564614531336661, "grad_norm": 0.1837342530488968, "learning_rate": 2.2200177669053258e-05, "loss": 0.5893, "step": 1646 }, { "epoch": 0.456738768718802, "grad_norm": 0.1928233504295349, "learning_rate": 2.2196694138898517e-05, "loss": 0.5445, "step": 1647 }, { "epoch": 0.45701608430393786, "grad_norm": 0.195438414812088, "learning_rate": 2.2193208716661846e-05, "loss": 0.5561, "step": 1648 }, { "epoch": 0.45729339988907375, "grad_norm": 0.18883578479290009, "learning_rate": 2.2189721403023334e-05, "loss": 0.5463, "step": 1649 }, { "epoch": 0.45757071547420963, "grad_norm": 0.19664356112480164, "learning_rate": 2.2186232198663455e-05, "loss": 0.576, "step": 1650 }, { "epoch": 0.4578480310593455, "grad_norm": 0.5387895703315735, "learning_rate": 2.218274110426304e-05, "loss": 0.562, "step": 1651 }, { "epoch": 0.4581253466444814, "grad_norm": 0.18329556286334991, "learning_rate": 2.21792481205033e-05, "loss": 0.5737, "step": 1652 }, { "epoch": 0.4584026622296173, "grad_norm": 0.20161500573158264, "learning_rate": 2.21757532480658e-05, "loss": 0.5711, "step": 1653 }, { "epoch": 0.45867997781475317, "grad_norm": 0.20053423941135406, "learning_rate": 2.2172256487632488e-05, "loss": 0.5842, "step": 1654 }, { "epoch": 0.45895729339988905, "grad_norm": 0.1900150030851364, "learning_rate": 2.2168757839885672e-05, "loss": 0.5702, "step": 1655 }, { "epoch": 0.45923460898502494, "grad_norm": 0.19815103709697723, "learning_rate": 2.2165257305508035e-05, "loss": 0.5661, "step": 1656 }, { "epoch": 0.4595119245701608, "grad_norm": 0.18877148628234863, "learning_rate": 2.2161754885182623e-05, "loss": 0.5278, "step": 1657 }, { "epoch": 0.4597892401552967, "grad_norm": 0.1913890838623047, "learning_rate": 2.215825057959285e-05, "loss": 0.5342, "step": 1658 }, { "epoch": 0.4600665557404326, "grad_norm": 0.18911883234977722, "learning_rate": 2.2154744389422493e-05, "loss": 0.5473, "step": 1659 }, { "epoch": 0.4603438713255685, "grad_norm": 0.2087319940328598, "learning_rate": 2.2151236315355714e-05, "loss": 0.5839, "step": 1660 }, { "epoch": 0.46062118691070436, "grad_norm": 0.19037386775016785, "learning_rate": 2.214772635807702e-05, "loss": 0.5518, "step": 1661 }, { "epoch": 0.46089850249584025, "grad_norm": 0.1889956146478653, "learning_rate": 2.2144214518271307e-05, "loss": 0.5527, "step": 1662 }, { "epoch": 0.46117581808097613, "grad_norm": 0.24889707565307617, "learning_rate": 2.214070079662382e-05, "loss": 0.5766, "step": 1663 }, { "epoch": 0.461453133666112, "grad_norm": 0.19265350699424744, "learning_rate": 2.213718519382018e-05, "loss": 0.5721, "step": 1664 }, { "epoch": 0.4617304492512479, "grad_norm": 0.19039899110794067, "learning_rate": 2.213366771054638e-05, "loss": 0.5678, "step": 1665 }, { "epoch": 0.4620077648363838, "grad_norm": 0.203168585896492, "learning_rate": 2.2130148347488773e-05, "loss": 0.5486, "step": 1666 }, { "epoch": 0.46228508042151967, "grad_norm": 0.19051045179367065, "learning_rate": 2.2126627105334073e-05, "loss": 0.544, "step": 1667 }, { "epoch": 0.46256239600665555, "grad_norm": 0.18888403475284576, "learning_rate": 2.212310398476937e-05, "loss": 0.5436, "step": 1668 }, { "epoch": 0.46283971159179144, "grad_norm": 0.1800607144832611, "learning_rate": 2.2119578986482127e-05, "loss": 0.5659, "step": 1669 }, { "epoch": 0.4631170271769273, "grad_norm": 0.19722715020179749, "learning_rate": 2.211605211116015e-05, "loss": 0.5582, "step": 1670 }, { "epoch": 0.4633943427620632, "grad_norm": 0.19088581204414368, "learning_rate": 2.2112523359491637e-05, "loss": 0.5471, "step": 1671 }, { "epoch": 0.4636716583471991, "grad_norm": 0.19284148514270782, "learning_rate": 2.210899273216514e-05, "loss": 0.5408, "step": 1672 }, { "epoch": 0.463948973932335, "grad_norm": 0.1880495250225067, "learning_rate": 2.2105460229869574e-05, "loss": 0.561, "step": 1673 }, { "epoch": 0.46422628951747086, "grad_norm": 0.186056986451149, "learning_rate": 2.2101925853294226e-05, "loss": 0.5811, "step": 1674 }, { "epoch": 0.46450360510260674, "grad_norm": 0.1980651617050171, "learning_rate": 2.2098389603128744e-05, "loss": 0.5456, "step": 1675 }, { "epoch": 0.4647809206877426, "grad_norm": 0.2039021998643875, "learning_rate": 2.2094851480063143e-05, "loss": 0.6018, "step": 1676 }, { "epoch": 0.4650582362728785, "grad_norm": 0.19567370414733887, "learning_rate": 2.2091311484787815e-05, "loss": 0.5499, "step": 1677 }, { "epoch": 0.4653355518580144, "grad_norm": 0.19234254956245422, "learning_rate": 2.208776961799349e-05, "loss": 0.5522, "step": 1678 }, { "epoch": 0.4656128674431503, "grad_norm": 0.1954323649406433, "learning_rate": 2.20842258803713e-05, "loss": 0.5702, "step": 1679 }, { "epoch": 0.46589018302828616, "grad_norm": 0.1974237710237503, "learning_rate": 2.20806802726127e-05, "loss": 0.5604, "step": 1680 }, { "epoch": 0.46616749861342205, "grad_norm": 0.19580432772636414, "learning_rate": 2.2077132795409552e-05, "loss": 0.5184, "step": 1681 }, { "epoch": 0.46644481419855793, "grad_norm": 0.19890138506889343, "learning_rate": 2.207358344945405e-05, "loss": 0.5767, "step": 1682 }, { "epoch": 0.4667221297836938, "grad_norm": 0.18458129465579987, "learning_rate": 2.2070032235438776e-05, "loss": 0.5316, "step": 1683 }, { "epoch": 0.4669994453688297, "grad_norm": 0.1938208043575287, "learning_rate": 2.206647915405665e-05, "loss": 0.5809, "step": 1684 }, { "epoch": 0.4672767609539656, "grad_norm": 0.20221275091171265, "learning_rate": 2.206292420600099e-05, "loss": 0.5731, "step": 1685 }, { "epoch": 0.46755407653910147, "grad_norm": 0.194106787443161, "learning_rate": 2.205936739196545e-05, "loss": 0.5624, "step": 1686 }, { "epoch": 0.46783139212423736, "grad_norm": 0.1918519288301468, "learning_rate": 2.205580871264406e-05, "loss": 0.535, "step": 1687 }, { "epoch": 0.46810870770937324, "grad_norm": 0.19345992803573608, "learning_rate": 2.2052248168731216e-05, "loss": 0.5686, "step": 1688 }, { "epoch": 0.4683860232945091, "grad_norm": 0.20070746541023254, "learning_rate": 2.2048685760921674e-05, "loss": 0.567, "step": 1689 }, { "epoch": 0.468663338879645, "grad_norm": 0.1972619593143463, "learning_rate": 2.204512148991055e-05, "loss": 0.555, "step": 1690 }, { "epoch": 0.46894065446478095, "grad_norm": 0.19085273146629333, "learning_rate": 2.2041555356393327e-05, "loss": 0.5985, "step": 1691 }, { "epoch": 0.46921797004991683, "grad_norm": 0.20294739305973053, "learning_rate": 2.2037987361065855e-05, "loss": 0.5762, "step": 1692 }, { "epoch": 0.4694952856350527, "grad_norm": 0.1896994560956955, "learning_rate": 2.203441750462435e-05, "loss": 0.5857, "step": 1693 }, { "epoch": 0.4697726012201886, "grad_norm": 0.19331398606300354, "learning_rate": 2.2030845787765377e-05, "loss": 0.5654, "step": 1694 }, { "epoch": 0.4700499168053245, "grad_norm": 0.20668423175811768, "learning_rate": 2.2027272211185875e-05, "loss": 0.5812, "step": 1695 }, { "epoch": 0.47032723239046037, "grad_norm": 0.1910814493894577, "learning_rate": 2.2023696775583146e-05, "loss": 0.5479, "step": 1696 }, { "epoch": 0.47060454797559625, "grad_norm": 0.1902536004781723, "learning_rate": 2.2020119481654848e-05, "loss": 0.5647, "step": 1697 }, { "epoch": 0.47088186356073214, "grad_norm": 0.1955711394548416, "learning_rate": 2.201654033009901e-05, "loss": 0.581, "step": 1698 }, { "epoch": 0.471159179145868, "grad_norm": 0.1895892173051834, "learning_rate": 2.2012959321614018e-05, "loss": 0.5658, "step": 1699 }, { "epoch": 0.4714364947310039, "grad_norm": 0.1803213357925415, "learning_rate": 2.2009376456898622e-05, "loss": 0.558, "step": 1700 }, { "epoch": 0.4717138103161398, "grad_norm": 0.19255508482456207, "learning_rate": 2.200579173665193e-05, "loss": 0.5649, "step": 1701 }, { "epoch": 0.4719911259012757, "grad_norm": 0.20155562460422516, "learning_rate": 2.2002205161573426e-05, "loss": 0.5592, "step": 1702 }, { "epoch": 0.47226844148641156, "grad_norm": 0.20530745387077332, "learning_rate": 2.1998616732362935e-05, "loss": 0.5677, "step": 1703 }, { "epoch": 0.47254575707154745, "grad_norm": 0.2158524990081787, "learning_rate": 2.1995026449720657e-05, "loss": 0.5476, "step": 1704 }, { "epoch": 0.47282307265668333, "grad_norm": 0.2383381724357605, "learning_rate": 2.1991434314347155e-05, "loss": 0.5413, "step": 1705 }, { "epoch": 0.4731003882418192, "grad_norm": 0.19787725806236267, "learning_rate": 2.1987840326943343e-05, "loss": 0.5637, "step": 1706 }, { "epoch": 0.4733777038269551, "grad_norm": 0.18500499427318573, "learning_rate": 2.1984244488210508e-05, "loss": 0.5333, "step": 1707 }, { "epoch": 0.473655019412091, "grad_norm": 0.19429421424865723, "learning_rate": 2.1980646798850295e-05, "loss": 0.5611, "step": 1708 }, { "epoch": 0.47393233499722687, "grad_norm": 0.18553559482097626, "learning_rate": 2.197704725956471e-05, "loss": 0.5516, "step": 1709 }, { "epoch": 0.47420965058236275, "grad_norm": 0.1934727132320404, "learning_rate": 2.197344587105611e-05, "loss": 0.5464, "step": 1710 }, { "epoch": 0.47448696616749864, "grad_norm": 0.20638912916183472, "learning_rate": 2.1969842634027233e-05, "loss": 0.5664, "step": 1711 }, { "epoch": 0.4747642817526345, "grad_norm": 0.19581542909145355, "learning_rate": 2.196623754918115e-05, "loss": 0.5597, "step": 1712 }, { "epoch": 0.4750415973377704, "grad_norm": 0.19786013662815094, "learning_rate": 2.1962630617221325e-05, "loss": 0.5729, "step": 1713 }, { "epoch": 0.4753189129229063, "grad_norm": 0.19448676705360413, "learning_rate": 2.1959021838851556e-05, "loss": 0.5573, "step": 1714 }, { "epoch": 0.4755962285080422, "grad_norm": 0.2131812423467636, "learning_rate": 2.1955411214776015e-05, "loss": 0.5862, "step": 1715 }, { "epoch": 0.47587354409317806, "grad_norm": 0.2251943200826645, "learning_rate": 2.195179874569923e-05, "loss": 0.5847, "step": 1716 }, { "epoch": 0.47615085967831394, "grad_norm": 0.20042872428894043, "learning_rate": 2.1948184432326084e-05, "loss": 0.5742, "step": 1717 }, { "epoch": 0.4764281752634498, "grad_norm": 0.193080872297287, "learning_rate": 2.1944568275361838e-05, "loss": 0.5441, "step": 1718 }, { "epoch": 0.4767054908485857, "grad_norm": 0.20040108263492584, "learning_rate": 2.194095027551209e-05, "loss": 0.5635, "step": 1719 }, { "epoch": 0.4769828064337216, "grad_norm": 0.19599542021751404, "learning_rate": 2.193733043348281e-05, "loss": 0.5852, "step": 1720 }, { "epoch": 0.4772601220188575, "grad_norm": 0.1938834935426712, "learning_rate": 2.1933708749980324e-05, "loss": 0.5644, "step": 1721 }, { "epoch": 0.47753743760399336, "grad_norm": 0.19517837464809418, "learning_rate": 2.1930085225711317e-05, "loss": 0.5568, "step": 1722 }, { "epoch": 0.47781475318912925, "grad_norm": 0.1954992413520813, "learning_rate": 2.1926459861382843e-05, "loss": 0.5459, "step": 1723 }, { "epoch": 0.47809206877426513, "grad_norm": 0.17570015788078308, "learning_rate": 2.1922832657702297e-05, "loss": 0.539, "step": 1724 }, { "epoch": 0.478369384359401, "grad_norm": 0.24834416806697845, "learning_rate": 2.1919203615377442e-05, "loss": 0.5401, "step": 1725 }, { "epoch": 0.4786466999445369, "grad_norm": 0.19633722305297852, "learning_rate": 2.1915572735116413e-05, "loss": 0.5469, "step": 1726 }, { "epoch": 0.4789240155296728, "grad_norm": 0.19104620814323425, "learning_rate": 2.1911940017627676e-05, "loss": 0.5472, "step": 1727 }, { "epoch": 0.47920133111480867, "grad_norm": 0.18845802545547485, "learning_rate": 2.1908305463620084e-05, "loss": 0.5742, "step": 1728 }, { "epoch": 0.47947864669994456, "grad_norm": 0.20180946588516235, "learning_rate": 2.190466907380282e-05, "loss": 0.5402, "step": 1729 }, { "epoch": 0.47975596228508044, "grad_norm": 0.19500130414962769, "learning_rate": 2.190103084888545e-05, "loss": 0.5476, "step": 1730 }, { "epoch": 0.4800332778702163, "grad_norm": 0.1933142989873886, "learning_rate": 2.1897390789577887e-05, "loss": 0.5426, "step": 1731 }, { "epoch": 0.4803105934553522, "grad_norm": 0.1977783739566803, "learning_rate": 2.1893748896590404e-05, "loss": 0.5614, "step": 1732 }, { "epoch": 0.4805879090404881, "grad_norm": 0.2301134616136551, "learning_rate": 2.1890105170633624e-05, "loss": 0.564, "step": 1733 }, { "epoch": 0.480865224625624, "grad_norm": 0.219647616147995, "learning_rate": 2.1886459612418542e-05, "loss": 0.5289, "step": 1734 }, { "epoch": 0.48114254021075986, "grad_norm": 0.19821031391620636, "learning_rate": 2.18828122226565e-05, "loss": 0.5903, "step": 1735 }, { "epoch": 0.48141985579589575, "grad_norm": 0.24226263165473938, "learning_rate": 2.18791630020592e-05, "loss": 0.5795, "step": 1736 }, { "epoch": 0.48169717138103163, "grad_norm": 0.200203076004982, "learning_rate": 2.18755119513387e-05, "loss": 0.568, "step": 1737 }, { "epoch": 0.4819744869661675, "grad_norm": 0.19729411602020264, "learning_rate": 2.1871859071207425e-05, "loss": 0.5633, "step": 1738 }, { "epoch": 0.4822518025513034, "grad_norm": 0.19362856447696686, "learning_rate": 2.1868204362378136e-05, "loss": 0.5709, "step": 1739 }, { "epoch": 0.4825291181364393, "grad_norm": 0.21311257779598236, "learning_rate": 2.1864547825563968e-05, "loss": 0.5481, "step": 1740 }, { "epoch": 0.48280643372157517, "grad_norm": 0.1957651972770691, "learning_rate": 2.1860889461478416e-05, "loss": 0.5481, "step": 1741 }, { "epoch": 0.48308374930671105, "grad_norm": 0.20088225603103638, "learning_rate": 2.1857229270835316e-05, "loss": 0.5774, "step": 1742 }, { "epoch": 0.48336106489184694, "grad_norm": 0.19715815782546997, "learning_rate": 2.1853567254348873e-05, "loss": 0.5737, "step": 1743 }, { "epoch": 0.4836383804769828, "grad_norm": 0.2071049064397812, "learning_rate": 2.184990341273364e-05, "loss": 0.5745, "step": 1744 }, { "epoch": 0.4839156960621187, "grad_norm": 0.19320468604564667, "learning_rate": 2.1846237746704526e-05, "loss": 0.5662, "step": 1745 }, { "epoch": 0.4841930116472546, "grad_norm": 0.18520797789096832, "learning_rate": 2.1842570256976807e-05, "loss": 0.5808, "step": 1746 }, { "epoch": 0.4844703272323905, "grad_norm": 0.20254836976528168, "learning_rate": 2.18389009442661e-05, "loss": 0.5558, "step": 1747 }, { "epoch": 0.48474764281752636, "grad_norm": 0.2153664231300354, "learning_rate": 2.1835229809288393e-05, "loss": 0.5661, "step": 1748 }, { "epoch": 0.48502495840266224, "grad_norm": 0.1884908676147461, "learning_rate": 2.183155685276002e-05, "loss": 0.5577, "step": 1749 }, { "epoch": 0.4853022739877981, "grad_norm": 0.19069699943065643, "learning_rate": 2.1827882075397664e-05, "loss": 0.5417, "step": 1750 }, { "epoch": 0.485579589572934, "grad_norm": 0.2055320143699646, "learning_rate": 2.182420547791838e-05, "loss": 0.5887, "step": 1751 }, { "epoch": 0.4858569051580699, "grad_norm": 0.19550320506095886, "learning_rate": 2.182052706103957e-05, "loss": 0.5348, "step": 1752 }, { "epoch": 0.4861342207432058, "grad_norm": 0.19508466124534607, "learning_rate": 2.1816846825478988e-05, "loss": 0.5506, "step": 1753 }, { "epoch": 0.48641153632834166, "grad_norm": 0.18409934639930725, "learning_rate": 2.181316477195474e-05, "loss": 0.5629, "step": 1754 }, { "epoch": 0.48668885191347755, "grad_norm": 0.18185651302337646, "learning_rate": 2.1809480901185302e-05, "loss": 0.5471, "step": 1755 }, { "epoch": 0.48696616749861343, "grad_norm": 0.18371707201004028, "learning_rate": 2.180579521388949e-05, "loss": 0.5747, "step": 1756 }, { "epoch": 0.4872434830837493, "grad_norm": 0.1873805671930313, "learning_rate": 2.1802107710786476e-05, "loss": 0.5606, "step": 1757 }, { "epoch": 0.4875207986688852, "grad_norm": 0.19243435561656952, "learning_rate": 2.1798418392595794e-05, "loss": 0.5638, "step": 1758 }, { "epoch": 0.4877981142540211, "grad_norm": 0.184648796916008, "learning_rate": 2.179472726003733e-05, "loss": 0.5584, "step": 1759 }, { "epoch": 0.48807542983915697, "grad_norm": 0.17338646948337555, "learning_rate": 2.1791034313831316e-05, "loss": 0.556, "step": 1760 }, { "epoch": 0.48835274542429286, "grad_norm": 0.18127377331256866, "learning_rate": 2.1787339554698344e-05, "loss": 0.5631, "step": 1761 }, { "epoch": 0.48863006100942874, "grad_norm": 0.1888059824705124, "learning_rate": 2.1783642983359364e-05, "loss": 0.5611, "step": 1762 }, { "epoch": 0.4889073765945646, "grad_norm": 0.17712418735027313, "learning_rate": 2.1779944600535672e-05, "loss": 0.5462, "step": 1763 }, { "epoch": 0.4891846921797005, "grad_norm": 0.19322241842746735, "learning_rate": 2.177624440694892e-05, "loss": 0.5511, "step": 1764 }, { "epoch": 0.4894620077648364, "grad_norm": 0.18574179708957672, "learning_rate": 2.1772542403321118e-05, "loss": 0.5531, "step": 1765 }, { "epoch": 0.4897393233499723, "grad_norm": 0.18718282878398895, "learning_rate": 2.1768838590374617e-05, "loss": 0.5683, "step": 1766 }, { "epoch": 0.49001663893510816, "grad_norm": 0.18598803877830505, "learning_rate": 2.1765132968832135e-05, "loss": 0.5488, "step": 1767 }, { "epoch": 0.49029395452024405, "grad_norm": 0.18899311125278473, "learning_rate": 2.1761425539416737e-05, "loss": 0.5449, "step": 1768 }, { "epoch": 0.49057127010537993, "grad_norm": 0.1895790696144104, "learning_rate": 2.175771630285184e-05, "loss": 0.58, "step": 1769 }, { "epoch": 0.4908485856905158, "grad_norm": 0.17997822165489197, "learning_rate": 2.1754005259861217e-05, "loss": 0.5734, "step": 1770 }, { "epoch": 0.4911259012756517, "grad_norm": 0.19107869267463684, "learning_rate": 2.175029241116898e-05, "loss": 0.5707, "step": 1771 }, { "epoch": 0.4914032168607876, "grad_norm": 0.22478626668453217, "learning_rate": 2.1746577757499613e-05, "loss": 0.5667, "step": 1772 }, { "epoch": 0.49168053244592347, "grad_norm": 0.2036799043416977, "learning_rate": 2.1742861299577947e-05, "loss": 0.5505, "step": 1773 }, { "epoch": 0.49195784803105935, "grad_norm": 0.1856662929058075, "learning_rate": 2.1739143038129152e-05, "loss": 0.538, "step": 1774 }, { "epoch": 0.49223516361619524, "grad_norm": 0.18573297560214996, "learning_rate": 2.1735422973878766e-05, "loss": 0.5507, "step": 1775 }, { "epoch": 0.4925124792013311, "grad_norm": 0.19560420513153076, "learning_rate": 2.1731701107552673e-05, "loss": 0.5395, "step": 1776 }, { "epoch": 0.492789794786467, "grad_norm": 0.18675924837589264, "learning_rate": 2.1727977439877094e-05, "loss": 0.5523, "step": 1777 }, { "epoch": 0.4930671103716029, "grad_norm": 0.19126398861408234, "learning_rate": 2.1724251971578636e-05, "loss": 0.5736, "step": 1778 }, { "epoch": 0.4933444259567388, "grad_norm": 0.17955103516578674, "learning_rate": 2.1720524703384222e-05, "loss": 0.5398, "step": 1779 }, { "epoch": 0.49362174154187466, "grad_norm": 0.1961311250925064, "learning_rate": 2.1716795636021148e-05, "loss": 0.5565, "step": 1780 }, { "epoch": 0.49389905712701054, "grad_norm": 0.1914500594139099, "learning_rate": 2.171306477021705e-05, "loss": 0.5296, "step": 1781 }, { "epoch": 0.49417637271214643, "grad_norm": 0.2172551453113556, "learning_rate": 2.170933210669992e-05, "loss": 0.5711, "step": 1782 }, { "epoch": 0.4944536882972823, "grad_norm": 0.18967878818511963, "learning_rate": 2.1705597646198098e-05, "loss": 0.5719, "step": 1783 }, { "epoch": 0.4947310038824182, "grad_norm": 0.19041703641414642, "learning_rate": 2.1701861389440277e-05, "loss": 0.5431, "step": 1784 }, { "epoch": 0.4950083194675541, "grad_norm": 0.19202065467834473, "learning_rate": 2.1698123337155503e-05, "loss": 0.5392, "step": 1785 }, { "epoch": 0.49528563505268997, "grad_norm": 0.1918521374464035, "learning_rate": 2.1694383490073162e-05, "loss": 0.5268, "step": 1786 }, { "epoch": 0.49556295063782585, "grad_norm": 0.19112561643123627, "learning_rate": 2.1690641848923004e-05, "loss": 0.5741, "step": 1787 }, { "epoch": 0.49584026622296173, "grad_norm": 0.28441137075424194, "learning_rate": 2.168689841443512e-05, "loss": 0.5628, "step": 1788 }, { "epoch": 0.4961175818080976, "grad_norm": 0.1971031278371811, "learning_rate": 2.1683153187339955e-05, "loss": 0.5336, "step": 1789 }, { "epoch": 0.4963948973932335, "grad_norm": 0.18874448537826538, "learning_rate": 2.16794061683683e-05, "loss": 0.5717, "step": 1790 }, { "epoch": 0.4966722129783694, "grad_norm": 0.19406016170978546, "learning_rate": 2.1675657358251293e-05, "loss": 0.5641, "step": 1791 }, { "epoch": 0.49694952856350527, "grad_norm": 0.19491691887378693, "learning_rate": 2.1671906757720433e-05, "loss": 0.5598, "step": 1792 }, { "epoch": 0.49722684414864116, "grad_norm": 0.1898011416196823, "learning_rate": 2.166815436750756e-05, "loss": 0.5748, "step": 1793 }, { "epoch": 0.49750415973377704, "grad_norm": 0.18792784214019775, "learning_rate": 2.1664400188344863e-05, "loss": 0.5383, "step": 1794 }, { "epoch": 0.4977814753189129, "grad_norm": 0.1921299546957016, "learning_rate": 2.1660644220964886e-05, "loss": 0.5649, "step": 1795 }, { "epoch": 0.4980587909040488, "grad_norm": 0.1881396770477295, "learning_rate": 2.1656886466100514e-05, "loss": 0.5525, "step": 1796 }, { "epoch": 0.4983361064891847, "grad_norm": 0.19252420961856842, "learning_rate": 2.1653126924484985e-05, "loss": 0.5308, "step": 1797 }, { "epoch": 0.4986134220743206, "grad_norm": 0.2611597180366516, "learning_rate": 2.1649365596851884e-05, "loss": 0.5664, "step": 1798 }, { "epoch": 0.49889073765945646, "grad_norm": 0.18851755559444427, "learning_rate": 2.164560248393515e-05, "loss": 0.5314, "step": 1799 }, { "epoch": 0.49916805324459235, "grad_norm": 0.19385330379009247, "learning_rate": 2.164183758646906e-05, "loss": 0.5615, "step": 1800 }, { "epoch": 0.49944536882972823, "grad_norm": 0.20486874878406525, "learning_rate": 2.163807090518825e-05, "loss": 0.5833, "step": 1801 }, { "epoch": 0.4997226844148641, "grad_norm": 0.21984978020191193, "learning_rate": 2.16343024408277e-05, "loss": 0.5494, "step": 1802 }, { "epoch": 0.5, "grad_norm": 0.1872473657131195, "learning_rate": 2.1630532194122733e-05, "loss": 0.5388, "step": 1803 }, { "epoch": 0.5002773155851359, "grad_norm": 0.19348090887069702, "learning_rate": 2.1626760165809022e-05, "loss": 0.5615, "step": 1804 }, { "epoch": 0.5005546311702718, "grad_norm": 0.18968060612678528, "learning_rate": 2.16229863566226e-05, "loss": 0.5401, "step": 1805 }, { "epoch": 0.5008319467554077, "grad_norm": 0.1913541853427887, "learning_rate": 2.161921076729983e-05, "loss": 0.5797, "step": 1806 }, { "epoch": 0.5011092623405435, "grad_norm": 0.18872712552547455, "learning_rate": 2.1615433398577428e-05, "loss": 0.5385, "step": 1807 }, { "epoch": 0.5013865779256794, "grad_norm": 0.19591103494167328, "learning_rate": 2.1611654251192465e-05, "loss": 0.5568, "step": 1808 }, { "epoch": 0.5016638935108153, "grad_norm": 0.19541212916374207, "learning_rate": 2.1607873325882343e-05, "loss": 0.5679, "step": 1809 }, { "epoch": 0.5019412090959512, "grad_norm": 0.1972798854112625, "learning_rate": 2.160409062338483e-05, "loss": 0.5513, "step": 1810 }, { "epoch": 0.5022185246810871, "grad_norm": 0.19507858157157898, "learning_rate": 2.1600306144438027e-05, "loss": 0.5509, "step": 1811 }, { "epoch": 0.502495840266223, "grad_norm": 0.18637265264987946, "learning_rate": 2.1596519889780387e-05, "loss": 0.5632, "step": 1812 }, { "epoch": 0.5027731558513588, "grad_norm": 0.17938151955604553, "learning_rate": 2.159273186015071e-05, "loss": 0.5583, "step": 1813 }, { "epoch": 0.5030504714364947, "grad_norm": 0.19754791259765625, "learning_rate": 2.158894205628814e-05, "loss": 0.5705, "step": 1814 }, { "epoch": 0.5033277870216306, "grad_norm": 0.19835114479064941, "learning_rate": 2.1585150478932165e-05, "loss": 0.6031, "step": 1815 }, { "epoch": 0.5036051026067665, "grad_norm": 0.1887637823820114, "learning_rate": 2.1581357128822627e-05, "loss": 0.5551, "step": 1816 }, { "epoch": 0.5038824181919024, "grad_norm": 0.18568859994411469, "learning_rate": 2.157756200669971e-05, "loss": 0.5384, "step": 1817 }, { "epoch": 0.5041597337770383, "grad_norm": 0.1866898089647293, "learning_rate": 2.1573765113303936e-05, "loss": 0.5474, "step": 1818 }, { "epoch": 0.5044370493621742, "grad_norm": 0.18115058541297913, "learning_rate": 2.156996644937618e-05, "loss": 0.5459, "step": 1819 }, { "epoch": 0.50471436494731, "grad_norm": 0.188395157456398, "learning_rate": 2.1566166015657672e-05, "loss": 0.5379, "step": 1820 }, { "epoch": 0.5049916805324459, "grad_norm": 0.2012917846441269, "learning_rate": 2.156236381288997e-05, "loss": 0.5771, "step": 1821 }, { "epoch": 0.5052689961175818, "grad_norm": 0.18014481663703918, "learning_rate": 2.1558559841814986e-05, "loss": 0.5508, "step": 1822 }, { "epoch": 0.5055463117027177, "grad_norm": 0.1956920027732849, "learning_rate": 2.1554754103174972e-05, "loss": 0.5625, "step": 1823 }, { "epoch": 0.5058236272878536, "grad_norm": 0.1985912024974823, "learning_rate": 2.1550946597712536e-05, "loss": 0.532, "step": 1824 }, { "epoch": 0.5061009428729895, "grad_norm": 0.1955011934041977, "learning_rate": 2.1547137326170613e-05, "loss": 0.5641, "step": 1825 }, { "epoch": 0.5063782584581253, "grad_norm": 0.1937127411365509, "learning_rate": 2.1543326289292497e-05, "loss": 0.5369, "step": 1826 }, { "epoch": 0.5066555740432612, "grad_norm": 0.22294695675373077, "learning_rate": 2.153951348782183e-05, "loss": 0.5754, "step": 1827 }, { "epoch": 0.5069328896283971, "grad_norm": 0.1839090883731842, "learning_rate": 2.1535698922502582e-05, "loss": 0.5344, "step": 1828 }, { "epoch": 0.507210205213533, "grad_norm": 0.18940389156341553, "learning_rate": 2.1531882594079074e-05, "loss": 0.5399, "step": 1829 }, { "epoch": 0.5074875207986689, "grad_norm": 0.19242316484451294, "learning_rate": 2.152806450329598e-05, "loss": 0.5473, "step": 1830 }, { "epoch": 0.5077648363838048, "grad_norm": 0.19500425457954407, "learning_rate": 2.1524244650898308e-05, "loss": 0.5812, "step": 1831 }, { "epoch": 0.5080421519689406, "grad_norm": 0.19228143990039825, "learning_rate": 2.1520423037631408e-05, "loss": 0.5518, "step": 1832 }, { "epoch": 0.5083194675540765, "grad_norm": 0.1868668794631958, "learning_rate": 2.1516599664240985e-05, "loss": 0.5534, "step": 1833 }, { "epoch": 0.5085967831392124, "grad_norm": 0.1996205449104309, "learning_rate": 2.151277453147308e-05, "loss": 0.5283, "step": 1834 }, { "epoch": 0.5088740987243483, "grad_norm": 0.18492724001407623, "learning_rate": 2.150894764007407e-05, "loss": 0.563, "step": 1835 }, { "epoch": 0.5091514143094842, "grad_norm": 0.1847442388534546, "learning_rate": 2.150511899079069e-05, "loss": 0.5478, "step": 1836 }, { "epoch": 0.5094287298946201, "grad_norm": 0.19469550251960754, "learning_rate": 2.1501288584370006e-05, "loss": 0.5388, "step": 1837 }, { "epoch": 0.509706045479756, "grad_norm": 0.18692530691623688, "learning_rate": 2.1497456421559436e-05, "loss": 0.523, "step": 1838 }, { "epoch": 0.5099833610648918, "grad_norm": 0.18751968443393707, "learning_rate": 2.1493622503106736e-05, "loss": 0.561, "step": 1839 }, { "epoch": 0.5102606766500277, "grad_norm": 0.18232478201389313, "learning_rate": 2.1489786829760005e-05, "loss": 0.5579, "step": 1840 }, { "epoch": 0.5105379922351636, "grad_norm": 0.20106928050518036, "learning_rate": 2.1485949402267684e-05, "loss": 0.5445, "step": 1841 }, { "epoch": 0.5108153078202995, "grad_norm": 0.19342289865016937, "learning_rate": 2.1482110221378555e-05, "loss": 0.5627, "step": 1842 }, { "epoch": 0.5110926234054354, "grad_norm": 0.1977401226758957, "learning_rate": 2.1478269287841747e-05, "loss": 0.5949, "step": 1843 }, { "epoch": 0.5113699389905713, "grad_norm": 0.18564966320991516, "learning_rate": 2.1474426602406722e-05, "loss": 0.5598, "step": 1844 }, { "epoch": 0.5116472545757071, "grad_norm": 0.19453705847263336, "learning_rate": 2.1470582165823296e-05, "loss": 0.5876, "step": 1845 }, { "epoch": 0.511924570160843, "grad_norm": 0.1874392032623291, "learning_rate": 2.146673597884162e-05, "loss": 0.5465, "step": 1846 }, { "epoch": 0.5122018857459789, "grad_norm": 0.19269202649593353, "learning_rate": 2.1462888042212183e-05, "loss": 0.5333, "step": 1847 }, { "epoch": 0.5124792013311148, "grad_norm": 0.21227677166461945, "learning_rate": 2.1459038356685824e-05, "loss": 0.5772, "step": 1848 }, { "epoch": 0.5127565169162507, "grad_norm": 0.18281084299087524, "learning_rate": 2.1455186923013716e-05, "loss": 0.5732, "step": 1849 }, { "epoch": 0.5130338325013866, "grad_norm": 0.18322256207466125, "learning_rate": 2.1451333741947373e-05, "loss": 0.5367, "step": 1850 }, { "epoch": 0.5133111480865225, "grad_norm": 0.19677676260471344, "learning_rate": 2.1447478814238658e-05, "loss": 0.5797, "step": 1851 }, { "epoch": 0.5135884636716583, "grad_norm": 0.2173527330160141, "learning_rate": 2.1443622140639768e-05, "loss": 0.5688, "step": 1852 }, { "epoch": 0.5138657792567942, "grad_norm": 0.240644633769989, "learning_rate": 2.143976372190324e-05, "loss": 0.5754, "step": 1853 }, { "epoch": 0.5141430948419301, "grad_norm": 0.19872795045375824, "learning_rate": 2.1435903558781954e-05, "loss": 0.5752, "step": 1854 }, { "epoch": 0.514420410427066, "grad_norm": 0.18520157039165497, "learning_rate": 2.143204165202914e-05, "loss": 0.5564, "step": 1855 }, { "epoch": 0.5146977260122019, "grad_norm": 0.1843482255935669, "learning_rate": 2.1428178002398342e-05, "loss": 0.536, "step": 1856 }, { "epoch": 0.5149750415973378, "grad_norm": 0.18434906005859375, "learning_rate": 2.1424312610643467e-05, "loss": 0.5722, "step": 1857 }, { "epoch": 0.5152523571824736, "grad_norm": 0.18243710696697235, "learning_rate": 2.1420445477518756e-05, "loss": 0.5134, "step": 1858 }, { "epoch": 0.5155296727676095, "grad_norm": 0.1954345405101776, "learning_rate": 2.14165766037788e-05, "loss": 0.5465, "step": 1859 }, { "epoch": 0.5158069883527454, "grad_norm": 0.1804470270872116, "learning_rate": 2.1412705990178496e-05, "loss": 0.5529, "step": 1860 }, { "epoch": 0.5160843039378813, "grad_norm": 0.19080010056495667, "learning_rate": 2.140883363747312e-05, "loss": 0.54, "step": 1861 }, { "epoch": 0.5163616195230172, "grad_norm": 0.18515829741954803, "learning_rate": 2.1404959546418268e-05, "loss": 0.5409, "step": 1862 }, { "epoch": 0.5166389351081531, "grad_norm": 0.19277918338775635, "learning_rate": 2.1401083717769876e-05, "loss": 0.5703, "step": 1863 }, { "epoch": 0.516916250693289, "grad_norm": 0.2017827183008194, "learning_rate": 2.139720615228422e-05, "loss": 0.5545, "step": 1864 }, { "epoch": 0.5171935662784248, "grad_norm": 0.19409088790416718, "learning_rate": 2.1393326850717915e-05, "loss": 0.5613, "step": 1865 }, { "epoch": 0.5174708818635607, "grad_norm": 0.18852104246616364, "learning_rate": 2.138944581382792e-05, "loss": 0.5784, "step": 1866 }, { "epoch": 0.5177481974486966, "grad_norm": 0.18418540060520172, "learning_rate": 2.1385563042371525e-05, "loss": 0.5291, "step": 1867 }, { "epoch": 0.5180255130338325, "grad_norm": 0.20222961902618408, "learning_rate": 2.138167853710636e-05, "loss": 0.5432, "step": 1868 }, { "epoch": 0.5183028286189684, "grad_norm": 0.1890316754579544, "learning_rate": 2.1377792298790396e-05, "loss": 0.5859, "step": 1869 }, { "epoch": 0.5185801442041043, "grad_norm": 0.19611713290214539, "learning_rate": 2.1373904328181946e-05, "loss": 0.5468, "step": 1870 }, { "epoch": 0.5188574597892401, "grad_norm": 0.18812295794487, "learning_rate": 2.1370014626039648e-05, "loss": 0.5356, "step": 1871 }, { "epoch": 0.519134775374376, "grad_norm": 0.18963585793972015, "learning_rate": 2.136612319312249e-05, "loss": 0.5646, "step": 1872 }, { "epoch": 0.5194120909595119, "grad_norm": 0.1885354369878769, "learning_rate": 2.1362230030189795e-05, "loss": 0.5578, "step": 1873 }, { "epoch": 0.5196894065446478, "grad_norm": 0.1933208405971527, "learning_rate": 2.1358335138001224e-05, "loss": 0.5379, "step": 1874 }, { "epoch": 0.5199667221297837, "grad_norm": 0.1998060792684555, "learning_rate": 2.1354438517316767e-05, "loss": 0.5741, "step": 1875 }, { "epoch": 0.5202440377149196, "grad_norm": 0.18762660026550293, "learning_rate": 2.135054016889676e-05, "loss": 0.5618, "step": 1876 }, { "epoch": 0.5205213533000554, "grad_norm": 0.2009068727493286, "learning_rate": 2.1346640093501872e-05, "loss": 0.5499, "step": 1877 }, { "epoch": 0.5207986688851913, "grad_norm": 0.1818576157093048, "learning_rate": 2.1342738291893122e-05, "loss": 0.5276, "step": 1878 }, { "epoch": 0.5210759844703272, "grad_norm": 0.2910788357257843, "learning_rate": 2.1338834764831845e-05, "loss": 0.5333, "step": 1879 }, { "epoch": 0.5213533000554631, "grad_norm": 0.20181319117546082, "learning_rate": 2.1334929513079722e-05, "loss": 0.567, "step": 1880 }, { "epoch": 0.521630615640599, "grad_norm": 0.19496895372867584, "learning_rate": 2.133102253739878e-05, "loss": 0.5369, "step": 1881 }, { "epoch": 0.5219079312257349, "grad_norm": 0.18957465887069702, "learning_rate": 2.1327113838551362e-05, "loss": 0.5359, "step": 1882 }, { "epoch": 0.5221852468108708, "grad_norm": 0.19057804346084595, "learning_rate": 2.132320341730017e-05, "loss": 0.5595, "step": 1883 }, { "epoch": 0.5224625623960066, "grad_norm": 0.18731848895549774, "learning_rate": 2.131929127440822e-05, "loss": 0.5258, "step": 1884 }, { "epoch": 0.5227398779811425, "grad_norm": 0.19227994978427887, "learning_rate": 2.131537741063888e-05, "loss": 0.5927, "step": 1885 }, { "epoch": 0.5230171935662784, "grad_norm": 0.18374542891979218, "learning_rate": 2.1311461826755847e-05, "loss": 0.543, "step": 1886 }, { "epoch": 0.5232945091514143, "grad_norm": 0.1936596930027008, "learning_rate": 2.1307544523523156e-05, "loss": 0.5677, "step": 1887 }, { "epoch": 0.5235718247365502, "grad_norm": 0.18285861611366272, "learning_rate": 2.1303625501705183e-05, "loss": 0.5738, "step": 1888 }, { "epoch": 0.5238491403216861, "grad_norm": 0.23504310846328735, "learning_rate": 2.1299704762066618e-05, "loss": 0.5785, "step": 1889 }, { "epoch": 0.5241264559068219, "grad_norm": 0.1995551437139511, "learning_rate": 2.129578230537252e-05, "loss": 0.5637, "step": 1890 }, { "epoch": 0.5244037714919578, "grad_norm": 0.18941858410835266, "learning_rate": 2.1291858132388248e-05, "loss": 0.5523, "step": 1891 }, { "epoch": 0.5246810870770937, "grad_norm": 0.18262585997581482, "learning_rate": 2.1287932243879523e-05, "loss": 0.5587, "step": 1892 }, { "epoch": 0.5249584026622296, "grad_norm": 0.1891373097896576, "learning_rate": 2.1284004640612376e-05, "loss": 0.5494, "step": 1893 }, { "epoch": 0.5252357182473655, "grad_norm": 0.1890500783920288, "learning_rate": 2.1280075323353206e-05, "loss": 0.5411, "step": 1894 }, { "epoch": 0.5255130338325014, "grad_norm": 0.182157963514328, "learning_rate": 2.127614429286871e-05, "loss": 0.5265, "step": 1895 }, { "epoch": 0.5257903494176372, "grad_norm": 0.18624311685562134, "learning_rate": 2.1272211549925946e-05, "loss": 0.5309, "step": 1896 }, { "epoch": 0.5260676650027731, "grad_norm": 0.19566656649112701, "learning_rate": 2.1268277095292292e-05, "loss": 0.5694, "step": 1897 }, { "epoch": 0.526344980587909, "grad_norm": 0.19160917401313782, "learning_rate": 2.1264340929735467e-05, "loss": 0.5324, "step": 1898 }, { "epoch": 0.5266222961730449, "grad_norm": 0.18548499047756195, "learning_rate": 2.126040305402352e-05, "loss": 0.5326, "step": 1899 }, { "epoch": 0.5268996117581808, "grad_norm": 0.18145646154880524, "learning_rate": 2.1256463468924837e-05, "loss": 0.5491, "step": 1900 }, { "epoch": 0.5271769273433167, "grad_norm": 0.1829940527677536, "learning_rate": 2.125252217520813e-05, "loss": 0.5235, "step": 1901 }, { "epoch": 0.5274542429284526, "grad_norm": 0.19107188284397125, "learning_rate": 2.1248579173642453e-05, "loss": 0.539, "step": 1902 }, { "epoch": 0.5277315585135884, "grad_norm": 0.18394434452056885, "learning_rate": 2.1244634464997188e-05, "loss": 0.5672, "step": 1903 }, { "epoch": 0.5280088740987243, "grad_norm": 0.19489826261997223, "learning_rate": 2.1240688050042058e-05, "loss": 0.5514, "step": 1904 }, { "epoch": 0.5282861896838602, "grad_norm": 0.18582908809185028, "learning_rate": 2.1236739929547105e-05, "loss": 0.5484, "step": 1905 }, { "epoch": 0.5285635052689961, "grad_norm": 0.19354532659053802, "learning_rate": 2.123279010428272e-05, "loss": 0.5542, "step": 1906 }, { "epoch": 0.528840820854132, "grad_norm": 0.19419358670711517, "learning_rate": 2.1228838575019612e-05, "loss": 0.5349, "step": 1907 }, { "epoch": 0.5291181364392679, "grad_norm": 0.18452230095863342, "learning_rate": 2.1224885342528834e-05, "loss": 0.5676, "step": 1908 }, { "epoch": 0.5293954520244037, "grad_norm": 0.19760510325431824, "learning_rate": 2.1220930407581762e-05, "loss": 0.5439, "step": 1909 }, { "epoch": 0.5296727676095396, "grad_norm": 0.18620994687080383, "learning_rate": 2.121697377095011e-05, "loss": 0.5612, "step": 1910 }, { "epoch": 0.5299500831946755, "grad_norm": 0.1856573075056076, "learning_rate": 2.121301543340593e-05, "loss": 0.5267, "step": 1911 }, { "epoch": 0.5302273987798114, "grad_norm": 0.19919690489768982, "learning_rate": 2.1209055395721586e-05, "loss": 0.5499, "step": 1912 }, { "epoch": 0.5305047143649473, "grad_norm": 0.18541640043258667, "learning_rate": 2.1205093658669793e-05, "loss": 0.5693, "step": 1913 }, { "epoch": 0.5307820299500832, "grad_norm": 0.18908998370170593, "learning_rate": 2.120113022302359e-05, "loss": 0.5421, "step": 1914 }, { "epoch": 0.531059345535219, "grad_norm": 0.19005346298217773, "learning_rate": 2.119716508955635e-05, "loss": 0.5475, "step": 1915 }, { "epoch": 0.5313366611203549, "grad_norm": 0.19358742237091064, "learning_rate": 2.1193198259041774e-05, "loss": 0.5671, "step": 1916 }, { "epoch": 0.5316139767054908, "grad_norm": 0.19891729950904846, "learning_rate": 2.1189229732253894e-05, "loss": 0.5623, "step": 1917 }, { "epoch": 0.5318912922906267, "grad_norm": 0.1928594410419464, "learning_rate": 2.1185259509967082e-05, "loss": 0.5467, "step": 1918 }, { "epoch": 0.5321686078757626, "grad_norm": 0.21051675081253052, "learning_rate": 2.118128759295602e-05, "loss": 0.5504, "step": 1919 }, { "epoch": 0.5324459234608985, "grad_norm": 0.1916693150997162, "learning_rate": 2.1177313981995745e-05, "loss": 0.5376, "step": 1920 }, { "epoch": 0.5327232390460344, "grad_norm": 0.18833374977111816, "learning_rate": 2.1173338677861616e-05, "loss": 0.526, "step": 1921 }, { "epoch": 0.5330005546311702, "grad_norm": 0.1901186853647232, "learning_rate": 2.116936168132931e-05, "loss": 0.5658, "step": 1922 }, { "epoch": 0.5332778702163061, "grad_norm": 0.19015184044837952, "learning_rate": 2.1165382993174848e-05, "loss": 0.5703, "step": 1923 }, { "epoch": 0.533555185801442, "grad_norm": 0.17683614790439606, "learning_rate": 2.116140261417458e-05, "loss": 0.5384, "step": 1924 }, { "epoch": 0.5338325013865779, "grad_norm": 0.18865369260311127, "learning_rate": 2.1157420545105187e-05, "loss": 0.5473, "step": 1925 }, { "epoch": 0.5341098169717138, "grad_norm": 0.1918121576309204, "learning_rate": 2.1153436786743668e-05, "loss": 0.5587, "step": 1926 }, { "epoch": 0.5343871325568497, "grad_norm": 0.20620866119861603, "learning_rate": 2.1149451339867363e-05, "loss": 0.5358, "step": 1927 }, { "epoch": 0.5346644481419855, "grad_norm": 0.19819065928459167, "learning_rate": 2.114546420525394e-05, "loss": 0.5681, "step": 1928 }, { "epoch": 0.5349417637271214, "grad_norm": 0.19372405111789703, "learning_rate": 2.114147538368139e-05, "loss": 0.5615, "step": 1929 }, { "epoch": 0.5352190793122573, "grad_norm": 0.19177958369255066, "learning_rate": 2.1137484875928048e-05, "loss": 0.5336, "step": 1930 }, { "epoch": 0.5354963948973932, "grad_norm": 0.1978660523891449, "learning_rate": 2.1133492682772556e-05, "loss": 0.5465, "step": 1931 }, { "epoch": 0.5357737104825291, "grad_norm": 0.19225792586803436, "learning_rate": 2.1129498804993902e-05, "loss": 0.5548, "step": 1932 }, { "epoch": 0.536051026067665, "grad_norm": 0.19737133383750916, "learning_rate": 2.1125503243371398e-05, "loss": 0.5636, "step": 1933 }, { "epoch": 0.5363283416528009, "grad_norm": 0.19044062495231628, "learning_rate": 2.112150599868468e-05, "loss": 0.5595, "step": 1934 }, { "epoch": 0.5366056572379367, "grad_norm": 0.18862438201904297, "learning_rate": 2.1117507071713724e-05, "loss": 0.5594, "step": 1935 }, { "epoch": 0.5368829728230726, "grad_norm": 0.20726455748081207, "learning_rate": 2.111350646323882e-05, "loss": 0.5678, "step": 1936 }, { "epoch": 0.5371602884082085, "grad_norm": 0.1967361867427826, "learning_rate": 2.1109504174040594e-05, "loss": 0.5518, "step": 1937 }, { "epoch": 0.5374376039933444, "grad_norm": 0.19529032707214355, "learning_rate": 2.1105500204899997e-05, "loss": 0.5775, "step": 1938 }, { "epoch": 0.5377149195784803, "grad_norm": 0.19610485434532166, "learning_rate": 2.110149455659831e-05, "loss": 0.563, "step": 1939 }, { "epoch": 0.5379922351636162, "grad_norm": 0.18098145723342896, "learning_rate": 2.109748722991715e-05, "loss": 0.5401, "step": 1940 }, { "epoch": 0.538269550748752, "grad_norm": 0.21725578606128693, "learning_rate": 2.109347822563844e-05, "loss": 0.5671, "step": 1941 }, { "epoch": 0.5385468663338879, "grad_norm": 0.20272882282733917, "learning_rate": 2.108946754454445e-05, "loss": 0.5699, "step": 1942 }, { "epoch": 0.5388241819190238, "grad_norm": 0.18334084749221802, "learning_rate": 2.108545518741777e-05, "loss": 0.5508, "step": 1943 }, { "epoch": 0.5391014975041597, "grad_norm": 0.20137275755405426, "learning_rate": 2.1081441155041314e-05, "loss": 0.5315, "step": 1944 }, { "epoch": 0.5393788130892956, "grad_norm": 0.21601121127605438, "learning_rate": 2.1077425448198327e-05, "loss": 0.5415, "step": 1945 }, { "epoch": 0.5396561286744315, "grad_norm": 0.19099989533424377, "learning_rate": 2.107340806767238e-05, "loss": 0.5492, "step": 1946 }, { "epoch": 0.5399334442595674, "grad_norm": 0.18626771867275238, "learning_rate": 2.106938901424737e-05, "loss": 0.5622, "step": 1947 }, { "epoch": 0.5402107598447032, "grad_norm": 0.1979578286409378, "learning_rate": 2.1065368288707523e-05, "loss": 0.5659, "step": 1948 }, { "epoch": 0.5404880754298391, "grad_norm": 0.18457596004009247, "learning_rate": 2.1061345891837393e-05, "loss": 0.554, "step": 1949 }, { "epoch": 0.540765391014975, "grad_norm": 0.1944621354341507, "learning_rate": 2.1057321824421843e-05, "loss": 0.5574, "step": 1950 }, { "epoch": 0.5410427066001109, "grad_norm": 0.20393145084381104, "learning_rate": 2.1053296087246087e-05, "loss": 0.5557, "step": 1951 }, { "epoch": 0.5413200221852468, "grad_norm": 0.19211652874946594, "learning_rate": 2.1049268681095647e-05, "loss": 0.5626, "step": 1952 }, { "epoch": 0.5415973377703827, "grad_norm": 0.18954886496067047, "learning_rate": 2.1045239606756378e-05, "loss": 0.5481, "step": 1953 }, { "epoch": 0.5418746533555185, "grad_norm": 0.19707705080509186, "learning_rate": 2.1041208865014464e-05, "loss": 0.5435, "step": 1954 }, { "epoch": 0.5421519689406544, "grad_norm": 0.20506185293197632, "learning_rate": 2.10371764566564e-05, "loss": 0.5322, "step": 1955 }, { "epoch": 0.5424292845257903, "grad_norm": 0.19055700302124023, "learning_rate": 2.103314238246903e-05, "loss": 0.5513, "step": 1956 }, { "epoch": 0.5427066001109262, "grad_norm": 0.2124052196741104, "learning_rate": 2.102910664323949e-05, "loss": 0.574, "step": 1957 }, { "epoch": 0.5429839156960621, "grad_norm": 0.19025616347789764, "learning_rate": 2.1025069239755273e-05, "loss": 0.5342, "step": 1958 }, { "epoch": 0.543261231281198, "grad_norm": 0.18279728293418884, "learning_rate": 2.102103017280418e-05, "loss": 0.5542, "step": 1959 }, { "epoch": 0.5435385468663338, "grad_norm": 0.18862898647785187, "learning_rate": 2.101698944317434e-05, "loss": 0.5743, "step": 1960 }, { "epoch": 0.5438158624514697, "grad_norm": 0.18205633759498596, "learning_rate": 2.101294705165421e-05, "loss": 0.5597, "step": 1961 }, { "epoch": 0.5440931780366056, "grad_norm": 0.1904565840959549, "learning_rate": 2.100890299903256e-05, "loss": 0.5789, "step": 1962 }, { "epoch": 0.5443704936217415, "grad_norm": 0.19704897701740265, "learning_rate": 2.1004857286098495e-05, "loss": 0.5626, "step": 1963 }, { "epoch": 0.5446478092068774, "grad_norm": 0.1878540813922882, "learning_rate": 2.1000809913641445e-05, "loss": 0.5713, "step": 1964 }, { "epoch": 0.5449251247920133, "grad_norm": 0.2131820023059845, "learning_rate": 2.0996760882451148e-05, "loss": 0.5632, "step": 1965 }, { "epoch": 0.5452024403771492, "grad_norm": 0.19665499031543732, "learning_rate": 2.0992710193317693e-05, "loss": 0.5585, "step": 1966 }, { "epoch": 0.545479755962285, "grad_norm": 0.18704085052013397, "learning_rate": 2.0988657847031467e-05, "loss": 0.5534, "step": 1967 }, { "epoch": 0.5457570715474209, "grad_norm": 0.1849927306175232, "learning_rate": 2.0984603844383195e-05, "loss": 0.5654, "step": 1968 }, { "epoch": 0.5460343871325568, "grad_norm": 0.18812572956085205, "learning_rate": 2.0980548186163918e-05, "loss": 0.5412, "step": 1969 }, { "epoch": 0.5463117027176927, "grad_norm": 0.18602755665779114, "learning_rate": 2.0976490873165e-05, "loss": 0.5503, "step": 1970 }, { "epoch": 0.5465890183028286, "grad_norm": 0.19630911946296692, "learning_rate": 2.097243190617813e-05, "loss": 0.5687, "step": 1971 }, { "epoch": 0.5468663338879645, "grad_norm": 0.18516132235527039, "learning_rate": 2.0968371285995323e-05, "loss": 0.5561, "step": 1972 }, { "epoch": 0.5471436494731003, "grad_norm": 0.18923065066337585, "learning_rate": 2.0964309013408914e-05, "loss": 0.5477, "step": 1973 }, { "epoch": 0.5474209650582362, "grad_norm": 0.18436457216739655, "learning_rate": 2.096024508921156e-05, "loss": 0.56, "step": 1974 }, { "epoch": 0.5476982806433721, "grad_norm": 0.18705391883850098, "learning_rate": 2.095617951419624e-05, "loss": 0.536, "step": 1975 }, { "epoch": 0.547975596228508, "grad_norm": 0.18911142647266388, "learning_rate": 2.095211228915625e-05, "loss": 0.5431, "step": 1976 }, { "epoch": 0.5482529118136439, "grad_norm": 0.19025933742523193, "learning_rate": 2.0948043414885222e-05, "loss": 0.5352, "step": 1977 }, { "epoch": 0.5485302273987798, "grad_norm": 0.18421220779418945, "learning_rate": 2.0943972892177094e-05, "loss": 0.536, "step": 1978 }, { "epoch": 0.5488075429839157, "grad_norm": 0.19545422494411469, "learning_rate": 2.0939900721826132e-05, "loss": 0.5563, "step": 1979 }, { "epoch": 0.5490848585690515, "grad_norm": 0.19028547406196594, "learning_rate": 2.0935826904626937e-05, "loss": 0.5081, "step": 1980 }, { "epoch": 0.5493621741541874, "grad_norm": 0.20487383008003235, "learning_rate": 2.0931751441374406e-05, "loss": 0.5426, "step": 1981 }, { "epoch": 0.5496394897393233, "grad_norm": 0.20360392332077026, "learning_rate": 2.0927674332863774e-05, "loss": 0.5686, "step": 1982 }, { "epoch": 0.5499168053244592, "grad_norm": 0.19089289009571075, "learning_rate": 2.092359557989059e-05, "loss": 0.5574, "step": 1983 }, { "epoch": 0.5501941209095951, "grad_norm": 0.1797301024198532, "learning_rate": 2.0919515183250736e-05, "loss": 0.5666, "step": 1984 }, { "epoch": 0.550471436494731, "grad_norm": 0.19856330752372742, "learning_rate": 2.0915433143740393e-05, "loss": 0.5373, "step": 1985 }, { "epoch": 0.5507487520798668, "grad_norm": 0.19353127479553223, "learning_rate": 2.0911349462156082e-05, "loss": 0.5454, "step": 1986 }, { "epoch": 0.5510260676650027, "grad_norm": 0.19544550776481628, "learning_rate": 2.090726413929464e-05, "loss": 0.5705, "step": 1987 }, { "epoch": 0.5513033832501386, "grad_norm": 0.199398934841156, "learning_rate": 2.0903177175953216e-05, "loss": 0.5431, "step": 1988 }, { "epoch": 0.5515806988352745, "grad_norm": 0.19894284009933472, "learning_rate": 2.0899088572929286e-05, "loss": 0.5658, "step": 1989 }, { "epoch": 0.5518580144204104, "grad_norm": 0.1806151121854782, "learning_rate": 2.0894998331020645e-05, "loss": 0.5748, "step": 1990 }, { "epoch": 0.5521353300055463, "grad_norm": 0.19128967821598053, "learning_rate": 2.089090645102541e-05, "loss": 0.5576, "step": 1991 }, { "epoch": 0.5524126455906821, "grad_norm": 0.20147933065891266, "learning_rate": 2.0886812933742013e-05, "loss": 0.5738, "step": 1992 }, { "epoch": 0.552689961175818, "grad_norm": 0.17531876266002655, "learning_rate": 2.0882717779969207e-05, "loss": 0.544, "step": 1993 }, { "epoch": 0.5529672767609539, "grad_norm": 0.19415581226348877, "learning_rate": 2.087862099050607e-05, "loss": 0.5315, "step": 1994 }, { "epoch": 0.5532445923460898, "grad_norm": 0.22229987382888794, "learning_rate": 2.087452256615199e-05, "loss": 0.5324, "step": 1995 }, { "epoch": 0.5535219079312257, "grad_norm": 0.18078093230724335, "learning_rate": 2.0870422507706676e-05, "loss": 0.5378, "step": 1996 }, { "epoch": 0.5537992235163616, "grad_norm": 0.18564875423908234, "learning_rate": 2.0866320815970157e-05, "loss": 0.5238, "step": 1997 }, { "epoch": 0.5540765391014975, "grad_norm": 0.19045381247997284, "learning_rate": 2.086221749174279e-05, "loss": 0.5527, "step": 1998 }, { "epoch": 0.5543538546866333, "grad_norm": 0.18606480956077576, "learning_rate": 2.0858112535825242e-05, "loss": 0.5132, "step": 1999 }, { "epoch": 0.5546311702717692, "grad_norm": 0.19520308077335358, "learning_rate": 2.0854005949018487e-05, "loss": 0.554, "step": 2000 }, { "epoch": 0.5549084858569051, "grad_norm": 0.19881142675876617, "learning_rate": 2.0849897732123838e-05, "loss": 0.5327, "step": 2001 }, { "epoch": 0.555185801442041, "grad_norm": 0.18831515312194824, "learning_rate": 2.0845787885942917e-05, "loss": 0.5541, "step": 2002 }, { "epoch": 0.5554631170271769, "grad_norm": 0.1954246610403061, "learning_rate": 2.0841676411277662e-05, "loss": 0.5744, "step": 2003 }, { "epoch": 0.5557404326123128, "grad_norm": 0.20168912410736084, "learning_rate": 2.0837563308930325e-05, "loss": 0.5704, "step": 2004 }, { "epoch": 0.5560177481974486, "grad_norm": 0.19541358947753906, "learning_rate": 2.0833448579703492e-05, "loss": 0.5555, "step": 2005 }, { "epoch": 0.5562950637825845, "grad_norm": 0.20079733431339264, "learning_rate": 2.082933222440005e-05, "loss": 0.5394, "step": 2006 }, { "epoch": 0.5565723793677204, "grad_norm": 0.1951141506433487, "learning_rate": 2.082521424382321e-05, "loss": 0.5314, "step": 2007 }, { "epoch": 0.5568496949528563, "grad_norm": 0.1864672750234604, "learning_rate": 2.0821094638776497e-05, "loss": 0.5365, "step": 2008 }, { "epoch": 0.5571270105379922, "grad_norm": 0.18978318572044373, "learning_rate": 2.0816973410063754e-05, "loss": 0.5199, "step": 2009 }, { "epoch": 0.5574043261231281, "grad_norm": 0.1931847780942917, "learning_rate": 2.0812850558489153e-05, "loss": 0.5701, "step": 2010 }, { "epoch": 0.557681641708264, "grad_norm": 0.18549029529094696, "learning_rate": 2.0808726084857157e-05, "loss": 0.4933, "step": 2011 }, { "epoch": 0.5579589572933998, "grad_norm": 0.18897327780723572, "learning_rate": 2.0804599989972567e-05, "loss": 0.58, "step": 2012 }, { "epoch": 0.5582362728785357, "grad_norm": 0.20237743854522705, "learning_rate": 2.0800472274640494e-05, "loss": 0.5577, "step": 2013 }, { "epoch": 0.5585135884636716, "grad_norm": 0.18905304372310638, "learning_rate": 2.0796342939666362e-05, "loss": 0.5763, "step": 2014 }, { "epoch": 0.5587909040488075, "grad_norm": 0.2023850679397583, "learning_rate": 2.079221198585592e-05, "loss": 0.5502, "step": 2015 }, { "epoch": 0.5590682196339434, "grad_norm": 0.19735179841518402, "learning_rate": 2.0788079414015215e-05, "loss": 0.5293, "step": 2016 }, { "epoch": 0.5593455352190793, "grad_norm": 0.18779948353767395, "learning_rate": 2.078394522495063e-05, "loss": 0.5524, "step": 2017 }, { "epoch": 0.5596228508042151, "grad_norm": 0.20868045091629028, "learning_rate": 2.0779809419468854e-05, "loss": 0.5724, "step": 2018 }, { "epoch": 0.559900166389351, "grad_norm": 0.21078519523143768, "learning_rate": 2.077567199837689e-05, "loss": 0.5398, "step": 2019 }, { "epoch": 0.5601774819744869, "grad_norm": 0.18992477655410767, "learning_rate": 2.0771532962482057e-05, "loss": 0.5665, "step": 2020 }, { "epoch": 0.5604547975596228, "grad_norm": 0.1859736144542694, "learning_rate": 2.0767392312591992e-05, "loss": 0.5429, "step": 2021 }, { "epoch": 0.5607321131447587, "grad_norm": 0.18972904980182648, "learning_rate": 2.0763250049514654e-05, "loss": 0.5595, "step": 2022 }, { "epoch": 0.5610094287298946, "grad_norm": 0.189973384141922, "learning_rate": 2.0759106174058293e-05, "loss": 0.5669, "step": 2023 }, { "epoch": 0.5612867443150305, "grad_norm": 0.19088168442249298, "learning_rate": 2.07549606870315e-05, "loss": 0.5654, "step": 2024 }, { "epoch": 0.5615640599001663, "grad_norm": 0.1857694834470749, "learning_rate": 2.075081358924317e-05, "loss": 0.5318, "step": 2025 }, { "epoch": 0.5618413754853022, "grad_norm": 0.19243502616882324, "learning_rate": 2.0746664881502496e-05, "loss": 0.549, "step": 2026 }, { "epoch": 0.5621186910704381, "grad_norm": 0.18214242160320282, "learning_rate": 2.0742514564619022e-05, "loss": 0.5318, "step": 2027 }, { "epoch": 0.562396006655574, "grad_norm": 0.18616682291030884, "learning_rate": 2.0738362639402574e-05, "loss": 0.5397, "step": 2028 }, { "epoch": 0.56267332224071, "grad_norm": 0.18527580797672272, "learning_rate": 2.07342091066633e-05, "loss": 0.5513, "step": 2029 }, { "epoch": 0.5629506378258459, "grad_norm": 0.20299869775772095, "learning_rate": 2.073005396721167e-05, "loss": 0.5437, "step": 2030 }, { "epoch": 0.5632279534109818, "grad_norm": 0.18229471147060394, "learning_rate": 2.072589722185846e-05, "loss": 0.5411, "step": 2031 }, { "epoch": 0.5635052689961176, "grad_norm": 0.19586963951587677, "learning_rate": 2.0721738871414763e-05, "loss": 0.5674, "step": 2032 }, { "epoch": 0.5637825845812535, "grad_norm": 0.21511641144752502, "learning_rate": 2.0717578916691977e-05, "loss": 0.5762, "step": 2033 }, { "epoch": 0.5640599001663894, "grad_norm": 0.18836332857608795, "learning_rate": 2.071341735850183e-05, "loss": 0.5599, "step": 2034 }, { "epoch": 0.5643372157515253, "grad_norm": 0.1830950826406479, "learning_rate": 2.070925419765634e-05, "loss": 0.539, "step": 2035 }, { "epoch": 0.5646145313366612, "grad_norm": 0.18749170005321503, "learning_rate": 2.070508943496786e-05, "loss": 0.5516, "step": 2036 }, { "epoch": 0.5648918469217971, "grad_norm": 0.18844857811927795, "learning_rate": 2.070092307124904e-05, "loss": 0.5301, "step": 2037 }, { "epoch": 0.5651691625069329, "grad_norm": 0.1957472264766693, "learning_rate": 2.0696755107312845e-05, "loss": 0.5612, "step": 2038 }, { "epoch": 0.5654464780920688, "grad_norm": 0.1901545226573944, "learning_rate": 2.0692585543972566e-05, "loss": 0.5529, "step": 2039 }, { "epoch": 0.5657237936772047, "grad_norm": 0.1877562254667282, "learning_rate": 2.0688414382041788e-05, "loss": 0.5324, "step": 2040 }, { "epoch": 0.5660011092623406, "grad_norm": 0.18741729855537415, "learning_rate": 2.068424162233441e-05, "loss": 0.5522, "step": 2041 }, { "epoch": 0.5662784248474765, "grad_norm": 0.18096144497394562, "learning_rate": 2.068006726566466e-05, "loss": 0.5382, "step": 2042 }, { "epoch": 0.5665557404326124, "grad_norm": 0.18831691145896912, "learning_rate": 2.0675891312847064e-05, "loss": 0.5547, "step": 2043 }, { "epoch": 0.5668330560177482, "grad_norm": 0.19189482927322388, "learning_rate": 2.0671713764696445e-05, "loss": 0.5699, "step": 2044 }, { "epoch": 0.5671103716028841, "grad_norm": 0.183754563331604, "learning_rate": 2.0667534622027974e-05, "loss": 0.5256, "step": 2045 }, { "epoch": 0.56738768718802, "grad_norm": 0.19887655973434448, "learning_rate": 2.0663353885657098e-05, "loss": 0.5792, "step": 2046 }, { "epoch": 0.5676650027731559, "grad_norm": 0.18769381940364838, "learning_rate": 2.0659171556399596e-05, "loss": 0.5679, "step": 2047 }, { "epoch": 0.5679423183582918, "grad_norm": 0.20135840773582458, "learning_rate": 2.0654987635071554e-05, "loss": 0.5766, "step": 2048 }, { "epoch": 0.5682196339434277, "grad_norm": 0.20204971730709076, "learning_rate": 2.065080212248936e-05, "loss": 0.602, "step": 2049 }, { "epoch": 0.5684969495285636, "grad_norm": 0.18772201240062714, "learning_rate": 2.0646615019469724e-05, "loss": 0.5584, "step": 2050 }, { "epoch": 0.5687742651136994, "grad_norm": 0.18956024944782257, "learning_rate": 2.064242632682965e-05, "loss": 0.573, "step": 2051 }, { "epoch": 0.5690515806988353, "grad_norm": 0.20082207024097443, "learning_rate": 2.0638236045386472e-05, "loss": 0.5478, "step": 2052 }, { "epoch": 0.5693288962839712, "grad_norm": 0.1947973519563675, "learning_rate": 2.063404417595783e-05, "loss": 0.5543, "step": 2053 }, { "epoch": 0.5696062118691071, "grad_norm": 0.19126874208450317, "learning_rate": 2.0629850719361654e-05, "loss": 0.5704, "step": 2054 }, { "epoch": 0.569883527454243, "grad_norm": 0.3870353698730469, "learning_rate": 2.062565567641621e-05, "loss": 0.5323, "step": 2055 }, { "epoch": 0.5701608430393789, "grad_norm": 0.20603255927562714, "learning_rate": 2.0621459047940056e-05, "loss": 0.574, "step": 2056 }, { "epoch": 0.5704381586245147, "grad_norm": 0.18535418808460236, "learning_rate": 2.0617260834752068e-05, "loss": 0.5358, "step": 2057 }, { "epoch": 0.5707154742096506, "grad_norm": 0.24727782607078552, "learning_rate": 2.061306103767143e-05, "loss": 0.5744, "step": 2058 }, { "epoch": 0.5709927897947865, "grad_norm": 0.19518110156059265, "learning_rate": 2.0608859657517633e-05, "loss": 0.5655, "step": 2059 }, { "epoch": 0.5712701053799224, "grad_norm": 0.19751004874706268, "learning_rate": 2.0604656695110476e-05, "loss": 0.5555, "step": 2060 }, { "epoch": 0.5715474209650583, "grad_norm": 0.18788260221481323, "learning_rate": 2.0600452151270068e-05, "loss": 0.5463, "step": 2061 }, { "epoch": 0.5718247365501942, "grad_norm": 0.23272345960140228, "learning_rate": 2.0596246026816826e-05, "loss": 0.5601, "step": 2062 }, { "epoch": 0.57210205213533, "grad_norm": 0.19826073944568634, "learning_rate": 2.059203832257148e-05, "loss": 0.5654, "step": 2063 }, { "epoch": 0.5723793677204659, "grad_norm": 0.19848833978176117, "learning_rate": 2.058782903935506e-05, "loss": 0.5666, "step": 2064 }, { "epoch": 0.5726566833056018, "grad_norm": 0.21063697338104248, "learning_rate": 2.0583618177988917e-05, "loss": 0.5421, "step": 2065 }, { "epoch": 0.5729339988907377, "grad_norm": 0.19619859755039215, "learning_rate": 2.0579405739294695e-05, "loss": 0.5343, "step": 2066 }, { "epoch": 0.5732113144758736, "grad_norm": 0.18998843431472778, "learning_rate": 2.057519172409435e-05, "loss": 0.5486, "step": 2067 }, { "epoch": 0.5734886300610095, "grad_norm": 0.19432705640792847, "learning_rate": 2.0570976133210152e-05, "loss": 0.528, "step": 2068 }, { "epoch": 0.5737659456461454, "grad_norm": 0.18903441727161407, "learning_rate": 2.0566758967464677e-05, "loss": 0.5529, "step": 2069 }, { "epoch": 0.5740432612312812, "grad_norm": 0.19154904782772064, "learning_rate": 2.05625402276808e-05, "loss": 0.5519, "step": 2070 }, { "epoch": 0.5743205768164171, "grad_norm": 0.19544386863708496, "learning_rate": 2.0558319914681713e-05, "loss": 0.5444, "step": 2071 }, { "epoch": 0.574597892401553, "grad_norm": 0.1918288618326187, "learning_rate": 2.055409802929091e-05, "loss": 0.5646, "step": 2072 }, { "epoch": 0.5748752079866889, "grad_norm": 0.19810417294502258, "learning_rate": 2.054987457233219e-05, "loss": 0.5799, "step": 2073 }, { "epoch": 0.5751525235718248, "grad_norm": 0.20202040672302246, "learning_rate": 2.0545649544629665e-05, "loss": 0.5555, "step": 2074 }, { "epoch": 0.5754298391569607, "grad_norm": 0.19962945580482483, "learning_rate": 2.0541422947007748e-05, "loss": 0.5245, "step": 2075 }, { "epoch": 0.5757071547420965, "grad_norm": 0.19925406575202942, "learning_rate": 2.053719478029116e-05, "loss": 0.5755, "step": 2076 }, { "epoch": 0.5759844703272324, "grad_norm": 0.1866733878850937, "learning_rate": 2.0532965045304932e-05, "loss": 0.5339, "step": 2077 }, { "epoch": 0.5762617859123683, "grad_norm": 0.22320881485939026, "learning_rate": 2.052873374287439e-05, "loss": 0.5111, "step": 2078 }, { "epoch": 0.5765391014975042, "grad_norm": 0.19458182156085968, "learning_rate": 2.0524500873825182e-05, "loss": 0.5625, "step": 2079 }, { "epoch": 0.5768164170826401, "grad_norm": 0.18524032831192017, "learning_rate": 2.0520266438983242e-05, "loss": 0.5876, "step": 2080 }, { "epoch": 0.577093732667776, "grad_norm": 0.17904391884803772, "learning_rate": 2.0516030439174833e-05, "loss": 0.5541, "step": 2081 }, { "epoch": 0.5773710482529119, "grad_norm": 0.1819324642419815, "learning_rate": 2.05117928752265e-05, "loss": 0.5326, "step": 2082 }, { "epoch": 0.5776483638380477, "grad_norm": 0.18749113380908966, "learning_rate": 2.0507553747965114e-05, "loss": 0.5502, "step": 2083 }, { "epoch": 0.5779256794231836, "grad_norm": 0.1892707794904709, "learning_rate": 2.050331305821783e-05, "loss": 0.5533, "step": 2084 }, { "epoch": 0.5782029950083195, "grad_norm": 0.1937221884727478, "learning_rate": 2.0499070806812126e-05, "loss": 0.5615, "step": 2085 }, { "epoch": 0.5784803105934554, "grad_norm": 0.20081382989883423, "learning_rate": 2.0494826994575777e-05, "loss": 0.5424, "step": 2086 }, { "epoch": 0.5787576261785913, "grad_norm": 0.18762163817882538, "learning_rate": 2.0490581622336863e-05, "loss": 0.5621, "step": 2087 }, { "epoch": 0.5790349417637272, "grad_norm": 0.18031221628189087, "learning_rate": 2.048633469092377e-05, "loss": 0.5045, "step": 2088 }, { "epoch": 0.579312257348863, "grad_norm": 0.21820896863937378, "learning_rate": 2.048208620116518e-05, "loss": 0.5676, "step": 2089 }, { "epoch": 0.5795895729339989, "grad_norm": 0.19855897128582, "learning_rate": 2.0477836153890095e-05, "loss": 0.5461, "step": 2090 }, { "epoch": 0.5798668885191348, "grad_norm": 0.1899833232164383, "learning_rate": 2.0473584549927806e-05, "loss": 0.5519, "step": 2091 }, { "epoch": 0.5801442041042707, "grad_norm": 0.1892709583044052, "learning_rate": 2.0469331390107914e-05, "loss": 0.5255, "step": 2092 }, { "epoch": 0.5804215196894066, "grad_norm": 0.19004952907562256, "learning_rate": 2.0465076675260326e-05, "loss": 0.5468, "step": 2093 }, { "epoch": 0.5806988352745425, "grad_norm": 0.18664851784706116, "learning_rate": 2.0460820406215247e-05, "loss": 0.5333, "step": 2094 }, { "epoch": 0.5809761508596784, "grad_norm": 0.18904832005500793, "learning_rate": 2.045656258380319e-05, "loss": 0.5653, "step": 2095 }, { "epoch": 0.5812534664448142, "grad_norm": 0.19123364984989166, "learning_rate": 2.0452303208854966e-05, "loss": 0.5368, "step": 2096 }, { "epoch": 0.5815307820299501, "grad_norm": 0.1906225085258484, "learning_rate": 2.0448042282201694e-05, "loss": 0.5672, "step": 2097 }, { "epoch": 0.581808097615086, "grad_norm": 0.19360537827014923, "learning_rate": 2.0443779804674796e-05, "loss": 0.548, "step": 2098 }, { "epoch": 0.5820854132002219, "grad_norm": 0.21694041788578033, "learning_rate": 2.0439515777105987e-05, "loss": 0.5724, "step": 2099 }, { "epoch": 0.5823627287853578, "grad_norm": 0.191674143075943, "learning_rate": 2.04352502003273e-05, "loss": 0.5295, "step": 2100 }, { "epoch": 0.5826400443704937, "grad_norm": 0.19594216346740723, "learning_rate": 2.0430983075171055e-05, "loss": 0.5412, "step": 2101 }, { "epoch": 0.5829173599556295, "grad_norm": 0.19115525484085083, "learning_rate": 2.0426714402469887e-05, "loss": 0.5368, "step": 2102 }, { "epoch": 0.5831946755407654, "grad_norm": 0.19171211123466492, "learning_rate": 2.042244418305673e-05, "loss": 0.57, "step": 2103 }, { "epoch": 0.5834719911259013, "grad_norm": 0.18373924493789673, "learning_rate": 2.0418172417764802e-05, "loss": 0.5648, "step": 2104 }, { "epoch": 0.5837493067110372, "grad_norm": 0.20192669332027435, "learning_rate": 2.0413899107427652e-05, "loss": 0.5699, "step": 2105 }, { "epoch": 0.5840266222961731, "grad_norm": 0.18735186755657196, "learning_rate": 2.0409624252879112e-05, "loss": 0.551, "step": 2106 }, { "epoch": 0.584303937881309, "grad_norm": 0.19031678140163422, "learning_rate": 2.0405347854953316e-05, "loss": 0.5313, "step": 2107 }, { "epoch": 0.5845812534664449, "grad_norm": 0.19912661612033844, "learning_rate": 2.0401069914484707e-05, "loss": 0.5815, "step": 2108 }, { "epoch": 0.5848585690515807, "grad_norm": 0.1985718458890915, "learning_rate": 2.0396790432308025e-05, "loss": 0.5364, "step": 2109 }, { "epoch": 0.5851358846367166, "grad_norm": 0.20383597910404205, "learning_rate": 2.0392509409258303e-05, "loss": 0.5747, "step": 2110 }, { "epoch": 0.5854132002218525, "grad_norm": 0.19357682764530182, "learning_rate": 2.038822684617089e-05, "loss": 0.5508, "step": 2111 }, { "epoch": 0.5856905158069884, "grad_norm": 0.18165504932403564, "learning_rate": 2.0383942743881425e-05, "loss": 0.5234, "step": 2112 }, { "epoch": 0.5859678313921243, "grad_norm": 0.18874984979629517, "learning_rate": 2.0379657103225852e-05, "loss": 0.5691, "step": 2113 }, { "epoch": 0.5862451469772602, "grad_norm": 0.18956932425498962, "learning_rate": 2.0375369925040406e-05, "loss": 0.5664, "step": 2114 }, { "epoch": 0.586522462562396, "grad_norm": 0.17788472771644592, "learning_rate": 2.0371081210161634e-05, "loss": 0.5473, "step": 2115 }, { "epoch": 0.5867997781475319, "grad_norm": 0.22669924795627594, "learning_rate": 2.0366790959426378e-05, "loss": 0.5603, "step": 2116 }, { "epoch": 0.5870770937326678, "grad_norm": 0.1913762092590332, "learning_rate": 2.0362499173671784e-05, "loss": 0.5698, "step": 2117 }, { "epoch": 0.5873544093178037, "grad_norm": 0.19173979759216309, "learning_rate": 2.0358205853735287e-05, "loss": 0.5735, "step": 2118 }, { "epoch": 0.5876317249029396, "grad_norm": 0.18526272475719452, "learning_rate": 2.035391100045462e-05, "loss": 0.5462, "step": 2119 }, { "epoch": 0.5879090404880755, "grad_norm": 0.20534314215183258, "learning_rate": 2.034961461466784e-05, "loss": 0.5643, "step": 2120 }, { "epoch": 0.5881863560732113, "grad_norm": 0.19565671682357788, "learning_rate": 2.0345316697213273e-05, "loss": 0.5599, "step": 2121 }, { "epoch": 0.5884636716583472, "grad_norm": 0.19610898196697235, "learning_rate": 2.034101724892956e-05, "loss": 0.5597, "step": 2122 }, { "epoch": 0.5887409872434831, "grad_norm": 0.19708271324634552, "learning_rate": 2.033671627065564e-05, "loss": 0.5494, "step": 2123 }, { "epoch": 0.589018302828619, "grad_norm": 0.285875141620636, "learning_rate": 2.033241376323075e-05, "loss": 0.5553, "step": 2124 }, { "epoch": 0.5892956184137549, "grad_norm": 0.1879926174879074, "learning_rate": 2.0328109727494417e-05, "loss": 0.5464, "step": 2125 }, { "epoch": 0.5895729339988908, "grad_norm": 0.20556902885437012, "learning_rate": 2.032380416428647e-05, "loss": 0.5533, "step": 2126 }, { "epoch": 0.5898502495840267, "grad_norm": 0.1847870945930481, "learning_rate": 2.0319497074447043e-05, "loss": 0.5629, "step": 2127 }, { "epoch": 0.5901275651691625, "grad_norm": 0.19035299122333527, "learning_rate": 2.0315188458816567e-05, "loss": 0.5491, "step": 2128 }, { "epoch": 0.5904048807542984, "grad_norm": 0.17980261147022247, "learning_rate": 2.031087831823576e-05, "loss": 0.5281, "step": 2129 }, { "epoch": 0.5906821963394343, "grad_norm": 0.19888748228549957, "learning_rate": 2.030656665354565e-05, "loss": 0.5443, "step": 2130 }, { "epoch": 0.5909595119245702, "grad_norm": 0.17473064363002777, "learning_rate": 2.0302253465587555e-05, "loss": 0.5263, "step": 2131 }, { "epoch": 0.5912368275097061, "grad_norm": 0.1915203481912613, "learning_rate": 2.0297938755203088e-05, "loss": 0.5609, "step": 2132 }, { "epoch": 0.591514143094842, "grad_norm": 0.1980845183134079, "learning_rate": 2.029362252323417e-05, "loss": 0.5549, "step": 2133 }, { "epoch": 0.5917914586799778, "grad_norm": 0.18366824090480804, "learning_rate": 2.028930477052301e-05, "loss": 0.5333, "step": 2134 }, { "epoch": 0.5920687742651137, "grad_norm": 0.1946541965007782, "learning_rate": 2.0284985497912118e-05, "loss": 0.588, "step": 2135 }, { "epoch": 0.5923460898502496, "grad_norm": 0.1904669553041458, "learning_rate": 2.028066470624429e-05, "loss": 0.5602, "step": 2136 }, { "epoch": 0.5926234054353855, "grad_norm": 0.1851152926683426, "learning_rate": 2.0276342396362636e-05, "loss": 0.5461, "step": 2137 }, { "epoch": 0.5929007210205214, "grad_norm": 0.18607455492019653, "learning_rate": 2.0272018569110552e-05, "loss": 0.542, "step": 2138 }, { "epoch": 0.5931780366056573, "grad_norm": 0.19023166596889496, "learning_rate": 2.0267693225331726e-05, "loss": 0.5564, "step": 2139 }, { "epoch": 0.5934553521907932, "grad_norm": 0.18977974355220795, "learning_rate": 2.0263366365870152e-05, "loss": 0.5778, "step": 2140 }, { "epoch": 0.593732667775929, "grad_norm": 0.2097538560628891, "learning_rate": 2.0259037991570116e-05, "loss": 0.5822, "step": 2141 }, { "epoch": 0.5940099833610649, "grad_norm": 0.18402041494846344, "learning_rate": 2.0254708103276193e-05, "loss": 0.5207, "step": 2142 }, { "epoch": 0.5942872989462008, "grad_norm": 0.196335569024086, "learning_rate": 2.025037670183326e-05, "loss": 0.5509, "step": 2143 }, { "epoch": 0.5945646145313367, "grad_norm": 0.18417152762413025, "learning_rate": 2.0246043788086498e-05, "loss": 0.5418, "step": 2144 }, { "epoch": 0.5948419301164726, "grad_norm": 0.1963946372270584, "learning_rate": 2.024170936288136e-05, "loss": 0.5717, "step": 2145 }, { "epoch": 0.5951192457016085, "grad_norm": 0.20180796086788177, "learning_rate": 2.023737342706361e-05, "loss": 0.5457, "step": 2146 }, { "epoch": 0.5953965612867443, "grad_norm": 0.18826082348823547, "learning_rate": 2.0233035981479316e-05, "loss": 0.545, "step": 2147 }, { "epoch": 0.5956738768718802, "grad_norm": 0.1920921951532364, "learning_rate": 2.0228697026974808e-05, "loss": 0.5584, "step": 2148 }, { "epoch": 0.5959511924570161, "grad_norm": 0.20350618660449982, "learning_rate": 2.0224356564396747e-05, "loss": 0.5343, "step": 2149 }, { "epoch": 0.596228508042152, "grad_norm": 0.19070284068584442, "learning_rate": 2.0220014594592068e-05, "loss": 0.5573, "step": 2150 }, { "epoch": 0.5965058236272879, "grad_norm": 0.1937059462070465, "learning_rate": 2.0215671118408004e-05, "loss": 0.5548, "step": 2151 }, { "epoch": 0.5967831392124238, "grad_norm": 0.18935304880142212, "learning_rate": 2.021132613669208e-05, "loss": 0.5746, "step": 2152 }, { "epoch": 0.5970604547975596, "grad_norm": 0.20879191160202026, "learning_rate": 2.0206979650292117e-05, "loss": 0.5602, "step": 2153 }, { "epoch": 0.5973377703826955, "grad_norm": 0.19923923909664154, "learning_rate": 2.020263166005624e-05, "loss": 0.5441, "step": 2154 }, { "epoch": 0.5976150859678314, "grad_norm": 0.1885758638381958, "learning_rate": 2.019828216683284e-05, "loss": 0.5362, "step": 2155 }, { "epoch": 0.5978924015529673, "grad_norm": 0.18912041187286377, "learning_rate": 2.019393117147063e-05, "loss": 0.5488, "step": 2156 }, { "epoch": 0.5981697171381032, "grad_norm": 0.18454459309577942, "learning_rate": 2.0189578674818603e-05, "loss": 0.5386, "step": 2157 }, { "epoch": 0.5984470327232391, "grad_norm": 0.1803792268037796, "learning_rate": 2.018522467772604e-05, "loss": 0.5444, "step": 2158 }, { "epoch": 0.598724348308375, "grad_norm": 0.18980631232261658, "learning_rate": 2.0180869181042532e-05, "loss": 0.5521, "step": 2159 }, { "epoch": 0.5990016638935108, "grad_norm": 0.18810968101024628, "learning_rate": 2.0176512185617945e-05, "loss": 0.5342, "step": 2160 }, { "epoch": 0.5992789794786467, "grad_norm": 0.24775730073451996, "learning_rate": 2.0172153692302445e-05, "loss": 0.533, "step": 2161 }, { "epoch": 0.5995562950637826, "grad_norm": 0.19899478554725647, "learning_rate": 2.0167793701946488e-05, "loss": 0.5394, "step": 2162 }, { "epoch": 0.5998336106489185, "grad_norm": 0.19542957842350006, "learning_rate": 2.0163432215400822e-05, "loss": 0.5379, "step": 2163 }, { "epoch": 0.6001109262340544, "grad_norm": 0.2002883106470108, "learning_rate": 2.0159069233516504e-05, "loss": 0.5672, "step": 2164 }, { "epoch": 0.6003882418191903, "grad_norm": 0.18904021382331848, "learning_rate": 2.0154704757144845e-05, "loss": 0.5483, "step": 2165 }, { "epoch": 0.6006655574043261, "grad_norm": 0.19162911176681519, "learning_rate": 2.0150338787137486e-05, "loss": 0.5552, "step": 2166 }, { "epoch": 0.600942872989462, "grad_norm": 0.18318617343902588, "learning_rate": 2.014597132434633e-05, "loss": 0.5548, "step": 2167 }, { "epoch": 0.6012201885745979, "grad_norm": 0.1864987015724182, "learning_rate": 2.01416023696236e-05, "loss": 0.5401, "step": 2168 }, { "epoch": 0.6014975041597338, "grad_norm": 0.18252375721931458, "learning_rate": 2.0137231923821785e-05, "loss": 0.5547, "step": 2169 }, { "epoch": 0.6017748197448697, "grad_norm": 0.196000874042511, "learning_rate": 2.013285998779367e-05, "loss": 0.5518, "step": 2170 }, { "epoch": 0.6020521353300056, "grad_norm": 0.17955927550792694, "learning_rate": 2.0128486562392354e-05, "loss": 0.5312, "step": 2171 }, { "epoch": 0.6023294509151415, "grad_norm": 0.1867658495903015, "learning_rate": 2.0124111648471192e-05, "loss": 0.5374, "step": 2172 }, { "epoch": 0.6026067665002773, "grad_norm": 0.20001055300235748, "learning_rate": 2.0119735246883852e-05, "loss": 0.5501, "step": 2173 }, { "epoch": 0.6028840820854132, "grad_norm": 0.19283756613731384, "learning_rate": 2.011535735848428e-05, "loss": 0.5505, "step": 2174 }, { "epoch": 0.6031613976705491, "grad_norm": 0.18642939627170563, "learning_rate": 2.011097798412673e-05, "loss": 0.5246, "step": 2175 }, { "epoch": 0.603438713255685, "grad_norm": 0.1911175698041916, "learning_rate": 2.0106597124665716e-05, "loss": 0.5134, "step": 2176 }, { "epoch": 0.6037160288408209, "grad_norm": 0.18297746777534485, "learning_rate": 2.0102214780956073e-05, "loss": 0.5598, "step": 2177 }, { "epoch": 0.6039933444259568, "grad_norm": 0.19483189284801483, "learning_rate": 2.0097830953852914e-05, "loss": 0.5502, "step": 2178 }, { "epoch": 0.6042706600110926, "grad_norm": 0.18785777688026428, "learning_rate": 2.009344564421163e-05, "loss": 0.5384, "step": 2179 }, { "epoch": 0.6045479755962285, "grad_norm": 0.19045081734657288, "learning_rate": 2.0089058852887923e-05, "loss": 0.5311, "step": 2180 }, { "epoch": 0.6048252911813644, "grad_norm": 0.19081301987171173, "learning_rate": 2.0084670580737758e-05, "loss": 0.5593, "step": 2181 }, { "epoch": 0.6051026067665003, "grad_norm": 0.19843098521232605, "learning_rate": 2.0080280828617414e-05, "loss": 0.557, "step": 2182 }, { "epoch": 0.6053799223516362, "grad_norm": 0.1962948590517044, "learning_rate": 2.0075889597383446e-05, "loss": 0.5591, "step": 2183 }, { "epoch": 0.6056572379367721, "grad_norm": 0.193936288356781, "learning_rate": 2.0071496887892693e-05, "loss": 0.5721, "step": 2184 }, { "epoch": 0.605934553521908, "grad_norm": 0.19298399984836578, "learning_rate": 2.00671027010023e-05, "loss": 0.5734, "step": 2185 }, { "epoch": 0.6062118691070438, "grad_norm": 0.20237858593463898, "learning_rate": 2.006270703756968e-05, "loss": 0.548, "step": 2186 }, { "epoch": 0.6064891846921797, "grad_norm": 0.19070473313331604, "learning_rate": 2.0058309898452552e-05, "loss": 0.5187, "step": 2187 }, { "epoch": 0.6067665002773156, "grad_norm": 0.18649962544441223, "learning_rate": 2.0053911284508902e-05, "loss": 0.5465, "step": 2188 }, { "epoch": 0.6070438158624515, "grad_norm": 0.21800090372562408, "learning_rate": 2.0049511196597027e-05, "loss": 0.5665, "step": 2189 }, { "epoch": 0.6073211314475874, "grad_norm": 0.18259546160697937, "learning_rate": 2.00451096355755e-05, "loss": 0.5456, "step": 2190 }, { "epoch": 0.6075984470327233, "grad_norm": 0.19721095263957977, "learning_rate": 2.0040706602303173e-05, "loss": 0.5752, "step": 2191 }, { "epoch": 0.6078757626178591, "grad_norm": 0.19083106517791748, "learning_rate": 2.0036302097639204e-05, "loss": 0.552, "step": 2192 }, { "epoch": 0.608153078202995, "grad_norm": 0.18962670862674713, "learning_rate": 2.0031896122443023e-05, "loss": 0.5729, "step": 2193 }, { "epoch": 0.6084303937881309, "grad_norm": 0.19744956493377686, "learning_rate": 2.0027488677574358e-05, "loss": 0.5494, "step": 2194 }, { "epoch": 0.6087077093732668, "grad_norm": 0.19325025379657745, "learning_rate": 2.0023079763893208e-05, "loss": 0.5389, "step": 2195 }, { "epoch": 0.6089850249584027, "grad_norm": 0.1866646558046341, "learning_rate": 2.0018669382259885e-05, "loss": 0.5703, "step": 2196 }, { "epoch": 0.6092623405435386, "grad_norm": 0.21793098747730255, "learning_rate": 2.001425753353496e-05, "loss": 0.5585, "step": 2197 }, { "epoch": 0.6095396561286744, "grad_norm": 0.18613992631435394, "learning_rate": 2.0009844218579298e-05, "loss": 0.544, "step": 2198 }, { "epoch": 0.6098169717138103, "grad_norm": 0.1990228146314621, "learning_rate": 2.0005429438254063e-05, "loss": 0.5681, "step": 2199 }, { "epoch": 0.6100942872989462, "grad_norm": 0.19253648817539215, "learning_rate": 2.000101319342069e-05, "loss": 0.5451, "step": 2200 }, { "epoch": 0.6103716028840821, "grad_norm": 0.18924476206302643, "learning_rate": 1.9996595484940915e-05, "loss": 0.5684, "step": 2201 }, { "epoch": 0.610648918469218, "grad_norm": 0.18552450835704803, "learning_rate": 1.9992176313676737e-05, "loss": 0.5307, "step": 2202 }, { "epoch": 0.6109262340543539, "grad_norm": 0.19528694450855255, "learning_rate": 1.9987755680490456e-05, "loss": 0.5598, "step": 2203 }, { "epoch": 0.6112035496394898, "grad_norm": 0.1909715086221695, "learning_rate": 1.998333358624466e-05, "loss": 0.5647, "step": 2204 }, { "epoch": 0.6114808652246256, "grad_norm": 0.19347867369651794, "learning_rate": 1.9978910031802218e-05, "loss": 0.5726, "step": 2205 }, { "epoch": 0.6117581808097615, "grad_norm": 0.22693443298339844, "learning_rate": 1.9974485018026273e-05, "loss": 0.5403, "step": 2206 }, { "epoch": 0.6120354963948974, "grad_norm": 0.1854747086763382, "learning_rate": 1.997005854578027e-05, "loss": 0.5733, "step": 2207 }, { "epoch": 0.6123128119800333, "grad_norm": 0.2657473683357239, "learning_rate": 1.9965630615927932e-05, "loss": 0.5516, "step": 2208 }, { "epoch": 0.6125901275651692, "grad_norm": 0.19607201218605042, "learning_rate": 1.996120122933326e-05, "loss": 0.5525, "step": 2209 }, { "epoch": 0.6128674431503051, "grad_norm": 0.17779147624969482, "learning_rate": 1.9956770386860547e-05, "loss": 0.5377, "step": 2210 }, { "epoch": 0.6131447587354409, "grad_norm": 0.2034800499677658, "learning_rate": 1.9952338089374366e-05, "loss": 0.5434, "step": 2211 }, { "epoch": 0.6134220743205768, "grad_norm": 0.18624994158744812, "learning_rate": 1.9947904337739582e-05, "loss": 0.5301, "step": 2212 }, { "epoch": 0.6136993899057127, "grad_norm": 0.18844860792160034, "learning_rate": 1.9943469132821334e-05, "loss": 0.5508, "step": 2213 }, { "epoch": 0.6139767054908486, "grad_norm": 0.20432956516742706, "learning_rate": 1.9939032475485043e-05, "loss": 0.5565, "step": 2214 }, { "epoch": 0.6142540210759845, "grad_norm": 0.18925762176513672, "learning_rate": 1.9934594366596423e-05, "loss": 0.5745, "step": 2215 }, { "epoch": 0.6145313366611204, "grad_norm": 0.20550455152988434, "learning_rate": 1.993015480702147e-05, "loss": 0.5689, "step": 2216 }, { "epoch": 0.6148086522462562, "grad_norm": 0.18953469395637512, "learning_rate": 1.992571379762645e-05, "loss": 0.5393, "step": 2217 }, { "epoch": 0.6150859678313921, "grad_norm": 0.19926683604717255, "learning_rate": 1.9921271339277935e-05, "loss": 0.5563, "step": 2218 }, { "epoch": 0.615363283416528, "grad_norm": 0.18314692378044128, "learning_rate": 1.9916827432842756e-05, "loss": 0.5353, "step": 2219 }, { "epoch": 0.6156405990016639, "grad_norm": 0.20692428946495056, "learning_rate": 1.991238207918804e-05, "loss": 0.57, "step": 2220 }, { "epoch": 0.6159179145867998, "grad_norm": 0.202706441283226, "learning_rate": 1.990793527918119e-05, "loss": 0.5496, "step": 2221 }, { "epoch": 0.6161952301719357, "grad_norm": 0.18222178518772125, "learning_rate": 1.99034870336899e-05, "loss": 0.5553, "step": 2222 }, { "epoch": 0.6164725457570716, "grad_norm": 0.20487068593502045, "learning_rate": 1.9899037343582135e-05, "loss": 0.5611, "step": 2223 }, { "epoch": 0.6167498613422074, "grad_norm": 0.18165314197540283, "learning_rate": 1.989458620972615e-05, "loss": 0.5561, "step": 2224 }, { "epoch": 0.6170271769273433, "grad_norm": 0.1925044059753418, "learning_rate": 1.9890133632990488e-05, "loss": 0.5866, "step": 2225 }, { "epoch": 0.6173044925124792, "grad_norm": 0.19239196181297302, "learning_rate": 1.988567961424395e-05, "loss": 0.5629, "step": 2226 }, { "epoch": 0.6175818080976151, "grad_norm": 0.19511006772518158, "learning_rate": 1.9881224154355638e-05, "loss": 0.5484, "step": 2227 }, { "epoch": 0.617859123682751, "grad_norm": 0.18884329497814178, "learning_rate": 1.9876767254194932e-05, "loss": 0.5541, "step": 2228 }, { "epoch": 0.6181364392678869, "grad_norm": 0.1917329877614975, "learning_rate": 1.9872308914631494e-05, "loss": 0.5535, "step": 2229 }, { "epoch": 0.6184137548530227, "grad_norm": 0.19358281791210175, "learning_rate": 1.986784913653526e-05, "loss": 0.5445, "step": 2230 }, { "epoch": 0.6186910704381586, "grad_norm": 0.1858266144990921, "learning_rate": 1.9863387920776454e-05, "loss": 0.5446, "step": 2231 }, { "epoch": 0.6189683860232945, "grad_norm": 0.1879933476448059, "learning_rate": 1.985892526822557e-05, "loss": 0.5511, "step": 2232 }, { "epoch": 0.6192457016084304, "grad_norm": 0.19498036801815033, "learning_rate": 1.9854461179753396e-05, "loss": 0.5535, "step": 2233 }, { "epoch": 0.6195230171935663, "grad_norm": 0.1921449601650238, "learning_rate": 1.9849995656231e-05, "loss": 0.563, "step": 2234 }, { "epoch": 0.6198003327787022, "grad_norm": 0.1764960139989853, "learning_rate": 1.984552869852971e-05, "loss": 0.5604, "step": 2235 }, { "epoch": 0.620077648363838, "grad_norm": 0.18862247467041016, "learning_rate": 1.984106030752116e-05, "loss": 0.5507, "step": 2236 }, { "epoch": 0.6203549639489739, "grad_norm": 0.19972002506256104, "learning_rate": 1.9836590484077244e-05, "loss": 0.5443, "step": 2237 }, { "epoch": 0.6206322795341098, "grad_norm": 0.18754172325134277, "learning_rate": 1.983211922907014e-05, "loss": 0.5361, "step": 2238 }, { "epoch": 0.6209095951192457, "grad_norm": 0.20183882117271423, "learning_rate": 1.9827646543372322e-05, "loss": 0.5537, "step": 2239 }, { "epoch": 0.6211869107043816, "grad_norm": 0.18864542245864868, "learning_rate": 1.9823172427856518e-05, "loss": 0.5515, "step": 2240 }, { "epoch": 0.6214642262895175, "grad_norm": 0.20876039564609528, "learning_rate": 1.981869688339575e-05, "loss": 0.5353, "step": 2241 }, { "epoch": 0.6217415418746534, "grad_norm": 0.20126941800117493, "learning_rate": 1.9814219910863313e-05, "loss": 0.5549, "step": 2242 }, { "epoch": 0.6220188574597892, "grad_norm": 0.1895267367362976, "learning_rate": 1.9809741511132786e-05, "loss": 0.5427, "step": 2243 }, { "epoch": 0.6222961730449251, "grad_norm": 0.1944306641817093, "learning_rate": 1.980526168507802e-05, "loss": 0.5627, "step": 2244 }, { "epoch": 0.622573488630061, "grad_norm": 0.19707219302654266, "learning_rate": 1.980078043357315e-05, "loss": 0.573, "step": 2245 }, { "epoch": 0.6228508042151969, "grad_norm": 0.1903533786535263, "learning_rate": 1.9796297757492587e-05, "loss": 0.5534, "step": 2246 }, { "epoch": 0.6231281198003328, "grad_norm": 0.1989421546459198, "learning_rate": 1.9791813657711022e-05, "loss": 0.5343, "step": 2247 }, { "epoch": 0.6234054353854687, "grad_norm": 0.1919817328453064, "learning_rate": 1.9787328135103418e-05, "loss": 0.5493, "step": 2248 }, { "epoch": 0.6236827509706045, "grad_norm": 0.18703347444534302, "learning_rate": 1.9782841190545024e-05, "loss": 0.572, "step": 2249 }, { "epoch": 0.6239600665557404, "grad_norm": 0.18850034475326538, "learning_rate": 1.9778352824911356e-05, "loss": 0.5358, "step": 2250 }, { "epoch": 0.6242373821408763, "grad_norm": 0.22748717665672302, "learning_rate": 1.9773863039078217e-05, "loss": 0.5569, "step": 2251 }, { "epoch": 0.6245146977260122, "grad_norm": 0.1906166821718216, "learning_rate": 1.976937183392168e-05, "loss": 0.5226, "step": 2252 }, { "epoch": 0.6247920133111481, "grad_norm": 0.1964375227689743, "learning_rate": 1.9764879210318098e-05, "loss": 0.5467, "step": 2253 }, { "epoch": 0.625069328896284, "grad_norm": 0.27289459109306335, "learning_rate": 1.9760385169144108e-05, "loss": 0.5597, "step": 2254 }, { "epoch": 0.6253466444814199, "grad_norm": 0.1879124641418457, "learning_rate": 1.9755889711276603e-05, "loss": 0.5509, "step": 2255 }, { "epoch": 0.6256239600665557, "grad_norm": 0.1970473676919937, "learning_rate": 1.9751392837592782e-05, "loss": 0.53, "step": 2256 }, { "epoch": 0.6259012756516916, "grad_norm": 0.20250020921230316, "learning_rate": 1.9746894548970092e-05, "loss": 0.5458, "step": 2257 }, { "epoch": 0.6261785912368275, "grad_norm": 0.20085811614990234, "learning_rate": 1.9742394846286277e-05, "loss": 0.5483, "step": 2258 }, { "epoch": 0.6264559068219634, "grad_norm": 0.21487122774124146, "learning_rate": 1.9737893730419337e-05, "loss": 0.5522, "step": 2259 }, { "epoch": 0.6267332224070993, "grad_norm": 0.1989215910434723, "learning_rate": 1.9733391202247577e-05, "loss": 0.5269, "step": 2260 }, { "epoch": 0.6270105379922352, "grad_norm": 0.1842491775751114, "learning_rate": 1.9728887262649536e-05, "loss": 0.543, "step": 2261 }, { "epoch": 0.627287853577371, "grad_norm": 0.1977192461490631, "learning_rate": 1.972438191250407e-05, "loss": 0.5457, "step": 2262 }, { "epoch": 0.6275651691625069, "grad_norm": 0.18922410905361176, "learning_rate": 1.9719875152690288e-05, "loss": 0.5508, "step": 2263 }, { "epoch": 0.6278424847476428, "grad_norm": 0.19666342437267303, "learning_rate": 1.9715366984087575e-05, "loss": 0.5359, "step": 2264 }, { "epoch": 0.6281198003327787, "grad_norm": 0.19078543782234192, "learning_rate": 1.9710857407575595e-05, "loss": 0.5349, "step": 2265 }, { "epoch": 0.6283971159179146, "grad_norm": 0.18407784402370453, "learning_rate": 1.970634642403429e-05, "loss": 0.5554, "step": 2266 }, { "epoch": 0.6286744315030505, "grad_norm": 0.1921215057373047, "learning_rate": 1.9701834034343864e-05, "loss": 0.583, "step": 2267 }, { "epoch": 0.6289517470881864, "grad_norm": 0.2015913873910904, "learning_rate": 1.969732023938481e-05, "loss": 0.5656, "step": 2268 }, { "epoch": 0.6292290626733222, "grad_norm": 0.19959089159965515, "learning_rate": 1.969280504003789e-05, "loss": 0.5404, "step": 2269 }, { "epoch": 0.6295063782584581, "grad_norm": 0.18149082362651825, "learning_rate": 1.968828843718414e-05, "loss": 0.5716, "step": 2270 }, { "epoch": 0.629783693843594, "grad_norm": 0.20897513628005981, "learning_rate": 1.9683770431704857e-05, "loss": 0.5566, "step": 2271 }, { "epoch": 0.6300610094287299, "grad_norm": 0.18247532844543457, "learning_rate": 1.9679251024481636e-05, "loss": 0.5463, "step": 2272 }, { "epoch": 0.6303383250138658, "grad_norm": 0.18694794178009033, "learning_rate": 1.9674730216396334e-05, "loss": 0.5611, "step": 2273 }, { "epoch": 0.6306156405990017, "grad_norm": 0.1819789707660675, "learning_rate": 1.9670208008331073e-05, "loss": 0.5336, "step": 2274 }, { "epoch": 0.6308929561841375, "grad_norm": 0.18136881291866302, "learning_rate": 1.9665684401168258e-05, "loss": 0.5496, "step": 2275 }, { "epoch": 0.6311702717692734, "grad_norm": 0.1916753500699997, "learning_rate": 1.9661159395790563e-05, "loss": 0.56, "step": 2276 }, { "epoch": 0.6314475873544093, "grad_norm": 0.18821988999843597, "learning_rate": 1.965663299308094e-05, "loss": 0.5507, "step": 2277 }, { "epoch": 0.6317249029395452, "grad_norm": 0.2045837789773941, "learning_rate": 1.965210519392261e-05, "loss": 0.5495, "step": 2278 }, { "epoch": 0.6320022185246811, "grad_norm": 0.17228901386260986, "learning_rate": 1.964757599919907e-05, "loss": 0.5505, "step": 2279 }, { "epoch": 0.632279534109817, "grad_norm": 0.19306735694408417, "learning_rate": 1.9643045409794074e-05, "loss": 0.5257, "step": 2280 }, { "epoch": 0.6325568496949528, "grad_norm": 0.19630911946296692, "learning_rate": 1.9638513426591668e-05, "loss": 0.5324, "step": 2281 }, { "epoch": 0.6328341652800887, "grad_norm": 0.19862103462219238, "learning_rate": 1.9633980050476164e-05, "loss": 0.5556, "step": 2282 }, { "epoch": 0.6331114808652246, "grad_norm": 0.20335890352725983, "learning_rate": 1.9629445282332136e-05, "loss": 0.5583, "step": 2283 }, { "epoch": 0.6333887964503605, "grad_norm": 0.19751910865306854, "learning_rate": 1.9624909123044448e-05, "loss": 0.5844, "step": 2284 }, { "epoch": 0.6336661120354964, "grad_norm": 0.18284855782985687, "learning_rate": 1.9620371573498212e-05, "loss": 0.5393, "step": 2285 }, { "epoch": 0.6339434276206323, "grad_norm": 0.18531352281570435, "learning_rate": 1.961583263457884e-05, "loss": 0.5591, "step": 2286 }, { "epoch": 0.6342207432057682, "grad_norm": 0.18705891072750092, "learning_rate": 1.9611292307171987e-05, "loss": 0.5309, "step": 2287 }, { "epoch": 0.634498058790904, "grad_norm": 0.18091407418251038, "learning_rate": 1.9606750592163593e-05, "loss": 0.5041, "step": 2288 }, { "epoch": 0.6347753743760399, "grad_norm": 0.19142916798591614, "learning_rate": 1.960220749043987e-05, "loss": 0.5601, "step": 2289 }, { "epoch": 0.6350526899611758, "grad_norm": 0.18897870182991028, "learning_rate": 1.9597663002887294e-05, "loss": 0.5541, "step": 2290 }, { "epoch": 0.6353300055463117, "grad_norm": 0.19178354740142822, "learning_rate": 1.959311713039262e-05, "loss": 0.5217, "step": 2291 }, { "epoch": 0.6356073211314476, "grad_norm": 0.18749533593654633, "learning_rate": 1.9588569873842864e-05, "loss": 0.5291, "step": 2292 }, { "epoch": 0.6358846367165835, "grad_norm": 0.20121093094348907, "learning_rate": 1.9584021234125323e-05, "loss": 0.5414, "step": 2293 }, { "epoch": 0.6361619523017193, "grad_norm": 0.18628259003162384, "learning_rate": 1.957947121212754e-05, "loss": 0.511, "step": 2294 }, { "epoch": 0.6364392678868552, "grad_norm": 0.18616369366645813, "learning_rate": 1.9574919808737364e-05, "loss": 0.5496, "step": 2295 }, { "epoch": 0.6367165834719911, "grad_norm": 0.20249204337596893, "learning_rate": 1.9570367024842888e-05, "loss": 0.5585, "step": 2296 }, { "epoch": 0.636993899057127, "grad_norm": 0.23987984657287598, "learning_rate": 1.9565812861332477e-05, "loss": 0.5659, "step": 2297 }, { "epoch": 0.6372712146422629, "grad_norm": 0.1872101128101349, "learning_rate": 1.956125731909477e-05, "loss": 0.5419, "step": 2298 }, { "epoch": 0.6375485302273988, "grad_norm": 0.18463543057441711, "learning_rate": 1.955670039901868e-05, "loss": 0.5302, "step": 2299 }, { "epoch": 0.6378258458125347, "grad_norm": 0.20480939745903015, "learning_rate": 1.955214210199338e-05, "loss": 0.5343, "step": 2300 }, { "epoch": 0.6381031613976705, "grad_norm": 0.18215136229991913, "learning_rate": 1.9547582428908306e-05, "loss": 0.546, "step": 2301 }, { "epoch": 0.6383804769828064, "grad_norm": 0.17943714559078217, "learning_rate": 1.954302138065318e-05, "loss": 0.5334, "step": 2302 }, { "epoch": 0.6386577925679423, "grad_norm": 0.19718489050865173, "learning_rate": 1.9538458958117982e-05, "loss": 0.5621, "step": 2303 }, { "epoch": 0.6389351081530782, "grad_norm": 0.19121624529361725, "learning_rate": 1.953389516219296e-05, "loss": 0.5523, "step": 2304 }, { "epoch": 0.6392124237382141, "grad_norm": 0.1928092986345291, "learning_rate": 1.9529329993768634e-05, "loss": 0.5455, "step": 2305 }, { "epoch": 0.63948973932335, "grad_norm": 0.1785450428724289, "learning_rate": 1.952476345373579e-05, "loss": 0.5643, "step": 2306 }, { "epoch": 0.6397670549084858, "grad_norm": 0.17965517938137054, "learning_rate": 1.9520195542985476e-05, "loss": 0.5266, "step": 2307 }, { "epoch": 0.6400443704936217, "grad_norm": 0.21014821529388428, "learning_rate": 1.9515626262409016e-05, "loss": 0.5327, "step": 2308 }, { "epoch": 0.6403216860787576, "grad_norm": 0.18984338641166687, "learning_rate": 1.951105561289799e-05, "loss": 0.5678, "step": 2309 }, { "epoch": 0.6405990016638935, "grad_norm": 0.18631823360919952, "learning_rate": 1.9506483595344267e-05, "loss": 0.5469, "step": 2310 }, { "epoch": 0.6408763172490294, "grad_norm": 0.18727704882621765, "learning_rate": 1.9501910210639958e-05, "loss": 0.5657, "step": 2311 }, { "epoch": 0.6411536328341653, "grad_norm": 0.17807155847549438, "learning_rate": 1.9497335459677458e-05, "loss": 0.5044, "step": 2312 }, { "epoch": 0.6414309484193012, "grad_norm": 0.18843533098697662, "learning_rate": 1.9492759343349415e-05, "loss": 0.553, "step": 2313 }, { "epoch": 0.641708264004437, "grad_norm": 0.1941610723733902, "learning_rate": 1.9488181862548753e-05, "loss": 0.587, "step": 2314 }, { "epoch": 0.6419855795895729, "grad_norm": 0.1894078403711319, "learning_rate": 1.9483603018168666e-05, "loss": 0.5285, "step": 2315 }, { "epoch": 0.6422628951747088, "grad_norm": 0.19420726597309113, "learning_rate": 1.9479022811102604e-05, "loss": 0.5302, "step": 2316 }, { "epoch": 0.6425402107598447, "grad_norm": 0.205157071352005, "learning_rate": 1.9474441242244284e-05, "loss": 0.5539, "step": 2317 }, { "epoch": 0.6428175263449806, "grad_norm": 0.1943119317293167, "learning_rate": 1.9469858312487693e-05, "loss": 0.5465, "step": 2318 }, { "epoch": 0.6430948419301165, "grad_norm": 0.19968454539775848, "learning_rate": 1.946527402272708e-05, "loss": 0.5438, "step": 2319 }, { "epoch": 0.6433721575152523, "grad_norm": 0.18584848940372467, "learning_rate": 1.9460688373856967e-05, "loss": 0.546, "step": 2320 }, { "epoch": 0.6436494731003882, "grad_norm": 0.17821067571640015, "learning_rate": 1.945610136677213e-05, "loss": 0.5148, "step": 2321 }, { "epoch": 0.6439267886855241, "grad_norm": 0.19228345155715942, "learning_rate": 1.945151300236762e-05, "loss": 0.5368, "step": 2322 }, { "epoch": 0.64420410427066, "grad_norm": 0.18330131471157074, "learning_rate": 1.9446923281538747e-05, "loss": 0.5611, "step": 2323 }, { "epoch": 0.6444814198557959, "grad_norm": 0.18893574178218842, "learning_rate": 1.9442332205181086e-05, "loss": 0.54, "step": 2324 }, { "epoch": 0.6447587354409318, "grad_norm": 0.19229231774806976, "learning_rate": 1.943773977419047e-05, "loss": 0.5236, "step": 2325 }, { "epoch": 0.6450360510260676, "grad_norm": 0.20103448629379272, "learning_rate": 1.9433145989463027e-05, "loss": 0.554, "step": 2326 }, { "epoch": 0.6453133666112035, "grad_norm": 0.1895090788602829, "learning_rate": 1.9428550851895098e-05, "loss": 0.5676, "step": 2327 }, { "epoch": 0.6455906821963394, "grad_norm": 0.18887649476528168, "learning_rate": 1.9423954362383334e-05, "loss": 0.5601, "step": 2328 }, { "epoch": 0.6458679977814753, "grad_norm": 0.2108272910118103, "learning_rate": 1.941935652182463e-05, "loss": 0.5748, "step": 2329 }, { "epoch": 0.6461453133666112, "grad_norm": 0.20968154072761536, "learning_rate": 1.941475733111614e-05, "loss": 0.5306, "step": 2330 }, { "epoch": 0.6464226289517471, "grad_norm": 0.18780824542045593, "learning_rate": 1.9410156791155297e-05, "loss": 0.5326, "step": 2331 }, { "epoch": 0.646699944536883, "grad_norm": 0.19030767679214478, "learning_rate": 1.9405554902839778e-05, "loss": 0.5533, "step": 2332 }, { "epoch": 0.6469772601220188, "grad_norm": 0.1849377304315567, "learning_rate": 1.9400951667067542e-05, "loss": 0.571, "step": 2333 }, { "epoch": 0.6472545757071547, "grad_norm": 0.1965888887643814, "learning_rate": 1.9396347084736794e-05, "loss": 0.5591, "step": 2334 }, { "epoch": 0.6475318912922906, "grad_norm": 0.1921495646238327, "learning_rate": 1.9391741156746013e-05, "loss": 0.5723, "step": 2335 }, { "epoch": 0.6478092068774265, "grad_norm": 0.20073464512825012, "learning_rate": 1.9387133883993948e-05, "loss": 0.535, "step": 2336 }, { "epoch": 0.6480865224625624, "grad_norm": 0.18834145367145538, "learning_rate": 1.938252526737958e-05, "loss": 0.5256, "step": 2337 }, { "epoch": 0.6483638380476983, "grad_norm": 0.1929401457309723, "learning_rate": 1.9377915307802192e-05, "loss": 0.5204, "step": 2338 }, { "epoch": 0.6486411536328341, "grad_norm": 0.19703806936740875, "learning_rate": 1.9373304006161298e-05, "loss": 0.5603, "step": 2339 }, { "epoch": 0.64891846921797, "grad_norm": 0.205661803483963, "learning_rate": 1.9368691363356682e-05, "loss": 0.522, "step": 2340 }, { "epoch": 0.6491957848031059, "grad_norm": 0.19013790786266327, "learning_rate": 1.9364077380288408e-05, "loss": 0.549, "step": 2341 }, { "epoch": 0.6494731003882418, "grad_norm": 0.18775691092014313, "learning_rate": 1.935946205785677e-05, "loss": 0.5389, "step": 2342 }, { "epoch": 0.6497504159733777, "grad_norm": 0.18783038854599, "learning_rate": 1.9354845396962353e-05, "loss": 0.5378, "step": 2343 }, { "epoch": 0.6500277315585136, "grad_norm": 0.19113753736019135, "learning_rate": 1.9350227398505976e-05, "loss": 0.5461, "step": 2344 }, { "epoch": 0.6503050471436495, "grad_norm": 0.17977707087993622, "learning_rate": 1.9345608063388742e-05, "loss": 0.5512, "step": 2345 }, { "epoch": 0.6505823627287853, "grad_norm": 0.20450885593891144, "learning_rate": 1.9340987392512006e-05, "loss": 0.5501, "step": 2346 }, { "epoch": 0.6508596783139212, "grad_norm": 0.2484101504087448, "learning_rate": 1.9336365386777376e-05, "loss": 0.5223, "step": 2347 }, { "epoch": 0.6511369938990571, "grad_norm": 0.20386487245559692, "learning_rate": 1.9331742047086743e-05, "loss": 0.5262, "step": 2348 }, { "epoch": 0.651414309484193, "grad_norm": 0.18846935033798218, "learning_rate": 1.9327117374342223e-05, "loss": 0.5347, "step": 2349 }, { "epoch": 0.6516916250693289, "grad_norm": 0.18970006704330444, "learning_rate": 1.932249136944623e-05, "loss": 0.5683, "step": 2350 }, { "epoch": 0.6519689406544648, "grad_norm": 0.1962558776140213, "learning_rate": 1.9317864033301407e-05, "loss": 0.5791, "step": 2351 }, { "epoch": 0.6522462562396006, "grad_norm": 0.20231659710407257, "learning_rate": 1.9313235366810676e-05, "loss": 0.5627, "step": 2352 }, { "epoch": 0.6525235718247365, "grad_norm": 0.1958416849374771, "learning_rate": 1.9308605370877215e-05, "loss": 0.5375, "step": 2353 }, { "epoch": 0.6528008874098724, "grad_norm": 0.20812073349952698, "learning_rate": 1.9303974046404455e-05, "loss": 0.5299, "step": 2354 }, { "epoch": 0.6530782029950083, "grad_norm": 0.1921248584985733, "learning_rate": 1.929934139429609e-05, "loss": 0.5544, "step": 2355 }, { "epoch": 0.6533555185801442, "grad_norm": 0.20390520989894867, "learning_rate": 1.929470741545607e-05, "loss": 0.581, "step": 2356 }, { "epoch": 0.6536328341652801, "grad_norm": 0.18480364978313446, "learning_rate": 1.9290072110788616e-05, "loss": 0.5599, "step": 2357 }, { "epoch": 0.653910149750416, "grad_norm": 0.1953095942735672, "learning_rate": 1.928543548119819e-05, "loss": 0.5961, "step": 2358 }, { "epoch": 0.6541874653355518, "grad_norm": 0.19727711379528046, "learning_rate": 1.9280797527589527e-05, "loss": 0.5585, "step": 2359 }, { "epoch": 0.6544647809206877, "grad_norm": 0.19928644597530365, "learning_rate": 1.927615825086761e-05, "loss": 0.5522, "step": 2360 }, { "epoch": 0.6547420965058236, "grad_norm": 0.1891396939754486, "learning_rate": 1.9271517651937688e-05, "loss": 0.5586, "step": 2361 }, { "epoch": 0.6550194120909595, "grad_norm": 0.1949121057987213, "learning_rate": 1.9266875731705266e-05, "loss": 0.5307, "step": 2362 }, { "epoch": 0.6552967276760954, "grad_norm": 0.21069341897964478, "learning_rate": 1.9262232491076104e-05, "loss": 0.5546, "step": 2363 }, { "epoch": 0.6555740432612313, "grad_norm": 0.18490912020206451, "learning_rate": 1.925758793095622e-05, "loss": 0.5613, "step": 2364 }, { "epoch": 0.6558513588463671, "grad_norm": 0.1878899782896042, "learning_rate": 1.9252942052251892e-05, "loss": 0.5497, "step": 2365 }, { "epoch": 0.656128674431503, "grad_norm": 0.19967246055603027, "learning_rate": 1.9248294855869653e-05, "loss": 0.5327, "step": 2366 }, { "epoch": 0.6564059900166389, "grad_norm": 0.19277790188789368, "learning_rate": 1.9243646342716296e-05, "loss": 0.5545, "step": 2367 }, { "epoch": 0.6566833056017748, "grad_norm": 0.19045887887477875, "learning_rate": 1.9238996513698864e-05, "loss": 0.5071, "step": 2368 }, { "epoch": 0.6569606211869107, "grad_norm": 0.19380688667297363, "learning_rate": 1.923434536972467e-05, "loss": 0.5437, "step": 2369 }, { "epoch": 0.6572379367720466, "grad_norm": 0.18202729523181915, "learning_rate": 1.9229692911701275e-05, "loss": 0.5356, "step": 2370 }, { "epoch": 0.6575152523571824, "grad_norm": 0.18908429145812988, "learning_rate": 1.9225039140536488e-05, "loss": 0.5526, "step": 2371 }, { "epoch": 0.6577925679423183, "grad_norm": 0.21290616691112518, "learning_rate": 1.9220384057138386e-05, "loss": 0.5306, "step": 2372 }, { "epoch": 0.6580698835274542, "grad_norm": 0.18612539768218994, "learning_rate": 1.9215727662415303e-05, "loss": 0.5387, "step": 2373 }, { "epoch": 0.6583471991125901, "grad_norm": 0.182894766330719, "learning_rate": 1.9211069957275822e-05, "loss": 0.5365, "step": 2374 }, { "epoch": 0.658624514697726, "grad_norm": 0.1848146617412567, "learning_rate": 1.920641094262879e-05, "loss": 0.5423, "step": 2375 }, { "epoch": 0.6589018302828619, "grad_norm": 0.18521788716316223, "learning_rate": 1.92017506193833e-05, "loss": 0.5534, "step": 2376 }, { "epoch": 0.6591791458679978, "grad_norm": 0.1895231306552887, "learning_rate": 1.9197088988448703e-05, "loss": 0.5447, "step": 2377 }, { "epoch": 0.6594564614531336, "grad_norm": 0.18669599294662476, "learning_rate": 1.9192426050734608e-05, "loss": 0.5446, "step": 2378 }, { "epoch": 0.6597337770382695, "grad_norm": 0.1867615282535553, "learning_rate": 1.9187761807150878e-05, "loss": 0.5269, "step": 2379 }, { "epoch": 0.6600110926234054, "grad_norm": 0.19190800189971924, "learning_rate": 1.918309625860763e-05, "loss": 0.5833, "step": 2380 }, { "epoch": 0.6602884082085413, "grad_norm": 0.18087397515773773, "learning_rate": 1.917842940601524e-05, "loss": 0.5379, "step": 2381 }, { "epoch": 0.6605657237936772, "grad_norm": 0.18190105259418488, "learning_rate": 1.9173761250284324e-05, "loss": 0.5489, "step": 2382 }, { "epoch": 0.6608430393788131, "grad_norm": 0.19485372304916382, "learning_rate": 1.9169091792325777e-05, "loss": 0.5687, "step": 2383 }, { "epoch": 0.6611203549639489, "grad_norm": 0.19502972066402435, "learning_rate": 1.9164421033050724e-05, "loss": 0.5445, "step": 2384 }, { "epoch": 0.6613976705490848, "grad_norm": 0.18996240198612213, "learning_rate": 1.915974897337056e-05, "loss": 0.5621, "step": 2385 }, { "epoch": 0.6616749861342207, "grad_norm": 0.19751591980457306, "learning_rate": 1.915507561419692e-05, "loss": 0.5468, "step": 2386 }, { "epoch": 0.6619523017193566, "grad_norm": 0.2202579826116562, "learning_rate": 1.915040095644171e-05, "loss": 0.5515, "step": 2387 }, { "epoch": 0.6622296173044925, "grad_norm": 0.1802307367324829, "learning_rate": 1.914572500101707e-05, "loss": 0.5449, "step": 2388 }, { "epoch": 0.6625069328896284, "grad_norm": 0.18632298707962036, "learning_rate": 1.914104774883541e-05, "loss": 0.5612, "step": 2389 }, { "epoch": 0.6627842484747642, "grad_norm": 0.19929082691669464, "learning_rate": 1.9136369200809378e-05, "loss": 0.5789, "step": 2390 }, { "epoch": 0.6630615640599001, "grad_norm": 0.18650726974010468, "learning_rate": 1.913168935785189e-05, "loss": 0.5618, "step": 2391 }, { "epoch": 0.663338879645036, "grad_norm": 0.1912173330783844, "learning_rate": 1.912700822087611e-05, "loss": 0.5487, "step": 2392 }, { "epoch": 0.6636161952301719, "grad_norm": 0.19155828654766083, "learning_rate": 1.912232579079544e-05, "loss": 0.5421, "step": 2393 }, { "epoch": 0.6638935108153078, "grad_norm": 0.21673326194286346, "learning_rate": 1.9117642068523556e-05, "loss": 0.5302, "step": 2394 }, { "epoch": 0.6641708264004437, "grad_norm": 0.1941951960325241, "learning_rate": 1.9112957054974373e-05, "loss": 0.5558, "step": 2395 }, { "epoch": 0.6644481419855796, "grad_norm": 0.18575075268745422, "learning_rate": 1.9108270751062064e-05, "loss": 0.5737, "step": 2396 }, { "epoch": 0.6647254575707154, "grad_norm": 0.19413797557353973, "learning_rate": 1.9103583157701046e-05, "loss": 0.5559, "step": 2397 }, { "epoch": 0.6650027731558513, "grad_norm": 0.19030508399009705, "learning_rate": 1.9098894275805994e-05, "loss": 0.5519, "step": 2398 }, { "epoch": 0.6652800887409872, "grad_norm": 0.18635134398937225, "learning_rate": 1.9094204106291842e-05, "loss": 0.5203, "step": 2399 }, { "epoch": 0.6655574043261231, "grad_norm": 0.1943938434123993, "learning_rate": 1.908951265007375e-05, "loss": 0.5664, "step": 2400 }, { "epoch": 0.665834719911259, "grad_norm": 0.2069421112537384, "learning_rate": 1.9084819908067156e-05, "loss": 0.5561, "step": 2401 }, { "epoch": 0.6661120354963949, "grad_norm": 0.18940883874893188, "learning_rate": 1.9080125881187737e-05, "loss": 0.5833, "step": 2402 }, { "epoch": 0.6663893510815307, "grad_norm": 0.1828288733959198, "learning_rate": 1.907543057035142e-05, "loss": 0.5478, "step": 2403 }, { "epoch": 0.6666666666666666, "grad_norm": 0.18825417757034302, "learning_rate": 1.907073397647439e-05, "loss": 0.5362, "step": 2404 }, { "epoch": 0.6669439822518025, "grad_norm": 0.19599126279354095, "learning_rate": 1.906603610047307e-05, "loss": 0.5425, "step": 2405 }, { "epoch": 0.6672212978369384, "grad_norm": 0.1776115894317627, "learning_rate": 1.9061336943264145e-05, "loss": 0.5395, "step": 2406 }, { "epoch": 0.6674986134220743, "grad_norm": 0.20096100866794586, "learning_rate": 1.905663650576454e-05, "loss": 0.547, "step": 2407 }, { "epoch": 0.6677759290072102, "grad_norm": 0.1988787204027176, "learning_rate": 1.9051934788891443e-05, "loss": 0.5336, "step": 2408 }, { "epoch": 0.668053244592346, "grad_norm": 0.19204209744930267, "learning_rate": 1.9047231793562276e-05, "loss": 0.5449, "step": 2409 }, { "epoch": 0.6683305601774819, "grad_norm": 0.1779128760099411, "learning_rate": 1.904252752069472e-05, "loss": 0.5023, "step": 2410 }, { "epoch": 0.6686078757626178, "grad_norm": 0.2877557575702667, "learning_rate": 1.9037821971206703e-05, "loss": 0.5446, "step": 2411 }, { "epoch": 0.6688851913477537, "grad_norm": 0.19600874185562134, "learning_rate": 1.90331151460164e-05, "loss": 0.5275, "step": 2412 }, { "epoch": 0.6691625069328896, "grad_norm": 0.28246966004371643, "learning_rate": 1.9028407046042246e-05, "loss": 0.5321, "step": 2413 }, { "epoch": 0.6694398225180255, "grad_norm": 0.1964629739522934, "learning_rate": 1.9023697672202905e-05, "loss": 0.5727, "step": 2414 }, { "epoch": 0.6697171381031614, "grad_norm": 0.17945913970470428, "learning_rate": 1.901898702541731e-05, "loss": 0.5237, "step": 2415 }, { "epoch": 0.6699944536882972, "grad_norm": 0.190501868724823, "learning_rate": 1.901427510660463e-05, "loss": 0.5491, "step": 2416 }, { "epoch": 0.6702717692734331, "grad_norm": 0.19791793823242188, "learning_rate": 1.9009561916684282e-05, "loss": 0.5586, "step": 2417 }, { "epoch": 0.670549084858569, "grad_norm": 0.18914659321308136, "learning_rate": 1.900484745657594e-05, "loss": 0.5302, "step": 2418 }, { "epoch": 0.6708264004437049, "grad_norm": 0.1981426477432251, "learning_rate": 1.9000131727199513e-05, "loss": 0.5609, "step": 2419 }, { "epoch": 0.6711037160288408, "grad_norm": 0.1939757615327835, "learning_rate": 1.8995414729475165e-05, "loss": 0.5749, "step": 2420 }, { "epoch": 0.6713810316139767, "grad_norm": 0.19931401312351227, "learning_rate": 1.899069646432332e-05, "loss": 0.547, "step": 2421 }, { "epoch": 0.6716583471991125, "grad_norm": 0.19219137728214264, "learning_rate": 1.898597693266462e-05, "loss": 0.5426, "step": 2422 }, { "epoch": 0.6719356627842484, "grad_norm": 0.199588343501091, "learning_rate": 1.898125613541998e-05, "loss": 0.5411, "step": 2423 }, { "epoch": 0.6722129783693843, "grad_norm": 0.19906532764434814, "learning_rate": 1.897653407351055e-05, "loss": 0.5707, "step": 2424 }, { "epoch": 0.6724902939545202, "grad_norm": 0.18722088634967804, "learning_rate": 1.8971810747857726e-05, "loss": 0.5623, "step": 2425 }, { "epoch": 0.6727676095396561, "grad_norm": 0.18101942539215088, "learning_rate": 1.8967086159383162e-05, "loss": 0.5519, "step": 2426 }, { "epoch": 0.673044925124792, "grad_norm": 0.19272929430007935, "learning_rate": 1.8962360309008746e-05, "loss": 0.5413, "step": 2427 }, { "epoch": 0.6733222407099279, "grad_norm": 0.19695578515529633, "learning_rate": 1.8957633197656615e-05, "loss": 0.5299, "step": 2428 }, { "epoch": 0.6735995562950637, "grad_norm": 0.19021473824977875, "learning_rate": 1.8952904826249158e-05, "loss": 0.5453, "step": 2429 }, { "epoch": 0.6738768718801996, "grad_norm": 0.1905011683702469, "learning_rate": 1.8948175195709e-05, "loss": 0.5458, "step": 2430 }, { "epoch": 0.6741541874653355, "grad_norm": 0.1979636549949646, "learning_rate": 1.8943444306959017e-05, "loss": 0.5379, "step": 2431 }, { "epoch": 0.6744315030504714, "grad_norm": 0.19846384227275848, "learning_rate": 1.8938712160922343e-05, "loss": 0.5164, "step": 2432 }, { "epoch": 0.6747088186356073, "grad_norm": 0.18356280028820038, "learning_rate": 1.893397875852233e-05, "loss": 0.5429, "step": 2433 }, { "epoch": 0.6749861342207432, "grad_norm": 0.1836164891719818, "learning_rate": 1.8929244100682597e-05, "loss": 0.5569, "step": 2434 }, { "epoch": 0.675263449805879, "grad_norm": 0.20087088644504547, "learning_rate": 1.8924508188327e-05, "loss": 0.5676, "step": 2435 }, { "epoch": 0.6755407653910149, "grad_norm": 0.1871204376220703, "learning_rate": 1.891977102237964e-05, "loss": 0.5314, "step": 2436 }, { "epoch": 0.6758180809761508, "grad_norm": 0.19372668862342834, "learning_rate": 1.891503260376487e-05, "loss": 0.543, "step": 2437 }, { "epoch": 0.6760953965612867, "grad_norm": 0.20033282041549683, "learning_rate": 1.891029293340727e-05, "loss": 0.5395, "step": 2438 }, { "epoch": 0.6763727121464226, "grad_norm": 0.1941455453634262, "learning_rate": 1.8905552012231684e-05, "loss": 0.5775, "step": 2439 }, { "epoch": 0.6766500277315585, "grad_norm": 0.18717962503433228, "learning_rate": 1.890080984116319e-05, "loss": 0.5305, "step": 2440 }, { "epoch": 0.6769273433166944, "grad_norm": 0.1877082884311676, "learning_rate": 1.8896066421127106e-05, "loss": 0.5438, "step": 2441 }, { "epoch": 0.6772046589018302, "grad_norm": 0.19558964669704437, "learning_rate": 1.8891321753049008e-05, "loss": 0.5452, "step": 2442 }, { "epoch": 0.6774819744869661, "grad_norm": 0.19550803303718567, "learning_rate": 1.8886575837854696e-05, "loss": 0.5409, "step": 2443 }, { "epoch": 0.677759290072102, "grad_norm": 0.18728572130203247, "learning_rate": 1.888182867647023e-05, "loss": 0.5572, "step": 2444 }, { "epoch": 0.6780366056572379, "grad_norm": 0.184623122215271, "learning_rate": 1.8877080269821906e-05, "loss": 0.565, "step": 2445 }, { "epoch": 0.6783139212423738, "grad_norm": 0.19215163588523865, "learning_rate": 1.8872330618836265e-05, "loss": 0.532, "step": 2446 }, { "epoch": 0.6785912368275097, "grad_norm": 0.19430597126483917, "learning_rate": 1.886757972444009e-05, "loss": 0.5409, "step": 2447 }, { "epoch": 0.6788685524126455, "grad_norm": 0.1996561586856842, "learning_rate": 1.88628275875604e-05, "loss": 0.5514, "step": 2448 }, { "epoch": 0.6791458679977814, "grad_norm": 0.20458458364009857, "learning_rate": 1.8858074209124473e-05, "loss": 0.5566, "step": 2449 }, { "epoch": 0.6794231835829173, "grad_norm": 0.20288583636283875, "learning_rate": 1.885331959005981e-05, "loss": 0.5237, "step": 2450 }, { "epoch": 0.6797004991680532, "grad_norm": 0.18585625290870667, "learning_rate": 1.8848563731294172e-05, "loss": 0.53, "step": 2451 }, { "epoch": 0.6799778147531891, "grad_norm": 0.22308149933815002, "learning_rate": 1.8843806633755544e-05, "loss": 0.5513, "step": 2452 }, { "epoch": 0.680255130338325, "grad_norm": 0.18869346380233765, "learning_rate": 1.8839048298372165e-05, "loss": 0.5527, "step": 2453 }, { "epoch": 0.6805324459234608, "grad_norm": 0.1881789267063141, "learning_rate": 1.8834288726072513e-05, "loss": 0.5368, "step": 2454 }, { "epoch": 0.6808097615085967, "grad_norm": 0.18957830965518951, "learning_rate": 1.882952791778531e-05, "loss": 0.5235, "step": 2455 }, { "epoch": 0.6810870770937326, "grad_norm": 0.1874406933784485, "learning_rate": 1.882476587443951e-05, "loss": 0.5164, "step": 2456 }, { "epoch": 0.6813643926788685, "grad_norm": 0.19134515523910522, "learning_rate": 1.8820002596964316e-05, "loss": 0.5606, "step": 2457 }, { "epoch": 0.6816417082640044, "grad_norm": 0.18497633934020996, "learning_rate": 1.881523808628917e-05, "loss": 0.5681, "step": 2458 }, { "epoch": 0.6819190238491403, "grad_norm": 0.18682947754859924, "learning_rate": 1.881047234334376e-05, "loss": 0.55, "step": 2459 }, { "epoch": 0.6821963394342762, "grad_norm": 0.1982649862766266, "learning_rate": 1.8805705369057993e-05, "loss": 0.5321, "step": 2460 }, { "epoch": 0.682473655019412, "grad_norm": 0.19017384946346283, "learning_rate": 1.880093716436205e-05, "loss": 0.5295, "step": 2461 }, { "epoch": 0.6827509706045479, "grad_norm": 0.20334112644195557, "learning_rate": 1.8796167730186322e-05, "loss": 0.5601, "step": 2462 }, { "epoch": 0.6830282861896838, "grad_norm": 0.1974753588438034, "learning_rate": 1.8791397067461457e-05, "loss": 0.5572, "step": 2463 }, { "epoch": 0.6833056017748197, "grad_norm": 0.17885488271713257, "learning_rate": 1.878662517711834e-05, "loss": 0.5245, "step": 2464 }, { "epoch": 0.6835829173599556, "grad_norm": 0.18409696221351624, "learning_rate": 1.8781852060088083e-05, "loss": 0.5321, "step": 2465 }, { "epoch": 0.6838602329450915, "grad_norm": 0.19201841950416565, "learning_rate": 1.877707771730206e-05, "loss": 0.5563, "step": 2466 }, { "epoch": 0.6841375485302273, "grad_norm": 0.1807066947221756, "learning_rate": 1.8772302149691866e-05, "loss": 0.5253, "step": 2467 }, { "epoch": 0.6844148641153632, "grad_norm": 0.18462277948856354, "learning_rate": 1.8767525358189343e-05, "loss": 0.5315, "step": 2468 }, { "epoch": 0.6846921797004991, "grad_norm": 0.18250201642513275, "learning_rate": 1.876274734372656e-05, "loss": 0.5383, "step": 2469 }, { "epoch": 0.684969495285635, "grad_norm": 0.1986282765865326, "learning_rate": 1.8757968107235853e-05, "loss": 0.5282, "step": 2470 }, { "epoch": 0.6852468108707709, "grad_norm": 0.26469552516937256, "learning_rate": 1.8753187649649757e-05, "loss": 0.5564, "step": 2471 }, { "epoch": 0.6855241264559068, "grad_norm": 0.19594305753707886, "learning_rate": 1.874840597190108e-05, "loss": 0.5546, "step": 2472 }, { "epoch": 0.6858014420410427, "grad_norm": 0.18754026293754578, "learning_rate": 1.8743623074922843e-05, "loss": 0.5309, "step": 2473 }, { "epoch": 0.6860787576261785, "grad_norm": 0.1846148520708084, "learning_rate": 1.873883895964833e-05, "loss": 0.5288, "step": 2474 }, { "epoch": 0.6863560732113144, "grad_norm": 0.18642264604568481, "learning_rate": 1.873405362701104e-05, "loss": 0.5316, "step": 2475 }, { "epoch": 0.6866333887964503, "grad_norm": 0.20203615725040436, "learning_rate": 1.8729267077944717e-05, "loss": 0.5235, "step": 2476 }, { "epoch": 0.6869107043815862, "grad_norm": 0.18540050089359283, "learning_rate": 1.872447931338335e-05, "loss": 0.5488, "step": 2477 }, { "epoch": 0.6871880199667221, "grad_norm": 0.19175854325294495, "learning_rate": 1.8719690334261148e-05, "loss": 0.5529, "step": 2478 }, { "epoch": 0.687465335551858, "grad_norm": 0.18168555200099945, "learning_rate": 1.8714900141512574e-05, "loss": 0.5119, "step": 2479 }, { "epoch": 0.687742651136994, "grad_norm": 0.1855335384607315, "learning_rate": 1.871010873607233e-05, "loss": 0.5448, "step": 2480 }, { "epoch": 0.6880199667221298, "grad_norm": 0.1834007054567337, "learning_rate": 1.870531611887533e-05, "loss": 0.5583, "step": 2481 }, { "epoch": 0.6882972823072657, "grad_norm": 0.1926104575395584, "learning_rate": 1.870052229085675e-05, "loss": 0.5549, "step": 2482 }, { "epoch": 0.6885745978924016, "grad_norm": 0.19106236100196838, "learning_rate": 1.8695727252951995e-05, "loss": 0.5146, "step": 2483 }, { "epoch": 0.6888519134775375, "grad_norm": 0.18813811242580414, "learning_rate": 1.8690931006096695e-05, "loss": 0.5773, "step": 2484 }, { "epoch": 0.6891292290626734, "grad_norm": 0.19836729764938354, "learning_rate": 1.8686133551226735e-05, "loss": 0.5793, "step": 2485 }, { "epoch": 0.6894065446478093, "grad_norm": 0.1816731095314026, "learning_rate": 1.8681334889278217e-05, "loss": 0.5205, "step": 2486 }, { "epoch": 0.6896838602329451, "grad_norm": 0.1861170530319214, "learning_rate": 1.8676535021187495e-05, "loss": 0.5589, "step": 2487 }, { "epoch": 0.689961175818081, "grad_norm": 0.18680687248706818, "learning_rate": 1.867173394789114e-05, "loss": 0.5283, "step": 2488 }, { "epoch": 0.6902384914032169, "grad_norm": 0.19478975236415863, "learning_rate": 1.866693167032598e-05, "loss": 0.5559, "step": 2489 }, { "epoch": 0.6905158069883528, "grad_norm": 0.19333085417747498, "learning_rate": 1.8662128189429058e-05, "loss": 0.516, "step": 2490 }, { "epoch": 0.6907931225734887, "grad_norm": 0.19102855026721954, "learning_rate": 1.8657323506137668e-05, "loss": 0.5587, "step": 2491 }, { "epoch": 0.6910704381586246, "grad_norm": 0.19927440583705902, "learning_rate": 1.8652517621389324e-05, "loss": 0.5475, "step": 2492 }, { "epoch": 0.6913477537437605, "grad_norm": 0.19673167169094086, "learning_rate": 1.8647710536121784e-05, "loss": 0.5504, "step": 2493 }, { "epoch": 0.6916250693288963, "grad_norm": 0.19291207194328308, "learning_rate": 1.8642902251273038e-05, "loss": 0.555, "step": 2494 }, { "epoch": 0.6919023849140322, "grad_norm": 0.22686271369457245, "learning_rate": 1.863809276778131e-05, "loss": 0.5169, "step": 2495 }, { "epoch": 0.6921797004991681, "grad_norm": 0.19432714581489563, "learning_rate": 1.8633282086585057e-05, "loss": 0.5287, "step": 2496 }, { "epoch": 0.692457016084304, "grad_norm": 0.18512091040611267, "learning_rate": 1.8628470208622972e-05, "loss": 0.5452, "step": 2497 }, { "epoch": 0.6927343316694399, "grad_norm": 0.18711940944194794, "learning_rate": 1.8623657134833976e-05, "loss": 0.5489, "step": 2498 }, { "epoch": 0.6930116472545758, "grad_norm": 0.19019177556037903, "learning_rate": 1.8618842866157234e-05, "loss": 0.5188, "step": 2499 }, { "epoch": 0.6932889628397116, "grad_norm": 0.18888919055461884, "learning_rate": 1.861402740353213e-05, "loss": 0.5563, "step": 2500 }, { "epoch": 0.6935662784248475, "grad_norm": 0.19487237930297852, "learning_rate": 1.8609210747898293e-05, "loss": 0.5627, "step": 2501 }, { "epoch": 0.6938435940099834, "grad_norm": 0.18301233649253845, "learning_rate": 1.8604392900195573e-05, "loss": 0.5539, "step": 2502 }, { "epoch": 0.6941209095951193, "grad_norm": 0.18450453877449036, "learning_rate": 1.8599573861364074e-05, "loss": 0.5238, "step": 2503 }, { "epoch": 0.6943982251802552, "grad_norm": 0.19734638929367065, "learning_rate": 1.8594753632344104e-05, "loss": 0.5619, "step": 2504 }, { "epoch": 0.6946755407653911, "grad_norm": 0.1974724531173706, "learning_rate": 1.858993221407622e-05, "loss": 0.5805, "step": 2505 }, { "epoch": 0.694952856350527, "grad_norm": 0.20074540376663208, "learning_rate": 1.858510960750122e-05, "loss": 0.5515, "step": 2506 }, { "epoch": 0.6952301719356628, "grad_norm": 0.18881772458553314, "learning_rate": 1.8580285813560104e-05, "loss": 0.5371, "step": 2507 }, { "epoch": 0.6955074875207987, "grad_norm": 0.19910936057567596, "learning_rate": 1.8575460833194142e-05, "loss": 0.5288, "step": 2508 }, { "epoch": 0.6957848031059346, "grad_norm": 0.18827465176582336, "learning_rate": 1.8570634667344795e-05, "loss": 0.5591, "step": 2509 }, { "epoch": 0.6960621186910705, "grad_norm": 0.18730634450912476, "learning_rate": 1.8565807316953796e-05, "loss": 0.5126, "step": 2510 }, { "epoch": 0.6963394342762064, "grad_norm": 0.18552148342132568, "learning_rate": 1.856097878296307e-05, "loss": 0.5208, "step": 2511 }, { "epoch": 0.6966167498613423, "grad_norm": 0.18370574712753296, "learning_rate": 1.8556149066314803e-05, "loss": 0.5122, "step": 2512 }, { "epoch": 0.6968940654464781, "grad_norm": 0.19410766661167145, "learning_rate": 1.8551318167951403e-05, "loss": 0.5062, "step": 2513 }, { "epoch": 0.697171381031614, "grad_norm": 0.1899997889995575, "learning_rate": 1.85464860888155e-05, "loss": 0.5355, "step": 2514 }, { "epoch": 0.6974486966167499, "grad_norm": 0.2276785969734192, "learning_rate": 1.854165282984996e-05, "loss": 0.5434, "step": 2515 }, { "epoch": 0.6977260122018858, "grad_norm": 0.20629100501537323, "learning_rate": 1.8536818391997884e-05, "loss": 0.5434, "step": 2516 }, { "epoch": 0.6980033277870217, "grad_norm": 0.1940404623746872, "learning_rate": 1.8531982776202598e-05, "loss": 0.5566, "step": 2517 }, { "epoch": 0.6982806433721576, "grad_norm": 0.18856097757816315, "learning_rate": 1.8527145983407658e-05, "loss": 0.5414, "step": 2518 }, { "epoch": 0.6985579589572934, "grad_norm": 0.18844252824783325, "learning_rate": 1.8522308014556843e-05, "loss": 0.5535, "step": 2519 }, { "epoch": 0.6988352745424293, "grad_norm": 0.19054186344146729, "learning_rate": 1.8517468870594188e-05, "loss": 0.5436, "step": 2520 }, { "epoch": 0.6991125901275652, "grad_norm": 0.1948595494031906, "learning_rate": 1.8512628552463917e-05, "loss": 0.5589, "step": 2521 }, { "epoch": 0.6993899057127011, "grad_norm": 0.2020605355501175, "learning_rate": 1.850778706111052e-05, "loss": 0.5586, "step": 2522 }, { "epoch": 0.699667221297837, "grad_norm": 0.1909698247909546, "learning_rate": 1.8502944397478693e-05, "loss": 0.5231, "step": 2523 }, { "epoch": 0.6999445368829729, "grad_norm": 0.19061705470085144, "learning_rate": 1.849810056251337e-05, "loss": 0.5436, "step": 2524 }, { "epoch": 0.7002218524681088, "grad_norm": 0.18788163363933563, "learning_rate": 1.8493255557159704e-05, "loss": 0.5223, "step": 2525 }, { "epoch": 0.7004991680532446, "grad_norm": 0.18889035284519196, "learning_rate": 1.8488409382363095e-05, "loss": 0.5178, "step": 2526 }, { "epoch": 0.7007764836383805, "grad_norm": 0.18843677639961243, "learning_rate": 1.8483562039069157e-05, "loss": 0.5535, "step": 2527 }, { "epoch": 0.7010537992235164, "grad_norm": 0.195985808968544, "learning_rate": 1.847871352822373e-05, "loss": 0.5592, "step": 2528 }, { "epoch": 0.7013311148086523, "grad_norm": 0.1854049116373062, "learning_rate": 1.8473863850772897e-05, "loss": 0.5272, "step": 2529 }, { "epoch": 0.7016084303937882, "grad_norm": 0.17917431890964508, "learning_rate": 1.8469013007662946e-05, "loss": 0.5239, "step": 2530 }, { "epoch": 0.7018857459789241, "grad_norm": 0.18059198558330536, "learning_rate": 1.8464160999840417e-05, "loss": 0.565, "step": 2531 }, { "epoch": 0.7021630615640599, "grad_norm": 0.18871940672397614, "learning_rate": 1.8459307828252052e-05, "loss": 0.5422, "step": 2532 }, { "epoch": 0.7024403771491958, "grad_norm": 0.1852055937051773, "learning_rate": 1.845445349384485e-05, "loss": 0.5211, "step": 2533 }, { "epoch": 0.7027176927343317, "grad_norm": 0.1867264062166214, "learning_rate": 1.8449597997566005e-05, "loss": 0.5318, "step": 2534 }, { "epoch": 0.7029950083194676, "grad_norm": 0.1850994974374771, "learning_rate": 1.844474134036296e-05, "loss": 0.5375, "step": 2535 }, { "epoch": 0.7032723239046035, "grad_norm": 0.188653826713562, "learning_rate": 1.8439883523183377e-05, "loss": 0.5223, "step": 2536 }, { "epoch": 0.7035496394897394, "grad_norm": 0.19549886882305145, "learning_rate": 1.8435024546975142e-05, "loss": 0.5946, "step": 2537 }, { "epoch": 0.7038269550748752, "grad_norm": 0.19603100419044495, "learning_rate": 1.8430164412686375e-05, "loss": 0.5579, "step": 2538 }, { "epoch": 0.7041042706600111, "grad_norm": 0.27463892102241516, "learning_rate": 1.8425303121265414e-05, "loss": 0.561, "step": 2539 }, { "epoch": 0.704381586245147, "grad_norm": 0.1948249191045761, "learning_rate": 1.842044067366082e-05, "loss": 0.5715, "step": 2540 }, { "epoch": 0.7046589018302829, "grad_norm": 0.191276416182518, "learning_rate": 1.8415577070821398e-05, "loss": 0.5448, "step": 2541 }, { "epoch": 0.7049362174154188, "grad_norm": 0.19777309894561768, "learning_rate": 1.841071231369616e-05, "loss": 0.5333, "step": 2542 }, { "epoch": 0.7052135330005547, "grad_norm": 0.1844799816608429, "learning_rate": 1.8405846403234346e-05, "loss": 0.5455, "step": 2543 }, { "epoch": 0.7054908485856906, "grad_norm": 0.17811760306358337, "learning_rate": 1.840097934038543e-05, "loss": 0.5307, "step": 2544 }, { "epoch": 0.7057681641708264, "grad_norm": 0.1874844878911972, "learning_rate": 1.8396111126099094e-05, "loss": 0.5755, "step": 2545 }, { "epoch": 0.7060454797559623, "grad_norm": 0.20174047350883484, "learning_rate": 1.839124176132527e-05, "loss": 0.5422, "step": 2546 }, { "epoch": 0.7063227953410982, "grad_norm": 0.1862955093383789, "learning_rate": 1.838637124701409e-05, "loss": 0.5469, "step": 2547 }, { "epoch": 0.7066001109262341, "grad_norm": 0.18790222704410553, "learning_rate": 1.8381499584115924e-05, "loss": 0.5472, "step": 2548 }, { "epoch": 0.70687742651137, "grad_norm": 0.17207522690296173, "learning_rate": 1.8376626773581358e-05, "loss": 0.5074, "step": 2549 }, { "epoch": 0.7071547420965059, "grad_norm": 0.19320419430732727, "learning_rate": 1.8371752816361215e-05, "loss": 0.5416, "step": 2550 }, { "epoch": 0.7074320576816417, "grad_norm": 0.19113574922084808, "learning_rate": 1.8366877713406526e-05, "loss": 0.533, "step": 2551 }, { "epoch": 0.7077093732667776, "grad_norm": 0.19141121208667755, "learning_rate": 1.8362001465668554e-05, "loss": 0.5435, "step": 2552 }, { "epoch": 0.7079866888519135, "grad_norm": 0.17392635345458984, "learning_rate": 1.8357124074098788e-05, "loss": 0.5126, "step": 2553 }, { "epoch": 0.7082640044370494, "grad_norm": 0.18224339187145233, "learning_rate": 1.8352245539648933e-05, "loss": 0.5369, "step": 2554 }, { "epoch": 0.7085413200221853, "grad_norm": 0.193458691239357, "learning_rate": 1.834736586327092e-05, "loss": 0.5581, "step": 2555 }, { "epoch": 0.7088186356073212, "grad_norm": 0.19396451115608215, "learning_rate": 1.8342485045916902e-05, "loss": 0.546, "step": 2556 }, { "epoch": 0.709095951192457, "grad_norm": 0.2662739157676697, "learning_rate": 1.8337603088539263e-05, "loss": 0.5557, "step": 2557 }, { "epoch": 0.7093732667775929, "grad_norm": 0.18841521441936493, "learning_rate": 1.8332719992090592e-05, "loss": 0.5359, "step": 2558 }, { "epoch": 0.7096505823627288, "grad_norm": 0.20101507008075714, "learning_rate": 1.8327835757523716e-05, "loss": 0.54, "step": 2559 }, { "epoch": 0.7099278979478647, "grad_norm": 0.17808422446250916, "learning_rate": 1.832295038579168e-05, "loss": 0.5263, "step": 2560 }, { "epoch": 0.7102052135330006, "grad_norm": 0.19543784856796265, "learning_rate": 1.8318063877847747e-05, "loss": 0.5342, "step": 2561 }, { "epoch": 0.7104825291181365, "grad_norm": 0.1944831758737564, "learning_rate": 1.8313176234645406e-05, "loss": 0.4973, "step": 2562 }, { "epoch": 0.7107598447032724, "grad_norm": 0.19557087123394012, "learning_rate": 1.8308287457138362e-05, "loss": 0.533, "step": 2563 }, { "epoch": 0.7110371602884082, "grad_norm": 0.19200831651687622, "learning_rate": 1.8303397546280547e-05, "loss": 0.5417, "step": 2564 }, { "epoch": 0.7113144758735441, "grad_norm": 0.1837347149848938, "learning_rate": 1.829850650302612e-05, "loss": 0.5594, "step": 2565 }, { "epoch": 0.71159179145868, "grad_norm": 0.1859401911497116, "learning_rate": 1.8293614328329437e-05, "loss": 0.5249, "step": 2566 }, { "epoch": 0.7118691070438159, "grad_norm": 0.18670018017292023, "learning_rate": 1.8288721023145105e-05, "loss": 0.5339, "step": 2567 }, { "epoch": 0.7121464226289518, "grad_norm": 0.19364280998706818, "learning_rate": 1.8283826588427927e-05, "loss": 0.5729, "step": 2568 }, { "epoch": 0.7124237382140877, "grad_norm": 0.19787278771400452, "learning_rate": 1.827893102513295e-05, "loss": 0.5544, "step": 2569 }, { "epoch": 0.7127010537992235, "grad_norm": 0.19540858268737793, "learning_rate": 1.827403433421541e-05, "loss": 0.5233, "step": 2570 }, { "epoch": 0.7129783693843594, "grad_norm": 0.20480972528457642, "learning_rate": 1.8269136516630798e-05, "loss": 0.545, "step": 2571 }, { "epoch": 0.7132556849694953, "grad_norm": 0.20005930960178375, "learning_rate": 1.82642375733348e-05, "loss": 0.561, "step": 2572 }, { "epoch": 0.7135330005546312, "grad_norm": 0.18213188648223877, "learning_rate": 1.825933750528333e-05, "loss": 0.5277, "step": 2573 }, { "epoch": 0.7138103161397671, "grad_norm": 0.19505798816680908, "learning_rate": 1.8254436313432522e-05, "loss": 0.5283, "step": 2574 }, { "epoch": 0.714087631724903, "grad_norm": 0.1885930299758911, "learning_rate": 1.824953399873873e-05, "loss": 0.5189, "step": 2575 }, { "epoch": 0.7143649473100389, "grad_norm": 0.19202375411987305, "learning_rate": 1.824463056215852e-05, "loss": 0.5197, "step": 2576 }, { "epoch": 0.7146422628951747, "grad_norm": 0.19426311552524567, "learning_rate": 1.823972600464869e-05, "loss": 0.5243, "step": 2577 }, { "epoch": 0.7149195784803106, "grad_norm": 0.18350745737552643, "learning_rate": 1.8234820327166244e-05, "loss": 0.5283, "step": 2578 }, { "epoch": 0.7151968940654465, "grad_norm": 0.1888923943042755, "learning_rate": 1.822991353066841e-05, "loss": 0.5298, "step": 2579 }, { "epoch": 0.7154742096505824, "grad_norm": 0.19120195508003235, "learning_rate": 1.8225005616112636e-05, "loss": 0.5583, "step": 2580 }, { "epoch": 0.7157515252357183, "grad_norm": 0.18390871584415436, "learning_rate": 1.8220096584456587e-05, "loss": 0.5174, "step": 2581 }, { "epoch": 0.7160288408208542, "grad_norm": 0.19268232583999634, "learning_rate": 1.8215186436658142e-05, "loss": 0.5706, "step": 2582 }, { "epoch": 0.71630615640599, "grad_norm": 0.2088870108127594, "learning_rate": 1.82102751736754e-05, "loss": 0.5467, "step": 2583 }, { "epoch": 0.7165834719911259, "grad_norm": 0.18982863426208496, "learning_rate": 1.8205362796466682e-05, "loss": 0.547, "step": 2584 }, { "epoch": 0.7168607875762618, "grad_norm": 0.20448660850524902, "learning_rate": 1.820044930599052e-05, "loss": 0.5523, "step": 2585 }, { "epoch": 0.7171381031613977, "grad_norm": 0.19304388761520386, "learning_rate": 1.8195534703205674e-05, "loss": 0.5766, "step": 2586 }, { "epoch": 0.7174154187465336, "grad_norm": 0.20076531171798706, "learning_rate": 1.81906189890711e-05, "loss": 0.5327, "step": 2587 }, { "epoch": 0.7176927343316695, "grad_norm": 0.24745801091194153, "learning_rate": 1.8185702164546e-05, "loss": 0.5386, "step": 2588 }, { "epoch": 0.7179700499168054, "grad_norm": 0.1958095282316208, "learning_rate": 1.8180784230589758e-05, "loss": 0.5592, "step": 2589 }, { "epoch": 0.7182473655019412, "grad_norm": 0.1962784081697464, "learning_rate": 1.8175865188162007e-05, "loss": 0.5547, "step": 2590 }, { "epoch": 0.7185246810870771, "grad_norm": 0.21214796602725983, "learning_rate": 1.8170945038222577e-05, "loss": 0.5789, "step": 2591 }, { "epoch": 0.718801996672213, "grad_norm": 0.18657919764518738, "learning_rate": 1.8166023781731523e-05, "loss": 0.5909, "step": 2592 }, { "epoch": 0.7190793122573489, "grad_norm": 0.1949455291032791, "learning_rate": 1.816110141964911e-05, "loss": 0.5804, "step": 2593 }, { "epoch": 0.7193566278424848, "grad_norm": 0.20440496504306793, "learning_rate": 1.8156177952935824e-05, "loss": 0.5819, "step": 2594 }, { "epoch": 0.7196339434276207, "grad_norm": 0.19375431537628174, "learning_rate": 1.815125338255236e-05, "loss": 0.5707, "step": 2595 }, { "epoch": 0.7199112590127565, "grad_norm": 0.2573017477989197, "learning_rate": 1.8146327709459635e-05, "loss": 0.5622, "step": 2596 }, { "epoch": 0.7201885745978924, "grad_norm": 0.19676506519317627, "learning_rate": 1.8141400934618775e-05, "loss": 0.5668, "step": 2597 }, { "epoch": 0.7204658901830283, "grad_norm": 0.19473743438720703, "learning_rate": 1.8136473058991126e-05, "loss": 0.5654, "step": 2598 }, { "epoch": 0.7207432057681642, "grad_norm": 0.18709680438041687, "learning_rate": 1.8131544083538253e-05, "loss": 0.5283, "step": 2599 }, { "epoch": 0.7210205213533001, "grad_norm": 0.19263465702533722, "learning_rate": 1.812661400922192e-05, "loss": 0.5379, "step": 2600 }, { "epoch": 0.721297836938436, "grad_norm": 0.19263778626918793, "learning_rate": 1.8121682837004118e-05, "loss": 0.5678, "step": 2601 }, { "epoch": 0.7215751525235718, "grad_norm": 0.19861197471618652, "learning_rate": 1.8116750567847058e-05, "loss": 0.5456, "step": 2602 }, { "epoch": 0.7218524681087077, "grad_norm": 0.1754927933216095, "learning_rate": 1.8111817202713143e-05, "loss": 0.5164, "step": 2603 }, { "epoch": 0.7221297836938436, "grad_norm": 0.19112901389598846, "learning_rate": 1.8106882742565008e-05, "loss": 0.5362, "step": 2604 }, { "epoch": 0.7224070992789795, "grad_norm": 0.1980351209640503, "learning_rate": 1.8101947188365503e-05, "loss": 0.5687, "step": 2605 }, { "epoch": 0.7226844148641154, "grad_norm": 0.19299866259098053, "learning_rate": 1.8097010541077678e-05, "loss": 0.5589, "step": 2606 }, { "epoch": 0.7229617304492513, "grad_norm": 0.19257931411266327, "learning_rate": 1.809207280166481e-05, "loss": 0.5701, "step": 2607 }, { "epoch": 0.7232390460343872, "grad_norm": 0.1912074089050293, "learning_rate": 1.8087133971090374e-05, "loss": 0.544, "step": 2608 }, { "epoch": 0.723516361619523, "grad_norm": 0.19741860032081604, "learning_rate": 1.808219405031808e-05, "loss": 0.5527, "step": 2609 }, { "epoch": 0.7237936772046589, "grad_norm": 0.18676535785198212, "learning_rate": 1.807725304031182e-05, "loss": 0.5539, "step": 2610 }, { "epoch": 0.7240709927897948, "grad_norm": 0.17912089824676514, "learning_rate": 1.807231094203573e-05, "loss": 0.5342, "step": 2611 }, { "epoch": 0.7243483083749307, "grad_norm": 0.18593581020832062, "learning_rate": 1.806736775645414e-05, "loss": 0.5366, "step": 2612 }, { "epoch": 0.7246256239600666, "grad_norm": 0.20467206835746765, "learning_rate": 1.8062423484531592e-05, "loss": 0.527, "step": 2613 }, { "epoch": 0.7249029395452025, "grad_norm": 0.18463392555713654, "learning_rate": 1.8057478127232854e-05, "loss": 0.545, "step": 2614 }, { "epoch": 0.7251802551303383, "grad_norm": 0.20011630654335022, "learning_rate": 1.805253168552289e-05, "loss": 0.5242, "step": 2615 }, { "epoch": 0.7254575707154742, "grad_norm": 0.18936677277088165, "learning_rate": 1.804758416036688e-05, "loss": 0.5285, "step": 2616 }, { "epoch": 0.7257348863006101, "grad_norm": 0.19428247213363647, "learning_rate": 1.804263555273022e-05, "loss": 0.5448, "step": 2617 }, { "epoch": 0.726012201885746, "grad_norm": 0.18848338723182678, "learning_rate": 1.8037685863578514e-05, "loss": 0.5673, "step": 2618 }, { "epoch": 0.7262895174708819, "grad_norm": 0.19170448184013367, "learning_rate": 1.803273509387758e-05, "loss": 0.5234, "step": 2619 }, { "epoch": 0.7265668330560178, "grad_norm": 0.18249256908893585, "learning_rate": 1.8027783244593443e-05, "loss": 0.5377, "step": 2620 }, { "epoch": 0.7268441486411537, "grad_norm": 0.18726296722888947, "learning_rate": 1.8022830316692336e-05, "loss": 0.5381, "step": 2621 }, { "epoch": 0.7271214642262895, "grad_norm": 0.1881718784570694, "learning_rate": 1.801787631114071e-05, "loss": 0.5453, "step": 2622 }, { "epoch": 0.7273987798114254, "grad_norm": 0.19469492137432098, "learning_rate": 1.8012921228905225e-05, "loss": 0.5231, "step": 2623 }, { "epoch": 0.7276760953965613, "grad_norm": 0.18475371599197388, "learning_rate": 1.8007965070952743e-05, "loss": 0.5323, "step": 2624 }, { "epoch": 0.7279534109816972, "grad_norm": 0.18544836342334747, "learning_rate": 1.8003007838250343e-05, "loss": 0.5345, "step": 2625 }, { "epoch": 0.7282307265668331, "grad_norm": 0.19587865471839905, "learning_rate": 1.799804953176532e-05, "loss": 0.5168, "step": 2626 }, { "epoch": 0.728508042151969, "grad_norm": 0.23171131312847137, "learning_rate": 1.7993090152465163e-05, "loss": 0.5235, "step": 2627 }, { "epoch": 0.7287853577371048, "grad_norm": 0.19391484558582306, "learning_rate": 1.7988129701317582e-05, "loss": 0.5345, "step": 2628 }, { "epoch": 0.7290626733222407, "grad_norm": 0.19040954113006592, "learning_rate": 1.7983168179290488e-05, "loss": 0.5226, "step": 2629 }, { "epoch": 0.7293399889073766, "grad_norm": 0.1814422607421875, "learning_rate": 1.797820558735201e-05, "loss": 0.5291, "step": 2630 }, { "epoch": 0.7296173044925125, "grad_norm": 0.19977906346321106, "learning_rate": 1.797324192647048e-05, "loss": 0.5641, "step": 2631 }, { "epoch": 0.7298946200776484, "grad_norm": 0.19043037295341492, "learning_rate": 1.796827719761444e-05, "loss": 0.5686, "step": 2632 }, { "epoch": 0.7301719356627843, "grad_norm": 0.19778837263584137, "learning_rate": 1.7963311401752638e-05, "loss": 0.5648, "step": 2633 }, { "epoch": 0.7304492512479202, "grad_norm": 0.45009469985961914, "learning_rate": 1.7958344539854034e-05, "loss": 0.5244, "step": 2634 }, { "epoch": 0.730726566833056, "grad_norm": 0.1936669498682022, "learning_rate": 1.7953376612887793e-05, "loss": 0.5296, "step": 2635 }, { "epoch": 0.7310038824181919, "grad_norm": 0.19970230758190155, "learning_rate": 1.7948407621823287e-05, "loss": 0.5832, "step": 2636 }, { "epoch": 0.7312811980033278, "grad_norm": 0.19142326712608337, "learning_rate": 1.794343756763011e-05, "loss": 0.5478, "step": 2637 }, { "epoch": 0.7315585135884637, "grad_norm": 0.18312163650989532, "learning_rate": 1.7938466451278034e-05, "loss": 0.5382, "step": 2638 }, { "epoch": 0.7318358291735996, "grad_norm": 0.19482283294200897, "learning_rate": 1.793349427373707e-05, "loss": 0.5392, "step": 2639 }, { "epoch": 0.7321131447587355, "grad_norm": 0.20726899802684784, "learning_rate": 1.7928521035977413e-05, "loss": 0.5597, "step": 2640 }, { "epoch": 0.7323904603438713, "grad_norm": 0.20082899928092957, "learning_rate": 1.7923546738969478e-05, "loss": 0.5293, "step": 2641 }, { "epoch": 0.7326677759290072, "grad_norm": 0.19428935647010803, "learning_rate": 1.791857138368388e-05, "loss": 0.5428, "step": 2642 }, { "epoch": 0.7329450915141431, "grad_norm": 0.19222451746463776, "learning_rate": 1.791359497109144e-05, "loss": 0.5417, "step": 2643 }, { "epoch": 0.733222407099279, "grad_norm": 0.1904270201921463, "learning_rate": 1.7908617502163188e-05, "loss": 0.5368, "step": 2644 }, { "epoch": 0.7334997226844149, "grad_norm": 0.1973213404417038, "learning_rate": 1.7903638977870372e-05, "loss": 0.5347, "step": 2645 }, { "epoch": 0.7337770382695508, "grad_norm": 0.1838080883026123, "learning_rate": 1.7898659399184415e-05, "loss": 0.5239, "step": 2646 }, { "epoch": 0.7340543538546866, "grad_norm": 0.18665340542793274, "learning_rate": 1.7893678767076982e-05, "loss": 0.5469, "step": 2647 }, { "epoch": 0.7343316694398225, "grad_norm": 0.18644295632839203, "learning_rate": 1.788869708251991e-05, "loss": 0.5423, "step": 2648 }, { "epoch": 0.7346089850249584, "grad_norm": 0.18912896513938904, "learning_rate": 1.788371434648528e-05, "loss": 0.5289, "step": 2649 }, { "epoch": 0.7348863006100943, "grad_norm": 0.1896572709083557, "learning_rate": 1.7878730559945327e-05, "loss": 0.544, "step": 2650 }, { "epoch": 0.7351636161952302, "grad_norm": 0.18456673622131348, "learning_rate": 1.7873745723872545e-05, "loss": 0.5437, "step": 2651 }, { "epoch": 0.7354409317803661, "grad_norm": 0.18309368193149567, "learning_rate": 1.7868759839239596e-05, "loss": 0.5079, "step": 2652 }, { "epoch": 0.735718247365502, "grad_norm": 0.18934939801692963, "learning_rate": 1.7863772907019356e-05, "loss": 0.5473, "step": 2653 }, { "epoch": 0.7359955629506378, "grad_norm": 0.19800709187984467, "learning_rate": 1.7858784928184916e-05, "loss": 0.5386, "step": 2654 }, { "epoch": 0.7362728785357737, "grad_norm": 0.20616304874420166, "learning_rate": 1.7853795903709556e-05, "loss": 0.5388, "step": 2655 }, { "epoch": 0.7365501941209096, "grad_norm": 0.18855001032352448, "learning_rate": 1.7848805834566768e-05, "loss": 0.5499, "step": 2656 }, { "epoch": 0.7368275097060455, "grad_norm": 0.18595397472381592, "learning_rate": 1.7843814721730244e-05, "loss": 0.5599, "step": 2657 }, { "epoch": 0.7371048252911814, "grad_norm": 0.1929646134376526, "learning_rate": 1.7838822566173894e-05, "loss": 0.5412, "step": 2658 }, { "epoch": 0.7373821408763173, "grad_norm": 0.19367007911205292, "learning_rate": 1.7833829368871808e-05, "loss": 0.5328, "step": 2659 }, { "epoch": 0.7376594564614531, "grad_norm": 0.1873459815979004, "learning_rate": 1.7828835130798296e-05, "loss": 0.5444, "step": 2660 }, { "epoch": 0.737936772046589, "grad_norm": 0.20193496346473694, "learning_rate": 1.7823839852927867e-05, "loss": 0.5287, "step": 2661 }, { "epoch": 0.7382140876317249, "grad_norm": 0.18771541118621826, "learning_rate": 1.7818843536235224e-05, "loss": 0.5404, "step": 2662 }, { "epoch": 0.7384914032168608, "grad_norm": 0.1829247921705246, "learning_rate": 1.781384618169529e-05, "loss": 0.5512, "step": 2663 }, { "epoch": 0.7387687188019967, "grad_norm": 0.2635699212551117, "learning_rate": 1.7808847790283183e-05, "loss": 0.5678, "step": 2664 }, { "epoch": 0.7390460343871326, "grad_norm": 0.1860508918762207, "learning_rate": 1.780384836297421e-05, "loss": 0.5348, "step": 2665 }, { "epoch": 0.7393233499722685, "grad_norm": 0.18465931713581085, "learning_rate": 1.7798847900743904e-05, "loss": 0.5043, "step": 2666 }, { "epoch": 0.7396006655574043, "grad_norm": 0.18881580233573914, "learning_rate": 1.779384640456798e-05, "loss": 0.5251, "step": 2667 }, { "epoch": 0.7398779811425402, "grad_norm": 0.18016085028648376, "learning_rate": 1.7788843875422367e-05, "loss": 0.5585, "step": 2668 }, { "epoch": 0.7401552967276761, "grad_norm": 0.19220809638500214, "learning_rate": 1.7783840314283183e-05, "loss": 0.5263, "step": 2669 }, { "epoch": 0.740432612312812, "grad_norm": 0.18954598903656006, "learning_rate": 1.7778835722126764e-05, "loss": 0.542, "step": 2670 }, { "epoch": 0.7407099278979479, "grad_norm": 0.18674500286579132, "learning_rate": 1.7773830099929635e-05, "loss": 0.5247, "step": 2671 }, { "epoch": 0.7409872434830838, "grad_norm": 0.18231706321239471, "learning_rate": 1.776882344866853e-05, "loss": 0.5115, "step": 2672 }, { "epoch": 0.7412645590682196, "grad_norm": 0.18846355378627777, "learning_rate": 1.776381576932037e-05, "loss": 0.5186, "step": 2673 }, { "epoch": 0.7415418746533555, "grad_norm": 0.18721552193164825, "learning_rate": 1.7758807062862292e-05, "loss": 0.5313, "step": 2674 }, { "epoch": 0.7418191902384914, "grad_norm": 0.191980242729187, "learning_rate": 1.775379733027163e-05, "loss": 0.5505, "step": 2675 }, { "epoch": 0.7420965058236273, "grad_norm": 0.18773001432418823, "learning_rate": 1.7748786572525907e-05, "loss": 0.5334, "step": 2676 }, { "epoch": 0.7423738214087632, "grad_norm": 0.19638672471046448, "learning_rate": 1.7743774790602864e-05, "loss": 0.5718, "step": 2677 }, { "epoch": 0.7426511369938991, "grad_norm": 0.19333893060684204, "learning_rate": 1.7738761985480425e-05, "loss": 0.5479, "step": 2678 }, { "epoch": 0.742928452579035, "grad_norm": 0.20004448294639587, "learning_rate": 1.7733748158136725e-05, "loss": 0.5331, "step": 2679 }, { "epoch": 0.7432057681641708, "grad_norm": 0.19896887242794037, "learning_rate": 1.7728733309550097e-05, "loss": 0.5484, "step": 2680 }, { "epoch": 0.7434830837493067, "grad_norm": 0.1829969435930252, "learning_rate": 1.7723717440699066e-05, "loss": 0.5459, "step": 2681 }, { "epoch": 0.7437603993344426, "grad_norm": 0.1870088130235672, "learning_rate": 1.771870055256236e-05, "loss": 0.5628, "step": 2682 }, { "epoch": 0.7440377149195785, "grad_norm": 0.18835529685020447, "learning_rate": 1.7713682646118914e-05, "loss": 0.5439, "step": 2683 }, { "epoch": 0.7443150305047144, "grad_norm": 0.20633459091186523, "learning_rate": 1.7708663722347845e-05, "loss": 0.5677, "step": 2684 }, { "epoch": 0.7445923460898503, "grad_norm": 0.18712858855724335, "learning_rate": 1.7703643782228488e-05, "loss": 0.5666, "step": 2685 }, { "epoch": 0.7448696616749861, "grad_norm": 0.18475639820098877, "learning_rate": 1.769862282674036e-05, "loss": 0.5548, "step": 2686 }, { "epoch": 0.745146977260122, "grad_norm": 0.19311586022377014, "learning_rate": 1.769360085686318e-05, "loss": 0.5428, "step": 2687 }, { "epoch": 0.7454242928452579, "grad_norm": 0.19635801017284393, "learning_rate": 1.7688577873576872e-05, "loss": 0.5228, "step": 2688 }, { "epoch": 0.7457016084303938, "grad_norm": 0.18878091871738434, "learning_rate": 1.7683553877861554e-05, "loss": 0.4995, "step": 2689 }, { "epoch": 0.7459789240155297, "grad_norm": 0.1772637963294983, "learning_rate": 1.7678528870697537e-05, "loss": 0.5241, "step": 2690 }, { "epoch": 0.7462562396006656, "grad_norm": 0.18778811395168304, "learning_rate": 1.7673502853065335e-05, "loss": 0.5247, "step": 2691 }, { "epoch": 0.7465335551858014, "grad_norm": 0.20334573090076447, "learning_rate": 1.7668475825945656e-05, "loss": 0.5369, "step": 2692 }, { "epoch": 0.7468108707709373, "grad_norm": 0.18709522485733032, "learning_rate": 1.766344779031941e-05, "loss": 0.5701, "step": 2693 }, { "epoch": 0.7470881863560732, "grad_norm": 0.18577025830745697, "learning_rate": 1.7658418747167694e-05, "loss": 0.5409, "step": 2694 }, { "epoch": 0.7473655019412091, "grad_norm": 0.19616863131523132, "learning_rate": 1.765338869747181e-05, "loss": 0.5533, "step": 2695 }, { "epoch": 0.747642817526345, "grad_norm": 0.20094148814678192, "learning_rate": 1.764835764221326e-05, "loss": 0.5252, "step": 2696 }, { "epoch": 0.7479201331114809, "grad_norm": 0.19073578715324402, "learning_rate": 1.7643325582373728e-05, "loss": 0.52, "step": 2697 }, { "epoch": 0.7481974486966168, "grad_norm": 0.19425810873508453, "learning_rate": 1.7638292518935103e-05, "loss": 0.5612, "step": 2698 }, { "epoch": 0.7484747642817526, "grad_norm": 0.1896180808544159, "learning_rate": 1.7633258452879475e-05, "loss": 0.5504, "step": 2699 }, { "epoch": 0.7487520798668885, "grad_norm": 0.19124028086662292, "learning_rate": 1.762822338518912e-05, "loss": 0.5335, "step": 2700 }, { "epoch": 0.7490293954520244, "grad_norm": 0.19054248929023743, "learning_rate": 1.762318731684651e-05, "loss": 0.552, "step": 2701 }, { "epoch": 0.7493067110371603, "grad_norm": 0.18848967552185059, "learning_rate": 1.761815024883432e-05, "loss": 0.557, "step": 2702 }, { "epoch": 0.7495840266222962, "grad_norm": 0.1953321397304535, "learning_rate": 1.7613112182135406e-05, "loss": 0.5916, "step": 2703 }, { "epoch": 0.7498613422074321, "grad_norm": 0.18123508989810944, "learning_rate": 1.7608073117732848e-05, "loss": 0.5643, "step": 2704 }, { "epoch": 0.7501386577925679, "grad_norm": 0.19151508808135986, "learning_rate": 1.760303305660988e-05, "loss": 0.5512, "step": 2705 }, { "epoch": 0.7504159733777038, "grad_norm": 0.18512828648090363, "learning_rate": 1.7597991999749967e-05, "loss": 0.5627, "step": 2706 }, { "epoch": 0.7506932889628397, "grad_norm": 0.20286305248737335, "learning_rate": 1.7592949948136737e-05, "loss": 0.568, "step": 2707 }, { "epoch": 0.7509706045479756, "grad_norm": 0.20783564448356628, "learning_rate": 1.758790690275405e-05, "loss": 0.5563, "step": 2708 }, { "epoch": 0.7512479201331115, "grad_norm": 0.18659134209156036, "learning_rate": 1.7582862864585913e-05, "loss": 0.5285, "step": 2709 }, { "epoch": 0.7515252357182474, "grad_norm": 0.17614908516407013, "learning_rate": 1.757781783461657e-05, "loss": 0.4952, "step": 2710 }, { "epoch": 0.7518025513033832, "grad_norm": 0.18984104692935944, "learning_rate": 1.757277181383043e-05, "loss": 0.5564, "step": 2711 }, { "epoch": 0.7520798668885191, "grad_norm": 0.19199056923389435, "learning_rate": 1.756772480321211e-05, "loss": 0.5815, "step": 2712 }, { "epoch": 0.752357182473655, "grad_norm": 0.5123929381370544, "learning_rate": 1.7562676803746414e-05, "loss": 0.537, "step": 2713 }, { "epoch": 0.7526344980587909, "grad_norm": 0.1854097694158554, "learning_rate": 1.7557627816418337e-05, "loss": 0.5275, "step": 2714 }, { "epoch": 0.7529118136439268, "grad_norm": 0.1879512071609497, "learning_rate": 1.755257784221308e-05, "loss": 0.5292, "step": 2715 }, { "epoch": 0.7531891292290627, "grad_norm": 0.18993008136749268, "learning_rate": 1.7547526882116014e-05, "loss": 0.5282, "step": 2716 }, { "epoch": 0.7534664448141986, "grad_norm": 0.18528947234153748, "learning_rate": 1.7542474937112725e-05, "loss": 0.5457, "step": 2717 }, { "epoch": 0.7537437603993344, "grad_norm": 0.1859760731458664, "learning_rate": 1.753742200818898e-05, "loss": 0.5448, "step": 2718 }, { "epoch": 0.7540210759844703, "grad_norm": 0.17713625729084015, "learning_rate": 1.753236809633073e-05, "loss": 0.5479, "step": 2719 }, { "epoch": 0.7542983915696062, "grad_norm": 0.17718899250030518, "learning_rate": 1.7527313202524144e-05, "loss": 0.5378, "step": 2720 }, { "epoch": 0.7545757071547421, "grad_norm": 0.19346462190151215, "learning_rate": 1.752225732775555e-05, "loss": 0.552, "step": 2721 }, { "epoch": 0.754853022739878, "grad_norm": 0.17717614769935608, "learning_rate": 1.7517200473011488e-05, "loss": 0.5348, "step": 2722 }, { "epoch": 0.7551303383250139, "grad_norm": 0.1940585970878601, "learning_rate": 1.751214263927869e-05, "loss": 0.5405, "step": 2723 }, { "epoch": 0.7554076539101497, "grad_norm": 0.19863441586494446, "learning_rate": 1.7507083827544065e-05, "loss": 0.5357, "step": 2724 }, { "epoch": 0.7556849694952856, "grad_norm": 0.18913887441158295, "learning_rate": 1.7502024038794727e-05, "loss": 0.5658, "step": 2725 }, { "epoch": 0.7559622850804215, "grad_norm": 0.18873843550682068, "learning_rate": 1.7496963274017975e-05, "loss": 0.5774, "step": 2726 }, { "epoch": 0.7562396006655574, "grad_norm": 0.1888992041349411, "learning_rate": 1.7491901534201295e-05, "loss": 0.5319, "step": 2727 }, { "epoch": 0.7565169162506933, "grad_norm": 0.18841078877449036, "learning_rate": 1.7486838820332362e-05, "loss": 0.5542, "step": 2728 }, { "epoch": 0.7567942318358292, "grad_norm": 0.19633720815181732, "learning_rate": 1.7481775133399057e-05, "loss": 0.5615, "step": 2729 }, { "epoch": 0.757071547420965, "grad_norm": 0.19098395109176636, "learning_rate": 1.7476710474389434e-05, "loss": 0.5642, "step": 2730 }, { "epoch": 0.7573488630061009, "grad_norm": 0.1895277500152588, "learning_rate": 1.747164484429174e-05, "loss": 0.557, "step": 2731 }, { "epoch": 0.7576261785912368, "grad_norm": 0.18626920878887177, "learning_rate": 1.7466578244094417e-05, "loss": 0.5314, "step": 2732 }, { "epoch": 0.7579034941763727, "grad_norm": 0.1883586198091507, "learning_rate": 1.746151067478609e-05, "loss": 0.5457, "step": 2733 }, { "epoch": 0.7581808097615086, "grad_norm": 0.18349739909172058, "learning_rate": 1.745644213735558e-05, "loss": 0.5467, "step": 2734 }, { "epoch": 0.7584581253466445, "grad_norm": 0.17938470840454102, "learning_rate": 1.7451372632791888e-05, "loss": 0.5444, "step": 2735 }, { "epoch": 0.7587354409317804, "grad_norm": 0.19910098612308502, "learning_rate": 1.7446302162084215e-05, "loss": 0.5777, "step": 2736 }, { "epoch": 0.7590127565169162, "grad_norm": 0.20055991411209106, "learning_rate": 1.7441230726221936e-05, "loss": 0.5546, "step": 2737 }, { "epoch": 0.7592900721020521, "grad_norm": 0.18521596491336823, "learning_rate": 1.743615832619463e-05, "loss": 0.5239, "step": 2738 }, { "epoch": 0.759567387687188, "grad_norm": 0.19067248702049255, "learning_rate": 1.7431084962992052e-05, "loss": 0.5288, "step": 2739 }, { "epoch": 0.7598447032723239, "grad_norm": 0.18803349137306213, "learning_rate": 1.7426010637604152e-05, "loss": 0.5397, "step": 2740 }, { "epoch": 0.7601220188574598, "grad_norm": 0.1846192330121994, "learning_rate": 1.7420935351021062e-05, "loss": 0.5379, "step": 2741 }, { "epoch": 0.7603993344425957, "grad_norm": 0.1797967106103897, "learning_rate": 1.7415859104233108e-05, "loss": 0.5528, "step": 2742 }, { "epoch": 0.7606766500277315, "grad_norm": 0.18609130382537842, "learning_rate": 1.7410781898230797e-05, "loss": 0.5473, "step": 2743 }, { "epoch": 0.7609539656128674, "grad_norm": 0.23831035196781158, "learning_rate": 1.7405703734004837e-05, "loss": 0.5114, "step": 2744 }, { "epoch": 0.7612312811980033, "grad_norm": 0.16559574007987976, "learning_rate": 1.74006246125461e-05, "loss": 0.501, "step": 2745 }, { "epoch": 0.7615085967831392, "grad_norm": 0.1880342662334442, "learning_rate": 1.7395544534845663e-05, "loss": 0.5344, "step": 2746 }, { "epoch": 0.7617859123682751, "grad_norm": 0.1892349123954773, "learning_rate": 1.7390463501894778e-05, "loss": 0.5745, "step": 2747 }, { "epoch": 0.762063227953411, "grad_norm": 0.2029358148574829, "learning_rate": 1.7385381514684896e-05, "loss": 0.5206, "step": 2748 }, { "epoch": 0.7623405435385469, "grad_norm": 0.2009795904159546, "learning_rate": 1.7380298574207645e-05, "loss": 0.5765, "step": 2749 }, { "epoch": 0.7626178591236827, "grad_norm": 0.18493135273456573, "learning_rate": 1.737521468145484e-05, "loss": 0.567, "step": 2750 }, { "epoch": 0.7628951747088186, "grad_norm": 0.2014453411102295, "learning_rate": 1.7370129837418487e-05, "loss": 0.564, "step": 2751 }, { "epoch": 0.7631724902939545, "grad_norm": 0.19111500680446625, "learning_rate": 1.7365044043090766e-05, "loss": 0.5589, "step": 2752 }, { "epoch": 0.7634498058790904, "grad_norm": 0.19902296364307404, "learning_rate": 1.7359957299464062e-05, "loss": 0.5543, "step": 2753 }, { "epoch": 0.7637271214642263, "grad_norm": 0.19618061184883118, "learning_rate": 1.7354869607530923e-05, "loss": 0.5639, "step": 2754 }, { "epoch": 0.7640044370493622, "grad_norm": 0.17767266929149628, "learning_rate": 1.7349780968284094e-05, "loss": 0.5041, "step": 2755 }, { "epoch": 0.764281752634498, "grad_norm": 0.19212745130062103, "learning_rate": 1.7344691382716508e-05, "loss": 0.5416, "step": 2756 }, { "epoch": 0.7645590682196339, "grad_norm": 0.19896987080574036, "learning_rate": 1.7339600851821274e-05, "loss": 0.5505, "step": 2757 }, { "epoch": 0.7648363838047698, "grad_norm": 0.1840539574623108, "learning_rate": 1.7334509376591695e-05, "loss": 0.5373, "step": 2758 }, { "epoch": 0.7651136993899057, "grad_norm": 0.20208869874477386, "learning_rate": 1.7329416958021247e-05, "loss": 0.5553, "step": 2759 }, { "epoch": 0.7653910149750416, "grad_norm": 0.18945381045341492, "learning_rate": 1.7324323597103597e-05, "loss": 0.536, "step": 2760 }, { "epoch": 0.7656683305601775, "grad_norm": 0.19363372027873993, "learning_rate": 1.7319229294832597e-05, "loss": 0.5607, "step": 2761 }, { "epoch": 0.7659456461453134, "grad_norm": 0.20634472370147705, "learning_rate": 1.7314134052202272e-05, "loss": 0.5451, "step": 2762 }, { "epoch": 0.7662229617304492, "grad_norm": 0.19053952395915985, "learning_rate": 1.730903787020685e-05, "loss": 0.5661, "step": 2763 }, { "epoch": 0.7665002773155851, "grad_norm": 0.18681201338768005, "learning_rate": 1.7303940749840726e-05, "loss": 0.5312, "step": 2764 }, { "epoch": 0.766777592900721, "grad_norm": 0.19541014730930328, "learning_rate": 1.7298842692098488e-05, "loss": 0.5366, "step": 2765 }, { "epoch": 0.7670549084858569, "grad_norm": 0.19399814307689667, "learning_rate": 1.729374369797489e-05, "loss": 0.547, "step": 2766 }, { "epoch": 0.7673322240709928, "grad_norm": 0.19590160250663757, "learning_rate": 1.7288643768464892e-05, "loss": 0.5286, "step": 2767 }, { "epoch": 0.7676095396561287, "grad_norm": 0.18889807164669037, "learning_rate": 1.7283542904563625e-05, "loss": 0.5357, "step": 2768 }, { "epoch": 0.7678868552412645, "grad_norm": 0.1905566155910492, "learning_rate": 1.7278441107266395e-05, "loss": 0.5663, "step": 2769 }, { "epoch": 0.7681641708264004, "grad_norm": 0.19881530106067657, "learning_rate": 1.7273338377568707e-05, "loss": 0.5829, "step": 2770 }, { "epoch": 0.7684414864115363, "grad_norm": 0.17877154052257538, "learning_rate": 1.726823471646623e-05, "loss": 0.5607, "step": 2771 }, { "epoch": 0.7687188019966722, "grad_norm": 0.1801327019929886, "learning_rate": 1.7263130124954832e-05, "loss": 0.5608, "step": 2772 }, { "epoch": 0.7689961175818081, "grad_norm": 0.1976090669631958, "learning_rate": 1.7258024604030547e-05, "loss": 0.4987, "step": 2773 }, { "epoch": 0.769273433166944, "grad_norm": 0.18582318723201752, "learning_rate": 1.72529181546896e-05, "loss": 0.5422, "step": 2774 }, { "epoch": 0.7695507487520798, "grad_norm": 0.1935378760099411, "learning_rate": 1.7247810777928396e-05, "loss": 0.5464, "step": 2775 }, { "epoch": 0.7698280643372157, "grad_norm": 0.187955841422081, "learning_rate": 1.7242702474743517e-05, "loss": 0.514, "step": 2776 }, { "epoch": 0.7701053799223516, "grad_norm": 0.18268531560897827, "learning_rate": 1.7237593246131735e-05, "loss": 0.517, "step": 2777 }, { "epoch": 0.7703826955074875, "grad_norm": 0.18831866979599, "learning_rate": 1.7232483093089986e-05, "loss": 0.579, "step": 2778 }, { "epoch": 0.7706600110926234, "grad_norm": 0.20355799794197083, "learning_rate": 1.7227372016615402e-05, "loss": 0.5656, "step": 2779 }, { "epoch": 0.7709373266777593, "grad_norm": 0.1870361566543579, "learning_rate": 1.7222260017705286e-05, "loss": 0.5528, "step": 2780 }, { "epoch": 0.7712146422628952, "grad_norm": 0.2032066434621811, "learning_rate": 1.7217147097357127e-05, "loss": 0.5684, "step": 2781 }, { "epoch": 0.771491957848031, "grad_norm": 0.18692703545093536, "learning_rate": 1.7212033256568595e-05, "loss": 0.5654, "step": 2782 }, { "epoch": 0.7717692734331669, "grad_norm": 0.1919548362493515, "learning_rate": 1.7206918496337525e-05, "loss": 0.5801, "step": 2783 }, { "epoch": 0.7720465890183028, "grad_norm": 0.19089831411838531, "learning_rate": 1.7201802817661955e-05, "loss": 0.5617, "step": 2784 }, { "epoch": 0.7723239046034387, "grad_norm": 0.19473430514335632, "learning_rate": 1.7196686221540077e-05, "loss": 0.5376, "step": 2785 }, { "epoch": 0.7726012201885746, "grad_norm": 0.1860806941986084, "learning_rate": 1.7191568708970286e-05, "loss": 0.5541, "step": 2786 }, { "epoch": 0.7728785357737105, "grad_norm": 0.18971897661685944, "learning_rate": 1.7186450280951137e-05, "loss": 0.5514, "step": 2787 }, { "epoch": 0.7731558513588463, "grad_norm": 0.18644990026950836, "learning_rate": 1.7181330938481375e-05, "loss": 0.5504, "step": 2788 }, { "epoch": 0.7734331669439822, "grad_norm": 0.18728910386562347, "learning_rate": 1.717621068255992e-05, "loss": 0.5528, "step": 2789 }, { "epoch": 0.7737104825291181, "grad_norm": 0.19061507284641266, "learning_rate": 1.7171089514185857e-05, "loss": 0.5356, "step": 2790 }, { "epoch": 0.773987798114254, "grad_norm": 0.18289197981357574, "learning_rate": 1.7165967434358483e-05, "loss": 0.5487, "step": 2791 }, { "epoch": 0.7742651136993899, "grad_norm": 0.19676977396011353, "learning_rate": 1.716084444407723e-05, "loss": 0.5625, "step": 2792 }, { "epoch": 0.7745424292845258, "grad_norm": 0.18007270991802216, "learning_rate": 1.7155720544341746e-05, "loss": 0.543, "step": 2793 }, { "epoch": 0.7748197448696617, "grad_norm": 0.19169475138187408, "learning_rate": 1.715059573615183e-05, "loss": 0.5378, "step": 2794 }, { "epoch": 0.7750970604547975, "grad_norm": 0.1876417100429535, "learning_rate": 1.714547002050747e-05, "loss": 0.5304, "step": 2795 }, { "epoch": 0.7753743760399334, "grad_norm": 0.18966078758239746, "learning_rate": 1.714034339840883e-05, "loss": 0.5302, "step": 2796 }, { "epoch": 0.7756516916250693, "grad_norm": 0.1796397715806961, "learning_rate": 1.7135215870856253e-05, "loss": 0.5388, "step": 2797 }, { "epoch": 0.7759290072102052, "grad_norm": 0.19350911676883698, "learning_rate": 1.7130087438850252e-05, "loss": 0.553, "step": 2798 }, { "epoch": 0.7762063227953411, "grad_norm": 0.19393737614154816, "learning_rate": 1.7124958103391516e-05, "loss": 0.514, "step": 2799 }, { "epoch": 0.776483638380477, "grad_norm": 0.2177378535270691, "learning_rate": 1.711982786548092e-05, "loss": 0.5187, "step": 2800 }, { "epoch": 0.7767609539656128, "grad_norm": 0.18803465366363525, "learning_rate": 1.7114696726119505e-05, "loss": 0.5661, "step": 2801 }, { "epoch": 0.7770382695507487, "grad_norm": 0.19588807225227356, "learning_rate": 1.7109564686308498e-05, "loss": 0.5463, "step": 2802 }, { "epoch": 0.7773155851358846, "grad_norm": 0.18581606447696686, "learning_rate": 1.710443174704929e-05, "loss": 0.548, "step": 2803 }, { "epoch": 0.7775929007210205, "grad_norm": 0.18562249839305878, "learning_rate": 1.7099297909343455e-05, "loss": 0.5313, "step": 2804 }, { "epoch": 0.7778702163061564, "grad_norm": 0.17982806265354156, "learning_rate": 1.7094163174192744e-05, "loss": 0.5499, "step": 2805 }, { "epoch": 0.7781475318912923, "grad_norm": 0.18412619829177856, "learning_rate": 1.708902754259908e-05, "loss": 0.5495, "step": 2806 }, { "epoch": 0.7784248474764282, "grad_norm": 0.19657637178897858, "learning_rate": 1.7083891015564555e-05, "loss": 0.5565, "step": 2807 }, { "epoch": 0.778702163061564, "grad_norm": 0.17935718595981598, "learning_rate": 1.7078753594091445e-05, "loss": 0.5446, "step": 2808 }, { "epoch": 0.7789794786466999, "grad_norm": 0.18647761642932892, "learning_rate": 1.7073615279182198e-05, "loss": 0.5416, "step": 2809 }, { "epoch": 0.7792567942318358, "grad_norm": 0.19295147061347961, "learning_rate": 1.7068476071839434e-05, "loss": 0.5498, "step": 2810 }, { "epoch": 0.7795341098169717, "grad_norm": 0.19305361807346344, "learning_rate": 1.706333597306595e-05, "loss": 0.5613, "step": 2811 }, { "epoch": 0.7798114254021076, "grad_norm": 0.1887744814157486, "learning_rate": 1.7058194983864715e-05, "loss": 0.5493, "step": 2812 }, { "epoch": 0.7800887409872435, "grad_norm": 0.18676388263702393, "learning_rate": 1.7053053105238866e-05, "loss": 0.527, "step": 2813 }, { "epoch": 0.7803660565723793, "grad_norm": 0.1888217329978943, "learning_rate": 1.7047910338191732e-05, "loss": 0.5543, "step": 2814 }, { "epoch": 0.7806433721575152, "grad_norm": 0.19341908395290375, "learning_rate": 1.7042766683726793e-05, "loss": 0.5188, "step": 2815 }, { "epoch": 0.7809206877426511, "grad_norm": 0.18204385042190552, "learning_rate": 1.7037622142847717e-05, "loss": 0.541, "step": 2816 }, { "epoch": 0.781198003327787, "grad_norm": 0.20170709490776062, "learning_rate": 1.7032476716558338e-05, "loss": 0.5925, "step": 2817 }, { "epoch": 0.7814753189129229, "grad_norm": 0.18066342175006866, "learning_rate": 1.7027330405862668e-05, "loss": 0.5537, "step": 2818 }, { "epoch": 0.7817526344980588, "grad_norm": 0.18021415174007416, "learning_rate": 1.7022183211764886e-05, "loss": 0.5098, "step": 2819 }, { "epoch": 0.7820299500831946, "grad_norm": 0.19226256012916565, "learning_rate": 1.7017035135269345e-05, "loss": 0.5626, "step": 2820 }, { "epoch": 0.7823072656683305, "grad_norm": 0.1880798488855362, "learning_rate": 1.7011886177380572e-05, "loss": 0.5601, "step": 2821 }, { "epoch": 0.7825845812534664, "grad_norm": 0.19588159024715424, "learning_rate": 1.7006736339103267e-05, "loss": 0.56, "step": 2822 }, { "epoch": 0.7828618968386023, "grad_norm": 0.21878387033939362, "learning_rate": 1.7001585621442295e-05, "loss": 0.5683, "step": 2823 }, { "epoch": 0.7831392124237382, "grad_norm": 0.18837064504623413, "learning_rate": 1.6996434025402706e-05, "loss": 0.5827, "step": 2824 }, { "epoch": 0.7834165280088741, "grad_norm": 0.1791253536939621, "learning_rate": 1.6991281551989704e-05, "loss": 0.5125, "step": 2825 }, { "epoch": 0.78369384359401, "grad_norm": 0.19195421040058136, "learning_rate": 1.698612820220868e-05, "loss": 0.5348, "step": 2826 }, { "epoch": 0.7839711591791458, "grad_norm": 0.1831100434064865, "learning_rate": 1.6980973977065185e-05, "loss": 0.5354, "step": 2827 }, { "epoch": 0.7842484747642817, "grad_norm": 0.19085463881492615, "learning_rate": 1.6975818877564945e-05, "loss": 0.5724, "step": 2828 }, { "epoch": 0.7845257903494176, "grad_norm": 0.18546035885810852, "learning_rate": 1.6970662904713857e-05, "loss": 0.5433, "step": 2829 }, { "epoch": 0.7848031059345535, "grad_norm": 0.17541223764419556, "learning_rate": 1.6965506059517988e-05, "loss": 0.5348, "step": 2830 }, { "epoch": 0.7850804215196894, "grad_norm": 0.18578267097473145, "learning_rate": 1.696034834298358e-05, "loss": 0.5355, "step": 2831 }, { "epoch": 0.7853577371048253, "grad_norm": 0.19077135622501373, "learning_rate": 1.6955189756117028e-05, "loss": 0.5387, "step": 2832 }, { "epoch": 0.7856350526899611, "grad_norm": 0.19363151490688324, "learning_rate": 1.6950030299924925e-05, "loss": 0.54, "step": 2833 }, { "epoch": 0.785912368275097, "grad_norm": 0.19038641452789307, "learning_rate": 1.6944869975414e-05, "loss": 0.5365, "step": 2834 }, { "epoch": 0.7861896838602329, "grad_norm": 0.18864920735359192, "learning_rate": 1.6939708783591184e-05, "loss": 0.5272, "step": 2835 }, { "epoch": 0.7864669994453688, "grad_norm": 0.195342555642128, "learning_rate": 1.6934546725463558e-05, "loss": 0.5459, "step": 2836 }, { "epoch": 0.7867443150305047, "grad_norm": 0.1781705915927887, "learning_rate": 1.6929383802038372e-05, "loss": 0.525, "step": 2837 }, { "epoch": 0.7870216306156406, "grad_norm": 0.18777886033058167, "learning_rate": 1.6924220014323054e-05, "loss": 0.5254, "step": 2838 }, { "epoch": 0.7872989462007765, "grad_norm": 0.19651533663272858, "learning_rate": 1.6919055363325193e-05, "loss": 0.5388, "step": 2839 }, { "epoch": 0.7875762617859123, "grad_norm": 0.19021400809288025, "learning_rate": 1.6913889850052546e-05, "loss": 0.5362, "step": 2840 }, { "epoch": 0.7878535773710482, "grad_norm": 0.18892168998718262, "learning_rate": 1.690872347551305e-05, "loss": 0.5446, "step": 2841 }, { "epoch": 0.7881308929561841, "grad_norm": 0.1848224401473999, "learning_rate": 1.6903556240714795e-05, "loss": 0.5353, "step": 2842 }, { "epoch": 0.78840820854132, "grad_norm": 0.19968490302562714, "learning_rate": 1.6898388146666046e-05, "loss": 0.5173, "step": 2843 }, { "epoch": 0.7886855241264559, "grad_norm": 0.19547419250011444, "learning_rate": 1.689321919437524e-05, "loss": 0.5421, "step": 2844 }, { "epoch": 0.7889628397115918, "grad_norm": 0.18620312213897705, "learning_rate": 1.688804938485097e-05, "loss": 0.5297, "step": 2845 }, { "epoch": 0.7892401552967276, "grad_norm": 0.18695352971553802, "learning_rate": 1.6882878719102007e-05, "loss": 0.5386, "step": 2846 }, { "epoch": 0.7895174708818635, "grad_norm": 0.18656794726848602, "learning_rate": 1.6877707198137285e-05, "loss": 0.557, "step": 2847 }, { "epoch": 0.7897947864669994, "grad_norm": 0.18816913664340973, "learning_rate": 1.6872534822965903e-05, "loss": 0.5638, "step": 2848 }, { "epoch": 0.7900721020521353, "grad_norm": 0.18742120265960693, "learning_rate": 1.6867361594597126e-05, "loss": 0.5482, "step": 2849 }, { "epoch": 0.7903494176372712, "grad_norm": 0.1863267421722412, "learning_rate": 1.6862187514040396e-05, "loss": 0.5471, "step": 2850 }, { "epoch": 0.7906267332224071, "grad_norm": 0.24633349478244781, "learning_rate": 1.6857012582305303e-05, "loss": 0.5251, "step": 2851 }, { "epoch": 0.790904048807543, "grad_norm": 0.1922018826007843, "learning_rate": 1.6851836800401624e-05, "loss": 0.5706, "step": 2852 }, { "epoch": 0.7911813643926788, "grad_norm": 0.18678666651248932, "learning_rate": 1.684666016933928e-05, "loss": 0.55, "step": 2853 }, { "epoch": 0.7914586799778147, "grad_norm": 0.1959305703639984, "learning_rate": 1.6841482690128376e-05, "loss": 0.5414, "step": 2854 }, { "epoch": 0.7917359955629506, "grad_norm": 0.19318552315235138, "learning_rate": 1.6836304363779178e-05, "loss": 0.5383, "step": 2855 }, { "epoch": 0.7920133111480865, "grad_norm": 0.18508724868297577, "learning_rate": 1.6831125191302104e-05, "loss": 0.5371, "step": 2856 }, { "epoch": 0.7922906267332224, "grad_norm": 0.18992586433887482, "learning_rate": 1.682594517370776e-05, "loss": 0.499, "step": 2857 }, { "epoch": 0.7925679423183583, "grad_norm": 0.19711445271968842, "learning_rate": 1.682076431200689e-05, "loss": 0.5777, "step": 2858 }, { "epoch": 0.7928452579034941, "grad_norm": 0.19418618083000183, "learning_rate": 1.6815582607210435e-05, "loss": 0.5459, "step": 2859 }, { "epoch": 0.79312257348863, "grad_norm": 0.19879227876663208, "learning_rate": 1.6810400060329472e-05, "loss": 0.5578, "step": 2860 }, { "epoch": 0.7933998890737659, "grad_norm": 0.1837853342294693, "learning_rate": 1.680521667237525e-05, "loss": 0.534, "step": 2861 }, { "epoch": 0.7936772046589018, "grad_norm": 0.18517963588237762, "learning_rate": 1.68000324443592e-05, "loss": 0.5368, "step": 2862 }, { "epoch": 0.7939545202440377, "grad_norm": 0.18853336572647095, "learning_rate": 1.6794847377292885e-05, "loss": 0.5728, "step": 2863 }, { "epoch": 0.7942318358291736, "grad_norm": 0.1775895059108734, "learning_rate": 1.678966147218806e-05, "loss": 0.5283, "step": 2864 }, { "epoch": 0.7945091514143094, "grad_norm": 0.18305779993534088, "learning_rate": 1.678447473005663e-05, "loss": 0.531, "step": 2865 }, { "epoch": 0.7947864669994453, "grad_norm": 0.19156721234321594, "learning_rate": 1.6779287151910665e-05, "loss": 0.5485, "step": 2866 }, { "epoch": 0.7950637825845812, "grad_norm": 0.18802639842033386, "learning_rate": 1.6774098738762398e-05, "loss": 0.548, "step": 2867 }, { "epoch": 0.7953410981697171, "grad_norm": 0.19210045039653778, "learning_rate": 1.6768909491624224e-05, "loss": 0.548, "step": 2868 }, { "epoch": 0.795618413754853, "grad_norm": 0.18298813700675964, "learning_rate": 1.6763719411508713e-05, "loss": 0.5385, "step": 2869 }, { "epoch": 0.7958957293399889, "grad_norm": 0.206680029630661, "learning_rate": 1.675852849942857e-05, "loss": 0.5443, "step": 2870 }, { "epoch": 0.7961730449251248, "grad_norm": 0.19752557575702667, "learning_rate": 1.67533367563967e-05, "loss": 0.541, "step": 2871 }, { "epoch": 0.7964503605102606, "grad_norm": 0.1768447309732437, "learning_rate": 1.674814418342613e-05, "loss": 0.5125, "step": 2872 }, { "epoch": 0.7967276760953965, "grad_norm": 0.16779829561710358, "learning_rate": 1.6742950781530086e-05, "loss": 0.5446, "step": 2873 }, { "epoch": 0.7970049916805324, "grad_norm": 0.18453587591648102, "learning_rate": 1.6737756551721924e-05, "loss": 0.538, "step": 2874 }, { "epoch": 0.7972823072656683, "grad_norm": 0.18118412792682648, "learning_rate": 1.673256149501518e-05, "loss": 0.538, "step": 2875 }, { "epoch": 0.7975596228508042, "grad_norm": 0.18454188108444214, "learning_rate": 1.672736561242355e-05, "loss": 0.5208, "step": 2876 }, { "epoch": 0.7978369384359401, "grad_norm": 0.18930627405643463, "learning_rate": 1.672216890496089e-05, "loss": 0.5545, "step": 2877 }, { "epoch": 0.7981142540210759, "grad_norm": 0.18117345869541168, "learning_rate": 1.6716971373641212e-05, "loss": 0.5444, "step": 2878 }, { "epoch": 0.7983915696062118, "grad_norm": 0.20891335606575012, "learning_rate": 1.671177301947869e-05, "loss": 0.5583, "step": 2879 }, { "epoch": 0.7986688851913477, "grad_norm": 0.18599353730678558, "learning_rate": 1.670657384348766e-05, "loss": 0.56, "step": 2880 }, { "epoch": 0.7989462007764836, "grad_norm": 0.23091299831867218, "learning_rate": 1.6701373846682626e-05, "loss": 0.5332, "step": 2881 }, { "epoch": 0.7992235163616195, "grad_norm": 0.18974825739860535, "learning_rate": 1.6696173030078242e-05, "loss": 0.56, "step": 2882 }, { "epoch": 0.7995008319467554, "grad_norm": 0.1794738471508026, "learning_rate": 1.6690971394689324e-05, "loss": 0.5671, "step": 2883 }, { "epoch": 0.7997781475318912, "grad_norm": 0.24422964453697205, "learning_rate": 1.6685768941530848e-05, "loss": 0.5261, "step": 2884 }, { "epoch": 0.8000554631170271, "grad_norm": 0.18428778648376465, "learning_rate": 1.6680565671617955e-05, "loss": 0.5315, "step": 2885 }, { "epoch": 0.800332778702163, "grad_norm": 0.18910729885101318, "learning_rate": 1.667536158596593e-05, "loss": 0.5447, "step": 2886 }, { "epoch": 0.8006100942872989, "grad_norm": 0.1889326423406601, "learning_rate": 1.667015668559024e-05, "loss": 0.5313, "step": 2887 }, { "epoch": 0.8008874098724348, "grad_norm": 0.19478273391723633, "learning_rate": 1.66649509715065e-05, "loss": 0.5358, "step": 2888 }, { "epoch": 0.8011647254575707, "grad_norm": 0.1966940313577652, "learning_rate": 1.6659744444730467e-05, "loss": 0.5367, "step": 2889 }, { "epoch": 0.8014420410427066, "grad_norm": 0.19594216346740723, "learning_rate": 1.665453710627809e-05, "loss": 0.5571, "step": 2890 }, { "epoch": 0.8017193566278424, "grad_norm": 0.19384315609931946, "learning_rate": 1.6649328957165448e-05, "loss": 0.5478, "step": 2891 }, { "epoch": 0.8019966722129783, "grad_norm": 0.18817134201526642, "learning_rate": 1.6644119998408795e-05, "loss": 0.5533, "step": 2892 }, { "epoch": 0.8022739877981142, "grad_norm": 0.19234129786491394, "learning_rate": 1.6638910231024528e-05, "loss": 0.55, "step": 2893 }, { "epoch": 0.8025513033832501, "grad_norm": 0.19165417551994324, "learning_rate": 1.6633699656029224e-05, "loss": 0.5372, "step": 2894 }, { "epoch": 0.802828618968386, "grad_norm": 0.20091277360916138, "learning_rate": 1.6628488274439592e-05, "loss": 0.5571, "step": 2895 }, { "epoch": 0.8031059345535219, "grad_norm": 0.1875421106815338, "learning_rate": 1.6623276087272517e-05, "loss": 0.5346, "step": 2896 }, { "epoch": 0.8033832501386577, "grad_norm": 0.24504978954792023, "learning_rate": 1.661806309554503e-05, "loss": 0.5277, "step": 2897 }, { "epoch": 0.8036605657237936, "grad_norm": 0.188764289021492, "learning_rate": 1.661284930027433e-05, "loss": 0.5354, "step": 2898 }, { "epoch": 0.8039378813089295, "grad_norm": 0.1898432970046997, "learning_rate": 1.6607634702477765e-05, "loss": 0.5349, "step": 2899 }, { "epoch": 0.8042151968940654, "grad_norm": 0.18189935386180878, "learning_rate": 1.6602419303172835e-05, "loss": 0.5146, "step": 2900 }, { "epoch": 0.8044925124792013, "grad_norm": 0.19001318514347076, "learning_rate": 1.659720310337721e-05, "loss": 0.5438, "step": 2901 }, { "epoch": 0.8047698280643372, "grad_norm": 0.19630925357341766, "learning_rate": 1.6591986104108706e-05, "loss": 0.5644, "step": 2902 }, { "epoch": 0.805047143649473, "grad_norm": 0.1834658980369568, "learning_rate": 1.65867683063853e-05, "loss": 0.5588, "step": 2903 }, { "epoch": 0.8053244592346089, "grad_norm": 0.198238343000412, "learning_rate": 1.658154971122512e-05, "loss": 0.5471, "step": 2904 }, { "epoch": 0.8056017748197448, "grad_norm": 0.18793722987174988, "learning_rate": 1.657633031964645e-05, "loss": 0.5416, "step": 2905 }, { "epoch": 0.8058790904048807, "grad_norm": 0.1919628083705902, "learning_rate": 1.657111013266774e-05, "loss": 0.5489, "step": 2906 }, { "epoch": 0.8061564059900166, "grad_norm": 0.18970000743865967, "learning_rate": 1.6565889151307576e-05, "loss": 0.5374, "step": 2907 }, { "epoch": 0.8064337215751525, "grad_norm": 0.19195859134197235, "learning_rate": 1.656066737658471e-05, "loss": 0.5696, "step": 2908 }, { "epoch": 0.8067110371602884, "grad_norm": 0.18715813755989075, "learning_rate": 1.6555444809518066e-05, "loss": 0.5365, "step": 2909 }, { "epoch": 0.8069883527454242, "grad_norm": 0.19223229587078094, "learning_rate": 1.6550221451126682e-05, "loss": 0.5148, "step": 2910 }, { "epoch": 0.8072656683305601, "grad_norm": 0.2136392742395401, "learning_rate": 1.6544997302429794e-05, "loss": 0.5269, "step": 2911 }, { "epoch": 0.807542983915696, "grad_norm": 0.19737182557582855, "learning_rate": 1.6539772364446755e-05, "loss": 0.5592, "step": 2912 }, { "epoch": 0.8078202995008319, "grad_norm": 0.20777487754821777, "learning_rate": 1.6534546638197098e-05, "loss": 0.5124, "step": 2913 }, { "epoch": 0.8080976150859678, "grad_norm": 0.1888158619403839, "learning_rate": 1.6529320124700495e-05, "loss": 0.5433, "step": 2914 }, { "epoch": 0.8083749306711037, "grad_norm": 0.24319934844970703, "learning_rate": 1.6524092824976787e-05, "loss": 0.5763, "step": 2915 }, { "epoch": 0.8086522462562395, "grad_norm": 0.19144728779792786, "learning_rate": 1.6518864740045947e-05, "loss": 0.5245, "step": 2916 }, { "epoch": 0.8089295618413754, "grad_norm": 0.19189846515655518, "learning_rate": 1.6513635870928122e-05, "loss": 0.5435, "step": 2917 }, { "epoch": 0.8092068774265113, "grad_norm": 0.24125546216964722, "learning_rate": 1.6508406218643597e-05, "loss": 0.5599, "step": 2918 }, { "epoch": 0.8094841930116472, "grad_norm": 0.2077561616897583, "learning_rate": 1.650317578421282e-05, "loss": 0.5466, "step": 2919 }, { "epoch": 0.8097615085967831, "grad_norm": 0.2178879827260971, "learning_rate": 1.6497944568656383e-05, "loss": 0.5306, "step": 2920 }, { "epoch": 0.810038824181919, "grad_norm": 0.24422143399715424, "learning_rate": 1.649271257299504e-05, "loss": 0.5388, "step": 2921 }, { "epoch": 0.8103161397670549, "grad_norm": 0.18929331004619598, "learning_rate": 1.6487479798249687e-05, "loss": 0.5236, "step": 2922 }, { "epoch": 0.8105934553521907, "grad_norm": 0.17720143496990204, "learning_rate": 1.648224624544138e-05, "loss": 0.5172, "step": 2923 }, { "epoch": 0.8108707709373266, "grad_norm": 0.18783891201019287, "learning_rate": 1.6477011915591325e-05, "loss": 0.5389, "step": 2924 }, { "epoch": 0.8111480865224625, "grad_norm": 0.19297097623348236, "learning_rate": 1.6471776809720873e-05, "loss": 0.543, "step": 2925 }, { "epoch": 0.8114254021075984, "grad_norm": 0.18489378690719604, "learning_rate": 1.6466540928851538e-05, "loss": 0.5512, "step": 2926 }, { "epoch": 0.8117027176927343, "grad_norm": 0.18271875381469727, "learning_rate": 1.6461304274004972e-05, "loss": 0.5417, "step": 2927 }, { "epoch": 0.8119800332778702, "grad_norm": 0.1847040057182312, "learning_rate": 1.6456066846202994e-05, "loss": 0.5387, "step": 2928 }, { "epoch": 0.812257348863006, "grad_norm": 0.20353619754314423, "learning_rate": 1.6450828646467555e-05, "loss": 0.5454, "step": 2929 }, { "epoch": 0.812534664448142, "grad_norm": 0.2704160809516907, "learning_rate": 1.644558967582078e-05, "loss": 0.5598, "step": 2930 }, { "epoch": 0.8128119800332779, "grad_norm": 0.2357567995786667, "learning_rate": 1.6440349935284917e-05, "loss": 0.5724, "step": 2931 }, { "epoch": 0.8130892956184138, "grad_norm": 0.18530336022377014, "learning_rate": 1.6435109425882385e-05, "loss": 0.5365, "step": 2932 }, { "epoch": 0.8133666112035497, "grad_norm": 0.18962644040584564, "learning_rate": 1.6429868148635745e-05, "loss": 0.5429, "step": 2933 }, { "epoch": 0.8136439267886856, "grad_norm": 0.1827327460050583, "learning_rate": 1.6424626104567708e-05, "loss": 0.5368, "step": 2934 }, { "epoch": 0.8139212423738215, "grad_norm": 0.19838617742061615, "learning_rate": 1.641938329470114e-05, "loss": 0.5366, "step": 2935 }, { "epoch": 0.8141985579589573, "grad_norm": 0.18842142820358276, "learning_rate": 1.6414139720059045e-05, "loss": 0.546, "step": 2936 }, { "epoch": 0.8144758735440932, "grad_norm": 0.1957085281610489, "learning_rate": 1.6408895381664594e-05, "loss": 0.5144, "step": 2937 }, { "epoch": 0.8147531891292291, "grad_norm": 0.18413354456424713, "learning_rate": 1.6403650280541087e-05, "loss": 0.5441, "step": 2938 }, { "epoch": 0.815030504714365, "grad_norm": 0.18572686612606049, "learning_rate": 1.6398404417711984e-05, "loss": 0.543, "step": 2939 }, { "epoch": 0.8153078202995009, "grad_norm": 0.18513022363185883, "learning_rate": 1.639315779420089e-05, "loss": 0.5416, "step": 2940 }, { "epoch": 0.8155851358846368, "grad_norm": 0.1878947764635086, "learning_rate": 1.6387910411031564e-05, "loss": 0.5487, "step": 2941 }, { "epoch": 0.8158624514697727, "grad_norm": 0.20760974287986755, "learning_rate": 1.6382662269227912e-05, "loss": 0.5861, "step": 2942 }, { "epoch": 0.8161397670549085, "grad_norm": 0.1904260218143463, "learning_rate": 1.637741336981398e-05, "loss": 0.549, "step": 2943 }, { "epoch": 0.8164170826400444, "grad_norm": 0.1811297982931137, "learning_rate": 1.637216371381397e-05, "loss": 0.5556, "step": 2944 }, { "epoch": 0.8166943982251803, "grad_norm": 0.18820282816886902, "learning_rate": 1.6366913302252228e-05, "loss": 0.5262, "step": 2945 }, { "epoch": 0.8169717138103162, "grad_norm": 0.18562357127666473, "learning_rate": 1.636166213615325e-05, "loss": 0.5353, "step": 2946 }, { "epoch": 0.8172490293954521, "grad_norm": 0.19570721685886383, "learning_rate": 1.6356410216541675e-05, "loss": 0.524, "step": 2947 }, { "epoch": 0.817526344980588, "grad_norm": 0.19673757255077362, "learning_rate": 1.635115754444229e-05, "loss": 0.5532, "step": 2948 }, { "epoch": 0.8178036605657238, "grad_norm": 0.1866602897644043, "learning_rate": 1.6345904120880045e-05, "loss": 0.5407, "step": 2949 }, { "epoch": 0.8180809761508597, "grad_norm": 0.1872847080230713, "learning_rate": 1.634064994688e-05, "loss": 0.522, "step": 2950 }, { "epoch": 0.8183582917359956, "grad_norm": 0.18448910117149353, "learning_rate": 1.63353950234674e-05, "loss": 0.5506, "step": 2951 }, { "epoch": 0.8186356073211315, "grad_norm": 0.19117306172847748, "learning_rate": 1.6330139351667607e-05, "loss": 0.538, "step": 2952 }, { "epoch": 0.8189129229062674, "grad_norm": 0.19007255136966705, "learning_rate": 1.6324882932506152e-05, "loss": 0.545, "step": 2953 }, { "epoch": 0.8191902384914033, "grad_norm": 0.1907234787940979, "learning_rate": 1.63196257670087e-05, "loss": 0.5216, "step": 2954 }, { "epoch": 0.8194675540765392, "grad_norm": 0.19023658335208893, "learning_rate": 1.6314367856201063e-05, "loss": 0.528, "step": 2955 }, { "epoch": 0.819744869661675, "grad_norm": 0.19689738750457764, "learning_rate": 1.6309109201109197e-05, "loss": 0.5579, "step": 2956 }, { "epoch": 0.8200221852468109, "grad_norm": 0.18936869502067566, "learning_rate": 1.63038498027592e-05, "loss": 0.5171, "step": 2957 }, { "epoch": 0.8202995008319468, "grad_norm": 0.18959283828735352, "learning_rate": 1.6298589662177334e-05, "loss": 0.5109, "step": 2958 }, { "epoch": 0.8205768164170827, "grad_norm": 0.18975764513015747, "learning_rate": 1.6293328780389976e-05, "loss": 0.5241, "step": 2959 }, { "epoch": 0.8208541320022186, "grad_norm": 0.1963840126991272, "learning_rate": 1.6288067158423676e-05, "loss": 0.5311, "step": 2960 }, { "epoch": 0.8211314475873545, "grad_norm": 0.1869080811738968, "learning_rate": 1.6282804797305107e-05, "loss": 0.5268, "step": 2961 }, { "epoch": 0.8214087631724903, "grad_norm": 0.19208255410194397, "learning_rate": 1.62775416980611e-05, "loss": 0.5561, "step": 2962 }, { "epoch": 0.8216860787576262, "grad_norm": 0.21065904200077057, "learning_rate": 1.6272277861718622e-05, "loss": 0.5612, "step": 2963 }, { "epoch": 0.8219633943427621, "grad_norm": 0.19388340413570404, "learning_rate": 1.626701328930479e-05, "loss": 0.5448, "step": 2964 }, { "epoch": 0.822240709927898, "grad_norm": 0.19354204833507538, "learning_rate": 1.626174798184686e-05, "loss": 0.5521, "step": 2965 }, { "epoch": 0.8225180255130339, "grad_norm": 0.1918734908103943, "learning_rate": 1.6256481940372235e-05, "loss": 0.5737, "step": 2966 }, { "epoch": 0.8227953410981698, "grad_norm": 0.19950683414936066, "learning_rate": 1.625121516590845e-05, "loss": 0.5251, "step": 2967 }, { "epoch": 0.8230726566833056, "grad_norm": 0.1884751170873642, "learning_rate": 1.624594765948321e-05, "loss": 0.5487, "step": 2968 }, { "epoch": 0.8233499722684415, "grad_norm": 0.20038394629955292, "learning_rate": 1.624067942212433e-05, "loss": 0.5366, "step": 2969 }, { "epoch": 0.8236272878535774, "grad_norm": 0.18730241060256958, "learning_rate": 1.6235410454859784e-05, "loss": 0.5574, "step": 2970 }, { "epoch": 0.8239046034387133, "grad_norm": 0.1982334703207016, "learning_rate": 1.6230140758717692e-05, "loss": 0.5472, "step": 2971 }, { "epoch": 0.8241819190238492, "grad_norm": 0.19432491064071655, "learning_rate": 1.6224870334726315e-05, "loss": 0.54, "step": 2972 }, { "epoch": 0.8244592346089851, "grad_norm": 0.1818057745695114, "learning_rate": 1.6219599183914038e-05, "loss": 0.5594, "step": 2973 }, { "epoch": 0.824736550194121, "grad_norm": 0.18658170104026794, "learning_rate": 1.6214327307309417e-05, "loss": 0.537, "step": 2974 }, { "epoch": 0.8250138657792568, "grad_norm": 0.19775459170341492, "learning_rate": 1.620905470594113e-05, "loss": 0.5423, "step": 2975 }, { "epoch": 0.8252911813643927, "grad_norm": 0.1877543330192566, "learning_rate": 1.6203781380837997e-05, "loss": 0.5337, "step": 2976 }, { "epoch": 0.8255684969495286, "grad_norm": 0.1873265951871872, "learning_rate": 1.619850733302899e-05, "loss": 0.5153, "step": 2977 }, { "epoch": 0.8258458125346645, "grad_norm": 0.1832212209701538, "learning_rate": 1.619323256354321e-05, "loss": 0.5291, "step": 2978 }, { "epoch": 0.8261231281198004, "grad_norm": 0.1939922571182251, "learning_rate": 1.6187957073409907e-05, "loss": 0.5428, "step": 2979 }, { "epoch": 0.8264004437049363, "grad_norm": 0.2593255639076233, "learning_rate": 1.6182680863658468e-05, "loss": 0.5448, "step": 2980 }, { "epoch": 0.8266777592900721, "grad_norm": 0.18556508421897888, "learning_rate": 1.6177403935318422e-05, "loss": 0.529, "step": 2981 }, { "epoch": 0.826955074875208, "grad_norm": 0.20643781125545502, "learning_rate": 1.6172126289419437e-05, "loss": 0.535, "step": 2982 }, { "epoch": 0.8272323904603439, "grad_norm": 0.192849263548851, "learning_rate": 1.6166847926991324e-05, "loss": 0.5457, "step": 2983 }, { "epoch": 0.8275097060454798, "grad_norm": 0.19120796024799347, "learning_rate": 1.616156884906403e-05, "loss": 0.542, "step": 2984 }, { "epoch": 0.8277870216306157, "grad_norm": 0.20270869135856628, "learning_rate": 1.615628905666764e-05, "loss": 0.5621, "step": 2985 }, { "epoch": 0.8280643372157516, "grad_norm": 0.20787782967090607, "learning_rate": 1.6151008550832377e-05, "loss": 0.5007, "step": 2986 }, { "epoch": 0.8283416528008875, "grad_norm": 0.23962002992630005, "learning_rate": 1.6145727332588626e-05, "loss": 0.5351, "step": 2987 }, { "epoch": 0.8286189683860233, "grad_norm": 0.19550423324108124, "learning_rate": 1.614044540296687e-05, "loss": 0.5599, "step": 2988 }, { "epoch": 0.8288962839711592, "grad_norm": 0.20781289041042328, "learning_rate": 1.6135162762997776e-05, "loss": 0.5381, "step": 2989 }, { "epoch": 0.8291735995562951, "grad_norm": 0.1908697932958603, "learning_rate": 1.61298794137121e-05, "loss": 0.5109, "step": 2990 }, { "epoch": 0.829450915141431, "grad_norm": 0.19248178601264954, "learning_rate": 1.6124595356140794e-05, "loss": 0.5061, "step": 2991 }, { "epoch": 0.8297282307265669, "grad_norm": 0.4288308918476105, "learning_rate": 1.611931059131489e-05, "loss": 0.5418, "step": 2992 }, { "epoch": 0.8300055463117028, "grad_norm": 0.2553388178348541, "learning_rate": 1.6114025120265604e-05, "loss": 0.5308, "step": 2993 }, { "epoch": 0.8302828618968386, "grad_norm": 0.19662028551101685, "learning_rate": 1.6108738944024265e-05, "loss": 0.5647, "step": 2994 }, { "epoch": 0.8305601774819745, "grad_norm": 0.1986682265996933, "learning_rate": 1.6103452063622343e-05, "loss": 0.5594, "step": 2995 }, { "epoch": 0.8308374930671104, "grad_norm": 0.18815158307552338, "learning_rate": 1.6098164480091454e-05, "loss": 0.5347, "step": 2996 }, { "epoch": 0.8311148086522463, "grad_norm": 0.19016186892986298, "learning_rate": 1.6092876194463343e-05, "loss": 0.5022, "step": 2997 }, { "epoch": 0.8313921242373822, "grad_norm": 0.19305314123630524, "learning_rate": 1.6087587207769897e-05, "loss": 0.5561, "step": 2998 }, { "epoch": 0.8316694398225181, "grad_norm": 0.19363488256931305, "learning_rate": 1.6082297521043134e-05, "loss": 0.5704, "step": 2999 }, { "epoch": 0.831946755407654, "grad_norm": 0.2677402198314667, "learning_rate": 1.6077007135315212e-05, "loss": 0.5709, "step": 3000 }, { "epoch": 0.8322240709927898, "grad_norm": 0.18514001369476318, "learning_rate": 1.6071716051618426e-05, "loss": 0.5328, "step": 3001 }, { "epoch": 0.8325013865779257, "grad_norm": 0.20008349418640137, "learning_rate": 1.606642427098521e-05, "loss": 0.5535, "step": 3002 }, { "epoch": 0.8327787021630616, "grad_norm": 0.20995013415813446, "learning_rate": 1.606113179444813e-05, "loss": 0.5541, "step": 3003 }, { "epoch": 0.8330560177481975, "grad_norm": 0.21254870295524597, "learning_rate": 1.6055838623039886e-05, "loss": 0.5221, "step": 3004 }, { "epoch": 0.8333333333333334, "grad_norm": 0.1829461008310318, "learning_rate": 1.6050544757793312e-05, "loss": 0.5445, "step": 3005 }, { "epoch": 0.8336106489184693, "grad_norm": 0.20135165750980377, "learning_rate": 1.604525019974139e-05, "loss": 0.5337, "step": 3006 }, { "epoch": 0.8338879645036051, "grad_norm": 0.1872393935918808, "learning_rate": 1.6039954949917218e-05, "loss": 0.5506, "step": 3007 }, { "epoch": 0.834165280088741, "grad_norm": 0.17982006072998047, "learning_rate": 1.6034659009354055e-05, "loss": 0.5252, "step": 3008 }, { "epoch": 0.8344425956738769, "grad_norm": 0.19605563580989838, "learning_rate": 1.6029362379085264e-05, "loss": 0.527, "step": 3009 }, { "epoch": 0.8347199112590128, "grad_norm": 0.20329341292381287, "learning_rate": 1.602406506014437e-05, "loss": 0.5729, "step": 3010 }, { "epoch": 0.8349972268441487, "grad_norm": 0.18132315576076508, "learning_rate": 1.6018767053565008e-05, "loss": 0.5391, "step": 3011 }, { "epoch": 0.8352745424292846, "grad_norm": 0.19777770340442657, "learning_rate": 1.6013468360380966e-05, "loss": 0.5366, "step": 3012 }, { "epoch": 0.8355518580144204, "grad_norm": 0.19389207661151886, "learning_rate": 1.6008168981626164e-05, "loss": 0.5569, "step": 3013 }, { "epoch": 0.8358291735995563, "grad_norm": 0.18683470785617828, "learning_rate": 1.6002868918334647e-05, "loss": 0.5496, "step": 3014 }, { "epoch": 0.8361064891846922, "grad_norm": 0.18466855585575104, "learning_rate": 1.5997568171540594e-05, "loss": 0.5547, "step": 3015 }, { "epoch": 0.8363838047698281, "grad_norm": 0.19307468831539154, "learning_rate": 1.5992266742278322e-05, "loss": 0.5518, "step": 3016 }, { "epoch": 0.836661120354964, "grad_norm": 0.19225814938545227, "learning_rate": 1.5986964631582287e-05, "loss": 0.548, "step": 3017 }, { "epoch": 0.8369384359400999, "grad_norm": 0.19282346963882446, "learning_rate": 1.5981661840487063e-05, "loss": 0.5594, "step": 3018 }, { "epoch": 0.8372157515252358, "grad_norm": 0.18779483437538147, "learning_rate": 1.5976358370027373e-05, "loss": 0.5191, "step": 3019 }, { "epoch": 0.8374930671103716, "grad_norm": 0.18765175342559814, "learning_rate": 1.597105422123806e-05, "loss": 0.5401, "step": 3020 }, { "epoch": 0.8377703826955075, "grad_norm": 0.1844259798526764, "learning_rate": 1.5965749395154107e-05, "loss": 0.5459, "step": 3021 }, { "epoch": 0.8380476982806434, "grad_norm": 0.18752428889274597, "learning_rate": 1.5960443892810617e-05, "loss": 0.5339, "step": 3022 }, { "epoch": 0.8383250138657793, "grad_norm": 0.19530834257602692, "learning_rate": 1.5955137715242847e-05, "loss": 0.5479, "step": 3023 }, { "epoch": 0.8386023294509152, "grad_norm": 0.1960788518190384, "learning_rate": 1.5949830863486166e-05, "loss": 0.5511, "step": 3024 }, { "epoch": 0.8388796450360511, "grad_norm": 0.2669583261013031, "learning_rate": 1.594452333857608e-05, "loss": 0.5637, "step": 3025 }, { "epoch": 0.8391569606211869, "grad_norm": 0.18217816948890686, "learning_rate": 1.5939215141548224e-05, "loss": 0.4905, "step": 3026 }, { "epoch": 0.8394342762063228, "grad_norm": 0.19247345626354218, "learning_rate": 1.5933906273438383e-05, "loss": 0.5234, "step": 3027 }, { "epoch": 0.8397115917914587, "grad_norm": 0.1867692619562149, "learning_rate": 1.592859673528244e-05, "loss": 0.5166, "step": 3028 }, { "epoch": 0.8399889073765946, "grad_norm": 0.18724261224269867, "learning_rate": 1.5923286528116446e-05, "loss": 0.5524, "step": 3029 }, { "epoch": 0.8402662229617305, "grad_norm": 0.18874408304691315, "learning_rate": 1.5917975652976544e-05, "loss": 0.5283, "step": 3030 }, { "epoch": 0.8405435385468664, "grad_norm": 0.2242184728384018, "learning_rate": 1.5912664110899038e-05, "loss": 0.532, "step": 3031 }, { "epoch": 0.8408208541320022, "grad_norm": 0.20637090504169464, "learning_rate": 1.5907351902920346e-05, "loss": 0.5619, "step": 3032 }, { "epoch": 0.8410981697171381, "grad_norm": 0.20153935253620148, "learning_rate": 1.590203903007702e-05, "loss": 0.5704, "step": 3033 }, { "epoch": 0.841375485302274, "grad_norm": 0.1838448941707611, "learning_rate": 1.5896725493405746e-05, "loss": 0.5533, "step": 3034 }, { "epoch": 0.8416528008874099, "grad_norm": 0.187151700258255, "learning_rate": 1.589141129394333e-05, "loss": 0.5276, "step": 3035 }, { "epoch": 0.8419301164725458, "grad_norm": 0.19188909232616425, "learning_rate": 1.5886096432726723e-05, "loss": 0.5588, "step": 3036 }, { "epoch": 0.8422074320576817, "grad_norm": 0.18532563745975494, "learning_rate": 1.5880780910792984e-05, "loss": 0.5363, "step": 3037 }, { "epoch": 0.8424847476428176, "grad_norm": 0.18388155102729797, "learning_rate": 1.587546472917932e-05, "loss": 0.5544, "step": 3038 }, { "epoch": 0.8427620632279534, "grad_norm": 0.18780486285686493, "learning_rate": 1.5870147888923054e-05, "loss": 0.5501, "step": 3039 }, { "epoch": 0.8430393788130893, "grad_norm": 0.18718671798706055, "learning_rate": 1.5864830391061644e-05, "loss": 0.5491, "step": 3040 }, { "epoch": 0.8433166943982252, "grad_norm": 0.1841050237417221, "learning_rate": 1.585951223663268e-05, "loss": 0.539, "step": 3041 }, { "epoch": 0.8435940099833611, "grad_norm": 0.17960785329341888, "learning_rate": 1.5854193426673862e-05, "loss": 0.5134, "step": 3042 }, { "epoch": 0.843871325568497, "grad_norm": 0.1937013566493988, "learning_rate": 1.5848873962223044e-05, "loss": 0.5083, "step": 3043 }, { "epoch": 0.8441486411536329, "grad_norm": 0.18566550314426422, "learning_rate": 1.5843553844318193e-05, "loss": 0.5569, "step": 3044 }, { "epoch": 0.8444259567387687, "grad_norm": 0.1879187971353531, "learning_rate": 1.5838233073997395e-05, "loss": 0.5611, "step": 3045 }, { "epoch": 0.8447032723239046, "grad_norm": 0.1857539415359497, "learning_rate": 1.5832911652298882e-05, "loss": 0.5464, "step": 3046 }, { "epoch": 0.8449805879090405, "grad_norm": 0.1914960891008377, "learning_rate": 1.5827589580261e-05, "loss": 0.5547, "step": 3047 }, { "epoch": 0.8452579034941764, "grad_norm": 0.18924832344055176, "learning_rate": 1.582226685892223e-05, "loss": 0.5739, "step": 3048 }, { "epoch": 0.8455352190793123, "grad_norm": 0.18020758032798767, "learning_rate": 1.5816943489321174e-05, "loss": 0.5355, "step": 3049 }, { "epoch": 0.8458125346644482, "grad_norm": 0.22242438793182373, "learning_rate": 1.5811619472496562e-05, "loss": 0.5551, "step": 3050 }, { "epoch": 0.846089850249584, "grad_norm": 0.18375588953495026, "learning_rate": 1.5806294809487248e-05, "loss": 0.5393, "step": 3051 }, { "epoch": 0.8463671658347199, "grad_norm": 0.20009155571460724, "learning_rate": 1.5800969501332223e-05, "loss": 0.5555, "step": 3052 }, { "epoch": 0.8466444814198558, "grad_norm": 0.1867419183254242, "learning_rate": 1.5795643549070588e-05, "loss": 0.5541, "step": 3053 }, { "epoch": 0.8469217970049917, "grad_norm": 0.20315201580524445, "learning_rate": 1.5790316953741583e-05, "loss": 0.5302, "step": 3054 }, { "epoch": 0.8471991125901276, "grad_norm": 0.19646741449832916, "learning_rate": 1.578498971638456e-05, "loss": 0.5491, "step": 3055 }, { "epoch": 0.8474764281752635, "grad_norm": 0.1929592341184616, "learning_rate": 1.5779661838039013e-05, "loss": 0.554, "step": 3056 }, { "epoch": 0.8477537437603994, "grad_norm": 0.18516919016838074, "learning_rate": 1.577433331974455e-05, "loss": 0.5474, "step": 3057 }, { "epoch": 0.8480310593455352, "grad_norm": 0.17926859855651855, "learning_rate": 1.57690041625409e-05, "loss": 0.541, "step": 3058 }, { "epoch": 0.8483083749306711, "grad_norm": 0.19980867207050323, "learning_rate": 1.576367436746793e-05, "loss": 0.581, "step": 3059 }, { "epoch": 0.848585690515807, "grad_norm": 0.20209050178527832, "learning_rate": 1.575834393556562e-05, "loss": 0.5749, "step": 3060 }, { "epoch": 0.8488630061009429, "grad_norm": 0.19711333513259888, "learning_rate": 1.575301286787408e-05, "loss": 0.5537, "step": 3061 }, { "epoch": 0.8491403216860788, "grad_norm": 0.185336172580719, "learning_rate": 1.5747681165433544e-05, "loss": 0.5378, "step": 3062 }, { "epoch": 0.8494176372712147, "grad_norm": 0.2029864341020584, "learning_rate": 1.5742348829284366e-05, "loss": 0.5489, "step": 3063 }, { "epoch": 0.8496949528563505, "grad_norm": 0.18520487844944, "learning_rate": 1.5737015860467032e-05, "loss": 0.5589, "step": 3064 }, { "epoch": 0.8499722684414864, "grad_norm": 0.18894660472869873, "learning_rate": 1.573168226002213e-05, "loss": 0.5486, "step": 3065 }, { "epoch": 0.8502495840266223, "grad_norm": 0.2093021273612976, "learning_rate": 1.5726348028990404e-05, "loss": 0.528, "step": 3066 }, { "epoch": 0.8505268996117582, "grad_norm": 0.18530364334583282, "learning_rate": 1.5721013168412698e-05, "loss": 0.5244, "step": 3067 }, { "epoch": 0.8508042151968941, "grad_norm": 0.18845012784004211, "learning_rate": 1.5715677679329978e-05, "loss": 0.5211, "step": 3068 }, { "epoch": 0.85108153078203, "grad_norm": 0.20350565016269684, "learning_rate": 1.571034156278335e-05, "loss": 0.5711, "step": 3069 }, { "epoch": 0.8513588463671659, "grad_norm": 0.18500900268554688, "learning_rate": 1.5705004819814025e-05, "loss": 0.5116, "step": 3070 }, { "epoch": 0.8516361619523017, "grad_norm": 0.18262405693531036, "learning_rate": 1.5699667451463344e-05, "loss": 0.5485, "step": 3071 }, { "epoch": 0.8519134775374376, "grad_norm": 0.19544097781181335, "learning_rate": 1.5694329458772776e-05, "loss": 0.5465, "step": 3072 }, { "epoch": 0.8521907931225735, "grad_norm": 0.18556345999240875, "learning_rate": 1.5688990842783892e-05, "loss": 0.5409, "step": 3073 }, { "epoch": 0.8524681087077094, "grad_norm": 0.23834922909736633, "learning_rate": 1.5683651604538405e-05, "loss": 0.5192, "step": 3074 }, { "epoch": 0.8527454242928453, "grad_norm": 0.20461677014827728, "learning_rate": 1.5678311745078138e-05, "loss": 0.5674, "step": 3075 }, { "epoch": 0.8530227398779812, "grad_norm": 0.19039872288703918, "learning_rate": 1.5672971265445046e-05, "loss": 0.5433, "step": 3076 }, { "epoch": 0.853300055463117, "grad_norm": 0.19499389827251434, "learning_rate": 1.566763016668119e-05, "loss": 0.5577, "step": 3077 }, { "epoch": 0.8535773710482529, "grad_norm": 0.185109481215477, "learning_rate": 1.5662288449828767e-05, "loss": 0.5192, "step": 3078 }, { "epoch": 0.8538546866333888, "grad_norm": 0.2005089968442917, "learning_rate": 1.5656946115930084e-05, "loss": 0.5423, "step": 3079 }, { "epoch": 0.8541320022185247, "grad_norm": 0.19989848136901855, "learning_rate": 1.5651603166027574e-05, "loss": 0.5307, "step": 3080 }, { "epoch": 0.8544093178036606, "grad_norm": 0.18417179584503174, "learning_rate": 1.5646259601163783e-05, "loss": 0.5479, "step": 3081 }, { "epoch": 0.8546866333887965, "grad_norm": 0.19350256025791168, "learning_rate": 1.5640915422381387e-05, "loss": 0.547, "step": 3082 }, { "epoch": 0.8549639489739324, "grad_norm": 0.19599801301956177, "learning_rate": 1.5635570630723173e-05, "loss": 0.572, "step": 3083 }, { "epoch": 0.8552412645590682, "grad_norm": 0.18424679338932037, "learning_rate": 1.5630225227232055e-05, "loss": 0.5421, "step": 3084 }, { "epoch": 0.8555185801442041, "grad_norm": 0.18912889063358307, "learning_rate": 1.562487921295106e-05, "loss": 0.5503, "step": 3085 }, { "epoch": 0.85579589572934, "grad_norm": 0.1971350610256195, "learning_rate": 1.561953258892334e-05, "loss": 0.5493, "step": 3086 }, { "epoch": 0.8560732113144759, "grad_norm": 0.19339221715927124, "learning_rate": 1.5614185356192156e-05, "loss": 0.5249, "step": 3087 }, { "epoch": 0.8563505268996118, "grad_norm": 0.22969551384449005, "learning_rate": 1.5608837515800906e-05, "loss": 0.5398, "step": 3088 }, { "epoch": 0.8566278424847477, "grad_norm": 0.20485931634902954, "learning_rate": 1.560348906879309e-05, "loss": 0.5455, "step": 3089 }, { "epoch": 0.8569051580698835, "grad_norm": 0.2660558521747589, "learning_rate": 1.5598140016212328e-05, "loss": 0.5636, "step": 3090 }, { "epoch": 0.8571824736550194, "grad_norm": 0.19878970086574554, "learning_rate": 1.559279035910237e-05, "loss": 0.5513, "step": 3091 }, { "epoch": 0.8574597892401553, "grad_norm": 0.19739742577075958, "learning_rate": 1.5587440098507067e-05, "loss": 0.5521, "step": 3092 }, { "epoch": 0.8577371048252912, "grad_norm": 0.19214226305484772, "learning_rate": 1.55820892354704e-05, "loss": 0.5721, "step": 3093 }, { "epoch": 0.8580144204104271, "grad_norm": 0.19705668091773987, "learning_rate": 1.5576737771036464e-05, "loss": 0.555, "step": 3094 }, { "epoch": 0.858291735995563, "grad_norm": 0.17740927636623383, "learning_rate": 1.557138570624948e-05, "loss": 0.5478, "step": 3095 }, { "epoch": 0.8585690515806988, "grad_norm": 0.19050319492816925, "learning_rate": 1.556603304215376e-05, "loss": 0.5511, "step": 3096 }, { "epoch": 0.8588463671658347, "grad_norm": 0.1907372921705246, "learning_rate": 1.556067977979377e-05, "loss": 0.543, "step": 3097 }, { "epoch": 0.8591236827509706, "grad_norm": 0.18947333097457886, "learning_rate": 1.5555325920214055e-05, "loss": 0.56, "step": 3098 }, { "epoch": 0.8594009983361065, "grad_norm": 0.19369062781333923, "learning_rate": 1.5549971464459308e-05, "loss": 0.5506, "step": 3099 }, { "epoch": 0.8596783139212424, "grad_norm": 0.18818899989128113, "learning_rate": 1.554461641357432e-05, "loss": 0.5434, "step": 3100 }, { "epoch": 0.8599556295063783, "grad_norm": 0.19132985174655914, "learning_rate": 1.5539260768604e-05, "loss": 0.5378, "step": 3101 }, { "epoch": 0.8602329450915142, "grad_norm": 0.20558297634124756, "learning_rate": 1.5533904530593386e-05, "loss": 0.5428, "step": 3102 }, { "epoch": 0.86051026067665, "grad_norm": 0.19788722693920135, "learning_rate": 1.5528547700587616e-05, "loss": 0.5417, "step": 3103 }, { "epoch": 0.8607875762617859, "grad_norm": 0.20902171730995178, "learning_rate": 1.552319027963195e-05, "loss": 0.5426, "step": 3104 }, { "epoch": 0.8610648918469218, "grad_norm": 0.18526656925678253, "learning_rate": 1.5517832268771764e-05, "loss": 0.5342, "step": 3105 }, { "epoch": 0.8613422074320577, "grad_norm": 0.1860983669757843, "learning_rate": 1.551247366905254e-05, "loss": 0.5466, "step": 3106 }, { "epoch": 0.8616195230171936, "grad_norm": 0.18875092267990112, "learning_rate": 1.5507114481519895e-05, "loss": 0.5417, "step": 3107 }, { "epoch": 0.8618968386023295, "grad_norm": 0.1913326233625412, "learning_rate": 1.5501754707219536e-05, "loss": 0.5589, "step": 3108 }, { "epoch": 0.8621741541874653, "grad_norm": 0.19133377075195312, "learning_rate": 1.549639434719731e-05, "loss": 0.5345, "step": 3109 }, { "epoch": 0.8624514697726012, "grad_norm": 0.18826799094676971, "learning_rate": 1.549103340249916e-05, "loss": 0.527, "step": 3110 }, { "epoch": 0.8627287853577371, "grad_norm": 0.18173575401306152, "learning_rate": 1.548567187417114e-05, "loss": 0.5431, "step": 3111 }, { "epoch": 0.863006100942873, "grad_norm": 0.19065049290657043, "learning_rate": 1.548030976325944e-05, "loss": 0.5147, "step": 3112 }, { "epoch": 0.8632834165280089, "grad_norm": 0.2025771290063858, "learning_rate": 1.547494707081034e-05, "loss": 0.5635, "step": 3113 }, { "epoch": 0.8635607321131448, "grad_norm": 0.18411415815353394, "learning_rate": 1.546958379787025e-05, "loss": 0.5246, "step": 3114 }, { "epoch": 0.8638380476982807, "grad_norm": 0.19902820885181427, "learning_rate": 1.546421994548568e-05, "loss": 0.5564, "step": 3115 }, { "epoch": 0.8641153632834165, "grad_norm": 0.18202657997608185, "learning_rate": 1.5458855514703266e-05, "loss": 0.5364, "step": 3116 }, { "epoch": 0.8643926788685524, "grad_norm": 0.19231395423412323, "learning_rate": 1.545349050656974e-05, "loss": 0.5521, "step": 3117 }, { "epoch": 0.8646699944536883, "grad_norm": 0.18620963394641876, "learning_rate": 1.5448124922131974e-05, "loss": 0.5308, "step": 3118 }, { "epoch": 0.8649473100388242, "grad_norm": 0.18969541788101196, "learning_rate": 1.5442758762436923e-05, "loss": 0.5446, "step": 3119 }, { "epoch": 0.8652246256239601, "grad_norm": 0.19268915057182312, "learning_rate": 1.543739202853167e-05, "loss": 0.531, "step": 3120 }, { "epoch": 0.865501941209096, "grad_norm": 0.4672553837299347, "learning_rate": 1.5432024721463413e-05, "loss": 0.5564, "step": 3121 }, { "epoch": 0.8657792567942318, "grad_norm": 0.19262655079364777, "learning_rate": 1.5426656842279445e-05, "loss": 0.558, "step": 3122 }, { "epoch": 0.8660565723793677, "grad_norm": 0.20529362559318542, "learning_rate": 1.5421288392027185e-05, "loss": 0.5247, "step": 3123 }, { "epoch": 0.8663338879645036, "grad_norm": 0.19508616626262665, "learning_rate": 1.5415919371754166e-05, "loss": 0.5307, "step": 3124 }, { "epoch": 0.8666112035496395, "grad_norm": 0.18688172101974487, "learning_rate": 1.541054978250802e-05, "loss": 0.5422, "step": 3125 }, { "epoch": 0.8668885191347754, "grad_norm": 0.19961917400360107, "learning_rate": 1.5405179625336495e-05, "loss": 0.5598, "step": 3126 }, { "epoch": 0.8671658347199113, "grad_norm": 0.18617486953735352, "learning_rate": 1.5399808901287457e-05, "loss": 0.5221, "step": 3127 }, { "epoch": 0.8674431503050472, "grad_norm": 0.1895580142736435, "learning_rate": 1.5394437611408873e-05, "loss": 0.5515, "step": 3128 }, { "epoch": 0.867720465890183, "grad_norm": 0.19370147585868835, "learning_rate": 1.5389065756748826e-05, "loss": 0.5312, "step": 3129 }, { "epoch": 0.8679977814753189, "grad_norm": 0.19625920057296753, "learning_rate": 1.5383693338355504e-05, "loss": 0.526, "step": 3130 }, { "epoch": 0.8682750970604548, "grad_norm": 0.1878737509250641, "learning_rate": 1.537832035727721e-05, "loss": 0.5229, "step": 3131 }, { "epoch": 0.8685524126455907, "grad_norm": 0.19030463695526123, "learning_rate": 1.537294681456235e-05, "loss": 0.5197, "step": 3132 }, { "epoch": 0.8688297282307266, "grad_norm": 0.19420188665390015, "learning_rate": 1.536757271125946e-05, "loss": 0.5369, "step": 3133 }, { "epoch": 0.8691070438158625, "grad_norm": 0.2041894942522049, "learning_rate": 1.5362198048417147e-05, "loss": 0.5315, "step": 3134 }, { "epoch": 0.8693843594009983, "grad_norm": 0.1823713183403015, "learning_rate": 1.535682282708417e-05, "loss": 0.5358, "step": 3135 }, { "epoch": 0.8696616749861342, "grad_norm": 0.18803556263446808, "learning_rate": 1.5351447048309367e-05, "loss": 0.5234, "step": 3136 }, { "epoch": 0.8699389905712701, "grad_norm": 0.20315398275852203, "learning_rate": 1.5346070713141697e-05, "loss": 0.5523, "step": 3137 }, { "epoch": 0.870216306156406, "grad_norm": 0.18089507520198822, "learning_rate": 1.5340693822630224e-05, "loss": 0.5425, "step": 3138 }, { "epoch": 0.8704936217415419, "grad_norm": 0.19417926669120789, "learning_rate": 1.5335316377824127e-05, "loss": 0.5329, "step": 3139 }, { "epoch": 0.8707709373266778, "grad_norm": 0.19868281483650208, "learning_rate": 1.5329938379772685e-05, "loss": 0.5564, "step": 3140 }, { "epoch": 0.8710482529118136, "grad_norm": 0.186373770236969, "learning_rate": 1.5324559829525285e-05, "loss": 0.5498, "step": 3141 }, { "epoch": 0.8713255684969495, "grad_norm": 0.21897022426128387, "learning_rate": 1.531918072813143e-05, "loss": 0.5508, "step": 3142 }, { "epoch": 0.8716028840820854, "grad_norm": 0.19098646938800812, "learning_rate": 1.5313801076640715e-05, "loss": 0.5481, "step": 3143 }, { "epoch": 0.8718801996672213, "grad_norm": 0.1954183280467987, "learning_rate": 1.5308420876102863e-05, "loss": 0.5531, "step": 3144 }, { "epoch": 0.8721575152523572, "grad_norm": 0.18958479166030884, "learning_rate": 1.5303040127567694e-05, "loss": 0.5437, "step": 3145 }, { "epoch": 0.8724348308374931, "grad_norm": 0.17530956864356995, "learning_rate": 1.5297658832085126e-05, "loss": 0.5216, "step": 3146 }, { "epoch": 0.872712146422629, "grad_norm": 0.19810372591018677, "learning_rate": 1.5292276990705202e-05, "loss": 0.5623, "step": 3147 }, { "epoch": 0.8729894620077648, "grad_norm": 0.19809319078922272, "learning_rate": 1.5286894604478054e-05, "loss": 0.5226, "step": 3148 }, { "epoch": 0.8732667775929007, "grad_norm": 0.20191790163516998, "learning_rate": 1.528151167445393e-05, "loss": 0.5612, "step": 3149 }, { "epoch": 0.8735440931780366, "grad_norm": 0.18988439440727234, "learning_rate": 1.5276128201683187e-05, "loss": 0.5362, "step": 3150 }, { "epoch": 0.8738214087631725, "grad_norm": 0.181321382522583, "learning_rate": 1.5270744187216277e-05, "loss": 0.5082, "step": 3151 }, { "epoch": 0.8740987243483084, "grad_norm": 0.18067540228366852, "learning_rate": 1.526535963210377e-05, "loss": 0.529, "step": 3152 }, { "epoch": 0.8743760399334443, "grad_norm": 0.19349637627601624, "learning_rate": 1.5259974537396325e-05, "loss": 0.5495, "step": 3153 }, { "epoch": 0.8746533555185801, "grad_norm": 0.17970281839370728, "learning_rate": 1.5254588904144735e-05, "loss": 0.529, "step": 3154 }, { "epoch": 0.874930671103716, "grad_norm": 0.18023180961608887, "learning_rate": 1.5249202733399859e-05, "loss": 0.5638, "step": 3155 }, { "epoch": 0.8752079866888519, "grad_norm": 0.20077262818813324, "learning_rate": 1.5243816026212695e-05, "loss": 0.5346, "step": 3156 }, { "epoch": 0.8754853022739878, "grad_norm": 0.18828438222408295, "learning_rate": 1.5238428783634326e-05, "loss": 0.5721, "step": 3157 }, { "epoch": 0.8757626178591237, "grad_norm": 0.19122296571731567, "learning_rate": 1.5233041006715948e-05, "loss": 0.5158, "step": 3158 }, { "epoch": 0.8760399334442596, "grad_norm": 0.19371193647384644, "learning_rate": 1.5227652696508859e-05, "loss": 0.5758, "step": 3159 }, { "epoch": 0.8763172490293955, "grad_norm": 0.19302338361740112, "learning_rate": 1.5222263854064465e-05, "loss": 0.5643, "step": 3160 }, { "epoch": 0.8765945646145313, "grad_norm": 0.28244075179100037, "learning_rate": 1.5216874480434264e-05, "loss": 0.5462, "step": 3161 }, { "epoch": 0.8768718801996672, "grad_norm": 0.1919064074754715, "learning_rate": 1.521148457666987e-05, "loss": 0.5317, "step": 3162 }, { "epoch": 0.8771491957848031, "grad_norm": 0.18293742835521698, "learning_rate": 1.5206094143823e-05, "loss": 0.514, "step": 3163 }, { "epoch": 0.877426511369939, "grad_norm": 0.193914532661438, "learning_rate": 1.520070318294546e-05, "loss": 0.5488, "step": 3164 }, { "epoch": 0.8777038269550749, "grad_norm": 0.1869155466556549, "learning_rate": 1.5195311695089175e-05, "loss": 0.5591, "step": 3165 }, { "epoch": 0.8779811425402108, "grad_norm": 0.19056154787540436, "learning_rate": 1.5189919681306173e-05, "loss": 0.5584, "step": 3166 }, { "epoch": 0.8782584581253466, "grad_norm": 0.1936255842447281, "learning_rate": 1.5184527142648569e-05, "loss": 0.5477, "step": 3167 }, { "epoch": 0.8785357737104825, "grad_norm": 0.19368582963943481, "learning_rate": 1.5179134080168595e-05, "loss": 0.5568, "step": 3168 }, { "epoch": 0.8788130892956184, "grad_norm": 0.19679602980613708, "learning_rate": 1.517374049491858e-05, "loss": 0.5549, "step": 3169 }, { "epoch": 0.8790904048807543, "grad_norm": 0.20116961002349854, "learning_rate": 1.5168346387950955e-05, "loss": 0.5565, "step": 3170 }, { "epoch": 0.8793677204658902, "grad_norm": 0.1948871910572052, "learning_rate": 1.5162951760318256e-05, "loss": 0.5502, "step": 3171 }, { "epoch": 0.8796450360510261, "grad_norm": 0.19126254320144653, "learning_rate": 1.515755661307311e-05, "loss": 0.5574, "step": 3172 }, { "epoch": 0.879922351636162, "grad_norm": 0.18868835270404816, "learning_rate": 1.5152160947268262e-05, "loss": 0.5377, "step": 3173 }, { "epoch": 0.8801996672212978, "grad_norm": 0.20093926787376404, "learning_rate": 1.5146764763956542e-05, "loss": 0.5486, "step": 3174 }, { "epoch": 0.8804769828064337, "grad_norm": 0.20768702030181885, "learning_rate": 1.5141368064190897e-05, "loss": 0.5431, "step": 3175 }, { "epoch": 0.8807542983915696, "grad_norm": 0.19229131937026978, "learning_rate": 1.5135970849024356e-05, "loss": 0.5298, "step": 3176 }, { "epoch": 0.8810316139767055, "grad_norm": 0.18817956745624542, "learning_rate": 1.5130573119510064e-05, "loss": 0.5414, "step": 3177 }, { "epoch": 0.8813089295618414, "grad_norm": 0.19554218649864197, "learning_rate": 1.5125174876701262e-05, "loss": 0.5381, "step": 3178 }, { "epoch": 0.8815862451469773, "grad_norm": 0.1836453080177307, "learning_rate": 1.5119776121651288e-05, "loss": 0.508, "step": 3179 }, { "epoch": 0.8818635607321131, "grad_norm": 0.2512352764606476, "learning_rate": 1.5114376855413586e-05, "loss": 0.5146, "step": 3180 }, { "epoch": 0.882140876317249, "grad_norm": 0.19116099178791046, "learning_rate": 1.5108977079041692e-05, "loss": 0.5332, "step": 3181 }, { "epoch": 0.8824181919023849, "grad_norm": 0.19376236200332642, "learning_rate": 1.5103576793589244e-05, "loss": 0.5188, "step": 3182 }, { "epoch": 0.8826955074875208, "grad_norm": 0.2322767972946167, "learning_rate": 1.5098176000109984e-05, "loss": 0.5468, "step": 3183 }, { "epoch": 0.8829728230726567, "grad_norm": 0.19449107348918915, "learning_rate": 1.5092774699657747e-05, "loss": 0.5223, "step": 3184 }, { "epoch": 0.8832501386577926, "grad_norm": 0.19483071565628052, "learning_rate": 1.5087372893286475e-05, "loss": 0.5526, "step": 3185 }, { "epoch": 0.8835274542429284, "grad_norm": 0.18979991972446442, "learning_rate": 1.5081970582050201e-05, "loss": 0.5145, "step": 3186 }, { "epoch": 0.8838047698280643, "grad_norm": 0.18646521866321564, "learning_rate": 1.5076567767003056e-05, "loss": 0.5308, "step": 3187 }, { "epoch": 0.8840820854132002, "grad_norm": 0.1952681541442871, "learning_rate": 1.5071164449199277e-05, "loss": 0.5384, "step": 3188 }, { "epoch": 0.8843594009983361, "grad_norm": 0.1863500475883484, "learning_rate": 1.506576062969319e-05, "loss": 0.5721, "step": 3189 }, { "epoch": 0.884636716583472, "grad_norm": 0.19182690978050232, "learning_rate": 1.5060356309539226e-05, "loss": 0.5171, "step": 3190 }, { "epoch": 0.8849140321686079, "grad_norm": 0.18404962122440338, "learning_rate": 1.5054951489791908e-05, "loss": 0.5362, "step": 3191 }, { "epoch": 0.8851913477537438, "grad_norm": 0.19122952222824097, "learning_rate": 1.5049546171505869e-05, "loss": 0.5424, "step": 3192 }, { "epoch": 0.8854686633388796, "grad_norm": 0.1863165944814682, "learning_rate": 1.5044140355735816e-05, "loss": 0.53, "step": 3193 }, { "epoch": 0.8857459789240155, "grad_norm": 0.19876129925251007, "learning_rate": 1.5038734043536582e-05, "loss": 0.5484, "step": 3194 }, { "epoch": 0.8860232945091514, "grad_norm": 0.18133728206157684, "learning_rate": 1.5033327235963065e-05, "loss": 0.5036, "step": 3195 }, { "epoch": 0.8863006100942873, "grad_norm": 0.19382363557815552, "learning_rate": 1.5027919934070291e-05, "loss": 0.5558, "step": 3196 }, { "epoch": 0.8865779256794232, "grad_norm": 0.2071426957845688, "learning_rate": 1.5022512138913358e-05, "loss": 0.539, "step": 3197 }, { "epoch": 0.8868552412645591, "grad_norm": 0.19322386384010315, "learning_rate": 1.5017103851547476e-05, "loss": 0.551, "step": 3198 }, { "epoch": 0.8871325568496949, "grad_norm": 0.18517249822616577, "learning_rate": 1.5011695073027942e-05, "loss": 0.5171, "step": 3199 }, { "epoch": 0.8874098724348308, "grad_norm": 0.18865883350372314, "learning_rate": 1.5006285804410156e-05, "loss": 0.5206, "step": 3200 }, { "epoch": 0.8876871880199667, "grad_norm": 0.19007782638072968, "learning_rate": 1.5000876046749603e-05, "loss": 0.5194, "step": 3201 }, { "epoch": 0.8879645036051026, "grad_norm": 0.18567977845668793, "learning_rate": 1.4995465801101877e-05, "loss": 0.535, "step": 3202 }, { "epoch": 0.8882418191902385, "grad_norm": 0.19124850630760193, "learning_rate": 1.4990055068522654e-05, "loss": 0.5304, "step": 3203 }, { "epoch": 0.8885191347753744, "grad_norm": 0.19194400310516357, "learning_rate": 1.4984643850067717e-05, "loss": 0.549, "step": 3204 }, { "epoch": 0.8887964503605102, "grad_norm": 0.18528962135314941, "learning_rate": 1.4979232146792936e-05, "loss": 0.5053, "step": 3205 }, { "epoch": 0.8890737659456461, "grad_norm": 0.1908206045627594, "learning_rate": 1.4973819959754273e-05, "loss": 0.5427, "step": 3206 }, { "epoch": 0.889351081530782, "grad_norm": 0.18456414341926575, "learning_rate": 1.4968407290007796e-05, "loss": 0.567, "step": 3207 }, { "epoch": 0.8896283971159179, "grad_norm": 0.19162026047706604, "learning_rate": 1.496299413860966e-05, "loss": 0.5609, "step": 3208 }, { "epoch": 0.8899057127010538, "grad_norm": 0.18303340673446655, "learning_rate": 1.4957580506616109e-05, "loss": 0.5281, "step": 3209 }, { "epoch": 0.8901830282861897, "grad_norm": 0.19368955492973328, "learning_rate": 1.4952166395083486e-05, "loss": 0.5348, "step": 3210 }, { "epoch": 0.8904603438713256, "grad_norm": 0.1844678521156311, "learning_rate": 1.4946751805068238e-05, "loss": 0.542, "step": 3211 }, { "epoch": 0.8907376594564614, "grad_norm": 0.18950358033180237, "learning_rate": 1.4941336737626879e-05, "loss": 0.5558, "step": 3212 }, { "epoch": 0.8910149750415973, "grad_norm": 0.19502434134483337, "learning_rate": 1.4935921193816046e-05, "loss": 0.5109, "step": 3213 }, { "epoch": 0.8912922906267332, "grad_norm": 0.18396303057670593, "learning_rate": 1.4930505174692447e-05, "loss": 0.5152, "step": 3214 }, { "epoch": 0.8915696062118691, "grad_norm": 0.20252270996570587, "learning_rate": 1.4925088681312895e-05, "loss": 0.5219, "step": 3215 }, { "epoch": 0.891846921797005, "grad_norm": 0.18550090491771698, "learning_rate": 1.4919671714734288e-05, "loss": 0.5321, "step": 3216 }, { "epoch": 0.8921242373821409, "grad_norm": 0.19548344612121582, "learning_rate": 1.4914254276013622e-05, "loss": 0.559, "step": 3217 }, { "epoch": 0.8924015529672767, "grad_norm": 0.1747296005487442, "learning_rate": 1.4908836366207985e-05, "loss": 0.5218, "step": 3218 }, { "epoch": 0.8926788685524126, "grad_norm": 0.18561501801013947, "learning_rate": 1.4903417986374548e-05, "loss": 0.5615, "step": 3219 }, { "epoch": 0.8929561841375485, "grad_norm": 0.19387111067771912, "learning_rate": 1.4897999137570586e-05, "loss": 0.5437, "step": 3220 }, { "epoch": 0.8932334997226844, "grad_norm": 0.19557684659957886, "learning_rate": 1.4892579820853459e-05, "loss": 0.5387, "step": 3221 }, { "epoch": 0.8935108153078203, "grad_norm": 0.1842055469751358, "learning_rate": 1.488716003728062e-05, "loss": 0.5277, "step": 3222 }, { "epoch": 0.8937881308929562, "grad_norm": 0.1835155189037323, "learning_rate": 1.4881739787909607e-05, "loss": 0.5342, "step": 3223 }, { "epoch": 0.894065446478092, "grad_norm": 0.1914055347442627, "learning_rate": 1.4876319073798061e-05, "loss": 0.548, "step": 3224 }, { "epoch": 0.8943427620632279, "grad_norm": 0.18737882375717163, "learning_rate": 1.4870897896003705e-05, "loss": 0.5281, "step": 3225 }, { "epoch": 0.8946200776483638, "grad_norm": 0.20303812623023987, "learning_rate": 1.4865476255584351e-05, "loss": 0.5603, "step": 3226 }, { "epoch": 0.8948973932334997, "grad_norm": 0.20020204782485962, "learning_rate": 1.486005415359791e-05, "loss": 0.5434, "step": 3227 }, { "epoch": 0.8951747088186356, "grad_norm": 0.1958005726337433, "learning_rate": 1.4854631591102374e-05, "loss": 0.522, "step": 3228 }, { "epoch": 0.8954520244037715, "grad_norm": 0.23392035067081451, "learning_rate": 1.4849208569155829e-05, "loss": 0.5574, "step": 3229 }, { "epoch": 0.8957293399889074, "grad_norm": 0.1955118626356125, "learning_rate": 1.4843785088816455e-05, "loss": 0.5327, "step": 3230 }, { "epoch": 0.8960066555740432, "grad_norm": 0.18367242813110352, "learning_rate": 1.4838361151142511e-05, "loss": 0.5166, "step": 3231 }, { "epoch": 0.8962839711591791, "grad_norm": 0.19902175664901733, "learning_rate": 1.4832936757192354e-05, "loss": 0.5283, "step": 3232 }, { "epoch": 0.896561286744315, "grad_norm": 0.19180850684642792, "learning_rate": 1.4827511908024419e-05, "loss": 0.5277, "step": 3233 }, { "epoch": 0.8968386023294509, "grad_norm": 0.18553797900676727, "learning_rate": 1.4822086604697253e-05, "loss": 0.5325, "step": 3234 }, { "epoch": 0.8971159179145868, "grad_norm": 0.1895224153995514, "learning_rate": 1.4816660848269462e-05, "loss": 0.552, "step": 3235 }, { "epoch": 0.8973932334997227, "grad_norm": 0.18854071199893951, "learning_rate": 1.4811234639799761e-05, "loss": 0.5471, "step": 3236 }, { "epoch": 0.8976705490848585, "grad_norm": 0.17953041195869446, "learning_rate": 1.480580798034695e-05, "loss": 0.5556, "step": 3237 }, { "epoch": 0.8979478646699944, "grad_norm": 0.1898965984582901, "learning_rate": 1.480038087096991e-05, "loss": 0.5282, "step": 3238 }, { "epoch": 0.8982251802551303, "grad_norm": 0.19018866121768951, "learning_rate": 1.4794953312727613e-05, "loss": 0.5301, "step": 3239 }, { "epoch": 0.8985024958402662, "grad_norm": 0.1814391165971756, "learning_rate": 1.4789525306679122e-05, "loss": 0.5526, "step": 3240 }, { "epoch": 0.8987798114254021, "grad_norm": 0.19814236462116241, "learning_rate": 1.4784096853883586e-05, "loss": 0.5741, "step": 3241 }, { "epoch": 0.899057127010538, "grad_norm": 0.19339510798454285, "learning_rate": 1.4778667955400233e-05, "loss": 0.5375, "step": 3242 }, { "epoch": 0.8993344425956739, "grad_norm": 0.29308080673217773, "learning_rate": 1.4773238612288393e-05, "loss": 0.5239, "step": 3243 }, { "epoch": 0.8996117581808097, "grad_norm": 0.1853429526090622, "learning_rate": 1.476780882560747e-05, "loss": 0.5436, "step": 3244 }, { "epoch": 0.8998890737659456, "grad_norm": 0.18454696238040924, "learning_rate": 1.4762378596416961e-05, "loss": 0.5032, "step": 3245 }, { "epoch": 0.9001663893510815, "grad_norm": 0.19088363647460938, "learning_rate": 1.4756947925776448e-05, "loss": 0.5369, "step": 3246 }, { "epoch": 0.9004437049362174, "grad_norm": 0.18263565003871918, "learning_rate": 1.4751516814745598e-05, "loss": 0.5313, "step": 3247 }, { "epoch": 0.9007210205213533, "grad_norm": 0.1870230883359909, "learning_rate": 1.4746085264384165e-05, "loss": 0.551, "step": 3248 }, { "epoch": 0.9009983361064892, "grad_norm": 0.18567577004432678, "learning_rate": 1.4740653275751987e-05, "loss": 0.5585, "step": 3249 }, { "epoch": 0.901275651691625, "grad_norm": 0.20058301091194153, "learning_rate": 1.4735220849908987e-05, "loss": 0.5031, "step": 3250 }, { "epoch": 0.9015529672767609, "grad_norm": 0.18694059550762177, "learning_rate": 1.4729787987915186e-05, "loss": 0.5334, "step": 3251 }, { "epoch": 0.9018302828618968, "grad_norm": 0.18202227354049683, "learning_rate": 1.4724354690830663e-05, "loss": 0.5553, "step": 3252 }, { "epoch": 0.9021075984470327, "grad_norm": 0.19465142488479614, "learning_rate": 1.4718920959715616e-05, "loss": 0.5115, "step": 3253 }, { "epoch": 0.9023849140321686, "grad_norm": 0.18764632940292358, "learning_rate": 1.4713486795630291e-05, "loss": 0.5546, "step": 3254 }, { "epoch": 0.9026622296173045, "grad_norm": 0.2006525844335556, "learning_rate": 1.4708052199635053e-05, "loss": 0.5239, "step": 3255 }, { "epoch": 0.9029395452024404, "grad_norm": 0.18893550336360931, "learning_rate": 1.4702617172790325e-05, "loss": 0.5246, "step": 3256 }, { "epoch": 0.9032168607875762, "grad_norm": 0.2028273344039917, "learning_rate": 1.4697181716156633e-05, "loss": 0.5548, "step": 3257 }, { "epoch": 0.9034941763727121, "grad_norm": 0.18957914412021637, "learning_rate": 1.4691745830794574e-05, "loss": 0.5261, "step": 3258 }, { "epoch": 0.903771491957848, "grad_norm": 0.19126209616661072, "learning_rate": 1.4686309517764835e-05, "loss": 0.5479, "step": 3259 }, { "epoch": 0.9040488075429839, "grad_norm": 0.1924603134393692, "learning_rate": 1.4680872778128183e-05, "loss": 0.5456, "step": 3260 }, { "epoch": 0.9043261231281198, "grad_norm": 0.1945699006319046, "learning_rate": 1.4675435612945468e-05, "loss": 0.5437, "step": 3261 }, { "epoch": 0.9046034387132557, "grad_norm": 0.18470460176467896, "learning_rate": 1.466999802327763e-05, "loss": 0.5342, "step": 3262 }, { "epoch": 0.9048807542983915, "grad_norm": 0.18286101520061493, "learning_rate": 1.4664560010185685e-05, "loss": 0.5007, "step": 3263 }, { "epoch": 0.9051580698835274, "grad_norm": 0.20376376807689667, "learning_rate": 1.4659121574730736e-05, "loss": 0.5517, "step": 3264 }, { "epoch": 0.9054353854686633, "grad_norm": 0.18289178609848022, "learning_rate": 1.465368271797396e-05, "loss": 0.5254, "step": 3265 }, { "epoch": 0.9057127010537992, "grad_norm": 0.18297207355499268, "learning_rate": 1.4648243440976625e-05, "loss": 0.5287, "step": 3266 }, { "epoch": 0.9059900166389351, "grad_norm": 0.19243334233760834, "learning_rate": 1.464280374480008e-05, "loss": 0.5408, "step": 3267 }, { "epoch": 0.906267332224071, "grad_norm": 0.19214653968811035, "learning_rate": 1.463736363050575e-05, "loss": 0.5455, "step": 3268 }, { "epoch": 0.9065446478092068, "grad_norm": 0.1994084268808365, "learning_rate": 1.4631923099155143e-05, "loss": 0.546, "step": 3269 }, { "epoch": 0.9068219633943427, "grad_norm": 0.19682733714580536, "learning_rate": 1.4626482151809865e-05, "loss": 0.5031, "step": 3270 }, { "epoch": 0.9070992789794786, "grad_norm": 0.3178056478500366, "learning_rate": 1.462104078953157e-05, "loss": 0.5183, "step": 3271 }, { "epoch": 0.9073765945646145, "grad_norm": 0.1968078464269638, "learning_rate": 1.4615599013382028e-05, "loss": 0.5475, "step": 3272 }, { "epoch": 0.9076539101497504, "grad_norm": 0.18579484522342682, "learning_rate": 1.461015682442306e-05, "loss": 0.5296, "step": 3273 }, { "epoch": 0.9079312257348863, "grad_norm": 0.1962941437959671, "learning_rate": 1.4604714223716595e-05, "loss": 0.5565, "step": 3274 }, { "epoch": 0.9082085413200222, "grad_norm": 0.19378679990768433, "learning_rate": 1.4599271212324617e-05, "loss": 0.5506, "step": 3275 }, { "epoch": 0.908485856905158, "grad_norm": 0.22947950661182404, "learning_rate": 1.4593827791309206e-05, "loss": 0.5625, "step": 3276 }, { "epoch": 0.9087631724902939, "grad_norm": 0.1949935257434845, "learning_rate": 1.458838396173252e-05, "loss": 0.5327, "step": 3277 }, { "epoch": 0.9090404880754298, "grad_norm": 0.19015999138355255, "learning_rate": 1.458293972465679e-05, "loss": 0.5168, "step": 3278 }, { "epoch": 0.9093178036605657, "grad_norm": 0.19812412559986115, "learning_rate": 1.4577495081144337e-05, "loss": 0.5389, "step": 3279 }, { "epoch": 0.9095951192457016, "grad_norm": 0.19743192195892334, "learning_rate": 1.4572050032257548e-05, "loss": 0.5492, "step": 3280 }, { "epoch": 0.9098724348308375, "grad_norm": 0.17543548345565796, "learning_rate": 1.4566604579058904e-05, "loss": 0.5064, "step": 3281 }, { "epoch": 0.9101497504159733, "grad_norm": 0.20693279802799225, "learning_rate": 1.4561158722610948e-05, "loss": 0.5312, "step": 3282 }, { "epoch": 0.9104270660011092, "grad_norm": 0.19201938807964325, "learning_rate": 1.4555712463976318e-05, "loss": 0.518, "step": 3283 }, { "epoch": 0.9107043815862451, "grad_norm": 0.1977842152118683, "learning_rate": 1.4550265804217722e-05, "loss": 0.5263, "step": 3284 }, { "epoch": 0.910981697171381, "grad_norm": 0.18007364869117737, "learning_rate": 1.4544818744397947e-05, "loss": 0.521, "step": 3285 }, { "epoch": 0.9112590127565169, "grad_norm": 0.19621910154819489, "learning_rate": 1.453937128557986e-05, "loss": 0.5701, "step": 3286 }, { "epoch": 0.9115363283416528, "grad_norm": 0.1956057995557785, "learning_rate": 1.4533923428826399e-05, "loss": 0.5542, "step": 3287 }, { "epoch": 0.9118136439267887, "grad_norm": 0.20553717017173767, "learning_rate": 1.452847517520059e-05, "loss": 0.5379, "step": 3288 }, { "epoch": 0.9120909595119245, "grad_norm": 0.19297057390213013, "learning_rate": 1.4523026525765532e-05, "loss": 0.5562, "step": 3289 }, { "epoch": 0.9123682750970604, "grad_norm": 0.19896887242794037, "learning_rate": 1.4517577481584399e-05, "loss": 0.5502, "step": 3290 }, { "epoch": 0.9126455906821963, "grad_norm": 0.18490025401115417, "learning_rate": 1.4512128043720447e-05, "loss": 0.5426, "step": 3291 }, { "epoch": 0.9129229062673322, "grad_norm": 0.19716937839984894, "learning_rate": 1.4506678213236998e-05, "loss": 0.5433, "step": 3292 }, { "epoch": 0.9132002218524681, "grad_norm": 0.19106687605381012, "learning_rate": 1.4501227991197472e-05, "loss": 0.5437, "step": 3293 }, { "epoch": 0.913477537437604, "grad_norm": 0.18353629112243652, "learning_rate": 1.4495777378665337e-05, "loss": 0.5374, "step": 3294 }, { "epoch": 0.9137548530227398, "grad_norm": 0.19178996980190277, "learning_rate": 1.4490326376704161e-05, "loss": 0.5471, "step": 3295 }, { "epoch": 0.9140321686078757, "grad_norm": 0.18615961074829102, "learning_rate": 1.4484874986377573e-05, "loss": 0.5503, "step": 3296 }, { "epoch": 0.9143094841930116, "grad_norm": 0.19387958943843842, "learning_rate": 1.447942320874929e-05, "loss": 0.5174, "step": 3297 }, { "epoch": 0.9145867997781475, "grad_norm": 0.18303687870502472, "learning_rate": 1.4473971044883095e-05, "loss": 0.5335, "step": 3298 }, { "epoch": 0.9148641153632834, "grad_norm": 0.9202612638473511, "learning_rate": 1.4468518495842848e-05, "loss": 0.547, "step": 3299 }, { "epoch": 0.9151414309484193, "grad_norm": 0.1895207315683365, "learning_rate": 1.446306556269249e-05, "loss": 0.5561, "step": 3300 }, { "epoch": 0.9154187465335551, "grad_norm": 0.18821243941783905, "learning_rate": 1.4457612246496027e-05, "loss": 0.5318, "step": 3301 }, { "epoch": 0.915696062118691, "grad_norm": 0.1886141002178192, "learning_rate": 1.4452158548317551e-05, "loss": 0.5517, "step": 3302 }, { "epoch": 0.9159733777038269, "grad_norm": 0.1940373033285141, "learning_rate": 1.444670446922122e-05, "loss": 0.5617, "step": 3303 }, { "epoch": 0.9162506932889628, "grad_norm": 0.18342509865760803, "learning_rate": 1.444125001027127e-05, "loss": 0.5438, "step": 3304 }, { "epoch": 0.9165280088740987, "grad_norm": 0.19425570964813232, "learning_rate": 1.4435795172532014e-05, "loss": 0.5413, "step": 3305 }, { "epoch": 0.9168053244592346, "grad_norm": 0.19505858421325684, "learning_rate": 1.4430339957067826e-05, "loss": 0.5666, "step": 3306 }, { "epoch": 0.9170826400443705, "grad_norm": 0.19822010397911072, "learning_rate": 1.4424884364943172e-05, "loss": 0.521, "step": 3307 }, { "epoch": 0.9173599556295063, "grad_norm": 0.1916007101535797, "learning_rate": 1.4419428397222582e-05, "loss": 0.5649, "step": 3308 }, { "epoch": 0.9176372712146422, "grad_norm": 0.19743779301643372, "learning_rate": 1.441397205497065e-05, "loss": 0.5337, "step": 3309 }, { "epoch": 0.9179145867997781, "grad_norm": 0.19250613451004028, "learning_rate": 1.4408515339252068e-05, "loss": 0.5477, "step": 3310 }, { "epoch": 0.918191902384914, "grad_norm": 0.18895046412944794, "learning_rate": 1.4403058251131574e-05, "loss": 0.5782, "step": 3311 }, { "epoch": 0.9184692179700499, "grad_norm": 0.1895778477191925, "learning_rate": 1.4397600791673999e-05, "loss": 0.5344, "step": 3312 }, { "epoch": 0.9187465335551858, "grad_norm": 0.1935378462076187, "learning_rate": 1.4392142961944228e-05, "loss": 0.5568, "step": 3313 }, { "epoch": 0.9190238491403216, "grad_norm": 0.18773426115512848, "learning_rate": 1.4386684763007235e-05, "loss": 0.546, "step": 3314 }, { "epoch": 0.9193011647254575, "grad_norm": 0.18107493221759796, "learning_rate": 1.438122619592806e-05, "loss": 0.5007, "step": 3315 }, { "epoch": 0.9195784803105934, "grad_norm": 0.19908292591571808, "learning_rate": 1.4375767261771814e-05, "loss": 0.545, "step": 3316 }, { "epoch": 0.9198557958957293, "grad_norm": 0.19010509550571442, "learning_rate": 1.4370307961603673e-05, "loss": 0.5593, "step": 3317 }, { "epoch": 0.9201331114808652, "grad_norm": 0.19176799058914185, "learning_rate": 1.4364848296488897e-05, "loss": 0.5676, "step": 3318 }, { "epoch": 0.9204104270660011, "grad_norm": 0.1841881275177002, "learning_rate": 1.4359388267492812e-05, "loss": 0.5361, "step": 3319 }, { "epoch": 0.920687742651137, "grad_norm": 0.19731801748275757, "learning_rate": 1.4353927875680808e-05, "loss": 0.5213, "step": 3320 }, { "epoch": 0.9209650582362728, "grad_norm": 0.18500731885433197, "learning_rate": 1.4348467122118364e-05, "loss": 0.5417, "step": 3321 }, { "epoch": 0.9212423738214087, "grad_norm": 0.2047429382801056, "learning_rate": 1.4343006007871004e-05, "loss": 0.5333, "step": 3322 }, { "epoch": 0.9215196894065446, "grad_norm": 0.2538173198699951, "learning_rate": 1.4337544534004346e-05, "loss": 0.5529, "step": 3323 }, { "epoch": 0.9217970049916805, "grad_norm": 0.1965561956167221, "learning_rate": 1.4332082701584063e-05, "loss": 0.5432, "step": 3324 }, { "epoch": 0.9220743205768164, "grad_norm": 0.1974562108516693, "learning_rate": 1.4326620511675906e-05, "loss": 0.5159, "step": 3325 }, { "epoch": 0.9223516361619523, "grad_norm": 0.19231897592544556, "learning_rate": 1.4321157965345688e-05, "loss": 0.5674, "step": 3326 }, { "epoch": 0.9226289517470881, "grad_norm": 0.19817808270454407, "learning_rate": 1.4315695063659304e-05, "loss": 0.5496, "step": 3327 }, { "epoch": 0.922906267332224, "grad_norm": 0.1902833878993988, "learning_rate": 1.4310231807682706e-05, "loss": 0.5342, "step": 3328 }, { "epoch": 0.9231835829173599, "grad_norm": 0.19699136912822723, "learning_rate": 1.4304768198481923e-05, "loss": 0.5417, "step": 3329 }, { "epoch": 0.9234608985024958, "grad_norm": 0.19054579734802246, "learning_rate": 1.4299304237123043e-05, "loss": 0.5556, "step": 3330 }, { "epoch": 0.9237382140876317, "grad_norm": 0.191939577460289, "learning_rate": 1.4293839924672242e-05, "loss": 0.5304, "step": 3331 }, { "epoch": 0.9240155296727676, "grad_norm": 0.22973594069480896, "learning_rate": 1.4288375262195739e-05, "loss": 0.5337, "step": 3332 }, { "epoch": 0.9242928452579035, "grad_norm": 0.1892794668674469, "learning_rate": 1.4282910250759843e-05, "loss": 0.5527, "step": 3333 }, { "epoch": 0.9245701608430393, "grad_norm": 0.19066397845745087, "learning_rate": 1.427744489143092e-05, "loss": 0.5446, "step": 3334 }, { "epoch": 0.9248474764281752, "grad_norm": 0.18730609118938446, "learning_rate": 1.4271979185275402e-05, "loss": 0.5425, "step": 3335 }, { "epoch": 0.9251247920133111, "grad_norm": 0.18019746243953705, "learning_rate": 1.4266513133359802e-05, "loss": 0.5263, "step": 3336 }, { "epoch": 0.925402107598447, "grad_norm": 0.19604218006134033, "learning_rate": 1.4261046736750686e-05, "loss": 0.5552, "step": 3337 }, { "epoch": 0.9256794231835829, "grad_norm": 0.1902369260787964, "learning_rate": 1.4255579996514693e-05, "loss": 0.5531, "step": 3338 }, { "epoch": 0.9259567387687188, "grad_norm": 0.19522936642169952, "learning_rate": 1.4250112913718525e-05, "loss": 0.545, "step": 3339 }, { "epoch": 0.9262340543538546, "grad_norm": 0.21855325996875763, "learning_rate": 1.4244645489428968e-05, "loss": 0.5133, "step": 3340 }, { "epoch": 0.9265113699389905, "grad_norm": 0.20022931694984436, "learning_rate": 1.4239177724712843e-05, "loss": 0.5392, "step": 3341 }, { "epoch": 0.9267886855241264, "grad_norm": 0.19183357059955597, "learning_rate": 1.423370962063707e-05, "loss": 0.5338, "step": 3342 }, { "epoch": 0.9270660011092623, "grad_norm": 0.18884657323360443, "learning_rate": 1.4228241178268617e-05, "loss": 0.5143, "step": 3343 }, { "epoch": 0.9273433166943982, "grad_norm": 0.19082637131214142, "learning_rate": 1.4222772398674522e-05, "loss": 0.5281, "step": 3344 }, { "epoch": 0.9276206322795341, "grad_norm": 0.20209652185440063, "learning_rate": 1.4217303282921888e-05, "loss": 0.526, "step": 3345 }, { "epoch": 0.92789794786467, "grad_norm": 0.18856458365917206, "learning_rate": 1.4211833832077881e-05, "loss": 0.52, "step": 3346 }, { "epoch": 0.9281752634498058, "grad_norm": 0.1941954344511032, "learning_rate": 1.4206364047209742e-05, "loss": 0.5401, "step": 3347 }, { "epoch": 0.9284525790349417, "grad_norm": 0.18749526143074036, "learning_rate": 1.4200893929384767e-05, "loss": 0.5449, "step": 3348 }, { "epoch": 0.9287298946200776, "grad_norm": 0.18534240126609802, "learning_rate": 1.4195423479670319e-05, "loss": 0.5039, "step": 3349 }, { "epoch": 0.9290072102052135, "grad_norm": 0.18905483186244965, "learning_rate": 1.4189952699133837e-05, "loss": 0.5632, "step": 3350 }, { "epoch": 0.9292845257903494, "grad_norm": 0.20442554354667664, "learning_rate": 1.4184481588842805e-05, "loss": 0.5339, "step": 3351 }, { "epoch": 0.9295618413754853, "grad_norm": 0.1810925155878067, "learning_rate": 1.4179010149864785e-05, "loss": 0.512, "step": 3352 }, { "epoch": 0.9298391569606211, "grad_norm": 0.2010018676519394, "learning_rate": 1.4173538383267404e-05, "loss": 0.5514, "step": 3353 }, { "epoch": 0.930116472545757, "grad_norm": 0.18728989362716675, "learning_rate": 1.4168066290118342e-05, "loss": 0.5161, "step": 3354 }, { "epoch": 0.9303937881308929, "grad_norm": 0.18747897446155548, "learning_rate": 1.4162593871485352e-05, "loss": 0.5549, "step": 3355 }, { "epoch": 0.9306711037160288, "grad_norm": 0.20961935818195343, "learning_rate": 1.415712112843625e-05, "loss": 0.5422, "step": 3356 }, { "epoch": 0.9309484193011647, "grad_norm": 0.1834210753440857, "learning_rate": 1.415164806203891e-05, "loss": 0.5074, "step": 3357 }, { "epoch": 0.9312257348863006, "grad_norm": 0.1954893171787262, "learning_rate": 1.4146174673361273e-05, "loss": 0.5722, "step": 3358 }, { "epoch": 0.9315030504714364, "grad_norm": 0.1870308816432953, "learning_rate": 1.4140700963471346e-05, "loss": 0.5381, "step": 3359 }, { "epoch": 0.9317803660565723, "grad_norm": 0.19276247918605804, "learning_rate": 1.4135226933437185e-05, "loss": 0.5517, "step": 3360 }, { "epoch": 0.9320576816417082, "grad_norm": 0.18969060480594635, "learning_rate": 1.412975258432693e-05, "loss": 0.5347, "step": 3361 }, { "epoch": 0.9323349972268441, "grad_norm": 0.19431856274604797, "learning_rate": 1.4124277917208765e-05, "loss": 0.5077, "step": 3362 }, { "epoch": 0.93261231281198, "grad_norm": 0.1918652504682541, "learning_rate": 1.4118802933150943e-05, "loss": 0.526, "step": 3363 }, { "epoch": 0.9328896283971159, "grad_norm": 0.19903786480426788, "learning_rate": 1.4113327633221782e-05, "loss": 0.5312, "step": 3364 }, { "epoch": 0.9331669439822518, "grad_norm": 0.21233876049518585, "learning_rate": 1.4107852018489653e-05, "loss": 0.5256, "step": 3365 }, { "epoch": 0.9334442595673876, "grad_norm": 0.1878969967365265, "learning_rate": 1.4102376090022997e-05, "loss": 0.5439, "step": 3366 }, { "epoch": 0.9337215751525235, "grad_norm": 0.194586381316185, "learning_rate": 1.409689984889031e-05, "loss": 0.5392, "step": 3367 }, { "epoch": 0.9339988907376594, "grad_norm": 0.1912042200565338, "learning_rate": 1.4091423296160152e-05, "loss": 0.5269, "step": 3368 }, { "epoch": 0.9342762063227953, "grad_norm": 0.18847908079624176, "learning_rate": 1.4085946432901154e-05, "loss": 0.5175, "step": 3369 }, { "epoch": 0.9345535219079312, "grad_norm": 0.19912898540496826, "learning_rate": 1.4080469260181977e-05, "loss": 0.5336, "step": 3370 }, { "epoch": 0.9348308374930671, "grad_norm": 0.1928989291191101, "learning_rate": 1.4074991779071378e-05, "loss": 0.5847, "step": 3371 }, { "epoch": 0.9351081530782029, "grad_norm": 0.20281600952148438, "learning_rate": 1.4069513990638156e-05, "loss": 0.5772, "step": 3372 }, { "epoch": 0.9353854686633388, "grad_norm": 0.21058966219425201, "learning_rate": 1.4064035895951169e-05, "loss": 0.5201, "step": 3373 }, { "epoch": 0.9356627842484747, "grad_norm": 0.190606027841568, "learning_rate": 1.4058557496079342e-05, "loss": 0.5226, "step": 3374 }, { "epoch": 0.9359400998336106, "grad_norm": 0.1862584948539734, "learning_rate": 1.4053078792091654e-05, "loss": 0.5275, "step": 3375 }, { "epoch": 0.9362174154187465, "grad_norm": 0.1864033192396164, "learning_rate": 1.4047599785057144e-05, "loss": 0.515, "step": 3376 }, { "epoch": 0.9364947310038824, "grad_norm": 0.21961943805217743, "learning_rate": 1.4042120476044912e-05, "loss": 0.5344, "step": 3377 }, { "epoch": 0.9367720465890182, "grad_norm": 0.18101632595062256, "learning_rate": 1.4036640866124123e-05, "loss": 0.547, "step": 3378 }, { "epoch": 0.9370493621741541, "grad_norm": 0.21359553933143616, "learning_rate": 1.4031160956363982e-05, "loss": 0.5392, "step": 3379 }, { "epoch": 0.93732667775929, "grad_norm": 0.1979496330022812, "learning_rate": 1.4025680747833775e-05, "loss": 0.5188, "step": 3380 }, { "epoch": 0.937603993344426, "grad_norm": 0.18914289772510529, "learning_rate": 1.402020024160283e-05, "loss": 0.5291, "step": 3381 }, { "epoch": 0.9378813089295619, "grad_norm": 0.18374024331569672, "learning_rate": 1.4014719438740543e-05, "loss": 0.5368, "step": 3382 }, { "epoch": 0.9381586245146978, "grad_norm": 0.201919287443161, "learning_rate": 1.4009238340316358e-05, "loss": 0.5429, "step": 3383 }, { "epoch": 0.9384359400998337, "grad_norm": 0.19644081592559814, "learning_rate": 1.400375694739979e-05, "loss": 0.5151, "step": 3384 }, { "epoch": 0.9387132556849695, "grad_norm": 0.18970987200737, "learning_rate": 1.39982752610604e-05, "loss": 0.5852, "step": 3385 }, { "epoch": 0.9389905712701054, "grad_norm": 0.19986121356487274, "learning_rate": 1.3992793282367808e-05, "loss": 0.5209, "step": 3386 }, { "epoch": 0.9392678868552413, "grad_norm": 0.19541729986667633, "learning_rate": 1.3987311012391698e-05, "loss": 0.5315, "step": 3387 }, { "epoch": 0.9395452024403772, "grad_norm": 0.1904657930135727, "learning_rate": 1.3981828452201804e-05, "loss": 0.5483, "step": 3388 }, { "epoch": 0.9398225180255131, "grad_norm": 0.19447685778141022, "learning_rate": 1.3976345602867916e-05, "loss": 0.5446, "step": 3389 }, { "epoch": 0.940099833610649, "grad_norm": 0.1892615556716919, "learning_rate": 1.3970862465459891e-05, "loss": 0.5366, "step": 3390 }, { "epoch": 0.9403771491957849, "grad_norm": 0.1949482262134552, "learning_rate": 1.3965379041047624e-05, "loss": 0.5577, "step": 3391 }, { "epoch": 0.9406544647809207, "grad_norm": 0.19417253136634827, "learning_rate": 1.3959895330701083e-05, "loss": 0.5431, "step": 3392 }, { "epoch": 0.9409317803660566, "grad_norm": 0.19106963276863098, "learning_rate": 1.3954411335490284e-05, "loss": 0.5308, "step": 3393 }, { "epoch": 0.9412090959511925, "grad_norm": 0.19582101702690125, "learning_rate": 1.39489270564853e-05, "loss": 0.5221, "step": 3394 }, { "epoch": 0.9414864115363284, "grad_norm": 0.1944214105606079, "learning_rate": 1.3943442494756259e-05, "loss": 0.5428, "step": 3395 }, { "epoch": 0.9417637271214643, "grad_norm": 0.19008556008338928, "learning_rate": 1.3937957651373342e-05, "loss": 0.5458, "step": 3396 }, { "epoch": 0.9420410427066002, "grad_norm": 0.186477929353714, "learning_rate": 1.3932472527406792e-05, "loss": 0.5315, "step": 3397 }, { "epoch": 0.942318358291736, "grad_norm": 0.20386064052581787, "learning_rate": 1.3926987123926897e-05, "loss": 0.5309, "step": 3398 }, { "epoch": 0.9425956738768719, "grad_norm": 0.18348954617977142, "learning_rate": 1.3921501442004011e-05, "loss": 0.5333, "step": 3399 }, { "epoch": 0.9428729894620078, "grad_norm": 0.19923634827136993, "learning_rate": 1.3916015482708528e-05, "loss": 0.5301, "step": 3400 }, { "epoch": 0.9431503050471437, "grad_norm": 0.19134169816970825, "learning_rate": 1.3910529247110906e-05, "loss": 0.519, "step": 3401 }, { "epoch": 0.9434276206322796, "grad_norm": 0.18783889710903168, "learning_rate": 1.390504273628166e-05, "loss": 0.5207, "step": 3402 }, { "epoch": 0.9437049362174155, "grad_norm": 0.18133123219013214, "learning_rate": 1.3899555951291348e-05, "loss": 0.5244, "step": 3403 }, { "epoch": 0.9439822518025514, "grad_norm": 0.17967382073402405, "learning_rate": 1.3894068893210594e-05, "loss": 0.558, "step": 3404 }, { "epoch": 0.9442595673876872, "grad_norm": 0.1942291408777237, "learning_rate": 1.3888581563110059e-05, "loss": 0.5307, "step": 3405 }, { "epoch": 0.9445368829728231, "grad_norm": 0.22371730208396912, "learning_rate": 1.3883093962060472e-05, "loss": 0.5386, "step": 3406 }, { "epoch": 0.944814198557959, "grad_norm": 0.18771930038928986, "learning_rate": 1.387760609113261e-05, "loss": 0.5267, "step": 3407 }, { "epoch": 0.9450915141430949, "grad_norm": 0.19484716653823853, "learning_rate": 1.3872117951397298e-05, "loss": 0.5578, "step": 3408 }, { "epoch": 0.9453688297282308, "grad_norm": 0.190118208527565, "learning_rate": 1.3866629543925424e-05, "loss": 0.4886, "step": 3409 }, { "epoch": 0.9456461453133667, "grad_norm": 0.18805035948753357, "learning_rate": 1.3861140869787914e-05, "loss": 0.5393, "step": 3410 }, { "epoch": 0.9459234608985025, "grad_norm": 0.1881994605064392, "learning_rate": 1.385565193005576e-05, "loss": 0.5368, "step": 3411 }, { "epoch": 0.9462007764836384, "grad_norm": 0.18705572187900543, "learning_rate": 1.3850162725799997e-05, "loss": 0.5706, "step": 3412 }, { "epoch": 0.9464780920687743, "grad_norm": 0.23857946693897247, "learning_rate": 1.3844673258091714e-05, "loss": 0.5151, "step": 3413 }, { "epoch": 0.9467554076539102, "grad_norm": 0.190442755818367, "learning_rate": 1.383918352800205e-05, "loss": 0.5287, "step": 3414 }, { "epoch": 0.9470327232390461, "grad_norm": 0.19661828875541687, "learning_rate": 1.38336935366022e-05, "loss": 0.5474, "step": 3415 }, { "epoch": 0.947310038824182, "grad_norm": 0.1797836571931839, "learning_rate": 1.3828203284963409e-05, "loss": 0.5416, "step": 3416 }, { "epoch": 0.9475873544093179, "grad_norm": 0.23815502226352692, "learning_rate": 1.382271277415696e-05, "loss": 0.5501, "step": 3417 }, { "epoch": 0.9478646699944537, "grad_norm": 0.19180168211460114, "learning_rate": 1.381722200525421e-05, "loss": 0.5254, "step": 3418 }, { "epoch": 0.9481419855795896, "grad_norm": 0.2042774260044098, "learning_rate": 1.3811730979326545e-05, "loss": 0.5437, "step": 3419 }, { "epoch": 0.9484193011647255, "grad_norm": 0.1914202868938446, "learning_rate": 1.3806239697445414e-05, "loss": 0.5602, "step": 3420 }, { "epoch": 0.9486966167498614, "grad_norm": 0.2009340077638626, "learning_rate": 1.3800748160682309e-05, "loss": 0.5497, "step": 3421 }, { "epoch": 0.9489739323349973, "grad_norm": 0.19518032670021057, "learning_rate": 1.3795256370108776e-05, "loss": 0.5149, "step": 3422 }, { "epoch": 0.9492512479201332, "grad_norm": 0.19419553875923157, "learning_rate": 1.3789764326796407e-05, "loss": 0.5407, "step": 3423 }, { "epoch": 0.949528563505269, "grad_norm": 0.19669625163078308, "learning_rate": 1.3784272031816844e-05, "loss": 0.537, "step": 3424 }, { "epoch": 0.9498058790904049, "grad_norm": 0.1969965547323227, "learning_rate": 1.3778779486241786e-05, "loss": 0.5324, "step": 3425 }, { "epoch": 0.9500831946755408, "grad_norm": 0.1851070374250412, "learning_rate": 1.3773286691142966e-05, "loss": 0.5533, "step": 3426 }, { "epoch": 0.9503605102606767, "grad_norm": 0.18586984276771545, "learning_rate": 1.3767793647592175e-05, "loss": 0.5347, "step": 3427 }, { "epoch": 0.9506378258458126, "grad_norm": 0.1796058714389801, "learning_rate": 1.3762300356661261e-05, "loss": 0.5223, "step": 3428 }, { "epoch": 0.9509151414309485, "grad_norm": 0.1968797892332077, "learning_rate": 1.3756806819422097e-05, "loss": 0.5296, "step": 3429 }, { "epoch": 0.9511924570160843, "grad_norm": 0.19946354627609253, "learning_rate": 1.3751313036946627e-05, "loss": 0.555, "step": 3430 }, { "epoch": 0.9514697726012202, "grad_norm": 0.19412177801132202, "learning_rate": 1.3745819010306832e-05, "loss": 0.5367, "step": 3431 }, { "epoch": 0.9517470881863561, "grad_norm": 0.1967850923538208, "learning_rate": 1.374032474057474e-05, "loss": 0.5355, "step": 3432 }, { "epoch": 0.952024403771492, "grad_norm": 0.26605215668678284, "learning_rate": 1.3734830228822428e-05, "loss": 0.5161, "step": 3433 }, { "epoch": 0.9523017193566279, "grad_norm": 0.21650773286819458, "learning_rate": 1.372933547612202e-05, "loss": 0.5398, "step": 3434 }, { "epoch": 0.9525790349417638, "grad_norm": 0.20562243461608887, "learning_rate": 1.3723840483545697e-05, "loss": 0.5509, "step": 3435 }, { "epoch": 0.9528563505268997, "grad_norm": 0.19407188892364502, "learning_rate": 1.3718345252165663e-05, "loss": 0.5373, "step": 3436 }, { "epoch": 0.9531336661120355, "grad_norm": 0.19095157086849213, "learning_rate": 1.3712849783054197e-05, "loss": 0.54, "step": 3437 }, { "epoch": 0.9534109816971714, "grad_norm": 0.1831715852022171, "learning_rate": 1.3707354077283599e-05, "loss": 0.5451, "step": 3438 }, { "epoch": 0.9536882972823073, "grad_norm": 0.19795599579811096, "learning_rate": 1.3701858135926238e-05, "loss": 0.5329, "step": 3439 }, { "epoch": 0.9539656128674432, "grad_norm": 0.19685760140419006, "learning_rate": 1.3696361960054506e-05, "loss": 0.5525, "step": 3440 }, { "epoch": 0.9542429284525791, "grad_norm": 0.1895620971918106, "learning_rate": 1.3690865550740864e-05, "loss": 0.539, "step": 3441 }, { "epoch": 0.954520244037715, "grad_norm": 0.1906086504459381, "learning_rate": 1.3685368909057799e-05, "loss": 0.5449, "step": 3442 }, { "epoch": 0.9547975596228508, "grad_norm": 0.19026583433151245, "learning_rate": 1.3679872036077853e-05, "loss": 0.5303, "step": 3443 }, { "epoch": 0.9550748752079867, "grad_norm": 0.1858745813369751, "learning_rate": 1.3674374932873615e-05, "loss": 0.5364, "step": 3444 }, { "epoch": 0.9553521907931226, "grad_norm": 0.19552922248840332, "learning_rate": 1.3668877600517712e-05, "loss": 0.5367, "step": 3445 }, { "epoch": 0.9556295063782585, "grad_norm": 0.1855076402425766, "learning_rate": 1.3663380040082821e-05, "loss": 0.532, "step": 3446 }, { "epoch": 0.9559068219633944, "grad_norm": 0.19864481687545776, "learning_rate": 1.365788225264166e-05, "loss": 0.5252, "step": 3447 }, { "epoch": 0.9561841375485303, "grad_norm": 0.18544144928455353, "learning_rate": 1.3652384239266993e-05, "loss": 0.5359, "step": 3448 }, { "epoch": 0.9564614531336662, "grad_norm": 0.18177370727062225, "learning_rate": 1.364688600103163e-05, "loss": 0.5516, "step": 3449 }, { "epoch": 0.956738768718802, "grad_norm": 0.2049761265516281, "learning_rate": 1.3641387539008424e-05, "loss": 0.5446, "step": 3450 }, { "epoch": 0.9570160843039379, "grad_norm": 0.18312260508537292, "learning_rate": 1.3635888854270268e-05, "loss": 0.5292, "step": 3451 }, { "epoch": 0.9572933998890738, "grad_norm": 0.18809424340724945, "learning_rate": 1.36303899478901e-05, "loss": 0.5296, "step": 3452 }, { "epoch": 0.9575707154742097, "grad_norm": 0.183831587433815, "learning_rate": 1.3624890820940902e-05, "loss": 0.5213, "step": 3453 }, { "epoch": 0.9578480310593456, "grad_norm": 0.18347223103046417, "learning_rate": 1.3619391474495708e-05, "loss": 0.5516, "step": 3454 }, { "epoch": 0.9581253466444815, "grad_norm": 0.1897597759962082, "learning_rate": 1.3613891909627575e-05, "loss": 0.5539, "step": 3455 }, { "epoch": 0.9584026622296173, "grad_norm": 0.19610610604286194, "learning_rate": 1.360839212740962e-05, "loss": 0.5581, "step": 3456 }, { "epoch": 0.9586799778147532, "grad_norm": 0.20589366555213928, "learning_rate": 1.3602892128914992e-05, "loss": 0.5266, "step": 3457 }, { "epoch": 0.9589572933998891, "grad_norm": 0.18203216791152954, "learning_rate": 1.3597391915216896e-05, "loss": 0.5222, "step": 3458 }, { "epoch": 0.959234608985025, "grad_norm": 0.19360937178134918, "learning_rate": 1.3591891487388553e-05, "loss": 0.5271, "step": 3459 }, { "epoch": 0.9595119245701609, "grad_norm": 0.2998809814453125, "learning_rate": 1.3586390846503259e-05, "loss": 0.5281, "step": 3460 }, { "epoch": 0.9597892401552968, "grad_norm": 0.18753725290298462, "learning_rate": 1.3580889993634322e-05, "loss": 0.5553, "step": 3461 }, { "epoch": 0.9600665557404326, "grad_norm": 0.19862516224384308, "learning_rate": 1.3575388929855112e-05, "loss": 0.4975, "step": 3462 }, { "epoch": 0.9603438713255685, "grad_norm": 0.18844319880008698, "learning_rate": 1.3569887656239033e-05, "loss": 0.4977, "step": 3463 }, { "epoch": 0.9606211869107044, "grad_norm": 0.2365204095840454, "learning_rate": 1.3564386173859523e-05, "loss": 0.5243, "step": 3464 }, { "epoch": 0.9608985024958403, "grad_norm": 0.19346970319747925, "learning_rate": 1.3558884483790072e-05, "loss": 0.5504, "step": 3465 }, { "epoch": 0.9611758180809762, "grad_norm": 0.19397403299808502, "learning_rate": 1.3553382587104201e-05, "loss": 0.5448, "step": 3466 }, { "epoch": 0.9614531336661121, "grad_norm": 0.19531095027923584, "learning_rate": 1.3547880484875477e-05, "loss": 0.5614, "step": 3467 }, { "epoch": 0.961730449251248, "grad_norm": 0.20053645968437195, "learning_rate": 1.354237817817751e-05, "loss": 0.5412, "step": 3468 }, { "epoch": 0.9620077648363838, "grad_norm": 0.19779855012893677, "learning_rate": 1.3536875668083943e-05, "loss": 0.5675, "step": 3469 }, { "epoch": 0.9622850804215197, "grad_norm": 0.19257789850234985, "learning_rate": 1.3531372955668462e-05, "loss": 0.5128, "step": 3470 }, { "epoch": 0.9625623960066556, "grad_norm": 0.19003498554229736, "learning_rate": 1.352587004200479e-05, "loss": 0.545, "step": 3471 }, { "epoch": 0.9628397115917915, "grad_norm": 0.1935829222202301, "learning_rate": 1.3520366928166695e-05, "loss": 0.5568, "step": 3472 }, { "epoch": 0.9631170271769274, "grad_norm": 0.19856667518615723, "learning_rate": 1.3514863615227979e-05, "loss": 0.5247, "step": 3473 }, { "epoch": 0.9633943427620633, "grad_norm": 0.18320922553539276, "learning_rate": 1.3509360104262478e-05, "loss": 0.5358, "step": 3474 }, { "epoch": 0.9636716583471991, "grad_norm": 0.20552265644073486, "learning_rate": 1.3503856396344086e-05, "loss": 0.5631, "step": 3475 }, { "epoch": 0.963948973932335, "grad_norm": 0.19140848517417908, "learning_rate": 1.3498352492546706e-05, "loss": 0.5241, "step": 3476 }, { "epoch": 0.9642262895174709, "grad_norm": 0.18474581837654114, "learning_rate": 1.3492848393944312e-05, "loss": 0.4961, "step": 3477 }, { "epoch": 0.9645036051026068, "grad_norm": 0.1950991153717041, "learning_rate": 1.3487344101610885e-05, "loss": 0.5223, "step": 3478 }, { "epoch": 0.9647809206877427, "grad_norm": 0.19947074353694916, "learning_rate": 1.348183961662047e-05, "loss": 0.5232, "step": 3479 }, { "epoch": 0.9650582362728786, "grad_norm": 0.19454912841320038, "learning_rate": 1.3476334940047127e-05, "loss": 0.5611, "step": 3480 }, { "epoch": 0.9653355518580145, "grad_norm": 0.19796130061149597, "learning_rate": 1.3470830072964973e-05, "loss": 0.5459, "step": 3481 }, { "epoch": 0.9656128674431503, "grad_norm": 0.1934625804424286, "learning_rate": 1.346532501644815e-05, "loss": 0.5142, "step": 3482 }, { "epoch": 0.9658901830282862, "grad_norm": 0.18265816569328308, "learning_rate": 1.345981977157084e-05, "loss": 0.5416, "step": 3483 }, { "epoch": 0.9661674986134221, "grad_norm": 0.19438108801841736, "learning_rate": 1.3454314339407262e-05, "loss": 0.5264, "step": 3484 }, { "epoch": 0.966444814198558, "grad_norm": 0.20060043036937714, "learning_rate": 1.3448808721031673e-05, "loss": 0.5472, "step": 3485 }, { "epoch": 0.9667221297836939, "grad_norm": 0.20769962668418884, "learning_rate": 1.3443302917518361e-05, "loss": 0.5334, "step": 3486 }, { "epoch": 0.9669994453688298, "grad_norm": 0.19313625991344452, "learning_rate": 1.3437796929941661e-05, "loss": 0.5565, "step": 3487 }, { "epoch": 0.9672767609539656, "grad_norm": 0.20908023416996002, "learning_rate": 1.3432290759375935e-05, "loss": 0.5485, "step": 3488 }, { "epoch": 0.9675540765391015, "grad_norm": 0.1994648575782776, "learning_rate": 1.342678440689558e-05, "loss": 0.5566, "step": 3489 }, { "epoch": 0.9678313921242374, "grad_norm": 0.18501751124858856, "learning_rate": 1.342127787357503e-05, "loss": 0.5345, "step": 3490 }, { "epoch": 0.9681087077093733, "grad_norm": 0.17410939931869507, "learning_rate": 1.341577116048876e-05, "loss": 0.5523, "step": 3491 }, { "epoch": 0.9683860232945092, "grad_norm": 0.1948653906583786, "learning_rate": 1.3410264268711276e-05, "loss": 0.5419, "step": 3492 }, { "epoch": 0.9686633388796451, "grad_norm": 0.1910163313150406, "learning_rate": 1.3404757199317108e-05, "loss": 0.5215, "step": 3493 }, { "epoch": 0.968940654464781, "grad_norm": 0.21614839136600494, "learning_rate": 1.3399249953380849e-05, "loss": 0.5235, "step": 3494 }, { "epoch": 0.9692179700499168, "grad_norm": 0.19417038559913635, "learning_rate": 1.3393742531977094e-05, "loss": 0.5504, "step": 3495 }, { "epoch": 0.9694952856350527, "grad_norm": 0.18813247978687286, "learning_rate": 1.3388234936180493e-05, "loss": 0.5334, "step": 3496 }, { "epoch": 0.9697726012201886, "grad_norm": 0.18560314178466797, "learning_rate": 1.3382727167065723e-05, "loss": 0.5295, "step": 3497 }, { "epoch": 0.9700499168053245, "grad_norm": 0.19894826412200928, "learning_rate": 1.3377219225707495e-05, "loss": 0.5235, "step": 3498 }, { "epoch": 0.9703272323904604, "grad_norm": 0.18726693093776703, "learning_rate": 1.3371711113180552e-05, "loss": 0.5032, "step": 3499 }, { "epoch": 0.9706045479755963, "grad_norm": 0.20481140911579132, "learning_rate": 1.3366202830559679e-05, "loss": 0.5465, "step": 3500 }, { "epoch": 0.9708818635607321, "grad_norm": 0.18740220367908478, "learning_rate": 1.3360694378919683e-05, "loss": 0.5052, "step": 3501 }, { "epoch": 0.971159179145868, "grad_norm": 0.2005304992198944, "learning_rate": 1.3355185759335409e-05, "loss": 0.5565, "step": 3502 }, { "epoch": 0.9714364947310039, "grad_norm": 0.1884390115737915, "learning_rate": 1.3349676972881736e-05, "loss": 0.5513, "step": 3503 }, { "epoch": 0.9717138103161398, "grad_norm": 0.19259166717529297, "learning_rate": 1.3344168020633574e-05, "loss": 0.5136, "step": 3504 }, { "epoch": 0.9719911259012757, "grad_norm": 0.18772351741790771, "learning_rate": 1.3338658903665868e-05, "loss": 0.5418, "step": 3505 }, { "epoch": 0.9722684414864116, "grad_norm": 0.19562283158302307, "learning_rate": 1.3333149623053584e-05, "loss": 0.5291, "step": 3506 }, { "epoch": 0.9725457570715474, "grad_norm": 0.18652015924453735, "learning_rate": 1.332764017987174e-05, "loss": 0.5406, "step": 3507 }, { "epoch": 0.9728230726566833, "grad_norm": 0.19781257212162018, "learning_rate": 1.3322130575195366e-05, "loss": 0.5253, "step": 3508 }, { "epoch": 0.9731003882418192, "grad_norm": 0.20048947632312775, "learning_rate": 1.3316620810099536e-05, "loss": 0.5339, "step": 3509 }, { "epoch": 0.9733777038269551, "grad_norm": 0.1913914531469345, "learning_rate": 1.331111088565935e-05, "loss": 0.5399, "step": 3510 }, { "epoch": 0.973655019412091, "grad_norm": 0.18670813739299774, "learning_rate": 1.3305600802949941e-05, "loss": 0.4995, "step": 3511 }, { "epoch": 0.9739323349972269, "grad_norm": 0.1913759857416153, "learning_rate": 1.3300090563046472e-05, "loss": 0.5324, "step": 3512 }, { "epoch": 0.9742096505823628, "grad_norm": 0.18199758231639862, "learning_rate": 1.3294580167024135e-05, "loss": 0.535, "step": 3513 }, { "epoch": 0.9744869661674986, "grad_norm": 0.18627969920635223, "learning_rate": 1.328906961595815e-05, "loss": 0.5282, "step": 3514 }, { "epoch": 0.9747642817526345, "grad_norm": 0.1850520372390747, "learning_rate": 1.3283558910923785e-05, "loss": 0.5126, "step": 3515 }, { "epoch": 0.9750415973377704, "grad_norm": 0.19907443225383759, "learning_rate": 1.327804805299631e-05, "loss": 0.5228, "step": 3516 }, { "epoch": 0.9753189129229063, "grad_norm": 0.19155220687389374, "learning_rate": 1.3272537043251054e-05, "loss": 0.5289, "step": 3517 }, { "epoch": 0.9755962285080422, "grad_norm": 0.19737032055854797, "learning_rate": 1.3267025882763345e-05, "loss": 0.5342, "step": 3518 }, { "epoch": 0.9758735440931781, "grad_norm": 0.186650350689888, "learning_rate": 1.3261514572608569e-05, "loss": 0.5542, "step": 3519 }, { "epoch": 0.9761508596783139, "grad_norm": 0.1934208869934082, "learning_rate": 1.3256003113862122e-05, "loss": 0.53, "step": 3520 }, { "epoch": 0.9764281752634498, "grad_norm": 0.1932929903268814, "learning_rate": 1.3250491507599439e-05, "loss": 0.5322, "step": 3521 }, { "epoch": 0.9767054908485857, "grad_norm": 0.20356619358062744, "learning_rate": 1.3244979754895978e-05, "loss": 0.5443, "step": 3522 }, { "epoch": 0.9769828064337216, "grad_norm": 0.19713589549064636, "learning_rate": 1.3239467856827229e-05, "loss": 0.5335, "step": 3523 }, { "epoch": 0.9772601220188575, "grad_norm": 0.18841132521629333, "learning_rate": 1.323395581446871e-05, "loss": 0.5614, "step": 3524 }, { "epoch": 0.9775374376039934, "grad_norm": 0.18878857791423798, "learning_rate": 1.3228443628895962e-05, "loss": 0.536, "step": 3525 }, { "epoch": 0.9778147531891292, "grad_norm": 0.1925961673259735, "learning_rate": 1.3222931301184565e-05, "loss": 0.5743, "step": 3526 }, { "epoch": 0.9780920687742651, "grad_norm": 0.2000453919172287, "learning_rate": 1.321741883241012e-05, "loss": 0.5144, "step": 3527 }, { "epoch": 0.978369384359401, "grad_norm": 0.1939268261194229, "learning_rate": 1.3211906223648251e-05, "loss": 0.5379, "step": 3528 }, { "epoch": 0.9786466999445369, "grad_norm": 0.19331765174865723, "learning_rate": 1.3206393475974615e-05, "loss": 0.5428, "step": 3529 }, { "epoch": 0.9789240155296728, "grad_norm": 0.20399914681911469, "learning_rate": 1.3200880590464898e-05, "loss": 0.5323, "step": 3530 }, { "epoch": 0.9792013311148087, "grad_norm": 0.2036525458097458, "learning_rate": 1.3195367568194807e-05, "loss": 0.541, "step": 3531 }, { "epoch": 0.9794786466999446, "grad_norm": 0.19523896276950836, "learning_rate": 1.3189854410240082e-05, "loss": 0.5479, "step": 3532 }, { "epoch": 0.9797559622850804, "grad_norm": 0.20129665732383728, "learning_rate": 1.318434111767648e-05, "loss": 0.5565, "step": 3533 }, { "epoch": 0.9800332778702163, "grad_norm": 0.19564275443553925, "learning_rate": 1.3178827691579801e-05, "loss": 0.5781, "step": 3534 }, { "epoch": 0.9803105934553522, "grad_norm": 0.2035827934741974, "learning_rate": 1.317331413302585e-05, "loss": 0.554, "step": 3535 }, { "epoch": 0.9805879090404881, "grad_norm": 0.1899930238723755, "learning_rate": 1.3167800443090475e-05, "loss": 0.5187, "step": 3536 }, { "epoch": 0.980865224625624, "grad_norm": 0.1863166242837906, "learning_rate": 1.3162286622849538e-05, "loss": 0.5199, "step": 3537 }, { "epoch": 0.9811425402107599, "grad_norm": 0.20462384819984436, "learning_rate": 1.3156772673378936e-05, "loss": 0.5479, "step": 3538 }, { "epoch": 0.9814198557958957, "grad_norm": 0.19956259429454803, "learning_rate": 1.3151258595754581e-05, "loss": 0.5492, "step": 3539 }, { "epoch": 0.9816971713810316, "grad_norm": 0.2239493578672409, "learning_rate": 1.3145744391052422e-05, "loss": 0.563, "step": 3540 }, { "epoch": 0.9819744869661675, "grad_norm": 0.20383387804031372, "learning_rate": 1.3140230060348425e-05, "loss": 0.5309, "step": 3541 }, { "epoch": 0.9822518025513034, "grad_norm": 0.1931990385055542, "learning_rate": 1.3134715604718579e-05, "loss": 0.5233, "step": 3542 }, { "epoch": 0.9825291181364393, "grad_norm": 0.19594894349575043, "learning_rate": 1.3129201025238902e-05, "loss": 0.5366, "step": 3543 }, { "epoch": 0.9828064337215752, "grad_norm": 0.3933532238006592, "learning_rate": 1.3123686322985434e-05, "loss": 0.5762, "step": 3544 }, { "epoch": 0.983083749306711, "grad_norm": 0.19356705248355865, "learning_rate": 1.311817149903424e-05, "loss": 0.5247, "step": 3545 }, { "epoch": 0.9833610648918469, "grad_norm": 0.19310222566127777, "learning_rate": 1.3112656554461405e-05, "loss": 0.5501, "step": 3546 }, { "epoch": 0.9836383804769828, "grad_norm": 0.1923639476299286, "learning_rate": 1.310714149034305e-05, "loss": 0.5265, "step": 3547 }, { "epoch": 0.9839156960621187, "grad_norm": 0.19612760841846466, "learning_rate": 1.3101626307755303e-05, "loss": 0.5292, "step": 3548 }, { "epoch": 0.9841930116472546, "grad_norm": 0.19919681549072266, "learning_rate": 1.3096111007774322e-05, "loss": 0.565, "step": 3549 }, { "epoch": 0.9844703272323905, "grad_norm": 0.20053736865520477, "learning_rate": 1.3090595591476293e-05, "loss": 0.5669, "step": 3550 }, { "epoch": 0.9847476428175264, "grad_norm": 0.18242114782333374, "learning_rate": 1.3085080059937413e-05, "loss": 0.531, "step": 3551 }, { "epoch": 0.9850249584026622, "grad_norm": 0.18836815655231476, "learning_rate": 1.3079564414233912e-05, "loss": 0.5238, "step": 3552 }, { "epoch": 0.9853022739877981, "grad_norm": 0.185248464345932, "learning_rate": 1.3074048655442042e-05, "loss": 0.5294, "step": 3553 }, { "epoch": 0.985579589572934, "grad_norm": 0.18509770929813385, "learning_rate": 1.3068532784638065e-05, "loss": 0.5492, "step": 3554 }, { "epoch": 0.9858569051580699, "grad_norm": 0.1903439611196518, "learning_rate": 1.3063016802898288e-05, "loss": 0.5463, "step": 3555 }, { "epoch": 0.9861342207432058, "grad_norm": 0.19315816462039948, "learning_rate": 1.3057500711299006e-05, "loss": 0.533, "step": 3556 }, { "epoch": 0.9864115363283417, "grad_norm": 0.19946327805519104, "learning_rate": 1.305198451091657e-05, "loss": 0.5282, "step": 3557 }, { "epoch": 0.9866888519134775, "grad_norm": 0.1908363550901413, "learning_rate": 1.3046468202827328e-05, "loss": 0.5208, "step": 3558 }, { "epoch": 0.9869661674986134, "grad_norm": 0.1873141974210739, "learning_rate": 1.304095178810766e-05, "loss": 0.5146, "step": 3559 }, { "epoch": 0.9872434830837493, "grad_norm": 0.18219764530658722, "learning_rate": 1.303543526783397e-05, "loss": 0.5109, "step": 3560 }, { "epoch": 0.9875207986688852, "grad_norm": 0.1835818737745285, "learning_rate": 1.3029918643082673e-05, "loss": 0.5352, "step": 3561 }, { "epoch": 0.9877981142540211, "grad_norm": 0.1892389953136444, "learning_rate": 1.3024401914930207e-05, "loss": 0.5209, "step": 3562 }, { "epoch": 0.988075429839157, "grad_norm": 0.18364709615707397, "learning_rate": 1.3018885084453036e-05, "loss": 0.5213, "step": 3563 }, { "epoch": 0.9883527454242929, "grad_norm": 0.18862204253673553, "learning_rate": 1.3013368152727634e-05, "loss": 0.5151, "step": 3564 }, { "epoch": 0.9886300610094287, "grad_norm": 0.1929624229669571, "learning_rate": 1.3007851120830506e-05, "loss": 0.5347, "step": 3565 }, { "epoch": 0.9889073765945646, "grad_norm": 0.19009216129779816, "learning_rate": 1.3002333989838167e-05, "loss": 0.5589, "step": 3566 }, { "epoch": 0.9891846921797005, "grad_norm": 0.1797810047864914, "learning_rate": 1.299681676082716e-05, "loss": 0.5186, "step": 3567 }, { "epoch": 0.9894620077648364, "grad_norm": 0.1861737072467804, "learning_rate": 1.2991299434874038e-05, "loss": 0.5292, "step": 3568 }, { "epoch": 0.9897393233499723, "grad_norm": 0.186946839094162, "learning_rate": 1.298578201305538e-05, "loss": 0.5393, "step": 3569 }, { "epoch": 0.9900166389351082, "grad_norm": 0.20192833244800568, "learning_rate": 1.2980264496447784e-05, "loss": 0.5487, "step": 3570 }, { "epoch": 0.990293954520244, "grad_norm": 0.1849672794342041, "learning_rate": 1.2974746886127858e-05, "loss": 0.5342, "step": 3571 }, { "epoch": 0.9905712701053799, "grad_norm": 0.18184737861156464, "learning_rate": 1.2969229183172236e-05, "loss": 0.5387, "step": 3572 }, { "epoch": 0.9908485856905158, "grad_norm": 0.19623394310474396, "learning_rate": 1.2963711388657566e-05, "loss": 0.5588, "step": 3573 }, { "epoch": 0.9911259012756517, "grad_norm": 0.19016727805137634, "learning_rate": 1.2958193503660524e-05, "loss": 0.5393, "step": 3574 }, { "epoch": 0.9914032168607876, "grad_norm": 0.19216102361679077, "learning_rate": 1.2952675529257785e-05, "loss": 0.5383, "step": 3575 }, { "epoch": 0.9916805324459235, "grad_norm": 0.21044804155826569, "learning_rate": 1.2947157466526062e-05, "loss": 0.5453, "step": 3576 }, { "epoch": 0.9919578480310594, "grad_norm": 0.19791410863399506, "learning_rate": 1.2941639316542062e-05, "loss": 0.5562, "step": 3577 }, { "epoch": 0.9922351636161952, "grad_norm": 0.18991726636886597, "learning_rate": 1.2936121080382534e-05, "loss": 0.4977, "step": 3578 }, { "epoch": 0.9925124792013311, "grad_norm": 0.19211836159229279, "learning_rate": 1.293060275912423e-05, "loss": 0.5227, "step": 3579 }, { "epoch": 0.992789794786467, "grad_norm": 0.19635222852230072, "learning_rate": 1.292508435384392e-05, "loss": 0.5454, "step": 3580 }, { "epoch": 0.9930671103716029, "grad_norm": 0.18526218831539154, "learning_rate": 1.2919565865618388e-05, "loss": 0.5429, "step": 3581 }, { "epoch": 0.9933444259567388, "grad_norm": 0.1954220086336136, "learning_rate": 1.291404729552444e-05, "loss": 0.5479, "step": 3582 }, { "epoch": 0.9936217415418747, "grad_norm": 0.1860000044107437, "learning_rate": 1.2908528644638895e-05, "loss": 0.5291, "step": 3583 }, { "epoch": 0.9938990571270105, "grad_norm": 0.19651706516742706, "learning_rate": 1.2903009914038586e-05, "loss": 0.5345, "step": 3584 }, { "epoch": 0.9941763727121464, "grad_norm": 0.18839724361896515, "learning_rate": 1.2897491104800366e-05, "loss": 0.5624, "step": 3585 }, { "epoch": 0.9944536882972823, "grad_norm": 0.1895817518234253, "learning_rate": 1.28919722180011e-05, "loss": 0.5577, "step": 3586 }, { "epoch": 0.9947310038824182, "grad_norm": 0.18223224580287933, "learning_rate": 1.288645325471767e-05, "loss": 0.5511, "step": 3587 }, { "epoch": 0.9950083194675541, "grad_norm": 0.18843968212604523, "learning_rate": 1.2880934216026971e-05, "loss": 0.5416, "step": 3588 }, { "epoch": 0.99528563505269, "grad_norm": 0.18690787255764008, "learning_rate": 1.2875415103005915e-05, "loss": 0.5348, "step": 3589 }, { "epoch": 0.9955629506378258, "grad_norm": 0.18256263434886932, "learning_rate": 1.2869895916731426e-05, "loss": 0.5147, "step": 3590 }, { "epoch": 0.9958402662229617, "grad_norm": 0.18710075318813324, "learning_rate": 1.2864376658280441e-05, "loss": 0.5245, "step": 3591 }, { "epoch": 0.9961175818080976, "grad_norm": 0.20847736299037933, "learning_rate": 1.2858857328729915e-05, "loss": 0.5432, "step": 3592 }, { "epoch": 0.9963948973932335, "grad_norm": 0.18452630937099457, "learning_rate": 1.2853337929156822e-05, "loss": 0.5279, "step": 3593 }, { "epoch": 0.9966722129783694, "grad_norm": 0.19052648544311523, "learning_rate": 1.2847818460638131e-05, "loss": 0.5312, "step": 3594 }, { "epoch": 0.9969495285635053, "grad_norm": 0.19351086020469666, "learning_rate": 1.2842298924250848e-05, "loss": 0.525, "step": 3595 }, { "epoch": 0.9972268441486412, "grad_norm": 0.17867478728294373, "learning_rate": 1.2836779321071974e-05, "loss": 0.5493, "step": 3596 }, { "epoch": 0.997504159733777, "grad_norm": 0.18988832831382751, "learning_rate": 1.2831259652178532e-05, "loss": 0.5449, "step": 3597 }, { "epoch": 0.9977814753189129, "grad_norm": 0.18697360157966614, "learning_rate": 1.2825739918647553e-05, "loss": 0.5146, "step": 3598 }, { "epoch": 0.9980587909040488, "grad_norm": 0.19430890679359436, "learning_rate": 1.2820220121556087e-05, "loss": 0.5423, "step": 3599 }, { "epoch": 0.9983361064891847, "grad_norm": 0.19263558089733124, "learning_rate": 1.2814700261981195e-05, "loss": 0.5393, "step": 3600 }, { "epoch": 0.9986134220743206, "grad_norm": 0.1892475187778473, "learning_rate": 1.2809180340999938e-05, "loss": 0.5205, "step": 3601 }, { "epoch": 0.9988907376594565, "grad_norm": 0.19061142206192017, "learning_rate": 1.280366035968941e-05, "loss": 0.5195, "step": 3602 }, { "epoch": 0.9991680532445923, "grad_norm": 0.1829683780670166, "learning_rate": 1.2798140319126695e-05, "loss": 0.5111, "step": 3603 }, { "epoch": 0.9994453688297282, "grad_norm": 0.18301549553871155, "learning_rate": 1.279262022038891e-05, "loss": 0.5393, "step": 3604 }, { "epoch": 0.9997226844148641, "grad_norm": 0.18907521665096283, "learning_rate": 1.2787100064553162e-05, "loss": 0.534, "step": 3605 }, { "epoch": 1.0, "grad_norm": 0.20034904778003693, "learning_rate": 1.2781579852696588e-05, "loss": 0.5388, "step": 3606 }, { "epoch": 1.0, "eval_loss": 0.8209076523780823, "eval_runtime": 415.5978, "eval_samples_per_second": 98.684, "eval_steps_per_second": 1.542, "step": 3606 }, { "epoch": 1.0002773155851359, "grad_norm": 0.19878089427947998, "learning_rate": 1.2776059585896324e-05, "loss": 0.537, "step": 3607 }, { "epoch": 1.0005546311702718, "grad_norm": 0.18699733912944794, "learning_rate": 1.2770539265229522e-05, "loss": 0.5383, "step": 3608 }, { "epoch": 1.0008319467554077, "grad_norm": 0.18291421234607697, "learning_rate": 1.2765018891773343e-05, "loss": 0.5536, "step": 3609 }, { "epoch": 1.0011092623405435, "grad_norm": 0.19343863427639008, "learning_rate": 1.2759498466604951e-05, "loss": 0.5325, "step": 3610 }, { "epoch": 1.0013865779256794, "grad_norm": 0.19165514409542084, "learning_rate": 1.2753977990801536e-05, "loss": 0.5405, "step": 3611 }, { "epoch": 1.0016638935108153, "grad_norm": 0.1763203889131546, "learning_rate": 1.2748457465440289e-05, "loss": 0.5398, "step": 3612 }, { "epoch": 1.0019412090959512, "grad_norm": 0.2186586856842041, "learning_rate": 1.27429368915984e-05, "loss": 0.5192, "step": 3613 }, { "epoch": 1.002218524681087, "grad_norm": 0.19609209895133972, "learning_rate": 1.2737416270353094e-05, "loss": 0.5364, "step": 3614 }, { "epoch": 1.002495840266223, "grad_norm": 0.20778897404670715, "learning_rate": 1.273189560278158e-05, "loss": 0.5505, "step": 3615 }, { "epoch": 1.0027731558513588, "grad_norm": 0.2087172418832779, "learning_rate": 1.2726374889961095e-05, "loss": 0.5627, "step": 3616 }, { "epoch": 1.0030504714364947, "grad_norm": 0.19748911261558533, "learning_rate": 1.2720854132968865e-05, "loss": 0.541, "step": 3617 }, { "epoch": 1.0033277870216306, "grad_norm": 0.18848268687725067, "learning_rate": 1.2715333332882146e-05, "loss": 0.5373, "step": 3618 }, { "epoch": 1.0036051026067665, "grad_norm": 0.19345615804195404, "learning_rate": 1.2709812490778187e-05, "loss": 0.5429, "step": 3619 }, { "epoch": 1.0038824181919024, "grad_norm": 0.18660636246204376, "learning_rate": 1.270429160773425e-05, "loss": 0.5608, "step": 3620 }, { "epoch": 1.0041597337770383, "grad_norm": 0.1913338303565979, "learning_rate": 1.2698770684827612e-05, "loss": 0.54, "step": 3621 }, { "epoch": 1.0044370493621742, "grad_norm": 0.258350670337677, "learning_rate": 1.2693249723135542e-05, "loss": 0.4896, "step": 3622 }, { "epoch": 1.00471436494731, "grad_norm": 0.18567179143428802, "learning_rate": 1.2687728723735337e-05, "loss": 0.5154, "step": 3623 }, { "epoch": 1.004991680532446, "grad_norm": 0.1973087191581726, "learning_rate": 1.2682207687704279e-05, "loss": 0.557, "step": 3624 }, { "epoch": 1.0052689961175818, "grad_norm": 0.22406698763370514, "learning_rate": 1.2676686616119675e-05, "loss": 0.5303, "step": 3625 }, { "epoch": 1.0055463117027177, "grad_norm": 0.19072286784648895, "learning_rate": 1.2671165510058834e-05, "loss": 0.5455, "step": 3626 }, { "epoch": 1.0058236272878536, "grad_norm": 0.1937733143568039, "learning_rate": 1.2665644370599064e-05, "loss": 0.5198, "step": 3627 }, { "epoch": 1.0061009428729895, "grad_norm": 0.2021392285823822, "learning_rate": 1.2660123198817692e-05, "loss": 0.5313, "step": 3628 }, { "epoch": 1.0063782584581253, "grad_norm": 0.18881459534168243, "learning_rate": 1.2654601995792036e-05, "loss": 0.5254, "step": 3629 }, { "epoch": 1.0066555740432612, "grad_norm": 0.18779276311397552, "learning_rate": 1.2649080762599442e-05, "loss": 0.5202, "step": 3630 }, { "epoch": 1.006932889628397, "grad_norm": 0.1880016028881073, "learning_rate": 1.2643559500317234e-05, "loss": 0.5177, "step": 3631 }, { "epoch": 1.007210205213533, "grad_norm": 0.18926078081130981, "learning_rate": 1.2638038210022765e-05, "loss": 0.5349, "step": 3632 }, { "epoch": 1.0074875207986689, "grad_norm": 0.18671317398548126, "learning_rate": 1.2632516892793389e-05, "loss": 0.5284, "step": 3633 }, { "epoch": 1.0077648363838048, "grad_norm": 0.19339770078659058, "learning_rate": 1.2626995549706452e-05, "loss": 0.5238, "step": 3634 }, { "epoch": 1.0080421519689406, "grad_norm": 0.18992464244365692, "learning_rate": 1.2621474181839322e-05, "loss": 0.5461, "step": 3635 }, { "epoch": 1.0083194675540765, "grad_norm": 0.19679436087608337, "learning_rate": 1.2615952790269356e-05, "loss": 0.5174, "step": 3636 }, { "epoch": 1.0085967831392124, "grad_norm": 0.1883174180984497, "learning_rate": 1.2610431376073931e-05, "loss": 0.5542, "step": 3637 }, { "epoch": 1.0088740987243483, "grad_norm": 0.1904815286397934, "learning_rate": 1.260490994033042e-05, "loss": 0.5224, "step": 3638 }, { "epoch": 1.0091514143094842, "grad_norm": 0.18984469771385193, "learning_rate": 1.2599388484116198e-05, "loss": 0.5237, "step": 3639 }, { "epoch": 1.00942872989462, "grad_norm": 0.19481045007705688, "learning_rate": 1.259386700850865e-05, "loss": 0.5315, "step": 3640 }, { "epoch": 1.009706045479756, "grad_norm": 0.17928001284599304, "learning_rate": 1.2588345514585163e-05, "loss": 0.5292, "step": 3641 }, { "epoch": 1.0099833610648918, "grad_norm": 0.1879146546125412, "learning_rate": 1.2582824003423124e-05, "loss": 0.5269, "step": 3642 }, { "epoch": 1.0102606766500277, "grad_norm": 0.18970663845539093, "learning_rate": 1.2577302476099926e-05, "loss": 0.5107, "step": 3643 }, { "epoch": 1.0105379922351636, "grad_norm": 0.19013284146785736, "learning_rate": 1.257178093369297e-05, "loss": 0.5348, "step": 3644 }, { "epoch": 1.0108153078202995, "grad_norm": 0.19010770320892334, "learning_rate": 1.2566259377279652e-05, "loss": 0.5264, "step": 3645 }, { "epoch": 1.0110926234054354, "grad_norm": 0.18681344389915466, "learning_rate": 1.2560737807937374e-05, "loss": 0.497, "step": 3646 }, { "epoch": 1.0113699389905713, "grad_norm": 0.1887647807598114, "learning_rate": 1.2555216226743537e-05, "loss": 0.5271, "step": 3647 }, { "epoch": 1.0116472545757071, "grad_norm": 0.18254730105400085, "learning_rate": 1.2549694634775555e-05, "loss": 0.5283, "step": 3648 }, { "epoch": 1.011924570160843, "grad_norm": 0.1893124133348465, "learning_rate": 1.2544173033110832e-05, "loss": 0.5273, "step": 3649 }, { "epoch": 1.012201885745979, "grad_norm": 0.18291127681732178, "learning_rate": 1.2538651422826777e-05, "loss": 0.5425, "step": 3650 }, { "epoch": 1.0124792013311148, "grad_norm": 0.200171560049057, "learning_rate": 1.2533129805000807e-05, "loss": 0.532, "step": 3651 }, { "epoch": 1.0127565169162507, "grad_norm": 0.1930498629808426, "learning_rate": 1.2527608180710338e-05, "loss": 0.5295, "step": 3652 }, { "epoch": 1.0130338325013866, "grad_norm": 0.18753856420516968, "learning_rate": 1.2522086551032778e-05, "loss": 0.5358, "step": 3653 }, { "epoch": 1.0133111480865225, "grad_norm": 0.19522987306118011, "learning_rate": 1.2516564917045548e-05, "loss": 0.5332, "step": 3654 }, { "epoch": 1.0135884636716583, "grad_norm": 0.17891767621040344, "learning_rate": 1.2511043279826062e-05, "loss": 0.5166, "step": 3655 }, { "epoch": 1.0138657792567942, "grad_norm": 0.18819722533226013, "learning_rate": 1.250552164045174e-05, "loss": 0.5445, "step": 3656 }, { "epoch": 1.01414309484193, "grad_norm": 0.1814819872379303, "learning_rate": 1.25e-05, "loss": 0.5191, "step": 3657 }, { "epoch": 1.014420410427066, "grad_norm": 0.18296414613723755, "learning_rate": 1.2494478359548261e-05, "loss": 0.5173, "step": 3658 }, { "epoch": 1.0146977260122019, "grad_norm": 0.20985326170921326, "learning_rate": 1.2488956720173939e-05, "loss": 0.5062, "step": 3659 }, { "epoch": 1.0149750415973378, "grad_norm": 0.17893539369106293, "learning_rate": 1.2483435082954453e-05, "loss": 0.5458, "step": 3660 }, { "epoch": 1.0152523571824736, "grad_norm": 0.19261109828948975, "learning_rate": 1.2477913448967227e-05, "loss": 0.5637, "step": 3661 }, { "epoch": 1.0155296727676095, "grad_norm": 0.2319900244474411, "learning_rate": 1.2472391819289667e-05, "loss": 0.5379, "step": 3662 }, { "epoch": 1.0158069883527454, "grad_norm": 0.1867554783821106, "learning_rate": 1.2466870194999192e-05, "loss": 0.5321, "step": 3663 }, { "epoch": 1.0160843039378813, "grad_norm": 0.1840423047542572, "learning_rate": 1.2461348577173224e-05, "loss": 0.5228, "step": 3664 }, { "epoch": 1.0163616195230172, "grad_norm": 0.18526844680309296, "learning_rate": 1.2455826966889175e-05, "loss": 0.5387, "step": 3665 }, { "epoch": 1.016638935108153, "grad_norm": 0.19361324608325958, "learning_rate": 1.2450305365224446e-05, "loss": 0.5261, "step": 3666 }, { "epoch": 1.016916250693289, "grad_norm": 0.19102880358695984, "learning_rate": 1.2444783773256466e-05, "loss": 0.5433, "step": 3667 }, { "epoch": 1.0171935662784248, "grad_norm": 0.18817844986915588, "learning_rate": 1.2439262192062631e-05, "loss": 0.538, "step": 3668 }, { "epoch": 1.0174708818635607, "grad_norm": 0.1901366263628006, "learning_rate": 1.2433740622720353e-05, "loss": 0.5453, "step": 3669 }, { "epoch": 1.0177481974486966, "grad_norm": 0.22999554872512817, "learning_rate": 1.242821906630703e-05, "loss": 0.5164, "step": 3670 }, { "epoch": 1.0180255130338325, "grad_norm": 0.1897146850824356, "learning_rate": 1.2422697523900075e-05, "loss": 0.5528, "step": 3671 }, { "epoch": 1.0183028286189684, "grad_norm": 0.1865740269422531, "learning_rate": 1.241717599657688e-05, "loss": 0.53, "step": 3672 }, { "epoch": 1.0185801442041043, "grad_norm": 0.18005676567554474, "learning_rate": 1.2411654485414839e-05, "loss": 0.5056, "step": 3673 }, { "epoch": 1.0188574597892401, "grad_norm": 0.1854349970817566, "learning_rate": 1.240613299149135e-05, "loss": 0.549, "step": 3674 }, { "epoch": 1.019134775374376, "grad_norm": 0.19649791717529297, "learning_rate": 1.2400611515883805e-05, "loss": 0.5321, "step": 3675 }, { "epoch": 1.019412090959512, "grad_norm": 0.18856771290302277, "learning_rate": 1.2395090059669585e-05, "loss": 0.5304, "step": 3676 }, { "epoch": 1.0196894065446478, "grad_norm": 0.18438786268234253, "learning_rate": 1.238956862392607e-05, "loss": 0.551, "step": 3677 }, { "epoch": 1.0199667221297837, "grad_norm": 0.1798408478498459, "learning_rate": 1.2384047209730647e-05, "loss": 0.5237, "step": 3678 }, { "epoch": 1.0202440377149196, "grad_norm": 0.17963095009326935, "learning_rate": 1.2378525818160683e-05, "loss": 0.5229, "step": 3679 }, { "epoch": 1.0205213533000554, "grad_norm": 0.18897545337677002, "learning_rate": 1.237300445029355e-05, "loss": 0.519, "step": 3680 }, { "epoch": 1.0207986688851913, "grad_norm": 0.18016240000724792, "learning_rate": 1.2367483107206614e-05, "loss": 0.5092, "step": 3681 }, { "epoch": 1.0210759844703272, "grad_norm": 0.18622079491615295, "learning_rate": 1.2361961789977238e-05, "loss": 0.505, "step": 3682 }, { "epoch": 1.021353300055463, "grad_norm": 0.1844959259033203, "learning_rate": 1.2356440499682769e-05, "loss": 0.5358, "step": 3683 }, { "epoch": 1.021630615640599, "grad_norm": 0.18017494678497314, "learning_rate": 1.2350919237400563e-05, "loss": 0.521, "step": 3684 }, { "epoch": 1.0219079312257349, "grad_norm": 0.18644961714744568, "learning_rate": 1.2345398004207965e-05, "loss": 0.5239, "step": 3685 }, { "epoch": 1.0221852468108708, "grad_norm": 0.18692266941070557, "learning_rate": 1.2339876801182315e-05, "loss": 0.5055, "step": 3686 }, { "epoch": 1.0224625623960066, "grad_norm": 0.18799254298210144, "learning_rate": 1.2334355629400934e-05, "loss": 0.5153, "step": 3687 }, { "epoch": 1.0227398779811425, "grad_norm": 0.19313998520374298, "learning_rate": 1.2328834489941168e-05, "loss": 0.5274, "step": 3688 }, { "epoch": 1.0230171935662784, "grad_norm": 0.24748623371124268, "learning_rate": 1.2323313383880326e-05, "loss": 0.5168, "step": 3689 }, { "epoch": 1.0232945091514143, "grad_norm": 0.18509146571159363, "learning_rate": 1.231779231229572e-05, "loss": 0.508, "step": 3690 }, { "epoch": 1.0235718247365502, "grad_norm": 0.2279065102338791, "learning_rate": 1.2312271276264666e-05, "loss": 0.5219, "step": 3691 }, { "epoch": 1.023849140321686, "grad_norm": 0.18918190896511078, "learning_rate": 1.230675027686446e-05, "loss": 0.509, "step": 3692 }, { "epoch": 1.024126455906822, "grad_norm": 0.19168592989444733, "learning_rate": 1.2301229315172394e-05, "loss": 0.5128, "step": 3693 }, { "epoch": 1.0244037714919578, "grad_norm": 0.18460464477539062, "learning_rate": 1.229570839226575e-05, "loss": 0.4949, "step": 3694 }, { "epoch": 1.0246810870770937, "grad_norm": 0.1955164670944214, "learning_rate": 1.2290187509221816e-05, "loss": 0.5084, "step": 3695 }, { "epoch": 1.0249584026622296, "grad_norm": 0.21448062360286713, "learning_rate": 1.2284666667117858e-05, "loss": 0.5258, "step": 3696 }, { "epoch": 1.0252357182473655, "grad_norm": 0.195621058344841, "learning_rate": 1.2279145867031136e-05, "loss": 0.5409, "step": 3697 }, { "epoch": 1.0255130338325014, "grad_norm": 0.18348294496536255, "learning_rate": 1.2273625110038908e-05, "loss": 0.5288, "step": 3698 }, { "epoch": 1.0257903494176372, "grad_norm": 0.2350398451089859, "learning_rate": 1.2268104397218421e-05, "loss": 0.5176, "step": 3699 }, { "epoch": 1.0260676650027731, "grad_norm": 0.18816140294075012, "learning_rate": 1.2262583729646909e-05, "loss": 0.5174, "step": 3700 }, { "epoch": 1.026344980587909, "grad_norm": 0.18344999849796295, "learning_rate": 1.22570631084016e-05, "loss": 0.5001, "step": 3701 }, { "epoch": 1.026622296173045, "grad_norm": 0.19813844561576843, "learning_rate": 1.2251542534559716e-05, "loss": 0.5229, "step": 3702 }, { "epoch": 1.0268996117581808, "grad_norm": 0.18872547149658203, "learning_rate": 1.2246022009198469e-05, "loss": 0.5265, "step": 3703 }, { "epoch": 1.0271769273433167, "grad_norm": 0.19747066497802734, "learning_rate": 1.2240501533395048e-05, "loss": 0.5381, "step": 3704 }, { "epoch": 1.0274542429284526, "grad_norm": 0.1966402530670166, "learning_rate": 1.2234981108226662e-05, "loss": 0.5447, "step": 3705 }, { "epoch": 1.0277315585135884, "grad_norm": 0.1881251186132431, "learning_rate": 1.222946073477048e-05, "loss": 0.4944, "step": 3706 }, { "epoch": 1.0280088740987243, "grad_norm": 0.19006265699863434, "learning_rate": 1.222394041410368e-05, "loss": 0.5279, "step": 3707 }, { "epoch": 1.0282861896838602, "grad_norm": 0.17501787841320038, "learning_rate": 1.2218420147303412e-05, "loss": 0.4797, "step": 3708 }, { "epoch": 1.028563505268996, "grad_norm": 0.17646637558937073, "learning_rate": 1.2212899935446841e-05, "loss": 0.4878, "step": 3709 }, { "epoch": 1.028840820854132, "grad_norm": 0.19758492708206177, "learning_rate": 1.2207379779611095e-05, "loss": 0.5238, "step": 3710 }, { "epoch": 1.0291181364392679, "grad_norm": 0.19391943514347076, "learning_rate": 1.2201859680873305e-05, "loss": 0.5086, "step": 3711 }, { "epoch": 1.0293954520244037, "grad_norm": 0.18827463686466217, "learning_rate": 1.2196339640310595e-05, "loss": 0.5257, "step": 3712 }, { "epoch": 1.0296727676095396, "grad_norm": 0.19285833835601807, "learning_rate": 1.2190819659000063e-05, "loss": 0.5179, "step": 3713 }, { "epoch": 1.0299500831946755, "grad_norm": 0.18147997558116913, "learning_rate": 1.2185299738018813e-05, "loss": 0.4985, "step": 3714 }, { "epoch": 1.0302273987798114, "grad_norm": 0.19085568189620972, "learning_rate": 1.2179779878443915e-05, "loss": 0.5131, "step": 3715 }, { "epoch": 1.0305047143649473, "grad_norm": 0.20084667205810547, "learning_rate": 1.217426008135245e-05, "loss": 0.519, "step": 3716 }, { "epoch": 1.0307820299500832, "grad_norm": 0.20030198991298676, "learning_rate": 1.2168740347821473e-05, "loss": 0.5277, "step": 3717 }, { "epoch": 1.031059345535219, "grad_norm": 0.1940310150384903, "learning_rate": 1.2163220678928028e-05, "loss": 0.5001, "step": 3718 }, { "epoch": 1.031336661120355, "grad_norm": 0.18660347163677216, "learning_rate": 1.2157701075749153e-05, "loss": 0.529, "step": 3719 }, { "epoch": 1.0316139767054908, "grad_norm": 0.1930490881204605, "learning_rate": 1.2152181539361871e-05, "loss": 0.5056, "step": 3720 }, { "epoch": 1.0318912922906267, "grad_norm": 0.1963385045528412, "learning_rate": 1.2146662070843184e-05, "loss": 0.5057, "step": 3721 }, { "epoch": 1.0321686078757626, "grad_norm": 0.19734053313732147, "learning_rate": 1.2141142671270085e-05, "loss": 0.5287, "step": 3722 }, { "epoch": 1.0324459234608985, "grad_norm": 0.18422985076904297, "learning_rate": 1.2135623341719561e-05, "loss": 0.5135, "step": 3723 }, { "epoch": 1.0327232390460344, "grad_norm": 0.18918901681900024, "learning_rate": 1.213010408326858e-05, "loss": 0.5098, "step": 3724 }, { "epoch": 1.0330005546311702, "grad_norm": 0.18276064097881317, "learning_rate": 1.2124584896994085e-05, "loss": 0.5098, "step": 3725 }, { "epoch": 1.0332778702163061, "grad_norm": 0.2021724432706833, "learning_rate": 1.2119065783973031e-05, "loss": 0.5127, "step": 3726 }, { "epoch": 1.033555185801442, "grad_norm": 0.18858903646469116, "learning_rate": 1.2113546745282333e-05, "loss": 0.5057, "step": 3727 }, { "epoch": 1.033832501386578, "grad_norm": 0.18476137518882751, "learning_rate": 1.2108027781998902e-05, "loss": 0.5254, "step": 3728 }, { "epoch": 1.0341098169717138, "grad_norm": 0.18318045139312744, "learning_rate": 1.2102508895199633e-05, "loss": 0.5248, "step": 3729 }, { "epoch": 1.0343871325568497, "grad_norm": 0.18871383368968964, "learning_rate": 1.2096990085961417e-05, "loss": 0.5073, "step": 3730 }, { "epoch": 1.0346644481419855, "grad_norm": 0.2086830884218216, "learning_rate": 1.209147135536111e-05, "loss": 0.5038, "step": 3731 }, { "epoch": 1.0349417637271214, "grad_norm": 0.1914805769920349, "learning_rate": 1.2085952704475562e-05, "loss": 0.5322, "step": 3732 }, { "epoch": 1.0352190793122573, "grad_norm": 0.18802616000175476, "learning_rate": 1.2080434134381615e-05, "loss": 0.5161, "step": 3733 }, { "epoch": 1.0354963948973932, "grad_norm": 0.1888350397348404, "learning_rate": 1.2074915646156083e-05, "loss": 0.5178, "step": 3734 }, { "epoch": 1.035773710482529, "grad_norm": 0.1935519278049469, "learning_rate": 1.2069397240875774e-05, "loss": 0.5151, "step": 3735 }, { "epoch": 1.036051026067665, "grad_norm": 0.18621766567230225, "learning_rate": 1.2063878919617467e-05, "loss": 0.5034, "step": 3736 }, { "epoch": 1.0363283416528009, "grad_norm": 0.20036379992961884, "learning_rate": 1.2058360683457941e-05, "loss": 0.518, "step": 3737 }, { "epoch": 1.0366056572379367, "grad_norm": 0.19412393867969513, "learning_rate": 1.2052842533473945e-05, "loss": 0.5095, "step": 3738 }, { "epoch": 1.0368829728230726, "grad_norm": 0.20189900696277618, "learning_rate": 1.2047324470742216e-05, "loss": 0.5, "step": 3739 }, { "epoch": 1.0371602884082085, "grad_norm": 0.19598862528800964, "learning_rate": 1.204180649633948e-05, "loss": 0.5352, "step": 3740 }, { "epoch": 1.0374376039933444, "grad_norm": 0.18859632313251495, "learning_rate": 1.2036288611342436e-05, "loss": 0.5148, "step": 3741 }, { "epoch": 1.0377149195784803, "grad_norm": 0.18398115038871765, "learning_rate": 1.2030770816827769e-05, "loss": 0.519, "step": 3742 }, { "epoch": 1.0379922351636162, "grad_norm": 0.19209997355937958, "learning_rate": 1.2025253113872144e-05, "loss": 0.535, "step": 3743 }, { "epoch": 1.038269550748752, "grad_norm": 0.20428086817264557, "learning_rate": 1.2019735503552219e-05, "loss": 0.5346, "step": 3744 }, { "epoch": 1.038546866333888, "grad_norm": 0.20181040465831757, "learning_rate": 1.2014217986944624e-05, "loss": 0.5171, "step": 3745 }, { "epoch": 1.0388241819190238, "grad_norm": 0.18710096180438995, "learning_rate": 1.200870056512596e-05, "loss": 0.5059, "step": 3746 }, { "epoch": 1.0391014975041597, "grad_norm": 0.18923644721508026, "learning_rate": 1.2003183239172843e-05, "loss": 0.5271, "step": 3747 }, { "epoch": 1.0393788130892956, "grad_norm": 0.20316655933856964, "learning_rate": 1.1997666010161836e-05, "loss": 0.5138, "step": 3748 }, { "epoch": 1.0396561286744315, "grad_norm": 0.18475180864334106, "learning_rate": 1.1992148879169499e-05, "loss": 0.4857, "step": 3749 }, { "epoch": 1.0399334442595674, "grad_norm": 0.18830114603042603, "learning_rate": 1.1986631847272367e-05, "loss": 0.5201, "step": 3750 }, { "epoch": 1.0402107598447032, "grad_norm": 0.1905670464038849, "learning_rate": 1.1981114915546967e-05, "loss": 0.4813, "step": 3751 }, { "epoch": 1.0404880754298391, "grad_norm": 0.18634763360023499, "learning_rate": 1.1975598085069798e-05, "loss": 0.53, "step": 3752 }, { "epoch": 1.040765391014975, "grad_norm": 0.19833336770534515, "learning_rate": 1.197008135691733e-05, "loss": 0.5381, "step": 3753 }, { "epoch": 1.041042706600111, "grad_norm": 0.19526107609272003, "learning_rate": 1.1964564732166032e-05, "loss": 0.514, "step": 3754 }, { "epoch": 1.0413200221852468, "grad_norm": 0.1905699223279953, "learning_rate": 1.195904821189234e-05, "loss": 0.5321, "step": 3755 }, { "epoch": 1.0415973377703827, "grad_norm": 0.203719362616539, "learning_rate": 1.1953531797172673e-05, "loss": 0.5565, "step": 3756 }, { "epoch": 1.0418746533555185, "grad_norm": 0.1842213273048401, "learning_rate": 1.1948015489083433e-05, "loss": 0.5008, "step": 3757 }, { "epoch": 1.0421519689406544, "grad_norm": 0.2012074589729309, "learning_rate": 1.1942499288700997e-05, "loss": 0.5199, "step": 3758 }, { "epoch": 1.0424292845257903, "grad_norm": 0.19444574415683746, "learning_rate": 1.193698319710172e-05, "loss": 0.5228, "step": 3759 }, { "epoch": 1.0427066001109262, "grad_norm": 0.1847216635942459, "learning_rate": 1.1931467215361934e-05, "loss": 0.4973, "step": 3760 }, { "epoch": 1.042983915696062, "grad_norm": 0.2011430710554123, "learning_rate": 1.192595134455796e-05, "loss": 0.5167, "step": 3761 }, { "epoch": 1.043261231281198, "grad_norm": 0.1845628172159195, "learning_rate": 1.192043558576609e-05, "loss": 0.5279, "step": 3762 }, { "epoch": 1.0435385468663338, "grad_norm": 0.19030199944972992, "learning_rate": 1.1914919940062585e-05, "loss": 0.5217, "step": 3763 }, { "epoch": 1.0438158624514697, "grad_norm": 0.1975383758544922, "learning_rate": 1.190940440852371e-05, "loss": 0.5218, "step": 3764 }, { "epoch": 1.0440931780366056, "grad_norm": 0.19011008739471436, "learning_rate": 1.190388899222568e-05, "loss": 0.5273, "step": 3765 }, { "epoch": 1.0443704936217415, "grad_norm": 0.18708908557891846, "learning_rate": 1.1898373692244699e-05, "loss": 0.5336, "step": 3766 }, { "epoch": 1.0446478092068774, "grad_norm": 0.18800389766693115, "learning_rate": 1.189285850965695e-05, "loss": 0.5131, "step": 3767 }, { "epoch": 1.0449251247920133, "grad_norm": 0.187372624874115, "learning_rate": 1.1887343445538597e-05, "loss": 0.5171, "step": 3768 }, { "epoch": 1.0452024403771492, "grad_norm": 0.19775499403476715, "learning_rate": 1.1881828500965765e-05, "loss": 0.5291, "step": 3769 }, { "epoch": 1.045479755962285, "grad_norm": 0.19341982901096344, "learning_rate": 1.1876313677014569e-05, "loss": 0.5253, "step": 3770 }, { "epoch": 1.045757071547421, "grad_norm": 0.19665475189685822, "learning_rate": 1.1870798974761102e-05, "loss": 0.5123, "step": 3771 }, { "epoch": 1.0460343871325568, "grad_norm": 0.19843867421150208, "learning_rate": 1.1865284395281426e-05, "loss": 0.4966, "step": 3772 }, { "epoch": 1.0463117027176927, "grad_norm": 0.18300753831863403, "learning_rate": 1.1859769939651582e-05, "loss": 0.5091, "step": 3773 }, { "epoch": 1.0465890183028286, "grad_norm": 0.1935308873653412, "learning_rate": 1.1854255608947581e-05, "loss": 0.5112, "step": 3774 }, { "epoch": 1.0468663338879645, "grad_norm": 0.19752877950668335, "learning_rate": 1.1848741404245421e-05, "loss": 0.5062, "step": 3775 }, { "epoch": 1.0471436494731003, "grad_norm": 0.18157391250133514, "learning_rate": 1.1843227326621069e-05, "loss": 0.5018, "step": 3776 }, { "epoch": 1.0474209650582362, "grad_norm": 0.20472866296768188, "learning_rate": 1.1837713377150463e-05, "loss": 0.5358, "step": 3777 }, { "epoch": 1.0476982806433721, "grad_norm": 0.1965123564004898, "learning_rate": 1.1832199556909528e-05, "loss": 0.5269, "step": 3778 }, { "epoch": 1.047975596228508, "grad_norm": 0.19199901819229126, "learning_rate": 1.1826685866974153e-05, "loss": 0.4876, "step": 3779 }, { "epoch": 1.0482529118136439, "grad_norm": 0.1838652491569519, "learning_rate": 1.1821172308420203e-05, "loss": 0.542, "step": 3780 }, { "epoch": 1.0485302273987798, "grad_norm": 0.19129469990730286, "learning_rate": 1.1815658882323519e-05, "loss": 0.4951, "step": 3781 }, { "epoch": 1.0488075429839157, "grad_norm": 0.18997079133987427, "learning_rate": 1.181014558975992e-05, "loss": 0.4899, "step": 3782 }, { "epoch": 1.0490848585690515, "grad_norm": 0.19084186851978302, "learning_rate": 1.1804632431805197e-05, "loss": 0.5315, "step": 3783 }, { "epoch": 1.0493621741541874, "grad_norm": 0.18761594593524933, "learning_rate": 1.1799119409535101e-05, "loss": 0.5063, "step": 3784 }, { "epoch": 1.0496394897393233, "grad_norm": 0.1877257525920868, "learning_rate": 1.1793606524025388e-05, "loss": 0.506, "step": 3785 }, { "epoch": 1.0499168053244592, "grad_norm": 0.19573919475078583, "learning_rate": 1.1788093776351752e-05, "loss": 0.5218, "step": 3786 }, { "epoch": 1.050194120909595, "grad_norm": 0.19596537947654724, "learning_rate": 1.1782581167589883e-05, "loss": 0.5252, "step": 3787 }, { "epoch": 1.050471436494731, "grad_norm": 0.19336049258708954, "learning_rate": 1.1777068698815434e-05, "loss": 0.5247, "step": 3788 }, { "epoch": 1.0507487520798668, "grad_norm": 0.18527399003505707, "learning_rate": 1.1771556371104039e-05, "loss": 0.5034, "step": 3789 }, { "epoch": 1.0510260676650027, "grad_norm": 0.18503272533416748, "learning_rate": 1.1766044185531296e-05, "loss": 0.4683, "step": 3790 }, { "epoch": 1.0513033832501386, "grad_norm": 0.1880425363779068, "learning_rate": 1.1760532143172772e-05, "loss": 0.5317, "step": 3791 }, { "epoch": 1.0515806988352745, "grad_norm": 0.18435950577259064, "learning_rate": 1.1755020245104025e-05, "loss": 0.5474, "step": 3792 }, { "epoch": 1.0518580144204104, "grad_norm": 0.18995283544063568, "learning_rate": 1.1749508492400564e-05, "loss": 0.5066, "step": 3793 }, { "epoch": 1.0521353300055463, "grad_norm": 0.18789511919021606, "learning_rate": 1.1743996886137882e-05, "loss": 0.5064, "step": 3794 }, { "epoch": 1.0524126455906821, "grad_norm": 0.17998188734054565, "learning_rate": 1.1738485427391431e-05, "loss": 0.5077, "step": 3795 }, { "epoch": 1.052689961175818, "grad_norm": 0.21583081781864166, "learning_rate": 1.1732974117236656e-05, "loss": 0.5048, "step": 3796 }, { "epoch": 1.052967276760954, "grad_norm": 0.21214094758033752, "learning_rate": 1.172746295674895e-05, "loss": 0.5244, "step": 3797 }, { "epoch": 1.0532445923460898, "grad_norm": 0.1979261040687561, "learning_rate": 1.1721951947003689e-05, "loss": 0.5102, "step": 3798 }, { "epoch": 1.0535219079312257, "grad_norm": 0.1923878788948059, "learning_rate": 1.1716441089076216e-05, "loss": 0.5165, "step": 3799 }, { "epoch": 1.0537992235163616, "grad_norm": 0.19521905481815338, "learning_rate": 1.1710930384041852e-05, "loss": 0.5411, "step": 3800 }, { "epoch": 1.0540765391014975, "grad_norm": 0.1916571408510208, "learning_rate": 1.1705419832975873e-05, "loss": 0.5001, "step": 3801 }, { "epoch": 1.0543538546866333, "grad_norm": 0.19345450401306152, "learning_rate": 1.1699909436953532e-05, "loss": 0.5371, "step": 3802 }, { "epoch": 1.0546311702717692, "grad_norm": 0.2184910923242569, "learning_rate": 1.1694399197050062e-05, "loss": 0.5138, "step": 3803 }, { "epoch": 1.054908485856905, "grad_norm": 0.19563588500022888, "learning_rate": 1.1688889114340653e-05, "loss": 0.51, "step": 3804 }, { "epoch": 1.055185801442041, "grad_norm": 0.19763770699501038, "learning_rate": 1.1683379189900465e-05, "loss": 0.5183, "step": 3805 }, { "epoch": 1.0554631170271769, "grad_norm": 0.19711549580097198, "learning_rate": 1.1677869424804637e-05, "loss": 0.4926, "step": 3806 }, { "epoch": 1.0557404326123128, "grad_norm": 0.2019127458333969, "learning_rate": 1.1672359820128265e-05, "loss": 0.5459, "step": 3807 }, { "epoch": 1.0560177481974486, "grad_norm": 0.2181474268436432, "learning_rate": 1.166685037694642e-05, "loss": 0.5211, "step": 3808 }, { "epoch": 1.0562950637825845, "grad_norm": 0.20076914131641388, "learning_rate": 1.1661341096334136e-05, "loss": 0.5064, "step": 3809 }, { "epoch": 1.0565723793677204, "grad_norm": 0.19240239262580872, "learning_rate": 1.1655831979366427e-05, "loss": 0.5198, "step": 3810 }, { "epoch": 1.0568496949528563, "grad_norm": 0.19020958244800568, "learning_rate": 1.1650323027118269e-05, "loss": 0.5131, "step": 3811 }, { "epoch": 1.0571270105379922, "grad_norm": 0.19757165014743805, "learning_rate": 1.1644814240664594e-05, "loss": 0.5075, "step": 3812 }, { "epoch": 1.057404326123128, "grad_norm": 0.199832484126091, "learning_rate": 1.1639305621080321e-05, "loss": 0.4984, "step": 3813 }, { "epoch": 1.057681641708264, "grad_norm": 0.19308264553546906, "learning_rate": 1.1633797169440326e-05, "loss": 0.5108, "step": 3814 }, { "epoch": 1.0579589572933998, "grad_norm": 0.19443538784980774, "learning_rate": 1.1628288886819453e-05, "loss": 0.4896, "step": 3815 }, { "epoch": 1.0582362728785357, "grad_norm": 0.21316394209861755, "learning_rate": 1.1622780774292506e-05, "loss": 0.5378, "step": 3816 }, { "epoch": 1.0585135884636716, "grad_norm": 0.1971164047718048, "learning_rate": 1.1617272832934282e-05, "loss": 0.5282, "step": 3817 }, { "epoch": 1.0587909040488075, "grad_norm": 0.19226108491420746, "learning_rate": 1.161176506381951e-05, "loss": 0.5409, "step": 3818 }, { "epoch": 1.0590682196339434, "grad_norm": 0.17965470254421234, "learning_rate": 1.1606257468022907e-05, "loss": 0.5022, "step": 3819 }, { "epoch": 1.0593455352190793, "grad_norm": 0.191642165184021, "learning_rate": 1.1600750046619154e-05, "loss": 0.5034, "step": 3820 }, { "epoch": 1.0596228508042151, "grad_norm": 0.19330790638923645, "learning_rate": 1.1595242800682893e-05, "loss": 0.5155, "step": 3821 }, { "epoch": 1.059900166389351, "grad_norm": 0.20066289603710175, "learning_rate": 1.1589735731288725e-05, "loss": 0.5337, "step": 3822 }, { "epoch": 1.060177481974487, "grad_norm": 0.1891581416130066, "learning_rate": 1.1584228839511242e-05, "loss": 0.5264, "step": 3823 }, { "epoch": 1.0604547975596228, "grad_norm": 0.19226865470409393, "learning_rate": 1.1578722126424971e-05, "loss": 0.4739, "step": 3824 }, { "epoch": 1.0607321131447587, "grad_norm": 0.1854773312807083, "learning_rate": 1.1573215593104425e-05, "loss": 0.527, "step": 3825 }, { "epoch": 1.0610094287298946, "grad_norm": 0.21180889010429382, "learning_rate": 1.1567709240624067e-05, "loss": 0.4695, "step": 3826 }, { "epoch": 1.0612867443150305, "grad_norm": 0.1897820234298706, "learning_rate": 1.1562203070058341e-05, "loss": 0.4995, "step": 3827 }, { "epoch": 1.0615640599001663, "grad_norm": 0.1939866989850998, "learning_rate": 1.155669708248164e-05, "loss": 0.5126, "step": 3828 }, { "epoch": 1.0618413754853022, "grad_norm": 0.22312824428081512, "learning_rate": 1.1551191278968328e-05, "loss": 0.5291, "step": 3829 }, { "epoch": 1.062118691070438, "grad_norm": 0.19337856769561768, "learning_rate": 1.1545685660592741e-05, "loss": 0.5188, "step": 3830 }, { "epoch": 1.062396006655574, "grad_norm": 0.19467610120773315, "learning_rate": 1.1540180228429164e-05, "loss": 0.5388, "step": 3831 }, { "epoch": 1.0626733222407099, "grad_norm": 0.19284315407276154, "learning_rate": 1.1534674983551857e-05, "loss": 0.5011, "step": 3832 }, { "epoch": 1.0629506378258458, "grad_norm": 0.22472496330738068, "learning_rate": 1.1529169927035028e-05, "loss": 0.5105, "step": 3833 }, { "epoch": 1.0632279534109816, "grad_norm": 0.18030276894569397, "learning_rate": 1.1523665059952876e-05, "loss": 0.515, "step": 3834 }, { "epoch": 1.0635052689961175, "grad_norm": 0.19472011923789978, "learning_rate": 1.1518160383379534e-05, "loss": 0.5005, "step": 3835 }, { "epoch": 1.0637825845812534, "grad_norm": 0.1929817497730255, "learning_rate": 1.1512655898389115e-05, "loss": 0.5114, "step": 3836 }, { "epoch": 1.0640599001663893, "grad_norm": 0.1856139898300171, "learning_rate": 1.150715160605569e-05, "loss": 0.4847, "step": 3837 }, { "epoch": 1.0643372157515252, "grad_norm": 0.1880098432302475, "learning_rate": 1.1501647507453295e-05, "loss": 0.5346, "step": 3838 }, { "epoch": 1.064614531336661, "grad_norm": 0.18800011277198792, "learning_rate": 1.149614360365592e-05, "loss": 0.5113, "step": 3839 }, { "epoch": 1.064891846921797, "grad_norm": 0.19980834424495697, "learning_rate": 1.1490639895737523e-05, "loss": 0.5214, "step": 3840 }, { "epoch": 1.0651691625069328, "grad_norm": 0.18565940856933594, "learning_rate": 1.1485136384772024e-05, "loss": 0.5108, "step": 3841 }, { "epoch": 1.0654464780920687, "grad_norm": 0.18709343671798706, "learning_rate": 1.1479633071833306e-05, "loss": 0.5071, "step": 3842 }, { "epoch": 1.0657237936772046, "grad_norm": 0.19222643971443176, "learning_rate": 1.1474129957995209e-05, "loss": 0.5084, "step": 3843 }, { "epoch": 1.0660011092623405, "grad_norm": 0.18951910734176636, "learning_rate": 1.146862704433154e-05, "loss": 0.4997, "step": 3844 }, { "epoch": 1.0662784248474764, "grad_norm": 0.19320812821388245, "learning_rate": 1.146312433191606e-05, "loss": 0.5397, "step": 3845 }, { "epoch": 1.0665557404326123, "grad_norm": 0.19874915480613708, "learning_rate": 1.1457621821822492e-05, "loss": 0.4976, "step": 3846 }, { "epoch": 1.0668330560177481, "grad_norm": 0.19041554629802704, "learning_rate": 1.1452119515124524e-05, "loss": 0.5293, "step": 3847 }, { "epoch": 1.067110371602884, "grad_norm": 0.18619757890701294, "learning_rate": 1.1446617412895802e-05, "loss": 0.5008, "step": 3848 }, { "epoch": 1.06738768718802, "grad_norm": 0.18963642418384552, "learning_rate": 1.1441115516209936e-05, "loss": 0.5099, "step": 3849 }, { "epoch": 1.0676650027731558, "grad_norm": 0.1950794905424118, "learning_rate": 1.143561382614048e-05, "loss": 0.5103, "step": 3850 }, { "epoch": 1.0679423183582917, "grad_norm": 0.1896001100540161, "learning_rate": 1.1430112343760971e-05, "loss": 0.529, "step": 3851 }, { "epoch": 1.0682196339434276, "grad_norm": 0.20062585175037384, "learning_rate": 1.142461107014489e-05, "loss": 0.5222, "step": 3852 }, { "epoch": 1.0684969495285634, "grad_norm": 0.19482149183750153, "learning_rate": 1.1419110006365682e-05, "loss": 0.497, "step": 3853 }, { "epoch": 1.0687742651136993, "grad_norm": 0.1913536936044693, "learning_rate": 1.1413609153496742e-05, "loss": 0.5164, "step": 3854 }, { "epoch": 1.0690515806988352, "grad_norm": 0.20729845762252808, "learning_rate": 1.140810851261145e-05, "loss": 0.5594, "step": 3855 }, { "epoch": 1.069328896283971, "grad_norm": 0.20025323331356049, "learning_rate": 1.1402608084783112e-05, "loss": 0.4986, "step": 3856 }, { "epoch": 1.069606211869107, "grad_norm": 0.21821513772010803, "learning_rate": 1.1397107871085009e-05, "loss": 0.5312, "step": 3857 }, { "epoch": 1.0698835274542429, "grad_norm": 0.19112320244312286, "learning_rate": 1.1391607872590381e-05, "loss": 0.4957, "step": 3858 }, { "epoch": 1.0701608430393788, "grad_norm": 0.18980076909065247, "learning_rate": 1.138610809037243e-05, "loss": 0.5235, "step": 3859 }, { "epoch": 1.0704381586245146, "grad_norm": 0.18163329362869263, "learning_rate": 1.1380608525504298e-05, "loss": 0.4966, "step": 3860 }, { "epoch": 1.0707154742096505, "grad_norm": 0.19191473722457886, "learning_rate": 1.1375109179059098e-05, "loss": 0.5233, "step": 3861 }, { "epoch": 1.0709927897947864, "grad_norm": 0.18972338736057281, "learning_rate": 1.1369610052109902e-05, "loss": 0.516, "step": 3862 }, { "epoch": 1.0712701053799223, "grad_norm": 0.1891545057296753, "learning_rate": 1.1364111145729737e-05, "loss": 0.5029, "step": 3863 }, { "epoch": 1.0715474209650582, "grad_norm": 0.1886187046766281, "learning_rate": 1.1358612460991577e-05, "loss": 0.5157, "step": 3864 }, { "epoch": 1.071824736550194, "grad_norm": 0.2039126604795456, "learning_rate": 1.1353113998968371e-05, "loss": 0.535, "step": 3865 }, { "epoch": 1.07210205213533, "grad_norm": 0.19889415800571442, "learning_rate": 1.134761576073301e-05, "loss": 0.5235, "step": 3866 }, { "epoch": 1.0723793677204658, "grad_norm": 0.18273714184761047, "learning_rate": 1.1342117747358344e-05, "loss": 0.5083, "step": 3867 }, { "epoch": 1.0726566833056017, "grad_norm": 0.19936375319957733, "learning_rate": 1.1336619959917182e-05, "loss": 0.508, "step": 3868 }, { "epoch": 1.0729339988907376, "grad_norm": 0.19671058654785156, "learning_rate": 1.133112239948229e-05, "loss": 0.5239, "step": 3869 }, { "epoch": 1.0732113144758735, "grad_norm": 0.18869206309318542, "learning_rate": 1.132562506712639e-05, "loss": 0.5204, "step": 3870 }, { "epoch": 1.0734886300610094, "grad_norm": 0.18764182925224304, "learning_rate": 1.132012796392215e-05, "loss": 0.4993, "step": 3871 }, { "epoch": 1.0737659456461452, "grad_norm": 0.21141819655895233, "learning_rate": 1.1314631090942204e-05, "loss": 0.4947, "step": 3872 }, { "epoch": 1.0740432612312811, "grad_norm": 0.18649768829345703, "learning_rate": 1.130913444925914e-05, "loss": 0.5258, "step": 3873 }, { "epoch": 1.074320576816417, "grad_norm": 0.18479731678962708, "learning_rate": 1.1303638039945498e-05, "loss": 0.4958, "step": 3874 }, { "epoch": 1.074597892401553, "grad_norm": 0.20172236859798431, "learning_rate": 1.1298141864073763e-05, "loss": 0.5181, "step": 3875 }, { "epoch": 1.0748752079866888, "grad_norm": 0.194131001830101, "learning_rate": 1.1292645922716404e-05, "loss": 0.538, "step": 3876 }, { "epoch": 1.0751525235718247, "grad_norm": 0.18898829817771912, "learning_rate": 1.1287150216945808e-05, "loss": 0.4733, "step": 3877 }, { "epoch": 1.0754298391569606, "grad_norm": 0.19593171775341034, "learning_rate": 1.1281654747834337e-05, "loss": 0.5304, "step": 3878 }, { "epoch": 1.0757071547420964, "grad_norm": 0.1899636685848236, "learning_rate": 1.1276159516454308e-05, "loss": 0.5191, "step": 3879 }, { "epoch": 1.0759844703272323, "grad_norm": 0.1919141411781311, "learning_rate": 1.1270664523877982e-05, "loss": 0.5044, "step": 3880 }, { "epoch": 1.0762617859123682, "grad_norm": 0.1983211487531662, "learning_rate": 1.1265169771177573e-05, "loss": 0.4931, "step": 3881 }, { "epoch": 1.076539101497504, "grad_norm": 0.1884281188249588, "learning_rate": 1.1259675259425263e-05, "loss": 0.5212, "step": 3882 }, { "epoch": 1.07681641708264, "grad_norm": 0.18257342278957367, "learning_rate": 1.125418098969317e-05, "loss": 0.4964, "step": 3883 }, { "epoch": 1.0770937326677759, "grad_norm": 0.19225247204303741, "learning_rate": 1.1248686963053374e-05, "loss": 0.5216, "step": 3884 }, { "epoch": 1.0773710482529117, "grad_norm": 0.21373282372951508, "learning_rate": 1.1243193180577902e-05, "loss": 0.5106, "step": 3885 }, { "epoch": 1.0776483638380476, "grad_norm": 0.21381765604019165, "learning_rate": 1.123769964333874e-05, "loss": 0.4909, "step": 3886 }, { "epoch": 1.0779256794231835, "grad_norm": 0.18304717540740967, "learning_rate": 1.1232206352407828e-05, "loss": 0.5087, "step": 3887 }, { "epoch": 1.0782029950083194, "grad_norm": 0.18388831615447998, "learning_rate": 1.1226713308857036e-05, "loss": 0.4925, "step": 3888 }, { "epoch": 1.0784803105934553, "grad_norm": 0.19412866234779358, "learning_rate": 1.1221220513758219e-05, "loss": 0.5099, "step": 3889 }, { "epoch": 1.0787576261785912, "grad_norm": 0.19013476371765137, "learning_rate": 1.1215727968183159e-05, "loss": 0.5227, "step": 3890 }, { "epoch": 1.079034941763727, "grad_norm": 0.19106021523475647, "learning_rate": 1.1210235673203601e-05, "loss": 0.5123, "step": 3891 }, { "epoch": 1.079312257348863, "grad_norm": 0.18381288647651672, "learning_rate": 1.1204743629891225e-05, "loss": 0.5182, "step": 3892 }, { "epoch": 1.0795895729339988, "grad_norm": 0.19413931667804718, "learning_rate": 1.1199251839317696e-05, "loss": 0.5014, "step": 3893 }, { "epoch": 1.0798668885191347, "grad_norm": 0.18920141458511353, "learning_rate": 1.119376030255459e-05, "loss": 0.5136, "step": 3894 }, { "epoch": 1.0801442041042706, "grad_norm": 0.19880147278308868, "learning_rate": 1.1188269020673456e-05, "loss": 0.5539, "step": 3895 }, { "epoch": 1.0804215196894065, "grad_norm": 0.1854148656129837, "learning_rate": 1.118277799474579e-05, "loss": 0.4911, "step": 3896 }, { "epoch": 1.0806988352745424, "grad_norm": 0.18824362754821777, "learning_rate": 1.1177287225843041e-05, "loss": 0.4769, "step": 3897 }, { "epoch": 1.0809761508596782, "grad_norm": 0.1937967985868454, "learning_rate": 1.1171796715036597e-05, "loss": 0.5147, "step": 3898 }, { "epoch": 1.0812534664448141, "grad_norm": 0.18958862125873566, "learning_rate": 1.11663064633978e-05, "loss": 0.5197, "step": 3899 }, { "epoch": 1.08153078202995, "grad_norm": 0.1861356496810913, "learning_rate": 1.1160816471997951e-05, "loss": 0.5102, "step": 3900 }, { "epoch": 1.081808097615086, "grad_norm": 0.19085288047790527, "learning_rate": 1.115532674190829e-05, "loss": 0.4957, "step": 3901 }, { "epoch": 1.0820854132002218, "grad_norm": 0.19016428291797638, "learning_rate": 1.1149837274200004e-05, "loss": 0.523, "step": 3902 }, { "epoch": 1.0823627287853577, "grad_norm": 0.18524803221225739, "learning_rate": 1.1144348069944244e-05, "loss": 0.4999, "step": 3903 }, { "epoch": 1.0826400443704935, "grad_norm": 0.18277236819267273, "learning_rate": 1.1138859130212089e-05, "loss": 0.5139, "step": 3904 }, { "epoch": 1.0829173599556294, "grad_norm": 0.18841886520385742, "learning_rate": 1.113337045607458e-05, "loss": 0.5124, "step": 3905 }, { "epoch": 1.0831946755407653, "grad_norm": 0.19280953705310822, "learning_rate": 1.1127882048602703e-05, "loss": 0.4953, "step": 3906 }, { "epoch": 1.0834719911259012, "grad_norm": 0.19617757201194763, "learning_rate": 1.1122393908867392e-05, "loss": 0.5144, "step": 3907 }, { "epoch": 1.083749306711037, "grad_norm": 0.19670793414115906, "learning_rate": 1.1116906037939532e-05, "loss": 0.5048, "step": 3908 }, { "epoch": 1.084026622296173, "grad_norm": 0.18718746304512024, "learning_rate": 1.1111418436889944e-05, "loss": 0.5164, "step": 3909 }, { "epoch": 1.0843039378813089, "grad_norm": 0.18242870271205902, "learning_rate": 1.110593110678941e-05, "loss": 0.5263, "step": 3910 }, { "epoch": 1.0845812534664447, "grad_norm": 0.1876215934753418, "learning_rate": 1.1100444048708653e-05, "loss": 0.504, "step": 3911 }, { "epoch": 1.0848585690515806, "grad_norm": 0.19080670177936554, "learning_rate": 1.1094957263718345e-05, "loss": 0.5236, "step": 3912 }, { "epoch": 1.0851358846367165, "grad_norm": 0.194438174366951, "learning_rate": 1.1089470752889093e-05, "loss": 0.5175, "step": 3913 }, { "epoch": 1.0854132002218524, "grad_norm": 0.19020043313503265, "learning_rate": 1.1083984517291476e-05, "loss": 0.5164, "step": 3914 }, { "epoch": 1.0856905158069883, "grad_norm": 0.1921057403087616, "learning_rate": 1.1078498557995995e-05, "loss": 0.5049, "step": 3915 }, { "epoch": 1.0859678313921242, "grad_norm": 0.19786351919174194, "learning_rate": 1.1073012876073103e-05, "loss": 0.5317, "step": 3916 }, { "epoch": 1.08624514697726, "grad_norm": 0.1904228925704956, "learning_rate": 1.106752747259321e-05, "loss": 0.5635, "step": 3917 }, { "epoch": 1.086522462562396, "grad_norm": 0.1912548840045929, "learning_rate": 1.106204234862666e-05, "loss": 0.5369, "step": 3918 }, { "epoch": 1.0867997781475318, "grad_norm": 0.1982937604188919, "learning_rate": 1.1056557505243746e-05, "loss": 0.5137, "step": 3919 }, { "epoch": 1.0870770937326677, "grad_norm": 0.19235102832317352, "learning_rate": 1.1051072943514703e-05, "loss": 0.5317, "step": 3920 }, { "epoch": 1.0873544093178036, "grad_norm": 0.2086210548877716, "learning_rate": 1.1045588664509717e-05, "loss": 0.4916, "step": 3921 }, { "epoch": 1.0876317249029395, "grad_norm": 0.19181272387504578, "learning_rate": 1.104010466929892e-05, "loss": 0.5142, "step": 3922 }, { "epoch": 1.0879090404880754, "grad_norm": 0.18342861533164978, "learning_rate": 1.1034620958952377e-05, "loss": 0.5328, "step": 3923 }, { "epoch": 1.0881863560732112, "grad_norm": 0.18724067509174347, "learning_rate": 1.1029137534540113e-05, "loss": 0.4951, "step": 3924 }, { "epoch": 1.0884636716583471, "grad_norm": 0.19535782933235168, "learning_rate": 1.1023654397132087e-05, "loss": 0.498, "step": 3925 }, { "epoch": 1.088740987243483, "grad_norm": 0.24757403135299683, "learning_rate": 1.10181715477982e-05, "loss": 0.4967, "step": 3926 }, { "epoch": 1.089018302828619, "grad_norm": 0.1959121972322464, "learning_rate": 1.1012688987608303e-05, "loss": 0.5174, "step": 3927 }, { "epoch": 1.0892956184137548, "grad_norm": 0.20022380352020264, "learning_rate": 1.1007206717632193e-05, "loss": 0.5224, "step": 3928 }, { "epoch": 1.0895729339988907, "grad_norm": 0.18869644403457642, "learning_rate": 1.1001724738939606e-05, "loss": 0.5269, "step": 3929 }, { "epoch": 1.0898502495840265, "grad_norm": 0.19060127437114716, "learning_rate": 1.099624305260021e-05, "loss": 0.517, "step": 3930 }, { "epoch": 1.0901275651691624, "grad_norm": 0.18450330197811127, "learning_rate": 1.0990761659683643e-05, "loss": 0.5134, "step": 3931 }, { "epoch": 1.0904048807542983, "grad_norm": 0.20092949271202087, "learning_rate": 1.0985280561259462e-05, "loss": 0.5229, "step": 3932 }, { "epoch": 1.0906821963394342, "grad_norm": 0.187063530087471, "learning_rate": 1.0979799758397173e-05, "loss": 0.5074, "step": 3933 }, { "epoch": 1.09095951192457, "grad_norm": 0.21675853431224823, "learning_rate": 1.0974319252166226e-05, "loss": 0.5443, "step": 3934 }, { "epoch": 1.091236827509706, "grad_norm": 0.19182507693767548, "learning_rate": 1.0968839043636021e-05, "loss": 0.5165, "step": 3935 }, { "epoch": 1.0915141430948418, "grad_norm": 0.19931842386722565, "learning_rate": 1.0963359133875884e-05, "loss": 0.5362, "step": 3936 }, { "epoch": 1.0917914586799777, "grad_norm": 0.19970446825027466, "learning_rate": 1.0957879523955087e-05, "loss": 0.5175, "step": 3937 }, { "epoch": 1.0920687742651136, "grad_norm": 0.18823319673538208, "learning_rate": 1.0952400214942857e-05, "loss": 0.5184, "step": 3938 }, { "epoch": 1.0923460898502495, "grad_norm": 0.19610384106636047, "learning_rate": 1.094692120790835e-05, "loss": 0.5194, "step": 3939 }, { "epoch": 1.0926234054353854, "grad_norm": 0.2017711102962494, "learning_rate": 1.0941442503920664e-05, "loss": 0.5254, "step": 3940 }, { "epoch": 1.0929007210205213, "grad_norm": 0.18085281550884247, "learning_rate": 1.0935964104048834e-05, "loss": 0.4865, "step": 3941 }, { "epoch": 1.0931780366056572, "grad_norm": 0.18639199435710907, "learning_rate": 1.0930486009361847e-05, "loss": 0.4902, "step": 3942 }, { "epoch": 1.093455352190793, "grad_norm": 0.1943678855895996, "learning_rate": 1.0925008220928624e-05, "loss": 0.5216, "step": 3943 }, { "epoch": 1.093732667775929, "grad_norm": 0.19227345287799835, "learning_rate": 1.0919530739818022e-05, "loss": 0.4983, "step": 3944 }, { "epoch": 1.0940099833610648, "grad_norm": 0.18952839076519012, "learning_rate": 1.091405356709885e-05, "loss": 0.496, "step": 3945 }, { "epoch": 1.0942872989462007, "grad_norm": 0.20736488699913025, "learning_rate": 1.090857670383985e-05, "loss": 0.5296, "step": 3946 }, { "epoch": 1.0945646145313366, "grad_norm": 0.1944408416748047, "learning_rate": 1.090310015110969e-05, "loss": 0.5024, "step": 3947 }, { "epoch": 1.0948419301164725, "grad_norm": 0.2053939253091812, "learning_rate": 1.0897623909977006e-05, "loss": 0.4962, "step": 3948 }, { "epoch": 1.0951192457016083, "grad_norm": 0.200760155916214, "learning_rate": 1.089214798151035e-05, "loss": 0.5123, "step": 3949 }, { "epoch": 1.0953965612867442, "grad_norm": 0.19097808003425598, "learning_rate": 1.0886672366778224e-05, "loss": 0.4831, "step": 3950 }, { "epoch": 1.0956738768718801, "grad_norm": 0.1900891810655594, "learning_rate": 1.0881197066849055e-05, "loss": 0.5154, "step": 3951 }, { "epoch": 1.095951192457016, "grad_norm": 0.19628465175628662, "learning_rate": 1.0875722082791237e-05, "loss": 0.4804, "step": 3952 }, { "epoch": 1.0962285080421519, "grad_norm": 0.18627870082855225, "learning_rate": 1.0870247415673072e-05, "loss": 0.4819, "step": 3953 }, { "epoch": 1.0965058236272878, "grad_norm": 0.1861443817615509, "learning_rate": 1.0864773066562814e-05, "loss": 0.4924, "step": 3954 }, { "epoch": 1.0967831392124237, "grad_norm": 0.2025613784790039, "learning_rate": 1.0859299036528657e-05, "loss": 0.5246, "step": 3955 }, { "epoch": 1.0970604547975595, "grad_norm": 0.18845801055431366, "learning_rate": 1.0853825326638731e-05, "loss": 0.499, "step": 3956 }, { "epoch": 1.0973377703826954, "grad_norm": 0.1874900460243225, "learning_rate": 1.0848351937961094e-05, "loss": 0.5166, "step": 3957 }, { "epoch": 1.0976150859678313, "grad_norm": 0.20147672295570374, "learning_rate": 1.0842878871563752e-05, "loss": 0.5236, "step": 3958 }, { "epoch": 1.0978924015529672, "grad_norm": 0.2169303447008133, "learning_rate": 1.083740612851465e-05, "loss": 0.4983, "step": 3959 }, { "epoch": 1.098169717138103, "grad_norm": 0.183817520737648, "learning_rate": 1.083193370988166e-05, "loss": 0.5103, "step": 3960 }, { "epoch": 1.098447032723239, "grad_norm": 0.2015179991722107, "learning_rate": 1.0826461616732596e-05, "loss": 0.4943, "step": 3961 }, { "epoch": 1.0987243483083748, "grad_norm": 0.19928953051567078, "learning_rate": 1.0820989850135216e-05, "loss": 0.5246, "step": 3962 }, { "epoch": 1.0990016638935107, "grad_norm": 0.18814896047115326, "learning_rate": 1.0815518411157198e-05, "loss": 0.5284, "step": 3963 }, { "epoch": 1.0992789794786466, "grad_norm": 0.20342199504375458, "learning_rate": 1.0810047300866166e-05, "loss": 0.5014, "step": 3964 }, { "epoch": 1.0995562950637825, "grad_norm": 0.19590109586715698, "learning_rate": 1.0804576520329679e-05, "loss": 0.5148, "step": 3965 }, { "epoch": 1.0998336106489184, "grad_norm": 0.1944301873445511, "learning_rate": 1.0799106070615235e-05, "loss": 0.4809, "step": 3966 }, { "epoch": 1.1001109262340543, "grad_norm": 0.19030778110027313, "learning_rate": 1.0793635952790264e-05, "loss": 0.5093, "step": 3967 }, { "epoch": 1.1003882418191901, "grad_norm": 0.1883806735277176, "learning_rate": 1.0788166167922118e-05, "loss": 0.5018, "step": 3968 }, { "epoch": 1.100665557404326, "grad_norm": 0.21474680304527283, "learning_rate": 1.0782696717078117e-05, "loss": 0.5091, "step": 3969 }, { "epoch": 1.100942872989462, "grad_norm": 0.24823465943336487, "learning_rate": 1.0777227601325482e-05, "loss": 0.5073, "step": 3970 }, { "epoch": 1.1012201885745978, "grad_norm": 0.18175634741783142, "learning_rate": 1.0771758821731386e-05, "loss": 0.4993, "step": 3971 }, { "epoch": 1.1014975041597337, "grad_norm": 0.18903425335884094, "learning_rate": 1.0766290379362928e-05, "loss": 0.5283, "step": 3972 }, { "epoch": 1.1017748197448696, "grad_norm": 0.19946777820587158, "learning_rate": 1.0760822275287159e-05, "loss": 0.5284, "step": 3973 }, { "epoch": 1.1020521353300055, "grad_norm": 0.19179320335388184, "learning_rate": 1.075535451057104e-05, "loss": 0.5264, "step": 3974 }, { "epoch": 1.1023294509151413, "grad_norm": 0.18837237358093262, "learning_rate": 1.0749887086281474e-05, "loss": 0.5051, "step": 3975 }, { "epoch": 1.1026067665002772, "grad_norm": 0.19744771718978882, "learning_rate": 1.0744420003485312e-05, "loss": 0.5313, "step": 3976 }, { "epoch": 1.102884082085413, "grad_norm": 0.20925208926200867, "learning_rate": 1.0738953263249319e-05, "loss": 0.5033, "step": 3977 }, { "epoch": 1.103161397670549, "grad_norm": 0.19691763818264008, "learning_rate": 1.0733486866640203e-05, "loss": 0.5205, "step": 3978 }, { "epoch": 1.1034387132556849, "grad_norm": 0.19739267230033875, "learning_rate": 1.07280208147246e-05, "loss": 0.4742, "step": 3979 }, { "epoch": 1.1037160288408208, "grad_norm": 0.1945018321275711, "learning_rate": 1.0722555108569085e-05, "loss": 0.4977, "step": 3980 }, { "epoch": 1.1039933444259566, "grad_norm": 0.19838854670524597, "learning_rate": 1.071708974924016e-05, "loss": 0.5137, "step": 3981 }, { "epoch": 1.1042706600110925, "grad_norm": 0.21607692539691925, "learning_rate": 1.071162473780426e-05, "loss": 0.5166, "step": 3982 }, { "epoch": 1.1045479755962284, "grad_norm": 0.17799584567546844, "learning_rate": 1.0706160075327761e-05, "loss": 0.4938, "step": 3983 }, { "epoch": 1.1048252911813643, "grad_norm": 0.1956464648246765, "learning_rate": 1.0700695762876958e-05, "loss": 0.5244, "step": 3984 }, { "epoch": 1.1051026067665002, "grad_norm": 0.18592742085456848, "learning_rate": 1.0695231801518083e-05, "loss": 0.466, "step": 3985 }, { "epoch": 1.105379922351636, "grad_norm": 0.19382350146770477, "learning_rate": 1.0689768192317296e-05, "loss": 0.4924, "step": 3986 }, { "epoch": 1.105657237936772, "grad_norm": 0.2073841392993927, "learning_rate": 1.0684304936340697e-05, "loss": 0.5109, "step": 3987 }, { "epoch": 1.1059345535219078, "grad_norm": 0.19084088504314423, "learning_rate": 1.0678842034654315e-05, "loss": 0.5137, "step": 3988 }, { "epoch": 1.1062118691070437, "grad_norm": 0.1962527185678482, "learning_rate": 1.0673379488324095e-05, "loss": 0.5245, "step": 3989 }, { "epoch": 1.1064891846921796, "grad_norm": 0.18985575437545776, "learning_rate": 1.066791729841594e-05, "loss": 0.4747, "step": 3990 }, { "epoch": 1.1067665002773155, "grad_norm": 0.2049971967935562, "learning_rate": 1.0662455465995657e-05, "loss": 0.4954, "step": 3991 }, { "epoch": 1.1070438158624514, "grad_norm": 0.19408412277698517, "learning_rate": 1.0656993992128999e-05, "loss": 0.5252, "step": 3992 }, { "epoch": 1.1073211314475873, "grad_norm": 0.19346264004707336, "learning_rate": 1.0651532877881639e-05, "loss": 0.4995, "step": 3993 }, { "epoch": 1.1075984470327231, "grad_norm": 0.19205859303474426, "learning_rate": 1.0646072124319193e-05, "loss": 0.5091, "step": 3994 }, { "epoch": 1.107875762617859, "grad_norm": 0.20613279938697815, "learning_rate": 1.0640611732507192e-05, "loss": 0.5191, "step": 3995 }, { "epoch": 1.108153078202995, "grad_norm": 0.1821645051240921, "learning_rate": 1.0635151703511104e-05, "loss": 0.493, "step": 3996 }, { "epoch": 1.1084303937881308, "grad_norm": 0.20445798337459564, "learning_rate": 1.062969203839633e-05, "loss": 0.5365, "step": 3997 }, { "epoch": 1.1087077093732667, "grad_norm": 0.19484387338161469, "learning_rate": 1.062423273822819e-05, "loss": 0.5099, "step": 3998 }, { "epoch": 1.1089850249584026, "grad_norm": 0.19972245395183563, "learning_rate": 1.0618773804071943e-05, "loss": 0.5233, "step": 3999 }, { "epoch": 1.1092623405435384, "grad_norm": 0.20230787992477417, "learning_rate": 1.0613315236992766e-05, "loss": 0.524, "step": 4000 }, { "epoch": 1.1095396561286743, "grad_norm": 0.18941400945186615, "learning_rate": 1.0607857038055774e-05, "loss": 0.4988, "step": 4001 }, { "epoch": 1.1098169717138102, "grad_norm": 0.1921200454235077, "learning_rate": 1.0602399208326006e-05, "loss": 0.5355, "step": 4002 }, { "epoch": 1.110094287298946, "grad_norm": 0.1832018345594406, "learning_rate": 1.0596941748868426e-05, "loss": 0.499, "step": 4003 }, { "epoch": 1.110371602884082, "grad_norm": 0.19228579103946686, "learning_rate": 1.0591484660747933e-05, "loss": 0.5189, "step": 4004 }, { "epoch": 1.1106489184692179, "grad_norm": 0.19055628776550293, "learning_rate": 1.0586027945029352e-05, "loss": 0.5012, "step": 4005 }, { "epoch": 1.1109262340543538, "grad_norm": 0.19514985382556915, "learning_rate": 1.0580571602777425e-05, "loss": 0.4883, "step": 4006 }, { "epoch": 1.1112035496394896, "grad_norm": 0.1857844740152359, "learning_rate": 1.057511563505683e-05, "loss": 0.4863, "step": 4007 }, { "epoch": 1.1114808652246255, "grad_norm": 0.19990986585617065, "learning_rate": 1.0569660042932177e-05, "loss": 0.5079, "step": 4008 }, { "epoch": 1.1117581808097614, "grad_norm": 0.1960282176733017, "learning_rate": 1.0564204827467994e-05, "loss": 0.5277, "step": 4009 }, { "epoch": 1.1120354963948973, "grad_norm": 0.20044219493865967, "learning_rate": 1.0558749989728729e-05, "loss": 0.5121, "step": 4010 }, { "epoch": 1.1123128119800332, "grad_norm": 0.19683614373207092, "learning_rate": 1.0553295530778784e-05, "loss": 0.4932, "step": 4011 }, { "epoch": 1.112590127565169, "grad_norm": 0.1851675808429718, "learning_rate": 1.0547841451682453e-05, "loss": 0.4994, "step": 4012 }, { "epoch": 1.112867443150305, "grad_norm": 0.20108520984649658, "learning_rate": 1.0542387753503974e-05, "loss": 0.5352, "step": 4013 }, { "epoch": 1.1131447587354408, "grad_norm": 0.18748925626277924, "learning_rate": 1.0536934437307514e-05, "loss": 0.4929, "step": 4014 }, { "epoch": 1.1134220743205767, "grad_norm": 0.183696448802948, "learning_rate": 1.0531481504157153e-05, "loss": 0.5099, "step": 4015 }, { "epoch": 1.1136993899057126, "grad_norm": 0.20530031621456146, "learning_rate": 1.0526028955116912e-05, "loss": 0.5101, "step": 4016 }, { "epoch": 1.1139767054908485, "grad_norm": 0.20076102018356323, "learning_rate": 1.0520576791250711e-05, "loss": 0.533, "step": 4017 }, { "epoch": 1.1142540210759844, "grad_norm": 0.2018887996673584, "learning_rate": 1.0515125013622428e-05, "loss": 0.5015, "step": 4018 }, { "epoch": 1.1145313366611203, "grad_norm": 0.1974845677614212, "learning_rate": 1.0509673623295843e-05, "loss": 0.4778, "step": 4019 }, { "epoch": 1.1148086522462561, "grad_norm": 0.18530985713005066, "learning_rate": 1.0504222621334664e-05, "loss": 0.5003, "step": 4020 }, { "epoch": 1.115085967831392, "grad_norm": 0.1943405568599701, "learning_rate": 1.0498772008802531e-05, "loss": 0.5041, "step": 4021 }, { "epoch": 1.115363283416528, "grad_norm": 0.19365082681179047, "learning_rate": 1.0493321786763003e-05, "loss": 0.5156, "step": 4022 }, { "epoch": 1.1156405990016638, "grad_norm": 0.20058946311473846, "learning_rate": 1.0487871956279558e-05, "loss": 0.4925, "step": 4023 }, { "epoch": 1.1159179145867997, "grad_norm": 0.1888214498758316, "learning_rate": 1.0482422518415602e-05, "loss": 0.5321, "step": 4024 }, { "epoch": 1.1161952301719356, "grad_norm": 0.19549217820167542, "learning_rate": 1.047697347423447e-05, "loss": 0.5257, "step": 4025 }, { "epoch": 1.1164725457570714, "grad_norm": 0.19175288081169128, "learning_rate": 1.0471524824799413e-05, "loss": 0.4774, "step": 4026 }, { "epoch": 1.1167498613422073, "grad_norm": 0.19548679888248444, "learning_rate": 1.04660765711736e-05, "loss": 0.4782, "step": 4027 }, { "epoch": 1.1170271769273432, "grad_norm": 0.20252208411693573, "learning_rate": 1.0460628714420145e-05, "loss": 0.5024, "step": 4028 }, { "epoch": 1.117304492512479, "grad_norm": 0.18733076751232147, "learning_rate": 1.0455181255602056e-05, "loss": 0.4894, "step": 4029 }, { "epoch": 1.117581808097615, "grad_norm": 0.19177298247814178, "learning_rate": 1.0449734195782281e-05, "loss": 0.5067, "step": 4030 }, { "epoch": 1.1178591236827509, "grad_norm": 0.18974518775939941, "learning_rate": 1.0444287536023681e-05, "loss": 0.4863, "step": 4031 }, { "epoch": 1.1181364392678868, "grad_norm": 0.20173045992851257, "learning_rate": 1.0438841277389055e-05, "loss": 0.507, "step": 4032 }, { "epoch": 1.1184137548530226, "grad_norm": 0.189706489443779, "learning_rate": 1.0433395420941101e-05, "loss": 0.495, "step": 4033 }, { "epoch": 1.1186910704381585, "grad_norm": 0.20018784701824188, "learning_rate": 1.0427949967742452e-05, "loss": 0.5267, "step": 4034 }, { "epoch": 1.1189683860232944, "grad_norm": 0.18162201344966888, "learning_rate": 1.0422504918855664e-05, "loss": 0.5028, "step": 4035 }, { "epoch": 1.1192457016084303, "grad_norm": 0.2000206708908081, "learning_rate": 1.041706027534321e-05, "loss": 0.5162, "step": 4036 }, { "epoch": 1.1195230171935662, "grad_norm": 0.19805558025836945, "learning_rate": 1.0411616038267486e-05, "loss": 0.4861, "step": 4037 }, { "epoch": 1.119800332778702, "grad_norm": 0.19431713223457336, "learning_rate": 1.0406172208690797e-05, "loss": 0.4991, "step": 4038 }, { "epoch": 1.120077648363838, "grad_norm": 0.2039175033569336, "learning_rate": 1.0400728787675387e-05, "loss": 0.539, "step": 4039 }, { "epoch": 1.1203549639489738, "grad_norm": 0.19760684669017792, "learning_rate": 1.039528577628341e-05, "loss": 0.5149, "step": 4040 }, { "epoch": 1.1206322795341097, "grad_norm": 0.18955372273921967, "learning_rate": 1.038984317557694e-05, "loss": 0.4901, "step": 4041 }, { "epoch": 1.1209095951192456, "grad_norm": 0.19011299312114716, "learning_rate": 1.0384400986617977e-05, "loss": 0.518, "step": 4042 }, { "epoch": 1.1211869107043815, "grad_norm": 0.18622393906116486, "learning_rate": 1.0378959210468434e-05, "loss": 0.4912, "step": 4043 }, { "epoch": 1.1214642262895174, "grad_norm": 0.19130538403987885, "learning_rate": 1.0373517848190143e-05, "loss": 0.4952, "step": 4044 }, { "epoch": 1.1217415418746532, "grad_norm": 0.19988033175468445, "learning_rate": 1.0368076900844856e-05, "loss": 0.4964, "step": 4045 }, { "epoch": 1.1220188574597891, "grad_norm": 0.20225760340690613, "learning_rate": 1.0362636369494254e-05, "loss": 0.5338, "step": 4046 }, { "epoch": 1.122296173044925, "grad_norm": 0.2051144242286682, "learning_rate": 1.0357196255199928e-05, "loss": 0.5251, "step": 4047 }, { "epoch": 1.122573488630061, "grad_norm": 0.18915054202079773, "learning_rate": 1.0351756559023374e-05, "loss": 0.4812, "step": 4048 }, { "epoch": 1.1228508042151968, "grad_norm": 0.20346355438232422, "learning_rate": 1.0346317282026045e-05, "loss": 0.5366, "step": 4049 }, { "epoch": 1.1231281198003327, "grad_norm": 0.1956186592578888, "learning_rate": 1.0340878425269269e-05, "loss": 0.5396, "step": 4050 }, { "epoch": 1.1234054353854686, "grad_norm": 0.18970191478729248, "learning_rate": 1.0335439989814316e-05, "loss": 0.4687, "step": 4051 }, { "epoch": 1.1236827509706044, "grad_norm": 0.19612254202365875, "learning_rate": 1.033000197672237e-05, "loss": 0.5025, "step": 4052 }, { "epoch": 1.1239600665557403, "grad_norm": 0.19224813580513, "learning_rate": 1.0324564387054535e-05, "loss": 0.494, "step": 4053 }, { "epoch": 1.1242373821408762, "grad_norm": 0.1851874589920044, "learning_rate": 1.0319127221871823e-05, "loss": 0.521, "step": 4054 }, { "epoch": 1.124514697726012, "grad_norm": 0.19579973816871643, "learning_rate": 1.0313690482235168e-05, "loss": 0.5012, "step": 4055 }, { "epoch": 1.124792013311148, "grad_norm": 0.21103787422180176, "learning_rate": 1.0308254169205428e-05, "loss": 0.4969, "step": 4056 }, { "epoch": 1.1250693288962839, "grad_norm": 0.1894204467535019, "learning_rate": 1.030281828384337e-05, "loss": 0.494, "step": 4057 }, { "epoch": 1.1253466444814197, "grad_norm": 0.1914178431034088, "learning_rate": 1.0297382827209679e-05, "loss": 0.5094, "step": 4058 }, { "epoch": 1.1256239600665556, "grad_norm": 0.18707357347011566, "learning_rate": 1.0291947800364948e-05, "loss": 0.5023, "step": 4059 }, { "epoch": 1.1259012756516915, "grad_norm": 0.19480562210083008, "learning_rate": 1.0286513204369712e-05, "loss": 0.5154, "step": 4060 }, { "epoch": 1.1261785912368274, "grad_norm": 0.18776677548885345, "learning_rate": 1.0281079040284392e-05, "loss": 0.5143, "step": 4061 }, { "epoch": 1.1264559068219633, "grad_norm": 0.18544964492321014, "learning_rate": 1.0275645309169337e-05, "loss": 0.4906, "step": 4062 }, { "epoch": 1.1267332224070992, "grad_norm": 0.18632815778255463, "learning_rate": 1.0270212012084817e-05, "loss": 0.5276, "step": 4063 }, { "epoch": 1.127010537992235, "grad_norm": 0.206988126039505, "learning_rate": 1.0264779150091014e-05, "loss": 0.5146, "step": 4064 }, { "epoch": 1.127287853577371, "grad_norm": 0.19988800585269928, "learning_rate": 1.0259346724248018e-05, "loss": 0.4973, "step": 4065 }, { "epoch": 1.1275651691625068, "grad_norm": 0.18873746693134308, "learning_rate": 1.0253914735615838e-05, "loss": 0.5136, "step": 4066 }, { "epoch": 1.1278424847476427, "grad_norm": 0.19661124050617218, "learning_rate": 1.0248483185254403e-05, "loss": 0.5418, "step": 4067 }, { "epoch": 1.1281198003327786, "grad_norm": 0.2008582353591919, "learning_rate": 1.0243052074223555e-05, "loss": 0.508, "step": 4068 }, { "epoch": 1.1283971159179145, "grad_norm": 0.19019687175750732, "learning_rate": 1.023762140358304e-05, "loss": 0.5072, "step": 4069 }, { "epoch": 1.1286744315030504, "grad_norm": 0.19475378096103668, "learning_rate": 1.0232191174392532e-05, "loss": 0.5282, "step": 4070 }, { "epoch": 1.1289517470881862, "grad_norm": 0.18883496522903442, "learning_rate": 1.0226761387711612e-05, "loss": 0.5092, "step": 4071 }, { "epoch": 1.1292290626733221, "grad_norm": 0.1904006004333496, "learning_rate": 1.0221332044599768e-05, "loss": 0.5302, "step": 4072 }, { "epoch": 1.129506378258458, "grad_norm": 0.19037893414497375, "learning_rate": 1.0215903146116417e-05, "loss": 0.4916, "step": 4073 }, { "epoch": 1.129783693843594, "grad_norm": 0.2003484070301056, "learning_rate": 1.021047469332088e-05, "loss": 0.5259, "step": 4074 }, { "epoch": 1.1300610094287298, "grad_norm": 0.18021680414676666, "learning_rate": 1.0205046687272392e-05, "loss": 0.5038, "step": 4075 }, { "epoch": 1.1303383250138657, "grad_norm": 0.19366797804832458, "learning_rate": 1.0199619129030093e-05, "loss": 0.5091, "step": 4076 }, { "epoch": 1.1306156405990015, "grad_norm": 0.19460560381412506, "learning_rate": 1.0194192019653053e-05, "loss": 0.5071, "step": 4077 }, { "epoch": 1.1308929561841374, "grad_norm": 0.18521569669246674, "learning_rate": 1.018876536020024e-05, "loss": 0.5031, "step": 4078 }, { "epoch": 1.1311702717692733, "grad_norm": 0.18586260080337524, "learning_rate": 1.018333915173054e-05, "loss": 0.4893, "step": 4079 }, { "epoch": 1.1314475873544092, "grad_norm": 0.20619140565395355, "learning_rate": 1.0177913395302748e-05, "loss": 0.5124, "step": 4080 }, { "epoch": 1.131724902939545, "grad_norm": 0.2014811784029007, "learning_rate": 1.0172488091975583e-05, "loss": 0.5045, "step": 4081 }, { "epoch": 1.132002218524681, "grad_norm": 0.2058635950088501, "learning_rate": 1.0167063242807654e-05, "loss": 0.4832, "step": 4082 }, { "epoch": 1.1322795341098169, "grad_norm": 0.19578316807746887, "learning_rate": 1.016163884885749e-05, "loss": 0.501, "step": 4083 }, { "epoch": 1.1325568496949527, "grad_norm": 0.1896321028470993, "learning_rate": 1.0156214911183546e-05, "loss": 0.5113, "step": 4084 }, { "epoch": 1.1328341652800886, "grad_norm": 0.19131551682949066, "learning_rate": 1.0150791430844172e-05, "loss": 0.502, "step": 4085 }, { "epoch": 1.1331114808652245, "grad_norm": 0.20685172080993652, "learning_rate": 1.0145368408897624e-05, "loss": 0.4907, "step": 4086 }, { "epoch": 1.1333887964503604, "grad_norm": 0.21301031112670898, "learning_rate": 1.0139945846402091e-05, "loss": 0.5013, "step": 4087 }, { "epoch": 1.1336661120354963, "grad_norm": 0.1879834532737732, "learning_rate": 1.013452374441565e-05, "loss": 0.5197, "step": 4088 }, { "epoch": 1.1339434276206322, "grad_norm": 0.19364705681800842, "learning_rate": 1.01291021039963e-05, "loss": 0.5064, "step": 4089 }, { "epoch": 1.134220743205768, "grad_norm": 0.18613065779209137, "learning_rate": 1.012368092620194e-05, "loss": 0.4874, "step": 4090 }, { "epoch": 1.134498058790904, "grad_norm": 0.1905667632818222, "learning_rate": 1.0118260212090397e-05, "loss": 0.5038, "step": 4091 }, { "epoch": 1.1347753743760398, "grad_norm": 0.19485412538051605, "learning_rate": 1.0112839962719387e-05, "loss": 0.4862, "step": 4092 }, { "epoch": 1.1350526899611757, "grad_norm": 0.1890053004026413, "learning_rate": 1.0107420179146542e-05, "loss": 0.5268, "step": 4093 }, { "epoch": 1.1353300055463116, "grad_norm": 0.19711565971374512, "learning_rate": 1.0102000862429415e-05, "loss": 0.4994, "step": 4094 }, { "epoch": 1.1356073211314475, "grad_norm": 0.18928836286067963, "learning_rate": 1.0096582013625455e-05, "loss": 0.5112, "step": 4095 }, { "epoch": 1.1358846367165834, "grad_norm": 0.1987476795911789, "learning_rate": 1.0091163633792023e-05, "loss": 0.4815, "step": 4096 }, { "epoch": 1.1361619523017192, "grad_norm": 0.20385397970676422, "learning_rate": 1.0085745723986379e-05, "loss": 0.5387, "step": 4097 }, { "epoch": 1.1364392678868551, "grad_norm": 0.1833743155002594, "learning_rate": 1.0080328285265715e-05, "loss": 0.5216, "step": 4098 }, { "epoch": 1.136716583471991, "grad_norm": 0.19815048575401306, "learning_rate": 1.007491131868711e-05, "loss": 0.5257, "step": 4099 }, { "epoch": 1.1369938990571269, "grad_norm": 0.19522307813167572, "learning_rate": 1.0069494825307554e-05, "loss": 0.5109, "step": 4100 }, { "epoch": 1.1372712146422628, "grad_norm": 0.20600605010986328, "learning_rate": 1.0064078806183956e-05, "loss": 0.5222, "step": 4101 }, { "epoch": 1.1375485302273987, "grad_norm": 0.19125322997570038, "learning_rate": 1.0058663262373125e-05, "loss": 0.5145, "step": 4102 }, { "epoch": 1.1378258458125345, "grad_norm": 0.19589276611804962, "learning_rate": 1.005324819493177e-05, "loss": 0.5181, "step": 4103 }, { "epoch": 1.1381031613976704, "grad_norm": 0.182989239692688, "learning_rate": 1.0047833604916515e-05, "loss": 0.5209, "step": 4104 }, { "epoch": 1.1383804769828063, "grad_norm": 0.18455688655376434, "learning_rate": 1.0042419493383896e-05, "loss": 0.4888, "step": 4105 }, { "epoch": 1.1386577925679422, "grad_norm": 0.19933606684207916, "learning_rate": 1.0037005861390346e-05, "loss": 0.5306, "step": 4106 }, { "epoch": 1.138935108153078, "grad_norm": 0.19362375140190125, "learning_rate": 1.0031592709992204e-05, "loss": 0.4934, "step": 4107 }, { "epoch": 1.139212423738214, "grad_norm": 0.1925627440214157, "learning_rate": 1.0026180040245728e-05, "loss": 0.5044, "step": 4108 }, { "epoch": 1.1394897393233498, "grad_norm": 0.19093094766139984, "learning_rate": 1.0020767853207069e-05, "loss": 0.5083, "step": 4109 }, { "epoch": 1.1397670549084857, "grad_norm": 0.18980608880519867, "learning_rate": 1.0015356149932288e-05, "loss": 0.5099, "step": 4110 }, { "epoch": 1.1400443704936216, "grad_norm": 0.18821600079536438, "learning_rate": 1.0009944931477346e-05, "loss": 0.5194, "step": 4111 }, { "epoch": 1.1403216860787575, "grad_norm": 0.1851465255022049, "learning_rate": 1.0004534198898124e-05, "loss": 0.503, "step": 4112 }, { "epoch": 1.1405990016638934, "grad_norm": 0.19353441894054413, "learning_rate": 9.9991239532504e-06, "loss": 0.511, "step": 4113 }, { "epoch": 1.1408763172490295, "grad_norm": 0.19810713827610016, "learning_rate": 9.993714195589847e-06, "loss": 0.5106, "step": 4114 }, { "epoch": 1.1411536328341654, "grad_norm": 0.19608542323112488, "learning_rate": 9.98830492697206e-06, "loss": 0.5384, "step": 4115 }, { "epoch": 1.1414309484193013, "grad_norm": 0.43584343791007996, "learning_rate": 9.982896148452527e-06, "loss": 0.5011, "step": 4116 }, { "epoch": 1.1417082640044371, "grad_norm": 0.19775094091892242, "learning_rate": 9.977487861086647e-06, "loss": 0.5184, "step": 4117 }, { "epoch": 1.141985579589573, "grad_norm": 0.2092062532901764, "learning_rate": 9.97208006592971e-06, "loss": 0.4917, "step": 4118 }, { "epoch": 1.142262895174709, "grad_norm": 0.18967856466770172, "learning_rate": 9.966672764036936e-06, "loss": 0.4851, "step": 4119 }, { "epoch": 1.1425402107598448, "grad_norm": 0.1816510409116745, "learning_rate": 9.961265956463424e-06, "loss": 0.4985, "step": 4120 }, { "epoch": 1.1428175263449807, "grad_norm": 0.18744854629039764, "learning_rate": 9.955859644264183e-06, "loss": 0.4988, "step": 4121 }, { "epoch": 1.1430948419301166, "grad_norm": 0.19459229707717896, "learning_rate": 9.950453828494132e-06, "loss": 0.5171, "step": 4122 }, { "epoch": 1.1433721575152525, "grad_norm": 0.19057153165340424, "learning_rate": 9.945048510208094e-06, "loss": 0.5126, "step": 4123 }, { "epoch": 1.1436494731003883, "grad_norm": 0.19819004833698273, "learning_rate": 9.939643690460779e-06, "loss": 0.5449, "step": 4124 }, { "epoch": 1.1439267886855242, "grad_norm": 0.19458059966564178, "learning_rate": 9.934239370306813e-06, "loss": 0.4927, "step": 4125 }, { "epoch": 1.14420410427066, "grad_norm": 0.20325776934623718, "learning_rate": 9.928835550800727e-06, "loss": 0.5221, "step": 4126 }, { "epoch": 1.144481419855796, "grad_norm": 0.19814957678318024, "learning_rate": 9.923432232996947e-06, "loss": 0.5417, "step": 4127 }, { "epoch": 1.1447587354409319, "grad_norm": 0.1916591227054596, "learning_rate": 9.9180294179498e-06, "loss": 0.4876, "step": 4128 }, { "epoch": 1.1450360510260678, "grad_norm": 0.19258858263492584, "learning_rate": 9.912627106713528e-06, "loss": 0.4838, "step": 4129 }, { "epoch": 1.1453133666112036, "grad_norm": 0.18850946426391602, "learning_rate": 9.907225300342256e-06, "loss": 0.4996, "step": 4130 }, { "epoch": 1.1455906821963395, "grad_norm": 0.21922184526920319, "learning_rate": 9.901823999890021e-06, "loss": 0.5001, "step": 4131 }, { "epoch": 1.1458679977814754, "grad_norm": 0.1909458041191101, "learning_rate": 9.896423206410759e-06, "loss": 0.5119, "step": 4132 }, { "epoch": 1.1461453133666113, "grad_norm": 0.18811345100402832, "learning_rate": 9.891022920958313e-06, "loss": 0.5086, "step": 4133 }, { "epoch": 1.1464226289517472, "grad_norm": 0.20187973976135254, "learning_rate": 9.88562314458642e-06, "loss": 0.5163, "step": 4134 }, { "epoch": 1.146699944536883, "grad_norm": 0.1905764937400818, "learning_rate": 9.880223878348713e-06, "loss": 0.5092, "step": 4135 }, { "epoch": 1.146977260122019, "grad_norm": 0.19595111906528473, "learning_rate": 9.874825123298741e-06, "loss": 0.5066, "step": 4136 }, { "epoch": 1.1472545757071548, "grad_norm": 0.19632695615291595, "learning_rate": 9.869426880489939e-06, "loss": 0.5007, "step": 4137 }, { "epoch": 1.1475318912922907, "grad_norm": 0.19036737084388733, "learning_rate": 9.864029150975646e-06, "loss": 0.4899, "step": 4138 }, { "epoch": 1.1478092068774266, "grad_norm": 0.18314070999622345, "learning_rate": 9.858631935809107e-06, "loss": 0.4764, "step": 4139 }, { "epoch": 1.1480865224625625, "grad_norm": 0.25677627325057983, "learning_rate": 9.85323523604346e-06, "loss": 0.5034, "step": 4140 }, { "epoch": 1.1483638380476984, "grad_norm": 0.18910054862499237, "learning_rate": 9.847839052731742e-06, "loss": 0.5234, "step": 4141 }, { "epoch": 1.1486411536328343, "grad_norm": 0.19723178446292877, "learning_rate": 9.842443386926892e-06, "loss": 0.5099, "step": 4142 }, { "epoch": 1.1489184692179701, "grad_norm": 0.1976155936717987, "learning_rate": 9.837048239681747e-06, "loss": 0.498, "step": 4143 }, { "epoch": 1.149195784803106, "grad_norm": 0.20631377398967743, "learning_rate": 9.83165361204905e-06, "loss": 0.4832, "step": 4144 }, { "epoch": 1.149473100388242, "grad_norm": 0.204478919506073, "learning_rate": 9.826259505081419e-06, "loss": 0.494, "step": 4145 }, { "epoch": 1.1497504159733778, "grad_norm": 0.19692255556583405, "learning_rate": 9.820865919831406e-06, "loss": 0.5219, "step": 4146 }, { "epoch": 1.1500277315585137, "grad_norm": 0.1929834634065628, "learning_rate": 9.815472857351433e-06, "loss": 0.4878, "step": 4147 }, { "epoch": 1.1503050471436496, "grad_norm": 0.1798853874206543, "learning_rate": 9.810080318693832e-06, "loss": 0.4949, "step": 4148 }, { "epoch": 1.1505823627287854, "grad_norm": 0.19789017736911774, "learning_rate": 9.804688304910824e-06, "loss": 0.5227, "step": 4149 }, { "epoch": 1.1508596783139213, "grad_norm": 0.19549022614955902, "learning_rate": 9.799296817054542e-06, "loss": 0.5191, "step": 4150 }, { "epoch": 1.1511369938990572, "grad_norm": 0.19911247491836548, "learning_rate": 9.793905856177008e-06, "loss": 0.5172, "step": 4151 }, { "epoch": 1.151414309484193, "grad_norm": 0.29837673902511597, "learning_rate": 9.78851542333013e-06, "loss": 0.5081, "step": 4152 }, { "epoch": 1.151691625069329, "grad_norm": 0.1918451339006424, "learning_rate": 9.783125519565737e-06, "loss": 0.5116, "step": 4153 }, { "epoch": 1.1519689406544649, "grad_norm": 0.19545188546180725, "learning_rate": 9.777736145935538e-06, "loss": 0.5042, "step": 4154 }, { "epoch": 1.1522462562396008, "grad_norm": 0.19828790426254272, "learning_rate": 9.772347303491144e-06, "loss": 0.4944, "step": 4155 }, { "epoch": 1.1525235718247366, "grad_norm": 0.19536136090755463, "learning_rate": 9.766958993284051e-06, "loss": 0.511, "step": 4156 }, { "epoch": 1.1528008874098725, "grad_norm": 0.19933415949344635, "learning_rate": 9.761571216365678e-06, "loss": 0.5035, "step": 4157 }, { "epoch": 1.1530782029950084, "grad_norm": 0.20315343141555786, "learning_rate": 9.75618397378731e-06, "loss": 0.5105, "step": 4158 }, { "epoch": 1.1533555185801443, "grad_norm": 0.19544823467731476, "learning_rate": 9.750797266600142e-06, "loss": 0.4965, "step": 4159 }, { "epoch": 1.1536328341652802, "grad_norm": 0.1914399266242981, "learning_rate": 9.74541109585527e-06, "loss": 0.4964, "step": 4160 }, { "epoch": 1.153910149750416, "grad_norm": 0.19952736794948578, "learning_rate": 9.740025462603675e-06, "loss": 0.4937, "step": 4161 }, { "epoch": 1.154187465335552, "grad_norm": 0.19488787651062012, "learning_rate": 9.734640367896236e-06, "loss": 0.5137, "step": 4162 }, { "epoch": 1.1544647809206878, "grad_norm": 0.18852956593036652, "learning_rate": 9.729255812783724e-06, "loss": 0.5133, "step": 4163 }, { "epoch": 1.1547420965058237, "grad_norm": 0.23788957297801971, "learning_rate": 9.723871798316815e-06, "loss": 0.5007, "step": 4164 }, { "epoch": 1.1550194120909596, "grad_norm": 0.19701410830020905, "learning_rate": 9.718488325546072e-06, "loss": 0.4874, "step": 4165 }, { "epoch": 1.1552967276760955, "grad_norm": 0.1924983710050583, "learning_rate": 9.713105395521947e-06, "loss": 0.491, "step": 4166 }, { "epoch": 1.1555740432612314, "grad_norm": 0.19456344842910767, "learning_rate": 9.707723009294802e-06, "loss": 0.5032, "step": 4167 }, { "epoch": 1.1558513588463672, "grad_norm": 0.2932334840297699, "learning_rate": 9.702341167914875e-06, "loss": 0.5158, "step": 4168 }, { "epoch": 1.1561286744315031, "grad_norm": 0.19522738456726074, "learning_rate": 9.696959872432311e-06, "loss": 0.486, "step": 4169 }, { "epoch": 1.156405990016639, "grad_norm": 0.19203148782253265, "learning_rate": 9.691579123897137e-06, "loss": 0.5187, "step": 4170 }, { "epoch": 1.156683305601775, "grad_norm": 0.19738537073135376, "learning_rate": 9.686198923359286e-06, "loss": 0.4969, "step": 4171 }, { "epoch": 1.1569606211869108, "grad_norm": 0.198639377951622, "learning_rate": 9.680819271868578e-06, "loss": 0.4974, "step": 4172 }, { "epoch": 1.1572379367720467, "grad_norm": 0.19397403299808502, "learning_rate": 9.675440170474717e-06, "loss": 0.513, "step": 4173 }, { "epoch": 1.1575152523571826, "grad_norm": 0.19262705743312836, "learning_rate": 9.670061620227318e-06, "loss": 0.5025, "step": 4174 }, { "epoch": 1.1577925679423184, "grad_norm": 0.20281995832920074, "learning_rate": 9.664683622175874e-06, "loss": 0.5014, "step": 4175 }, { "epoch": 1.1580698835274543, "grad_norm": 0.18004655838012695, "learning_rate": 9.659306177369779e-06, "loss": 0.5118, "step": 4176 }, { "epoch": 1.1583471991125902, "grad_norm": 0.21135984361171722, "learning_rate": 9.653929286858302e-06, "loss": 0.4946, "step": 4177 }, { "epoch": 1.158624514697726, "grad_norm": 0.2009570449590683, "learning_rate": 9.648552951690635e-06, "loss": 0.5502, "step": 4178 }, { "epoch": 1.158901830282862, "grad_norm": 0.19548632204532623, "learning_rate": 9.643177172915833e-06, "loss": 0.5107, "step": 4179 }, { "epoch": 1.1591791458679979, "grad_norm": 0.2423223853111267, "learning_rate": 9.637801951582851e-06, "loss": 0.4754, "step": 4180 }, { "epoch": 1.1594564614531337, "grad_norm": 0.18860936164855957, "learning_rate": 9.632427288740545e-06, "loss": 0.4951, "step": 4181 }, { "epoch": 1.1597337770382696, "grad_norm": 0.19608083367347717, "learning_rate": 9.627053185437651e-06, "loss": 0.4936, "step": 4182 }, { "epoch": 1.1600110926234055, "grad_norm": 0.18897056579589844, "learning_rate": 9.621679642722794e-06, "loss": 0.507, "step": 4183 }, { "epoch": 1.1602884082085414, "grad_norm": 0.18415631353855133, "learning_rate": 9.616306661644497e-06, "loss": 0.5157, "step": 4184 }, { "epoch": 1.1605657237936773, "grad_norm": 0.300893098115921, "learning_rate": 9.610934243251177e-06, "loss": 0.5148, "step": 4185 }, { "epoch": 1.1608430393788132, "grad_norm": 0.19000142812728882, "learning_rate": 9.60556238859113e-06, "loss": 0.5156, "step": 4186 }, { "epoch": 1.161120354963949, "grad_norm": 0.20633526146411896, "learning_rate": 9.60019109871254e-06, "loss": 0.4992, "step": 4187 }, { "epoch": 1.161397670549085, "grad_norm": 0.20525197684764862, "learning_rate": 9.594820374663506e-06, "loss": 0.4937, "step": 4188 }, { "epoch": 1.1616749861342208, "grad_norm": 0.19740667939186096, "learning_rate": 9.589450217491984e-06, "loss": 0.4853, "step": 4189 }, { "epoch": 1.1619523017193567, "grad_norm": 0.20025213062763214, "learning_rate": 9.58408062824584e-06, "loss": 0.5031, "step": 4190 }, { "epoch": 1.1622296173044926, "grad_norm": 0.20170235633850098, "learning_rate": 9.578711607972815e-06, "loss": 0.5283, "step": 4191 }, { "epoch": 1.1625069328896285, "grad_norm": 0.19781838357448578, "learning_rate": 9.573343157720558e-06, "loss": 0.5155, "step": 4192 }, { "epoch": 1.1627842484747644, "grad_norm": 0.19260545074939728, "learning_rate": 9.567975278536595e-06, "loss": 0.4877, "step": 4193 }, { "epoch": 1.1630615640599002, "grad_norm": 0.1921887993812561, "learning_rate": 9.562607971468328e-06, "loss": 0.5139, "step": 4194 }, { "epoch": 1.1633388796450361, "grad_norm": 0.19691529870033264, "learning_rate": 9.55724123756308e-06, "loss": 0.5187, "step": 4195 }, { "epoch": 1.163616195230172, "grad_norm": 0.1975255310535431, "learning_rate": 9.551875077868028e-06, "loss": 0.5256, "step": 4196 }, { "epoch": 1.163893510815308, "grad_norm": 0.20154882967472076, "learning_rate": 9.546509493430257e-06, "loss": 0.5213, "step": 4197 }, { "epoch": 1.1641708264004438, "grad_norm": 0.18560223281383514, "learning_rate": 9.541144485296737e-06, "loss": 0.501, "step": 4198 }, { "epoch": 1.1644481419855797, "grad_norm": 0.18955029547214508, "learning_rate": 9.535780054514325e-06, "loss": 0.5273, "step": 4199 }, { "epoch": 1.1647254575707155, "grad_norm": 0.1917003095149994, "learning_rate": 9.530416202129756e-06, "loss": 0.4887, "step": 4200 }, { "epoch": 1.1650027731558514, "grad_norm": 0.20266690850257874, "learning_rate": 9.525052929189661e-06, "loss": 0.5112, "step": 4201 }, { "epoch": 1.1652800887409873, "grad_norm": 0.18932059407234192, "learning_rate": 9.519690236740563e-06, "loss": 0.4984, "step": 4202 }, { "epoch": 1.1655574043261232, "grad_norm": 0.19648674130439758, "learning_rate": 9.51432812582886e-06, "loss": 0.5004, "step": 4203 }, { "epoch": 1.165834719911259, "grad_norm": 0.19848087430000305, "learning_rate": 9.508966597500843e-06, "loss": 0.5153, "step": 4204 }, { "epoch": 1.166112035496395, "grad_norm": 0.2016671746969223, "learning_rate": 9.50360565280269e-06, "loss": 0.5134, "step": 4205 }, { "epoch": 1.1663893510815309, "grad_norm": 0.22022029757499695, "learning_rate": 9.498245292780463e-06, "loss": 0.5048, "step": 4206 }, { "epoch": 1.1666666666666667, "grad_norm": 0.19555556774139404, "learning_rate": 9.49288551848011e-06, "loss": 0.511, "step": 4207 }, { "epoch": 1.1669439822518026, "grad_norm": 0.21261551976203918, "learning_rate": 9.487526330947461e-06, "loss": 0.5123, "step": 4208 }, { "epoch": 1.1672212978369385, "grad_norm": 0.20261640846729279, "learning_rate": 9.482167731228241e-06, "loss": 0.5132, "step": 4209 }, { "epoch": 1.1674986134220744, "grad_norm": 0.20545309782028198, "learning_rate": 9.476809720368054e-06, "loss": 0.5108, "step": 4210 }, { "epoch": 1.1677759290072103, "grad_norm": 0.18970844149589539, "learning_rate": 9.471452299412384e-06, "loss": 0.5205, "step": 4211 }, { "epoch": 1.1680532445923462, "grad_norm": 0.19278816878795624, "learning_rate": 9.466095469406613e-06, "loss": 0.5095, "step": 4212 }, { "epoch": 1.168330560177482, "grad_norm": 0.19161520898342133, "learning_rate": 9.460739231395999e-06, "loss": 0.5151, "step": 4213 }, { "epoch": 1.168607875762618, "grad_norm": 0.19239401817321777, "learning_rate": 9.455383586425685e-06, "loss": 0.4932, "step": 4214 }, { "epoch": 1.1688851913477538, "grad_norm": 0.18713194131851196, "learning_rate": 9.450028535540692e-06, "loss": 0.4842, "step": 4215 }, { "epoch": 1.1691625069328897, "grad_norm": 0.1931910663843155, "learning_rate": 9.444674079785948e-06, "loss": 0.4839, "step": 4216 }, { "epoch": 1.1694398225180256, "grad_norm": 0.2027042806148529, "learning_rate": 9.439320220206236e-06, "loss": 0.4839, "step": 4217 }, { "epoch": 1.1697171381031615, "grad_norm": 0.19624637067317963, "learning_rate": 9.43396695784624e-06, "loss": 0.5165, "step": 4218 }, { "epoch": 1.1699944536882974, "grad_norm": 0.19271767139434814, "learning_rate": 9.428614293750523e-06, "loss": 0.5085, "step": 4219 }, { "epoch": 1.1702717692734332, "grad_norm": 0.19581159949302673, "learning_rate": 9.423262228963537e-06, "loss": 0.5038, "step": 4220 }, { "epoch": 1.1705490848585691, "grad_norm": 0.18047882616519928, "learning_rate": 9.417910764529605e-06, "loss": 0.5177, "step": 4221 }, { "epoch": 1.170826400443705, "grad_norm": 0.19481384754180908, "learning_rate": 9.412559901492935e-06, "loss": 0.528, "step": 4222 }, { "epoch": 1.171103716028841, "grad_norm": 0.2017330676317215, "learning_rate": 9.407209640897635e-06, "loss": 0.4888, "step": 4223 }, { "epoch": 1.1713810316139768, "grad_norm": 0.2008255124092102, "learning_rate": 9.401859983787674e-06, "loss": 0.5369, "step": 4224 }, { "epoch": 1.1716583471991127, "grad_norm": 0.19789865612983704, "learning_rate": 9.396510931206912e-06, "loss": 0.5095, "step": 4225 }, { "epoch": 1.1719356627842485, "grad_norm": 0.18955931067466736, "learning_rate": 9.391162484199097e-06, "loss": 0.4998, "step": 4226 }, { "epoch": 1.1722129783693844, "grad_norm": 0.19305095076560974, "learning_rate": 9.385814643807845e-06, "loss": 0.5198, "step": 4227 }, { "epoch": 1.1724902939545203, "grad_norm": 0.19664254784584045, "learning_rate": 9.380467411076667e-06, "loss": 0.5123, "step": 4228 }, { "epoch": 1.1727676095396562, "grad_norm": 0.1945103108882904, "learning_rate": 9.375120787048944e-06, "loss": 0.5063, "step": 4229 }, { "epoch": 1.173044925124792, "grad_norm": 0.19993437826633453, "learning_rate": 9.36977477276795e-06, "loss": 0.502, "step": 4230 }, { "epoch": 1.173322240709928, "grad_norm": 0.19845424592494965, "learning_rate": 9.364429369276833e-06, "loss": 0.4798, "step": 4231 }, { "epoch": 1.1735995562950639, "grad_norm": 0.21802715957164764, "learning_rate": 9.359084577618615e-06, "loss": 0.5008, "step": 4232 }, { "epoch": 1.1738768718801997, "grad_norm": 0.20177268981933594, "learning_rate": 9.353740398836222e-06, "loss": 0.5126, "step": 4233 }, { "epoch": 1.1741541874653356, "grad_norm": 0.21453998982906342, "learning_rate": 9.34839683397243e-06, "loss": 0.5087, "step": 4234 }, { "epoch": 1.1744315030504715, "grad_norm": 0.19861598312854767, "learning_rate": 9.343053884069922e-06, "loss": 0.5108, "step": 4235 }, { "epoch": 1.1747088186356074, "grad_norm": 0.19748757779598236, "learning_rate": 9.337711550171232e-06, "loss": 0.5079, "step": 4236 }, { "epoch": 1.1749861342207433, "grad_norm": 0.19762682914733887, "learning_rate": 9.33236983331881e-06, "loss": 0.4911, "step": 4237 }, { "epoch": 1.1752634498058792, "grad_norm": 0.19971820712089539, "learning_rate": 9.327028734554957e-06, "loss": 0.5251, "step": 4238 }, { "epoch": 1.175540765391015, "grad_norm": 0.20210981369018555, "learning_rate": 9.321688254921862e-06, "loss": 0.5057, "step": 4239 }, { "epoch": 1.175818080976151, "grad_norm": 0.1958346962928772, "learning_rate": 9.316348395461598e-06, "loss": 0.4868, "step": 4240 }, { "epoch": 1.1760953965612868, "grad_norm": 0.20058873295783997, "learning_rate": 9.31100915721611e-06, "loss": 0.5178, "step": 4241 }, { "epoch": 1.1763727121464227, "grad_norm": 0.19912679493427277, "learning_rate": 9.305670541227232e-06, "loss": 0.5216, "step": 4242 }, { "epoch": 1.1766500277315586, "grad_norm": 0.21662193536758423, "learning_rate": 9.300332548536655e-06, "loss": 0.5162, "step": 4243 }, { "epoch": 1.1769273433166945, "grad_norm": 0.19769549369812012, "learning_rate": 9.294995180185976e-06, "loss": 0.5196, "step": 4244 }, { "epoch": 1.1772046589018303, "grad_norm": 0.19544221460819244, "learning_rate": 9.289658437216652e-06, "loss": 0.5208, "step": 4245 }, { "epoch": 1.1774819744869662, "grad_norm": 0.19446879625320435, "learning_rate": 9.28432232067002e-06, "loss": 0.5124, "step": 4246 }, { "epoch": 1.1777592900721021, "grad_norm": 0.27411291003227234, "learning_rate": 9.278986831587305e-06, "loss": 0.4815, "step": 4247 }, { "epoch": 1.178036605657238, "grad_norm": 0.19502323865890503, "learning_rate": 9.273651971009599e-06, "loss": 0.4944, "step": 4248 }, { "epoch": 1.1783139212423739, "grad_norm": 0.2004517912864685, "learning_rate": 9.268317739977872e-06, "loss": 0.491, "step": 4249 }, { "epoch": 1.1785912368275098, "grad_norm": 0.21437488496303558, "learning_rate": 9.262984139532973e-06, "loss": 0.4984, "step": 4250 }, { "epoch": 1.1788685524126457, "grad_norm": 0.19150178134441376, "learning_rate": 9.257651170715635e-06, "loss": 0.4888, "step": 4251 }, { "epoch": 1.1791458679977815, "grad_norm": 0.21565623581409454, "learning_rate": 9.25231883456646e-06, "loss": 0.5025, "step": 4252 }, { "epoch": 1.1794231835829174, "grad_norm": 0.20012259483337402, "learning_rate": 9.246987132125919e-06, "loss": 0.533, "step": 4253 }, { "epoch": 1.1797004991680533, "grad_norm": 0.1878737509250641, "learning_rate": 9.241656064434382e-06, "loss": 0.5027, "step": 4254 }, { "epoch": 1.1799778147531892, "grad_norm": 0.19911964237689972, "learning_rate": 9.236325632532074e-06, "loss": 0.494, "step": 4255 }, { "epoch": 1.180255130338325, "grad_norm": 0.208717480301857, "learning_rate": 9.230995837459103e-06, "loss": 0.5262, "step": 4256 }, { "epoch": 1.180532445923461, "grad_norm": 0.19873370230197906, "learning_rate": 9.225666680255452e-06, "loss": 0.5128, "step": 4257 }, { "epoch": 1.1808097615085968, "grad_norm": 0.19655872881412506, "learning_rate": 9.22033816196099e-06, "loss": 0.4987, "step": 4258 }, { "epoch": 1.1810870770937327, "grad_norm": 0.2015899121761322, "learning_rate": 9.215010283615443e-06, "loss": 0.5177, "step": 4259 }, { "epoch": 1.1813643926788686, "grad_norm": 0.19780929386615753, "learning_rate": 9.20968304625842e-06, "loss": 0.4959, "step": 4260 }, { "epoch": 1.1816417082640045, "grad_norm": 0.18897369503974915, "learning_rate": 9.204356450929413e-06, "loss": 0.4847, "step": 4261 }, { "epoch": 1.1819190238491404, "grad_norm": 0.19352437555789948, "learning_rate": 9.19903049866778e-06, "loss": 0.4902, "step": 4262 }, { "epoch": 1.1821963394342763, "grad_norm": 0.2053721845149994, "learning_rate": 9.19370519051275e-06, "loss": 0.5054, "step": 4263 }, { "epoch": 1.1824736550194122, "grad_norm": 0.19881947338581085, "learning_rate": 9.188380527503443e-06, "loss": 0.4888, "step": 4264 }, { "epoch": 1.182750970604548, "grad_norm": 0.19415348768234253, "learning_rate": 9.18305651067883e-06, "loss": 0.4889, "step": 4265 }, { "epoch": 1.183028286189684, "grad_norm": 0.1971830129623413, "learning_rate": 9.177733141077775e-06, "loss": 0.5026, "step": 4266 }, { "epoch": 1.1833056017748198, "grad_norm": 0.19726091623306274, "learning_rate": 9.172410419739e-06, "loss": 0.5027, "step": 4267 }, { "epoch": 1.1835829173599557, "grad_norm": 0.19601614773273468, "learning_rate": 9.167088347701119e-06, "loss": 0.495, "step": 4268 }, { "epoch": 1.1838602329450916, "grad_norm": 0.18969157338142395, "learning_rate": 9.16176692600261e-06, "loss": 0.4859, "step": 4269 }, { "epoch": 1.1841375485302275, "grad_norm": 0.18396854400634766, "learning_rate": 9.156446155681811e-06, "loss": 0.4981, "step": 4270 }, { "epoch": 1.1844148641153633, "grad_norm": 0.19818167388439178, "learning_rate": 9.151126037776955e-06, "loss": 0.5199, "step": 4271 }, { "epoch": 1.1846921797004992, "grad_norm": 0.20459376275539398, "learning_rate": 9.145806573326137e-06, "loss": 0.5215, "step": 4272 }, { "epoch": 1.184969495285635, "grad_norm": 0.1927737444639206, "learning_rate": 9.140487763367328e-06, "loss": 0.5089, "step": 4273 }, { "epoch": 1.185246810870771, "grad_norm": 0.18682962656021118, "learning_rate": 9.135169608938354e-06, "loss": 0.4812, "step": 4274 }, { "epoch": 1.1855241264559069, "grad_norm": 0.2066759467124939, "learning_rate": 9.12985211107695e-06, "loss": 0.5111, "step": 4275 }, { "epoch": 1.1858014420410428, "grad_norm": 0.20103536546230316, "learning_rate": 9.124535270820685e-06, "loss": 0.4976, "step": 4276 }, { "epoch": 1.1860787576261786, "grad_norm": 0.1959027498960495, "learning_rate": 9.119219089207017e-06, "loss": 0.5396, "step": 4277 }, { "epoch": 1.1863560732113145, "grad_norm": 0.1914219707250595, "learning_rate": 9.11390356727328e-06, "loss": 0.4949, "step": 4278 }, { "epoch": 1.1866333887964504, "grad_norm": 0.20492805540561676, "learning_rate": 9.108588706056673e-06, "loss": 0.5225, "step": 4279 }, { "epoch": 1.1869107043815863, "grad_norm": 0.19187003374099731, "learning_rate": 9.10327450659426e-06, "loss": 0.4893, "step": 4280 }, { "epoch": 1.1871880199667222, "grad_norm": 0.2073322832584381, "learning_rate": 9.097960969922983e-06, "loss": 0.5182, "step": 4281 }, { "epoch": 1.187465335551858, "grad_norm": 0.1949753612279892, "learning_rate": 9.092648097079659e-06, "loss": 0.5037, "step": 4282 }, { "epoch": 1.187742651136994, "grad_norm": 0.20522314310073853, "learning_rate": 9.087335889100967e-06, "loss": 0.5164, "step": 4283 }, { "epoch": 1.1880199667221298, "grad_norm": 0.19784744083881378, "learning_rate": 9.082024347023457e-06, "loss": 0.5207, "step": 4284 }, { "epoch": 1.1882972823072657, "grad_norm": 0.20166823267936707, "learning_rate": 9.076713471883557e-06, "loss": 0.5184, "step": 4285 }, { "epoch": 1.1885745978924016, "grad_norm": 0.20087933540344238, "learning_rate": 9.07140326471756e-06, "loss": 0.4829, "step": 4286 }, { "epoch": 1.1888519134775375, "grad_norm": 0.2303885966539383, "learning_rate": 9.066093726561622e-06, "loss": 0.5022, "step": 4287 }, { "epoch": 1.1891292290626734, "grad_norm": 0.19996869564056396, "learning_rate": 9.060784858451774e-06, "loss": 0.4958, "step": 4288 }, { "epoch": 1.1894065446478093, "grad_norm": 0.19278928637504578, "learning_rate": 9.055476661423925e-06, "loss": 0.5143, "step": 4289 }, { "epoch": 1.1896838602329451, "grad_norm": 0.19539318978786469, "learning_rate": 9.050169136513842e-06, "loss": 0.4935, "step": 4290 }, { "epoch": 1.189961175818081, "grad_norm": 0.20165032148361206, "learning_rate": 9.044862284757154e-06, "loss": 0.484, "step": 4291 }, { "epoch": 1.190238491403217, "grad_norm": 0.2109079658985138, "learning_rate": 9.039556107189384e-06, "loss": 0.5046, "step": 4292 }, { "epoch": 1.1905158069883528, "grad_norm": 0.19363267719745636, "learning_rate": 9.034250604845898e-06, "loss": 0.4955, "step": 4293 }, { "epoch": 1.1907931225734887, "grad_norm": 0.190843403339386, "learning_rate": 9.028945778761942e-06, "loss": 0.4936, "step": 4294 }, { "epoch": 1.1910704381586246, "grad_norm": 0.1952875405550003, "learning_rate": 9.023641629972626e-06, "loss": 0.4903, "step": 4295 }, { "epoch": 1.1913477537437605, "grad_norm": 0.1969594806432724, "learning_rate": 9.018338159512937e-06, "loss": 0.4951, "step": 4296 }, { "epoch": 1.1916250693288963, "grad_norm": 0.19602110981941223, "learning_rate": 9.013035368417716e-06, "loss": 0.5064, "step": 4297 }, { "epoch": 1.1919023849140322, "grad_norm": 0.1936318725347519, "learning_rate": 9.007733257721679e-06, "loss": 0.5272, "step": 4298 }, { "epoch": 1.192179700499168, "grad_norm": 0.19315633177757263, "learning_rate": 9.002431828459409e-06, "loss": 0.5085, "step": 4299 }, { "epoch": 1.192457016084304, "grad_norm": 0.20358605682849884, "learning_rate": 8.997131081665357e-06, "loss": 0.5105, "step": 4300 }, { "epoch": 1.1927343316694399, "grad_norm": 0.194684237241745, "learning_rate": 8.991831018373841e-06, "loss": 0.5007, "step": 4301 }, { "epoch": 1.1930116472545758, "grad_norm": 0.19725412130355835, "learning_rate": 8.986531639619033e-06, "loss": 0.4953, "step": 4302 }, { "epoch": 1.1932889628397116, "grad_norm": 0.1992274820804596, "learning_rate": 8.981232946434995e-06, "loss": 0.4917, "step": 4303 }, { "epoch": 1.1935662784248475, "grad_norm": 0.1960635781288147, "learning_rate": 8.975934939855637e-06, "loss": 0.4854, "step": 4304 }, { "epoch": 1.1938435940099834, "grad_norm": 0.2065868228673935, "learning_rate": 8.970637620914735e-06, "loss": 0.497, "step": 4305 }, { "epoch": 1.1941209095951193, "grad_norm": 0.19648823142051697, "learning_rate": 8.965340990645947e-06, "loss": 0.4887, "step": 4306 }, { "epoch": 1.1943982251802552, "grad_norm": 0.20130853354930878, "learning_rate": 8.960045050082783e-06, "loss": 0.5177, "step": 4307 }, { "epoch": 1.194675540765391, "grad_norm": 0.20241306722164154, "learning_rate": 8.954749800258615e-06, "loss": 0.5014, "step": 4308 }, { "epoch": 1.194952856350527, "grad_norm": 0.1977449208498001, "learning_rate": 8.94945524220669e-06, "loss": 0.5068, "step": 4309 }, { "epoch": 1.1952301719356628, "grad_norm": 0.19154322147369385, "learning_rate": 8.944161376960119e-06, "loss": 0.516, "step": 4310 }, { "epoch": 1.1955074875207987, "grad_norm": 0.19326725602149963, "learning_rate": 8.938868205551877e-06, "loss": 0.493, "step": 4311 }, { "epoch": 1.1957848031059346, "grad_norm": 0.1888340413570404, "learning_rate": 8.933575729014788e-06, "loss": 0.4899, "step": 4312 }, { "epoch": 1.1960621186910705, "grad_norm": 0.19526329636573792, "learning_rate": 8.928283948381575e-06, "loss": 0.4789, "step": 4313 }, { "epoch": 1.1963394342762064, "grad_norm": 0.1940467208623886, "learning_rate": 8.922992864684791e-06, "loss": 0.5252, "step": 4314 }, { "epoch": 1.1966167498613423, "grad_norm": 0.20158347487449646, "learning_rate": 8.917702478956872e-06, "loss": 0.5069, "step": 4315 }, { "epoch": 1.1968940654464781, "grad_norm": 0.20539873838424683, "learning_rate": 8.912412792230104e-06, "loss": 0.4972, "step": 4316 }, { "epoch": 1.197171381031614, "grad_norm": 0.1870296597480774, "learning_rate": 8.90712380553666e-06, "loss": 0.4905, "step": 4317 }, { "epoch": 1.19744869661675, "grad_norm": 0.1965525895357132, "learning_rate": 8.90183551990855e-06, "loss": 0.4966, "step": 4318 }, { "epoch": 1.1977260122018858, "grad_norm": 0.20157171785831451, "learning_rate": 8.896547936377658e-06, "loss": 0.5305, "step": 4319 }, { "epoch": 1.1980033277870217, "grad_norm": 0.2052687555551529, "learning_rate": 8.89126105597574e-06, "loss": 0.5178, "step": 4320 }, { "epoch": 1.1982806433721576, "grad_norm": 0.19930841028690338, "learning_rate": 8.885974879734399e-06, "loss": 0.5181, "step": 4321 }, { "epoch": 1.1985579589572934, "grad_norm": 0.19460836052894592, "learning_rate": 8.880689408685114e-06, "loss": 0.5114, "step": 4322 }, { "epoch": 1.1988352745424293, "grad_norm": 0.19336189329624176, "learning_rate": 8.87540464385921e-06, "loss": 0.4967, "step": 4323 }, { "epoch": 1.1991125901275652, "grad_norm": 0.19514434039592743, "learning_rate": 8.8701205862879e-06, "loss": 0.4995, "step": 4324 }, { "epoch": 1.199389905712701, "grad_norm": 0.19089075922966003, "learning_rate": 8.864837237002232e-06, "loss": 0.4795, "step": 4325 }, { "epoch": 1.199667221297837, "grad_norm": 0.2026674598455429, "learning_rate": 8.85955459703313e-06, "loss": 0.5033, "step": 4326 }, { "epoch": 1.1999445368829729, "grad_norm": 0.2059757262468338, "learning_rate": 8.854272667411379e-06, "loss": 0.4985, "step": 4327 }, { "epoch": 1.2002218524681088, "grad_norm": 0.20040294528007507, "learning_rate": 8.848991449167623e-06, "loss": 0.493, "step": 4328 }, { "epoch": 1.2004991680532446, "grad_norm": 0.19929878413677216, "learning_rate": 8.843710943332362e-06, "loss": 0.495, "step": 4329 }, { "epoch": 1.2007764836383805, "grad_norm": 0.1958535760641098, "learning_rate": 8.838431150935975e-06, "loss": 0.4971, "step": 4330 }, { "epoch": 1.2010537992235164, "grad_norm": 0.18605239689350128, "learning_rate": 8.83315207300868e-06, "loss": 0.4861, "step": 4331 }, { "epoch": 1.2013311148086523, "grad_norm": 0.43775662779808044, "learning_rate": 8.827873710580564e-06, "loss": 0.4962, "step": 4332 }, { "epoch": 1.2016084303937882, "grad_norm": 0.1940368264913559, "learning_rate": 8.822596064681577e-06, "loss": 0.5247, "step": 4333 }, { "epoch": 1.201885745978924, "grad_norm": 0.2012210190296173, "learning_rate": 8.817319136341535e-06, "loss": 0.5178, "step": 4334 }, { "epoch": 1.20216306156406, "grad_norm": 0.18737319111824036, "learning_rate": 8.812042926590098e-06, "loss": 0.5166, "step": 4335 }, { "epoch": 1.2024403771491958, "grad_norm": 0.19788531959056854, "learning_rate": 8.806767436456792e-06, "loss": 0.4959, "step": 4336 }, { "epoch": 1.2027176927343317, "grad_norm": 0.1986195296049118, "learning_rate": 8.801492666971012e-06, "loss": 0.4669, "step": 4337 }, { "epoch": 1.2029950083194676, "grad_norm": 0.21253184974193573, "learning_rate": 8.796218619162004e-06, "loss": 0.5126, "step": 4338 }, { "epoch": 1.2032723239046035, "grad_norm": 0.19485290348529816, "learning_rate": 8.790945294058876e-06, "loss": 0.475, "step": 4339 }, { "epoch": 1.2035496394897394, "grad_norm": 0.19246895611286163, "learning_rate": 8.785672692690584e-06, "loss": 0.5123, "step": 4340 }, { "epoch": 1.2038269550748752, "grad_norm": 0.21028102934360504, "learning_rate": 8.780400816085963e-06, "loss": 0.5072, "step": 4341 }, { "epoch": 1.2041042706600111, "grad_norm": 0.20249207317829132, "learning_rate": 8.775129665273691e-06, "loss": 0.4979, "step": 4342 }, { "epoch": 1.204381586245147, "grad_norm": 0.19815127551555634, "learning_rate": 8.769859241282307e-06, "loss": 0.5116, "step": 4343 }, { "epoch": 1.204658901830283, "grad_norm": 0.20030280947685242, "learning_rate": 8.764589545140217e-06, "loss": 0.5121, "step": 4344 }, { "epoch": 1.2049362174154188, "grad_norm": 0.2132977396249771, "learning_rate": 8.759320577875676e-06, "loss": 0.5192, "step": 4345 }, { "epoch": 1.2052135330005547, "grad_norm": 0.19407188892364502, "learning_rate": 8.754052340516796e-06, "loss": 0.4912, "step": 4346 }, { "epoch": 1.2054908485856906, "grad_norm": 0.2085913121700287, "learning_rate": 8.748784834091549e-06, "loss": 0.4861, "step": 4347 }, { "epoch": 1.2057681641708264, "grad_norm": 0.20071199536323547, "learning_rate": 8.74351805962777e-06, "loss": 0.5451, "step": 4348 }, { "epoch": 1.2060454797559623, "grad_norm": 0.20174618065357208, "learning_rate": 8.738252018153145e-06, "loss": 0.5094, "step": 4349 }, { "epoch": 1.2063227953410982, "grad_norm": 0.2093082219362259, "learning_rate": 8.73298671069521e-06, "loss": 0.5161, "step": 4350 }, { "epoch": 1.206600110926234, "grad_norm": 0.21680103242397308, "learning_rate": 8.727722138281381e-06, "loss": 0.5058, "step": 4351 }, { "epoch": 1.20687742651137, "grad_norm": 0.19285671412944794, "learning_rate": 8.722458301938904e-06, "loss": 0.4775, "step": 4352 }, { "epoch": 1.2071547420965059, "grad_norm": 0.20253880321979523, "learning_rate": 8.717195202694898e-06, "loss": 0.501, "step": 4353 }, { "epoch": 1.2074320576816417, "grad_norm": 0.19431787729263306, "learning_rate": 8.711932841576325e-06, "loss": 0.4927, "step": 4354 }, { "epoch": 1.2077093732667776, "grad_norm": 0.20108050107955933, "learning_rate": 8.706671219610027e-06, "loss": 0.5135, "step": 4355 }, { "epoch": 1.2079866888519135, "grad_norm": 0.20072422921657562, "learning_rate": 8.70141033782267e-06, "loss": 0.507, "step": 4356 }, { "epoch": 1.2082640044370494, "grad_norm": 0.21369212865829468, "learning_rate": 8.696150197240798e-06, "loss": 0.5259, "step": 4357 }, { "epoch": 1.2085413200221853, "grad_norm": 0.1956164538860321, "learning_rate": 8.690890798890806e-06, "loss": 0.5293, "step": 4358 }, { "epoch": 1.2088186356073212, "grad_norm": 0.19677644968032837, "learning_rate": 8.685632143798938e-06, "loss": 0.5185, "step": 4359 }, { "epoch": 1.209095951192457, "grad_norm": 0.19301322102546692, "learning_rate": 8.680374232991304e-06, "loss": 0.5092, "step": 4360 }, { "epoch": 1.209373266777593, "grad_norm": 0.19476044178009033, "learning_rate": 8.67511706749385e-06, "loss": 0.5197, "step": 4361 }, { "epoch": 1.2096505823627288, "grad_norm": 0.20564420521259308, "learning_rate": 8.669860648332395e-06, "loss": 0.519, "step": 4362 }, { "epoch": 1.2099278979478647, "grad_norm": 0.20334792137145996, "learning_rate": 8.664604976532605e-06, "loss": 0.5407, "step": 4363 }, { "epoch": 1.2102052135330006, "grad_norm": 0.1910853236913681, "learning_rate": 8.659350053120003e-06, "loss": 0.5174, "step": 4364 }, { "epoch": 1.2104825291181365, "grad_norm": 0.1837315857410431, "learning_rate": 8.65409587911996e-06, "loss": 0.5198, "step": 4365 }, { "epoch": 1.2107598447032724, "grad_norm": 0.19404159486293793, "learning_rate": 8.64884245555771e-06, "loss": 0.4944, "step": 4366 }, { "epoch": 1.2110371602884082, "grad_norm": 0.18698063492774963, "learning_rate": 8.643589783458328e-06, "loss": 0.5151, "step": 4367 }, { "epoch": 1.2113144758735441, "grad_norm": 0.2053699940443039, "learning_rate": 8.638337863846752e-06, "loss": 0.5182, "step": 4368 }, { "epoch": 1.21159179145868, "grad_norm": 0.18703065812587738, "learning_rate": 8.633086697747773e-06, "loss": 0.4803, "step": 4369 }, { "epoch": 1.211869107043816, "grad_norm": 0.19493581354618073, "learning_rate": 8.627836286186035e-06, "loss": 0.5313, "step": 4370 }, { "epoch": 1.2121464226289518, "grad_norm": 0.1981756091117859, "learning_rate": 8.622586630186019e-06, "loss": 0.5012, "step": 4371 }, { "epoch": 1.2124237382140877, "grad_norm": 0.18528394401073456, "learning_rate": 8.61733773077209e-06, "loss": 0.4845, "step": 4372 }, { "epoch": 1.2127010537992235, "grad_norm": 0.20212890207767487, "learning_rate": 8.612089588968437e-06, "loss": 0.498, "step": 4373 }, { "epoch": 1.2129783693843594, "grad_norm": 0.19095052778720856, "learning_rate": 8.606842205799113e-06, "loss": 0.4707, "step": 4374 }, { "epoch": 1.2132556849694953, "grad_norm": 0.18967384099960327, "learning_rate": 8.60159558228802e-06, "loss": 0.5123, "step": 4375 }, { "epoch": 1.2135330005546312, "grad_norm": 0.20997461676597595, "learning_rate": 8.596349719458916e-06, "loss": 0.4683, "step": 4376 }, { "epoch": 1.213810316139767, "grad_norm": 0.19758398830890656, "learning_rate": 8.591104618335413e-06, "loss": 0.5227, "step": 4377 }, { "epoch": 1.214087631724903, "grad_norm": 0.19900359213352203, "learning_rate": 8.585860279940954e-06, "loss": 0.4995, "step": 4378 }, { "epoch": 1.2143649473100389, "grad_norm": 0.19332247972488403, "learning_rate": 8.580616705298864e-06, "loss": 0.5178, "step": 4379 }, { "epoch": 1.2146422628951747, "grad_norm": 0.18881361186504364, "learning_rate": 8.575373895432293e-06, "loss": 0.493, "step": 4380 }, { "epoch": 1.2149195784803106, "grad_norm": 0.222880020737648, "learning_rate": 8.57013185136426e-06, "loss": 0.4933, "step": 4381 }, { "epoch": 1.2151968940654465, "grad_norm": 0.19780275225639343, "learning_rate": 8.564890574117616e-06, "loss": 0.4966, "step": 4382 }, { "epoch": 1.2154742096505824, "grad_norm": 0.1969325840473175, "learning_rate": 8.559650064715088e-06, "loss": 0.5132, "step": 4383 }, { "epoch": 1.2157515252357183, "grad_norm": 0.2127065658569336, "learning_rate": 8.554410324179226e-06, "loss": 0.496, "step": 4384 }, { "epoch": 1.2160288408208542, "grad_norm": 0.20196539163589478, "learning_rate": 8.549171353532443e-06, "loss": 0.4894, "step": 4385 }, { "epoch": 1.21630615640599, "grad_norm": 0.19963231682777405, "learning_rate": 8.543933153797007e-06, "loss": 0.5355, "step": 4386 }, { "epoch": 1.216583471991126, "grad_norm": 0.20684166252613068, "learning_rate": 8.53869572599503e-06, "loss": 0.497, "step": 4387 }, { "epoch": 1.2168607875762618, "grad_norm": 0.20197083055973053, "learning_rate": 8.533459071148462e-06, "loss": 0.4998, "step": 4388 }, { "epoch": 1.2171381031613977, "grad_norm": 0.19906753301620483, "learning_rate": 8.528223190279128e-06, "loss": 0.4932, "step": 4389 }, { "epoch": 1.2174154187465336, "grad_norm": 0.1864069402217865, "learning_rate": 8.522988084408678e-06, "loss": 0.4894, "step": 4390 }, { "epoch": 1.2176927343316695, "grad_norm": 0.1982210874557495, "learning_rate": 8.517753754558621e-06, "loss": 0.4868, "step": 4391 }, { "epoch": 1.2179700499168054, "grad_norm": 0.19526433944702148, "learning_rate": 8.512520201750312e-06, "loss": 0.5154, "step": 4392 }, { "epoch": 1.2182473655019412, "grad_norm": 0.19613106548786163, "learning_rate": 8.507287427004962e-06, "loss": 0.5095, "step": 4393 }, { "epoch": 1.2185246810870771, "grad_norm": 0.18780362606048584, "learning_rate": 8.502055431343618e-06, "loss": 0.5064, "step": 4394 }, { "epoch": 1.218801996672213, "grad_norm": 0.20160475373268127, "learning_rate": 8.49682421578718e-06, "loss": 0.5269, "step": 4395 }, { "epoch": 1.219079312257349, "grad_norm": 0.22338464856147766, "learning_rate": 8.491593781356404e-06, "loss": 0.5086, "step": 4396 }, { "epoch": 1.2193566278424848, "grad_norm": 0.18976663053035736, "learning_rate": 8.48636412907188e-06, "loss": 0.5017, "step": 4397 }, { "epoch": 1.2196339434276207, "grad_norm": 0.18721407651901245, "learning_rate": 8.481135259954057e-06, "loss": 0.4921, "step": 4398 }, { "epoch": 1.2199112590127565, "grad_norm": 0.19557468593120575, "learning_rate": 8.475907175023218e-06, "loss": 0.495, "step": 4399 }, { "epoch": 1.2201885745978924, "grad_norm": 0.1959930807352066, "learning_rate": 8.470679875299507e-06, "loss": 0.5033, "step": 4400 }, { "epoch": 1.2204658901830283, "grad_norm": 0.19299939274787903, "learning_rate": 8.465453361802907e-06, "loss": 0.5275, "step": 4401 }, { "epoch": 1.2207432057681642, "grad_norm": 0.19271641969680786, "learning_rate": 8.460227635553247e-06, "loss": 0.4899, "step": 4402 }, { "epoch": 1.2210205213533, "grad_norm": 0.19292891025543213, "learning_rate": 8.455002697570211e-06, "loss": 0.4917, "step": 4403 }, { "epoch": 1.221297836938436, "grad_norm": 0.19001443684101105, "learning_rate": 8.44977854887332e-06, "loss": 0.4953, "step": 4404 }, { "epoch": 1.2215751525235718, "grad_norm": 0.2002885341644287, "learning_rate": 8.44455519048194e-06, "loss": 0.5099, "step": 4405 }, { "epoch": 1.2218524681087077, "grad_norm": 0.1973315179347992, "learning_rate": 8.439332623415287e-06, "loss": 0.5097, "step": 4406 }, { "epoch": 1.2221297836938436, "grad_norm": 0.1916041374206543, "learning_rate": 8.434110848692427e-06, "loss": 0.5177, "step": 4407 }, { "epoch": 1.2224070992789795, "grad_norm": 0.2049477994441986, "learning_rate": 8.428889867332268e-06, "loss": 0.4952, "step": 4408 }, { "epoch": 1.2226844148641154, "grad_norm": 0.2035469114780426, "learning_rate": 8.423669680353549e-06, "loss": 0.5438, "step": 4409 }, { "epoch": 1.2229617304492513, "grad_norm": 0.1949811726808548, "learning_rate": 8.418450288774884e-06, "loss": 0.5214, "step": 4410 }, { "epoch": 1.2232390460343872, "grad_norm": 0.22771196067333221, "learning_rate": 8.413231693614704e-06, "loss": 0.5108, "step": 4411 }, { "epoch": 1.223516361619523, "grad_norm": 0.19894933700561523, "learning_rate": 8.408013895891295e-06, "loss": 0.4941, "step": 4412 }, { "epoch": 1.223793677204659, "grad_norm": 0.19783443212509155, "learning_rate": 8.40279689662279e-06, "loss": 0.4934, "step": 4413 }, { "epoch": 1.2240709927897948, "grad_norm": 0.1914057582616806, "learning_rate": 8.397580696827166e-06, "loss": 0.5151, "step": 4414 }, { "epoch": 1.2243483083749307, "grad_norm": 0.19515813887119293, "learning_rate": 8.392365297522243e-06, "loss": 0.4888, "step": 4415 }, { "epoch": 1.2246256239600666, "grad_norm": 0.19242678582668304, "learning_rate": 8.387150699725673e-06, "loss": 0.5101, "step": 4416 }, { "epoch": 1.2249029395452025, "grad_norm": 0.2145782858133316, "learning_rate": 8.381936904454973e-06, "loss": 0.5157, "step": 4417 }, { "epoch": 1.2251802551303383, "grad_norm": 0.19011950492858887, "learning_rate": 8.376723912727488e-06, "loss": 0.4891, "step": 4418 }, { "epoch": 1.2254575707154742, "grad_norm": 0.19757391512393951, "learning_rate": 8.371511725560416e-06, "loss": 0.5173, "step": 4419 }, { "epoch": 1.2257348863006101, "grad_norm": 0.2027006596326828, "learning_rate": 8.36630034397078e-06, "loss": 0.5116, "step": 4420 }, { "epoch": 1.226012201885746, "grad_norm": 0.2004849761724472, "learning_rate": 8.361089768975475e-06, "loss": 0.5077, "step": 4421 }, { "epoch": 1.2262895174708819, "grad_norm": 0.20179691910743713, "learning_rate": 8.355880001591212e-06, "loss": 0.4969, "step": 4422 }, { "epoch": 1.2265668330560178, "grad_norm": 0.20845246315002441, "learning_rate": 8.350671042834555e-06, "loss": 0.4789, "step": 4423 }, { "epoch": 1.2268441486411537, "grad_norm": 0.19270509481430054, "learning_rate": 8.345462893721911e-06, "loss": 0.4874, "step": 4424 }, { "epoch": 1.2271214642262895, "grad_norm": 0.19383633136749268, "learning_rate": 8.340255555269535e-06, "loss": 0.5006, "step": 4425 }, { "epoch": 1.2273987798114254, "grad_norm": 0.18180836737155914, "learning_rate": 8.335049028493509e-06, "loss": 0.5117, "step": 4426 }, { "epoch": 1.2276760953965613, "grad_norm": 0.196406289935112, "learning_rate": 8.32984331440976e-06, "loss": 0.5224, "step": 4427 }, { "epoch": 1.2279534109816972, "grad_norm": 0.19745458662509918, "learning_rate": 8.324638414034069e-06, "loss": 0.4721, "step": 4428 }, { "epoch": 1.228230726566833, "grad_norm": 0.2035657912492752, "learning_rate": 8.31943432838205e-06, "loss": 0.4984, "step": 4429 }, { "epoch": 1.228508042151969, "grad_norm": 0.204306960105896, "learning_rate": 8.314231058469152e-06, "loss": 0.4978, "step": 4430 }, { "epoch": 1.2287853577371048, "grad_norm": 0.20982758700847626, "learning_rate": 8.309028605310679e-06, "loss": 0.4996, "step": 4431 }, { "epoch": 1.2290626733222407, "grad_norm": 0.19134745001792908, "learning_rate": 8.30382696992176e-06, "loss": 0.4947, "step": 4432 }, { "epoch": 1.2293399889073766, "grad_norm": 0.19805335998535156, "learning_rate": 8.298626153317376e-06, "loss": 0.5233, "step": 4433 }, { "epoch": 1.2296173044925125, "grad_norm": 0.19368603825569153, "learning_rate": 8.293426156512341e-06, "loss": 0.5168, "step": 4434 }, { "epoch": 1.2298946200776484, "grad_norm": 0.19627097249031067, "learning_rate": 8.288226980521314e-06, "loss": 0.4882, "step": 4435 }, { "epoch": 1.2301719356627843, "grad_norm": 0.19731606543064117, "learning_rate": 8.283028626358796e-06, "loss": 0.5079, "step": 4436 }, { "epoch": 1.2304492512479202, "grad_norm": 0.22327442467212677, "learning_rate": 8.277831095039113e-06, "loss": 0.5173, "step": 4437 }, { "epoch": 1.230726566833056, "grad_norm": 0.2100764513015747, "learning_rate": 8.272634387576453e-06, "loss": 0.501, "step": 4438 }, { "epoch": 1.231003882418192, "grad_norm": 0.19981728494167328, "learning_rate": 8.267438504984823e-06, "loss": 0.477, "step": 4439 }, { "epoch": 1.2312811980033278, "grad_norm": 0.19193001091480255, "learning_rate": 8.262243448278084e-06, "loss": 0.5078, "step": 4440 }, { "epoch": 1.2315585135884637, "grad_norm": 0.2083725780248642, "learning_rate": 8.257049218469917e-06, "loss": 0.5017, "step": 4441 }, { "epoch": 1.2318358291735996, "grad_norm": 0.19924737513065338, "learning_rate": 8.251855816573873e-06, "loss": 0.5036, "step": 4442 }, { "epoch": 1.2321131447587355, "grad_norm": 0.19425547122955322, "learning_rate": 8.246663243603305e-06, "loss": 0.5207, "step": 4443 }, { "epoch": 1.2323904603438713, "grad_norm": 0.20682436227798462, "learning_rate": 8.241471500571428e-06, "loss": 0.5119, "step": 4444 }, { "epoch": 1.2326677759290072, "grad_norm": 0.1909773349761963, "learning_rate": 8.236280588491292e-06, "loss": 0.4723, "step": 4445 }, { "epoch": 1.232945091514143, "grad_norm": 0.2076360434293747, "learning_rate": 8.231090508375777e-06, "loss": 0.4959, "step": 4446 }, { "epoch": 1.233222407099279, "grad_norm": 0.19352662563323975, "learning_rate": 8.225901261237609e-06, "loss": 0.5002, "step": 4447 }, { "epoch": 1.2334997226844149, "grad_norm": 0.21294668316841125, "learning_rate": 8.220712848089338e-06, "loss": 0.5099, "step": 4448 }, { "epoch": 1.2337770382695508, "grad_norm": 0.20197489857673645, "learning_rate": 8.215525269943374e-06, "loss": 0.4944, "step": 4449 }, { "epoch": 1.2340543538546866, "grad_norm": 0.20826977491378784, "learning_rate": 8.210338527811943e-06, "loss": 0.5321, "step": 4450 }, { "epoch": 1.2343316694398225, "grad_norm": 0.20179514586925507, "learning_rate": 8.205152622707116e-06, "loss": 0.5053, "step": 4451 }, { "epoch": 1.2346089850249584, "grad_norm": 0.21204441785812378, "learning_rate": 8.199967555640805e-06, "loss": 0.4856, "step": 4452 }, { "epoch": 1.2348863006100943, "grad_norm": 0.20789694786071777, "learning_rate": 8.194783327624751e-06, "loss": 0.5174, "step": 4453 }, { "epoch": 1.2351636161952302, "grad_norm": 0.207809716463089, "learning_rate": 8.189599939670531e-06, "loss": 0.5028, "step": 4454 }, { "epoch": 1.235440931780366, "grad_norm": 0.3477715849876404, "learning_rate": 8.184417392789568e-06, "loss": 0.4812, "step": 4455 }, { "epoch": 1.235718247365502, "grad_norm": 0.20378254354000092, "learning_rate": 8.179235687993108e-06, "loss": 0.499, "step": 4456 }, { "epoch": 1.2359955629506378, "grad_norm": 0.19878922402858734, "learning_rate": 8.174054826292249e-06, "loss": 0.508, "step": 4457 }, { "epoch": 1.2362728785357737, "grad_norm": 0.21171467006206512, "learning_rate": 8.168874808697896e-06, "loss": 0.5037, "step": 4458 }, { "epoch": 1.2365501941209096, "grad_norm": 0.1954033523797989, "learning_rate": 8.163695636220828e-06, "loss": 0.5014, "step": 4459 }, { "epoch": 1.2368275097060455, "grad_norm": 0.19483618438243866, "learning_rate": 8.158517309871626e-06, "loss": 0.4929, "step": 4460 }, { "epoch": 1.2371048252911814, "grad_norm": 0.19928301870822906, "learning_rate": 8.153339830660719e-06, "loss": 0.5078, "step": 4461 }, { "epoch": 1.2373821408763173, "grad_norm": 0.20188100636005402, "learning_rate": 8.148163199598379e-06, "loss": 0.4913, "step": 4462 }, { "epoch": 1.2376594564614531, "grad_norm": 0.21561436355113983, "learning_rate": 8.142987417694699e-06, "loss": 0.4676, "step": 4463 }, { "epoch": 1.237936772046589, "grad_norm": 0.21500158309936523, "learning_rate": 8.137812485959608e-06, "loss": 0.4817, "step": 4464 }, { "epoch": 1.238214087631725, "grad_norm": 0.20233996212482452, "learning_rate": 8.132638405402874e-06, "loss": 0.5101, "step": 4465 }, { "epoch": 1.2384914032168608, "grad_norm": 0.20174197852611542, "learning_rate": 8.1274651770341e-06, "loss": 0.4972, "step": 4466 }, { "epoch": 1.2387687188019967, "grad_norm": 0.190217986702919, "learning_rate": 8.122292801862716e-06, "loss": 0.498, "step": 4467 }, { "epoch": 1.2390460343871326, "grad_norm": 0.20103545486927032, "learning_rate": 8.11712128089799e-06, "loss": 0.5011, "step": 4468 }, { "epoch": 1.2393233499722685, "grad_norm": 0.20134317874908447, "learning_rate": 8.111950615149031e-06, "loss": 0.5089, "step": 4469 }, { "epoch": 1.2396006655574043, "grad_norm": 0.20507292449474335, "learning_rate": 8.106780805624764e-06, "loss": 0.5092, "step": 4470 }, { "epoch": 1.2398779811425402, "grad_norm": 0.19532889127731323, "learning_rate": 8.101611853333955e-06, "loss": 0.4975, "step": 4471 }, { "epoch": 1.240155296727676, "grad_norm": 0.20147058367729187, "learning_rate": 8.096443759285206e-06, "loss": 0.5055, "step": 4472 }, { "epoch": 1.240432612312812, "grad_norm": 0.20608854293823242, "learning_rate": 8.09127652448695e-06, "loss": 0.5094, "step": 4473 }, { "epoch": 1.2407099278979479, "grad_norm": 0.20161250233650208, "learning_rate": 8.086110149947457e-06, "loss": 0.5182, "step": 4474 }, { "epoch": 1.2409872434830838, "grad_norm": 0.19690173864364624, "learning_rate": 8.080944636674811e-06, "loss": 0.5211, "step": 4475 }, { "epoch": 1.2412645590682196, "grad_norm": 0.1985633373260498, "learning_rate": 8.075779985676949e-06, "loss": 0.4994, "step": 4476 }, { "epoch": 1.2415418746533555, "grad_norm": 0.1929730325937271, "learning_rate": 8.070616197961631e-06, "loss": 0.4815, "step": 4477 }, { "epoch": 1.2418191902384914, "grad_norm": 0.19387488067150116, "learning_rate": 8.065453274536447e-06, "loss": 0.5091, "step": 4478 }, { "epoch": 1.2420965058236273, "grad_norm": 0.19065174460411072, "learning_rate": 8.060291216408814e-06, "loss": 0.4868, "step": 4479 }, { "epoch": 1.2423738214087632, "grad_norm": 0.20745143294334412, "learning_rate": 8.055130024586e-06, "loss": 0.4938, "step": 4480 }, { "epoch": 1.242651136993899, "grad_norm": 0.19908511638641357, "learning_rate": 8.04996970007508e-06, "loss": 0.4939, "step": 4481 }, { "epoch": 1.242928452579035, "grad_norm": 0.2046736627817154, "learning_rate": 8.044810243882971e-06, "loss": 0.5194, "step": 4482 }, { "epoch": 1.2432057681641708, "grad_norm": 0.20767585933208466, "learning_rate": 8.039651657016423e-06, "loss": 0.5176, "step": 4483 }, { "epoch": 1.2434830837493067, "grad_norm": 0.20827417075634003, "learning_rate": 8.034493940482016e-06, "loss": 0.4997, "step": 4484 }, { "epoch": 1.2437603993344426, "grad_norm": 0.21914151310920715, "learning_rate": 8.029337095286147e-06, "loss": 0.4901, "step": 4485 }, { "epoch": 1.2440377149195785, "grad_norm": 0.21400579810142517, "learning_rate": 8.024181122435058e-06, "loss": 0.4926, "step": 4486 }, { "epoch": 1.2443150305047144, "grad_norm": 0.19826753437519073, "learning_rate": 8.01902602293482e-06, "loss": 0.5113, "step": 4487 }, { "epoch": 1.2445923460898503, "grad_norm": 0.20881658792495728, "learning_rate": 8.013871797791324e-06, "loss": 0.4921, "step": 4488 }, { "epoch": 1.2448696616749861, "grad_norm": 0.18899375200271606, "learning_rate": 8.008718448010297e-06, "loss": 0.5139, "step": 4489 }, { "epoch": 1.245146977260122, "grad_norm": 0.1959080994129181, "learning_rate": 8.003565974597298e-06, "loss": 0.4864, "step": 4490 }, { "epoch": 1.245424292845258, "grad_norm": 0.20414955914020538, "learning_rate": 7.99841437855771e-06, "loss": 0.4847, "step": 4491 }, { "epoch": 1.2457016084303938, "grad_norm": 0.1957169771194458, "learning_rate": 7.993263660896738e-06, "loss": 0.5117, "step": 4492 }, { "epoch": 1.2459789240155297, "grad_norm": 0.2063983678817749, "learning_rate": 7.988113822619431e-06, "loss": 0.4969, "step": 4493 }, { "epoch": 1.2462562396006656, "grad_norm": 0.1924324780702591, "learning_rate": 7.982964864730658e-06, "loss": 0.4815, "step": 4494 }, { "epoch": 1.2465335551858014, "grad_norm": 0.19338028132915497, "learning_rate": 7.97781678823512e-06, "loss": 0.4859, "step": 4495 }, { "epoch": 1.2468108707709373, "grad_norm": 0.19696307182312012, "learning_rate": 7.972669594137333e-06, "loss": 0.5102, "step": 4496 }, { "epoch": 1.2470881863560732, "grad_norm": 0.19288606941699982, "learning_rate": 7.967523283441664e-06, "loss": 0.4892, "step": 4497 }, { "epoch": 1.247365501941209, "grad_norm": 0.19867482781410217, "learning_rate": 7.962377857152284e-06, "loss": 0.4955, "step": 4498 }, { "epoch": 1.247642817526345, "grad_norm": 0.20236186683177948, "learning_rate": 7.957233316273211e-06, "loss": 0.5143, "step": 4499 }, { "epoch": 1.2479201331114809, "grad_norm": 0.2065366804599762, "learning_rate": 7.952089661808268e-06, "loss": 0.4996, "step": 4500 }, { "epoch": 1.2481974486966168, "grad_norm": 0.1983073204755783, "learning_rate": 7.946946894761134e-06, "loss": 0.4892, "step": 4501 }, { "epoch": 1.2484747642817526, "grad_norm": 0.19757206737995148, "learning_rate": 7.94180501613529e-06, "loss": 0.5111, "step": 4502 }, { "epoch": 1.2487520798668885, "grad_norm": 0.20565056800842285, "learning_rate": 7.936664026934052e-06, "loss": 0.4943, "step": 4503 }, { "epoch": 1.2490293954520244, "grad_norm": 0.19800062477588654, "learning_rate": 7.931523928160567e-06, "loss": 0.4838, "step": 4504 }, { "epoch": 1.2493067110371603, "grad_norm": 0.2019055336713791, "learning_rate": 7.926384720817807e-06, "loss": 0.5413, "step": 4505 }, { "epoch": 1.2495840266222962, "grad_norm": 0.19655373692512512, "learning_rate": 7.921246405908558e-06, "loss": 0.5043, "step": 4506 }, { "epoch": 1.249861342207432, "grad_norm": 0.1993308812379837, "learning_rate": 7.916108984435448e-06, "loss": 0.5122, "step": 4507 }, { "epoch": 1.250138657792568, "grad_norm": 0.1926482617855072, "learning_rate": 7.910972457400923e-06, "loss": 0.5103, "step": 4508 }, { "epoch": 1.2504159733777038, "grad_norm": 0.20536184310913086, "learning_rate": 7.905836825807257e-06, "loss": 0.5124, "step": 4509 }, { "epoch": 1.2506932889628397, "grad_norm": 0.19252263009548187, "learning_rate": 7.900702090656545e-06, "loss": 0.5069, "step": 4510 }, { "epoch": 1.2509706045479756, "grad_norm": 0.20172010362148285, "learning_rate": 7.895568252950711e-06, "loss": 0.5081, "step": 4511 }, { "epoch": 1.2512479201331115, "grad_norm": 0.1947304904460907, "learning_rate": 7.890435313691507e-06, "loss": 0.5038, "step": 4512 }, { "epoch": 1.2515252357182474, "grad_norm": 0.20323634147644043, "learning_rate": 7.885303273880498e-06, "loss": 0.5116, "step": 4513 }, { "epoch": 1.2518025513033832, "grad_norm": 0.21063730120658875, "learning_rate": 7.880172134519082e-06, "loss": 0.5288, "step": 4514 }, { "epoch": 1.2520798668885191, "grad_norm": 0.19582238793373108, "learning_rate": 7.875041896608487e-06, "loss": 0.507, "step": 4515 }, { "epoch": 1.252357182473655, "grad_norm": 0.21959121525287628, "learning_rate": 7.869912561149755e-06, "loss": 0.5066, "step": 4516 }, { "epoch": 1.252634498058791, "grad_norm": 0.21660704910755157, "learning_rate": 7.864784129143747e-06, "loss": 0.5058, "step": 4517 }, { "epoch": 1.2529118136439268, "grad_norm": 0.20365749299526215, "learning_rate": 7.85965660159117e-06, "loss": 0.4966, "step": 4518 }, { "epoch": 1.2531891292290627, "grad_norm": 0.21940533816814423, "learning_rate": 7.85452997949253e-06, "loss": 0.4925, "step": 4519 }, { "epoch": 1.2534664448141986, "grad_norm": 0.2422044575214386, "learning_rate": 7.84940426384817e-06, "loss": 0.4982, "step": 4520 }, { "epoch": 1.2537437603993344, "grad_norm": 0.1977468580007553, "learning_rate": 7.844279455658257e-06, "loss": 0.5266, "step": 4521 }, { "epoch": 1.2540210759844703, "grad_norm": 0.20944328606128693, "learning_rate": 7.839155555922773e-06, "loss": 0.4822, "step": 4522 }, { "epoch": 1.2542983915696062, "grad_norm": 0.1976504623889923, "learning_rate": 7.834032565641525e-06, "loss": 0.4964, "step": 4523 }, { "epoch": 1.254575707154742, "grad_norm": 0.20402562618255615, "learning_rate": 7.828910485814142e-06, "loss": 0.5108, "step": 4524 }, { "epoch": 1.254853022739878, "grad_norm": 0.19931097328662872, "learning_rate": 7.823789317440086e-06, "loss": 0.4945, "step": 4525 }, { "epoch": 1.2551303383250139, "grad_norm": 0.22738957405090332, "learning_rate": 7.818669061518628e-06, "loss": 0.5012, "step": 4526 }, { "epoch": 1.2554076539101497, "grad_norm": 0.20075024664402008, "learning_rate": 7.813549719048862e-06, "loss": 0.4728, "step": 4527 }, { "epoch": 1.2556849694952856, "grad_norm": 0.18941983580589294, "learning_rate": 7.808431291029717e-06, "loss": 0.5031, "step": 4528 }, { "epoch": 1.2559622850804215, "grad_norm": 0.1993735283613205, "learning_rate": 7.803313778459925e-06, "loss": 0.4867, "step": 4529 }, { "epoch": 1.2562396006655574, "grad_norm": 0.20413918793201447, "learning_rate": 7.798197182338051e-06, "loss": 0.4906, "step": 4530 }, { "epoch": 1.2565169162506933, "grad_norm": 0.19167618453502655, "learning_rate": 7.793081503662477e-06, "loss": 0.4974, "step": 4531 }, { "epoch": 1.2567942318358292, "grad_norm": 0.20251625776290894, "learning_rate": 7.78796674343141e-06, "loss": 0.5196, "step": 4532 }, { "epoch": 1.257071547420965, "grad_norm": 0.19917502999305725, "learning_rate": 7.78285290264288e-06, "loss": 0.5262, "step": 4533 }, { "epoch": 1.257348863006101, "grad_norm": 0.23531167209148407, "learning_rate": 7.777739982294719e-06, "loss": 0.5052, "step": 4534 }, { "epoch": 1.2576261785912368, "grad_norm": 0.20423223078250885, "learning_rate": 7.772627983384604e-06, "loss": 0.5154, "step": 4535 }, { "epoch": 1.2579034941763727, "grad_norm": 0.19194969534873962, "learning_rate": 7.767516906910018e-06, "loss": 0.4841, "step": 4536 }, { "epoch": 1.2581808097615086, "grad_norm": 0.20146818459033966, "learning_rate": 7.762406753868273e-06, "loss": 0.5213, "step": 4537 }, { "epoch": 1.2584581253466445, "grad_norm": 0.2293848693370819, "learning_rate": 7.757297525256482e-06, "loss": 0.5098, "step": 4538 }, { "epoch": 1.2587354409317804, "grad_norm": 0.201215922832489, "learning_rate": 7.752189222071607e-06, "loss": 0.5166, "step": 4539 }, { "epoch": 1.2590127565169162, "grad_norm": 0.20330609381198883, "learning_rate": 7.747081845310403e-06, "loss": 0.478, "step": 4540 }, { "epoch": 1.2592900721020521, "grad_norm": 0.19179701805114746, "learning_rate": 7.741975395969456e-06, "loss": 0.5067, "step": 4541 }, { "epoch": 1.259567387687188, "grad_norm": 0.19922393560409546, "learning_rate": 7.736869875045171e-06, "loss": 0.519, "step": 4542 }, { "epoch": 1.259844703272324, "grad_norm": 0.19848206639289856, "learning_rate": 7.731765283533773e-06, "loss": 0.4716, "step": 4543 }, { "epoch": 1.2601220188574598, "grad_norm": 0.2069179117679596, "learning_rate": 7.7266616224313e-06, "loss": 0.5092, "step": 4544 }, { "epoch": 1.2603993344425957, "grad_norm": 0.20515765249729156, "learning_rate": 7.721558892733608e-06, "loss": 0.5013, "step": 4545 }, { "epoch": 1.2606766500277315, "grad_norm": 0.195294588804245, "learning_rate": 7.716457095436378e-06, "loss": 0.5055, "step": 4546 }, { "epoch": 1.2609539656128674, "grad_norm": 0.1927175670862198, "learning_rate": 7.711356231535111e-06, "loss": 0.5105, "step": 4547 }, { "epoch": 1.2612312811980033, "grad_norm": 0.19795742630958557, "learning_rate": 7.706256302025109e-06, "loss": 0.5169, "step": 4548 }, { "epoch": 1.2615085967831392, "grad_norm": 0.2066584676504135, "learning_rate": 7.701157307901515e-06, "loss": 0.5266, "step": 4549 }, { "epoch": 1.261785912368275, "grad_norm": 0.20272813737392426, "learning_rate": 7.696059250159277e-06, "loss": 0.4815, "step": 4550 }, { "epoch": 1.262063227953411, "grad_norm": 0.20374014973640442, "learning_rate": 7.690962129793153e-06, "loss": 0.4925, "step": 4551 }, { "epoch": 1.2623405435385469, "grad_norm": 0.19825245440006256, "learning_rate": 7.68586594779773e-06, "loss": 0.5236, "step": 4552 }, { "epoch": 1.2626178591236827, "grad_norm": 0.19967573881149292, "learning_rate": 7.68077070516741e-06, "loss": 0.4796, "step": 4553 }, { "epoch": 1.2628951747088186, "grad_norm": 0.19628724455833435, "learning_rate": 7.67567640289641e-06, "loss": 0.5064, "step": 4554 }, { "epoch": 1.2631724902939545, "grad_norm": 0.20510771870613098, "learning_rate": 7.670583041978754e-06, "loss": 0.5365, "step": 4555 }, { "epoch": 1.2634498058790904, "grad_norm": 0.1963818073272705, "learning_rate": 7.665490623408308e-06, "loss": 0.4989, "step": 4556 }, { "epoch": 1.2637271214642263, "grad_norm": 0.2057606726884842, "learning_rate": 7.660399148178727e-06, "loss": 0.4998, "step": 4557 }, { "epoch": 1.2640044370493622, "grad_norm": 0.18892644345760345, "learning_rate": 7.655308617283493e-06, "loss": 0.492, "step": 4558 }, { "epoch": 1.264281752634498, "grad_norm": 0.1982104480266571, "learning_rate": 7.650219031715906e-06, "loss": 0.4747, "step": 4559 }, { "epoch": 1.264559068219634, "grad_norm": 0.19365094602108002, "learning_rate": 7.645130392469082e-06, "loss": 0.491, "step": 4560 }, { "epoch": 1.2648363838047698, "grad_norm": 0.1940053105354309, "learning_rate": 7.640042700535944e-06, "loss": 0.5134, "step": 4561 }, { "epoch": 1.2651136993899057, "grad_norm": 0.19653920829296112, "learning_rate": 7.634955956909234e-06, "loss": 0.4746, "step": 4562 }, { "epoch": 1.2653910149750416, "grad_norm": 0.19754603505134583, "learning_rate": 7.629870162581516e-06, "loss": 0.4839, "step": 4563 }, { "epoch": 1.2656683305601775, "grad_norm": 0.1982675939798355, "learning_rate": 7.62478531854516e-06, "loss": 0.4898, "step": 4564 }, { "epoch": 1.2659456461453134, "grad_norm": 0.20128501951694489, "learning_rate": 7.61970142579236e-06, "loss": 0.5171, "step": 4565 }, { "epoch": 1.2662229617304492, "grad_norm": 0.20672714710235596, "learning_rate": 7.6146184853151055e-06, "loss": 0.51, "step": 4566 }, { "epoch": 1.2665002773155851, "grad_norm": 0.1908549964427948, "learning_rate": 7.609536498105224e-06, "loss": 0.4792, "step": 4567 }, { "epoch": 1.266777592900721, "grad_norm": 0.19964845478534698, "learning_rate": 7.6044554651543424e-06, "loss": 0.5005, "step": 4568 }, { "epoch": 1.267054908485857, "grad_norm": 0.2022092193365097, "learning_rate": 7.5993753874539015e-06, "loss": 0.4815, "step": 4569 }, { "epoch": 1.2673322240709928, "grad_norm": 0.20571796596050262, "learning_rate": 7.594296265995164e-06, "loss": 0.5162, "step": 4570 }, { "epoch": 1.2676095396561287, "grad_norm": 0.2025289386510849, "learning_rate": 7.589218101769202e-06, "loss": 0.5145, "step": 4571 }, { "epoch": 1.2678868552412645, "grad_norm": 0.18278613686561584, "learning_rate": 7.584140895766895e-06, "loss": 0.4959, "step": 4572 }, { "epoch": 1.2681641708264004, "grad_norm": 0.20485979318618774, "learning_rate": 7.579064648978939e-06, "loss": 0.4931, "step": 4573 }, { "epoch": 1.2684414864115363, "grad_norm": 0.20338453352451324, "learning_rate": 7.5739893623958515e-06, "loss": 0.501, "step": 4574 }, { "epoch": 1.2687188019966722, "grad_norm": 0.1984669268131256, "learning_rate": 7.5689150370079535e-06, "loss": 0.5088, "step": 4575 }, { "epoch": 1.268996117581808, "grad_norm": 0.19437134265899658, "learning_rate": 7.563841673805372e-06, "loss": 0.4721, "step": 4576 }, { "epoch": 1.269273433166944, "grad_norm": 0.2029186487197876, "learning_rate": 7.558769273778066e-06, "loss": 0.5095, "step": 4577 }, { "epoch": 1.2695507487520798, "grad_norm": 0.19555479288101196, "learning_rate": 7.553697837915791e-06, "loss": 0.4698, "step": 4578 }, { "epoch": 1.2698280643372157, "grad_norm": 0.20628714561462402, "learning_rate": 7.548627367208111e-06, "loss": 0.4781, "step": 4579 }, { "epoch": 1.2701053799223516, "grad_norm": 0.19298192858695984, "learning_rate": 7.543557862644421e-06, "loss": 0.4903, "step": 4580 }, { "epoch": 1.2703826955074875, "grad_norm": 0.19302377104759216, "learning_rate": 7.538489325213913e-06, "loss": 0.5233, "step": 4581 }, { "epoch": 1.2706600110926234, "grad_norm": 0.20301836729049683, "learning_rate": 7.533421755905587e-06, "loss": 0.5101, "step": 4582 }, { "epoch": 1.2709373266777593, "grad_norm": 0.19993318617343903, "learning_rate": 7.528355155708261e-06, "loss": 0.4907, "step": 4583 }, { "epoch": 1.2712146422628952, "grad_norm": 0.19807255268096924, "learning_rate": 7.523289525610569e-06, "loss": 0.4911, "step": 4584 }, { "epoch": 1.271491957848031, "grad_norm": 0.20224188268184662, "learning_rate": 7.518224866600945e-06, "loss": 0.4961, "step": 4585 }, { "epoch": 1.271769273433167, "grad_norm": 0.194349467754364, "learning_rate": 7.513161179667636e-06, "loss": 0.4848, "step": 4586 }, { "epoch": 1.2720465890183028, "grad_norm": 0.19369667768478394, "learning_rate": 7.508098465798707e-06, "loss": 0.5079, "step": 4587 }, { "epoch": 1.2723239046034387, "grad_norm": 0.19323889911174774, "learning_rate": 7.50303672598203e-06, "loss": 0.4986, "step": 4588 }, { "epoch": 1.2726012201885746, "grad_norm": 0.19939887523651123, "learning_rate": 7.4979759612052754e-06, "loss": 0.5052, "step": 4589 }, { "epoch": 1.2728785357737105, "grad_norm": 0.19758670032024384, "learning_rate": 7.4929161724559355e-06, "loss": 0.5028, "step": 4590 }, { "epoch": 1.2731558513588463, "grad_norm": 0.19541890919208527, "learning_rate": 7.487857360721312e-06, "loss": 0.471, "step": 4591 }, { "epoch": 1.2734331669439822, "grad_norm": 0.2024282068014145, "learning_rate": 7.482799526988515e-06, "loss": 0.5235, "step": 4592 }, { "epoch": 1.2737104825291181, "grad_norm": 0.19754981994628906, "learning_rate": 7.4777426722444505e-06, "loss": 0.4934, "step": 4593 }, { "epoch": 1.273987798114254, "grad_norm": 0.20427443087100983, "learning_rate": 7.472686797475861e-06, "loss": 0.4731, "step": 4594 }, { "epoch": 1.2742651136993899, "grad_norm": 0.18958698213100433, "learning_rate": 7.46763190366927e-06, "loss": 0.4863, "step": 4595 }, { "epoch": 1.2745424292845258, "grad_norm": 0.19229987263679504, "learning_rate": 7.462577991811028e-06, "loss": 0.4881, "step": 4596 }, { "epoch": 1.2748197448696617, "grad_norm": 0.2041274905204773, "learning_rate": 7.4575250628872745e-06, "loss": 0.4933, "step": 4597 }, { "epoch": 1.2750970604547975, "grad_norm": 0.1980563998222351, "learning_rate": 7.452473117883989e-06, "loss": 0.5078, "step": 4598 }, { "epoch": 1.2753743760399334, "grad_norm": 0.19617661833763123, "learning_rate": 7.4474221577869265e-06, "loss": 0.5231, "step": 4599 }, { "epoch": 1.2756516916250693, "grad_norm": 0.19393262267112732, "learning_rate": 7.442372183581664e-06, "loss": 0.4959, "step": 4600 }, { "epoch": 1.2759290072102052, "grad_norm": 0.2189975529909134, "learning_rate": 7.43732319625359e-06, "loss": 0.501, "step": 4601 }, { "epoch": 1.276206322795341, "grad_norm": 0.21371406316757202, "learning_rate": 7.432275196787894e-06, "loss": 0.5012, "step": 4602 }, { "epoch": 1.276483638380477, "grad_norm": 0.20440706610679626, "learning_rate": 7.427228186169575e-06, "loss": 0.4698, "step": 4603 }, { "epoch": 1.2767609539656128, "grad_norm": 0.19952230155467987, "learning_rate": 7.422182165383434e-06, "loss": 0.5198, "step": 4604 }, { "epoch": 1.2770382695507487, "grad_norm": 0.20963895320892334, "learning_rate": 7.417137135414088e-06, "loss": 0.4953, "step": 4605 }, { "epoch": 1.2773155851358846, "grad_norm": 0.210064098238945, "learning_rate": 7.412093097245956e-06, "loss": 0.5, "step": 4606 }, { "epoch": 1.2775929007210205, "grad_norm": 0.20027229189872742, "learning_rate": 7.4070500518632595e-06, "loss": 0.499, "step": 4607 }, { "epoch": 1.2778702163061564, "grad_norm": 0.19522671401500702, "learning_rate": 7.4020080002500355e-06, "loss": 0.4978, "step": 4608 }, { "epoch": 1.2781475318912923, "grad_norm": 0.1936059147119522, "learning_rate": 7.396966943390121e-06, "loss": 0.4872, "step": 4609 }, { "epoch": 1.2784248474764282, "grad_norm": 0.2015063762664795, "learning_rate": 7.391926882267159e-06, "loss": 0.5015, "step": 4610 }, { "epoch": 1.278702163061564, "grad_norm": 0.20017270743846893, "learning_rate": 7.386887817864592e-06, "loss": 0.5063, "step": 4611 }, { "epoch": 1.2789794786467, "grad_norm": 0.20706325769424438, "learning_rate": 7.381849751165684e-06, "loss": 0.5311, "step": 4612 }, { "epoch": 1.2792567942318358, "grad_norm": 0.19370242953300476, "learning_rate": 7.376812683153496e-06, "loss": 0.4901, "step": 4613 }, { "epoch": 1.2795341098169717, "grad_norm": 0.20481263101100922, "learning_rate": 7.371776614810883e-06, "loss": 0.5185, "step": 4614 }, { "epoch": 1.2798114254021076, "grad_norm": 0.20902620255947113, "learning_rate": 7.366741547120527e-06, "loss": 0.5268, "step": 4615 }, { "epoch": 1.2800887409872435, "grad_norm": 0.2147327959537506, "learning_rate": 7.361707481064898e-06, "loss": 0.5251, "step": 4616 }, { "epoch": 1.2803660565723793, "grad_norm": 0.20069043338298798, "learning_rate": 7.356674417626275e-06, "loss": 0.5176, "step": 4617 }, { "epoch": 1.2806433721575152, "grad_norm": 0.2038661241531372, "learning_rate": 7.351642357786741e-06, "loss": 0.5001, "step": 4618 }, { "epoch": 1.280920687742651, "grad_norm": 0.20059525966644287, "learning_rate": 7.34661130252819e-06, "loss": 0.5081, "step": 4619 }, { "epoch": 1.281198003327787, "grad_norm": 0.2055322527885437, "learning_rate": 7.341581252832309e-06, "loss": 0.5234, "step": 4620 }, { "epoch": 1.2814753189129229, "grad_norm": 0.1974288672208786, "learning_rate": 7.336552209680592e-06, "loss": 0.4774, "step": 4621 }, { "epoch": 1.2817526344980588, "grad_norm": 0.19128572940826416, "learning_rate": 7.3315241740543455e-06, "loss": 0.4954, "step": 4622 }, { "epoch": 1.2820299500831946, "grad_norm": 0.3327876925468445, "learning_rate": 7.326497146934669e-06, "loss": 0.5184, "step": 4623 }, { "epoch": 1.2823072656683305, "grad_norm": 0.19013381004333496, "learning_rate": 7.3214711293024694e-06, "loss": 0.4776, "step": 4624 }, { "epoch": 1.2825845812534664, "grad_norm": 0.20403680205345154, "learning_rate": 7.316446122138451e-06, "loss": 0.522, "step": 4625 }, { "epoch": 1.2828618968386023, "grad_norm": 0.19423431158065796, "learning_rate": 7.311422126423131e-06, "loss": 0.4802, "step": 4626 }, { "epoch": 1.2831392124237382, "grad_norm": 0.20686981081962585, "learning_rate": 7.306399143136825e-06, "loss": 0.4908, "step": 4627 }, { "epoch": 1.283416528008874, "grad_norm": 0.19685044884681702, "learning_rate": 7.301377173259644e-06, "loss": 0.4766, "step": 4628 }, { "epoch": 1.28369384359401, "grad_norm": 0.20665518939495087, "learning_rate": 7.296356217771515e-06, "loss": 0.4733, "step": 4629 }, { "epoch": 1.2839711591791458, "grad_norm": 0.20772451162338257, "learning_rate": 7.291336277652158e-06, "loss": 0.4944, "step": 4630 }, { "epoch": 1.2842484747642817, "grad_norm": 0.20459213852882385, "learning_rate": 7.286317353881094e-06, "loss": 0.5046, "step": 4631 }, { "epoch": 1.2845257903494176, "grad_norm": 0.198153555393219, "learning_rate": 7.281299447437637e-06, "loss": 0.5134, "step": 4632 }, { "epoch": 1.2848031059345535, "grad_norm": 0.2004951536655426, "learning_rate": 7.276282559300937e-06, "loss": 0.4975, "step": 4633 }, { "epoch": 1.2850804215196894, "grad_norm": 0.19988703727722168, "learning_rate": 7.271266690449907e-06, "loss": 0.5216, "step": 4634 }, { "epoch": 1.2853577371048253, "grad_norm": 0.20041632652282715, "learning_rate": 7.266251841863275e-06, "loss": 0.4996, "step": 4635 }, { "epoch": 1.2856350526899611, "grad_norm": 0.19913561642169952, "learning_rate": 7.2612380145195735e-06, "loss": 0.4907, "step": 4636 }, { "epoch": 1.285912368275097, "grad_norm": 0.21043738722801208, "learning_rate": 7.256225209397139e-06, "loss": 0.5161, "step": 4637 }, { "epoch": 1.286189683860233, "grad_norm": 0.2178940773010254, "learning_rate": 7.2512134274740986e-06, "loss": 0.5072, "step": 4638 }, { "epoch": 1.2864669994453688, "grad_norm": 0.2040819674730301, "learning_rate": 7.246202669728375e-06, "loss": 0.522, "step": 4639 }, { "epoch": 1.2867443150305047, "grad_norm": 0.20290911197662354, "learning_rate": 7.241192937137708e-06, "loss": 0.5249, "step": 4640 }, { "epoch": 1.2870216306156406, "grad_norm": 0.2622935473918915, "learning_rate": 7.2361842306796356e-06, "loss": 0.5143, "step": 4641 }, { "epoch": 1.2872989462007765, "grad_norm": 0.20093023777008057, "learning_rate": 7.231176551331476e-06, "loss": 0.5218, "step": 4642 }, { "epoch": 1.2875762617859123, "grad_norm": 0.19977106153964996, "learning_rate": 7.226169900070365e-06, "loss": 0.5273, "step": 4643 }, { "epoch": 1.2878535773710482, "grad_norm": 0.20094482600688934, "learning_rate": 7.221164277873238e-06, "loss": 0.5079, "step": 4644 }, { "epoch": 1.288130892956184, "grad_norm": 0.2162494659423828, "learning_rate": 7.216159685716817e-06, "loss": 0.4924, "step": 4645 }, { "epoch": 1.28840820854132, "grad_norm": 0.19907453656196594, "learning_rate": 7.211156124577639e-06, "loss": 0.4912, "step": 4646 }, { "epoch": 1.2886855241264559, "grad_norm": 0.20266936719417572, "learning_rate": 7.206153595432022e-06, "loss": 0.4964, "step": 4647 }, { "epoch": 1.2889628397115918, "grad_norm": 0.19707994163036346, "learning_rate": 7.2011520992561e-06, "loss": 0.5026, "step": 4648 }, { "epoch": 1.2892401552967276, "grad_norm": 0.1953379213809967, "learning_rate": 7.196151637025788e-06, "loss": 0.5141, "step": 4649 }, { "epoch": 1.2895174708818635, "grad_norm": 0.1917579025030136, "learning_rate": 7.191152209716822e-06, "loss": 0.4888, "step": 4650 }, { "epoch": 1.2897947864669994, "grad_norm": 0.20420075953006744, "learning_rate": 7.186153818304708e-06, "loss": 0.5009, "step": 4651 }, { "epoch": 1.2900721020521353, "grad_norm": 0.19985781610012054, "learning_rate": 7.1811564637647734e-06, "loss": 0.5, "step": 4652 }, { "epoch": 1.2903494176372712, "grad_norm": 0.2002001255750656, "learning_rate": 7.176160147072138e-06, "loss": 0.5068, "step": 4653 }, { "epoch": 1.290626733222407, "grad_norm": 0.2185979187488556, "learning_rate": 7.171164869201709e-06, "loss": 0.5064, "step": 4654 }, { "epoch": 1.290904048807543, "grad_norm": 0.20650553703308105, "learning_rate": 7.166170631128194e-06, "loss": 0.5152, "step": 4655 }, { "epoch": 1.2911813643926788, "grad_norm": 0.20761191844940186, "learning_rate": 7.161177433826108e-06, "loss": 0.4963, "step": 4656 }, { "epoch": 1.2914586799778147, "grad_norm": 0.20723947882652283, "learning_rate": 7.156185278269756e-06, "loss": 0.5188, "step": 4657 }, { "epoch": 1.2917359955629506, "grad_norm": 0.21793848276138306, "learning_rate": 7.151194165433234e-06, "loss": 0.4931, "step": 4658 }, { "epoch": 1.2920133111480865, "grad_norm": 0.19249577820301056, "learning_rate": 7.146204096290446e-06, "loss": 0.5235, "step": 4659 }, { "epoch": 1.2922906267332224, "grad_norm": 0.20929144322872162, "learning_rate": 7.1412150718150884e-06, "loss": 0.5141, "step": 4660 }, { "epoch": 1.2925679423183583, "grad_norm": 0.3471876382827759, "learning_rate": 7.136227092980649e-06, "loss": 0.5095, "step": 4661 }, { "epoch": 1.2928452579034941, "grad_norm": 0.1970401108264923, "learning_rate": 7.131240160760408e-06, "loss": 0.4993, "step": 4662 }, { "epoch": 1.29312257348863, "grad_norm": 0.2073030173778534, "learning_rate": 7.126254276127456e-06, "loss": 0.5111, "step": 4663 }, { "epoch": 1.293399889073766, "grad_norm": 0.19547441601753235, "learning_rate": 7.1212694400546734e-06, "loss": 0.5045, "step": 4664 }, { "epoch": 1.2936772046589018, "grad_norm": 0.1951918601989746, "learning_rate": 7.116285653514729e-06, "loss": 0.4969, "step": 4665 }, { "epoch": 1.2939545202440377, "grad_norm": 0.1991628259420395, "learning_rate": 7.111302917480089e-06, "loss": 0.4958, "step": 4666 }, { "epoch": 1.2942318358291736, "grad_norm": 0.19393709301948547, "learning_rate": 7.10632123292302e-06, "loss": 0.4918, "step": 4667 }, { "epoch": 1.2945091514143094, "grad_norm": 0.19460806250572205, "learning_rate": 7.101340600815587e-06, "loss": 0.4921, "step": 4668 }, { "epoch": 1.2947864669994453, "grad_norm": 0.1952556073665619, "learning_rate": 7.096361022129637e-06, "loss": 0.4937, "step": 4669 }, { "epoch": 1.2950637825845812, "grad_norm": 0.20183762907981873, "learning_rate": 7.0913824978368075e-06, "loss": 0.4777, "step": 4670 }, { "epoch": 1.295341098169717, "grad_norm": 0.2849336564540863, "learning_rate": 7.086405028908563e-06, "loss": 0.4967, "step": 4671 }, { "epoch": 1.295618413754853, "grad_norm": 0.19828931987285614, "learning_rate": 7.081428616316127e-06, "loss": 0.523, "step": 4672 }, { "epoch": 1.2958957293399889, "grad_norm": 0.20423446595668793, "learning_rate": 7.076453261030524e-06, "loss": 0.4865, "step": 4673 }, { "epoch": 1.2961730449251248, "grad_norm": 0.18961788713932037, "learning_rate": 7.0714789640225865e-06, "loss": 0.4901, "step": 4674 }, { "epoch": 1.2964503605102606, "grad_norm": 0.2092195451259613, "learning_rate": 7.0665057262629316e-06, "loss": 0.5204, "step": 4675 }, { "epoch": 1.2967276760953965, "grad_norm": 0.2078777551651001, "learning_rate": 7.061533548721969e-06, "loss": 0.5013, "step": 4676 }, { "epoch": 1.2970049916805324, "grad_norm": 0.2117914855480194, "learning_rate": 7.0565624323698955e-06, "loss": 0.5268, "step": 4677 }, { "epoch": 1.2972823072656683, "grad_norm": 0.19487378001213074, "learning_rate": 7.051592378176711e-06, "loss": 0.5368, "step": 4678 }, { "epoch": 1.2975596228508042, "grad_norm": 0.19526413083076477, "learning_rate": 7.046623387112212e-06, "loss": 0.502, "step": 4679 }, { "epoch": 1.29783693843594, "grad_norm": 0.19563087821006775, "learning_rate": 7.041655460145971e-06, "loss": 0.5248, "step": 4680 }, { "epoch": 1.298114254021076, "grad_norm": 0.1995537430047989, "learning_rate": 7.0366885982473635e-06, "loss": 0.5115, "step": 4681 }, { "epoch": 1.2983915696062118, "grad_norm": 0.20692096650600433, "learning_rate": 7.0317228023855654e-06, "loss": 0.4994, "step": 4682 }, { "epoch": 1.2986688851913477, "grad_norm": 0.20720021426677704, "learning_rate": 7.026758073529527e-06, "loss": 0.4766, "step": 4683 }, { "epoch": 1.2989462007764836, "grad_norm": 0.20226465165615082, "learning_rate": 7.021794412647993e-06, "loss": 0.5035, "step": 4684 }, { "epoch": 1.2992235163616195, "grad_norm": 0.21326394379138947, "learning_rate": 7.016831820709513e-06, "loss": 0.4912, "step": 4685 }, { "epoch": 1.2995008319467554, "grad_norm": 0.23130756616592407, "learning_rate": 7.0118702986824225e-06, "loss": 0.5091, "step": 4686 }, { "epoch": 1.2997781475318912, "grad_norm": 0.19730432331562042, "learning_rate": 7.006909847534837e-06, "loss": 0.5001, "step": 4687 }, { "epoch": 1.3000554631170271, "grad_norm": 0.21078179776668549, "learning_rate": 7.0019504682346835e-06, "loss": 0.4988, "step": 4688 }, { "epoch": 1.300332778702163, "grad_norm": 0.2023860216140747, "learning_rate": 6.996992161749656e-06, "loss": 0.4996, "step": 4689 }, { "epoch": 1.300610094287299, "grad_norm": 0.19739584624767303, "learning_rate": 6.992034929047261e-06, "loss": 0.4843, "step": 4690 }, { "epoch": 1.3008874098724348, "grad_norm": 0.2066974639892578, "learning_rate": 6.987078771094779e-06, "loss": 0.5207, "step": 4691 }, { "epoch": 1.3011647254575707, "grad_norm": 0.20626220107078552, "learning_rate": 6.982123688859295e-06, "loss": 0.5055, "step": 4692 }, { "epoch": 1.3014420410427066, "grad_norm": 0.19193756580352783, "learning_rate": 6.977169683307667e-06, "loss": 0.5372, "step": 4693 }, { "epoch": 1.3017193566278424, "grad_norm": 0.19858404994010925, "learning_rate": 6.972216755406559e-06, "loss": 0.4943, "step": 4694 }, { "epoch": 1.3019966722129783, "grad_norm": 0.19214889407157898, "learning_rate": 6.967264906122422e-06, "loss": 0.4917, "step": 4695 }, { "epoch": 1.3022739877981142, "grad_norm": 0.24965661764144897, "learning_rate": 6.962314136421485e-06, "loss": 0.5006, "step": 4696 }, { "epoch": 1.30255130338325, "grad_norm": 0.20609916746616364, "learning_rate": 6.957364447269785e-06, "loss": 0.4954, "step": 4697 }, { "epoch": 1.302828618968386, "grad_norm": 0.18501780927181244, "learning_rate": 6.9524158396331225e-06, "loss": 0.4895, "step": 4698 }, { "epoch": 1.3031059345535219, "grad_norm": 0.19009456038475037, "learning_rate": 6.947468314477115e-06, "loss": 0.4812, "step": 4699 }, { "epoch": 1.3033832501386577, "grad_norm": 0.20104162395000458, "learning_rate": 6.942521872767148e-06, "loss": 0.5101, "step": 4700 }, { "epoch": 1.3036605657237936, "grad_norm": 0.21302615106105804, "learning_rate": 6.937576515468405e-06, "loss": 0.4959, "step": 4701 }, { "epoch": 1.3039378813089295, "grad_norm": 0.19924134016036987, "learning_rate": 6.932632243545864e-06, "loss": 0.5155, "step": 4702 }, { "epoch": 1.3042151968940654, "grad_norm": 0.2015244960784912, "learning_rate": 6.927689057964274e-06, "loss": 0.5192, "step": 4703 }, { "epoch": 1.3044925124792013, "grad_norm": 0.21109920740127563, "learning_rate": 6.9227469596881825e-06, "loss": 0.5109, "step": 4704 }, { "epoch": 1.3047698280643372, "grad_norm": 0.19857889413833618, "learning_rate": 6.9178059496819246e-06, "loss": 0.4955, "step": 4705 }, { "epoch": 1.305047143649473, "grad_norm": 0.20074202120304108, "learning_rate": 6.912866028909627e-06, "loss": 0.4898, "step": 4706 }, { "epoch": 1.305324459234609, "grad_norm": 0.20996864140033722, "learning_rate": 6.907927198335197e-06, "loss": 0.5067, "step": 4707 }, { "epoch": 1.3056017748197448, "grad_norm": 0.20864242315292358, "learning_rate": 6.902989458922319e-06, "loss": 0.5017, "step": 4708 }, { "epoch": 1.3058790904048807, "grad_norm": 0.2010875642299652, "learning_rate": 6.898052811634498e-06, "loss": 0.5122, "step": 4709 }, { "epoch": 1.3061564059900166, "grad_norm": 0.1969454139471054, "learning_rate": 6.893117257434994e-06, "loss": 0.5032, "step": 4710 }, { "epoch": 1.3064337215751525, "grad_norm": 0.2069738507270813, "learning_rate": 6.88818279728686e-06, "loss": 0.501, "step": 4711 }, { "epoch": 1.3067110371602884, "grad_norm": 0.1900450736284256, "learning_rate": 6.883249432152944e-06, "loss": 0.5205, "step": 4712 }, { "epoch": 1.3069883527454242, "grad_norm": 0.22067134082317352, "learning_rate": 6.878317162995881e-06, "loss": 0.4896, "step": 4713 }, { "epoch": 1.3072656683305601, "grad_norm": 0.20373979210853577, "learning_rate": 6.8733859907780865e-06, "loss": 0.5217, "step": 4714 }, { "epoch": 1.307542983915696, "grad_norm": 0.20570501685142517, "learning_rate": 6.8684559164617525e-06, "loss": 0.5074, "step": 4715 }, { "epoch": 1.307820299500832, "grad_norm": 0.2102414071559906, "learning_rate": 6.8635269410088725e-06, "loss": 0.5142, "step": 4716 }, { "epoch": 1.3080976150859678, "grad_norm": 0.20017775893211365, "learning_rate": 6.8585990653812285e-06, "loss": 0.5078, "step": 4717 }, { "epoch": 1.3083749306711037, "grad_norm": 0.22755825519561768, "learning_rate": 6.8536722905403666e-06, "loss": 0.5103, "step": 4718 }, { "epoch": 1.3086522462562395, "grad_norm": 0.20453383028507233, "learning_rate": 6.848746617447644e-06, "loss": 0.4962, "step": 4719 }, { "epoch": 1.3089295618413754, "grad_norm": 0.21632753312587738, "learning_rate": 6.8438220470641785e-06, "loss": 0.5031, "step": 4720 }, { "epoch": 1.3092068774265113, "grad_norm": 0.19868405163288116, "learning_rate": 6.838898580350895e-06, "loss": 0.4784, "step": 4721 }, { "epoch": 1.3094841930116472, "grad_norm": 0.20900887250900269, "learning_rate": 6.833976218268478e-06, "loss": 0.5007, "step": 4722 }, { "epoch": 1.309761508596783, "grad_norm": 0.20499762892723083, "learning_rate": 6.829054961777423e-06, "loss": 0.5004, "step": 4723 }, { "epoch": 1.310038824181919, "grad_norm": 0.19857996702194214, "learning_rate": 6.8241348118379966e-06, "loss": 0.5087, "step": 4724 }, { "epoch": 1.3103161397670549, "grad_norm": 0.1966305822134018, "learning_rate": 6.819215769410243e-06, "loss": 0.4808, "step": 4725 }, { "epoch": 1.3105934553521907, "grad_norm": 0.2024124413728714, "learning_rate": 6.814297835454009e-06, "loss": 0.4972, "step": 4726 }, { "epoch": 1.3108707709373266, "grad_norm": 0.21003521978855133, "learning_rate": 6.8093810109289e-06, "loss": 0.4985, "step": 4727 }, { "epoch": 1.3111480865224625, "grad_norm": 0.19540292024612427, "learning_rate": 6.804465296794332e-06, "loss": 0.4898, "step": 4728 }, { "epoch": 1.3114254021075984, "grad_norm": 0.2004072666168213, "learning_rate": 6.799550694009479e-06, "loss": 0.4926, "step": 4729 }, { "epoch": 1.3117027176927343, "grad_norm": 0.19552730023860931, "learning_rate": 6.794637203533321e-06, "loss": 0.4841, "step": 4730 }, { "epoch": 1.3119800332778702, "grad_norm": 0.20091231167316437, "learning_rate": 6.789724826324602e-06, "loss": 0.4953, "step": 4731 }, { "epoch": 1.312257348863006, "grad_norm": 0.20696038007736206, "learning_rate": 6.78481356334186e-06, "loss": 0.5117, "step": 4732 }, { "epoch": 1.312534664448142, "grad_norm": 0.19968093931674957, "learning_rate": 6.779903415543418e-06, "loss": 0.4942, "step": 4733 }, { "epoch": 1.3128119800332778, "grad_norm": 0.19521543383598328, "learning_rate": 6.7749943838873636e-06, "loss": 0.4901, "step": 4734 }, { "epoch": 1.3130892956184137, "grad_norm": 0.24511419236660004, "learning_rate": 6.770086469331592e-06, "loss": 0.5023, "step": 4735 }, { "epoch": 1.3133666112035496, "grad_norm": 0.29729798436164856, "learning_rate": 6.765179672833757e-06, "loss": 0.4767, "step": 4736 }, { "epoch": 1.3136439267886855, "grad_norm": 0.19325131177902222, "learning_rate": 6.760273995351313e-06, "loss": 0.4878, "step": 4737 }, { "epoch": 1.3139212423738214, "grad_norm": 0.1989395171403885, "learning_rate": 6.75536943784148e-06, "loss": 0.4873, "step": 4738 }, { "epoch": 1.3141985579589572, "grad_norm": 0.20601066946983337, "learning_rate": 6.750466001261271e-06, "loss": 0.4805, "step": 4739 }, { "epoch": 1.3144758735440931, "grad_norm": 0.19977515935897827, "learning_rate": 6.74556368656748e-06, "loss": 0.5138, "step": 4740 }, { "epoch": 1.314753189129229, "grad_norm": 0.19737781584262848, "learning_rate": 6.740662494716675e-06, "loss": 0.4954, "step": 4741 }, { "epoch": 1.315030504714365, "grad_norm": 0.19452109932899475, "learning_rate": 6.7357624266652044e-06, "loss": 0.4723, "step": 4742 }, { "epoch": 1.3153078202995008, "grad_norm": 0.19652196764945984, "learning_rate": 6.730863483369203e-06, "loss": 0.483, "step": 4743 }, { "epoch": 1.3155851358846367, "grad_norm": 0.21024899184703827, "learning_rate": 6.725965665784592e-06, "loss": 0.5212, "step": 4744 }, { "epoch": 1.3158624514697725, "grad_norm": 0.20697370171546936, "learning_rate": 6.721068974867059e-06, "loss": 0.4693, "step": 4745 }, { "epoch": 1.3161397670549084, "grad_norm": 0.20207835733890533, "learning_rate": 6.71617341157207e-06, "loss": 0.5116, "step": 4746 }, { "epoch": 1.3164170826400443, "grad_norm": 0.19856862723827362, "learning_rate": 6.711278976854898e-06, "loss": 0.5203, "step": 4747 }, { "epoch": 1.3166943982251802, "grad_norm": 0.20049835741519928, "learning_rate": 6.706385671670566e-06, "loss": 0.485, "step": 4748 }, { "epoch": 1.316971713810316, "grad_norm": 0.21266470849514008, "learning_rate": 6.701493496973885e-06, "loss": 0.5069, "step": 4749 }, { "epoch": 1.317249029395452, "grad_norm": 0.20254302024841309, "learning_rate": 6.69660245371945e-06, "loss": 0.5294, "step": 4750 }, { "epoch": 1.3175263449805878, "grad_norm": 0.19671931862831116, "learning_rate": 6.691712542861639e-06, "loss": 0.4901, "step": 4751 }, { "epoch": 1.3178036605657237, "grad_norm": 0.2424955815076828, "learning_rate": 6.686823765354599e-06, "loss": 0.518, "step": 4752 }, { "epoch": 1.3180809761508596, "grad_norm": 0.2099234014749527, "learning_rate": 6.681936122152255e-06, "loss": 0.5131, "step": 4753 }, { "epoch": 1.3183582917359955, "grad_norm": 0.1978161633014679, "learning_rate": 6.67704961420832e-06, "loss": 0.4835, "step": 4754 }, { "epoch": 1.3186356073211314, "grad_norm": 0.197959765791893, "learning_rate": 6.6721642424762866e-06, "loss": 0.4862, "step": 4755 }, { "epoch": 1.3189129229062673, "grad_norm": 0.19919045269489288, "learning_rate": 6.667280007909416e-06, "loss": 0.4858, "step": 4756 }, { "epoch": 1.3191902384914032, "grad_norm": 0.2116900086402893, "learning_rate": 6.662396911460745e-06, "loss": 0.5011, "step": 4757 }, { "epoch": 1.319467554076539, "grad_norm": 0.20922990143299103, "learning_rate": 6.657514954083099e-06, "loss": 0.5122, "step": 4758 }, { "epoch": 1.319744869661675, "grad_norm": 0.24156232178211212, "learning_rate": 6.652634136729086e-06, "loss": 0.4908, "step": 4759 }, { "epoch": 1.3200221852468108, "grad_norm": 0.1963520497083664, "learning_rate": 6.647754460351072e-06, "loss": 0.5212, "step": 4760 }, { "epoch": 1.3202995008319467, "grad_norm": 0.20835274457931519, "learning_rate": 6.642875925901213e-06, "loss": 0.5094, "step": 4761 }, { "epoch": 1.3205768164170826, "grad_norm": 0.18857994675636292, "learning_rate": 6.63799853433145e-06, "loss": 0.4601, "step": 4762 }, { "epoch": 1.3208541320022185, "grad_norm": 0.20583271980285645, "learning_rate": 6.633122286593481e-06, "loss": 0.5022, "step": 4763 }, { "epoch": 1.3211314475873543, "grad_norm": 0.21066723763942719, "learning_rate": 6.628247183638789e-06, "loss": 0.4997, "step": 4764 }, { "epoch": 1.3214087631724902, "grad_norm": 0.19231611490249634, "learning_rate": 6.623373226418642e-06, "loss": 0.4751, "step": 4765 }, { "epoch": 1.3216860787576261, "grad_norm": 0.1879042237997055, "learning_rate": 6.618500415884083e-06, "loss": 0.4813, "step": 4766 }, { "epoch": 1.321963394342762, "grad_norm": 0.20243264734745026, "learning_rate": 6.613628752985912e-06, "loss": 0.4951, "step": 4767 }, { "epoch": 1.3222407099278979, "grad_norm": 0.20071490108966827, "learning_rate": 6.608758238674733e-06, "loss": 0.4892, "step": 4768 }, { "epoch": 1.3225180255130338, "grad_norm": 0.213031604886055, "learning_rate": 6.603888873900905e-06, "loss": 0.5118, "step": 4769 }, { "epoch": 1.3227953410981697, "grad_norm": 0.2158973515033722, "learning_rate": 6.599020659614572e-06, "loss": 0.4931, "step": 4770 }, { "epoch": 1.3230726566833055, "grad_norm": 0.20497101545333862, "learning_rate": 6.594153596765655e-06, "loss": 0.5167, "step": 4771 }, { "epoch": 1.3233499722684414, "grad_norm": 0.19923308491706848, "learning_rate": 6.5892876863038385e-06, "loss": 0.477, "step": 4772 }, { "epoch": 1.3236272878535773, "grad_norm": 0.21297168731689453, "learning_rate": 6.584422929178602e-06, "loss": 0.5255, "step": 4773 }, { "epoch": 1.3239046034387132, "grad_norm": 0.20591717958450317, "learning_rate": 6.579559326339177e-06, "loss": 0.5326, "step": 4774 }, { "epoch": 1.324181919023849, "grad_norm": 0.20993812382221222, "learning_rate": 6.574696878734592e-06, "loss": 0.4997, "step": 4775 }, { "epoch": 1.324459234608985, "grad_norm": 0.20869500935077667, "learning_rate": 6.569835587313627e-06, "loss": 0.5205, "step": 4776 }, { "epoch": 1.3247365501941208, "grad_norm": 0.19962731003761292, "learning_rate": 6.5649754530248575e-06, "loss": 0.4994, "step": 4777 }, { "epoch": 1.3250138657792567, "grad_norm": 0.20245380699634552, "learning_rate": 6.560116476816627e-06, "loss": 0.5062, "step": 4778 }, { "epoch": 1.3252911813643926, "grad_norm": 0.20509305596351624, "learning_rate": 6.5552586596370465e-06, "loss": 0.501, "step": 4779 }, { "epoch": 1.3255684969495285, "grad_norm": 0.19942280650138855, "learning_rate": 6.5504020024340005e-06, "loss": 0.4664, "step": 4780 }, { "epoch": 1.3258458125346644, "grad_norm": 0.2001029998064041, "learning_rate": 6.545546506155154e-06, "loss": 0.4858, "step": 4781 }, { "epoch": 1.3261231281198003, "grad_norm": 0.19130992889404297, "learning_rate": 6.5406921717479474e-06, "loss": 0.5081, "step": 4782 }, { "epoch": 1.3264004437049361, "grad_norm": 0.20136502385139465, "learning_rate": 6.53583900015959e-06, "loss": 0.4889, "step": 4783 }, { "epoch": 1.326677759290072, "grad_norm": 0.20278695225715637, "learning_rate": 6.53098699233705e-06, "loss": 0.5011, "step": 4784 }, { "epoch": 1.326955074875208, "grad_norm": 0.1966562420129776, "learning_rate": 6.5261361492271054e-06, "loss": 0.4887, "step": 4785 }, { "epoch": 1.3272323904603438, "grad_norm": 0.20651812851428986, "learning_rate": 6.5212864717762696e-06, "loss": 0.5006, "step": 4786 }, { "epoch": 1.3275097060454797, "grad_norm": 0.210739865899086, "learning_rate": 6.516437960930843e-06, "loss": 0.5149, "step": 4787 }, { "epoch": 1.3277870216306156, "grad_norm": 0.21981686353683472, "learning_rate": 6.5115906176369025e-06, "loss": 0.5234, "step": 4788 }, { "epoch": 1.3280643372157515, "grad_norm": 0.19809675216674805, "learning_rate": 6.506744442840296e-06, "loss": 0.5058, "step": 4789 }, { "epoch": 1.3283416528008873, "grad_norm": 0.19718238711357117, "learning_rate": 6.501899437486637e-06, "loss": 0.5016, "step": 4790 }, { "epoch": 1.3286189683860232, "grad_norm": 0.19665461778640747, "learning_rate": 6.4970556025213095e-06, "loss": 0.4995, "step": 4791 }, { "epoch": 1.328896283971159, "grad_norm": 0.1910347193479538, "learning_rate": 6.492212938889481e-06, "loss": 0.4835, "step": 4792 }, { "epoch": 1.329173599556295, "grad_norm": 0.1989067643880844, "learning_rate": 6.487371447536084e-06, "loss": 0.4774, "step": 4793 }, { "epoch": 1.3294509151414309, "grad_norm": 0.1986207813024521, "learning_rate": 6.482531129405819e-06, "loss": 0.5111, "step": 4794 }, { "epoch": 1.3297282307265668, "grad_norm": 0.20336773991584778, "learning_rate": 6.477691985443157e-06, "loss": 0.5037, "step": 4795 }, { "epoch": 1.3300055463117026, "grad_norm": 0.19637836515903473, "learning_rate": 6.472854016592346e-06, "loss": 0.5016, "step": 4796 }, { "epoch": 1.3302828618968385, "grad_norm": 0.2004927545785904, "learning_rate": 6.468017223797407e-06, "loss": 0.509, "step": 4797 }, { "epoch": 1.3305601774819744, "grad_norm": 0.2119477391242981, "learning_rate": 6.463181608002118e-06, "loss": 0.5365, "step": 4798 }, { "epoch": 1.3308374930671103, "grad_norm": 0.20132261514663696, "learning_rate": 6.4583471701500395e-06, "loss": 0.5112, "step": 4799 }, { "epoch": 1.3311148086522462, "grad_norm": 0.2103944718837738, "learning_rate": 6.453513911184503e-06, "loss": 0.4846, "step": 4800 }, { "epoch": 1.331392124237382, "grad_norm": 0.18914499878883362, "learning_rate": 6.448681832048603e-06, "loss": 0.4821, "step": 4801 }, { "epoch": 1.331669439822518, "grad_norm": 0.19525542855262756, "learning_rate": 6.443850933685197e-06, "loss": 0.5018, "step": 4802 }, { "epoch": 1.3319467554076538, "grad_norm": 0.19284358620643616, "learning_rate": 6.4390212170369305e-06, "loss": 0.478, "step": 4803 }, { "epoch": 1.3322240709927897, "grad_norm": 0.20030631124973297, "learning_rate": 6.43419268304621e-06, "loss": 0.5084, "step": 4804 }, { "epoch": 1.3325013865779256, "grad_norm": 0.19547629356384277, "learning_rate": 6.429365332655204e-06, "loss": 0.4884, "step": 4805 }, { "epoch": 1.3327787021630615, "grad_norm": 0.20050625503063202, "learning_rate": 6.4245391668058655e-06, "loss": 0.526, "step": 4806 }, { "epoch": 1.3330560177481974, "grad_norm": 0.24080616235733032, "learning_rate": 6.419714186439896e-06, "loss": 0.4912, "step": 4807 }, { "epoch": 1.3333333333333333, "grad_norm": 0.2031663954257965, "learning_rate": 6.414890392498787e-06, "loss": 0.5048, "step": 4808 }, { "epoch": 1.3336106489184691, "grad_norm": 0.20676189661026, "learning_rate": 6.410067785923779e-06, "loss": 0.4905, "step": 4809 }, { "epoch": 1.333887964503605, "grad_norm": 0.19812701642513275, "learning_rate": 6.405246367655903e-06, "loss": 0.5076, "step": 4810 }, { "epoch": 1.334165280088741, "grad_norm": 0.20559370517730713, "learning_rate": 6.4004261386359315e-06, "loss": 0.4868, "step": 4811 }, { "epoch": 1.3344425956738768, "grad_norm": 0.21239563822746277, "learning_rate": 6.395607099804426e-06, "loss": 0.4941, "step": 4812 }, { "epoch": 1.3347199112590127, "grad_norm": 0.20791207253932953, "learning_rate": 6.390789252101713e-06, "loss": 0.5058, "step": 4813 }, { "epoch": 1.3349972268441486, "grad_norm": 0.20702631771564484, "learning_rate": 6.3859725964678735e-06, "loss": 0.5146, "step": 4814 }, { "epoch": 1.3352745424292845, "grad_norm": 0.1996176540851593, "learning_rate": 6.381157133842772e-06, "loss": 0.4976, "step": 4815 }, { "epoch": 1.3355518580144203, "grad_norm": 0.20184668898582458, "learning_rate": 6.376342865166024e-06, "loss": 0.4912, "step": 4816 }, { "epoch": 1.3358291735995562, "grad_norm": 0.20000986754894257, "learning_rate": 6.371529791377031e-06, "loss": 0.5215, "step": 4817 }, { "epoch": 1.336106489184692, "grad_norm": 0.1989196389913559, "learning_rate": 6.366717913414943e-06, "loss": 0.4972, "step": 4818 }, { "epoch": 1.336383804769828, "grad_norm": 0.19104993343353271, "learning_rate": 6.361907232218689e-06, "loss": 0.464, "step": 4819 }, { "epoch": 1.3366611203549639, "grad_norm": 0.20721067488193512, "learning_rate": 6.357097748726965e-06, "loss": 0.5046, "step": 4820 }, { "epoch": 1.3369384359400998, "grad_norm": 0.2302693873643875, "learning_rate": 6.3522894638782204e-06, "loss": 0.4778, "step": 4821 }, { "epoch": 1.3372157515252356, "grad_norm": 0.20015782117843628, "learning_rate": 6.347482378610678e-06, "loss": 0.5031, "step": 4822 }, { "epoch": 1.3374930671103715, "grad_norm": 0.19521984457969666, "learning_rate": 6.342676493862332e-06, "loss": 0.4701, "step": 4823 }, { "epoch": 1.3377703826955074, "grad_norm": 0.20052647590637207, "learning_rate": 6.337871810570943e-06, "loss": 0.498, "step": 4824 }, { "epoch": 1.3380476982806433, "grad_norm": 0.20175038278102875, "learning_rate": 6.333068329674021e-06, "loss": 0.5112, "step": 4825 }, { "epoch": 1.3383250138657792, "grad_norm": 0.21088631451129913, "learning_rate": 6.328266052108856e-06, "loss": 0.5264, "step": 4826 }, { "epoch": 1.338602329450915, "grad_norm": 0.20293080806732178, "learning_rate": 6.323464978812507e-06, "loss": 0.47, "step": 4827 }, { "epoch": 1.338879645036051, "grad_norm": 0.22624416649341583, "learning_rate": 6.318665110721786e-06, "loss": 0.5097, "step": 4828 }, { "epoch": 1.3391569606211868, "grad_norm": 0.20791591703891754, "learning_rate": 6.3138664487732675e-06, "loss": 0.4856, "step": 4829 }, { "epoch": 1.3394342762063227, "grad_norm": 0.20461194217205048, "learning_rate": 6.309068993903303e-06, "loss": 0.4912, "step": 4830 }, { "epoch": 1.3397115917914586, "grad_norm": 0.2024698108434677, "learning_rate": 6.304272747048009e-06, "loss": 0.4838, "step": 4831 }, { "epoch": 1.3399889073765945, "grad_norm": 0.20816753804683685, "learning_rate": 6.2994777091432535e-06, "loss": 0.5021, "step": 4832 }, { "epoch": 1.3402662229617304, "grad_norm": 0.2043784111738205, "learning_rate": 6.2946838811246734e-06, "loss": 0.5101, "step": 4833 }, { "epoch": 1.3405435385468663, "grad_norm": 0.19572801887989044, "learning_rate": 6.289891263927675e-06, "loss": 0.4688, "step": 4834 }, { "epoch": 1.3408208541320021, "grad_norm": 0.22099921107292175, "learning_rate": 6.285099858487428e-06, "loss": 0.4935, "step": 4835 }, { "epoch": 1.341098169717138, "grad_norm": 0.19714292883872986, "learning_rate": 6.280309665738854e-06, "loss": 0.496, "step": 4836 }, { "epoch": 1.341375485302274, "grad_norm": 0.21856801211833954, "learning_rate": 6.275520686616654e-06, "loss": 0.4743, "step": 4837 }, { "epoch": 1.3416528008874098, "grad_norm": 0.2152533382177353, "learning_rate": 6.270732922055286e-06, "loss": 0.499, "step": 4838 }, { "epoch": 1.3419301164725457, "grad_norm": 0.201574444770813, "learning_rate": 6.2659463729889665e-06, "loss": 0.4826, "step": 4839 }, { "epoch": 1.3422074320576816, "grad_norm": 0.18966776132583618, "learning_rate": 6.261161040351673e-06, "loss": 0.4745, "step": 4840 }, { "epoch": 1.3424847476428174, "grad_norm": 0.20077745616436005, "learning_rate": 6.256376925077155e-06, "loss": 0.4785, "step": 4841 }, { "epoch": 1.3427620632279533, "grad_norm": 0.19951693713665009, "learning_rate": 6.251594028098925e-06, "loss": 0.4906, "step": 4842 }, { "epoch": 1.3430393788130892, "grad_norm": 0.20605219900608063, "learning_rate": 6.246812350350245e-06, "loss": 0.5253, "step": 4843 }, { "epoch": 1.343316694398225, "grad_norm": 0.1993149369955063, "learning_rate": 6.242031892764156e-06, "loss": 0.5034, "step": 4844 }, { "epoch": 1.343594009983361, "grad_norm": 0.20574994385242462, "learning_rate": 6.237252656273439e-06, "loss": 0.5055, "step": 4845 }, { "epoch": 1.343871325568497, "grad_norm": 0.20431512594223022, "learning_rate": 6.232474641810664e-06, "loss": 0.4775, "step": 4846 }, { "epoch": 1.344148641153633, "grad_norm": 0.1970827877521515, "learning_rate": 6.2276978503081355e-06, "loss": 0.4963, "step": 4847 }, { "epoch": 1.3444259567387689, "grad_norm": 0.20088131725788116, "learning_rate": 6.222922282697944e-06, "loss": 0.5, "step": 4848 }, { "epoch": 1.3447032723239047, "grad_norm": 0.2057836651802063, "learning_rate": 6.218147939911917e-06, "loss": 0.4928, "step": 4849 }, { "epoch": 1.3449805879090406, "grad_norm": 0.20138069987297058, "learning_rate": 6.213374822881661e-06, "loss": 0.4929, "step": 4850 }, { "epoch": 1.3452579034941765, "grad_norm": 0.20331014692783356, "learning_rate": 6.208602932538545e-06, "loss": 0.492, "step": 4851 }, { "epoch": 1.3455352190793124, "grad_norm": 0.20663729310035706, "learning_rate": 6.203832269813678e-06, "loss": 0.5118, "step": 4852 }, { "epoch": 1.3458125346644483, "grad_norm": 0.20441415905952454, "learning_rate": 6.1990628356379535e-06, "loss": 0.5004, "step": 4853 }, { "epoch": 1.3460898502495842, "grad_norm": 0.2063564509153366, "learning_rate": 6.194294630942006e-06, "loss": 0.5033, "step": 4854 }, { "epoch": 1.34636716583472, "grad_norm": 0.2034982442855835, "learning_rate": 6.1895276566562465e-06, "loss": 0.4578, "step": 4855 }, { "epoch": 1.346644481419856, "grad_norm": 0.19431711733341217, "learning_rate": 6.184761913710829e-06, "loss": 0.4876, "step": 4856 }, { "epoch": 1.3469217970049918, "grad_norm": 0.2182120680809021, "learning_rate": 6.179997403035682e-06, "loss": 0.4799, "step": 4857 }, { "epoch": 1.3471991125901277, "grad_norm": 0.19659878313541412, "learning_rate": 6.175234125560492e-06, "loss": 0.4867, "step": 4858 }, { "epoch": 1.3474764281752636, "grad_norm": 0.21084415912628174, "learning_rate": 6.1704720822146955e-06, "loss": 0.4893, "step": 4859 }, { "epoch": 1.3477537437603995, "grad_norm": 0.20155581831932068, "learning_rate": 6.165711273927488e-06, "loss": 0.5106, "step": 4860 }, { "epoch": 1.3480310593455354, "grad_norm": 0.20054039359092712, "learning_rate": 6.160951701627836e-06, "loss": 0.4754, "step": 4861 }, { "epoch": 1.3483083749306712, "grad_norm": 0.2048387974500656, "learning_rate": 6.15619336624446e-06, "loss": 0.4928, "step": 4862 }, { "epoch": 1.3485856905158071, "grad_norm": 0.19565939903259277, "learning_rate": 6.151436268705831e-06, "loss": 0.4936, "step": 4863 }, { "epoch": 1.348863006100943, "grad_norm": 0.21946604549884796, "learning_rate": 6.1466804099401874e-06, "loss": 0.4924, "step": 4864 }, { "epoch": 1.349140321686079, "grad_norm": 0.20226682722568512, "learning_rate": 6.141925790875529e-06, "loss": 0.4983, "step": 4865 }, { "epoch": 1.3494176372712148, "grad_norm": 0.191081702709198, "learning_rate": 6.137172412439601e-06, "loss": 0.5019, "step": 4866 }, { "epoch": 1.3496949528563507, "grad_norm": 0.1935385912656784, "learning_rate": 6.132420275559912e-06, "loss": 0.4948, "step": 4867 }, { "epoch": 1.3499722684414865, "grad_norm": 0.20531508326530457, "learning_rate": 6.127669381163734e-06, "loss": 0.5112, "step": 4868 }, { "epoch": 1.3502495840266224, "grad_norm": 0.22489938139915466, "learning_rate": 6.122919730178095e-06, "loss": 0.5146, "step": 4869 }, { "epoch": 1.3505268996117583, "grad_norm": 0.20186354219913483, "learning_rate": 6.118171323529774e-06, "loss": 0.4934, "step": 4870 }, { "epoch": 1.3508042151968942, "grad_norm": 0.20553670823574066, "learning_rate": 6.113424162145307e-06, "loss": 0.5131, "step": 4871 }, { "epoch": 1.35108153078203, "grad_norm": 0.20304057002067566, "learning_rate": 6.108678246950994e-06, "loss": 0.5157, "step": 4872 }, { "epoch": 1.351358846367166, "grad_norm": 0.2151353359222412, "learning_rate": 6.103933578872896e-06, "loss": 0.4806, "step": 4873 }, { "epoch": 1.3516361619523019, "grad_norm": 0.1945381909608841, "learning_rate": 6.099190158836816e-06, "loss": 0.5012, "step": 4874 }, { "epoch": 1.3519134775374377, "grad_norm": 0.20316410064697266, "learning_rate": 6.094447987768315e-06, "loss": 0.4882, "step": 4875 }, { "epoch": 1.3521907931225736, "grad_norm": 0.2029489129781723, "learning_rate": 6.08970706659273e-06, "loss": 0.5021, "step": 4876 }, { "epoch": 1.3524681087077095, "grad_norm": 0.21302883327007294, "learning_rate": 6.084967396235136e-06, "loss": 0.527, "step": 4877 }, { "epoch": 1.3527454242928454, "grad_norm": 0.204268679022789, "learning_rate": 6.08022897762036e-06, "loss": 0.521, "step": 4878 }, { "epoch": 1.3530227398779813, "grad_norm": 0.20695167779922485, "learning_rate": 6.0754918116730004e-06, "loss": 0.4753, "step": 4879 }, { "epoch": 1.3533000554631172, "grad_norm": 0.2412513643503189, "learning_rate": 6.070755899317407e-06, "loss": 0.4799, "step": 4880 }, { "epoch": 1.353577371048253, "grad_norm": 0.1893726885318756, "learning_rate": 6.066021241477676e-06, "loss": 0.4823, "step": 4881 }, { "epoch": 1.353854686633389, "grad_norm": 0.20982672274112701, "learning_rate": 6.061287839077661e-06, "loss": 0.5285, "step": 4882 }, { "epoch": 1.3541320022185248, "grad_norm": 0.19612151384353638, "learning_rate": 6.056555693040981e-06, "loss": 0.5243, "step": 4883 }, { "epoch": 1.3544093178036607, "grad_norm": 0.20173022150993347, "learning_rate": 6.051824804291005e-06, "loss": 0.5089, "step": 4884 }, { "epoch": 1.3546866333887966, "grad_norm": 0.20177814364433289, "learning_rate": 6.047095173750846e-06, "loss": 0.508, "step": 4885 }, { "epoch": 1.3549639489739325, "grad_norm": 0.21217969059944153, "learning_rate": 6.042366802343389e-06, "loss": 0.4961, "step": 4886 }, { "epoch": 1.3552412645590683, "grad_norm": 0.21045343577861786, "learning_rate": 6.0376396909912575e-06, "loss": 0.471, "step": 4887 }, { "epoch": 1.3555185801442042, "grad_norm": 0.20939841866493225, "learning_rate": 6.032913840616843e-06, "loss": 0.5066, "step": 4888 }, { "epoch": 1.3557958957293401, "grad_norm": 0.20125700533390045, "learning_rate": 6.028189252142276e-06, "loss": 0.4869, "step": 4889 }, { "epoch": 1.356073211314476, "grad_norm": 0.19696246087551117, "learning_rate": 6.023465926489453e-06, "loss": 0.5013, "step": 4890 }, { "epoch": 1.3563505268996119, "grad_norm": 0.19802086055278778, "learning_rate": 6.018743864580025e-06, "loss": 0.4694, "step": 4891 }, { "epoch": 1.3566278424847478, "grad_norm": 0.19850695133209229, "learning_rate": 6.014023067335382e-06, "loss": 0.5089, "step": 4892 }, { "epoch": 1.3569051580698837, "grad_norm": 0.20037485659122467, "learning_rate": 6.009303535676686e-06, "loss": 0.4823, "step": 4893 }, { "epoch": 1.3571824736550195, "grad_norm": 0.19831885397434235, "learning_rate": 6.004585270524833e-06, "loss": 0.4821, "step": 4894 }, { "epoch": 1.3574597892401554, "grad_norm": 0.20965071022510529, "learning_rate": 5.999868272800492e-06, "loss": 0.4821, "step": 4895 }, { "epoch": 1.3577371048252913, "grad_norm": 0.19735604524612427, "learning_rate": 5.995152543424064e-06, "loss": 0.472, "step": 4896 }, { "epoch": 1.3580144204104272, "grad_norm": 0.20253437757492065, "learning_rate": 5.990438083315721e-06, "loss": 0.5013, "step": 4897 }, { "epoch": 1.358291735995563, "grad_norm": 0.19695337116718292, "learning_rate": 5.985724893395371e-06, "loss": 0.4982, "step": 4898 }, { "epoch": 1.358569051580699, "grad_norm": 0.2138115018606186, "learning_rate": 5.981012974582688e-06, "loss": 0.511, "step": 4899 }, { "epoch": 1.3588463671658348, "grad_norm": 0.20029625296592712, "learning_rate": 5.976302327797096e-06, "loss": 0.5215, "step": 4900 }, { "epoch": 1.3591236827509707, "grad_norm": 0.20940783619880676, "learning_rate": 5.9715929539577595e-06, "loss": 0.5094, "step": 4901 }, { "epoch": 1.3594009983361066, "grad_norm": 0.2135714590549469, "learning_rate": 5.966884853983597e-06, "loss": 0.5108, "step": 4902 }, { "epoch": 1.3596783139212425, "grad_norm": 0.19404084980487823, "learning_rate": 5.9621780287932995e-06, "loss": 0.4732, "step": 4903 }, { "epoch": 1.3599556295063784, "grad_norm": 0.20225512981414795, "learning_rate": 5.957472479305286e-06, "loss": 0.4923, "step": 4904 }, { "epoch": 1.3602329450915143, "grad_norm": 0.20398668944835663, "learning_rate": 5.952768206437727e-06, "loss": 0.478, "step": 4905 }, { "epoch": 1.3605102606766502, "grad_norm": 0.2068692445755005, "learning_rate": 5.9480652111085566e-06, "loss": 0.5464, "step": 4906 }, { "epoch": 1.360787576261786, "grad_norm": 0.20321063697338104, "learning_rate": 5.9433634942354595e-06, "loss": 0.4954, "step": 4907 }, { "epoch": 1.361064891846922, "grad_norm": 0.21151407063007355, "learning_rate": 5.938663056735859e-06, "loss": 0.5176, "step": 4908 }, { "epoch": 1.3613422074320578, "grad_norm": 0.20232000946998596, "learning_rate": 5.93396389952693e-06, "loss": 0.524, "step": 4909 }, { "epoch": 1.3616195230171937, "grad_norm": 0.21282248198986053, "learning_rate": 5.92926602352561e-06, "loss": 0.5043, "step": 4910 }, { "epoch": 1.3618968386023296, "grad_norm": 0.20285063982009888, "learning_rate": 5.92456942964858e-06, "loss": 0.4926, "step": 4911 }, { "epoch": 1.3621741541874655, "grad_norm": 0.26317086815834045, "learning_rate": 5.9198741188122675e-06, "loss": 0.4847, "step": 4912 }, { "epoch": 1.3624514697726013, "grad_norm": 0.20316633582115173, "learning_rate": 5.915180091932843e-06, "loss": 0.521, "step": 4913 }, { "epoch": 1.3627287853577372, "grad_norm": 0.2053111493587494, "learning_rate": 5.910487349926251e-06, "loss": 0.4979, "step": 4914 }, { "epoch": 1.3630061009428731, "grad_norm": 1.0747162103652954, "learning_rate": 5.905795893708166e-06, "loss": 0.4933, "step": 4915 }, { "epoch": 1.363283416528009, "grad_norm": 0.19813428819179535, "learning_rate": 5.901105724194006e-06, "loss": 0.5115, "step": 4916 }, { "epoch": 1.3635607321131449, "grad_norm": 0.2020360231399536, "learning_rate": 5.896416842298953e-06, "loss": 0.4945, "step": 4917 }, { "epoch": 1.3638380476982808, "grad_norm": 0.20236392319202423, "learning_rate": 5.891729248937938e-06, "loss": 0.4844, "step": 4918 }, { "epoch": 1.3641153632834166, "grad_norm": 0.20511947572231293, "learning_rate": 5.8870429450256295e-06, "loss": 0.4872, "step": 4919 }, { "epoch": 1.3643926788685525, "grad_norm": 0.2103184312582016, "learning_rate": 5.882357931476446e-06, "loss": 0.519, "step": 4920 }, { "epoch": 1.3646699944536884, "grad_norm": 0.20535695552825928, "learning_rate": 5.877674209204559e-06, "loss": 0.5072, "step": 4921 }, { "epoch": 1.3649473100388243, "grad_norm": 0.20085355639457703, "learning_rate": 5.872991779123894e-06, "loss": 0.4931, "step": 4922 }, { "epoch": 1.3652246256239602, "grad_norm": 0.19907495379447937, "learning_rate": 5.8683106421481084e-06, "loss": 0.5263, "step": 4923 }, { "epoch": 1.365501941209096, "grad_norm": 0.2058345228433609, "learning_rate": 5.863630799190624e-06, "loss": 0.5022, "step": 4924 }, { "epoch": 1.365779256794232, "grad_norm": 0.19591206312179565, "learning_rate": 5.8589522511645944e-06, "loss": 0.4979, "step": 4925 }, { "epoch": 1.3660565723793678, "grad_norm": 0.212891086935997, "learning_rate": 5.854274998982935e-06, "loss": 0.5211, "step": 4926 }, { "epoch": 1.3663338879645037, "grad_norm": 0.20310524106025696, "learning_rate": 5.8495990435582945e-06, "loss": 0.4923, "step": 4927 }, { "epoch": 1.3666112035496396, "grad_norm": 0.2070825695991516, "learning_rate": 5.844924385803078e-06, "loss": 0.5088, "step": 4928 }, { "epoch": 1.3668885191347755, "grad_norm": 0.1995309591293335, "learning_rate": 5.8402510266294435e-06, "loss": 0.4637, "step": 4929 }, { "epoch": 1.3671658347199114, "grad_norm": 0.19737666845321655, "learning_rate": 5.835578966949276e-06, "loss": 0.5073, "step": 4930 }, { "epoch": 1.3674431503050473, "grad_norm": 0.2089546024799347, "learning_rate": 5.830908207674225e-06, "loss": 0.4981, "step": 4931 }, { "epoch": 1.3677204658901831, "grad_norm": 0.20078176259994507, "learning_rate": 5.826238749715675e-06, "loss": 0.4811, "step": 4932 }, { "epoch": 1.367997781475319, "grad_norm": 0.20612461864948273, "learning_rate": 5.821570593984765e-06, "loss": 0.488, "step": 4933 }, { "epoch": 1.368275097060455, "grad_norm": 0.19565999507904053, "learning_rate": 5.816903741392371e-06, "loss": 0.4613, "step": 4934 }, { "epoch": 1.3685524126455908, "grad_norm": 0.20337355136871338, "learning_rate": 5.812238192849126e-06, "loss": 0.5004, "step": 4935 }, { "epoch": 1.3688297282307267, "grad_norm": 0.1981084942817688, "learning_rate": 5.8075739492653936e-06, "loss": 0.4963, "step": 4936 }, { "epoch": 1.3691070438158626, "grad_norm": 0.21550583839416504, "learning_rate": 5.8029110115512975e-06, "loss": 0.5181, "step": 4937 }, { "epoch": 1.3693843594009985, "grad_norm": 0.2106407731771469, "learning_rate": 5.7982493806167025e-06, "loss": 0.5001, "step": 4938 }, { "epoch": 1.3696616749861343, "grad_norm": 0.20755064487457275, "learning_rate": 5.793589057371214e-06, "loss": 0.4996, "step": 4939 }, { "epoch": 1.3699389905712702, "grad_norm": 0.20589278638362885, "learning_rate": 5.788930042724178e-06, "loss": 0.4731, "step": 4940 }, { "epoch": 1.370216306156406, "grad_norm": 0.19447284936904907, "learning_rate": 5.7842723375846964e-06, "loss": 0.4805, "step": 4941 }, { "epoch": 1.370493621741542, "grad_norm": 0.18845054507255554, "learning_rate": 5.779615942861617e-06, "loss": 0.5183, "step": 4942 }, { "epoch": 1.3707709373266779, "grad_norm": 0.2057630866765976, "learning_rate": 5.774960859463516e-06, "loss": 0.4762, "step": 4943 }, { "epoch": 1.3710482529118138, "grad_norm": 0.20433704555034637, "learning_rate": 5.770307088298728e-06, "loss": 0.5085, "step": 4944 }, { "epoch": 1.3713255684969496, "grad_norm": 0.20006833970546722, "learning_rate": 5.76565463027533e-06, "loss": 0.5217, "step": 4945 }, { "epoch": 1.3716028840820855, "grad_norm": 0.1979144960641861, "learning_rate": 5.761003486301138e-06, "loss": 0.4903, "step": 4946 }, { "epoch": 1.3718801996672214, "grad_norm": 0.20525884628295898, "learning_rate": 5.756353657283707e-06, "loss": 0.5108, "step": 4947 }, { "epoch": 1.3721575152523573, "grad_norm": 0.20061984658241272, "learning_rate": 5.7517051441303486e-06, "loss": 0.5068, "step": 4948 }, { "epoch": 1.3724348308374932, "grad_norm": 0.19711720943450928, "learning_rate": 5.747057947748112e-06, "loss": 0.5206, "step": 4949 }, { "epoch": 1.372712146422629, "grad_norm": 0.20400893688201904, "learning_rate": 5.742412069043786e-06, "loss": 0.4879, "step": 4950 }, { "epoch": 1.372989462007765, "grad_norm": 0.20518803596496582, "learning_rate": 5.737767508923896e-06, "loss": 0.5172, "step": 4951 }, { "epoch": 1.3732667775929008, "grad_norm": 0.20601531863212585, "learning_rate": 5.733124268294734e-06, "loss": 0.535, "step": 4952 }, { "epoch": 1.3735440931780367, "grad_norm": 0.21886709332466125, "learning_rate": 5.728482348062314e-06, "loss": 0.4945, "step": 4953 }, { "epoch": 1.3738214087631726, "grad_norm": 0.2085854411125183, "learning_rate": 5.723841749132395e-06, "loss": 0.5162, "step": 4954 }, { "epoch": 1.3740987243483085, "grad_norm": 0.2051091194152832, "learning_rate": 5.719202472410475e-06, "loss": 0.4917, "step": 4955 }, { "epoch": 1.3743760399334444, "grad_norm": 0.20153647661209106, "learning_rate": 5.714564518801813e-06, "loss": 0.4679, "step": 4956 }, { "epoch": 1.3746533555185803, "grad_norm": 0.20284593105316162, "learning_rate": 5.709927889211391e-06, "loss": 0.4813, "step": 4957 }, { "epoch": 1.3749306711037161, "grad_norm": 0.20113231241703033, "learning_rate": 5.705292584543932e-06, "loss": 0.4958, "step": 4958 }, { "epoch": 1.375207986688852, "grad_norm": 0.21188965439796448, "learning_rate": 5.700658605703912e-06, "loss": 0.5014, "step": 4959 }, { "epoch": 1.375485302273988, "grad_norm": 0.20827704668045044, "learning_rate": 5.696025953595549e-06, "loss": 0.505, "step": 4960 }, { "epoch": 1.3757626178591238, "grad_norm": 0.2128673493862152, "learning_rate": 5.691394629122786e-06, "loss": 0.5019, "step": 4961 }, { "epoch": 1.3760399334442597, "grad_norm": 0.20192976295948029, "learning_rate": 5.686764633189325e-06, "loss": 0.4714, "step": 4962 }, { "epoch": 1.3763172490293956, "grad_norm": 0.20420150458812714, "learning_rate": 5.6821359666985925e-06, "loss": 0.5052, "step": 4963 }, { "epoch": 1.3765945646145314, "grad_norm": 0.19757725298404694, "learning_rate": 5.677508630553774e-06, "loss": 0.5136, "step": 4964 }, { "epoch": 1.3768718801996673, "grad_norm": 0.19914944469928741, "learning_rate": 5.672882625657776e-06, "loss": 0.4925, "step": 4965 }, { "epoch": 1.3771491957848032, "grad_norm": 0.20814815163612366, "learning_rate": 5.668257952913259e-06, "loss": 0.4955, "step": 4966 }, { "epoch": 1.377426511369939, "grad_norm": 0.19726327061653137, "learning_rate": 5.663634613222623e-06, "loss": 0.4675, "step": 4967 }, { "epoch": 1.377703826955075, "grad_norm": 0.20900078117847443, "learning_rate": 5.659012607487994e-06, "loss": 0.4912, "step": 4968 }, { "epoch": 1.3779811425402109, "grad_norm": 0.21374890208244324, "learning_rate": 5.65439193661126e-06, "loss": 0.5166, "step": 4969 }, { "epoch": 1.3782584581253468, "grad_norm": 0.21223044395446777, "learning_rate": 5.649772601494026e-06, "loss": 0.494, "step": 4970 }, { "epoch": 1.3785357737104826, "grad_norm": 0.20285436511039734, "learning_rate": 5.645154603037654e-06, "loss": 0.5161, "step": 4971 }, { "epoch": 1.3788130892956185, "grad_norm": 0.20432451367378235, "learning_rate": 5.64053794214323e-06, "loss": 0.4738, "step": 4972 }, { "epoch": 1.3790904048807544, "grad_norm": 0.20761069655418396, "learning_rate": 5.635922619711598e-06, "loss": 0.5038, "step": 4973 }, { "epoch": 1.3793677204658903, "grad_norm": 0.21306075155735016, "learning_rate": 5.631308636643317e-06, "loss": 0.4948, "step": 4974 }, { "epoch": 1.3796450360510262, "grad_norm": 0.20453637838363647, "learning_rate": 5.626695993838704e-06, "loss": 0.5061, "step": 4975 }, { "epoch": 1.379922351636162, "grad_norm": 0.20153799653053284, "learning_rate": 5.622084692197811e-06, "loss": 0.4923, "step": 4976 }, { "epoch": 1.380199667221298, "grad_norm": 0.2005978673696518, "learning_rate": 5.617474732620423e-06, "loss": 0.4906, "step": 4977 }, { "epoch": 1.3804769828064338, "grad_norm": 0.20294132828712463, "learning_rate": 5.612866116006059e-06, "loss": 0.4977, "step": 4978 }, { "epoch": 1.3807542983915697, "grad_norm": 0.19424274563789368, "learning_rate": 5.608258843253985e-06, "loss": 0.4766, "step": 4979 }, { "epoch": 1.3810316139767056, "grad_norm": 0.20564329624176025, "learning_rate": 5.60365291526321e-06, "loss": 0.5037, "step": 4980 }, { "epoch": 1.3813089295618415, "grad_norm": 0.19803635776042938, "learning_rate": 5.599048332932462e-06, "loss": 0.4728, "step": 4981 }, { "epoch": 1.3815862451469774, "grad_norm": 0.20995113253593445, "learning_rate": 5.594445097160221e-06, "loss": 0.5107, "step": 4982 }, { "epoch": 1.3818635607321132, "grad_norm": 0.20244790613651276, "learning_rate": 5.589843208844707e-06, "loss": 0.4971, "step": 4983 }, { "epoch": 1.3821408763172491, "grad_norm": 0.2114204615354538, "learning_rate": 5.5852426688838625e-06, "loss": 0.5059, "step": 4984 }, { "epoch": 1.382418191902385, "grad_norm": 0.2145579308271408, "learning_rate": 5.580643478175372e-06, "loss": 0.4949, "step": 4985 }, { "epoch": 1.382695507487521, "grad_norm": 0.1978285163640976, "learning_rate": 5.576045637616663e-06, "loss": 0.4941, "step": 4986 }, { "epoch": 1.3829728230726568, "grad_norm": 0.1951082944869995, "learning_rate": 5.571449148104903e-06, "loss": 0.4821, "step": 4987 }, { "epoch": 1.3832501386577927, "grad_norm": 0.4343617260456085, "learning_rate": 5.5668540105369815e-06, "loss": 0.5103, "step": 4988 }, { "epoch": 1.3835274542429286, "grad_norm": 0.19941391050815582, "learning_rate": 5.562260225809524e-06, "loss": 0.4847, "step": 4989 }, { "epoch": 1.3838047698280644, "grad_norm": 0.2054002434015274, "learning_rate": 5.557667794818917e-06, "loss": 0.5059, "step": 4990 }, { "epoch": 1.3840820854132003, "grad_norm": 0.19655835628509521, "learning_rate": 5.5530767184612584e-06, "loss": 0.5045, "step": 4991 }, { "epoch": 1.3843594009983362, "grad_norm": 0.2060847282409668, "learning_rate": 5.548486997632386e-06, "loss": 0.5122, "step": 4992 }, { "epoch": 1.384636716583472, "grad_norm": 0.20054815709590912, "learning_rate": 5.543898633227869e-06, "loss": 0.4945, "step": 4993 }, { "epoch": 1.384914032168608, "grad_norm": 0.20712971687316895, "learning_rate": 5.539311626143034e-06, "loss": 0.4962, "step": 4994 }, { "epoch": 1.3851913477537439, "grad_norm": 0.20592884719371796, "learning_rate": 5.534725977272923e-06, "loss": 0.5046, "step": 4995 }, { "epoch": 1.3854686633388797, "grad_norm": 0.19674254953861237, "learning_rate": 5.530141687512311e-06, "loss": 0.5111, "step": 4996 }, { "epoch": 1.3857459789240156, "grad_norm": 0.20650383830070496, "learning_rate": 5.525558757755716e-06, "loss": 0.4801, "step": 4997 }, { "epoch": 1.3860232945091515, "grad_norm": 0.20828871428966522, "learning_rate": 5.520977188897398e-06, "loss": 0.4849, "step": 4998 }, { "epoch": 1.3863006100942874, "grad_norm": 0.20563003420829773, "learning_rate": 5.516396981831337e-06, "loss": 0.528, "step": 4999 }, { "epoch": 1.3865779256794233, "grad_norm": 0.19634945690631866, "learning_rate": 5.511818137451247e-06, "loss": 0.482, "step": 5000 }, { "epoch": 1.3868552412645592, "grad_norm": 0.19331537187099457, "learning_rate": 5.507240656650586e-06, "loss": 0.5174, "step": 5001 }, { "epoch": 1.387132556849695, "grad_norm": 0.19204163551330566, "learning_rate": 5.502664540322547e-06, "loss": 0.4646, "step": 5002 }, { "epoch": 1.387409872434831, "grad_norm": 0.19761690497398376, "learning_rate": 5.498089789360043e-06, "loss": 0.4901, "step": 5003 }, { "epoch": 1.3876871880199668, "grad_norm": 0.22188632190227509, "learning_rate": 5.493516404655733e-06, "loss": 0.5084, "step": 5004 }, { "epoch": 1.3879645036051027, "grad_norm": 0.20775288343429565, "learning_rate": 5.48894438710201e-06, "loss": 0.4741, "step": 5005 }, { "epoch": 1.3882418191902386, "grad_norm": 0.2092396765947342, "learning_rate": 5.484373737590992e-06, "loss": 0.5163, "step": 5006 }, { "epoch": 1.3885191347753745, "grad_norm": 0.20691785216331482, "learning_rate": 5.479804457014528e-06, "loss": 0.4954, "step": 5007 }, { "epoch": 1.3887964503605104, "grad_norm": 0.20370697975158691, "learning_rate": 5.4752365462642115e-06, "loss": 0.492, "step": 5008 }, { "epoch": 1.3890737659456462, "grad_norm": 0.19262473285198212, "learning_rate": 5.4706700062313686e-06, "loss": 0.5066, "step": 5009 }, { "epoch": 1.3893510815307821, "grad_norm": 0.20191557705402374, "learning_rate": 5.466104837807038e-06, "loss": 0.4935, "step": 5010 }, { "epoch": 1.389628397115918, "grad_norm": 0.2161003202199936, "learning_rate": 5.461541041882021e-06, "loss": 0.5021, "step": 5011 }, { "epoch": 1.389905712701054, "grad_norm": 0.2024545818567276, "learning_rate": 5.456978619346821e-06, "loss": 0.4968, "step": 5012 }, { "epoch": 1.3901830282861898, "grad_norm": 0.19628916680812836, "learning_rate": 5.452417571091699e-06, "loss": 0.5084, "step": 5013 }, { "epoch": 1.3904603438713257, "grad_norm": 0.21482454240322113, "learning_rate": 5.447857898006625e-06, "loss": 0.5142, "step": 5014 }, { "epoch": 1.3907376594564616, "grad_norm": 0.19978967308998108, "learning_rate": 5.4432996009813235e-06, "loss": 0.448, "step": 5015 }, { "epoch": 1.3910149750415974, "grad_norm": 0.21920114755630493, "learning_rate": 5.43874268090523e-06, "loss": 0.5065, "step": 5016 }, { "epoch": 1.3912922906267333, "grad_norm": 0.1980915367603302, "learning_rate": 5.434187138667522e-06, "loss": 0.5073, "step": 5017 }, { "epoch": 1.3915696062118692, "grad_norm": 0.19898997247219086, "learning_rate": 5.429632975157115e-06, "loss": 0.5052, "step": 5018 }, { "epoch": 1.391846921797005, "grad_norm": 0.20365798473358154, "learning_rate": 5.425080191262634e-06, "loss": 0.4903, "step": 5019 }, { "epoch": 1.392124237382141, "grad_norm": 0.21267344057559967, "learning_rate": 5.42052878787246e-06, "loss": 0.5094, "step": 5020 }, { "epoch": 1.3924015529672769, "grad_norm": 0.1986275613307953, "learning_rate": 5.415978765874681e-06, "loss": 0.5039, "step": 5021 }, { "epoch": 1.3926788685524127, "grad_norm": 0.20506128668785095, "learning_rate": 5.411430126157138e-06, "loss": 0.5119, "step": 5022 }, { "epoch": 1.3929561841375486, "grad_norm": 0.20554526150226593, "learning_rate": 5.406882869607381e-06, "loss": 0.4979, "step": 5023 }, { "epoch": 1.3932334997226845, "grad_norm": 0.19483192265033722, "learning_rate": 5.402336997112703e-06, "loss": 0.4768, "step": 5024 }, { "epoch": 1.3935108153078204, "grad_norm": 0.18851692974567413, "learning_rate": 5.397792509560132e-06, "loss": 0.4797, "step": 5025 }, { "epoch": 1.3937881308929563, "grad_norm": 0.19844412803649902, "learning_rate": 5.3932494078364125e-06, "loss": 0.501, "step": 5026 }, { "epoch": 1.3940654464780922, "grad_norm": 0.20119161903858185, "learning_rate": 5.388707692828013e-06, "loss": 0.4795, "step": 5027 }, { "epoch": 1.394342762063228, "grad_norm": 0.1947944462299347, "learning_rate": 5.384167365421161e-06, "loss": 0.4779, "step": 5028 }, { "epoch": 1.394620077648364, "grad_norm": 0.20363706350326538, "learning_rate": 5.379628426501789e-06, "loss": 0.4925, "step": 5029 }, { "epoch": 1.3948973932334998, "grad_norm": 0.21294178068637848, "learning_rate": 5.375090876955559e-06, "loss": 0.4978, "step": 5030 }, { "epoch": 1.3951747088186357, "grad_norm": 0.19777782261371613, "learning_rate": 5.370554717667861e-06, "loss": 0.4888, "step": 5031 }, { "epoch": 1.3954520244037716, "grad_norm": 0.201828733086586, "learning_rate": 5.36601994952384e-06, "loss": 0.5061, "step": 5032 }, { "epoch": 1.3957293399889075, "grad_norm": 0.20095033943653107, "learning_rate": 5.3614865734083365e-06, "loss": 0.4768, "step": 5033 }, { "epoch": 1.3960066555740434, "grad_norm": 0.20809856057167053, "learning_rate": 5.3569545902059285e-06, "loss": 0.5032, "step": 5034 }, { "epoch": 1.3962839711591792, "grad_norm": 0.20099328458309174, "learning_rate": 5.352424000800934e-06, "loss": 0.4971, "step": 5035 }, { "epoch": 1.3965612867443151, "grad_norm": 0.20865300297737122, "learning_rate": 5.34789480607739e-06, "loss": 0.49, "step": 5036 }, { "epoch": 1.396838602329451, "grad_norm": 0.28519347310066223, "learning_rate": 5.3433670069190616e-06, "loss": 0.4885, "step": 5037 }, { "epoch": 1.397115917914587, "grad_norm": 0.19800283014774323, "learning_rate": 5.338840604209438e-06, "loss": 0.5033, "step": 5038 }, { "epoch": 1.3973932334997228, "grad_norm": 0.2006731927394867, "learning_rate": 5.334315598831743e-06, "loss": 0.5022, "step": 5039 }, { "epoch": 1.3976705490848587, "grad_norm": 0.200321763753891, "learning_rate": 5.329791991668931e-06, "loss": 0.4846, "step": 5040 }, { "epoch": 1.3979478646699945, "grad_norm": 0.21275892853736877, "learning_rate": 5.3252697836036675e-06, "loss": 0.503, "step": 5041 }, { "epoch": 1.3982251802551304, "grad_norm": 0.19826599955558777, "learning_rate": 5.320748975518361e-06, "loss": 0.4912, "step": 5042 }, { "epoch": 1.3985024958402663, "grad_norm": 0.20074287056922913, "learning_rate": 5.316229568295143e-06, "loss": 0.5036, "step": 5043 }, { "epoch": 1.3987798114254022, "grad_norm": 0.19842980802059174, "learning_rate": 5.311711562815869e-06, "loss": 0.5017, "step": 5044 }, { "epoch": 1.399057127010538, "grad_norm": 0.19995532929897308, "learning_rate": 5.307194959962112e-06, "loss": 0.4696, "step": 5045 }, { "epoch": 1.399334442595674, "grad_norm": 0.20109152793884277, "learning_rate": 5.302679760615189e-06, "loss": 0.4875, "step": 5046 }, { "epoch": 1.3996117581808099, "grad_norm": 0.19944603741168976, "learning_rate": 5.298165965656139e-06, "loss": 0.485, "step": 5047 }, { "epoch": 1.3998890737659457, "grad_norm": 0.19794021546840668, "learning_rate": 5.293653575965714e-06, "loss": 0.508, "step": 5048 }, { "epoch": 1.4001663893510816, "grad_norm": 0.20664082467556, "learning_rate": 5.2891425924244095e-06, "loss": 0.498, "step": 5049 }, { "epoch": 1.4004437049362175, "grad_norm": 0.2035246342420578, "learning_rate": 5.284633015912428e-06, "loss": 0.5023, "step": 5050 }, { "epoch": 1.4007210205213534, "grad_norm": 0.2174990326166153, "learning_rate": 5.280124847309717e-06, "loss": 0.483, "step": 5051 }, { "epoch": 1.4009983361064893, "grad_norm": 0.1980149745941162, "learning_rate": 5.275618087495932e-06, "loss": 0.4739, "step": 5052 }, { "epoch": 1.4012756516916252, "grad_norm": 0.19647562503814697, "learning_rate": 5.271112737350467e-06, "loss": 0.4788, "step": 5053 }, { "epoch": 1.401552967276761, "grad_norm": 0.1995796263217926, "learning_rate": 5.266608797752429e-06, "loss": 0.4577, "step": 5054 }, { "epoch": 1.401830282861897, "grad_norm": 0.2159973829984665, "learning_rate": 5.26210626958066e-06, "loss": 0.5022, "step": 5055 }, { "epoch": 1.4021075984470328, "grad_norm": 0.20078851282596588, "learning_rate": 5.257605153713727e-06, "loss": 0.4864, "step": 5056 }, { "epoch": 1.4023849140321687, "grad_norm": 0.20937801897525787, "learning_rate": 5.253105451029908e-06, "loss": 0.4945, "step": 5057 }, { "epoch": 1.4026622296173046, "grad_norm": 0.2047879546880722, "learning_rate": 5.248607162407221e-06, "loss": 0.5019, "step": 5058 }, { "epoch": 1.4029395452024405, "grad_norm": 0.19715817272663116, "learning_rate": 5.244110288723396e-06, "loss": 0.492, "step": 5059 }, { "epoch": 1.4032168607875763, "grad_norm": 0.20772488415241241, "learning_rate": 5.2396148308558976e-06, "loss": 0.5004, "step": 5060 }, { "epoch": 1.4034941763727122, "grad_norm": 0.19815978407859802, "learning_rate": 5.235120789681902e-06, "loss": 0.452, "step": 5061 }, { "epoch": 1.4037714919578481, "grad_norm": 0.2069665491580963, "learning_rate": 5.23062816607832e-06, "loss": 0.4955, "step": 5062 }, { "epoch": 1.404048807542984, "grad_norm": 0.20308996737003326, "learning_rate": 5.226136960921786e-06, "loss": 0.5137, "step": 5063 }, { "epoch": 1.4043261231281199, "grad_norm": 0.2034929245710373, "learning_rate": 5.221647175088648e-06, "loss": 0.4617, "step": 5064 }, { "epoch": 1.4046034387132558, "grad_norm": 0.21139517426490784, "learning_rate": 5.217158809454979e-06, "loss": 0.5068, "step": 5065 }, { "epoch": 1.4048807542983917, "grad_norm": 0.21125884354114532, "learning_rate": 5.212671864896581e-06, "loss": 0.537, "step": 5066 }, { "epoch": 1.4051580698835275, "grad_norm": 0.21030350029468536, "learning_rate": 5.208186342288979e-06, "loss": 0.5214, "step": 5067 }, { "epoch": 1.4054353854686634, "grad_norm": 0.19781124591827393, "learning_rate": 5.203702242507416e-06, "loss": 0.4858, "step": 5068 }, { "epoch": 1.4057127010537993, "grad_norm": 0.20567484200000763, "learning_rate": 5.199219566426848e-06, "loss": 0.4904, "step": 5069 }, { "epoch": 1.4059900166389352, "grad_norm": 0.267501562833786, "learning_rate": 5.194738314921982e-06, "loss": 0.4828, "step": 5070 }, { "epoch": 1.406267332224071, "grad_norm": 0.2026844173669815, "learning_rate": 5.1902584888672206e-06, "loss": 0.4974, "step": 5071 }, { "epoch": 1.406544647809207, "grad_norm": 0.20914383232593536, "learning_rate": 5.185780089136691e-06, "loss": 0.4762, "step": 5072 }, { "epoch": 1.4068219633943428, "grad_norm": 0.20183809101581573, "learning_rate": 5.181303116604253e-06, "loss": 0.4782, "step": 5073 }, { "epoch": 1.4070992789794787, "grad_norm": 0.20599482953548431, "learning_rate": 5.176827572143486e-06, "loss": 0.4911, "step": 5074 }, { "epoch": 1.4073765945646146, "grad_norm": 0.1961178332567215, "learning_rate": 5.172353456627683e-06, "loss": 0.4784, "step": 5075 }, { "epoch": 1.4076539101497505, "grad_norm": 0.2111572027206421, "learning_rate": 5.1678807709298605e-06, "loss": 0.4856, "step": 5076 }, { "epoch": 1.4079312257348864, "grad_norm": 0.19795557856559753, "learning_rate": 5.163409515922758e-06, "loss": 0.4855, "step": 5077 }, { "epoch": 1.4082085413200223, "grad_norm": 0.2135738879442215, "learning_rate": 5.158939692478845e-06, "loss": 0.5018, "step": 5078 }, { "epoch": 1.4084858569051582, "grad_norm": 0.2012612372636795, "learning_rate": 5.154471301470294e-06, "loss": 0.4999, "step": 5079 }, { "epoch": 1.408763172490294, "grad_norm": 0.20429253578186035, "learning_rate": 5.150004343769001e-06, "loss": 0.4955, "step": 5080 }, { "epoch": 1.40904048807543, "grad_norm": 0.2064649909734726, "learning_rate": 5.1455388202466025e-06, "loss": 0.5041, "step": 5081 }, { "epoch": 1.4093178036605658, "grad_norm": 0.19653840363025665, "learning_rate": 5.141074731774433e-06, "loss": 0.5023, "step": 5082 }, { "epoch": 1.4095951192457017, "grad_norm": 0.20743335783481598, "learning_rate": 5.13661207922355e-06, "loss": 0.5033, "step": 5083 }, { "epoch": 1.4098724348308376, "grad_norm": 0.21355509757995605, "learning_rate": 5.13215086346474e-06, "loss": 0.5117, "step": 5084 }, { "epoch": 1.4101497504159735, "grad_norm": 0.21359796822071075, "learning_rate": 5.127691085368508e-06, "loss": 0.4959, "step": 5085 }, { "epoch": 1.4104270660011093, "grad_norm": 0.21122997999191284, "learning_rate": 5.123232745805067e-06, "loss": 0.5095, "step": 5086 }, { "epoch": 1.4107043815862452, "grad_norm": 0.20164433121681213, "learning_rate": 5.118775845644365e-06, "loss": 0.5069, "step": 5087 }, { "epoch": 1.410981697171381, "grad_norm": 0.205289825797081, "learning_rate": 5.114320385756052e-06, "loss": 0.5191, "step": 5088 }, { "epoch": 1.411259012756517, "grad_norm": 0.20195280015468597, "learning_rate": 5.109866367009518e-06, "loss": 0.4861, "step": 5089 }, { "epoch": 1.4115363283416529, "grad_norm": 0.2103511095046997, "learning_rate": 5.105413790273848e-06, "loss": 0.5086, "step": 5090 }, { "epoch": 1.4118136439267888, "grad_norm": 0.2056579887866974, "learning_rate": 5.1009626564178685e-06, "loss": 0.5043, "step": 5091 }, { "epoch": 1.4120909595119246, "grad_norm": 0.22724954783916473, "learning_rate": 5.096512966310103e-06, "loss": 0.5088, "step": 5092 }, { "epoch": 1.4123682750970605, "grad_norm": 0.20662304759025574, "learning_rate": 5.0920647208188105e-06, "loss": 0.4832, "step": 5093 }, { "epoch": 1.4126455906821964, "grad_norm": 0.20837345719337463, "learning_rate": 5.087617920811966e-06, "loss": 0.5003, "step": 5094 }, { "epoch": 1.4129229062673323, "grad_norm": 0.20730285346508026, "learning_rate": 5.083172567157246e-06, "loss": 0.4922, "step": 5095 }, { "epoch": 1.4132002218524682, "grad_norm": 0.20552845299243927, "learning_rate": 5.078728660722068e-06, "loss": 0.5003, "step": 5096 }, { "epoch": 1.413477537437604, "grad_norm": 0.21342763304710388, "learning_rate": 5.074286202373547e-06, "loss": 0.5156, "step": 5097 }, { "epoch": 1.41375485302274, "grad_norm": 0.20104481279850006, "learning_rate": 5.069845192978534e-06, "loss": 0.5034, "step": 5098 }, { "epoch": 1.4140321686078758, "grad_norm": 0.1914806067943573, "learning_rate": 5.065405633403576e-06, "loss": 0.474, "step": 5099 }, { "epoch": 1.4143094841930117, "grad_norm": 0.19734273850917816, "learning_rate": 5.060967524514956e-06, "loss": 0.4821, "step": 5100 }, { "epoch": 1.4145867997781476, "grad_norm": 0.2332550585269928, "learning_rate": 5.05653086717867e-06, "loss": 0.4929, "step": 5101 }, { "epoch": 1.4148641153632835, "grad_norm": 0.20123951137065887, "learning_rate": 5.052095662260421e-06, "loss": 0.4948, "step": 5102 }, { "epoch": 1.4151414309484194, "grad_norm": 0.20322628319263458, "learning_rate": 5.047661910625634e-06, "loss": 0.5161, "step": 5103 }, { "epoch": 1.4154187465335553, "grad_norm": 0.2106594741344452, "learning_rate": 5.043229613139454e-06, "loss": 0.4567, "step": 5104 }, { "epoch": 1.4156960621186911, "grad_norm": 0.21148160099983215, "learning_rate": 5.038798770666744e-06, "loss": 0.5119, "step": 5105 }, { "epoch": 1.415973377703827, "grad_norm": 0.20894502103328705, "learning_rate": 5.034369384072075e-06, "loss": 0.4947, "step": 5106 }, { "epoch": 1.416250693288963, "grad_norm": 0.20449240505695343, "learning_rate": 5.029941454219728e-06, "loss": 0.5044, "step": 5107 }, { "epoch": 1.4165280088740988, "grad_norm": 0.21614056825637817, "learning_rate": 5.025514981973728e-06, "loss": 0.4952, "step": 5108 }, { "epoch": 1.4168053244592347, "grad_norm": 0.19919288158416748, "learning_rate": 5.0210899681977865e-06, "loss": 0.5098, "step": 5109 }, { "epoch": 1.4170826400443706, "grad_norm": 0.20138102769851685, "learning_rate": 5.01666641375534e-06, "loss": 0.4817, "step": 5110 }, { "epoch": 1.4173599556295065, "grad_norm": 0.20684263110160828, "learning_rate": 5.0122443195095416e-06, "loss": 0.4934, "step": 5111 }, { "epoch": 1.4176372712146423, "grad_norm": 0.20800793170928955, "learning_rate": 5.007823686323267e-06, "loss": 0.4999, "step": 5112 }, { "epoch": 1.4179145867997782, "grad_norm": 0.20006629824638367, "learning_rate": 5.0034045150590905e-06, "loss": 0.4677, "step": 5113 }, { "epoch": 1.418191902384914, "grad_norm": 0.20559728145599365, "learning_rate": 4.998986806579309e-06, "loss": 0.473, "step": 5114 }, { "epoch": 1.41846921797005, "grad_norm": 0.2054857760667801, "learning_rate": 4.994570561745936e-06, "loss": 0.5104, "step": 5115 }, { "epoch": 1.4187465335551859, "grad_norm": 0.21354462206363678, "learning_rate": 4.990155781420704e-06, "loss": 0.5001, "step": 5116 }, { "epoch": 1.4190238491403218, "grad_norm": 0.1943047046661377, "learning_rate": 4.985742466465047e-06, "loss": 0.4839, "step": 5117 }, { "epoch": 1.4193011647254576, "grad_norm": 0.2034555971622467, "learning_rate": 4.981330617740118e-06, "loss": 0.5107, "step": 5118 }, { "epoch": 1.4195784803105935, "grad_norm": 0.2111237347126007, "learning_rate": 4.9769202361067895e-06, "loss": 0.5202, "step": 5119 }, { "epoch": 1.4198557958957294, "grad_norm": 0.19280794262886047, "learning_rate": 4.972511322425648e-06, "loss": 0.5044, "step": 5120 }, { "epoch": 1.4201331114808653, "grad_norm": 0.198442280292511, "learning_rate": 4.968103877556979e-06, "loss": 0.4788, "step": 5121 }, { "epoch": 1.4204104270660012, "grad_norm": 0.20548267662525177, "learning_rate": 4.963697902360798e-06, "loss": 0.4731, "step": 5122 }, { "epoch": 1.420687742651137, "grad_norm": 0.22825707495212555, "learning_rate": 4.959293397696831e-06, "loss": 0.4998, "step": 5123 }, { "epoch": 1.420965058236273, "grad_norm": 0.20349940657615662, "learning_rate": 4.954890364424508e-06, "loss": 0.5058, "step": 5124 }, { "epoch": 1.4212423738214088, "grad_norm": 0.20583848655223846, "learning_rate": 4.950488803402975e-06, "loss": 0.484, "step": 5125 }, { "epoch": 1.4215196894065447, "grad_norm": 0.20237183570861816, "learning_rate": 4.9460887154910985e-06, "loss": 0.5071, "step": 5126 }, { "epoch": 1.4217970049916806, "grad_norm": 0.20790375769138336, "learning_rate": 4.941690101547454e-06, "loss": 0.4937, "step": 5127 }, { "epoch": 1.4220743205768165, "grad_norm": 0.22605054080486298, "learning_rate": 4.9372929624303205e-06, "loss": 0.4915, "step": 5128 }, { "epoch": 1.4223516361619524, "grad_norm": 0.20419445633888245, "learning_rate": 4.932897298997703e-06, "loss": 0.4946, "step": 5129 }, { "epoch": 1.4226289517470883, "grad_norm": 0.21240122616291046, "learning_rate": 4.928503112107306e-06, "loss": 0.5002, "step": 5130 }, { "epoch": 1.4229062673322241, "grad_norm": 0.2051856964826584, "learning_rate": 4.92411040261656e-06, "loss": 0.4913, "step": 5131 }, { "epoch": 1.42318358291736, "grad_norm": 0.20038112998008728, "learning_rate": 4.919719171382588e-06, "loss": 0.5047, "step": 5132 }, { "epoch": 1.423460898502496, "grad_norm": 0.20497171580791473, "learning_rate": 4.915329419262242e-06, "loss": 0.5028, "step": 5133 }, { "epoch": 1.4237382140876318, "grad_norm": 0.1987651139497757, "learning_rate": 4.910941147112083e-06, "loss": 0.4977, "step": 5134 }, { "epoch": 1.4240155296727677, "grad_norm": 0.28679215908050537, "learning_rate": 4.906554355788369e-06, "loss": 0.4989, "step": 5135 }, { "epoch": 1.4242928452579036, "grad_norm": 0.202947735786438, "learning_rate": 4.90216904614709e-06, "loss": 0.4802, "step": 5136 }, { "epoch": 1.4245701608430394, "grad_norm": 0.2093580663204193, "learning_rate": 4.897785219043927e-06, "loss": 0.4787, "step": 5137 }, { "epoch": 1.4248474764281753, "grad_norm": 0.23357978463172913, "learning_rate": 4.893402875334288e-06, "loss": 0.4944, "step": 5138 }, { "epoch": 1.4251247920133112, "grad_norm": 0.20304684340953827, "learning_rate": 4.889022015873277e-06, "loss": 0.4846, "step": 5139 }, { "epoch": 1.425402107598447, "grad_norm": 0.20766004920005798, "learning_rate": 4.884642641515724e-06, "loss": 0.4871, "step": 5140 }, { "epoch": 1.425679423183583, "grad_norm": 0.20664039254188538, "learning_rate": 4.880264753116153e-06, "loss": 0.481, "step": 5141 }, { "epoch": 1.4259567387687189, "grad_norm": 0.20463590323925018, "learning_rate": 4.875888351528808e-06, "loss": 0.5147, "step": 5142 }, { "epoch": 1.4262340543538548, "grad_norm": 0.20236745476722717, "learning_rate": 4.871513437607648e-06, "loss": 0.4744, "step": 5143 }, { "epoch": 1.4265113699389906, "grad_norm": 0.19818803668022156, "learning_rate": 4.867140012206331e-06, "loss": 0.5014, "step": 5144 }, { "epoch": 1.4267886855241265, "grad_norm": 0.22346088290214539, "learning_rate": 4.86276807617822e-06, "loss": 0.6068, "step": 5145 }, { "epoch": 1.4270660011092624, "grad_norm": 0.21238963305950165, "learning_rate": 4.858397630376402e-06, "loss": 0.4987, "step": 5146 }, { "epoch": 1.4273433166943983, "grad_norm": 0.20392395555973053, "learning_rate": 4.854028675653673e-06, "loss": 0.4896, "step": 5147 }, { "epoch": 1.4276206322795342, "grad_norm": 0.20558500289916992, "learning_rate": 4.849661212862519e-06, "loss": 0.485, "step": 5148 }, { "epoch": 1.42789794786467, "grad_norm": 0.20195691287517548, "learning_rate": 4.8452952428551555e-06, "loss": 0.4898, "step": 5149 }, { "epoch": 1.428175263449806, "grad_norm": 0.20537883043289185, "learning_rate": 4.8409307664835005e-06, "loss": 0.51, "step": 5150 }, { "epoch": 1.4284525790349418, "grad_norm": 0.1957499235868454, "learning_rate": 4.836567784599177e-06, "loss": 0.4805, "step": 5151 }, { "epoch": 1.4287298946200777, "grad_norm": 0.20477133989334106, "learning_rate": 4.832206298053514e-06, "loss": 0.5113, "step": 5152 }, { "epoch": 1.4290072102052136, "grad_norm": 0.20341917872428894, "learning_rate": 4.827846307697556e-06, "loss": 0.4972, "step": 5153 }, { "epoch": 1.4292845257903495, "grad_norm": 0.20808421075344086, "learning_rate": 4.823487814382058e-06, "loss": 0.5044, "step": 5154 }, { "epoch": 1.4295618413754854, "grad_norm": 0.19239382445812225, "learning_rate": 4.819130818957472e-06, "loss": 0.47, "step": 5155 }, { "epoch": 1.4298391569606212, "grad_norm": 0.20638294517993927, "learning_rate": 4.814775322273961e-06, "loss": 0.5008, "step": 5156 }, { "epoch": 1.4301164725457571, "grad_norm": 0.20612825453281403, "learning_rate": 4.8104213251814e-06, "loss": 0.4738, "step": 5157 }, { "epoch": 1.430393788130893, "grad_norm": 0.20103727281093597, "learning_rate": 4.806068828529374e-06, "loss": 0.4854, "step": 5158 }, { "epoch": 1.430671103716029, "grad_norm": 0.20140987634658813, "learning_rate": 4.801717833167162e-06, "loss": 0.498, "step": 5159 }, { "epoch": 1.4309484193011648, "grad_norm": 0.20108608901500702, "learning_rate": 4.797368339943763e-06, "loss": 0.4587, "step": 5160 }, { "epoch": 1.4312257348863007, "grad_norm": 0.20430995523929596, "learning_rate": 4.793020349707883e-06, "loss": 0.502, "step": 5161 }, { "epoch": 1.4315030504714366, "grad_norm": 0.1962215155363083, "learning_rate": 4.7886738633079254e-06, "loss": 0.4873, "step": 5162 }, { "epoch": 1.4317803660565724, "grad_norm": 0.2041066735982895, "learning_rate": 4.784328881592e-06, "loss": 0.4807, "step": 5163 }, { "epoch": 1.4320576816417083, "grad_norm": 0.2137051373720169, "learning_rate": 4.779985405407933e-06, "loss": 0.4924, "step": 5164 }, { "epoch": 1.4323349972268442, "grad_norm": 0.20929081737995148, "learning_rate": 4.775643435603255e-06, "loss": 0.5052, "step": 5165 }, { "epoch": 1.43261231281198, "grad_norm": 0.2020164579153061, "learning_rate": 4.7713029730251925e-06, "loss": 0.5179, "step": 5166 }, { "epoch": 1.432889628397116, "grad_norm": 0.21634337306022644, "learning_rate": 4.766964018520691e-06, "loss": 0.5043, "step": 5167 }, { "epoch": 1.4331669439822519, "grad_norm": 0.20789554715156555, "learning_rate": 4.762626572936389e-06, "loss": 0.4949, "step": 5168 }, { "epoch": 1.4334442595673877, "grad_norm": 0.20186547935009003, "learning_rate": 4.7582906371186435e-06, "loss": 0.4865, "step": 5169 }, { "epoch": 1.4337215751525236, "grad_norm": 0.20182502269744873, "learning_rate": 4.753956211913504e-06, "loss": 0.4684, "step": 5170 }, { "epoch": 1.4339988907376595, "grad_norm": 0.20346789062023163, "learning_rate": 4.749623298166736e-06, "loss": 0.498, "step": 5171 }, { "epoch": 1.4342762063227954, "grad_norm": 0.21296192705631256, "learning_rate": 4.745291896723808e-06, "loss": 0.4982, "step": 5172 }, { "epoch": 1.4345535219079313, "grad_norm": 0.21046607196331024, "learning_rate": 4.740962008429885e-06, "loss": 0.5046, "step": 5173 }, { "epoch": 1.4348308374930672, "grad_norm": 0.20590196549892426, "learning_rate": 4.736633634129849e-06, "loss": 0.4919, "step": 5174 }, { "epoch": 1.435108153078203, "grad_norm": 0.18597187101840973, "learning_rate": 4.732306774668274e-06, "loss": 0.492, "step": 5175 }, { "epoch": 1.435385468663339, "grad_norm": 0.20251090824604034, "learning_rate": 4.727981430889452e-06, "loss": 0.534, "step": 5176 }, { "epoch": 1.4356627842484748, "grad_norm": 0.19426211714744568, "learning_rate": 4.723657603637364e-06, "loss": 0.4626, "step": 5177 }, { "epoch": 1.4359400998336107, "grad_norm": 0.20881786942481995, "learning_rate": 4.7193352937557125e-06, "loss": 0.4843, "step": 5178 }, { "epoch": 1.4362174154187466, "grad_norm": 0.19851280748844147, "learning_rate": 4.7150145020878865e-06, "loss": 0.5019, "step": 5179 }, { "epoch": 1.4364947310038825, "grad_norm": 0.2078125774860382, "learning_rate": 4.71069522947699e-06, "loss": 0.4671, "step": 5180 }, { "epoch": 1.4367720465890184, "grad_norm": 0.20463737845420837, "learning_rate": 4.706377476765832e-06, "loss": 0.5038, "step": 5181 }, { "epoch": 1.4370493621741542, "grad_norm": 0.20316970348358154, "learning_rate": 4.702061244796916e-06, "loss": 0.4726, "step": 5182 }, { "epoch": 1.4373266777592901, "grad_norm": 0.1965150386095047, "learning_rate": 4.69774653441245e-06, "loss": 0.4753, "step": 5183 }, { "epoch": 1.437603993344426, "grad_norm": 0.19861292839050293, "learning_rate": 4.693433346454352e-06, "loss": 0.491, "step": 5184 }, { "epoch": 1.437881308929562, "grad_norm": 0.2055879682302475, "learning_rate": 4.689121681764243e-06, "loss": 0.496, "step": 5185 }, { "epoch": 1.4381586245146978, "grad_norm": 0.20754766464233398, "learning_rate": 4.684811541183436e-06, "loss": 0.5039, "step": 5186 }, { "epoch": 1.4384359400998337, "grad_norm": 0.2045608013868332, "learning_rate": 4.680502925552956e-06, "loss": 0.4964, "step": 5187 }, { "epoch": 1.4387132556849695, "grad_norm": 0.20918679237365723, "learning_rate": 4.676195835713533e-06, "loss": 0.505, "step": 5188 }, { "epoch": 1.4389905712701054, "grad_norm": 0.21245136857032776, "learning_rate": 4.67189027250559e-06, "loss": 0.4968, "step": 5189 }, { "epoch": 1.4392678868552413, "grad_norm": 0.30057552456855774, "learning_rate": 4.667586236769253e-06, "loss": 0.4925, "step": 5190 }, { "epoch": 1.4395452024403772, "grad_norm": 0.2126583307981491, "learning_rate": 4.6632837293443575e-06, "loss": 0.4906, "step": 5191 }, { "epoch": 1.439822518025513, "grad_norm": 0.20761823654174805, "learning_rate": 4.65898275107044e-06, "loss": 0.507, "step": 5192 }, { "epoch": 1.440099833610649, "grad_norm": 0.19843849539756775, "learning_rate": 4.65468330278673e-06, "loss": 0.4824, "step": 5193 }, { "epoch": 1.4403771491957849, "grad_norm": 0.2050592452287674, "learning_rate": 4.650385385332163e-06, "loss": 0.4696, "step": 5194 }, { "epoch": 1.4406544647809207, "grad_norm": 0.20630647242069244, "learning_rate": 4.646088999545378e-06, "loss": 0.514, "step": 5195 }, { "epoch": 1.4409317803660566, "grad_norm": 0.20964011549949646, "learning_rate": 4.6417941462647206e-06, "loss": 0.5106, "step": 5196 }, { "epoch": 1.4412090959511925, "grad_norm": 0.20389756560325623, "learning_rate": 4.637500826328223e-06, "loss": 0.5036, "step": 5197 }, { "epoch": 1.4414864115363284, "grad_norm": 0.21767324209213257, "learning_rate": 4.633209040573619e-06, "loss": 0.5197, "step": 5198 }, { "epoch": 1.4417637271214643, "grad_norm": 0.22654587030410767, "learning_rate": 4.628918789838367e-06, "loss": 0.5067, "step": 5199 }, { "epoch": 1.4420410427066002, "grad_norm": 0.19964653253555298, "learning_rate": 4.624630074959599e-06, "loss": 0.4778, "step": 5200 }, { "epoch": 1.442318358291736, "grad_norm": 0.20028294622898102, "learning_rate": 4.620342896774152e-06, "loss": 0.4896, "step": 5201 }, { "epoch": 1.442595673876872, "grad_norm": 0.20315535366535187, "learning_rate": 4.616057256118575e-06, "loss": 0.495, "step": 5202 }, { "epoch": 1.4428729894620078, "grad_norm": 0.23156633973121643, "learning_rate": 4.611773153829111e-06, "loss": 0.4772, "step": 5203 }, { "epoch": 1.4431503050471437, "grad_norm": 0.22991783916950226, "learning_rate": 4.607490590741702e-06, "loss": 0.5134, "step": 5204 }, { "epoch": 1.4434276206322796, "grad_norm": 0.20309005677700043, "learning_rate": 4.603209567691979e-06, "loss": 0.4889, "step": 5205 }, { "epoch": 1.4437049362174155, "grad_norm": 0.20435002446174622, "learning_rate": 4.598930085515293e-06, "loss": 0.4943, "step": 5206 }, { "epoch": 1.4439822518025514, "grad_norm": 0.19982177019119263, "learning_rate": 4.594652145046688e-06, "loss": 0.4995, "step": 5207 }, { "epoch": 1.4442595673876872, "grad_norm": 0.22092601656913757, "learning_rate": 4.5903757471208914e-06, "loss": 0.5108, "step": 5208 }, { "epoch": 1.4445368829728231, "grad_norm": 0.2120787650346756, "learning_rate": 4.586100892572352e-06, "loss": 0.48, "step": 5209 }, { "epoch": 1.444814198557959, "grad_norm": 0.209842249751091, "learning_rate": 4.5818275822352e-06, "loss": 0.4888, "step": 5210 }, { "epoch": 1.445091514143095, "grad_norm": 0.1947280764579773, "learning_rate": 4.577555816943279e-06, "loss": 0.5057, "step": 5211 }, { "epoch": 1.4453688297282308, "grad_norm": 0.20627637207508087, "learning_rate": 4.573285597530114e-06, "loss": 0.5058, "step": 5212 }, { "epoch": 1.4456461453133667, "grad_norm": 0.2139754295349121, "learning_rate": 4.569016924828945e-06, "loss": 0.4867, "step": 5213 }, { "epoch": 1.4459234608985025, "grad_norm": 0.21095266938209534, "learning_rate": 4.564749799672705e-06, "loss": 0.5132, "step": 5214 }, { "epoch": 1.4462007764836384, "grad_norm": 0.21572549641132355, "learning_rate": 4.560484222894014e-06, "loss": 0.4752, "step": 5215 }, { "epoch": 1.4464780920687743, "grad_norm": 0.21467210352420807, "learning_rate": 4.55622019532521e-06, "loss": 0.5311, "step": 5216 }, { "epoch": 1.4467554076539102, "grad_norm": 0.21415258944034576, "learning_rate": 4.551957717798308e-06, "loss": 0.4949, "step": 5217 }, { "epoch": 1.447032723239046, "grad_norm": 0.2082109898328781, "learning_rate": 4.5476967911450345e-06, "loss": 0.501, "step": 5218 }, { "epoch": 1.447310038824182, "grad_norm": 0.22765327990055084, "learning_rate": 4.543437416196814e-06, "loss": 0.4966, "step": 5219 }, { "epoch": 1.4475873544093179, "grad_norm": 0.20551997423171997, "learning_rate": 4.539179593784758e-06, "loss": 0.4974, "step": 5220 }, { "epoch": 1.4478646699944537, "grad_norm": 0.20596256852149963, "learning_rate": 4.534923324739677e-06, "loss": 0.4995, "step": 5221 }, { "epoch": 1.4481419855795896, "grad_norm": 0.1983633041381836, "learning_rate": 4.530668609892087e-06, "loss": 0.4959, "step": 5222 }, { "epoch": 1.4484193011647255, "grad_norm": 0.19731079041957855, "learning_rate": 4.526415450072198e-06, "loss": 0.4974, "step": 5223 }, { "epoch": 1.4486966167498614, "grad_norm": 0.21283119916915894, "learning_rate": 4.522163846109908e-06, "loss": 0.521, "step": 5224 }, { "epoch": 1.4489739323349973, "grad_norm": 0.21046127378940582, "learning_rate": 4.517913798834818e-06, "loss": 0.483, "step": 5225 }, { "epoch": 1.4492512479201332, "grad_norm": 0.22251440584659576, "learning_rate": 4.513665309076233e-06, "loss": 0.4978, "step": 5226 }, { "epoch": 1.449528563505269, "grad_norm": 0.20175869762897491, "learning_rate": 4.50941837766314e-06, "loss": 0.5356, "step": 5227 }, { "epoch": 1.449805879090405, "grad_norm": 0.21276865899562836, "learning_rate": 4.505173005424224e-06, "loss": 0.4813, "step": 5228 }, { "epoch": 1.4500831946755408, "grad_norm": 0.20545652508735657, "learning_rate": 4.500929193187872e-06, "loss": 0.4697, "step": 5229 }, { "epoch": 1.4503605102606767, "grad_norm": 0.21193096041679382, "learning_rate": 4.496686941782172e-06, "loss": 0.5156, "step": 5230 }, { "epoch": 1.4506378258458126, "grad_norm": 0.2051508128643036, "learning_rate": 4.492446252034893e-06, "loss": 0.5015, "step": 5231 }, { "epoch": 1.4509151414309485, "grad_norm": 0.2122107744216919, "learning_rate": 4.488207124773501e-06, "loss": 0.4912, "step": 5232 }, { "epoch": 1.4511924570160843, "grad_norm": 0.19480577111244202, "learning_rate": 4.483969560825169e-06, "loss": 0.4876, "step": 5233 }, { "epoch": 1.4514697726012202, "grad_norm": 0.20153845846652985, "learning_rate": 4.479733561016759e-06, "loss": 0.5092, "step": 5234 }, { "epoch": 1.4517470881863561, "grad_norm": 0.21945208311080933, "learning_rate": 4.475499126174826e-06, "loss": 0.5147, "step": 5235 }, { "epoch": 1.452024403771492, "grad_norm": 0.215095654129982, "learning_rate": 4.471266257125609e-06, "loss": 0.5222, "step": 5236 }, { "epoch": 1.4523017193566279, "grad_norm": 0.20627257227897644, "learning_rate": 4.467034954695071e-06, "loss": 0.4668, "step": 5237 }, { "epoch": 1.4525790349417638, "grad_norm": 0.2098441869020462, "learning_rate": 4.462805219708843e-06, "loss": 0.4668, "step": 5238 }, { "epoch": 1.4528563505268997, "grad_norm": 0.22378282248973846, "learning_rate": 4.4585770529922535e-06, "loss": 0.4728, "step": 5239 }, { "epoch": 1.4531336661120355, "grad_norm": 0.21106509864330292, "learning_rate": 4.454350455370334e-06, "loss": 0.5006, "step": 5240 }, { "epoch": 1.4534109816971714, "grad_norm": 0.21677586436271667, "learning_rate": 4.450125427667812e-06, "loss": 0.4977, "step": 5241 }, { "epoch": 1.4536882972823073, "grad_norm": 0.2078382968902588, "learning_rate": 4.4459019707090956e-06, "loss": 0.5159, "step": 5242 }, { "epoch": 1.4539656128674432, "grad_norm": 0.19706477224826813, "learning_rate": 4.441680085318289e-06, "loss": 0.4935, "step": 5243 }, { "epoch": 1.454242928452579, "grad_norm": 0.19929809868335724, "learning_rate": 4.437459772319201e-06, "loss": 0.4725, "step": 5244 }, { "epoch": 1.454520244037715, "grad_norm": 0.19897820055484772, "learning_rate": 4.4332410325353265e-06, "loss": 0.4693, "step": 5245 }, { "epoch": 1.4547975596228508, "grad_norm": 0.19526724517345428, "learning_rate": 4.429023866789848e-06, "loss": 0.4856, "step": 5246 }, { "epoch": 1.4550748752079867, "grad_norm": 0.2073434442281723, "learning_rate": 4.424808275905654e-06, "loss": 0.4909, "step": 5247 }, { "epoch": 1.4553521907931226, "grad_norm": 0.2015712559223175, "learning_rate": 4.420594260705309e-06, "loss": 0.4805, "step": 5248 }, { "epoch": 1.4556295063782585, "grad_norm": 0.20347121357917786, "learning_rate": 4.416381822011087e-06, "loss": 0.5058, "step": 5249 }, { "epoch": 1.4559068219633944, "grad_norm": 0.20566286146640778, "learning_rate": 4.412170960644939e-06, "loss": 0.4688, "step": 5250 }, { "epoch": 1.4561841375485303, "grad_norm": 0.2034814953804016, "learning_rate": 4.407961677428521e-06, "loss": 0.5307, "step": 5251 }, { "epoch": 1.4564614531336662, "grad_norm": 0.21038807928562164, "learning_rate": 4.403753973183177e-06, "loss": 0.5287, "step": 5252 }, { "epoch": 1.456738768718802, "grad_norm": 0.2552982270717621, "learning_rate": 4.399547848729935e-06, "loss": 0.4838, "step": 5253 }, { "epoch": 1.457016084303938, "grad_norm": 0.194267138838768, "learning_rate": 4.395343304889529e-06, "loss": 0.4981, "step": 5254 }, { "epoch": 1.4572933998890738, "grad_norm": 0.20130032300949097, "learning_rate": 4.391140342482369e-06, "loss": 0.4863, "step": 5255 }, { "epoch": 1.4575707154742097, "grad_norm": 0.21114356815814972, "learning_rate": 4.3869389623285725e-06, "loss": 0.511, "step": 5256 }, { "epoch": 1.4578480310593456, "grad_norm": 0.19720643758773804, "learning_rate": 4.382739165247933e-06, "loss": 0.4973, "step": 5257 }, { "epoch": 1.4581253466444815, "grad_norm": 0.1979367434978485, "learning_rate": 4.378540952059948e-06, "loss": 0.5119, "step": 5258 }, { "epoch": 1.4584026622296173, "grad_norm": 0.1983027458190918, "learning_rate": 4.374344323583793e-06, "loss": 0.5073, "step": 5259 }, { "epoch": 1.4586799778147532, "grad_norm": 0.20961186289787292, "learning_rate": 4.370149280638347e-06, "loss": 0.5181, "step": 5260 }, { "epoch": 1.458957293399889, "grad_norm": 0.1928398460149765, "learning_rate": 4.3659558240421755e-06, "loss": 0.504, "step": 5261 }, { "epoch": 1.459234608985025, "grad_norm": 0.20917226374149323, "learning_rate": 4.361763954613526e-06, "loss": 0.5018, "step": 5262 }, { "epoch": 1.4595119245701609, "grad_norm": 0.19321578741073608, "learning_rate": 4.357573673170352e-06, "loss": 0.4699, "step": 5263 }, { "epoch": 1.4597892401552968, "grad_norm": 0.19718189537525177, "learning_rate": 4.35338498053028e-06, "loss": 0.4754, "step": 5264 }, { "epoch": 1.4600665557404326, "grad_norm": 0.20244868099689484, "learning_rate": 4.349197877510643e-06, "loss": 0.4869, "step": 5265 }, { "epoch": 1.4603438713255685, "grad_norm": 0.20539158582687378, "learning_rate": 4.345012364928447e-06, "loss": 0.5209, "step": 5266 }, { "epoch": 1.4606211869107044, "grad_norm": 0.19396114349365234, "learning_rate": 4.340828443600401e-06, "loss": 0.4901, "step": 5267 }, { "epoch": 1.4608985024958403, "grad_norm": 0.21171221137046814, "learning_rate": 4.336646114342903e-06, "loss": 0.49, "step": 5268 }, { "epoch": 1.4611758180809762, "grad_norm": 0.20075739920139313, "learning_rate": 4.332465377972031e-06, "loss": 0.5127, "step": 5269 }, { "epoch": 1.461453133666112, "grad_norm": 0.2100810706615448, "learning_rate": 4.328286235303555e-06, "loss": 0.506, "step": 5270 }, { "epoch": 1.461730449251248, "grad_norm": 0.224534273147583, "learning_rate": 4.324108687152941e-06, "loss": 0.5033, "step": 5271 }, { "epoch": 1.4620077648363838, "grad_norm": 0.20113049447536469, "learning_rate": 4.3199327343353415e-06, "loss": 0.4867, "step": 5272 }, { "epoch": 1.4622850804215197, "grad_norm": 0.20711010694503784, "learning_rate": 4.315758377665592e-06, "loss": 0.4818, "step": 5273 }, { "epoch": 1.4625623960066556, "grad_norm": 0.20650602877140045, "learning_rate": 4.311585617958214e-06, "loss": 0.4794, "step": 5274 }, { "epoch": 1.4628397115917915, "grad_norm": 0.19701525568962097, "learning_rate": 4.307414456027437e-06, "loss": 0.5037, "step": 5275 }, { "epoch": 1.4631170271769274, "grad_norm": 0.2053639143705368, "learning_rate": 4.303244892687157e-06, "loss": 0.4924, "step": 5276 }, { "epoch": 1.4633943427620633, "grad_norm": 0.20747579634189606, "learning_rate": 4.299076928750964e-06, "loss": 0.4847, "step": 5277 }, { "epoch": 1.4636716583471991, "grad_norm": 0.19542749226093292, "learning_rate": 4.294910565032143e-06, "loss": 0.4799, "step": 5278 }, { "epoch": 1.463948973932335, "grad_norm": 0.20481598377227783, "learning_rate": 4.290745802343663e-06, "loss": 0.4998, "step": 5279 }, { "epoch": 1.464226289517471, "grad_norm": 0.20202511548995972, "learning_rate": 4.286582641498177e-06, "loss": 0.5196, "step": 5280 }, { "epoch": 1.4645036051026068, "grad_norm": 0.20724746584892273, "learning_rate": 4.282421083308024e-06, "loss": 0.4854, "step": 5281 }, { "epoch": 1.4647809206877427, "grad_norm": 0.22080543637275696, "learning_rate": 4.2782611285852386e-06, "loss": 0.5355, "step": 5282 }, { "epoch": 1.4650582362728786, "grad_norm": 0.20695441961288452, "learning_rate": 4.274102778141542e-06, "loss": 0.4853, "step": 5283 }, { "epoch": 1.4653355518580145, "grad_norm": 0.20421764254570007, "learning_rate": 4.26994603278833e-06, "loss": 0.4865, "step": 5284 }, { "epoch": 1.4656128674431503, "grad_norm": 0.19895236194133759, "learning_rate": 4.265790893336702e-06, "loss": 0.5041, "step": 5285 }, { "epoch": 1.4658901830282862, "grad_norm": 0.20470494031906128, "learning_rate": 4.261637360597428e-06, "loss": 0.4982, "step": 5286 }, { "epoch": 1.466167498613422, "grad_norm": 0.20590339601039886, "learning_rate": 4.257485435380981e-06, "loss": 0.4576, "step": 5287 }, { "epoch": 1.466444814198558, "grad_norm": 0.20538951456546783, "learning_rate": 4.253335118497503e-06, "loss": 0.513, "step": 5288 }, { "epoch": 1.4667221297836939, "grad_norm": 0.19989731907844543, "learning_rate": 4.249186410756834e-06, "loss": 0.4728, "step": 5289 }, { "epoch": 1.4669994453688298, "grad_norm": 0.19829894602298737, "learning_rate": 4.245039312968502e-06, "loss": 0.5188, "step": 5290 }, { "epoch": 1.4672767609539656, "grad_norm": 0.2066100686788559, "learning_rate": 4.240893825941707e-06, "loss": 0.5106, "step": 5291 }, { "epoch": 1.4675540765391015, "grad_norm": 0.2136521190404892, "learning_rate": 4.236749950485351e-06, "loss": 0.4995, "step": 5292 }, { "epoch": 1.4678313921242374, "grad_norm": 0.2011151909828186, "learning_rate": 4.232607687408007e-06, "loss": 0.4716, "step": 5293 }, { "epoch": 1.4681087077093733, "grad_norm": 0.20019252598285675, "learning_rate": 4.228467037517945e-06, "loss": 0.5025, "step": 5294 }, { "epoch": 1.4683860232945092, "grad_norm": 0.21552008390426636, "learning_rate": 4.224328001623114e-06, "loss": 0.4994, "step": 5295 }, { "epoch": 1.468663338879645, "grad_norm": 0.2044316530227661, "learning_rate": 4.2201905805311515e-06, "loss": 0.4901, "step": 5296 }, { "epoch": 1.468940654464781, "grad_norm": 0.22182868421077728, "learning_rate": 4.216054775049372e-06, "loss": 0.533, "step": 5297 }, { "epoch": 1.4692179700499168, "grad_norm": 0.20859482884407043, "learning_rate": 4.211920585984786e-06, "loss": 0.5134, "step": 5298 }, { "epoch": 1.4694952856350527, "grad_norm": 0.2023824155330658, "learning_rate": 4.207788014144084e-06, "loss": 0.5222, "step": 5299 }, { "epoch": 1.4697726012201886, "grad_norm": 0.23869916796684265, "learning_rate": 4.203657060333641e-06, "loss": 0.5025, "step": 5300 }, { "epoch": 1.4700499168053245, "grad_norm": 0.23270224034786224, "learning_rate": 4.199527725359508e-06, "loss": 0.5166, "step": 5301 }, { "epoch": 1.4703272323904604, "grad_norm": 0.22280660271644592, "learning_rate": 4.195400010027432e-06, "loss": 0.4891, "step": 5302 }, { "epoch": 1.4706045479755963, "grad_norm": 0.21119210124015808, "learning_rate": 4.191273915142846e-06, "loss": 0.5021, "step": 5303 }, { "epoch": 1.4708818635607321, "grad_norm": 0.21453891694545746, "learning_rate": 4.187149441510848e-06, "loss": 0.52, "step": 5304 }, { "epoch": 1.471159179145868, "grad_norm": 0.20360562205314636, "learning_rate": 4.183026589936241e-06, "loss": 0.5032, "step": 5305 }, { "epoch": 1.471436494731004, "grad_norm": 0.20575298368930817, "learning_rate": 4.178905361223505e-06, "loss": 0.4982, "step": 5306 }, { "epoch": 1.4717138103161398, "grad_norm": 0.20205079019069672, "learning_rate": 4.174785756176794e-06, "loss": 0.5021, "step": 5307 }, { "epoch": 1.4719911259012757, "grad_norm": 0.22563353180885315, "learning_rate": 4.170667775599951e-06, "loss": 0.4909, "step": 5308 }, { "epoch": 1.4722684414864116, "grad_norm": 0.19961145520210266, "learning_rate": 4.166551420296508e-06, "loss": 0.5033, "step": 5309 }, { "epoch": 1.4725457570715474, "grad_norm": 0.20603464543819427, "learning_rate": 4.162436691069676e-06, "loss": 0.4849, "step": 5310 }, { "epoch": 1.4728230726566833, "grad_norm": 0.20677094161510468, "learning_rate": 4.1583235887223455e-06, "loss": 0.481, "step": 5311 }, { "epoch": 1.4731003882418192, "grad_norm": 0.20227313041687012, "learning_rate": 4.154212114057082e-06, "loss": 0.5013, "step": 5312 }, { "epoch": 1.473377703826955, "grad_norm": 0.2114361971616745, "learning_rate": 4.150102267876163e-06, "loss": 0.4746, "step": 5313 }, { "epoch": 1.473655019412091, "grad_norm": 0.20628516376018524, "learning_rate": 4.145994050981516e-06, "loss": 0.5033, "step": 5314 }, { "epoch": 1.4739323349972269, "grad_norm": 0.1993936002254486, "learning_rate": 4.141887464174763e-06, "loss": 0.4915, "step": 5315 }, { "epoch": 1.4742096505823628, "grad_norm": 0.200067400932312, "learning_rate": 4.137782508257207e-06, "loss": 0.4849, "step": 5316 }, { "epoch": 1.4744869661674986, "grad_norm": 0.20835205912590027, "learning_rate": 4.1336791840298425e-06, "loss": 0.5053, "step": 5317 }, { "epoch": 1.4747642817526345, "grad_norm": 0.207503080368042, "learning_rate": 4.1295774922933295e-06, "loss": 0.4963, "step": 5318 }, { "epoch": 1.4750415973377704, "grad_norm": 0.20725563168525696, "learning_rate": 4.125477433848014e-06, "loss": 0.5063, "step": 5319 }, { "epoch": 1.4753189129229063, "grad_norm": 0.20372618734836578, "learning_rate": 4.121379009493931e-06, "loss": 0.4971, "step": 5320 }, { "epoch": 1.4755962285080422, "grad_norm": 0.20692381262779236, "learning_rate": 4.117282220030794e-06, "loss": 0.5218, "step": 5321 }, { "epoch": 1.475873544093178, "grad_norm": 0.19848695397377014, "learning_rate": 4.113187066257991e-06, "loss": 0.5201, "step": 5322 }, { "epoch": 1.476150859678314, "grad_norm": 0.20803263783454895, "learning_rate": 4.109093548974592e-06, "loss": 0.5094, "step": 5323 }, { "epoch": 1.4764281752634498, "grad_norm": 0.20934459567070007, "learning_rate": 4.105001668979355e-06, "loss": 0.4812, "step": 5324 }, { "epoch": 1.4767054908485857, "grad_norm": 0.22413517534732819, "learning_rate": 4.100911427070718e-06, "loss": 0.5026, "step": 5325 }, { "epoch": 1.4769828064337216, "grad_norm": 0.19864612817764282, "learning_rate": 4.096822824046787e-06, "loss": 0.5218, "step": 5326 }, { "epoch": 1.4772601220188575, "grad_norm": 0.20662501454353333, "learning_rate": 4.09273586070536e-06, "loss": 0.5018, "step": 5327 }, { "epoch": 1.4775374376039934, "grad_norm": 0.20286211371421814, "learning_rate": 4.088650537843919e-06, "loss": 0.4921, "step": 5328 }, { "epoch": 1.4778147531891292, "grad_norm": 0.21929144859313965, "learning_rate": 4.084566856259611e-06, "loss": 0.483, "step": 5329 }, { "epoch": 1.4780920687742651, "grad_norm": 0.20432603359222412, "learning_rate": 4.080484816749268e-06, "loss": 0.4813, "step": 5330 }, { "epoch": 1.478369384359401, "grad_norm": 0.19982703030109406, "learning_rate": 4.076404420109409e-06, "loss": 0.4792, "step": 5331 }, { "epoch": 1.478646699944537, "grad_norm": 0.20570825040340424, "learning_rate": 4.072325667136228e-06, "loss": 0.4854, "step": 5332 }, { "epoch": 1.4789240155296728, "grad_norm": 0.2098425328731537, "learning_rate": 4.068248558625595e-06, "loss": 0.484, "step": 5333 }, { "epoch": 1.4792013311148087, "grad_norm": 0.20449112355709076, "learning_rate": 4.064173095373067e-06, "loss": 0.5152, "step": 5334 }, { "epoch": 1.4794786466999446, "grad_norm": 0.2116931974887848, "learning_rate": 4.060099278173867e-06, "loss": 0.4783, "step": 5335 }, { "epoch": 1.4797559622850804, "grad_norm": 0.1945703774690628, "learning_rate": 4.056027107822911e-06, "loss": 0.4861, "step": 5336 }, { "epoch": 1.4800332778702163, "grad_norm": 0.20104999840259552, "learning_rate": 4.051956585114783e-06, "loss": 0.4833, "step": 5337 }, { "epoch": 1.4803105934553522, "grad_norm": 0.2020789533853531, "learning_rate": 4.047887710843756e-06, "loss": 0.4986, "step": 5338 }, { "epoch": 1.480587909040488, "grad_norm": 0.2103864550590515, "learning_rate": 4.043820485803766e-06, "loss": 0.4997, "step": 5339 }, { "epoch": 1.480865224625624, "grad_norm": 0.21654918789863586, "learning_rate": 4.039754910788442e-06, "loss": 0.4647, "step": 5340 }, { "epoch": 1.4811425402107599, "grad_norm": 0.2058803290128708, "learning_rate": 4.0356909865910895e-06, "loss": 0.5266, "step": 5341 }, { "epoch": 1.4814198557958957, "grad_norm": 0.20043110847473145, "learning_rate": 4.031628714004678e-06, "loss": 0.514, "step": 5342 }, { "epoch": 1.4816971713810316, "grad_norm": 0.2056841254234314, "learning_rate": 4.0275680938218705e-06, "loss": 0.5026, "step": 5343 }, { "epoch": 1.4819744869661675, "grad_norm": 0.2606765627861023, "learning_rate": 4.023509126835004e-06, "loss": 0.4976, "step": 5344 }, { "epoch": 1.4822518025513034, "grad_norm": 0.20417386293411255, "learning_rate": 4.019451813836088e-06, "loss": 0.5086, "step": 5345 }, { "epoch": 1.4825291181364393, "grad_norm": 0.21264471113681793, "learning_rate": 4.015396155616806e-06, "loss": 0.4835, "step": 5346 }, { "epoch": 1.4828064337215752, "grad_norm": 0.19947127997875214, "learning_rate": 4.011342152968531e-06, "loss": 0.4885, "step": 5347 }, { "epoch": 1.483083749306711, "grad_norm": 0.20617523789405823, "learning_rate": 4.007289806682307e-06, "loss": 0.5123, "step": 5348 }, { "epoch": 1.483361064891847, "grad_norm": 0.21049796044826508, "learning_rate": 4.003239117548853e-06, "loss": 0.5122, "step": 5349 }, { "epoch": 1.4836383804769828, "grad_norm": 0.22191189229488373, "learning_rate": 3.999190086358556e-06, "loss": 0.5074, "step": 5350 }, { "epoch": 1.4839156960621187, "grad_norm": 0.20709733664989471, "learning_rate": 3.995142713901506e-06, "loss": 0.507, "step": 5351 }, { "epoch": 1.4841930116472546, "grad_norm": 0.21513330936431885, "learning_rate": 3.9910970009674445e-06, "loss": 0.5193, "step": 5352 }, { "epoch": 1.4844703272323905, "grad_norm": 0.2153635323047638, "learning_rate": 3.987052948345797e-06, "loss": 0.4891, "step": 5353 }, { "epoch": 1.4847476428175264, "grad_norm": 0.21485386788845062, "learning_rate": 3.983010556825658e-06, "loss": 0.5012, "step": 5354 }, { "epoch": 1.4850249584026622, "grad_norm": 0.20851318538188934, "learning_rate": 3.978969827195821e-06, "loss": 0.4958, "step": 5355 }, { "epoch": 1.4853022739877981, "grad_norm": 0.2146236151456833, "learning_rate": 3.97493076024473e-06, "loss": 0.4802, "step": 5356 }, { "epoch": 1.485579589572934, "grad_norm": 0.22128230333328247, "learning_rate": 3.970893356760512e-06, "loss": 0.52, "step": 5357 }, { "epoch": 1.48585690515807, "grad_norm": 0.20890596508979797, "learning_rate": 3.966857617530974e-06, "loss": 0.4722, "step": 5358 }, { "epoch": 1.4861342207432058, "grad_norm": 0.20855508744716644, "learning_rate": 3.9628235433436e-06, "loss": 0.4897, "step": 5359 }, { "epoch": 1.4864115363283417, "grad_norm": 0.21735462546348572, "learning_rate": 3.958791134985541e-06, "loss": 0.5027, "step": 5360 }, { "epoch": 1.4866888519134775, "grad_norm": 0.20674702525138855, "learning_rate": 3.954760393243623e-06, "loss": 0.4892, "step": 5361 }, { "epoch": 1.4869661674986134, "grad_norm": 0.20703266561031342, "learning_rate": 3.950731318904355e-06, "loss": 0.5145, "step": 5362 }, { "epoch": 1.4872434830837493, "grad_norm": 0.20337079465389252, "learning_rate": 3.946703912753917e-06, "loss": 0.5017, "step": 5363 }, { "epoch": 1.4875207986688852, "grad_norm": 0.2117006629705429, "learning_rate": 3.942678175578159e-06, "loss": 0.5023, "step": 5364 }, { "epoch": 1.487798114254021, "grad_norm": 0.20295751094818115, "learning_rate": 3.938654108162611e-06, "loss": 0.4966, "step": 5365 }, { "epoch": 1.488075429839157, "grad_norm": 0.20612047612667084, "learning_rate": 3.934631711292477e-06, "loss": 0.4974, "step": 5366 }, { "epoch": 1.4883527454242929, "grad_norm": 0.1999398022890091, "learning_rate": 3.930610985752633e-06, "loss": 0.5003, "step": 5367 }, { "epoch": 1.4886300610094287, "grad_norm": 0.20393617451190948, "learning_rate": 3.926591932327622e-06, "loss": 0.499, "step": 5368 }, { "epoch": 1.4889073765945646, "grad_norm": 0.20448175072669983, "learning_rate": 3.922574551801675e-06, "loss": 0.4882, "step": 5369 }, { "epoch": 1.4891846921797005, "grad_norm": 0.20072409510612488, "learning_rate": 3.918558844958691e-06, "loss": 0.4902, "step": 5370 }, { "epoch": 1.4894620077648364, "grad_norm": 0.2017168551683426, "learning_rate": 3.9145448125822325e-06, "loss": 0.4926, "step": 5371 }, { "epoch": 1.4897393233499723, "grad_norm": 0.20040208101272583, "learning_rate": 3.910532455455553e-06, "loss": 0.5075, "step": 5372 }, { "epoch": 1.4900166389351082, "grad_norm": 0.19632667303085327, "learning_rate": 3.9065217743615605e-06, "loss": 0.4889, "step": 5373 }, { "epoch": 1.490293954520244, "grad_norm": 0.21401472389698029, "learning_rate": 3.902512770082854e-06, "loss": 0.4837, "step": 5374 }, { "epoch": 1.49057127010538, "grad_norm": 0.21348023414611816, "learning_rate": 3.898505443401689e-06, "loss": 0.5165, "step": 5375 }, { "epoch": 1.4908485856905158, "grad_norm": 0.20388244092464447, "learning_rate": 3.8944997951000066e-06, "loss": 0.5116, "step": 5376 }, { "epoch": 1.4911259012756517, "grad_norm": 0.20777450501918793, "learning_rate": 3.89049582595941e-06, "loss": 0.5097, "step": 5377 }, { "epoch": 1.4914032168607876, "grad_norm": 0.20261326432228088, "learning_rate": 3.886493536761182e-06, "loss": 0.5027, "step": 5378 }, { "epoch": 1.4916805324459235, "grad_norm": 0.20818550884723663, "learning_rate": 3.882492928286279e-06, "loss": 0.4847, "step": 5379 }, { "epoch": 1.4919578480310594, "grad_norm": 0.1958109438419342, "learning_rate": 3.878494001315319e-06, "loss": 0.4782, "step": 5380 }, { "epoch": 1.4922351636161952, "grad_norm": 0.20695748925209045, "learning_rate": 3.874496756628606e-06, "loss": 0.4889, "step": 5381 }, { "epoch": 1.4925124792013311, "grad_norm": 0.20474323630332947, "learning_rate": 3.8705011950060985e-06, "loss": 0.4796, "step": 5382 }, { "epoch": 1.492789794786467, "grad_norm": 0.20276211202144623, "learning_rate": 3.8665073172274465e-06, "loss": 0.4914, "step": 5383 }, { "epoch": 1.493067110371603, "grad_norm": 0.20544062554836273, "learning_rate": 3.862515124071954e-06, "loss": 0.5119, "step": 5384 }, { "epoch": 1.4933444259567388, "grad_norm": 0.19975237548351288, "learning_rate": 3.858524616318607e-06, "loss": 0.4804, "step": 5385 }, { "epoch": 1.4936217415418747, "grad_norm": 0.20508365333080292, "learning_rate": 3.8545357947460625e-06, "loss": 0.4953, "step": 5386 }, { "epoch": 1.4938990571270105, "grad_norm": 0.20914685726165771, "learning_rate": 3.850548660132641e-06, "loss": 0.4684, "step": 5387 }, { "epoch": 1.4941763727121464, "grad_norm": 0.23906511068344116, "learning_rate": 3.846563213256335e-06, "loss": 0.5059, "step": 5388 }, { "epoch": 1.4944536882972823, "grad_norm": 0.240016907453537, "learning_rate": 3.842579454894815e-06, "loss": 0.5089, "step": 5389 }, { "epoch": 1.4947310038824182, "grad_norm": 0.2028864324092865, "learning_rate": 3.838597385825421e-06, "loss": 0.4835, "step": 5390 }, { "epoch": 1.495008319467554, "grad_norm": 0.21252113580703735, "learning_rate": 3.834617006825157e-06, "loss": 0.4773, "step": 5391 }, { "epoch": 1.49528563505269, "grad_norm": 0.20465324819087982, "learning_rate": 3.830638318670691e-06, "loss": 0.4668, "step": 5392 }, { "epoch": 1.4955629506378258, "grad_norm": 0.20127691328525543, "learning_rate": 3.826661322138389e-06, "loss": 0.5119, "step": 5393 }, { "epoch": 1.4958402662229617, "grad_norm": 0.2034253180027008, "learning_rate": 3.822686018004258e-06, "loss": 0.5025, "step": 5394 }, { "epoch": 1.4961175818080976, "grad_norm": 0.2095792442560196, "learning_rate": 3.8187124070439815e-06, "loss": 0.4719, "step": 5395 }, { "epoch": 1.4963948973932335, "grad_norm": 0.21357692778110504, "learning_rate": 3.814740490032921e-06, "loss": 0.5078, "step": 5396 }, { "epoch": 1.4966722129783694, "grad_norm": 0.2135164737701416, "learning_rate": 3.8107702677461067e-06, "loss": 0.4999, "step": 5397 }, { "epoch": 1.4969495285635053, "grad_norm": 0.21254348754882812, "learning_rate": 3.8068017409582294e-06, "loss": 0.4971, "step": 5398 }, { "epoch": 1.4972268441486412, "grad_norm": 0.2134813517332077, "learning_rate": 3.802834910443652e-06, "loss": 0.5112, "step": 5399 }, { "epoch": 1.497504159733777, "grad_norm": 0.20528295636177063, "learning_rate": 3.798869776976409e-06, "loss": 0.4773, "step": 5400 }, { "epoch": 1.497781475318913, "grad_norm": 0.20741385221481323, "learning_rate": 3.7949063413302093e-06, "loss": 0.5007, "step": 5401 }, { "epoch": 1.4980587909040488, "grad_norm": 0.2020622044801712, "learning_rate": 3.7909446042784193e-06, "loss": 0.4935, "step": 5402 }, { "epoch": 1.4983361064891847, "grad_norm": 0.20889051258563995, "learning_rate": 3.786984566594071e-06, "loss": 0.471, "step": 5403 }, { "epoch": 1.4986134220743206, "grad_norm": 0.19613035023212433, "learning_rate": 3.7830262290498896e-06, "loss": 0.5077, "step": 5404 }, { "epoch": 1.4988907376594565, "grad_norm": 0.2048519104719162, "learning_rate": 3.7790695924182413e-06, "loss": 0.4703, "step": 5405 }, { "epoch": 1.4991680532445923, "grad_norm": 0.22040173411369324, "learning_rate": 3.775114657471168e-06, "loss": 0.4972, "step": 5406 }, { "epoch": 1.4994453688297282, "grad_norm": 0.2038019597530365, "learning_rate": 3.771161424980388e-06, "loss": 0.5195, "step": 5407 }, { "epoch": 1.4997226844148641, "grad_norm": 0.20490224659442902, "learning_rate": 3.7672098957172846e-06, "loss": 0.4884, "step": 5408 }, { "epoch": 1.5, "grad_norm": 0.20087428390979767, "learning_rate": 3.763260070452895e-06, "loss": 0.4781, "step": 5409 }, { "epoch": 1.5002773155851359, "grad_norm": 0.21157024800777435, "learning_rate": 3.759311949957947e-06, "loss": 0.5003, "step": 5410 }, { "epoch": 1.5005546311702718, "grad_norm": 0.23590406775474548, "learning_rate": 3.755365535002814e-06, "loss": 0.4798, "step": 5411 }, { "epoch": 1.5008319467554077, "grad_norm": 0.21228978037834167, "learning_rate": 3.751420826357553e-06, "loss": 0.5178, "step": 5412 }, { "epoch": 1.5011092623405435, "grad_norm": 0.21098542213439941, "learning_rate": 3.7474778247918743e-06, "loss": 0.4764, "step": 5413 }, { "epoch": 1.5013865779256794, "grad_norm": 0.20355211198329926, "learning_rate": 3.743536531075169e-06, "loss": 0.4938, "step": 5414 }, { "epoch": 1.5016638935108153, "grad_norm": 0.20924827456474304, "learning_rate": 3.739596945976481e-06, "loss": 0.5048, "step": 5415 }, { "epoch": 1.5019412090959512, "grad_norm": 0.20819483697414398, "learning_rate": 3.7356590702645327e-06, "loss": 0.4889, "step": 5416 }, { "epoch": 1.502218524681087, "grad_norm": 0.20322836935520172, "learning_rate": 3.7317229047077086e-06, "loss": 0.4886, "step": 5417 }, { "epoch": 1.502495840266223, "grad_norm": 0.20074166357517242, "learning_rate": 3.7277884500740543e-06, "loss": 0.5019, "step": 5418 }, { "epoch": 1.5027731558513588, "grad_norm": 0.21029093861579895, "learning_rate": 3.723855707131292e-06, "loss": 0.4973, "step": 5419 }, { "epoch": 1.5030504714364947, "grad_norm": 0.20609499514102936, "learning_rate": 3.7199246766467964e-06, "loss": 0.5115, "step": 5420 }, { "epoch": 1.5033277870216306, "grad_norm": 0.21178114414215088, "learning_rate": 3.715995359387625e-06, "loss": 0.5378, "step": 5421 }, { "epoch": 1.5036051026067665, "grad_norm": 0.2018701434135437, "learning_rate": 3.7120677561204823e-06, "loss": 0.4945, "step": 5422 }, { "epoch": 1.5038824181919024, "grad_norm": 0.2033861130475998, "learning_rate": 3.708141867611753e-06, "loss": 0.4795, "step": 5423 }, { "epoch": 1.5041597337770383, "grad_norm": 0.20554760098457336, "learning_rate": 3.7042176946274846e-06, "loss": 0.4864, "step": 5424 }, { "epoch": 1.5044370493621742, "grad_norm": 0.20681151747703552, "learning_rate": 3.7002952379333837e-06, "loss": 0.4868, "step": 5425 }, { "epoch": 1.50471436494731, "grad_norm": 0.1939842700958252, "learning_rate": 3.6963744982948216e-06, "loss": 0.4813, "step": 5426 }, { "epoch": 1.504991680532446, "grad_norm": 0.20737822353839874, "learning_rate": 3.6924554764768428e-06, "loss": 0.5158, "step": 5427 }, { "epoch": 1.5052689961175818, "grad_norm": 0.20026959478855133, "learning_rate": 3.688538173244156e-06, "loss": 0.493, "step": 5428 }, { "epoch": 1.5055463117027177, "grad_norm": 0.20325352251529694, "learning_rate": 3.6846225893611265e-06, "loss": 0.5011, "step": 5429 }, { "epoch": 1.5058236272878536, "grad_norm": 0.20845289528369904, "learning_rate": 3.680708725591782e-06, "loss": 0.4736, "step": 5430 }, { "epoch": 1.5061009428729895, "grad_norm": 0.20399589836597443, "learning_rate": 3.6767965826998345e-06, "loss": 0.5033, "step": 5431 }, { "epoch": 1.5063782584581253, "grad_norm": 0.19747604429721832, "learning_rate": 3.672886161448641e-06, "loss": 0.4793, "step": 5432 }, { "epoch": 1.5066555740432612, "grad_norm": 0.2051791548728943, "learning_rate": 3.6689774626012224e-06, "loss": 0.5145, "step": 5433 }, { "epoch": 1.506932889628397, "grad_norm": 0.1973484754562378, "learning_rate": 3.665070486920276e-06, "loss": 0.4764, "step": 5434 }, { "epoch": 1.507210205213533, "grad_norm": 0.195805624127388, "learning_rate": 3.6611652351681568e-06, "loss": 0.4818, "step": 5435 }, { "epoch": 1.5074875207986689, "grad_norm": 0.20444104075431824, "learning_rate": 3.657261708106882e-06, "loss": 0.4878, "step": 5436 }, { "epoch": 1.5077648363838048, "grad_norm": 0.20944221317768097, "learning_rate": 3.653359906498127e-06, "loss": 0.5183, "step": 5437 }, { "epoch": 1.5080421519689406, "grad_norm": 0.2081797569990158, "learning_rate": 3.6494598311032415e-06, "loss": 0.4885, "step": 5438 }, { "epoch": 1.5083194675540765, "grad_norm": 0.20127315819263458, "learning_rate": 3.645561482683238e-06, "loss": 0.4935, "step": 5439 }, { "epoch": 1.5085967831392124, "grad_norm": 0.20761513710021973, "learning_rate": 3.6416648619987837e-06, "loss": 0.4706, "step": 5440 }, { "epoch": 1.5088740987243483, "grad_norm": 0.2054562270641327, "learning_rate": 3.6377699698102035e-06, "loss": 0.5033, "step": 5441 }, { "epoch": 1.5091514143094842, "grad_norm": 0.21789734065532684, "learning_rate": 3.6338768068775104e-06, "loss": 0.4885, "step": 5442 }, { "epoch": 1.50942872989462, "grad_norm": 0.21268032491207123, "learning_rate": 3.6299853739603555e-06, "loss": 0.4779, "step": 5443 }, { "epoch": 1.509706045479756, "grad_norm": 0.19880391657352448, "learning_rate": 3.6260956718180568e-06, "loss": 0.4655, "step": 5444 }, { "epoch": 1.5099833610648918, "grad_norm": 0.21325719356536865, "learning_rate": 3.6222077012096026e-06, "loss": 0.5004, "step": 5445 }, { "epoch": 1.5102606766500277, "grad_norm": 0.20476926863193512, "learning_rate": 3.6183214628936425e-06, "loss": 0.4965, "step": 5446 }, { "epoch": 1.5105379922351636, "grad_norm": 0.20836953818798065, "learning_rate": 3.614436957628481e-06, "loss": 0.4832, "step": 5447 }, { "epoch": 1.5108153078202995, "grad_norm": 0.2125098705291748, "learning_rate": 3.6105541861720826e-06, "loss": 0.5014, "step": 5448 }, { "epoch": 1.5110926234054354, "grad_norm": 0.2126840204000473, "learning_rate": 3.6066731492820844e-06, "loss": 0.5311, "step": 5449 }, { "epoch": 1.5113699389905713, "grad_norm": 0.2019418478012085, "learning_rate": 3.6027938477157838e-06, "loss": 0.5007, "step": 5450 }, { "epoch": 1.5116472545757071, "grad_norm": 0.25397753715515137, "learning_rate": 3.598916282230126e-06, "loss": 0.5272, "step": 5451 }, { "epoch": 1.511924570160843, "grad_norm": 0.20074354112148285, "learning_rate": 3.595040453581734e-06, "loss": 0.4877, "step": 5452 }, { "epoch": 1.512201885745979, "grad_norm": 0.2004215121269226, "learning_rate": 3.5911663625268792e-06, "loss": 0.4733, "step": 5453 }, { "epoch": 1.5124792013311148, "grad_norm": 0.21349315345287323, "learning_rate": 3.587294009821507e-06, "loss": 0.5166, "step": 5454 }, { "epoch": 1.5127565169162507, "grad_norm": 0.20270389318466187, "learning_rate": 3.5834233962212056e-06, "loss": 0.5115, "step": 5455 }, { "epoch": 1.5130338325013866, "grad_norm": 0.20797984302043915, "learning_rate": 3.5795545224812405e-06, "loss": 0.4746, "step": 5456 }, { "epoch": 1.5133111480865225, "grad_norm": 0.2141650915145874, "learning_rate": 3.575687389356534e-06, "loss": 0.5169, "step": 5457 }, { "epoch": 1.5135884636716583, "grad_norm": 0.20348010957241058, "learning_rate": 3.5718219976016614e-06, "loss": 0.5052, "step": 5458 }, { "epoch": 1.5138657792567942, "grad_norm": 0.2087784856557846, "learning_rate": 3.5679583479708664e-06, "loss": 0.5135, "step": 5459 }, { "epoch": 1.51414309484193, "grad_norm": 0.2124669849872589, "learning_rate": 3.564096441218044e-06, "loss": 0.513, "step": 5460 }, { "epoch": 1.514420410427066, "grad_norm": 0.20236825942993164, "learning_rate": 3.5602362780967624e-06, "loss": 0.4963, "step": 5461 }, { "epoch": 1.5146977260122019, "grad_norm": 0.1994817554950714, "learning_rate": 3.556377859360234e-06, "loss": 0.4795, "step": 5462 }, { "epoch": 1.5149750415973378, "grad_norm": 0.20891886949539185, "learning_rate": 3.5525211857613454e-06, "loss": 0.5117, "step": 5463 }, { "epoch": 1.5152523571824736, "grad_norm": 0.19349178671836853, "learning_rate": 3.5486662580526285e-06, "loss": 0.4596, "step": 5464 }, { "epoch": 1.5155296727676095, "grad_norm": 0.21563927829265594, "learning_rate": 3.5448130769862864e-06, "loss": 0.4859, "step": 5465 }, { "epoch": 1.5158069883527454, "grad_norm": 0.2011108696460724, "learning_rate": 3.5409616433141793e-06, "loss": 0.4963, "step": 5466 }, { "epoch": 1.5160843039378813, "grad_norm": 0.19179309904575348, "learning_rate": 3.537111957787821e-06, "loss": 0.4804, "step": 5467 }, { "epoch": 1.5163616195230172, "grad_norm": 0.194498673081398, "learning_rate": 3.5332640211583807e-06, "loss": 0.4814, "step": 5468 }, { "epoch": 1.516638935108153, "grad_norm": 0.2188744843006134, "learning_rate": 3.5294178341767043e-06, "loss": 0.5059, "step": 5469 }, { "epoch": 1.516916250693289, "grad_norm": 0.2129710465669632, "learning_rate": 3.525573397593282e-06, "loss": 0.4924, "step": 5470 }, { "epoch": 1.5171935662784248, "grad_norm": 0.21179741621017456, "learning_rate": 3.521730712158257e-06, "loss": 0.4987, "step": 5471 }, { "epoch": 1.5174708818635607, "grad_norm": 0.20567160844802856, "learning_rate": 3.517889778621446e-06, "loss": 0.5164, "step": 5472 }, { "epoch": 1.5177481974486966, "grad_norm": 0.2281576544046402, "learning_rate": 3.5140505977323186e-06, "loss": 0.47, "step": 5473 }, { "epoch": 1.5180255130338325, "grad_norm": 0.20740900933742523, "learning_rate": 3.5102131702399983e-06, "loss": 0.4853, "step": 5474 }, { "epoch": 1.5183028286189684, "grad_norm": 0.20481230318546295, "learning_rate": 3.506377496893265e-06, "loss": 0.5248, "step": 5475 }, { "epoch": 1.5185801442041043, "grad_norm": 0.20815472304821014, "learning_rate": 3.502543578440562e-06, "loss": 0.4855, "step": 5476 }, { "epoch": 1.5188574597892401, "grad_norm": 0.20264998078346252, "learning_rate": 3.498711415629996e-06, "loss": 0.4765, "step": 5477 }, { "epoch": 1.519134775374376, "grad_norm": 0.2170276939868927, "learning_rate": 3.494881009209315e-06, "loss": 0.5036, "step": 5478 }, { "epoch": 1.519412090959512, "grad_norm": 0.2021361142396927, "learning_rate": 3.4910523599259294e-06, "loss": 0.4986, "step": 5479 }, { "epoch": 1.5196894065446478, "grad_norm": 0.195895254611969, "learning_rate": 3.487225468526924e-06, "loss": 0.4801, "step": 5480 }, { "epoch": 1.5199667221297837, "grad_norm": 0.21378639340400696, "learning_rate": 3.483400335759017e-06, "loss": 0.5152, "step": 5481 }, { "epoch": 1.5202440377149196, "grad_norm": 0.213482066988945, "learning_rate": 3.4795769623685912e-06, "loss": 0.5034, "step": 5482 }, { "epoch": 1.5205213533000554, "grad_norm": 0.2165437638759613, "learning_rate": 3.475755349101692e-06, "loss": 0.4841, "step": 5483 }, { "epoch": 1.5207986688851913, "grad_norm": 0.20139548182487488, "learning_rate": 3.471935496704022e-06, "loss": 0.4711, "step": 5484 }, { "epoch": 1.5210759844703272, "grad_norm": 0.2036607712507248, "learning_rate": 3.4681174059209288e-06, "loss": 0.4757, "step": 5485 }, { "epoch": 1.521353300055463, "grad_norm": 0.19882218539714813, "learning_rate": 3.4643010774974226e-06, "loss": 0.5082, "step": 5486 }, { "epoch": 1.521630615640599, "grad_norm": 0.21727705001831055, "learning_rate": 3.460486512178171e-06, "loss": 0.474, "step": 5487 }, { "epoch": 1.5219079312257349, "grad_norm": 0.21604587137699127, "learning_rate": 3.456673710707503e-06, "loss": 0.4752, "step": 5488 }, { "epoch": 1.5221852468108708, "grad_norm": 0.20462778210639954, "learning_rate": 3.4528626738293893e-06, "loss": 0.5011, "step": 5489 }, { "epoch": 1.5224625623960066, "grad_norm": 0.20358699560165405, "learning_rate": 3.4490534022874706e-06, "loss": 0.4666, "step": 5490 }, { "epoch": 1.5227398779811425, "grad_norm": 0.2057787925004959, "learning_rate": 3.4452458968250293e-06, "loss": 0.5309, "step": 5491 }, { "epoch": 1.5230171935662784, "grad_norm": 0.1948128193616867, "learning_rate": 3.441440158185018e-06, "loss": 0.486, "step": 5492 }, { "epoch": 1.5232945091514143, "grad_norm": 0.22251975536346436, "learning_rate": 3.4376361871100313e-06, "loss": 0.505, "step": 5493 }, { "epoch": 1.5235718247365502, "grad_norm": 0.20903460681438446, "learning_rate": 3.433833984342326e-06, "loss": 0.5135, "step": 5494 }, { "epoch": 1.523849140321686, "grad_norm": 0.20052765309810638, "learning_rate": 3.430033550623818e-06, "loss": 0.5181, "step": 5495 }, { "epoch": 1.524126455906822, "grad_norm": 0.20799602568149567, "learning_rate": 3.4262348866960655e-06, "loss": 0.5056, "step": 5496 }, { "epoch": 1.5244037714919578, "grad_norm": 0.21278195083141327, "learning_rate": 3.4224379933002953e-06, "loss": 0.4942, "step": 5497 }, { "epoch": 1.5246810870770937, "grad_norm": 0.30083736777305603, "learning_rate": 3.418642871177373e-06, "loss": 0.4991, "step": 5498 }, { "epoch": 1.5249584026622296, "grad_norm": 0.20429125428199768, "learning_rate": 3.4148495210678366e-06, "loss": 0.4891, "step": 5499 }, { "epoch": 1.5252357182473655, "grad_norm": 0.2550550401210785, "learning_rate": 3.4110579437118624e-06, "loss": 0.4804, "step": 5500 }, { "epoch": 1.5255130338325014, "grad_norm": 0.20311662554740906, "learning_rate": 3.4072681398492942e-06, "loss": 0.4673, "step": 5501 }, { "epoch": 1.5257903494176372, "grad_norm": 0.1889600306749344, "learning_rate": 3.4034801102196146e-06, "loss": 0.4716, "step": 5502 }, { "epoch": 1.5260676650027731, "grad_norm": 0.21437427401542664, "learning_rate": 3.399693855561974e-06, "loss": 0.5053, "step": 5503 }, { "epoch": 1.526344980587909, "grad_norm": 0.21157221496105194, "learning_rate": 3.395909376615172e-06, "loss": 0.4717, "step": 5504 }, { "epoch": 1.526622296173045, "grad_norm": 0.2132854014635086, "learning_rate": 3.3921266741176614e-06, "loss": 0.4751, "step": 5505 }, { "epoch": 1.5268996117581808, "grad_norm": 0.21224236488342285, "learning_rate": 3.3883457488075406e-06, "loss": 0.4927, "step": 5506 }, { "epoch": 1.5271769273433167, "grad_norm": 0.20786736905574799, "learning_rate": 3.384566601422573e-06, "loss": 0.4666, "step": 5507 }, { "epoch": 1.5274542429284526, "grad_norm": 0.21545729041099548, "learning_rate": 3.3807892327001737e-06, "loss": 0.4783, "step": 5508 }, { "epoch": 1.5277315585135884, "grad_norm": 0.20959782600402832, "learning_rate": 3.377013643377401e-06, "loss": 0.5053, "step": 5509 }, { "epoch": 1.5280088740987243, "grad_norm": 0.20486906170845032, "learning_rate": 3.373239834190975e-06, "loss": 0.4873, "step": 5510 }, { "epoch": 1.5282861896838602, "grad_norm": 0.2007468342781067, "learning_rate": 3.3694678058772704e-06, "loss": 0.4899, "step": 5511 }, { "epoch": 1.528563505268996, "grad_norm": 0.19690293073654175, "learning_rate": 3.365697559172304e-06, "loss": 0.4941, "step": 5512 }, { "epoch": 1.528840820854132, "grad_norm": 0.2142125815153122, "learning_rate": 3.3619290948117513e-06, "loss": 0.4746, "step": 5513 }, { "epoch": 1.5291181364392679, "grad_norm": 0.20775122940540314, "learning_rate": 3.3581624135309395e-06, "loss": 0.5111, "step": 5514 }, { "epoch": 1.5293954520244037, "grad_norm": 0.21404637396335602, "learning_rate": 3.3543975160648526e-06, "loss": 0.4836, "step": 5515 }, { "epoch": 1.5296727676095396, "grad_norm": 0.20010797679424286, "learning_rate": 3.3506344031481187e-06, "loss": 0.5035, "step": 5516 }, { "epoch": 1.5299500831946755, "grad_norm": 0.20686432719230652, "learning_rate": 3.346873075515014e-06, "loss": 0.4676, "step": 5517 }, { "epoch": 1.5302273987798114, "grad_norm": 0.1945461481809616, "learning_rate": 3.3431135338994864e-06, "loss": 0.491, "step": 5518 }, { "epoch": 1.5305047143649473, "grad_norm": 0.20243096351623535, "learning_rate": 3.3393557790351167e-06, "loss": 0.5112, "step": 5519 }, { "epoch": 1.5307820299500832, "grad_norm": 0.19688890874385834, "learning_rate": 3.3355998116551395e-06, "loss": 0.4847, "step": 5520 }, { "epoch": 1.531059345535219, "grad_norm": 0.20034383237361908, "learning_rate": 3.331845632492439e-06, "loss": 0.4914, "step": 5521 }, { "epoch": 1.531336661120355, "grad_norm": 0.21226158738136292, "learning_rate": 3.328093242279569e-06, "loss": 0.5064, "step": 5522 }, { "epoch": 1.5316139767054908, "grad_norm": 0.20493634045124054, "learning_rate": 3.3243426417487107e-06, "loss": 0.5008, "step": 5523 }, { "epoch": 1.5318912922906267, "grad_norm": 0.2078971415758133, "learning_rate": 3.3205938316317047e-06, "loss": 0.4862, "step": 5524 }, { "epoch": 1.5321686078757626, "grad_norm": 0.19189336895942688, "learning_rate": 3.316846812660046e-06, "loss": 0.4914, "step": 5525 }, { "epoch": 1.5324459234608985, "grad_norm": 0.18428561091423035, "learning_rate": 3.313101585564882e-06, "loss": 0.4801, "step": 5526 }, { "epoch": 1.5327232390460344, "grad_norm": 0.19429829716682434, "learning_rate": 3.3093581510769995e-06, "loss": 0.4672, "step": 5527 }, { "epoch": 1.5330005546311702, "grad_norm": 0.20795489847660065, "learning_rate": 3.3056165099268398e-06, "loss": 0.5051, "step": 5528 }, { "epoch": 1.5332778702163061, "grad_norm": 0.20628464221954346, "learning_rate": 3.3018766628445e-06, "loss": 0.5073, "step": 5529 }, { "epoch": 1.533555185801442, "grad_norm": 0.1983129382133484, "learning_rate": 3.2981386105597256e-06, "loss": 0.4834, "step": 5530 }, { "epoch": 1.533832501386578, "grad_norm": 0.20056569576263428, "learning_rate": 3.294402353801905e-06, "loss": 0.4883, "step": 5531 }, { "epoch": 1.5341098169717138, "grad_norm": 0.20827257633209229, "learning_rate": 3.2906678933000813e-06, "loss": 0.4962, "step": 5532 }, { "epoch": 1.5343871325568497, "grad_norm": 0.20605547726154327, "learning_rate": 3.2869352297829532e-06, "loss": 0.4792, "step": 5533 }, { "epoch": 1.5346644481419855, "grad_norm": 0.20513512194156647, "learning_rate": 3.283204363978852e-06, "loss": 0.507, "step": 5534 }, { "epoch": 1.5349417637271214, "grad_norm": 0.2062227725982666, "learning_rate": 3.2794752966157793e-06, "loss": 0.4994, "step": 5535 }, { "epoch": 1.5352190793122573, "grad_norm": 0.2113962173461914, "learning_rate": 3.2757480284213646e-06, "loss": 0.4754, "step": 5536 }, { "epoch": 1.5354963948973932, "grad_norm": 0.21231204271316528, "learning_rate": 3.272022560122906e-06, "loss": 0.488, "step": 5537 }, { "epoch": 1.535773710482529, "grad_norm": 0.19757109880447388, "learning_rate": 3.2682988924473316e-06, "loss": 0.4951, "step": 5538 }, { "epoch": 1.536051026067665, "grad_norm": 0.20043742656707764, "learning_rate": 3.264577026121238e-06, "loss": 0.5039, "step": 5539 }, { "epoch": 1.5363283416528009, "grad_norm": 0.1949152946472168, "learning_rate": 3.2608569618708494e-06, "loss": 0.5021, "step": 5540 }, { "epoch": 1.5366056572379367, "grad_norm": 0.20842109620571136, "learning_rate": 3.257138700422055e-06, "loss": 0.5008, "step": 5541 }, { "epoch": 1.5368829728230726, "grad_norm": 0.21400795876979828, "learning_rate": 3.253422242500388e-06, "loss": 0.5048, "step": 5542 }, { "epoch": 1.5371602884082085, "grad_norm": 0.21231138706207275, "learning_rate": 3.249707588831025e-06, "loss": 0.4919, "step": 5543 }, { "epoch": 1.5374376039933444, "grad_norm": 0.2112995684146881, "learning_rate": 3.24599474013879e-06, "loss": 0.5135, "step": 5544 }, { "epoch": 1.5377149195784803, "grad_norm": 0.20431232452392578, "learning_rate": 3.2422836971481616e-06, "loss": 0.5028, "step": 5545 }, { "epoch": 1.5379922351636162, "grad_norm": 0.20688453316688538, "learning_rate": 3.238574460583266e-06, "loss": 0.4814, "step": 5546 }, { "epoch": 1.538269550748752, "grad_norm": 0.21401961147785187, "learning_rate": 3.234867031167865e-06, "loss": 0.5026, "step": 5547 }, { "epoch": 1.538546866333888, "grad_norm": 0.20731821656227112, "learning_rate": 3.231161409625383e-06, "loss": 0.5069, "step": 5548 }, { "epoch": 1.5388241819190238, "grad_norm": 0.20831851661205292, "learning_rate": 3.227457596678886e-06, "loss": 0.4883, "step": 5549 }, { "epoch": 1.5391014975041597, "grad_norm": 0.2025631219148636, "learning_rate": 3.223755593051084e-06, "loss": 0.4687, "step": 5550 }, { "epoch": 1.5393788130892956, "grad_norm": 0.21386443078517914, "learning_rate": 3.2200553994643307e-06, "loss": 0.4807, "step": 5551 }, { "epoch": 1.5396561286744315, "grad_norm": 0.22764462232589722, "learning_rate": 3.2163570166406366e-06, "loss": 0.4884, "step": 5552 }, { "epoch": 1.5399334442595674, "grad_norm": 0.20263278484344482, "learning_rate": 3.2126604453016574e-06, "loss": 0.5059, "step": 5553 }, { "epoch": 1.5402107598447032, "grad_norm": 0.22203831374645233, "learning_rate": 3.20896568616869e-06, "loss": 0.5071, "step": 5554 }, { "epoch": 1.5404880754298391, "grad_norm": 0.21261858940124512, "learning_rate": 3.205272739962674e-06, "loss": 0.4967, "step": 5555 }, { "epoch": 1.540765391014975, "grad_norm": 0.2063218355178833, "learning_rate": 3.2015816074042052e-06, "loss": 0.4983, "step": 5556 }, { "epoch": 1.541042706600111, "grad_norm": 0.21330100297927856, "learning_rate": 3.197892289213526e-06, "loss": 0.4947, "step": 5557 }, { "epoch": 1.5413200221852468, "grad_norm": 0.2076280117034912, "learning_rate": 3.1942047861105163e-06, "loss": 0.5006, "step": 5558 }, { "epoch": 1.5415973377703827, "grad_norm": 0.2099052220582962, "learning_rate": 3.190519098814697e-06, "loss": 0.4869, "step": 5559 }, { "epoch": 1.5418746533555185, "grad_norm": 0.20259883999824524, "learning_rate": 3.1868352280452595e-06, "loss": 0.4829, "step": 5560 }, { "epoch": 1.5421519689406544, "grad_norm": 0.20924805104732513, "learning_rate": 3.1831531745210168e-06, "loss": 0.4715, "step": 5561 }, { "epoch": 1.5424292845257903, "grad_norm": 0.2099631428718567, "learning_rate": 3.1794729389604304e-06, "loss": 0.491, "step": 5562 }, { "epoch": 1.5427066001109262, "grad_norm": 0.2203865945339203, "learning_rate": 3.1757945220816167e-06, "loss": 0.5099, "step": 5563 }, { "epoch": 1.542983915696062, "grad_norm": 0.20883382856845856, "learning_rate": 3.1721179246023356e-06, "loss": 0.4756, "step": 5564 }, { "epoch": 1.543261231281198, "grad_norm": 0.24059654772281647, "learning_rate": 3.1684431472399856e-06, "loss": 0.4966, "step": 5565 }, { "epoch": 1.5435385468663338, "grad_norm": 0.20498476922512054, "learning_rate": 3.164770190711608e-06, "loss": 0.5164, "step": 5566 }, { "epoch": 1.5438158624514697, "grad_norm": 0.22245949506759644, "learning_rate": 3.1610990557338987e-06, "loss": 0.502, "step": 5567 }, { "epoch": 1.5440931780366056, "grad_norm": 0.2156648188829422, "learning_rate": 3.1574297430231977e-06, "loss": 0.5193, "step": 5568 }, { "epoch": 1.5443704936217415, "grad_norm": 0.22104239463806152, "learning_rate": 3.153762253295475e-06, "loss": 0.5048, "step": 5569 }, { "epoch": 1.5446478092068774, "grad_norm": 0.20960746705532074, "learning_rate": 3.1500965872663628e-06, "loss": 0.5105, "step": 5570 }, { "epoch": 1.5449251247920133, "grad_norm": 0.20040079951286316, "learning_rate": 3.1464327456511288e-06, "loss": 0.5048, "step": 5571 }, { "epoch": 1.5452024403771492, "grad_norm": 0.20150801539421082, "learning_rate": 3.142770729164686e-06, "loss": 0.4976, "step": 5572 }, { "epoch": 1.545479755962285, "grad_norm": 0.20172013342380524, "learning_rate": 3.1391105385215847e-06, "loss": 0.4958, "step": 5573 }, { "epoch": 1.545757071547421, "grad_norm": 0.2224036306142807, "learning_rate": 3.1354521744360295e-06, "loss": 0.5085, "step": 5574 }, { "epoch": 1.5460343871325568, "grad_norm": 0.21245881915092468, "learning_rate": 3.131795637621868e-06, "loss": 0.4828, "step": 5575 }, { "epoch": 1.5463117027176927, "grad_norm": 0.21667277812957764, "learning_rate": 3.1281409287925793e-06, "loss": 0.492, "step": 5576 }, { "epoch": 1.5465890183028286, "grad_norm": 0.2037910372018814, "learning_rate": 3.1244880486613017e-06, "loss": 0.5103, "step": 5577 }, { "epoch": 1.5468663338879645, "grad_norm": 0.3222982585430145, "learning_rate": 3.1208369979408013e-06, "loss": 0.4977, "step": 5578 }, { "epoch": 1.5471436494731003, "grad_norm": 0.2243606150150299, "learning_rate": 3.117187777343504e-06, "loss": 0.4867, "step": 5579 }, { "epoch": 1.5474209650582362, "grad_norm": 0.2110065221786499, "learning_rate": 3.1135403875814593e-06, "loss": 0.5011, "step": 5580 }, { "epoch": 1.5476982806433721, "grad_norm": 0.1963575929403305, "learning_rate": 3.109894829366379e-06, "loss": 0.4776, "step": 5581 }, { "epoch": 1.547975596228508, "grad_norm": 0.20565971732139587, "learning_rate": 3.1062511034095993e-06, "loss": 0.4844, "step": 5582 }, { "epoch": 1.5482529118136439, "grad_norm": 0.20696739852428436, "learning_rate": 3.1026092104221124e-06, "loss": 0.4766, "step": 5583 }, { "epoch": 1.5485302273987798, "grad_norm": 0.27509939670562744, "learning_rate": 3.098969151114552e-06, "loss": 0.4783, "step": 5584 }, { "epoch": 1.5488075429839157, "grad_norm": 0.20480765402317047, "learning_rate": 3.0953309261971804e-06, "loss": 0.4953, "step": 5585 }, { "epoch": 1.5490848585690515, "grad_norm": 0.38247838616371155, "learning_rate": 3.0916945363799214e-06, "loss": 0.4516, "step": 5586 }, { "epoch": 1.5493621741541874, "grad_norm": 0.21146711707115173, "learning_rate": 3.088059982372324e-06, "loss": 0.4855, "step": 5587 }, { "epoch": 1.5496394897393233, "grad_norm": 0.2040729522705078, "learning_rate": 3.0844272648835908e-06, "loss": 0.5106, "step": 5588 }, { "epoch": 1.5499168053244592, "grad_norm": 0.2061615288257599, "learning_rate": 3.080796384622557e-06, "loss": 0.4986, "step": 5589 }, { "epoch": 1.550194120909595, "grad_norm": 0.19639424979686737, "learning_rate": 3.0771673422977044e-06, "loss": 0.5096, "step": 5590 }, { "epoch": 1.550471436494731, "grad_norm": 0.22984932363033295, "learning_rate": 3.0735401386171618e-06, "loss": 0.4803, "step": 5591 }, { "epoch": 1.5507487520798668, "grad_norm": 0.23276609182357788, "learning_rate": 3.0699147742886862e-06, "loss": 0.4857, "step": 5592 }, { "epoch": 1.5510260676650027, "grad_norm": 0.22025956213474274, "learning_rate": 3.0662912500196804e-06, "loss": 0.5067, "step": 5593 }, { "epoch": 1.5513033832501386, "grad_norm": 0.19536203145980835, "learning_rate": 3.0626695665171927e-06, "loss": 0.4849, "step": 5594 }, { "epoch": 1.5515806988352745, "grad_norm": 0.2104620635509491, "learning_rate": 3.059049724487914e-06, "loss": 0.5033, "step": 5595 }, { "epoch": 1.5518580144204104, "grad_norm": 0.2038435935974121, "learning_rate": 3.0554317246381664e-06, "loss": 0.5164, "step": 5596 }, { "epoch": 1.5521353300055463, "grad_norm": 0.23535074293613434, "learning_rate": 3.051815567673913e-06, "loss": 0.4933, "step": 5597 }, { "epoch": 1.5524126455906821, "grad_norm": 0.20647317171096802, "learning_rate": 3.0482012543007725e-06, "loss": 0.5152, "step": 5598 }, { "epoch": 1.552689961175818, "grad_norm": 0.19642433524131775, "learning_rate": 3.0445887852239897e-06, "loss": 0.4896, "step": 5599 }, { "epoch": 1.552967276760954, "grad_norm": 0.20343849062919617, "learning_rate": 3.040978161148446e-06, "loss": 0.4754, "step": 5600 }, { "epoch": 1.5532445923460898, "grad_norm": 0.20736227929592133, "learning_rate": 3.0373693827786766e-06, "loss": 0.4728, "step": 5601 }, { "epoch": 1.5535219079312257, "grad_norm": 0.2885718047618866, "learning_rate": 3.03376245081885e-06, "loss": 0.4844, "step": 5602 }, { "epoch": 1.5537992235163616, "grad_norm": 0.21258218586444855, "learning_rate": 3.0301573659727746e-06, "loss": 0.4675, "step": 5603 }, { "epoch": 1.5540765391014975, "grad_norm": 0.2054533213376999, "learning_rate": 3.026554128943891e-06, "loss": 0.4944, "step": 5604 }, { "epoch": 1.5543538546866333, "grad_norm": 0.21457917988300323, "learning_rate": 3.022952740435292e-06, "loss": 0.4545, "step": 5605 }, { "epoch": 1.5546311702717692, "grad_norm": 0.2189054936170578, "learning_rate": 3.019353201149705e-06, "loss": 0.4932, "step": 5606 }, { "epoch": 1.554908485856905, "grad_norm": 0.19639572501182556, "learning_rate": 3.0157555117894907e-06, "loss": 0.4758, "step": 5607 }, { "epoch": 1.555185801442041, "grad_norm": 0.21322979032993317, "learning_rate": 3.012159673056661e-06, "loss": 0.4928, "step": 5608 }, { "epoch": 1.5554631170271769, "grad_norm": 0.2102145254611969, "learning_rate": 3.008565685652849e-06, "loss": 0.515, "step": 5609 }, { "epoch": 1.5557404326123128, "grad_norm": 0.22351384162902832, "learning_rate": 3.004973550279348e-06, "loss": 0.5084, "step": 5610 }, { "epoch": 1.5560177481974486, "grad_norm": 0.2032230645418167, "learning_rate": 3.001383267637069e-06, "loss": 0.4957, "step": 5611 }, { "epoch": 1.5562950637825845, "grad_norm": 0.2006012350320816, "learning_rate": 2.997794838426575e-06, "loss": 0.481, "step": 5612 }, { "epoch": 1.5565723793677204, "grad_norm": 0.20619052648544312, "learning_rate": 2.9942082633480696e-06, "loss": 0.4755, "step": 5613 }, { "epoch": 1.5568496949528563, "grad_norm": 0.21772894263267517, "learning_rate": 2.990623543101377e-06, "loss": 0.4795, "step": 5614 }, { "epoch": 1.5571270105379922, "grad_norm": 0.19506295025348663, "learning_rate": 2.9870406783859827e-06, "loss": 0.4655, "step": 5615 }, { "epoch": 1.557404326123128, "grad_norm": 0.20197400450706482, "learning_rate": 2.98345966990099e-06, "loss": 0.5146, "step": 5616 }, { "epoch": 1.557681641708264, "grad_norm": 0.20348283648490906, "learning_rate": 2.979880518345153e-06, "loss": 0.4377, "step": 5617 }, { "epoch": 1.5579589572933998, "grad_norm": 0.20409370958805084, "learning_rate": 2.976303224416856e-06, "loss": 0.5216, "step": 5618 }, { "epoch": 1.5582362728785357, "grad_norm": 0.21259050071239471, "learning_rate": 2.972727788814128e-06, "loss": 0.4988, "step": 5619 }, { "epoch": 1.5585135884636716, "grad_norm": 0.19778795540332794, "learning_rate": 2.9691542122346262e-06, "loss": 0.5165, "step": 5620 }, { "epoch": 1.5587909040488075, "grad_norm": 0.27131521701812744, "learning_rate": 2.9655824953756517e-06, "loss": 0.4913, "step": 5621 }, { "epoch": 1.5590682196339434, "grad_norm": 0.2034967541694641, "learning_rate": 2.962012638934146e-06, "loss": 0.4725, "step": 5622 }, { "epoch": 1.5593455352190793, "grad_norm": 0.19683615863323212, "learning_rate": 2.9584446436066752e-06, "loss": 0.4949, "step": 5623 }, { "epoch": 1.5596228508042151, "grad_norm": 0.21673937141895294, "learning_rate": 2.9548785100894565e-06, "loss": 0.5125, "step": 5624 }, { "epoch": 1.559900166389351, "grad_norm": 0.22336265444755554, "learning_rate": 2.9513142390783303e-06, "loss": 0.4803, "step": 5625 }, { "epoch": 1.560177481974487, "grad_norm": 0.21071960031986237, "learning_rate": 2.947751831268787e-06, "loss": 0.5069, "step": 5626 }, { "epoch": 1.5604547975596228, "grad_norm": 0.2002846896648407, "learning_rate": 2.9441912873559406e-06, "loss": 0.4857, "step": 5627 }, { "epoch": 1.5607321131447587, "grad_norm": 0.20336318016052246, "learning_rate": 2.940632608034549e-06, "loss": 0.5015, "step": 5628 }, { "epoch": 1.5610094287298946, "grad_norm": 0.199977844953537, "learning_rate": 2.9370757939990114e-06, "loss": 0.5078, "step": 5629 }, { "epoch": 1.5612867443150305, "grad_norm": 0.20519019663333893, "learning_rate": 2.933520845943351e-06, "loss": 0.5052, "step": 5630 }, { "epoch": 1.5615640599001663, "grad_norm": 0.21687155961990356, "learning_rate": 2.9299677645612282e-06, "loss": 0.4759, "step": 5631 }, { "epoch": 1.5618413754853022, "grad_norm": 0.2080409973859787, "learning_rate": 2.9264165505459476e-06, "loss": 0.4918, "step": 5632 }, { "epoch": 1.562118691070438, "grad_norm": 0.2009446918964386, "learning_rate": 2.92286720459045e-06, "loss": 0.4763, "step": 5633 }, { "epoch": 1.562396006655574, "grad_norm": 0.22965089976787567, "learning_rate": 2.9193197273873014e-06, "loss": 0.483, "step": 5634 }, { "epoch": 1.5626733222407099, "grad_norm": 0.20917722582817078, "learning_rate": 2.9157741196287017e-06, "loss": 0.496, "step": 5635 }, { "epoch": 1.5629506378258458, "grad_norm": 0.23011514544487, "learning_rate": 2.9122303820065087e-06, "loss": 0.4844, "step": 5636 }, { "epoch": 1.5632279534109816, "grad_norm": 0.2208072394132614, "learning_rate": 2.908688515212191e-06, "loss": 0.4847, "step": 5637 }, { "epoch": 1.5635052689961175, "grad_norm": 0.2128543108701706, "learning_rate": 2.905148519936857e-06, "loss": 0.5051, "step": 5638 }, { "epoch": 1.5637825845812534, "grad_norm": 0.2076890915632248, "learning_rate": 2.9016103968712584e-06, "loss": 0.5162, "step": 5639 }, { "epoch": 1.5640599001663893, "grad_norm": 0.20999306440353394, "learning_rate": 2.8980741467057786e-06, "loss": 0.4999, "step": 5640 }, { "epoch": 1.5643372157515252, "grad_norm": 0.22848869860172272, "learning_rate": 2.8945397701304304e-06, "loss": 0.4806, "step": 5641 }, { "epoch": 1.564614531336661, "grad_norm": 0.23232677578926086, "learning_rate": 2.8910072678348625e-06, "loss": 0.4944, "step": 5642 }, { "epoch": 1.564891846921797, "grad_norm": 0.2025647908449173, "learning_rate": 2.887476640508363e-06, "loss": 0.4707, "step": 5643 }, { "epoch": 1.5651691625069328, "grad_norm": 0.20394767820835114, "learning_rate": 2.8839478888398523e-06, "loss": 0.5031, "step": 5644 }, { "epoch": 1.5654464780920687, "grad_norm": 0.2066267430782318, "learning_rate": 2.880421013517881e-06, "loss": 0.4955, "step": 5645 }, { "epoch": 1.5657237936772046, "grad_norm": 0.20270580053329468, "learning_rate": 2.876896015230632e-06, "loss": 0.474, "step": 5646 }, { "epoch": 1.5660011092623405, "grad_norm": 0.2070900797843933, "learning_rate": 2.8733728946659293e-06, "loss": 0.4942, "step": 5647 }, { "epoch": 1.5662784248474764, "grad_norm": 0.20855194330215454, "learning_rate": 2.8698516525112324e-06, "loss": 0.4801, "step": 5648 }, { "epoch": 1.5665557404326123, "grad_norm": 0.2164842188358307, "learning_rate": 2.86633228945362e-06, "loss": 0.4946, "step": 5649 }, { "epoch": 1.5668330560177481, "grad_norm": 0.21440735459327698, "learning_rate": 2.862814806179817e-06, "loss": 0.5068, "step": 5650 }, { "epoch": 1.567110371602884, "grad_norm": 0.2091311663389206, "learning_rate": 2.8592992033761814e-06, "loss": 0.4693, "step": 5651 }, { "epoch": 1.56738768718802, "grad_norm": 0.20943525433540344, "learning_rate": 2.855785481728697e-06, "loss": 0.5165, "step": 5652 }, { "epoch": 1.5676650027731558, "grad_norm": 0.20559410750865936, "learning_rate": 2.85227364192298e-06, "loss": 0.5084, "step": 5653 }, { "epoch": 1.5679423183582917, "grad_norm": 0.20715293288230896, "learning_rate": 2.8487636846442873e-06, "loss": 0.5199, "step": 5654 }, { "epoch": 1.5682196339434276, "grad_norm": 0.21139737963676453, "learning_rate": 2.8452556105775073e-06, "loss": 0.5422, "step": 5655 }, { "epoch": 1.5684969495285634, "grad_norm": 0.20338012278079987, "learning_rate": 2.8417494204071526e-06, "loss": 0.5006, "step": 5656 }, { "epoch": 1.5687742651136993, "grad_norm": 0.21125024557113647, "learning_rate": 2.8382451148173798e-06, "loss": 0.5144, "step": 5657 }, { "epoch": 1.5690515806988352, "grad_norm": 0.2064410001039505, "learning_rate": 2.8347426944919637e-06, "loss": 0.4919, "step": 5658 }, { "epoch": 1.569328896283971, "grad_norm": 0.19719727337360382, "learning_rate": 2.8312421601143267e-06, "loss": 0.4966, "step": 5659 }, { "epoch": 1.569606211869107, "grad_norm": 0.2056899517774582, "learning_rate": 2.8277435123675144e-06, "loss": 0.5109, "step": 5660 }, { "epoch": 1.5698835274542429, "grad_norm": 0.20407631993293762, "learning_rate": 2.8242467519342022e-06, "loss": 0.476, "step": 5661 }, { "epoch": 1.5701608430393788, "grad_norm": 0.2034579962491989, "learning_rate": 2.8207518794967054e-06, "loss": 0.5173, "step": 5662 }, { "epoch": 1.5704381586245146, "grad_norm": 0.21342724561691284, "learning_rate": 2.81725889573696e-06, "loss": 0.4815, "step": 5663 }, { "epoch": 1.5707154742096505, "grad_norm": 0.2078382819890976, "learning_rate": 2.813767801336549e-06, "loss": 0.5132, "step": 5664 }, { "epoch": 1.5709927897947864, "grad_norm": 0.20430020987987518, "learning_rate": 2.810278596976666e-06, "loss": 0.5055, "step": 5665 }, { "epoch": 1.5712701053799223, "grad_norm": 0.2025614082813263, "learning_rate": 2.806791283338155e-06, "loss": 0.4966, "step": 5666 }, { "epoch": 1.5715474209650582, "grad_norm": 0.20510238409042358, "learning_rate": 2.8033058611014838e-06, "loss": 0.489, "step": 5667 }, { "epoch": 1.571824736550194, "grad_norm": 0.2076101154088974, "learning_rate": 2.7998223309467484e-06, "loss": 0.5016, "step": 5668 }, { "epoch": 1.57210205213533, "grad_norm": 0.19899241626262665, "learning_rate": 2.7963406935536728e-06, "loss": 0.5041, "step": 5669 }, { "epoch": 1.5723793677204658, "grad_norm": 0.22239477932453156, "learning_rate": 2.7928609496016235e-06, "loss": 0.5068, "step": 5670 }, { "epoch": 1.5726566833056017, "grad_norm": 0.20445238053798676, "learning_rate": 2.789383099769591e-06, "loss": 0.4886, "step": 5671 }, { "epoch": 1.5729339988907376, "grad_norm": 0.20267120003700256, "learning_rate": 2.785907144736194e-06, "loss": 0.4763, "step": 5672 }, { "epoch": 1.5732113144758735, "grad_norm": 0.20123063027858734, "learning_rate": 2.7824330851796755e-06, "loss": 0.4932, "step": 5673 }, { "epoch": 1.5734886300610094, "grad_norm": 0.20822611451148987, "learning_rate": 2.7789609217779316e-06, "loss": 0.47, "step": 5674 }, { "epoch": 1.5737659456461452, "grad_norm": 0.21520821750164032, "learning_rate": 2.7754906552084667e-06, "loss": 0.4951, "step": 5675 }, { "epoch": 1.5740432612312811, "grad_norm": 0.20591013133525848, "learning_rate": 2.7720222861484167e-06, "loss": 0.4938, "step": 5676 }, { "epoch": 1.574320576816417, "grad_norm": 0.22870129346847534, "learning_rate": 2.768555815274557e-06, "loss": 0.4814, "step": 5677 }, { "epoch": 1.574597892401553, "grad_norm": 0.1999206393957138, "learning_rate": 2.76509124326329e-06, "loss": 0.5062, "step": 5678 }, { "epoch": 1.5748752079866888, "grad_norm": 0.21404382586479187, "learning_rate": 2.7616285707906447e-06, "loss": 0.5205, "step": 5679 }, { "epoch": 1.5751525235718247, "grad_norm": 0.22264225780963898, "learning_rate": 2.7581677985322742e-06, "loss": 0.4951, "step": 5680 }, { "epoch": 1.5754298391569606, "grad_norm": 0.21465402841567993, "learning_rate": 2.75470892716347e-06, "loss": 0.4647, "step": 5681 }, { "epoch": 1.5757071547420964, "grad_norm": 0.21319536864757538, "learning_rate": 2.751251957359155e-06, "loss": 0.5177, "step": 5682 }, { "epoch": 1.5759844703272323, "grad_norm": 0.21107381582260132, "learning_rate": 2.7477968897938717e-06, "loss": 0.4781, "step": 5683 }, { "epoch": 1.5762617859123682, "grad_norm": 0.22035937011241913, "learning_rate": 2.744343725141792e-06, "loss": 0.4566, "step": 5684 }, { "epoch": 1.576539101497504, "grad_norm": 0.21590951085090637, "learning_rate": 2.7408924640767218e-06, "loss": 0.5025, "step": 5685 }, { "epoch": 1.57681641708264, "grad_norm": 0.20270653069019318, "learning_rate": 2.7374431072720975e-06, "loss": 0.529, "step": 5686 }, { "epoch": 1.5770937326677759, "grad_norm": 0.2021293044090271, "learning_rate": 2.733995655400974e-06, "loss": 0.4969, "step": 5687 }, { "epoch": 1.5773710482529117, "grad_norm": 0.19878076016902924, "learning_rate": 2.7305501091360444e-06, "loss": 0.476, "step": 5688 }, { "epoch": 1.5776483638380476, "grad_norm": 0.2047017365694046, "learning_rate": 2.7271064691496277e-06, "loss": 0.4912, "step": 5689 }, { "epoch": 1.5779256794231835, "grad_norm": 0.20984232425689697, "learning_rate": 2.723664736113668e-06, "loss": 0.4956, "step": 5690 }, { "epoch": 1.5782029950083194, "grad_norm": 0.2142646759748459, "learning_rate": 2.720224910699733e-06, "loss": 0.5026, "step": 5691 }, { "epoch": 1.5784803105934553, "grad_norm": 0.2094864547252655, "learning_rate": 2.7167869935790276e-06, "loss": 0.4846, "step": 5692 }, { "epoch": 1.5787576261785912, "grad_norm": 0.21176081895828247, "learning_rate": 2.713350985422386e-06, "loss": 0.5049, "step": 5693 }, { "epoch": 1.579034941763727, "grad_norm": 0.1998070776462555, "learning_rate": 2.7099168869002543e-06, "loss": 0.4521, "step": 5694 }, { "epoch": 1.579312257348863, "grad_norm": 0.20673444867134094, "learning_rate": 2.7064846986827264e-06, "loss": 0.511, "step": 5695 }, { "epoch": 1.5795895729339988, "grad_norm": 0.21131543815135956, "learning_rate": 2.7030544214395035e-06, "loss": 0.4869, "step": 5696 }, { "epoch": 1.5798668885191347, "grad_norm": 0.21434102952480316, "learning_rate": 2.6996260558399324e-06, "loss": 0.4945, "step": 5697 }, { "epoch": 1.5801442041042706, "grad_norm": 0.2118789553642273, "learning_rate": 2.696199602552971e-06, "loss": 0.4696, "step": 5698 }, { "epoch": 1.5804215196894065, "grad_norm": 0.21816542744636536, "learning_rate": 2.6927750622472157e-06, "loss": 0.4896, "step": 5699 }, { "epoch": 1.5806988352745424, "grad_norm": 0.2012248933315277, "learning_rate": 2.6893524355908804e-06, "loss": 0.4781, "step": 5700 }, { "epoch": 1.5809761508596782, "grad_norm": 0.1999814510345459, "learning_rate": 2.685931723251814e-06, "loss": 0.507, "step": 5701 }, { "epoch": 1.5812534664448141, "grad_norm": 0.21244245767593384, "learning_rate": 2.682512925897489e-06, "loss": 0.4788, "step": 5702 }, { "epoch": 1.58153078202995, "grad_norm": 0.19930778443813324, "learning_rate": 2.6790960441949996e-06, "loss": 0.5082, "step": 5703 }, { "epoch": 1.581808097615086, "grad_norm": 0.19802865386009216, "learning_rate": 2.675681078811075e-06, "loss": 0.4891, "step": 5704 }, { "epoch": 1.5820854132002218, "grad_norm": 0.2145322859287262, "learning_rate": 2.6722680304120575e-06, "loss": 0.5113, "step": 5705 }, { "epoch": 1.5823627287853577, "grad_norm": 0.20376405119895935, "learning_rate": 2.6688568996639337e-06, "loss": 0.4751, "step": 5706 }, { "epoch": 1.5826400443704935, "grad_norm": 0.21193109452724457, "learning_rate": 2.6654476872322948e-06, "loss": 0.4846, "step": 5707 }, { "epoch": 1.5829173599556294, "grad_norm": 0.21111689507961273, "learning_rate": 2.662040393782375e-06, "loss": 0.4803, "step": 5708 }, { "epoch": 1.5831946755407653, "grad_norm": 0.204023540019989, "learning_rate": 2.658635019979029e-06, "loss": 0.5126, "step": 5709 }, { "epoch": 1.5834719911259012, "grad_norm": 0.20971745252609253, "learning_rate": 2.655231566486732e-06, "loss": 0.5089, "step": 5710 }, { "epoch": 1.583749306711037, "grad_norm": 0.19988863170146942, "learning_rate": 2.6518300339695865e-06, "loss": 0.5121, "step": 5711 }, { "epoch": 1.584026622296173, "grad_norm": 0.21533909440040588, "learning_rate": 2.6484304230913236e-06, "loss": 0.4912, "step": 5712 }, { "epoch": 1.5843039378813089, "grad_norm": 0.20329493284225464, "learning_rate": 2.6450327345153e-06, "loss": 0.4747, "step": 5713 }, { "epoch": 1.5845812534664447, "grad_norm": 0.2205643355846405, "learning_rate": 2.6416369689044903e-06, "loss": 0.5216, "step": 5714 }, { "epoch": 1.5848585690515806, "grad_norm": 0.20560097694396973, "learning_rate": 2.638243126921498e-06, "loss": 0.4795, "step": 5715 }, { "epoch": 1.5851358846367165, "grad_norm": 0.19872471690177917, "learning_rate": 2.6348512092285603e-06, "loss": 0.516, "step": 5716 }, { "epoch": 1.5854132002218524, "grad_norm": 0.2076081484556198, "learning_rate": 2.6314612164875213e-06, "loss": 0.4939, "step": 5717 }, { "epoch": 1.5856905158069883, "grad_norm": 0.19979238510131836, "learning_rate": 2.6280731493598596e-06, "loss": 0.4665, "step": 5718 }, { "epoch": 1.5859678313921242, "grad_norm": 0.23124083876609802, "learning_rate": 2.6246870085066764e-06, "loss": 0.5132, "step": 5719 }, { "epoch": 1.58624514697726, "grad_norm": 0.22173020243644714, "learning_rate": 2.6213027945887035e-06, "loss": 0.5085, "step": 5720 }, { "epoch": 1.586522462562396, "grad_norm": 0.19916951656341553, "learning_rate": 2.6179205082662862e-06, "loss": 0.4924, "step": 5721 }, { "epoch": 1.5867997781475318, "grad_norm": 0.21253123879432678, "learning_rate": 2.614540150199396e-06, "loss": 0.5012, "step": 5722 }, { "epoch": 1.5870770937326677, "grad_norm": 0.21266770362854004, "learning_rate": 2.611161721047632e-06, "loss": 0.51, "step": 5723 }, { "epoch": 1.5873544093178036, "grad_norm": 0.20849168300628662, "learning_rate": 2.60778522147022e-06, "loss": 0.5161, "step": 5724 }, { "epoch": 1.5876317249029395, "grad_norm": 0.20374304056167603, "learning_rate": 2.6044106521259963e-06, "loss": 0.4899, "step": 5725 }, { "epoch": 1.5879090404880754, "grad_norm": 0.2087586522102356, "learning_rate": 2.6010380136734347e-06, "loss": 0.5035, "step": 5726 }, { "epoch": 1.5881863560732112, "grad_norm": 0.20148667693138123, "learning_rate": 2.5976673067706262e-06, "loss": 0.504, "step": 5727 }, { "epoch": 1.5884636716583471, "grad_norm": 0.2141498327255249, "learning_rate": 2.5942985320752843e-06, "loss": 0.5007, "step": 5728 }, { "epoch": 1.588740987243483, "grad_norm": 0.20762969553470612, "learning_rate": 2.5909316902447426e-06, "loss": 0.4928, "step": 5729 }, { "epoch": 1.589018302828619, "grad_norm": 0.20631739497184753, "learning_rate": 2.5875667819359626e-06, "loss": 0.4968, "step": 5730 }, { "epoch": 1.5892956184137548, "grad_norm": 0.2064218521118164, "learning_rate": 2.5842038078055327e-06, "loss": 0.4878, "step": 5731 }, { "epoch": 1.5895729339988907, "grad_norm": 0.20431697368621826, "learning_rate": 2.5808427685096505e-06, "loss": 0.4973, "step": 5732 }, { "epoch": 1.5898502495840265, "grad_norm": 0.2072911262512207, "learning_rate": 2.5774836647041515e-06, "loss": 0.5057, "step": 5733 }, { "epoch": 1.5901275651691624, "grad_norm": 0.22611966729164124, "learning_rate": 2.5741264970444768e-06, "loss": 0.4918, "step": 5734 }, { "epoch": 1.5904048807542983, "grad_norm": 0.20940294861793518, "learning_rate": 2.570771266185708e-06, "loss": 0.4741, "step": 5735 }, { "epoch": 1.5906821963394342, "grad_norm": 0.21070222556591034, "learning_rate": 2.5674179727825307e-06, "loss": 0.4841, "step": 5736 }, { "epoch": 1.59095951192457, "grad_norm": 0.21693362295627594, "learning_rate": 2.564066617489269e-06, "loss": 0.4725, "step": 5737 }, { "epoch": 1.591236827509706, "grad_norm": 0.20319637656211853, "learning_rate": 2.5607172009598556e-06, "loss": 0.5041, "step": 5738 }, { "epoch": 1.5915141430948418, "grad_norm": 0.2054455578327179, "learning_rate": 2.5573697238478523e-06, "loss": 0.4975, "step": 5739 }, { "epoch": 1.5917914586799777, "grad_norm": 0.20702321827411652, "learning_rate": 2.5540241868064434e-06, "loss": 0.4761, "step": 5740 }, { "epoch": 1.5920687742651136, "grad_norm": 0.23018468916416168, "learning_rate": 2.5506805904884272e-06, "loss": 0.5258, "step": 5741 }, { "epoch": 1.5923460898502495, "grad_norm": 0.2070692479610443, "learning_rate": 2.5473389355462325e-06, "loss": 0.5011, "step": 5742 }, { "epoch": 1.5926234054353854, "grad_norm": 0.21105840802192688, "learning_rate": 2.543999222631899e-06, "loss": 0.491, "step": 5743 }, { "epoch": 1.5929007210205213, "grad_norm": 0.21551688015460968, "learning_rate": 2.5406614523971e-06, "loss": 0.4878, "step": 5744 }, { "epoch": 1.5931780366056572, "grad_norm": 0.21947219967842102, "learning_rate": 2.537325625493116e-06, "loss": 0.4982, "step": 5745 }, { "epoch": 1.593455352190793, "grad_norm": 0.21670997142791748, "learning_rate": 2.5339917425708584e-06, "loss": 0.5216, "step": 5746 }, { "epoch": 1.593732667775929, "grad_norm": 0.22349973022937775, "learning_rate": 2.5306598042808592e-06, "loss": 0.5216, "step": 5747 }, { "epoch": 1.5940099833610648, "grad_norm": 0.20010913908481598, "learning_rate": 2.5273298112732657e-06, "loss": 0.467, "step": 5748 }, { "epoch": 1.5942872989462007, "grad_norm": 0.2064429521560669, "learning_rate": 2.5240017641978435e-06, "loss": 0.4931, "step": 5749 }, { "epoch": 1.5945646145313366, "grad_norm": 0.19768312573432922, "learning_rate": 2.520675663703985e-06, "loss": 0.4852, "step": 5750 }, { "epoch": 1.5948419301164725, "grad_norm": 0.21115142107009888, "learning_rate": 2.517351510440706e-06, "loss": 0.5125, "step": 5751 }, { "epoch": 1.5951192457016083, "grad_norm": 0.19108986854553223, "learning_rate": 2.5140293050566295e-06, "loss": 0.4878, "step": 5752 }, { "epoch": 1.5953965612867442, "grad_norm": 0.20708754658699036, "learning_rate": 2.510709048200009e-06, "loss": 0.4871, "step": 5753 }, { "epoch": 1.5956738768718801, "grad_norm": 0.20507743954658508, "learning_rate": 2.507390740518717e-06, "loss": 0.5031, "step": 5754 }, { "epoch": 1.595951192457016, "grad_norm": 0.2134825587272644, "learning_rate": 2.5040743826602405e-06, "loss": 0.4753, "step": 5755 }, { "epoch": 1.5962285080421519, "grad_norm": 0.20486515760421753, "learning_rate": 2.5007599752716867e-06, "loss": 0.5022, "step": 5756 }, { "epoch": 1.5965058236272878, "grad_norm": 0.20787964761257172, "learning_rate": 2.4974475189997862e-06, "loss": 0.498, "step": 5757 }, { "epoch": 1.5967831392124237, "grad_norm": 0.20221605896949768, "learning_rate": 2.494137014490891e-06, "loss": 0.518, "step": 5758 }, { "epoch": 1.5970604547975595, "grad_norm": 0.2075101137161255, "learning_rate": 2.4908284623909638e-06, "loss": 0.5026, "step": 5759 }, { "epoch": 1.5973377703826954, "grad_norm": 0.21558356285095215, "learning_rate": 2.487521863345589e-06, "loss": 0.488, "step": 5760 }, { "epoch": 1.5976150859678313, "grad_norm": 0.20336772501468658, "learning_rate": 2.4842172179999736e-06, "loss": 0.4776, "step": 5761 }, { "epoch": 1.5978924015529672, "grad_norm": 0.2211672067642212, "learning_rate": 2.480914526998945e-06, "loss": 0.4935, "step": 5762 }, { "epoch": 1.598169717138103, "grad_norm": 0.2278861552476883, "learning_rate": 2.4776137909869434e-06, "loss": 0.4833, "step": 5763 }, { "epoch": 1.598447032723239, "grad_norm": 0.20449037849903107, "learning_rate": 2.4743150106080233e-06, "loss": 0.4874, "step": 5764 }, { "epoch": 1.5987243483083748, "grad_norm": 0.20173722505569458, "learning_rate": 2.471018186505876e-06, "loss": 0.4963, "step": 5765 }, { "epoch": 1.5990016638935107, "grad_norm": 0.2080666571855545, "learning_rate": 2.4677233193237945e-06, "loss": 0.4766, "step": 5766 }, { "epoch": 1.5992789794786466, "grad_norm": 0.2013276368379593, "learning_rate": 2.4644304097046892e-06, "loss": 0.4742, "step": 5767 }, { "epoch": 1.5995562950637825, "grad_norm": 0.21188431978225708, "learning_rate": 2.461139458291098e-06, "loss": 0.4795, "step": 5768 }, { "epoch": 1.5998336106489184, "grad_norm": 0.20740027725696564, "learning_rate": 2.457850465725177e-06, "loss": 0.4796, "step": 5769 }, { "epoch": 1.6001109262340543, "grad_norm": 0.20362474024295807, "learning_rate": 2.454563432648692e-06, "loss": 0.5114, "step": 5770 }, { "epoch": 1.6003882418191901, "grad_norm": 0.3291257619857788, "learning_rate": 2.451278359703027e-06, "loss": 0.4933, "step": 5771 }, { "epoch": 1.600665557404326, "grad_norm": 0.20942147076129913, "learning_rate": 2.447995247529189e-06, "loss": 0.4955, "step": 5772 }, { "epoch": 1.600942872989462, "grad_norm": 0.20903481543064117, "learning_rate": 2.4447140967678057e-06, "loss": 0.4991, "step": 5773 }, { "epoch": 1.6012201885745978, "grad_norm": 0.20149755477905273, "learning_rate": 2.4414349080591064e-06, "loss": 0.4864, "step": 5774 }, { "epoch": 1.6014975041597337, "grad_norm": 0.20722270011901855, "learning_rate": 2.4381576820429588e-06, "loss": 0.4994, "step": 5775 }, { "epoch": 1.6017748197448696, "grad_norm": 0.21165424585342407, "learning_rate": 2.434882419358826e-06, "loss": 0.4968, "step": 5776 }, { "epoch": 1.6020521353300055, "grad_norm": 0.2054947316646576, "learning_rate": 2.4316091206458073e-06, "loss": 0.48, "step": 5777 }, { "epoch": 1.6023294509151413, "grad_norm": 0.2067498415708542, "learning_rate": 2.428337786542603e-06, "loss": 0.4804, "step": 5778 }, { "epoch": 1.6026067665002772, "grad_norm": 0.19922077655792236, "learning_rate": 2.42506841768754e-06, "loss": 0.4946, "step": 5779 }, { "epoch": 1.602884082085413, "grad_norm": 0.202239990234375, "learning_rate": 2.4218010147185625e-06, "loss": 0.4929, "step": 5780 }, { "epoch": 1.603161397670549, "grad_norm": 0.2116711288690567, "learning_rate": 2.4185355782732205e-06, "loss": 0.4702, "step": 5781 }, { "epoch": 1.6034387132556849, "grad_norm": 0.1949220895767212, "learning_rate": 2.4152721089886933e-06, "loss": 0.4611, "step": 5782 }, { "epoch": 1.6037160288408208, "grad_norm": 0.20025965571403503, "learning_rate": 2.412010607501765e-06, "loss": 0.5017, "step": 5783 }, { "epoch": 1.6039933444259566, "grad_norm": 0.22020164132118225, "learning_rate": 2.4087510744488465e-06, "loss": 0.4937, "step": 5784 }, { "epoch": 1.6042706600110925, "grad_norm": 0.21315798163414001, "learning_rate": 2.4054935104659533e-06, "loss": 0.4847, "step": 5785 }, { "epoch": 1.6045479755962284, "grad_norm": 0.22190795838832855, "learning_rate": 2.4022379161887265e-06, "loss": 0.4754, "step": 5786 }, { "epoch": 1.6048252911813643, "grad_norm": 0.21109223365783691, "learning_rate": 2.3989842922524154e-06, "loss": 0.5015, "step": 5787 }, { "epoch": 1.6051026067665002, "grad_norm": 0.21155421435832977, "learning_rate": 2.3957326392918906e-06, "loss": 0.4992, "step": 5788 }, { "epoch": 1.605379922351636, "grad_norm": 0.2093130350112915, "learning_rate": 2.392482957941637e-06, "loss": 0.5007, "step": 5789 }, { "epoch": 1.605657237936772, "grad_norm": 0.20884621143341064, "learning_rate": 2.389235248835754e-06, "loss": 0.5155, "step": 5790 }, { "epoch": 1.6059345535219078, "grad_norm": 0.20897847414016724, "learning_rate": 2.385989512607946e-06, "loss": 0.5158, "step": 5791 }, { "epoch": 1.6062118691070437, "grad_norm": 0.2077287882566452, "learning_rate": 2.382745749891556e-06, "loss": 0.4918, "step": 5792 }, { "epoch": 1.6064891846921796, "grad_norm": 0.2046043872833252, "learning_rate": 2.379503961319522e-06, "loss": 0.4631, "step": 5793 }, { "epoch": 1.6067665002773155, "grad_norm": 0.22556503117084503, "learning_rate": 2.3762641475244e-06, "loss": 0.4901, "step": 5794 }, { "epoch": 1.6070438158624514, "grad_norm": 0.20669564604759216, "learning_rate": 2.3730263091383654e-06, "loss": 0.5145, "step": 5795 }, { "epoch": 1.6073211314475873, "grad_norm": 0.20489123463630676, "learning_rate": 2.3697904467932104e-06, "loss": 0.49, "step": 5796 }, { "epoch": 1.6075984470327231, "grad_norm": 0.2056889533996582, "learning_rate": 2.366556561120334e-06, "loss": 0.5178, "step": 5797 }, { "epoch": 1.607875762617859, "grad_norm": 0.22281832993030548, "learning_rate": 2.3633246527507507e-06, "loss": 0.4962, "step": 5798 }, { "epoch": 1.608153078202995, "grad_norm": 0.20517009496688843, "learning_rate": 2.3600947223150926e-06, "loss": 0.517, "step": 5799 }, { "epoch": 1.6084303937881308, "grad_norm": 0.2156504988670349, "learning_rate": 2.3568667704436096e-06, "loss": 0.4901, "step": 5800 }, { "epoch": 1.6087077093732667, "grad_norm": 0.20395740866661072, "learning_rate": 2.3536407977661573e-06, "loss": 0.4848, "step": 5801 }, { "epoch": 1.6089850249584026, "grad_norm": 0.20527851581573486, "learning_rate": 2.3504168049122006e-06, "loss": 0.5139, "step": 5802 }, { "epoch": 1.6092623405435384, "grad_norm": 0.20943453907966614, "learning_rate": 2.34719479251084e-06, "loss": 0.5047, "step": 5803 }, { "epoch": 1.6095396561286743, "grad_norm": 0.19371294975280762, "learning_rate": 2.34397476119077e-06, "loss": 0.4913, "step": 5804 }, { "epoch": 1.6098169717138102, "grad_norm": 0.20842455327510834, "learning_rate": 2.3407567115802983e-06, "loss": 0.5115, "step": 5805 }, { "epoch": 1.610094287298946, "grad_norm": 0.21230155229568481, "learning_rate": 2.337540644307358e-06, "loss": 0.4878, "step": 5806 }, { "epoch": 1.610371602884082, "grad_norm": 0.20435665547847748, "learning_rate": 2.334326559999489e-06, "loss": 0.5108, "step": 5807 }, { "epoch": 1.6106489184692179, "grad_norm": 0.20020738244056702, "learning_rate": 2.3311144592838425e-06, "loss": 0.4754, "step": 5808 }, { "epoch": 1.6109262340543538, "grad_norm": 0.20572920143604279, "learning_rate": 2.327904342787182e-06, "loss": 0.5035, "step": 5809 }, { "epoch": 1.6112035496394896, "grad_norm": 0.2309185266494751, "learning_rate": 2.324696211135889e-06, "loss": 0.5065, "step": 5810 }, { "epoch": 1.6114808652246255, "grad_norm": 0.22252123057842255, "learning_rate": 2.3214900649559572e-06, "loss": 0.5174, "step": 5811 }, { "epoch": 1.6117581808097614, "grad_norm": 0.1993841528892517, "learning_rate": 2.3182859048729856e-06, "loss": 0.4873, "step": 5812 }, { "epoch": 1.6120354963948973, "grad_norm": 0.2186397761106491, "learning_rate": 2.3150837315121966e-06, "loss": 0.5177, "step": 5813 }, { "epoch": 1.6123128119800332, "grad_norm": 0.21579131484031677, "learning_rate": 2.3118835454984126e-06, "loss": 0.4963, "step": 5814 }, { "epoch": 1.612590127565169, "grad_norm": 0.23498544096946716, "learning_rate": 2.3086853474560814e-06, "loss": 0.4963, "step": 5815 }, { "epoch": 1.612867443150305, "grad_norm": 0.20521143078804016, "learning_rate": 2.305489138009248e-06, "loss": 0.4848, "step": 5816 }, { "epoch": 1.6131447587354408, "grad_norm": 0.1934400200843811, "learning_rate": 2.302294917781583e-06, "loss": 0.4906, "step": 5817 }, { "epoch": 1.6134220743205767, "grad_norm": 0.20979085564613342, "learning_rate": 2.2991026873963676e-06, "loss": 0.4736, "step": 5818 }, { "epoch": 1.6136993899057126, "grad_norm": 0.2139722853899002, "learning_rate": 2.295912447476481e-06, "loss": 0.4949, "step": 5819 }, { "epoch": 1.6139767054908485, "grad_norm": 0.21068985760211945, "learning_rate": 2.2927241986444303e-06, "loss": 0.4978, "step": 5820 }, { "epoch": 1.6142540210759844, "grad_norm": 0.20163820683956146, "learning_rate": 2.289537941522324e-06, "loss": 0.5178, "step": 5821 }, { "epoch": 1.6145313366611203, "grad_norm": 0.20904479920864105, "learning_rate": 2.286353676731888e-06, "loss": 0.5101, "step": 5822 }, { "epoch": 1.6148086522462561, "grad_norm": 0.2054595649242401, "learning_rate": 2.283171404894452e-06, "loss": 0.4829, "step": 5823 }, { "epoch": 1.615085967831392, "grad_norm": 0.21114782989025116, "learning_rate": 2.279991126630969e-06, "loss": 0.4995, "step": 5824 }, { "epoch": 1.615363283416528, "grad_norm": 0.20004698634147644, "learning_rate": 2.2768128425619858e-06, "loss": 0.4823, "step": 5825 }, { "epoch": 1.6156405990016638, "grad_norm": 0.21248266100883484, "learning_rate": 2.273636553307677e-06, "loss": 0.5148, "step": 5826 }, { "epoch": 1.6159179145867997, "grad_norm": 0.21467168629169464, "learning_rate": 2.2704622594878225e-06, "loss": 0.4923, "step": 5827 }, { "epoch": 1.6161952301719356, "grad_norm": 0.21232888102531433, "learning_rate": 2.2672899617218065e-06, "loss": 0.5014, "step": 5828 }, { "epoch": 1.6164725457570714, "grad_norm": 0.2272992879152298, "learning_rate": 2.2641196606286274e-06, "loss": 0.5026, "step": 5829 }, { "epoch": 1.6167498613422073, "grad_norm": 0.2556884288787842, "learning_rate": 2.2609513568268958e-06, "loss": 0.5019, "step": 5830 }, { "epoch": 1.6170271769273432, "grad_norm": 0.2090773582458496, "learning_rate": 2.257785050934838e-06, "loss": 0.5294, "step": 5831 }, { "epoch": 1.617304492512479, "grad_norm": 0.2042931616306305, "learning_rate": 2.2546207435702738e-06, "loss": 0.507, "step": 5832 }, { "epoch": 1.617581808097615, "grad_norm": 0.21114638447761536, "learning_rate": 2.2514584353506505e-06, "loss": 0.4889, "step": 5833 }, { "epoch": 1.6178591236827509, "grad_norm": 0.21107198297977448, "learning_rate": 2.2482981268930183e-06, "loss": 0.4965, "step": 5834 }, { "epoch": 1.6181364392678868, "grad_norm": 0.22077830135822296, "learning_rate": 2.2451398188140365e-06, "loss": 0.4955, "step": 5835 }, { "epoch": 1.6184137548530226, "grad_norm": 0.20404209196567535, "learning_rate": 2.2419835117299682e-06, "loss": 0.4862, "step": 5836 }, { "epoch": 1.6186910704381585, "grad_norm": 0.2278210073709488, "learning_rate": 2.2388292062567e-06, "loss": 0.4899, "step": 5837 }, { "epoch": 1.6189683860232944, "grad_norm": 0.20038627088069916, "learning_rate": 2.2356769030097203e-06, "loss": 0.4979, "step": 5838 }, { "epoch": 1.6192457016084303, "grad_norm": 0.2179410755634308, "learning_rate": 2.232526602604125e-06, "loss": 0.4971, "step": 5839 }, { "epoch": 1.6195230171935662, "grad_norm": 0.2113763988018036, "learning_rate": 2.2293783056546156e-06, "loss": 0.5079, "step": 5840 }, { "epoch": 1.619800332778702, "grad_norm": 0.2023729383945465, "learning_rate": 2.2262320127755184e-06, "loss": 0.5081, "step": 5841 }, { "epoch": 1.620077648363838, "grad_norm": 0.2042120099067688, "learning_rate": 2.2230877245807553e-06, "loss": 0.4965, "step": 5842 }, { "epoch": 1.6203549639489738, "grad_norm": 0.21033427119255066, "learning_rate": 2.2199454416838585e-06, "loss": 0.4882, "step": 5843 }, { "epoch": 1.6206322795341097, "grad_norm": 0.2538556456565857, "learning_rate": 2.2168051646979647e-06, "loss": 0.4803, "step": 5844 }, { "epoch": 1.6209095951192456, "grad_norm": 0.20632927119731903, "learning_rate": 2.2136668942358373e-06, "loss": 0.4971, "step": 5845 }, { "epoch": 1.6211869107043815, "grad_norm": 0.20020337402820587, "learning_rate": 2.210530630909831e-06, "loss": 0.4935, "step": 5846 }, { "epoch": 1.6214642262895174, "grad_norm": 0.21856513619422913, "learning_rate": 2.2073963753319076e-06, "loss": 0.4761, "step": 5847 }, { "epoch": 1.6217415418746532, "grad_norm": 0.2104158103466034, "learning_rate": 2.2042641281136508e-06, "loss": 0.4971, "step": 5848 }, { "epoch": 1.6220188574597891, "grad_norm": 0.20630767941474915, "learning_rate": 2.2011338898662458e-06, "loss": 0.4878, "step": 5849 }, { "epoch": 1.622296173044925, "grad_norm": 0.20518629252910614, "learning_rate": 2.198005661200478e-06, "loss": 0.5087, "step": 5850 }, { "epoch": 1.622573488630061, "grad_norm": 0.25231507420539856, "learning_rate": 2.1948794427267565e-06, "loss": 0.514, "step": 5851 }, { "epoch": 1.6228508042151968, "grad_norm": 0.21174240112304688, "learning_rate": 2.1917552350550803e-06, "loss": 0.4977, "step": 5852 }, { "epoch": 1.6231281198003327, "grad_norm": 0.2215232104063034, "learning_rate": 2.1886330387950737e-06, "loss": 0.4799, "step": 5853 }, { "epoch": 1.6234054353854686, "grad_norm": 0.2032269835472107, "learning_rate": 2.1855128545559517e-06, "loss": 0.4942, "step": 5854 }, { "epoch": 1.6236827509706044, "grad_norm": 0.2067827433347702, "learning_rate": 2.1823946829465496e-06, "loss": 0.5164, "step": 5855 }, { "epoch": 1.6239600665557403, "grad_norm": 0.19948525726795197, "learning_rate": 2.1792785245753082e-06, "loss": 0.4838, "step": 5856 }, { "epoch": 1.6242373821408762, "grad_norm": 0.21956472098827362, "learning_rate": 2.176164380050265e-06, "loss": 0.4999, "step": 5857 }, { "epoch": 1.624514697726012, "grad_norm": 0.20982100069522858, "learning_rate": 2.1730522499790793e-06, "loss": 0.4661, "step": 5858 }, { "epoch": 1.624792013311148, "grad_norm": 0.20440787076950073, "learning_rate": 2.169942134969004e-06, "loss": 0.4907, "step": 5859 }, { "epoch": 1.6250693288962839, "grad_norm": 0.21445617079734802, "learning_rate": 2.1668340356269107e-06, "loss": 0.5016, "step": 5860 }, { "epoch": 1.6253466444814197, "grad_norm": 0.25135523080825806, "learning_rate": 2.163727952559266e-06, "loss": 0.4958, "step": 5861 }, { "epoch": 1.6256239600665556, "grad_norm": 0.20553235709667206, "learning_rate": 2.1606238863721568e-06, "loss": 0.4758, "step": 5862 }, { "epoch": 1.6259012756516915, "grad_norm": 0.1968127191066742, "learning_rate": 2.157521837671259e-06, "loss": 0.4919, "step": 5863 }, { "epoch": 1.6261785912368274, "grad_norm": 0.28879934549331665, "learning_rate": 2.1544218070618695e-06, "loss": 0.4916, "step": 5864 }, { "epoch": 1.6264559068219633, "grad_norm": 0.2082158625125885, "learning_rate": 2.1513237951488907e-06, "loss": 0.4969, "step": 5865 }, { "epoch": 1.6267332224070992, "grad_norm": 0.20625808835029602, "learning_rate": 2.1482278025368214e-06, "loss": 0.4714, "step": 5866 }, { "epoch": 1.627010537992235, "grad_norm": 0.2138909250497818, "learning_rate": 2.1451338298297706e-06, "loss": 0.4911, "step": 5867 }, { "epoch": 1.627287853577371, "grad_norm": 0.20718157291412354, "learning_rate": 2.1420418776314565e-06, "loss": 0.4918, "step": 5868 }, { "epoch": 1.6275651691625068, "grad_norm": 0.2110764980316162, "learning_rate": 2.1389519465452035e-06, "loss": 0.4946, "step": 5869 }, { "epoch": 1.6278424847476427, "grad_norm": 0.21094083786010742, "learning_rate": 2.135864037173933e-06, "loss": 0.4799, "step": 5870 }, { "epoch": 1.6281198003327786, "grad_norm": 0.20451469719409943, "learning_rate": 2.132778150120182e-06, "loss": 0.4805, "step": 5871 }, { "epoch": 1.6283971159179145, "grad_norm": 0.20834794640541077, "learning_rate": 2.129694285986092e-06, "loss": 0.5009, "step": 5872 }, { "epoch": 1.6286744315030504, "grad_norm": 0.20821170508861542, "learning_rate": 2.126612445373402e-06, "loss": 0.5267, "step": 5873 }, { "epoch": 1.6289517470881862, "grad_norm": 0.2115286886692047, "learning_rate": 2.1235326288834595e-06, "loss": 0.5084, "step": 5874 }, { "epoch": 1.6292290626733221, "grad_norm": 0.2055329829454422, "learning_rate": 2.1204548371172194e-06, "loss": 0.4826, "step": 5875 }, { "epoch": 1.629506378258458, "grad_norm": 0.20218388736248016, "learning_rate": 2.117379070675245e-06, "loss": 0.5188, "step": 5876 }, { "epoch": 1.629783693843594, "grad_norm": 0.22304154932498932, "learning_rate": 2.1143053301576954e-06, "loss": 0.5008, "step": 5877 }, { "epoch": 1.6300610094287298, "grad_norm": 0.1911652535200119, "learning_rate": 2.1112336161643347e-06, "loss": 0.4933, "step": 5878 }, { "epoch": 1.6303383250138657, "grad_norm": 0.20941410958766937, "learning_rate": 2.108163929294546e-06, "loss": 0.5065, "step": 5879 }, { "epoch": 1.6306156405990015, "grad_norm": 0.20953522622585297, "learning_rate": 2.1050962701473014e-06, "loss": 0.4801, "step": 5880 }, { "epoch": 1.6308929561841374, "grad_norm": 0.2114289551973343, "learning_rate": 2.102030639321183e-06, "loss": 0.4981, "step": 5881 }, { "epoch": 1.6311702717692733, "grad_norm": 0.21842321753501892, "learning_rate": 2.0989670374143693e-06, "loss": 0.5037, "step": 5882 }, { "epoch": 1.6314475873544092, "grad_norm": 0.2003999501466751, "learning_rate": 2.0959054650246626e-06, "loss": 0.4975, "step": 5883 }, { "epoch": 1.631724902939545, "grad_norm": 0.21246321499347687, "learning_rate": 2.0928459227494505e-06, "loss": 0.4951, "step": 5884 }, { "epoch": 1.632002218524681, "grad_norm": 0.1984589397907257, "learning_rate": 2.0897884111857292e-06, "loss": 0.4977, "step": 5885 }, { "epoch": 1.6322795341098169, "grad_norm": 0.20242911577224731, "learning_rate": 2.086732930930102e-06, "loss": 0.4695, "step": 5886 }, { "epoch": 1.6325568496949527, "grad_norm": 0.2037610560655594, "learning_rate": 2.0836794825787763e-06, "loss": 0.4774, "step": 5887 }, { "epoch": 1.6328341652800886, "grad_norm": 0.23012907803058624, "learning_rate": 2.0806280667275594e-06, "loss": 0.4965, "step": 5888 }, { "epoch": 1.6331114808652245, "grad_norm": 0.21143199503421783, "learning_rate": 2.0775786839718607e-06, "loss": 0.4985, "step": 5889 }, { "epoch": 1.6333887964503604, "grad_norm": 0.20616495609283447, "learning_rate": 2.074531334906696e-06, "loss": 0.5273, "step": 5890 }, { "epoch": 1.6336661120354963, "grad_norm": 0.2136596441268921, "learning_rate": 2.0714860201266895e-06, "loss": 0.4846, "step": 5891 }, { "epoch": 1.6339434276206322, "grad_norm": 0.22074300050735474, "learning_rate": 2.068442740226055e-06, "loss": 0.5065, "step": 5892 }, { "epoch": 1.634220743205768, "grad_norm": 0.20592841506004333, "learning_rate": 2.065401495798622e-06, "loss": 0.4774, "step": 5893 }, { "epoch": 1.634498058790904, "grad_norm": 0.20553089678287506, "learning_rate": 2.062362287437818e-06, "loss": 0.4521, "step": 5894 }, { "epoch": 1.6347753743760398, "grad_norm": 0.21127671003341675, "learning_rate": 2.0593251157366727e-06, "loss": 0.5082, "step": 5895 }, { "epoch": 1.6350526899611757, "grad_norm": 0.20608097314834595, "learning_rate": 2.056289981287815e-06, "loss": 0.4999, "step": 5896 }, { "epoch": 1.6353300055463116, "grad_norm": 0.20263780653476715, "learning_rate": 2.0532568846834825e-06, "loss": 0.469, "step": 5897 }, { "epoch": 1.6356073211314475, "grad_norm": 0.2072770744562149, "learning_rate": 2.050225826515516e-06, "loss": 0.476, "step": 5898 }, { "epoch": 1.6358846367165834, "grad_norm": 0.20764364302158356, "learning_rate": 2.0471968073753483e-06, "loss": 0.4831, "step": 5899 }, { "epoch": 1.6361619523017192, "grad_norm": 0.20213009417057037, "learning_rate": 2.0441698278540296e-06, "loss": 0.4584, "step": 5900 }, { "epoch": 1.6364392678868551, "grad_norm": 0.19651676714420319, "learning_rate": 2.041144888542196e-06, "loss": 0.4947, "step": 5901 }, { "epoch": 1.636716583471991, "grad_norm": 0.20257742702960968, "learning_rate": 2.0381219900300993e-06, "loss": 0.5024, "step": 5902 }, { "epoch": 1.6369938990571269, "grad_norm": 0.20740802586078644, "learning_rate": 2.0351011329075823e-06, "loss": 0.5088, "step": 5903 }, { "epoch": 1.6372712146422628, "grad_norm": 0.21403908729553223, "learning_rate": 2.0320823177641e-06, "loss": 0.4876, "step": 5904 }, { "epoch": 1.6375485302273987, "grad_norm": 0.2182864248752594, "learning_rate": 2.0290655451886965e-06, "loss": 0.4772, "step": 5905 }, { "epoch": 1.6378258458125345, "grad_norm": 0.21262075006961823, "learning_rate": 2.0260508157700266e-06, "loss": 0.4797, "step": 5906 }, { "epoch": 1.6381031613976704, "grad_norm": 0.21089348196983337, "learning_rate": 2.023038130096347e-06, "loss": 0.4934, "step": 5907 }, { "epoch": 1.6383804769828063, "grad_norm": 0.2085971236228943, "learning_rate": 2.020027488755509e-06, "loss": 0.4815, "step": 5908 }, { "epoch": 1.6386577925679422, "grad_norm": 0.2214841991662979, "learning_rate": 2.017018892334971e-06, "loss": 0.5057, "step": 5909 }, { "epoch": 1.638935108153078, "grad_norm": 0.2010078728199005, "learning_rate": 2.0140123414217867e-06, "loss": 0.4997, "step": 5910 }, { "epoch": 1.639212423738214, "grad_norm": 0.21052546799182892, "learning_rate": 2.0110078366026173e-06, "loss": 0.4891, "step": 5911 }, { "epoch": 1.6394897393233498, "grad_norm": 0.2032831907272339, "learning_rate": 2.008005378463716e-06, "loss": 0.5115, "step": 5912 }, { "epoch": 1.6397670549084857, "grad_norm": 0.20649638772010803, "learning_rate": 2.0050049675909467e-06, "loss": 0.4735, "step": 5913 }, { "epoch": 1.6400443704936216, "grad_norm": 0.20801763236522675, "learning_rate": 2.0020066045697714e-06, "loss": 0.4761, "step": 5914 }, { "epoch": 1.6403216860787575, "grad_norm": 0.19369389116764069, "learning_rate": 1.999010289985247e-06, "loss": 0.5107, "step": 5915 }, { "epoch": 1.6405990016638934, "grad_norm": 0.20587508380413055, "learning_rate": 1.9960160244220263e-06, "loss": 0.4925, "step": 5916 }, { "epoch": 1.6408763172490293, "grad_norm": 0.20227836072444916, "learning_rate": 1.993023808464382e-06, "loss": 0.5093, "step": 5917 }, { "epoch": 1.6411536328341652, "grad_norm": 0.18990160524845123, "learning_rate": 1.990033642696172e-06, "loss": 0.4542, "step": 5918 }, { "epoch": 1.641430948419301, "grad_norm": 0.20094534754753113, "learning_rate": 1.9870455277008536e-06, "loss": 0.4991, "step": 5919 }, { "epoch": 1.641708264004437, "grad_norm": 0.22286152839660645, "learning_rate": 1.9840594640614816e-06, "loss": 0.5301, "step": 5920 }, { "epoch": 1.6419855795895728, "grad_norm": 0.19467279314994812, "learning_rate": 1.9810754523607296e-06, "loss": 0.4759, "step": 5921 }, { "epoch": 1.6422628951747087, "grad_norm": 0.21738329529762268, "learning_rate": 1.9780934931808506e-06, "loss": 0.4728, "step": 5922 }, { "epoch": 1.6425402107598446, "grad_norm": 0.21225155889987946, "learning_rate": 1.9751135871036995e-06, "loss": 0.4957, "step": 5923 }, { "epoch": 1.6428175263449805, "grad_norm": 0.20787325501441956, "learning_rate": 1.9721357347107406e-06, "loss": 0.4905, "step": 5924 }, { "epoch": 1.6430948419301163, "grad_norm": 0.21133233606815338, "learning_rate": 1.9691599365830328e-06, "loss": 0.4878, "step": 5925 }, { "epoch": 1.6433721575152522, "grad_norm": 0.19063884019851685, "learning_rate": 1.966186193301231e-06, "loss": 0.4926, "step": 5926 }, { "epoch": 1.6436494731003881, "grad_norm": 0.2176242172718048, "learning_rate": 1.9632145054455873e-06, "loss": 0.4622, "step": 5927 }, { "epoch": 1.643926788685524, "grad_norm": 0.2013779729604721, "learning_rate": 1.960244873595961e-06, "loss": 0.4814, "step": 5928 }, { "epoch": 1.6442041042706599, "grad_norm": 0.21530881524085999, "learning_rate": 1.957277298331808e-06, "loss": 0.5058, "step": 5929 }, { "epoch": 1.6444814198557958, "grad_norm": 0.20872762799263, "learning_rate": 1.9543117802321764e-06, "loss": 0.4875, "step": 5930 }, { "epoch": 1.6447587354409317, "grad_norm": 0.21378107368946075, "learning_rate": 1.9513483198757176e-06, "loss": 0.4703, "step": 5931 }, { "epoch": 1.6450360510260675, "grad_norm": 0.22160130739212036, "learning_rate": 1.9483869178406875e-06, "loss": 0.4991, "step": 5932 }, { "epoch": 1.6453133666112034, "grad_norm": 0.21556591987609863, "learning_rate": 1.945427574704928e-06, "loss": 0.5146, "step": 5933 }, { "epoch": 1.6455906821963393, "grad_norm": 0.2028489112854004, "learning_rate": 1.9424702910458837e-06, "loss": 0.5054, "step": 5934 }, { "epoch": 1.6458679977814752, "grad_norm": 0.2313559353351593, "learning_rate": 1.939515067440603e-06, "loss": 0.5166, "step": 5935 }, { "epoch": 1.646145313366611, "grad_norm": 0.20201341807842255, "learning_rate": 1.9365619044657306e-06, "loss": 0.478, "step": 5936 }, { "epoch": 1.646422628951747, "grad_norm": 0.20596764981746674, "learning_rate": 1.9336108026975e-06, "loss": 0.4794, "step": 5937 }, { "epoch": 1.6466999445368828, "grad_norm": 0.20622918009757996, "learning_rate": 1.9306617627117567e-06, "loss": 0.4991, "step": 5938 }, { "epoch": 1.6469772601220187, "grad_norm": 0.20188166201114655, "learning_rate": 1.927714785083928e-06, "loss": 0.5167, "step": 5939 }, { "epoch": 1.6472545757071546, "grad_norm": 0.2264070063829422, "learning_rate": 1.9247698703890566e-06, "loss": 0.5025, "step": 5940 }, { "epoch": 1.6475318912922905, "grad_norm": 0.20895101130008698, "learning_rate": 1.921827019201766e-06, "loss": 0.5183, "step": 5941 }, { "epoch": 1.6478092068774264, "grad_norm": 0.2241641879081726, "learning_rate": 1.91888623209629e-06, "loss": 0.4804, "step": 5942 }, { "epoch": 1.6480865224625623, "grad_norm": 0.1995384395122528, "learning_rate": 1.9159475096464484e-06, "loss": 0.4742, "step": 5943 }, { "epoch": 1.6483638380476981, "grad_norm": 0.213314950466156, "learning_rate": 1.9130108524256672e-06, "loss": 0.4683, "step": 5944 }, { "epoch": 1.648641153632834, "grad_norm": 0.220163494348526, "learning_rate": 1.9100762610069684e-06, "loss": 0.5043, "step": 5945 }, { "epoch": 1.64891846921797, "grad_norm": 0.2084074467420578, "learning_rate": 1.907143735962963e-06, "loss": 0.4717, "step": 5946 }, { "epoch": 1.6491957848031058, "grad_norm": 0.21064861118793488, "learning_rate": 1.9042132778658698e-06, "loss": 0.4935, "step": 5947 }, { "epoch": 1.6494731003882417, "grad_norm": 0.20477135479450226, "learning_rate": 1.9012848872874938e-06, "loss": 0.4861, "step": 5948 }, { "epoch": 1.6497504159733776, "grad_norm": 0.21649906039237976, "learning_rate": 1.8983585647992463e-06, "loss": 0.4822, "step": 5949 }, { "epoch": 1.6500277315585135, "grad_norm": 0.21001774072647095, "learning_rate": 1.8954343109721245e-06, "loss": 0.4892, "step": 5950 }, { "epoch": 1.6503050471436493, "grad_norm": 0.19450335204601288, "learning_rate": 1.8925121263767317e-06, "loss": 0.4973, "step": 5951 }, { "epoch": 1.6505823627287852, "grad_norm": 0.21450437605381012, "learning_rate": 1.8895920115832675e-06, "loss": 0.4925, "step": 5952 }, { "epoch": 1.650859678313921, "grad_norm": 0.21557024121284485, "learning_rate": 1.8866739671615175e-06, "loss": 0.4697, "step": 5953 }, { "epoch": 1.651136993899057, "grad_norm": 0.21621811389923096, "learning_rate": 1.8837579936808695e-06, "loss": 0.4718, "step": 5954 }, { "epoch": 1.6514143094841929, "grad_norm": 0.20242778956890106, "learning_rate": 1.8808440917103085e-06, "loss": 0.4827, "step": 5955 }, { "epoch": 1.6516916250693288, "grad_norm": 0.21023069322109222, "learning_rate": 1.8779322618184173e-06, "loss": 0.514, "step": 5956 }, { "epoch": 1.6519689406544646, "grad_norm": 0.20898282527923584, "learning_rate": 1.8750225045733678e-06, "loss": 0.5226, "step": 5957 }, { "epoch": 1.6522462562396005, "grad_norm": 0.21227650344371796, "learning_rate": 1.872114820542925e-06, "loss": 0.5026, "step": 5958 }, { "epoch": 1.6525235718247364, "grad_norm": 0.21009580790996552, "learning_rate": 1.8692092102944674e-06, "loss": 0.4846, "step": 5959 }, { "epoch": 1.6528008874098723, "grad_norm": 0.2119254618883133, "learning_rate": 1.8663056743949512e-06, "loss": 0.4779, "step": 5960 }, { "epoch": 1.6530782029950082, "grad_norm": 0.21257485449314117, "learning_rate": 1.8634042134109285e-06, "loss": 0.5017, "step": 5961 }, { "epoch": 1.653355518580144, "grad_norm": 0.216257706284523, "learning_rate": 1.860504827908556e-06, "loss": 0.5239, "step": 5962 }, { "epoch": 1.65363283416528, "grad_norm": 0.20020192861557007, "learning_rate": 1.8576075184535815e-06, "loss": 0.5074, "step": 5963 }, { "epoch": 1.6539101497504158, "grad_norm": 0.21256963908672333, "learning_rate": 1.8547122856113458e-06, "loss": 0.5388, "step": 5964 }, { "epoch": 1.6541874653355517, "grad_norm": 0.21079207956790924, "learning_rate": 1.8518191299467815e-06, "loss": 0.5012, "step": 5965 }, { "epoch": 1.6544647809206876, "grad_norm": 0.21106182038784027, "learning_rate": 1.8489280520244235e-06, "loss": 0.4973, "step": 5966 }, { "epoch": 1.6547420965058235, "grad_norm": 0.2153145670890808, "learning_rate": 1.8460390524083992e-06, "loss": 0.5018, "step": 5967 }, { "epoch": 1.6550194120909594, "grad_norm": 0.2027408331632614, "learning_rate": 1.843152131662429e-06, "loss": 0.4777, "step": 5968 }, { "epoch": 1.6552967276760953, "grad_norm": 0.2015599012374878, "learning_rate": 1.8402672903498188e-06, "loss": 0.4992, "step": 5969 }, { "epoch": 1.6555740432612311, "grad_norm": 0.24570131301879883, "learning_rate": 1.8373845290334896e-06, "loss": 0.5061, "step": 5970 }, { "epoch": 1.655851358846367, "grad_norm": 0.21255463361740112, "learning_rate": 1.834503848275941e-06, "loss": 0.4945, "step": 5971 }, { "epoch": 1.656128674431503, "grad_norm": 0.21138375997543335, "learning_rate": 1.8316252486392654e-06, "loss": 0.4811, "step": 5972 }, { "epoch": 1.6564059900166388, "grad_norm": 0.2128453552722931, "learning_rate": 1.8287487306851564e-06, "loss": 0.4978, "step": 5973 }, { "epoch": 1.6566833056017747, "grad_norm": 0.20760896801948547, "learning_rate": 1.8258742949749024e-06, "loss": 0.4547, "step": 5974 }, { "epoch": 1.6569606211869106, "grad_norm": 0.2109159529209137, "learning_rate": 1.8230019420693758e-06, "loss": 0.4891, "step": 5975 }, { "epoch": 1.6572379367720464, "grad_norm": 0.21184983849525452, "learning_rate": 1.820131672529056e-06, "loss": 0.4846, "step": 5976 }, { "epoch": 1.6575152523571823, "grad_norm": 0.20627717673778534, "learning_rate": 1.8172634869140018e-06, "loss": 0.4988, "step": 5977 }, { "epoch": 1.6577925679423182, "grad_norm": 0.1983606368303299, "learning_rate": 1.8143973857838768e-06, "loss": 0.4799, "step": 5978 }, { "epoch": 1.658069883527454, "grad_norm": 0.2059786021709442, "learning_rate": 1.8115333696979293e-06, "loss": 0.4843, "step": 5979 }, { "epoch": 1.65834719911259, "grad_norm": 0.20751167833805084, "learning_rate": 1.8086714392150096e-06, "loss": 0.4832, "step": 5980 }, { "epoch": 1.6586245146977259, "grad_norm": 0.20422019064426422, "learning_rate": 1.8058115948935514e-06, "loss": 0.488, "step": 5981 }, { "epoch": 1.6589018302828618, "grad_norm": 0.2069856822490692, "learning_rate": 1.8029538372915878e-06, "loss": 0.5008, "step": 5982 }, { "epoch": 1.6591791458679976, "grad_norm": 0.21730239689350128, "learning_rate": 1.800098166966746e-06, "loss": 0.4896, "step": 5983 }, { "epoch": 1.6594564614531335, "grad_norm": 0.20635366439819336, "learning_rate": 1.7972445844762376e-06, "loss": 0.4924, "step": 5984 }, { "epoch": 1.6597337770382694, "grad_norm": 0.21054959297180176, "learning_rate": 1.794393090376878e-06, "loss": 0.4737, "step": 5985 }, { "epoch": 1.6600110926234053, "grad_norm": 0.22848398983478546, "learning_rate": 1.7915436852250625e-06, "loss": 0.5273, "step": 5986 }, { "epoch": 1.6602884082085412, "grad_norm": 0.20154784619808197, "learning_rate": 1.7886963695767921e-06, "loss": 0.487, "step": 5987 }, { "epoch": 1.660565723793677, "grad_norm": 0.213270902633667, "learning_rate": 1.7858511439876491e-06, "loss": 0.4962, "step": 5988 }, { "epoch": 1.660843039378813, "grad_norm": 0.21345144510269165, "learning_rate": 1.7830080090128127e-06, "loss": 0.5156, "step": 5989 }, { "epoch": 1.6611203549639488, "grad_norm": 0.1976012885570526, "learning_rate": 1.7801669652070585e-06, "loss": 0.4916, "step": 5990 }, { "epoch": 1.6613976705490847, "grad_norm": 0.21380779147148132, "learning_rate": 1.7773280131247461e-06, "loss": 0.5073, "step": 5991 }, { "epoch": 1.6616749861342206, "grad_norm": 0.20816531777381897, "learning_rate": 1.7744911533198266e-06, "loss": 0.4932, "step": 5992 }, { "epoch": 1.6619523017193565, "grad_norm": 0.21241043508052826, "learning_rate": 1.771656386345852e-06, "loss": 0.4989, "step": 5993 }, { "epoch": 1.6622296173044924, "grad_norm": 0.2513413727283478, "learning_rate": 1.76882371275596e-06, "loss": 0.494, "step": 5994 }, { "epoch": 1.6625069328896283, "grad_norm": 0.20039229094982147, "learning_rate": 1.7659931331028777e-06, "loss": 0.5074, "step": 5995 }, { "epoch": 1.6627842484747641, "grad_norm": 0.1997574418783188, "learning_rate": 1.7631646479389224e-06, "loss": 0.5249, "step": 5996 }, { "epoch": 1.6630615640599, "grad_norm": 0.20580746233463287, "learning_rate": 1.7603382578160174e-06, "loss": 0.5056, "step": 5997 }, { "epoch": 1.663338879645036, "grad_norm": 0.21222086250782013, "learning_rate": 1.7575139632856604e-06, "loss": 0.4943, "step": 5998 }, { "epoch": 1.6636161952301718, "grad_norm": 0.21751612424850464, "learning_rate": 1.7546917648989428e-06, "loss": 0.4876, "step": 5999 }, { "epoch": 1.6638935108153077, "grad_norm": 0.20124460756778717, "learning_rate": 1.7518716632065544e-06, "loss": 0.4757, "step": 6000 }, { "epoch": 1.6641708264004436, "grad_norm": 0.20955534279346466, "learning_rate": 1.7490536587587716e-06, "loss": 0.5009, "step": 6001 }, { "epoch": 1.6644481419855794, "grad_norm": 0.21060660481452942, "learning_rate": 1.7462377521054633e-06, "loss": 0.5176, "step": 6002 }, { "epoch": 1.6647254575707153, "grad_norm": 0.2049666792154312, "learning_rate": 1.7434239437960797e-06, "loss": 0.5014, "step": 6003 }, { "epoch": 1.6650027731558512, "grad_norm": 0.2187904566526413, "learning_rate": 1.7406122343796766e-06, "loss": 0.4969, "step": 6004 }, { "epoch": 1.665280088740987, "grad_norm": 0.20527362823486328, "learning_rate": 1.737802624404894e-06, "loss": 0.469, "step": 6005 }, { "epoch": 1.665557404326123, "grad_norm": 0.21435390412807465, "learning_rate": 1.7349951144199572e-06, "loss": 0.5112, "step": 6006 }, { "epoch": 1.6658347199112589, "grad_norm": 0.2521720826625824, "learning_rate": 1.732189704972685e-06, "loss": 0.5026, "step": 6007 }, { "epoch": 1.6661120354963947, "grad_norm": 0.21735185384750366, "learning_rate": 1.7293863966104898e-06, "loss": 0.5284, "step": 6008 }, { "epoch": 1.6663893510815306, "grad_norm": 0.20416045188903809, "learning_rate": 1.7265851898803725e-06, "loss": 0.4943, "step": 6009 }, { "epoch": 1.6666666666666665, "grad_norm": 0.19750310480594635, "learning_rate": 1.7237860853289183e-06, "loss": 0.4829, "step": 6010 }, { "epoch": 1.6669439822518024, "grad_norm": 0.21233929693698883, "learning_rate": 1.7209890835023086e-06, "loss": 0.4907, "step": 6011 }, { "epoch": 1.6672212978369383, "grad_norm": 0.20838162302970886, "learning_rate": 1.7181941849463173e-06, "loss": 0.4875, "step": 6012 }, { "epoch": 1.6674986134220742, "grad_norm": 0.21194593608379364, "learning_rate": 1.7154013902062977e-06, "loss": 0.4902, "step": 6013 }, { "epoch": 1.66777592900721, "grad_norm": 0.208678737282753, "learning_rate": 1.7126106998271968e-06, "loss": 0.4778, "step": 6014 }, { "epoch": 1.668053244592346, "grad_norm": 0.2077416628599167, "learning_rate": 1.7098221143535557e-06, "loss": 0.4905, "step": 6015 }, { "epoch": 1.6683305601774818, "grad_norm": 0.21134360134601593, "learning_rate": 1.7070356343295026e-06, "loss": 0.4539, "step": 6016 }, { "epoch": 1.6686078757626177, "grad_norm": 0.21801994740962982, "learning_rate": 1.7042512602987489e-06, "loss": 0.4891, "step": 6017 }, { "epoch": 1.6688851913477536, "grad_norm": 0.21269913017749786, "learning_rate": 1.7014689928046056e-06, "loss": 0.4759, "step": 6018 }, { "epoch": 1.6691625069328895, "grad_norm": 0.21309658885002136, "learning_rate": 1.6986888323899594e-06, "loss": 0.477, "step": 6019 }, { "epoch": 1.6694398225180254, "grad_norm": 0.21096886694431305, "learning_rate": 1.6959107795973011e-06, "loss": 0.5187, "step": 6020 }, { "epoch": 1.6697171381031612, "grad_norm": 0.20901210606098175, "learning_rate": 1.693134834968696e-06, "loss": 0.4737, "step": 6021 }, { "epoch": 1.6699944536882971, "grad_norm": 0.21308885514736176, "learning_rate": 1.6903609990458063e-06, "loss": 0.4976, "step": 6022 }, { "epoch": 1.670271769273433, "grad_norm": 0.21009613573551178, "learning_rate": 1.6875892723698855e-06, "loss": 0.5045, "step": 6023 }, { "epoch": 1.670549084858569, "grad_norm": 0.2145100235939026, "learning_rate": 1.6848196554817633e-06, "loss": 0.4772, "step": 6024 }, { "epoch": 1.6708264004437048, "grad_norm": 0.21058829128742218, "learning_rate": 1.6820521489218728e-06, "loss": 0.5044, "step": 6025 }, { "epoch": 1.6711037160288407, "grad_norm": 0.21063855290412903, "learning_rate": 1.6792867532302207e-06, "loss": 0.5194, "step": 6026 }, { "epoch": 1.6713810316139766, "grad_norm": 0.21397702395915985, "learning_rate": 1.6765234689464157e-06, "loss": 0.4903, "step": 6027 }, { "epoch": 1.6716583471991124, "grad_norm": 0.20812270045280457, "learning_rate": 1.6737622966096405e-06, "loss": 0.4869, "step": 6028 }, { "epoch": 1.6719356627842483, "grad_norm": 0.19779033958911896, "learning_rate": 1.671003236758681e-06, "loss": 0.4884, "step": 6029 }, { "epoch": 1.6722129783693842, "grad_norm": 0.2069951891899109, "learning_rate": 1.6682462899318962e-06, "loss": 0.5154, "step": 6030 }, { "epoch": 1.67249029395452, "grad_norm": 0.20514649152755737, "learning_rate": 1.665491456667241e-06, "loss": 0.509, "step": 6031 }, { "epoch": 1.672767609539656, "grad_norm": 0.20265156030654907, "learning_rate": 1.6627387375022605e-06, "loss": 0.5008, "step": 6032 }, { "epoch": 1.6730449251247919, "grad_norm": 0.20257985591888428, "learning_rate": 1.6599881329740817e-06, "loss": 0.4914, "step": 6033 }, { "epoch": 1.6733222407099277, "grad_norm": 0.19979064166545868, "learning_rate": 1.657239643619414e-06, "loss": 0.477, "step": 6034 }, { "epoch": 1.6735995562950636, "grad_norm": 0.20505757629871368, "learning_rate": 1.6544932699745663e-06, "loss": 0.4945, "step": 6035 }, { "epoch": 1.6738768718801995, "grad_norm": 0.21343204379081726, "learning_rate": 1.6517490125754307e-06, "loss": 0.4913, "step": 6036 }, { "epoch": 1.6741541874653354, "grad_norm": 0.20039796829223633, "learning_rate": 1.6490068719574787e-06, "loss": 0.4874, "step": 6037 }, { "epoch": 1.6744315030504713, "grad_norm": 0.3097711503505707, "learning_rate": 1.646266848655778e-06, "loss": 0.464, "step": 6038 }, { "epoch": 1.6747088186356072, "grad_norm": 0.21286475658416748, "learning_rate": 1.6435289432049818e-06, "loss": 0.4889, "step": 6039 }, { "epoch": 1.674986134220743, "grad_norm": 0.2054137885570526, "learning_rate": 1.6407931561393253e-06, "loss": 0.5036, "step": 6040 }, { "epoch": 1.675263449805879, "grad_norm": 0.20179954171180725, "learning_rate": 1.638059487992631e-06, "loss": 0.5136, "step": 6041 }, { "epoch": 1.6755407653910148, "grad_norm": 0.21015551686286926, "learning_rate": 1.6353279392983117e-06, "loss": 0.4785, "step": 6042 }, { "epoch": 1.6758180809761507, "grad_norm": 0.2488449513912201, "learning_rate": 1.63259851058937e-06, "loss": 0.4889, "step": 6043 }, { "epoch": 1.6760953965612866, "grad_norm": 0.20739984512329102, "learning_rate": 1.6298712023983837e-06, "loss": 0.4849, "step": 6044 }, { "epoch": 1.6763727121464225, "grad_norm": 0.21058540046215057, "learning_rate": 1.627146015257522e-06, "loss": 0.522, "step": 6045 }, { "epoch": 1.6766500277315584, "grad_norm": 0.1949351280927658, "learning_rate": 1.6244229496985426e-06, "loss": 0.4794, "step": 6046 }, { "epoch": 1.6769273433166942, "grad_norm": 0.21308818459510803, "learning_rate": 1.6217020062527927e-06, "loss": 0.4913, "step": 6047 }, { "epoch": 1.6772046589018301, "grad_norm": 0.20299099385738373, "learning_rate": 1.6189831854511937e-06, "loss": 0.4901, "step": 6048 }, { "epoch": 1.677481974486966, "grad_norm": 0.21231001615524292, "learning_rate": 1.616266487824261e-06, "loss": 0.4885, "step": 6049 }, { "epoch": 1.677759290072102, "grad_norm": 0.20558898150920868, "learning_rate": 1.6135519139021005e-06, "loss": 0.5032, "step": 6050 }, { "epoch": 1.6780366056572378, "grad_norm": 0.2060549259185791, "learning_rate": 1.6108394642143907e-06, "loss": 0.5098, "step": 6051 }, { "epoch": 1.6783139212423737, "grad_norm": 0.20870618522167206, "learning_rate": 1.6081291392904027e-06, "loss": 0.4791, "step": 6052 }, { "epoch": 1.6785912368275095, "grad_norm": 0.20835687220096588, "learning_rate": 1.6054209396589929e-06, "loss": 0.4865, "step": 6053 }, { "epoch": 1.6788685524126454, "grad_norm": 0.2111913561820984, "learning_rate": 1.6027148658486077e-06, "loss": 0.4962, "step": 6054 }, { "epoch": 1.6791458679977813, "grad_norm": 0.21386343240737915, "learning_rate": 1.600010918387268e-06, "loss": 0.4999, "step": 6055 }, { "epoch": 1.6794231835829172, "grad_norm": 0.20479026436805725, "learning_rate": 1.5973090978025906e-06, "loss": 0.4723, "step": 6056 }, { "epoch": 1.679700499168053, "grad_norm": 0.20431126654148102, "learning_rate": 1.5946094046217664e-06, "loss": 0.4792, "step": 6057 }, { "epoch": 1.679977814753189, "grad_norm": 0.21851181983947754, "learning_rate": 1.5919118393715834e-06, "loss": 0.4986, "step": 6058 }, { "epoch": 1.6802551303383249, "grad_norm": 0.3242267668247223, "learning_rate": 1.5892164025784015e-06, "loss": 0.5032, "step": 6059 }, { "epoch": 1.6805324459234607, "grad_norm": 0.2177019715309143, "learning_rate": 1.5865230947681762e-06, "loss": 0.4851, "step": 6060 }, { "epoch": 1.6808097615085966, "grad_norm": 0.20672672986984253, "learning_rate": 1.5838319164664438e-06, "loss": 0.4711, "step": 6061 }, { "epoch": 1.6810870770937325, "grad_norm": 0.1982792168855667, "learning_rate": 1.581142868198321e-06, "loss": 0.466, "step": 6062 }, { "epoch": 1.6813643926788684, "grad_norm": 0.20962117612361908, "learning_rate": 1.5784559504885166e-06, "loss": 0.5069, "step": 6063 }, { "epoch": 1.6816417082640043, "grad_norm": 0.2054154872894287, "learning_rate": 1.5757711638613143e-06, "loss": 0.5134, "step": 6064 }, { "epoch": 1.6819190238491402, "grad_norm": 0.19651010632514954, "learning_rate": 1.5730885088405922e-06, "loss": 0.4967, "step": 6065 }, { "epoch": 1.682196339434276, "grad_norm": 0.20710954070091248, "learning_rate": 1.570407985949804e-06, "loss": 0.4772, "step": 6066 }, { "epoch": 1.682473655019412, "grad_norm": 0.2049458771944046, "learning_rate": 1.5677295957119934e-06, "loss": 0.4758, "step": 6067 }, { "epoch": 1.6827509706045478, "grad_norm": 0.21262648701667786, "learning_rate": 1.5650533386497801e-06, "loss": 0.5058, "step": 6068 }, { "epoch": 1.6830282861896837, "grad_norm": 0.20999044179916382, "learning_rate": 1.5623792152853783e-06, "loss": 0.5048, "step": 6069 }, { "epoch": 1.6833056017748196, "grad_norm": 0.21131351590156555, "learning_rate": 1.559707226140579e-06, "loss": 0.4729, "step": 6070 }, { "epoch": 1.6835829173599555, "grad_norm": 0.22507424652576447, "learning_rate": 1.5570373717367594e-06, "loss": 0.4815, "step": 6071 }, { "epoch": 1.6838602329450914, "grad_norm": 0.2068501114845276, "learning_rate": 1.5543696525948726e-06, "loss": 0.5036, "step": 6072 }, { "epoch": 1.6841375485302272, "grad_norm": 0.20177114009857178, "learning_rate": 1.551704069235467e-06, "loss": 0.4747, "step": 6073 }, { "epoch": 1.6844148641153631, "grad_norm": 0.20704385638237, "learning_rate": 1.5490406221786686e-06, "loss": 0.4812, "step": 6074 }, { "epoch": 1.684692179700499, "grad_norm": 0.21657253801822662, "learning_rate": 1.5463793119441835e-06, "loss": 0.4854, "step": 6075 }, { "epoch": 1.6849694952856349, "grad_norm": 0.2125793993473053, "learning_rate": 1.543720139051305e-06, "loss": 0.4749, "step": 6076 }, { "epoch": 1.6852468108707708, "grad_norm": 0.20819686353206635, "learning_rate": 1.541063104018911e-06, "loss": 0.5036, "step": 6077 }, { "epoch": 1.6855241264559067, "grad_norm": 0.21309512853622437, "learning_rate": 1.5384082073654564e-06, "loss": 0.5012, "step": 6078 }, { "epoch": 1.6858014420410425, "grad_norm": 0.20586422085762024, "learning_rate": 1.5357554496089805e-06, "loss": 0.4803, "step": 6079 }, { "epoch": 1.6860787576261784, "grad_norm": 0.2012133002281189, "learning_rate": 1.5331048312671085e-06, "loss": 0.4771, "step": 6080 }, { "epoch": 1.6863560732113143, "grad_norm": 0.2082866132259369, "learning_rate": 1.5304563528570488e-06, "loss": 0.4793, "step": 6081 }, { "epoch": 1.6866333887964502, "grad_norm": 0.21011273562908173, "learning_rate": 1.5278100148955865e-06, "loss": 0.4703, "step": 6082 }, { "epoch": 1.686910704381586, "grad_norm": 0.20084689557552338, "learning_rate": 1.5251658178990908e-06, "loss": 0.4969, "step": 6083 }, { "epoch": 1.687188019966722, "grad_norm": 0.22017468512058258, "learning_rate": 1.5225237623835167e-06, "loss": 0.4982, "step": 6084 }, { "epoch": 1.6874653355518578, "grad_norm": 0.21381786465644836, "learning_rate": 1.5198838488644036e-06, "loss": 0.4626, "step": 6085 }, { "epoch": 1.687742651136994, "grad_norm": 0.2041536420583725, "learning_rate": 1.5172460778568626e-06, "loss": 0.4923, "step": 6086 }, { "epoch": 1.6880199667221298, "grad_norm": 0.2171899676322937, "learning_rate": 1.5146104498755891e-06, "loss": 0.5064, "step": 6087 }, { "epoch": 1.6882972823072657, "grad_norm": 0.217244952917099, "learning_rate": 1.5119769654348748e-06, "loss": 0.5021, "step": 6088 }, { "epoch": 1.6885745978924016, "grad_norm": 0.20813634991645813, "learning_rate": 1.5093456250485764e-06, "loss": 0.4623, "step": 6089 }, { "epoch": 1.6888519134775375, "grad_norm": 0.2243288904428482, "learning_rate": 1.5067164292301358e-06, "loss": 0.5251, "step": 6090 }, { "epoch": 1.6891292290626734, "grad_norm": 0.21137182414531708, "learning_rate": 1.504089378492582e-06, "loss": 0.5257, "step": 6091 }, { "epoch": 1.6894065446478093, "grad_norm": 0.20429569482803345, "learning_rate": 1.501464473348524e-06, "loss": 0.4699, "step": 6092 }, { "epoch": 1.6896838602329451, "grad_norm": 0.20688970386981964, "learning_rate": 1.498841714310148e-06, "loss": 0.5066, "step": 6093 }, { "epoch": 1.689961175818081, "grad_norm": 0.20542113482952118, "learning_rate": 1.496221101889221e-06, "loss": 0.4753, "step": 6094 }, { "epoch": 1.690238491403217, "grad_norm": 0.20209279656410217, "learning_rate": 1.4936026365970968e-06, "loss": 0.5048, "step": 6095 }, { "epoch": 1.6905158069883528, "grad_norm": 0.2007710188627243, "learning_rate": 1.4909863189447093e-06, "loss": 0.4646, "step": 6096 }, { "epoch": 1.6907931225734887, "grad_norm": 0.19921134412288666, "learning_rate": 1.488372149442567e-06, "loss": 0.5055, "step": 6097 }, { "epoch": 1.6910704381586246, "grad_norm": 0.2226364016532898, "learning_rate": 1.485760128600769e-06, "loss": 0.4918, "step": 6098 }, { "epoch": 1.6913477537437605, "grad_norm": 0.21037879586219788, "learning_rate": 1.4831502569289834e-06, "loss": 0.4959, "step": 6099 }, { "epoch": 1.6916250693288963, "grad_norm": 0.20453481376171112, "learning_rate": 1.4805425349364716e-06, "loss": 0.5035, "step": 6100 }, { "epoch": 1.6919023849140322, "grad_norm": 0.2033025324344635, "learning_rate": 1.4779369631320637e-06, "loss": 0.4661, "step": 6101 }, { "epoch": 1.692179700499168, "grad_norm": 0.20237600803375244, "learning_rate": 1.475333542024178e-06, "loss": 0.4769, "step": 6102 }, { "epoch": 1.692457016084304, "grad_norm": 0.20004519820213318, "learning_rate": 1.4727322721208136e-06, "loss": 0.4957, "step": 6103 }, { "epoch": 1.6927343316694399, "grad_norm": 0.2055417001247406, "learning_rate": 1.4701331539295426e-06, "loss": 0.4975, "step": 6104 }, { "epoch": 1.6930116472545758, "grad_norm": 0.21751341223716736, "learning_rate": 1.4675361879575271e-06, "loss": 0.4675, "step": 6105 }, { "epoch": 1.6932889628397116, "grad_norm": 0.21179702877998352, "learning_rate": 1.4649413747114982e-06, "loss": 0.5041, "step": 6106 }, { "epoch": 1.6935662784248475, "grad_norm": 0.20675987005233765, "learning_rate": 1.4623487146977754e-06, "loss": 0.5094, "step": 6107 }, { "epoch": 1.6938435940099834, "grad_norm": 0.20587822794914246, "learning_rate": 1.4597582084222571e-06, "loss": 0.5026, "step": 6108 }, { "epoch": 1.6941209095951193, "grad_norm": 0.19946245849132538, "learning_rate": 1.4571698563904196e-06, "loss": 0.4706, "step": 6109 }, { "epoch": 1.6943982251802552, "grad_norm": 0.2001856118440628, "learning_rate": 1.4545836591073129e-06, "loss": 0.5065, "step": 6110 }, { "epoch": 1.694675540765391, "grad_norm": 0.21339738368988037, "learning_rate": 1.4519996170775791e-06, "loss": 0.5259, "step": 6111 }, { "epoch": 1.694952856350527, "grad_norm": 0.2282184213399887, "learning_rate": 1.4494177308054315e-06, "loss": 0.4954, "step": 6112 }, { "epoch": 1.6952301719356628, "grad_norm": 0.2113092988729477, "learning_rate": 1.4468380007946633e-06, "loss": 0.4853, "step": 6113 }, { "epoch": 1.6955074875207987, "grad_norm": 0.20249804854393005, "learning_rate": 1.4442604275486493e-06, "loss": 0.4781, "step": 6114 }, { "epoch": 1.6957848031059346, "grad_norm": 0.21009276807308197, "learning_rate": 1.4416850115703442e-06, "loss": 0.5062, "step": 6115 }, { "epoch": 1.6960621186910705, "grad_norm": 0.21459093689918518, "learning_rate": 1.439111753362278e-06, "loss": 0.4612, "step": 6116 }, { "epoch": 1.6963394342762064, "grad_norm": 0.20765627920627594, "learning_rate": 1.4365406534265587e-06, "loss": 0.4704, "step": 6117 }, { "epoch": 1.6966167498613423, "grad_norm": 0.20445479452610016, "learning_rate": 1.4339717122648797e-06, "loss": 0.4637, "step": 6118 }, { "epoch": 1.6968940654464781, "grad_norm": 0.19960498809814453, "learning_rate": 1.431404930378509e-06, "loss": 0.4579, "step": 6119 }, { "epoch": 1.697171381031614, "grad_norm": 0.22552312910556793, "learning_rate": 1.428840308268295e-06, "loss": 0.4842, "step": 6120 }, { "epoch": 1.69744869661675, "grad_norm": 0.2081850916147232, "learning_rate": 1.4262778464346593e-06, "loss": 0.4912, "step": 6121 }, { "epoch": 1.6977260122018858, "grad_norm": 0.22280938923358917, "learning_rate": 1.4237175453776077e-06, "loss": 0.492, "step": 6122 }, { "epoch": 1.6980033277870217, "grad_norm": 0.20667922496795654, "learning_rate": 1.4211594055967252e-06, "loss": 0.5025, "step": 6123 }, { "epoch": 1.6982806433721576, "grad_norm": 0.20905707776546478, "learning_rate": 1.4186034275911726e-06, "loss": 0.4897, "step": 6124 }, { "epoch": 1.6985579589572934, "grad_norm": 0.22128935158252716, "learning_rate": 1.4160496118596823e-06, "loss": 0.5004, "step": 6125 }, { "epoch": 1.6988352745424293, "grad_norm": 0.225121408700943, "learning_rate": 1.413497958900581e-06, "loss": 0.4908, "step": 6126 }, { "epoch": 1.6991125901275652, "grad_norm": 0.21258188784122467, "learning_rate": 1.4109484692117592e-06, "loss": 0.5058, "step": 6127 }, { "epoch": 1.699389905712701, "grad_norm": 0.21206620335578918, "learning_rate": 1.408401143290687e-06, "loss": 0.5058, "step": 6128 }, { "epoch": 1.699667221297837, "grad_norm": 0.21642054617404938, "learning_rate": 1.4058559816344186e-06, "loss": 0.47, "step": 6129 }, { "epoch": 1.6999445368829729, "grad_norm": 0.20300422608852386, "learning_rate": 1.403312984739584e-06, "loss": 0.492, "step": 6130 }, { "epoch": 1.7002218524681088, "grad_norm": 0.20064608752727509, "learning_rate": 1.400772153102388e-06, "loss": 0.4729, "step": 6131 }, { "epoch": 1.7004991680532446, "grad_norm": 0.22698557376861572, "learning_rate": 1.3982334872186101e-06, "loss": 0.4651, "step": 6132 }, { "epoch": 1.7007764836383805, "grad_norm": 0.21828146278858185, "learning_rate": 1.3956969875836155e-06, "loss": 0.4998, "step": 6133 }, { "epoch": 1.7010537992235164, "grad_norm": 0.20743338763713837, "learning_rate": 1.3931626546923426e-06, "loss": 0.5038, "step": 6134 }, { "epoch": 1.7013311148086523, "grad_norm": 0.19973209500312805, "learning_rate": 1.3906304890393047e-06, "loss": 0.475, "step": 6135 }, { "epoch": 1.7016084303937882, "grad_norm": 0.20409800112247467, "learning_rate": 1.3881004911185976e-06, "loss": 0.4745, "step": 6136 }, { "epoch": 1.701885745978924, "grad_norm": 0.21515069901943207, "learning_rate": 1.3855726614238868e-06, "loss": 0.5089, "step": 6137 }, { "epoch": 1.70216306156406, "grad_norm": 0.20320101082324982, "learning_rate": 1.383047000448423e-06, "loss": 0.4882, "step": 6138 }, { "epoch": 1.7024403771491958, "grad_norm": 0.20085285604000092, "learning_rate": 1.3805235086850249e-06, "loss": 0.4702, "step": 6139 }, { "epoch": 1.7027176927343317, "grad_norm": 0.2055083066225052, "learning_rate": 1.3780021866260955e-06, "loss": 0.4807, "step": 6140 }, { "epoch": 1.7029950083194676, "grad_norm": 0.21570569276809692, "learning_rate": 1.3754830347636138e-06, "loss": 0.4849, "step": 6141 }, { "epoch": 1.7032723239046035, "grad_norm": 0.20811456441879272, "learning_rate": 1.3729660535891282e-06, "loss": 0.4693, "step": 6142 }, { "epoch": 1.7035496394897394, "grad_norm": 0.2096460610628128, "learning_rate": 1.3704512435937734e-06, "loss": 0.5394, "step": 6143 }, { "epoch": 1.7038269550748752, "grad_norm": 0.20185941457748413, "learning_rate": 1.3679386052682499e-06, "loss": 0.5063, "step": 6144 }, { "epoch": 1.7041042706600111, "grad_norm": 0.26697927713394165, "learning_rate": 1.365428139102845e-06, "loss": 0.5086, "step": 6145 }, { "epoch": 1.704381586245147, "grad_norm": 0.21113193035125732, "learning_rate": 1.362919845587414e-06, "loss": 0.5187, "step": 6146 }, { "epoch": 1.704658901830283, "grad_norm": 0.21000999212265015, "learning_rate": 1.360413725211393e-06, "loss": 0.4919, "step": 6147 }, { "epoch": 1.7049362174154188, "grad_norm": 0.21595942974090576, "learning_rate": 1.3579097784637908e-06, "loss": 0.4821, "step": 6148 }, { "epoch": 1.7052135330005547, "grad_norm": 0.20245753228664398, "learning_rate": 1.3554080058331947e-06, "loss": 0.4946, "step": 6149 }, { "epoch": 1.7054908485856906, "grad_norm": 0.19732367992401123, "learning_rate": 1.3529084078077695e-06, "loss": 0.4822, "step": 6150 }, { "epoch": 1.7057681641708264, "grad_norm": 0.2121272087097168, "learning_rate": 1.3504109848752485e-06, "loss": 0.5231, "step": 6151 }, { "epoch": 1.7060454797559623, "grad_norm": 0.21215039491653442, "learning_rate": 1.3479157375229493e-06, "loss": 0.4893, "step": 6152 }, { "epoch": 1.7063227953410982, "grad_norm": 0.20783814787864685, "learning_rate": 1.3454226662377555e-06, "loss": 0.4933, "step": 6153 }, { "epoch": 1.706600110926234, "grad_norm": 0.20154152810573578, "learning_rate": 1.3429317715061367e-06, "loss": 0.4944, "step": 6154 }, { "epoch": 1.70687742651137, "grad_norm": 0.20301756262779236, "learning_rate": 1.340443053814129e-06, "loss": 0.4592, "step": 6155 }, { "epoch": 1.7071547420965059, "grad_norm": 0.20453648269176483, "learning_rate": 1.3379565136473482e-06, "loss": 0.4897, "step": 6156 }, { "epoch": 1.7074320576816417, "grad_norm": 0.21424759924411774, "learning_rate": 1.3354721514909865e-06, "loss": 0.4816, "step": 6157 }, { "epoch": 1.7077093732667776, "grad_norm": 0.20235998928546906, "learning_rate": 1.3329899678298063e-06, "loss": 0.4907, "step": 6158 }, { "epoch": 1.7079866888519135, "grad_norm": 0.20267461240291595, "learning_rate": 1.3305099631481453e-06, "loss": 0.4633, "step": 6159 }, { "epoch": 1.7082640044370494, "grad_norm": 0.21311825513839722, "learning_rate": 1.3280321379299215e-06, "loss": 0.4865, "step": 6160 }, { "epoch": 1.7085413200221853, "grad_norm": 0.2097369134426117, "learning_rate": 1.325556492658625e-06, "loss": 0.5052, "step": 6161 }, { "epoch": 1.7088186356073212, "grad_norm": 0.2084108293056488, "learning_rate": 1.3230830278173178e-06, "loss": 0.496, "step": 6162 }, { "epoch": 1.709095951192457, "grad_norm": 0.20339664816856384, "learning_rate": 1.3206117438886333e-06, "loss": 0.5049, "step": 6163 }, { "epoch": 1.709373266777593, "grad_norm": 0.2198762446641922, "learning_rate": 1.3181426413547955e-06, "loss": 0.4874, "step": 6164 }, { "epoch": 1.7096505823627288, "grad_norm": 0.21083226799964905, "learning_rate": 1.3156757206975873e-06, "loss": 0.4878, "step": 6165 }, { "epoch": 1.7099278979478647, "grad_norm": 0.23222039639949799, "learning_rate": 1.313210982398365e-06, "loss": 0.4773, "step": 6166 }, { "epoch": 1.7102052135330006, "grad_norm": 0.20829123258590698, "learning_rate": 1.3107484269380688e-06, "loss": 0.4823, "step": 6167 }, { "epoch": 1.7104825291181365, "grad_norm": 0.20589673519134521, "learning_rate": 1.3082880547972104e-06, "loss": 0.4466, "step": 6168 }, { "epoch": 1.7107598447032724, "grad_norm": 0.21213486790657043, "learning_rate": 1.3058298664558725e-06, "loss": 0.4807, "step": 6169 }, { "epoch": 1.7110371602884082, "grad_norm": 0.2168436497449875, "learning_rate": 1.3033738623937072e-06, "loss": 0.4891, "step": 6170 }, { "epoch": 1.7113144758735441, "grad_norm": 0.1989026516675949, "learning_rate": 1.300920043089951e-06, "loss": 0.5083, "step": 6171 }, { "epoch": 1.71159179145868, "grad_norm": 0.19995690882205963, "learning_rate": 1.2984684090234122e-06, "loss": 0.4747, "step": 6172 }, { "epoch": 1.711869107043816, "grad_norm": 0.20262081921100616, "learning_rate": 1.2960189606724613e-06, "loss": 0.4834, "step": 6173 }, { "epoch": 1.7121464226289518, "grad_norm": 0.22770211100578308, "learning_rate": 1.2935716985150587e-06, "loss": 0.5197, "step": 6174 }, { "epoch": 1.7124237382140877, "grad_norm": 0.21298766136169434, "learning_rate": 1.2911266230287239e-06, "loss": 0.5022, "step": 6175 }, { "epoch": 1.7127010537992235, "grad_norm": 0.21138055622577667, "learning_rate": 1.2886837346905615e-06, "loss": 0.472, "step": 6176 }, { "epoch": 1.7129783693843594, "grad_norm": 0.20885500311851501, "learning_rate": 1.2862430339772372e-06, "loss": 0.4925, "step": 6177 }, { "epoch": 1.7132556849694953, "grad_norm": 0.2066037952899933, "learning_rate": 1.2838045213650008e-06, "loss": 0.5073, "step": 6178 }, { "epoch": 1.7135330005546312, "grad_norm": 0.2201787531375885, "learning_rate": 1.2813681973296714e-06, "loss": 0.4771, "step": 6179 }, { "epoch": 1.713810316139767, "grad_norm": 0.20949430763721466, "learning_rate": 1.2789340623466358e-06, "loss": 0.4767, "step": 6180 }, { "epoch": 1.714087631724903, "grad_norm": 0.20830868184566498, "learning_rate": 1.276502116890864e-06, "loss": 0.4684, "step": 6181 }, { "epoch": 1.7143649473100389, "grad_norm": 0.197309672832489, "learning_rate": 1.2740723614368886e-06, "loss": 0.4686, "step": 6182 }, { "epoch": 1.7146422628951747, "grad_norm": 0.22104878723621368, "learning_rate": 1.2716447964588222e-06, "loss": 0.4738, "step": 6183 }, { "epoch": 1.7149195784803106, "grad_norm": 0.20894010365009308, "learning_rate": 1.2692194224303442e-06, "loss": 0.4781, "step": 6184 }, { "epoch": 1.7151968940654465, "grad_norm": 0.2051091194152832, "learning_rate": 1.266796239824712e-06, "loss": 0.4777, "step": 6185 }, { "epoch": 1.7154742096505824, "grad_norm": 0.26800915598869324, "learning_rate": 1.2643752491147505e-06, "loss": 0.5087, "step": 6186 }, { "epoch": 1.7157515252357183, "grad_norm": 0.20493799448013306, "learning_rate": 1.2619564507728595e-06, "loss": 0.4678, "step": 6187 }, { "epoch": 1.7160288408208542, "grad_norm": 0.22277140617370605, "learning_rate": 1.2595398452710128e-06, "loss": 0.5145, "step": 6188 }, { "epoch": 1.71630615640599, "grad_norm": 0.2096967250108719, "learning_rate": 1.2571254330807538e-06, "loss": 0.4936, "step": 6189 }, { "epoch": 1.716583471991126, "grad_norm": 0.2071332186460495, "learning_rate": 1.254713214673195e-06, "loss": 0.4964, "step": 6190 }, { "epoch": 1.7168607875762618, "grad_norm": 0.20031912624835968, "learning_rate": 1.252303190519026e-06, "loss": 0.5048, "step": 6191 }, { "epoch": 1.7171381031613977, "grad_norm": 0.2249787598848343, "learning_rate": 1.2498953610885087e-06, "loss": 0.5219, "step": 6192 }, { "epoch": 1.7174154187465336, "grad_norm": 0.20989800989627838, "learning_rate": 1.2474897268514696e-06, "loss": 0.4814, "step": 6193 }, { "epoch": 1.7176927343316695, "grad_norm": 0.21632020175457, "learning_rate": 1.2450862882773154e-06, "loss": 0.4855, "step": 6194 }, { "epoch": 1.7179700499168054, "grad_norm": 0.21728786826133728, "learning_rate": 1.2426850458350208e-06, "loss": 0.504, "step": 6195 }, { "epoch": 1.7182473655019412, "grad_norm": 0.20542924106121063, "learning_rate": 1.240285999993132e-06, "loss": 0.5027, "step": 6196 }, { "epoch": 1.7185246810870771, "grad_norm": 0.2109268456697464, "learning_rate": 1.237889151219762e-06, "loss": 0.5254, "step": 6197 }, { "epoch": 1.718801996672213, "grad_norm": 0.21600918471813202, "learning_rate": 1.2354944999826022e-06, "loss": 0.5359, "step": 6198 }, { "epoch": 1.719079312257349, "grad_norm": 0.21099236607551575, "learning_rate": 1.2331020467489157e-06, "loss": 0.5269, "step": 6199 }, { "epoch": 1.7193566278424848, "grad_norm": 0.21038185060024261, "learning_rate": 1.230711791985531e-06, "loss": 0.5292, "step": 6200 }, { "epoch": 1.7196339434276207, "grad_norm": 0.21021394431591034, "learning_rate": 1.2283237361588442e-06, "loss": 0.5175, "step": 6201 }, { "epoch": 1.7199112590127565, "grad_norm": 0.20856258273124695, "learning_rate": 1.2259378797348397e-06, "loss": 0.5112, "step": 6202 }, { "epoch": 1.7201885745978924, "grad_norm": 0.22244808077812195, "learning_rate": 1.2235542231790548e-06, "loss": 0.5144, "step": 6203 }, { "epoch": 1.7204658901830283, "grad_norm": 0.20829738676548004, "learning_rate": 1.2211727669566034e-06, "loss": 0.5132, "step": 6204 }, { "epoch": 1.7207432057681642, "grad_norm": 0.20939025282859802, "learning_rate": 1.2187935115321708e-06, "loss": 0.4784, "step": 6205 }, { "epoch": 1.7210205213533, "grad_norm": 0.20607073605060577, "learning_rate": 1.2164164573700162e-06, "loss": 0.4863, "step": 6206 }, { "epoch": 1.721297836938436, "grad_norm": 0.20456166565418243, "learning_rate": 1.2140416049339644e-06, "loss": 0.5155, "step": 6207 }, { "epoch": 1.7215751525235718, "grad_norm": 0.21939820051193237, "learning_rate": 1.2116689546874088e-06, "loss": 0.4917, "step": 6208 }, { "epoch": 1.7218524681087077, "grad_norm": 0.2015402466058731, "learning_rate": 1.209298507093319e-06, "loss": 0.4664, "step": 6209 }, { "epoch": 1.7221297836938436, "grad_norm": 0.20761947333812714, "learning_rate": 1.2069302626142352e-06, "loss": 0.4844, "step": 6210 }, { "epoch": 1.7224070992789795, "grad_norm": 0.21084575355052948, "learning_rate": 1.2045642217122594e-06, "loss": 0.5133, "step": 6211 }, { "epoch": 1.7226844148641154, "grad_norm": 0.20690487325191498, "learning_rate": 1.2022003848490699e-06, "loss": 0.5062, "step": 6212 }, { "epoch": 1.7229617304492513, "grad_norm": 0.2094261348247528, "learning_rate": 1.1998387524859141e-06, "loss": 0.5167, "step": 6213 }, { "epoch": 1.7232390460343872, "grad_norm": 0.20428799092769623, "learning_rate": 1.1974793250836128e-06, "loss": 0.4953, "step": 6214 }, { "epoch": 1.723516361619523, "grad_norm": 0.200530007481575, "learning_rate": 1.1951221031025473e-06, "loss": 0.5019, "step": 6215 }, { "epoch": 1.723793677204659, "grad_norm": 0.21505190432071686, "learning_rate": 1.1927670870026762e-06, "loss": 0.5069, "step": 6216 }, { "epoch": 1.7240709927897948, "grad_norm": 0.20491503179073334, "learning_rate": 1.190414277243529e-06, "loss": 0.4872, "step": 6217 }, { "epoch": 1.7243483083749307, "grad_norm": 0.20874449610710144, "learning_rate": 1.188063674284197e-06, "loss": 0.4886, "step": 6218 }, { "epoch": 1.7246256239600666, "grad_norm": 0.21290519833564758, "learning_rate": 1.1857152785833451e-06, "loss": 0.478, "step": 6219 }, { "epoch": 1.7249029395452025, "grad_norm": 0.20770753920078278, "learning_rate": 1.1833690905992081e-06, "loss": 0.4946, "step": 6220 }, { "epoch": 1.7251802551303383, "grad_norm": 0.21284344792366028, "learning_rate": 1.1810251107895923e-06, "loss": 0.4757, "step": 6221 }, { "epoch": 1.7254575707154742, "grad_norm": 0.20383362472057343, "learning_rate": 1.1786833396118664e-06, "loss": 0.4774, "step": 6222 }, { "epoch": 1.7257348863006101, "grad_norm": 0.2078067809343338, "learning_rate": 1.1763437775229744e-06, "loss": 0.4934, "step": 6223 }, { "epoch": 1.726012201885746, "grad_norm": 0.2041560709476471, "learning_rate": 1.174006424979425e-06, "loss": 0.5165, "step": 6224 }, { "epoch": 1.7262895174708819, "grad_norm": 0.21720266342163086, "learning_rate": 1.1716712824373006e-06, "loss": 0.4729, "step": 6225 }, { "epoch": 1.7265668330560178, "grad_norm": 0.19432541728019714, "learning_rate": 1.1693383503522435e-06, "loss": 0.4889, "step": 6226 }, { "epoch": 1.7268441486411537, "grad_norm": 0.20884351432323456, "learning_rate": 1.1670076291794785e-06, "loss": 0.4874, "step": 6227 }, { "epoch": 1.7271214642262895, "grad_norm": 0.20704929530620575, "learning_rate": 1.1646791193737848e-06, "loss": 0.4934, "step": 6228 }, { "epoch": 1.7273987798114254, "grad_norm": 0.2176746129989624, "learning_rate": 1.1623528213895174e-06, "loss": 0.4709, "step": 6229 }, { "epoch": 1.7276760953965613, "grad_norm": 0.20258481800556183, "learning_rate": 1.160028735680603e-06, "loss": 0.4843, "step": 6230 }, { "epoch": 1.7279534109816972, "grad_norm": 0.20934736728668213, "learning_rate": 1.1577068627005264e-06, "loss": 0.4852, "step": 6231 }, { "epoch": 1.728230726566833, "grad_norm": 0.20559830963611603, "learning_rate": 1.1553872029023498e-06, "loss": 0.4676, "step": 6232 }, { "epoch": 1.728508042151969, "grad_norm": 0.20282293856143951, "learning_rate": 1.1530697567387019e-06, "loss": 0.474, "step": 6233 }, { "epoch": 1.7287853577371048, "grad_norm": 0.20379206538200378, "learning_rate": 1.1507545246617763e-06, "loss": 0.4831, "step": 6234 }, { "epoch": 1.7290626733222407, "grad_norm": 0.20645445585250854, "learning_rate": 1.1484415071233322e-06, "loss": 0.4729, "step": 6235 }, { "epoch": 1.7293399889073766, "grad_norm": 0.20266611874103546, "learning_rate": 1.1461307045747035e-06, "loss": 0.4816, "step": 6236 }, { "epoch": 1.7296173044925125, "grad_norm": 0.20587818324565887, "learning_rate": 1.1438221174667931e-06, "loss": 0.5129, "step": 6237 }, { "epoch": 1.7298946200776484, "grad_norm": 0.21184997260570526, "learning_rate": 1.1415157462500631e-06, "loss": 0.5172, "step": 6238 }, { "epoch": 1.7301719356627843, "grad_norm": 0.21965019404888153, "learning_rate": 1.1392115913745436e-06, "loss": 0.5091, "step": 6239 }, { "epoch": 1.7304492512479202, "grad_norm": 0.2110959142446518, "learning_rate": 1.1369096532898458e-06, "loss": 0.4752, "step": 6240 }, { "epoch": 1.730726566833056, "grad_norm": 0.20495730638504028, "learning_rate": 1.134609932445134e-06, "loss": 0.4793, "step": 6241 }, { "epoch": 1.731003882418192, "grad_norm": 0.20782862603664398, "learning_rate": 1.132312429289145e-06, "loss": 0.5282, "step": 6242 }, { "epoch": 1.7312811980033278, "grad_norm": 0.20263516902923584, "learning_rate": 1.1300171442701776e-06, "loss": 0.4969, "step": 6243 }, { "epoch": 1.7315585135884637, "grad_norm": 0.2159367799758911, "learning_rate": 1.127724077836112e-06, "loss": 0.489, "step": 6244 }, { "epoch": 1.7318358291735996, "grad_norm": 0.2180434763431549, "learning_rate": 1.1254332304343806e-06, "loss": 0.4872, "step": 6245 }, { "epoch": 1.7321131447587355, "grad_norm": 0.20098507404327393, "learning_rate": 1.1231446025119885e-06, "loss": 0.5097, "step": 6246 }, { "epoch": 1.7323904603438713, "grad_norm": 0.20740365982055664, "learning_rate": 1.1208581945155075e-06, "loss": 0.4777, "step": 6247 }, { "epoch": 1.7326677759290072, "grad_norm": 0.2009047120809555, "learning_rate": 1.1185740068910807e-06, "loss": 0.4916, "step": 6248 }, { "epoch": 1.732945091514143, "grad_norm": 0.2049127072095871, "learning_rate": 1.1162920400844102e-06, "loss": 0.493, "step": 6249 }, { "epoch": 1.733222407099279, "grad_norm": 0.21437804400920868, "learning_rate": 1.1140122945407644e-06, "loss": 0.4864, "step": 6250 }, { "epoch": 1.7334997226844149, "grad_norm": 0.20494471490383148, "learning_rate": 1.1117347707049876e-06, "loss": 0.4845, "step": 6251 }, { "epoch": 1.7337770382695508, "grad_norm": 0.2116573303937912, "learning_rate": 1.1094594690214858e-06, "loss": 0.4745, "step": 6252 }, { "epoch": 1.7340543538546866, "grad_norm": 0.2070380300283432, "learning_rate": 1.1071863899342255e-06, "loss": 0.4956, "step": 6253 }, { "epoch": 1.7343316694398225, "grad_norm": 0.2108084261417389, "learning_rate": 1.1049155338867466e-06, "loss": 0.4917, "step": 6254 }, { "epoch": 1.7346089850249584, "grad_norm": 0.20470231771469116, "learning_rate": 1.1026469013221574e-06, "loss": 0.4788, "step": 6255 }, { "epoch": 1.7348863006100943, "grad_norm": 0.20482300221920013, "learning_rate": 1.1003804926831243e-06, "loss": 0.4933, "step": 6256 }, { "epoch": 1.7351636161952302, "grad_norm": 0.23574180901050568, "learning_rate": 1.0981163084118825e-06, "loss": 0.4953, "step": 6257 }, { "epoch": 1.735440931780366, "grad_norm": 0.19923263788223267, "learning_rate": 1.095854348950237e-06, "loss": 0.4594, "step": 6258 }, { "epoch": 1.735718247365502, "grad_norm": 0.23000024259090424, "learning_rate": 1.0935946147395556e-06, "loss": 0.4972, "step": 6259 }, { "epoch": 1.7359955629506378, "grad_norm": 0.2056475728750229, "learning_rate": 1.0913371062207702e-06, "loss": 0.4875, "step": 6260 }, { "epoch": 1.7362728785357737, "grad_norm": 0.21127453446388245, "learning_rate": 1.0890818238343856e-06, "loss": 0.4894, "step": 6261 }, { "epoch": 1.7365501941209096, "grad_norm": 0.21046780049800873, "learning_rate": 1.0868287680204606e-06, "loss": 0.499, "step": 6262 }, { "epoch": 1.7368275097060455, "grad_norm": 0.2049138844013214, "learning_rate": 1.0845779392186311e-06, "loss": 0.5112, "step": 6263 }, { "epoch": 1.7371048252911814, "grad_norm": 0.20450134575366974, "learning_rate": 1.0823293378680904e-06, "loss": 0.4916, "step": 6264 }, { "epoch": 1.7373821408763173, "grad_norm": 0.20806051790714264, "learning_rate": 1.0800829644076027e-06, "loss": 0.4837, "step": 6265 }, { "epoch": 1.7376594564614531, "grad_norm": 0.20882248878479004, "learning_rate": 1.0778388192754909e-06, "loss": 0.4949, "step": 6266 }, { "epoch": 1.737936772046589, "grad_norm": 0.2119029462337494, "learning_rate": 1.0755969029096508e-06, "loss": 0.4759, "step": 6267 }, { "epoch": 1.738214087631725, "grad_norm": 0.20091450214385986, "learning_rate": 1.0733572157475405e-06, "loss": 0.4907, "step": 6268 }, { "epoch": 1.7384914032168608, "grad_norm": 0.295280784368515, "learning_rate": 1.0711197582261786e-06, "loss": 0.5013, "step": 6269 }, { "epoch": 1.7387687188019967, "grad_norm": 0.21129965782165527, "learning_rate": 1.0688845307821577e-06, "loss": 0.5146, "step": 6270 }, { "epoch": 1.7390460343871326, "grad_norm": 0.20647571980953217, "learning_rate": 1.0666515338516232e-06, "loss": 0.4848, "step": 6271 }, { "epoch": 1.7393233499722685, "grad_norm": 0.20426985621452332, "learning_rate": 1.064420767870297e-06, "loss": 0.4568, "step": 6272 }, { "epoch": 1.7396006655574043, "grad_norm": 0.20671120285987854, "learning_rate": 1.0621922332734568e-06, "loss": 0.477, "step": 6273 }, { "epoch": 1.7398779811425402, "grad_norm": 0.20746487379074097, "learning_rate": 1.0599659304959514e-06, "loss": 0.5096, "step": 6274 }, { "epoch": 1.740155296727676, "grad_norm": 0.21173517405986786, "learning_rate": 1.057741859972193e-06, "loss": 0.4759, "step": 6275 }, { "epoch": 1.740432612312812, "grad_norm": 0.20655114948749542, "learning_rate": 1.0555200221361556e-06, "loss": 0.4914, "step": 6276 }, { "epoch": 1.7407099278979479, "grad_norm": 0.2177557796239853, "learning_rate": 1.053300417421374e-06, "loss": 0.4762, "step": 6277 }, { "epoch": 1.7409872434830838, "grad_norm": 0.1989573985338211, "learning_rate": 1.0510830462609561e-06, "loss": 0.4646, "step": 6278 }, { "epoch": 1.7412645590682196, "grad_norm": 0.20482690632343292, "learning_rate": 1.0488679090875711e-06, "loss": 0.4705, "step": 6279 }, { "epoch": 1.7415418746533555, "grad_norm": 0.19922995567321777, "learning_rate": 1.0466550063334497e-06, "loss": 0.4826, "step": 6280 }, { "epoch": 1.7418191902384914, "grad_norm": 0.2140817493200302, "learning_rate": 1.0444443384303823e-06, "loss": 0.4991, "step": 6281 }, { "epoch": 1.7420965058236273, "grad_norm": 0.2041434794664383, "learning_rate": 1.0422359058097382e-06, "loss": 0.4831, "step": 6282 }, { "epoch": 1.7423738214087632, "grad_norm": 0.2063605785369873, "learning_rate": 1.0400297089024373e-06, "loss": 0.5162, "step": 6283 }, { "epoch": 1.742651136993899, "grad_norm": 0.21836838126182556, "learning_rate": 1.0378257481389639e-06, "loss": 0.4971, "step": 6284 }, { "epoch": 1.742928452579035, "grad_norm": 0.2118816375732422, "learning_rate": 1.0356240239493705e-06, "loss": 0.4815, "step": 6285 }, { "epoch": 1.7432057681641708, "grad_norm": 0.20386700332164764, "learning_rate": 1.0334245367632764e-06, "loss": 0.4998, "step": 6286 }, { "epoch": 1.7434830837493067, "grad_norm": 0.2046600878238678, "learning_rate": 1.031227287009856e-06, "loss": 0.4958, "step": 6287 }, { "epoch": 1.7437603993344426, "grad_norm": 0.20780619978904724, "learning_rate": 1.0290322751178486e-06, "loss": 0.5133, "step": 6288 }, { "epoch": 1.7440377149195785, "grad_norm": 0.22479791939258575, "learning_rate": 1.0268395015155613e-06, "loss": 0.4936, "step": 6289 }, { "epoch": 1.7443150305047144, "grad_norm": 0.2186380922794342, "learning_rate": 1.0246489666308672e-06, "loss": 0.5165, "step": 6290 }, { "epoch": 1.7445923460898503, "grad_norm": 0.2129991352558136, "learning_rate": 1.0224606708911913e-06, "loss": 0.5162, "step": 6291 }, { "epoch": 1.7448696616749861, "grad_norm": 0.2101704329252243, "learning_rate": 1.0202746147235265e-06, "loss": 0.5055, "step": 6292 }, { "epoch": 1.745146977260122, "grad_norm": 0.21138980984687805, "learning_rate": 1.0180907985544387e-06, "loss": 0.4946, "step": 6293 }, { "epoch": 1.745424292845258, "grad_norm": 0.20843324065208435, "learning_rate": 1.0159092228100437e-06, "loss": 0.4729, "step": 6294 }, { "epoch": 1.7457016084303938, "grad_norm": 0.20566268265247345, "learning_rate": 1.0137298879160221e-06, "loss": 0.4515, "step": 6295 }, { "epoch": 1.7459789240155297, "grad_norm": 0.2100181132555008, "learning_rate": 1.0115527942976224e-06, "loss": 0.4772, "step": 6296 }, { "epoch": 1.7462562396006656, "grad_norm": 0.2049798220396042, "learning_rate": 1.009377942379655e-06, "loss": 0.4771, "step": 6297 }, { "epoch": 1.7465335551858014, "grad_norm": 0.2053791582584381, "learning_rate": 1.0072053325864875e-06, "loss": 0.4837, "step": 6298 }, { "epoch": 1.7468108707709373, "grad_norm": 0.20660558342933655, "learning_rate": 1.005034965342057e-06, "loss": 0.5195, "step": 6299 }, { "epoch": 1.7470881863560732, "grad_norm": 0.21594339609146118, "learning_rate": 1.0028668410698564e-06, "loss": 0.4937, "step": 6300 }, { "epoch": 1.747365501941209, "grad_norm": 0.21394188702106476, "learning_rate": 1.000700960192949e-06, "loss": 0.5009, "step": 6301 }, { "epoch": 1.747642817526345, "grad_norm": 0.2106638103723526, "learning_rate": 9.985373231339496e-07, "loss": 0.4723, "step": 6302 }, { "epoch": 1.7479201331114809, "grad_norm": 0.20718468725681305, "learning_rate": 9.963759303150453e-07, "loss": 0.4694, "step": 6303 }, { "epoch": 1.7481974486966168, "grad_norm": 0.2988225817680359, "learning_rate": 9.942167821579778e-07, "loss": 0.5089, "step": 6304 }, { "epoch": 1.7484747642817526, "grad_norm": 0.21094036102294922, "learning_rate": 9.920598790840562e-07, "loss": 0.4992, "step": 6305 }, { "epoch": 1.7487520798668885, "grad_norm": 0.20664121210575104, "learning_rate": 9.899052215141522e-07, "loss": 0.4853, "step": 6306 }, { "epoch": 1.7490293954520244, "grad_norm": 0.21847885847091675, "learning_rate": 9.877528098686917e-07, "loss": 0.5011, "step": 6307 }, { "epoch": 1.7493067110371603, "grad_norm": 0.22634607553482056, "learning_rate": 9.856026445676709e-07, "loss": 0.508, "step": 6308 }, { "epoch": 1.7495840266222962, "grad_norm": 0.21793290972709656, "learning_rate": 9.8345472603064e-07, "loss": 0.5396, "step": 6309 }, { "epoch": 1.749861342207432, "grad_norm": 0.2086942344903946, "learning_rate": 9.813090546767184e-07, "loss": 0.5171, "step": 6310 }, { "epoch": 1.750138657792568, "grad_norm": 0.21406447887420654, "learning_rate": 9.79165630924582e-07, "loss": 0.5021, "step": 6311 }, { "epoch": 1.7504159733777038, "grad_norm": 0.20880061388015747, "learning_rate": 9.770244551924683e-07, "loss": 0.5135, "step": 6312 }, { "epoch": 1.7506932889628397, "grad_norm": 0.2138943374156952, "learning_rate": 9.748855278981811e-07, "loss": 0.5149, "step": 6313 }, { "epoch": 1.7509706045479756, "grad_norm": 0.20010744035243988, "learning_rate": 9.727488494590805e-07, "loss": 0.5055, "step": 6314 }, { "epoch": 1.7512479201331115, "grad_norm": 0.2039041370153427, "learning_rate": 9.706144202920848e-07, "loss": 0.4796, "step": 6315 }, { "epoch": 1.7515252357182474, "grad_norm": 0.1957438588142395, "learning_rate": 9.684822408136813e-07, "loss": 0.4513, "step": 6316 }, { "epoch": 1.7518025513033832, "grad_norm": 0.20559154450893402, "learning_rate": 9.663523114399179e-07, "loss": 0.5066, "step": 6317 }, { "epoch": 1.7520798668885191, "grad_norm": 0.20115143060684204, "learning_rate": 9.642246325863954e-07, "loss": 0.53, "step": 6318 }, { "epoch": 1.752357182473655, "grad_norm": 0.22017818689346313, "learning_rate": 9.620992046682778e-07, "loss": 0.4881, "step": 6319 }, { "epoch": 1.752634498058791, "grad_norm": 0.20881177484989166, "learning_rate": 9.599760281002997e-07, "loss": 0.4777, "step": 6320 }, { "epoch": 1.7529118136439268, "grad_norm": 0.24889224767684937, "learning_rate": 9.578551032967466e-07, "loss": 0.4785, "step": 6321 }, { "epoch": 1.7531891292290627, "grad_norm": 0.21844631433486938, "learning_rate": 9.557364306714638e-07, "loss": 0.4774, "step": 6322 }, { "epoch": 1.7534664448141986, "grad_norm": 0.2035273164510727, "learning_rate": 9.536200106378637e-07, "loss": 0.4977, "step": 6323 }, { "epoch": 1.7537437603993344, "grad_norm": 0.19780419766902924, "learning_rate": 9.515058436089158e-07, "loss": 0.4964, "step": 6324 }, { "epoch": 1.7540210759844703, "grad_norm": 0.19891899824142456, "learning_rate": 9.493939299971499e-07, "loss": 0.4999, "step": 6325 }, { "epoch": 1.7542983915696062, "grad_norm": 0.20503740012645721, "learning_rate": 9.472842702146545e-07, "loss": 0.4901, "step": 6326 }, { "epoch": 1.754575707154742, "grad_norm": 0.22530865669250488, "learning_rate": 9.451768646730808e-07, "loss": 0.4997, "step": 6327 }, { "epoch": 1.754853022739878, "grad_norm": 0.21066060662269592, "learning_rate": 9.430717137836414e-07, "loss": 0.4874, "step": 6328 }, { "epoch": 1.7551303383250139, "grad_norm": 0.2237623631954193, "learning_rate": 9.409688179571066e-07, "loss": 0.4899, "step": 6329 }, { "epoch": 1.7554076539101497, "grad_norm": 0.2304733246564865, "learning_rate": 9.388681776038022e-07, "loss": 0.4858, "step": 6330 }, { "epoch": 1.7556849694952856, "grad_norm": 0.21019956469535828, "learning_rate": 9.367697931336266e-07, "loss": 0.5145, "step": 6331 }, { "epoch": 1.7559622850804215, "grad_norm": 0.2206750512123108, "learning_rate": 9.346736649560262e-07, "loss": 0.5251, "step": 6332 }, { "epoch": 1.7562396006655574, "grad_norm": 0.21418681740760803, "learning_rate": 9.325797934800082e-07, "loss": 0.4837, "step": 6333 }, { "epoch": 1.7565169162506933, "grad_norm": 0.20882757008075714, "learning_rate": 9.304881791141474e-07, "loss": 0.5064, "step": 6334 }, { "epoch": 1.7567942318358292, "grad_norm": 0.22608445584774017, "learning_rate": 9.283988222665726e-07, "loss": 0.5077, "step": 6335 }, { "epoch": 1.757071547420965, "grad_norm": 0.20677009224891663, "learning_rate": 9.263117233449706e-07, "loss": 0.5135, "step": 6336 }, { "epoch": 1.757348863006101, "grad_norm": 0.2091902196407318, "learning_rate": 9.24226882756589e-07, "loss": 0.5079, "step": 6337 }, { "epoch": 1.7576261785912368, "grad_norm": 0.2035513073205948, "learning_rate": 9.22144300908237e-07, "loss": 0.4837, "step": 6338 }, { "epoch": 1.7579034941763727, "grad_norm": 0.21183468401432037, "learning_rate": 9.200639782062842e-07, "loss": 0.4968, "step": 6339 }, { "epoch": 1.7581808097615086, "grad_norm": 0.20480120182037354, "learning_rate": 9.179859150566503e-07, "loss": 0.5, "step": 6340 }, { "epoch": 1.7584581253466445, "grad_norm": 0.20064327120780945, "learning_rate": 9.159101118648276e-07, "loss": 0.4982, "step": 6341 }, { "epoch": 1.7587354409317804, "grad_norm": 0.22214539349079132, "learning_rate": 9.13836569035853e-07, "loss": 0.5245, "step": 6342 }, { "epoch": 1.7590127565169162, "grad_norm": 0.23857805132865906, "learning_rate": 9.117652869743365e-07, "loss": 0.5035, "step": 6343 }, { "epoch": 1.7592900721020521, "grad_norm": 0.21097144484519958, "learning_rate": 9.096962660844352e-07, "loss": 0.4751, "step": 6344 }, { "epoch": 1.759567387687188, "grad_norm": 0.20220641791820526, "learning_rate": 9.076295067698707e-07, "loss": 0.4813, "step": 6345 }, { "epoch": 1.759844703272324, "grad_norm": 0.20024532079696655, "learning_rate": 9.055650094339257e-07, "loss": 0.4912, "step": 6346 }, { "epoch": 1.7601220188574598, "grad_norm": 0.2002626657485962, "learning_rate": 9.035027744794353e-07, "loss": 0.4897, "step": 6347 }, { "epoch": 1.7603993344425957, "grad_norm": 0.20744280517101288, "learning_rate": 9.014428023087981e-07, "loss": 0.5057, "step": 6348 }, { "epoch": 1.7606766500277315, "grad_norm": 0.21660728752613068, "learning_rate": 8.993850933239664e-07, "loss": 0.5014, "step": 6349 }, { "epoch": 1.7609539656128674, "grad_norm": 0.22033831477165222, "learning_rate": 8.973296479264564e-07, "loss": 0.4623, "step": 6350 }, { "epoch": 1.7612312811980033, "grad_norm": 0.1961023062467575, "learning_rate": 8.952764665173378e-07, "loss": 0.458, "step": 6351 }, { "epoch": 1.7615085967831392, "grad_norm": 0.20602113008499146, "learning_rate": 8.932255494972452e-07, "loss": 0.487, "step": 6352 }, { "epoch": 1.761785912368275, "grad_norm": 0.21287092566490173, "learning_rate": 8.911768972663603e-07, "loss": 0.5232, "step": 6353 }, { "epoch": 1.762063227953411, "grad_norm": 0.20058588683605194, "learning_rate": 8.891305102244326e-07, "loss": 0.4714, "step": 6354 }, { "epoch": 1.7623405435385469, "grad_norm": 0.22055946290493011, "learning_rate": 8.870863887707678e-07, "loss": 0.5222, "step": 6355 }, { "epoch": 1.7626178591236827, "grad_norm": 0.2032008171081543, "learning_rate": 8.850445333042287e-07, "loss": 0.5174, "step": 6356 }, { "epoch": 1.7628951747088186, "grad_norm": 0.22134113311767578, "learning_rate": 8.830049442232285e-07, "loss": 0.5117, "step": 6357 }, { "epoch": 1.7631724902939545, "grad_norm": 0.2134564369916916, "learning_rate": 8.809676219257546e-07, "loss": 0.5069, "step": 6358 }, { "epoch": 1.7634498058790904, "grad_norm": 0.21325929462909698, "learning_rate": 8.789325668093374e-07, "loss": 0.5023, "step": 6359 }, { "epoch": 1.7637271214642263, "grad_norm": 0.20757299661636353, "learning_rate": 8.768997792710689e-07, "loss": 0.5152, "step": 6360 }, { "epoch": 1.7640044370493622, "grad_norm": 0.2015492469072342, "learning_rate": 8.74869259707603e-07, "loss": 0.4596, "step": 6361 }, { "epoch": 1.764281752634498, "grad_norm": 0.23414163291454315, "learning_rate": 8.728410085151476e-07, "loss": 0.4936, "step": 6362 }, { "epoch": 1.764559068219634, "grad_norm": 0.21143180131912231, "learning_rate": 8.708150260894666e-07, "loss": 0.5004, "step": 6363 }, { "epoch": 1.7648363838047698, "grad_norm": 0.2277495414018631, "learning_rate": 8.687913128258826e-07, "loss": 0.4884, "step": 6364 }, { "epoch": 1.7651136993899057, "grad_norm": 0.21437212824821472, "learning_rate": 8.667698691192758e-07, "loss": 0.504, "step": 6365 }, { "epoch": 1.7653910149750416, "grad_norm": 0.20440204441547394, "learning_rate": 8.64750695364086e-07, "loss": 0.4866, "step": 6366 }, { "epoch": 1.7656683305601775, "grad_norm": 0.2076803594827652, "learning_rate": 8.627337919543066e-07, "loss": 0.5107, "step": 6367 }, { "epoch": 1.7659456461453134, "grad_norm": 0.21426992118358612, "learning_rate": 8.607191592834824e-07, "loss": 0.4913, "step": 6368 }, { "epoch": 1.7662229617304492, "grad_norm": 0.24520544707775116, "learning_rate": 8.587067977447321e-07, "loss": 0.5185, "step": 6369 }, { "epoch": 1.7665002773155851, "grad_norm": 0.20242126286029816, "learning_rate": 8.566967077307156e-07, "loss": 0.4838, "step": 6370 }, { "epoch": 1.766777592900721, "grad_norm": 0.2145501673221588, "learning_rate": 8.546888896336536e-07, "loss": 0.4861, "step": 6371 }, { "epoch": 1.767054908485857, "grad_norm": 0.21382984519004822, "learning_rate": 8.526833438453258e-07, "loss": 0.4972, "step": 6372 }, { "epoch": 1.7673322240709928, "grad_norm": 0.20749729871749878, "learning_rate": 8.506800707570691e-07, "loss": 0.4799, "step": 6373 }, { "epoch": 1.7676095396561287, "grad_norm": 0.20935972034931183, "learning_rate": 8.486790707597725e-07, "loss": 0.4861, "step": 6374 }, { "epoch": 1.7678868552412645, "grad_norm": 0.21835561096668243, "learning_rate": 8.466803442438844e-07, "loss": 0.5149, "step": 6375 }, { "epoch": 1.7681641708264004, "grad_norm": 0.2116965502500534, "learning_rate": 8.446838915994099e-07, "loss": 0.5302, "step": 6376 }, { "epoch": 1.7684414864115363, "grad_norm": 0.20836491882801056, "learning_rate": 8.42689713215912e-07, "loss": 0.5131, "step": 6377 }, { "epoch": 1.7687188019966722, "grad_norm": 0.19809886813163757, "learning_rate": 8.406978094825033e-07, "loss": 0.5147, "step": 6378 }, { "epoch": 1.768996117581808, "grad_norm": 0.20016132295131683, "learning_rate": 8.387081807878602e-07, "loss": 0.4531, "step": 6379 }, { "epoch": 1.769273433166944, "grad_norm": 0.20489495992660522, "learning_rate": 8.367208275202112e-07, "loss": 0.4955, "step": 6380 }, { "epoch": 1.7695507487520798, "grad_norm": 0.22036077082157135, "learning_rate": 8.34735750067342e-07, "loss": 0.4954, "step": 6381 }, { "epoch": 1.7698280643372157, "grad_norm": 0.19575130939483643, "learning_rate": 8.327529488165925e-07, "loss": 0.4695, "step": 6382 }, { "epoch": 1.7701053799223516, "grad_norm": 0.20686204731464386, "learning_rate": 8.30772424154859e-07, "loss": 0.4713, "step": 6383 }, { "epoch": 1.7703826955074875, "grad_norm": 0.2088318020105362, "learning_rate": 8.287941764685989e-07, "loss": 0.5319, "step": 6384 }, { "epoch": 1.7706600110926234, "grad_norm": 0.20468567311763763, "learning_rate": 8.26818206143816e-07, "loss": 0.5164, "step": 6385 }, { "epoch": 1.7709373266777593, "grad_norm": 0.22691620886325836, "learning_rate": 8.248445135660782e-07, "loss": 0.5026, "step": 6386 }, { "epoch": 1.7712146422628952, "grad_norm": 0.2066589593887329, "learning_rate": 8.22873099120501e-07, "loss": 0.5169, "step": 6387 }, { "epoch": 1.771491957848031, "grad_norm": 0.2105691283941269, "learning_rate": 8.209039631917631e-07, "loss": 0.5144, "step": 6388 }, { "epoch": 1.771769273433167, "grad_norm": 0.23009151220321655, "learning_rate": 8.18937106164093e-07, "loss": 0.5291, "step": 6389 }, { "epoch": 1.7720465890183028, "grad_norm": 0.20932504534721375, "learning_rate": 8.169725284212781e-07, "loss": 0.5105, "step": 6390 }, { "epoch": 1.7723239046034387, "grad_norm": 0.2107200175523758, "learning_rate": 8.150102303466578e-07, "loss": 0.4886, "step": 6391 }, { "epoch": 1.7726012201885746, "grad_norm": 0.20422813296318054, "learning_rate": 8.130502123231285e-07, "loss": 0.5055, "step": 6392 }, { "epoch": 1.7728785357737105, "grad_norm": 0.20422576367855072, "learning_rate": 8.110924747331467e-07, "loss": 0.5034, "step": 6393 }, { "epoch": 1.7731558513588463, "grad_norm": 0.21044956147670746, "learning_rate": 8.091370179587124e-07, "loss": 0.5029, "step": 6394 }, { "epoch": 1.7734331669439822, "grad_norm": 0.20264363288879395, "learning_rate": 8.071838423813885e-07, "loss": 0.5073, "step": 6395 }, { "epoch": 1.7737104825291181, "grad_norm": 0.2157750427722931, "learning_rate": 8.052329483822924e-07, "loss": 0.4861, "step": 6396 }, { "epoch": 1.773987798114254, "grad_norm": 0.20734168589115143, "learning_rate": 8.032843363420972e-07, "loss": 0.5013, "step": 6397 }, { "epoch": 1.7742651136993899, "grad_norm": 0.23018255829811096, "learning_rate": 8.013380066410253e-07, "loss": 0.5124, "step": 6398 }, { "epoch": 1.7745424292845258, "grad_norm": 0.19958136975765228, "learning_rate": 7.993939596588576e-07, "loss": 0.4944, "step": 6399 }, { "epoch": 1.7748197448696617, "grad_norm": 0.2005843073129654, "learning_rate": 7.974521957749309e-07, "loss": 0.4886, "step": 6400 }, { "epoch": 1.7750970604547975, "grad_norm": 0.20511426031589508, "learning_rate": 7.955127153681352e-07, "loss": 0.4816, "step": 6401 }, { "epoch": 1.7753743760399334, "grad_norm": 0.19508033990859985, "learning_rate": 7.935755188169122e-07, "loss": 0.4817, "step": 6402 }, { "epoch": 1.7756516916250693, "grad_norm": 0.2073792815208435, "learning_rate": 7.916406064992596e-07, "loss": 0.4902, "step": 6403 }, { "epoch": 1.7759290072102052, "grad_norm": 0.2069348245859146, "learning_rate": 7.89707978792735e-07, "loss": 0.5026, "step": 6404 }, { "epoch": 1.776206322795341, "grad_norm": 0.21495787799358368, "learning_rate": 7.87777636074441e-07, "loss": 0.4647, "step": 6405 }, { "epoch": 1.776483638380477, "grad_norm": 0.20726212859153748, "learning_rate": 7.858495787210373e-07, "loss": 0.4705, "step": 6406 }, { "epoch": 1.7767609539656128, "grad_norm": 0.2127843052148819, "learning_rate": 7.83923807108744e-07, "loss": 0.5182, "step": 6407 }, { "epoch": 1.7770382695507487, "grad_norm": 0.20020431280136108, "learning_rate": 7.820003216133284e-07, "loss": 0.4982, "step": 6408 }, { "epoch": 1.7773155851358846, "grad_norm": 0.21346312761306763, "learning_rate": 7.800791226101112e-07, "loss": 0.5011, "step": 6409 }, { "epoch": 1.7775929007210205, "grad_norm": 0.20786842703819275, "learning_rate": 7.781602104739674e-07, "loss": 0.4825, "step": 6410 }, { "epoch": 1.7778702163061564, "grad_norm": 0.21008695662021637, "learning_rate": 7.762435855793352e-07, "loss": 0.5017, "step": 6411 }, { "epoch": 1.7781475318912923, "grad_norm": 0.22787503898143768, "learning_rate": 7.743292483001944e-07, "loss": 0.5009, "step": 6412 }, { "epoch": 1.7784248474764282, "grad_norm": 0.21107859909534454, "learning_rate": 7.724171990100809e-07, "loss": 0.5056, "step": 6413 }, { "epoch": 1.778702163061564, "grad_norm": 0.20022447407245636, "learning_rate": 7.705074380820881e-07, "loss": 0.4997, "step": 6414 }, { "epoch": 1.7789794786467, "grad_norm": 0.20294740796089172, "learning_rate": 7.685999658888637e-07, "loss": 0.4944, "step": 6415 }, { "epoch": 1.7792567942318358, "grad_norm": 0.21547819674015045, "learning_rate": 7.66694782802603e-07, "loss": 0.5007, "step": 6416 }, { "epoch": 1.7795341098169717, "grad_norm": 0.20150557160377502, "learning_rate": 7.647918891950559e-07, "loss": 0.5125, "step": 6417 }, { "epoch": 1.7798114254021076, "grad_norm": 0.22688019275665283, "learning_rate": 7.628912854375308e-07, "loss": 0.4976, "step": 6418 }, { "epoch": 1.7800887409872435, "grad_norm": 0.20131336152553558, "learning_rate": 7.609929719008854e-07, "loss": 0.4804, "step": 6419 }, { "epoch": 1.7803660565723793, "grad_norm": 0.20517846941947937, "learning_rate": 7.590969489555289e-07, "loss": 0.5062, "step": 6420 }, { "epoch": 1.7806433721575152, "grad_norm": 0.21819156408309937, "learning_rate": 7.572032169714277e-07, "loss": 0.4709, "step": 6421 }, { "epoch": 1.780920687742651, "grad_norm": 0.21262036263942719, "learning_rate": 7.553117763180989e-07, "loss": 0.4938, "step": 6422 }, { "epoch": 1.781198003327787, "grad_norm": 0.2089860737323761, "learning_rate": 7.534226273646111e-07, "loss": 0.542, "step": 6423 }, { "epoch": 1.7814753189129229, "grad_norm": 0.20706579089164734, "learning_rate": 7.515357704795902e-07, "loss": 0.5062, "step": 6424 }, { "epoch": 1.7817526344980588, "grad_norm": 0.21182586252689362, "learning_rate": 7.496512060312086e-07, "loss": 0.4642, "step": 6425 }, { "epoch": 1.7820299500831946, "grad_norm": 0.21045905351638794, "learning_rate": 7.477689343871983e-07, "loss": 0.5155, "step": 6426 }, { "epoch": 1.7823072656683305, "grad_norm": 0.21174542605876923, "learning_rate": 7.458889559148363e-07, "loss": 0.5125, "step": 6427 }, { "epoch": 1.7825845812534664, "grad_norm": 0.21431586146354675, "learning_rate": 7.440112709809599e-07, "loss": 0.5094, "step": 6428 }, { "epoch": 1.7828618968386023, "grad_norm": 0.21488359570503235, "learning_rate": 7.421358799519524e-07, "loss": 0.5196, "step": 6429 }, { "epoch": 1.7831392124237382, "grad_norm": 0.21055498719215393, "learning_rate": 7.402627831937528e-07, "loss": 0.5338, "step": 6430 }, { "epoch": 1.783416528008874, "grad_norm": 0.19541572034358978, "learning_rate": 7.383919810718537e-07, "loss": 0.4689, "step": 6431 }, { "epoch": 1.78369384359401, "grad_norm": 0.22601917386054993, "learning_rate": 7.365234739512977e-07, "loss": 0.4881, "step": 6432 }, { "epoch": 1.7839711591791458, "grad_norm": 0.20422977209091187, "learning_rate": 7.34657262196678e-07, "loss": 0.489, "step": 6433 }, { "epoch": 1.7842484747642817, "grad_norm": 0.21061384677886963, "learning_rate": 7.327933461721434e-07, "loss": 0.523, "step": 6434 }, { "epoch": 1.7845257903494176, "grad_norm": 0.23255345225334167, "learning_rate": 7.309317262413948e-07, "loss": 0.4948, "step": 6435 }, { "epoch": 1.7848031059345535, "grad_norm": 0.20327569544315338, "learning_rate": 7.290724027676791e-07, "loss": 0.4884, "step": 6436 }, { "epoch": 1.7850804215196894, "grad_norm": 0.20791566371917725, "learning_rate": 7.272153761138031e-07, "loss": 0.487, "step": 6437 }, { "epoch": 1.7853577371048253, "grad_norm": 0.19935283064842224, "learning_rate": 7.253606466421231e-07, "loss": 0.4907, "step": 6438 }, { "epoch": 1.7856350526899611, "grad_norm": 0.2055501639842987, "learning_rate": 7.235082147145453e-07, "loss": 0.4908, "step": 6439 }, { "epoch": 1.785912368275097, "grad_norm": 0.20820872485637665, "learning_rate": 7.216580806925235e-07, "loss": 0.4897, "step": 6440 }, { "epoch": 1.786189683860233, "grad_norm": 0.19953136146068573, "learning_rate": 7.19810244937072e-07, "loss": 0.4806, "step": 6441 }, { "epoch": 1.7864669994453688, "grad_norm": 0.20614445209503174, "learning_rate": 7.179647078087548e-07, "loss": 0.4992, "step": 6442 }, { "epoch": 1.7867443150305047, "grad_norm": 0.20100446045398712, "learning_rate": 7.161214696676813e-07, "loss": 0.4811, "step": 6443 }, { "epoch": 1.7870216306156406, "grad_norm": 0.21023845672607422, "learning_rate": 7.142805308735151e-07, "loss": 0.4809, "step": 6444 }, { "epoch": 1.7872989462007765, "grad_norm": 0.21367646753787994, "learning_rate": 7.124418917854745e-07, "loss": 0.4902, "step": 6445 }, { "epoch": 1.7875762617859123, "grad_norm": 0.20011699199676514, "learning_rate": 7.106055527623279e-07, "loss": 0.4909, "step": 6446 }, { "epoch": 1.7878535773710482, "grad_norm": 0.2133239507675171, "learning_rate": 7.087715141623916e-07, "loss": 0.4953, "step": 6447 }, { "epoch": 1.788130892956184, "grad_norm": 0.20663748681545258, "learning_rate": 7.06939776343532e-07, "loss": 0.4885, "step": 6448 }, { "epoch": 1.78840820854132, "grad_norm": 0.2234223186969757, "learning_rate": 7.051103396631772e-07, "loss": 0.468, "step": 6449 }, { "epoch": 1.7886855241264559, "grad_norm": 0.2111656367778778, "learning_rate": 7.032832044782959e-07, "loss": 0.4909, "step": 6450 }, { "epoch": 1.7889628397115918, "grad_norm": 0.2099202424287796, "learning_rate": 7.014583711454053e-07, "loss": 0.4802, "step": 6451 }, { "epoch": 1.7892401552967276, "grad_norm": 0.19917812943458557, "learning_rate": 6.996358400205849e-07, "loss": 0.4913, "step": 6452 }, { "epoch": 1.7895174708818635, "grad_norm": 0.20266632735729218, "learning_rate": 6.978156114594583e-07, "loss": 0.5082, "step": 6453 }, { "epoch": 1.7897947864669994, "grad_norm": 0.21638800203800201, "learning_rate": 6.959976858171985e-07, "loss": 0.5163, "step": 6454 }, { "epoch": 1.7900721020521353, "grad_norm": 0.20662005245685577, "learning_rate": 6.941820634485299e-07, "loss": 0.501, "step": 6455 }, { "epoch": 1.7903494176372712, "grad_norm": 0.2111278474330902, "learning_rate": 6.92368744707729e-07, "loss": 0.5009, "step": 6456 }, { "epoch": 1.790626733222407, "grad_norm": 0.21369148790836334, "learning_rate": 6.905577299486266e-07, "loss": 0.4807, "step": 6457 }, { "epoch": 1.790904048807543, "grad_norm": 0.2006935179233551, "learning_rate": 6.887490195245941e-07, "loss": 0.5229, "step": 6458 }, { "epoch": 1.7911813643926788, "grad_norm": 0.20024555921554565, "learning_rate": 6.869426137885604e-07, "loss": 0.5033, "step": 6459 }, { "epoch": 1.7914586799778147, "grad_norm": 0.21740752458572388, "learning_rate": 6.85138513093006e-07, "loss": 0.4924, "step": 6460 }, { "epoch": 1.7917359955629506, "grad_norm": 0.21370796859264374, "learning_rate": 6.833367177899564e-07, "loss": 0.4914, "step": 6461 }, { "epoch": 1.7920133111480865, "grad_norm": 0.203489288687706, "learning_rate": 6.815372282309884e-07, "loss": 0.4905, "step": 6462 }, { "epoch": 1.7922906267332224, "grad_norm": 0.20067156851291656, "learning_rate": 6.797400447672311e-07, "loss": 0.4508, "step": 6463 }, { "epoch": 1.7925679423183583, "grad_norm": 0.2093934714794159, "learning_rate": 6.779451677493636e-07, "loss": 0.5265, "step": 6464 }, { "epoch": 1.7928452579034941, "grad_norm": 0.20720593631267548, "learning_rate": 6.761525975276129e-07, "loss": 0.4972, "step": 6465 }, { "epoch": 1.79312257348863, "grad_norm": 0.21246635913848877, "learning_rate": 6.7436233445176e-07, "loss": 0.5076, "step": 6466 }, { "epoch": 1.793399889073766, "grad_norm": 0.2040356993675232, "learning_rate": 6.72574378871127e-07, "loss": 0.4878, "step": 6467 }, { "epoch": 1.7936772046589018, "grad_norm": 0.20411434769630432, "learning_rate": 6.707887311345959e-07, "loss": 0.4903, "step": 6468 }, { "epoch": 1.7939545202440377, "grad_norm": 0.21358831226825714, "learning_rate": 6.690053915905908e-07, "loss": 0.5239, "step": 6469 }, { "epoch": 1.7942318358291736, "grad_norm": 0.20531177520751953, "learning_rate": 6.672243605870918e-07, "loss": 0.4836, "step": 6470 }, { "epoch": 1.7945091514143094, "grad_norm": 0.203311026096344, "learning_rate": 6.654456384716221e-07, "loss": 0.4853, "step": 6471 }, { "epoch": 1.7947864669994453, "grad_norm": 0.20684535801410675, "learning_rate": 6.636692255912583e-07, "loss": 0.4996, "step": 6472 }, { "epoch": 1.7950637825845812, "grad_norm": 0.21285724639892578, "learning_rate": 6.618951222926287e-07, "loss": 0.5002, "step": 6473 }, { "epoch": 1.795341098169717, "grad_norm": 0.201505646109581, "learning_rate": 6.601233289219036e-07, "loss": 0.5027, "step": 6474 }, { "epoch": 1.795618413754853, "grad_norm": 0.24583852291107178, "learning_rate": 6.583538458248106e-07, "loss": 0.4928, "step": 6475 }, { "epoch": 1.7958957293399889, "grad_norm": 0.22444890439510345, "learning_rate": 6.565866733466181e-07, "loss": 0.4982, "step": 6476 }, { "epoch": 1.7961730449251248, "grad_norm": 0.21000193059444427, "learning_rate": 6.548218118321542e-07, "loss": 0.4952, "step": 6477 }, { "epoch": 1.7964503605102606, "grad_norm": 0.20615287125110626, "learning_rate": 6.530592616257839e-07, "loss": 0.4686, "step": 6478 }, { "epoch": 1.7967276760953965, "grad_norm": 0.19822682440280914, "learning_rate": 6.512990230714306e-07, "loss": 0.5017, "step": 6479 }, { "epoch": 1.7970049916805324, "grad_norm": 0.20360395312309265, "learning_rate": 6.495410965125653e-07, "loss": 0.4922, "step": 6480 }, { "epoch": 1.7972823072656683, "grad_norm": 0.19985002279281616, "learning_rate": 6.477854822922042e-07, "loss": 0.4931, "step": 6481 }, { "epoch": 1.7975596228508042, "grad_norm": 0.19747690856456757, "learning_rate": 6.460321807529118e-07, "loss": 0.4757, "step": 6482 }, { "epoch": 1.79783693843594, "grad_norm": 0.2075447291135788, "learning_rate": 6.44281192236805e-07, "loss": 0.5065, "step": 6483 }, { "epoch": 1.798114254021076, "grad_norm": 0.2010498195886612, "learning_rate": 6.42532517085552e-07, "loss": 0.4978, "step": 6484 }, { "epoch": 1.7983915696062118, "grad_norm": 0.2031354308128357, "learning_rate": 6.407861556403633e-07, "loss": 0.5098, "step": 6485 }, { "epoch": 1.7986688851913477, "grad_norm": 0.21396131813526154, "learning_rate": 6.390421082419939e-07, "loss": 0.5098, "step": 6486 }, { "epoch": 1.7989462007764836, "grad_norm": 0.20978130400180817, "learning_rate": 6.373003752307649e-07, "loss": 0.4845, "step": 6487 }, { "epoch": 1.7992235163616195, "grad_norm": 0.20987875759601593, "learning_rate": 6.355609569465279e-07, "loss": 0.5111, "step": 6488 }, { "epoch": 1.7995008319467554, "grad_norm": 0.20756718516349792, "learning_rate": 6.338238537286892e-07, "loss": 0.5202, "step": 6489 }, { "epoch": 1.7997781475318912, "grad_norm": 0.2095729559659958, "learning_rate": 6.320890659162041e-07, "loss": 0.4798, "step": 6490 }, { "epoch": 1.8000554631170271, "grad_norm": 0.21260963380336761, "learning_rate": 6.303565938475794e-07, "loss": 0.4852, "step": 6491 }, { "epoch": 1.800332778702163, "grad_norm": 0.20586569607257843, "learning_rate": 6.286264378608631e-07, "loss": 0.4981, "step": 6492 }, { "epoch": 1.800610094287299, "grad_norm": 0.20685313642024994, "learning_rate": 6.268985982936529e-07, "loss": 0.4848, "step": 6493 }, { "epoch": 1.8008874098724348, "grad_norm": 0.21318146586418152, "learning_rate": 6.251730754830987e-07, "loss": 0.4873, "step": 6494 }, { "epoch": 1.8011647254575707, "grad_norm": 0.21374082565307617, "learning_rate": 6.234498697658964e-07, "loss": 0.4889, "step": 6495 }, { "epoch": 1.8014420410427066, "grad_norm": 0.20280690491199493, "learning_rate": 6.217289814782867e-07, "loss": 0.5096, "step": 6496 }, { "epoch": 1.8017193566278424, "grad_norm": 0.2248666137456894, "learning_rate": 6.200104109560622e-07, "loss": 0.5, "step": 6497 }, { "epoch": 1.8019966722129783, "grad_norm": 0.25136902928352356, "learning_rate": 6.182941585345603e-07, "loss": 0.5054, "step": 6498 }, { "epoch": 1.8022739877981142, "grad_norm": 0.2106061577796936, "learning_rate": 6.165802245486684e-07, "loss": 0.503, "step": 6499 }, { "epoch": 1.80255130338325, "grad_norm": 0.19975021481513977, "learning_rate": 6.148686093328193e-07, "loss": 0.4912, "step": 6500 }, { "epoch": 1.802828618968386, "grad_norm": 0.2124665230512619, "learning_rate": 6.131593132209942e-07, "loss": 0.5096, "step": 6501 }, { "epoch": 1.8031059345535219, "grad_norm": 0.20581713318824768, "learning_rate": 6.11452336546725e-07, "loss": 0.4901, "step": 6502 }, { "epoch": 1.8033832501386577, "grad_norm": 0.20466797053813934, "learning_rate": 6.097476796430856e-07, "loss": 0.4827, "step": 6503 }, { "epoch": 1.8036605657237936, "grad_norm": 0.20932024717330933, "learning_rate": 6.080453428427003e-07, "loss": 0.4881, "step": 6504 }, { "epoch": 1.8039378813089295, "grad_norm": 0.20006607472896576, "learning_rate": 6.063453264777391e-07, "loss": 0.4892, "step": 6505 }, { "epoch": 1.8042151968940654, "grad_norm": 0.21569404006004333, "learning_rate": 6.046476308799217e-07, "loss": 0.4704, "step": 6506 }, { "epoch": 1.8044925124792013, "grad_norm": 0.22104816138744354, "learning_rate": 6.029522563805123e-07, "loss": 0.4961, "step": 6507 }, { "epoch": 1.8047698280643372, "grad_norm": 0.21142521500587463, "learning_rate": 6.012592033103253e-07, "loss": 0.5161, "step": 6508 }, { "epoch": 1.805047143649473, "grad_norm": 0.20408941805362701, "learning_rate": 5.995684719997174e-07, "loss": 0.512, "step": 6509 }, { "epoch": 1.805324459234609, "grad_norm": 0.20980273187160492, "learning_rate": 5.978800627785968e-07, "loss": 0.5006, "step": 6510 }, { "epoch": 1.8056017748197448, "grad_norm": 0.21785345673561096, "learning_rate": 5.961939759764181e-07, "loss": 0.4957, "step": 6511 }, { "epoch": 1.8058790904048807, "grad_norm": 0.20818081498146057, "learning_rate": 5.945102119221793e-07, "loss": 0.5021, "step": 6512 }, { "epoch": 1.8061564059900166, "grad_norm": 0.21114230155944824, "learning_rate": 5.928287709444285e-07, "loss": 0.4915, "step": 6513 }, { "epoch": 1.8064337215751525, "grad_norm": 0.20450453460216522, "learning_rate": 5.911496533712577e-07, "loss": 0.5232, "step": 6514 }, { "epoch": 1.8067110371602884, "grad_norm": 0.20697475969791412, "learning_rate": 5.894728595303101e-07, "loss": 0.4895, "step": 6515 }, { "epoch": 1.8069883527454242, "grad_norm": 0.20897066593170166, "learning_rate": 5.877983897487699e-07, "loss": 0.4692, "step": 6516 }, { "epoch": 1.8072656683305601, "grad_norm": 0.2387371063232422, "learning_rate": 5.861262443533716e-07, "loss": 0.4805, "step": 6517 }, { "epoch": 1.807542983915696, "grad_norm": 0.2114652842283249, "learning_rate": 5.844564236703972e-07, "loss": 0.5114, "step": 6518 }, { "epoch": 1.807820299500832, "grad_norm": 0.19332511723041534, "learning_rate": 5.827889280256696e-07, "loss": 0.4668, "step": 6519 }, { "epoch": 1.8080976150859678, "grad_norm": 0.22820791602134705, "learning_rate": 5.811237577445616e-07, "loss": 0.4955, "step": 6520 }, { "epoch": 1.8083749306711037, "grad_norm": 0.2099064439535141, "learning_rate": 5.79460913151994e-07, "loss": 0.529, "step": 6521 }, { "epoch": 1.8086522462562395, "grad_norm": 0.22128312289714813, "learning_rate": 5.778003945724322e-07, "loss": 0.4778, "step": 6522 }, { "epoch": 1.8089295618413754, "grad_norm": 0.20738999545574188, "learning_rate": 5.761422023298851e-07, "loss": 0.4965, "step": 6523 }, { "epoch": 1.8092068774265113, "grad_norm": 0.23788759112358093, "learning_rate": 5.744863367479092e-07, "loss": 0.5113, "step": 6524 }, { "epoch": 1.8094841930116472, "grad_norm": 0.21730081737041473, "learning_rate": 5.728327981496112e-07, "loss": 0.4981, "step": 6525 }, { "epoch": 1.809761508596783, "grad_norm": 0.21804404258728027, "learning_rate": 5.711815868576401e-07, "loss": 0.4849, "step": 6526 }, { "epoch": 1.810038824181919, "grad_norm": 0.21189890801906586, "learning_rate": 5.695327031941866e-07, "loss": 0.4909, "step": 6527 }, { "epoch": 1.8103161397670549, "grad_norm": 0.2018709033727646, "learning_rate": 5.678861474809949e-07, "loss": 0.4799, "step": 6528 }, { "epoch": 1.8105934553521907, "grad_norm": 0.22132818400859833, "learning_rate": 5.662419200393537e-07, "loss": 0.4723, "step": 6529 }, { "epoch": 1.8108707709373266, "grad_norm": 0.2106301486492157, "learning_rate": 5.646000211900925e-07, "loss": 0.4924, "step": 6530 }, { "epoch": 1.8111480865224625, "grad_norm": 0.20907257497310638, "learning_rate": 5.629604512535871e-07, "loss": 0.497, "step": 6531 }, { "epoch": 1.8114254021075984, "grad_norm": 0.20625951886177063, "learning_rate": 5.613232105497649e-07, "loss": 0.504, "step": 6532 }, { "epoch": 1.8117027176927343, "grad_norm": 0.2099820077419281, "learning_rate": 5.596882993980937e-07, "loss": 0.4957, "step": 6533 }, { "epoch": 1.8119800332778702, "grad_norm": 0.21241778135299683, "learning_rate": 5.580557181175893e-07, "loss": 0.4908, "step": 6534 }, { "epoch": 1.812257348863006, "grad_norm": 0.20039701461791992, "learning_rate": 5.564254670268068e-07, "loss": 0.4982, "step": 6535 }, { "epoch": 1.8125346644481422, "grad_norm": 0.22225303947925568, "learning_rate": 5.547975464438568e-07, "loss": 0.5102, "step": 6536 }, { "epoch": 1.812811980033278, "grad_norm": 0.21028605103492737, "learning_rate": 5.53171956686388e-07, "loss": 0.5236, "step": 6537 }, { "epoch": 1.813089295618414, "grad_norm": 0.2042209506034851, "learning_rate": 5.51548698071594e-07, "loss": 0.4895, "step": 6538 }, { "epoch": 1.8133666112035498, "grad_norm": 0.21611033380031586, "learning_rate": 5.499277709162171e-07, "loss": 0.4968, "step": 6539 }, { "epoch": 1.8136439267886857, "grad_norm": 0.20384977757930756, "learning_rate": 5.483091755365461e-07, "loss": 0.492, "step": 6540 }, { "epoch": 1.8139212423738216, "grad_norm": 0.1995922178030014, "learning_rate": 5.466929122484075e-07, "loss": 0.4917, "step": 6541 }, { "epoch": 1.8141985579589575, "grad_norm": 0.20135626196861267, "learning_rate": 5.450789813671781e-07, "loss": 0.5002, "step": 6542 }, { "epoch": 1.8144758735440933, "grad_norm": 0.21459290385246277, "learning_rate": 5.434673832077783e-07, "loss": 0.4684, "step": 6543 }, { "epoch": 1.8147531891292292, "grad_norm": 0.2142772525548935, "learning_rate": 5.41858118084676e-07, "loss": 0.4991, "step": 6544 }, { "epoch": 1.8150305047143651, "grad_norm": 0.20316624641418457, "learning_rate": 5.402511863118798e-07, "loss": 0.4978, "step": 6545 }, { "epoch": 1.815307820299501, "grad_norm": 0.23183675110340118, "learning_rate": 5.386465882029443e-07, "loss": 0.4948, "step": 6546 }, { "epoch": 1.8155851358846369, "grad_norm": 0.2099410742521286, "learning_rate": 5.370443240709691e-07, "loss": 0.5037, "step": 6547 }, { "epoch": 1.8158624514697728, "grad_norm": 0.2161746770143509, "learning_rate": 5.354443942285986e-07, "loss": 0.5388, "step": 6548 }, { "epoch": 1.8161397670549086, "grad_norm": 0.2113848328590393, "learning_rate": 5.338467989880233e-07, "loss": 0.5012, "step": 6549 }, { "epoch": 1.8164170826400445, "grad_norm": 0.20695488154888153, "learning_rate": 5.322515386609731e-07, "loss": 0.5092, "step": 6550 }, { "epoch": 1.8166943982251804, "grad_norm": 0.22753605246543884, "learning_rate": 5.30658613558728e-07, "loss": 0.4784, "step": 6551 }, { "epoch": 1.8169717138103163, "grad_norm": 0.20156748592853546, "learning_rate": 5.290680239921089e-07, "loss": 0.4905, "step": 6552 }, { "epoch": 1.8172490293954522, "grad_norm": 0.20030784606933594, "learning_rate": 5.274797702714829e-07, "loss": 0.4789, "step": 6553 }, { "epoch": 1.817526344980588, "grad_norm": 0.2246222347021103, "learning_rate": 5.258938527067575e-07, "loss": 0.5038, "step": 6554 }, { "epoch": 1.817803660565724, "grad_norm": 0.21242398023605347, "learning_rate": 5.243102716073908e-07, "loss": 0.4957, "step": 6555 }, { "epoch": 1.8180809761508598, "grad_norm": 0.20436997711658478, "learning_rate": 5.227290272823801e-07, "loss": 0.4759, "step": 6556 }, { "epoch": 1.8183582917359957, "grad_norm": 0.20299552381038666, "learning_rate": 5.211501200402688e-07, "loss": 0.5054, "step": 6557 }, { "epoch": 1.8186356073211316, "grad_norm": 0.2033611238002777, "learning_rate": 5.195735501891424e-07, "loss": 0.4932, "step": 6558 }, { "epoch": 1.8189129229062675, "grad_norm": 0.2847311794757843, "learning_rate": 5.1799931803663e-07, "loss": 0.4986, "step": 6559 }, { "epoch": 1.8191902384914034, "grad_norm": 0.2158021777868271, "learning_rate": 5.164274238899091e-07, "loss": 0.4769, "step": 6560 }, { "epoch": 1.8194675540765393, "grad_norm": 0.21182480454444885, "learning_rate": 5.148578680556987e-07, "loss": 0.4827, "step": 6561 }, { "epoch": 1.8197448696616751, "grad_norm": 0.20851145684719086, "learning_rate": 5.132906508402535e-07, "loss": 0.512, "step": 6562 }, { "epoch": 1.820022185246811, "grad_norm": 0.20874373614788055, "learning_rate": 5.117257725493874e-07, "loss": 0.4721, "step": 6563 }, { "epoch": 1.820299500831947, "grad_norm": 0.203582301735878, "learning_rate": 5.101632334884476e-07, "loss": 0.467, "step": 6564 }, { "epoch": 1.8205768164170828, "grad_norm": 0.21535137295722961, "learning_rate": 5.086030339623237e-07, "loss": 0.477, "step": 6565 }, { "epoch": 1.8208541320022187, "grad_norm": 0.2023378312587738, "learning_rate": 5.070451742754528e-07, "loss": 0.4867, "step": 6566 }, { "epoch": 1.8211314475873546, "grad_norm": 0.20345695316791534, "learning_rate": 5.054896547318181e-07, "loss": 0.4803, "step": 6567 }, { "epoch": 1.8214087631724905, "grad_norm": 0.2040456086397171, "learning_rate": 5.039364756349405e-07, "loss": 0.5076, "step": 6568 }, { "epoch": 1.8216860787576263, "grad_norm": 0.20643432438373566, "learning_rate": 5.023856372878846e-07, "loss": 0.51, "step": 6569 }, { "epoch": 1.8219633943427622, "grad_norm": 0.1972525417804718, "learning_rate": 5.008371399932613e-07, "loss": 0.4995, "step": 6570 }, { "epoch": 1.822240709927898, "grad_norm": 0.27110210061073303, "learning_rate": 4.992909840532259e-07, "loss": 0.505, "step": 6571 }, { "epoch": 1.822518025513034, "grad_norm": 0.20463772118091583, "learning_rate": 4.977471697694719e-07, "loss": 0.5256, "step": 6572 }, { "epoch": 1.8227953410981699, "grad_norm": 0.20936624705791473, "learning_rate": 4.962056974432374e-07, "loss": 0.4791, "step": 6573 }, { "epoch": 1.8230726566833058, "grad_norm": 0.21320702135562897, "learning_rate": 4.946665673753056e-07, "loss": 0.5057, "step": 6574 }, { "epoch": 1.8233499722684416, "grad_norm": 0.20866161584854126, "learning_rate": 4.931297798660043e-07, "loss": 0.4935, "step": 6575 }, { "epoch": 1.8236272878535775, "grad_norm": 0.21233926713466644, "learning_rate": 4.915953352151961e-07, "loss": 0.5105, "step": 6576 }, { "epoch": 1.8239046034387134, "grad_norm": 0.20736192166805267, "learning_rate": 4.900632337222947e-07, "loss": 0.5026, "step": 6577 }, { "epoch": 1.8241819190238493, "grad_norm": 0.21084974706172943, "learning_rate": 4.885334756862564e-07, "loss": 0.4946, "step": 6578 }, { "epoch": 1.8244592346089852, "grad_norm": 0.20611052215099335, "learning_rate": 4.870060614055733e-07, "loss": 0.5124, "step": 6579 }, { "epoch": 1.824736550194121, "grad_norm": 0.19793611764907837, "learning_rate": 4.85480991178286e-07, "loss": 0.4918, "step": 6580 }, { "epoch": 1.825013865779257, "grad_norm": 0.20998556911945343, "learning_rate": 4.839582653019745e-07, "loss": 0.4959, "step": 6581 }, { "epoch": 1.8252911813643928, "grad_norm": 0.2145131677389145, "learning_rate": 4.824378840737664e-07, "loss": 0.4858, "step": 6582 }, { "epoch": 1.8255684969495287, "grad_norm": 0.20194010436534882, "learning_rate": 4.809198477903259e-07, "loss": 0.4685, "step": 6583 }, { "epoch": 1.8258458125346646, "grad_norm": 0.2031271904706955, "learning_rate": 4.794041567478632e-07, "loss": 0.4833, "step": 6584 }, { "epoch": 1.8261231281198005, "grad_norm": 0.2092730849981308, "learning_rate": 4.778908112421279e-07, "loss": 0.4969, "step": 6585 }, { "epoch": 1.8264004437049364, "grad_norm": 0.21413682401180267, "learning_rate": 4.7637981156841563e-07, "loss": 0.4964, "step": 6586 }, { "epoch": 1.8266777592900723, "grad_norm": 0.2138873189687729, "learning_rate": 4.7487115802156147e-07, "loss": 0.484, "step": 6587 }, { "epoch": 1.8269550748752081, "grad_norm": 0.23256392776966095, "learning_rate": 4.733648508959465e-07, "loss": 0.4868, "step": 6588 }, { "epoch": 1.827232390460344, "grad_norm": 0.20839428901672363, "learning_rate": 4.718608904854857e-07, "loss": 0.4998, "step": 6589 }, { "epoch": 1.82750970604548, "grad_norm": 0.21245619654655457, "learning_rate": 4.703592770836457e-07, "loss": 0.4976, "step": 6590 }, { "epoch": 1.8277870216306158, "grad_norm": 0.21361759305000305, "learning_rate": 4.6886001098343094e-07, "loss": 0.5143, "step": 6591 }, { "epoch": 1.8280643372157517, "grad_norm": 0.20406369864940643, "learning_rate": 4.673630924773853e-07, "loss": 0.4561, "step": 6592 }, { "epoch": 1.8283416528008876, "grad_norm": 0.20464986562728882, "learning_rate": 4.6586852185760144e-07, "loss": 0.4884, "step": 6593 }, { "epoch": 1.8286189683860234, "grad_norm": 0.21941900253295898, "learning_rate": 4.643762994157058e-07, "loss": 0.5124, "step": 6594 }, { "epoch": 1.8288962839711593, "grad_norm": 0.21227768063545227, "learning_rate": 4.628864254428725e-07, "loss": 0.4913, "step": 6595 }, { "epoch": 1.8291735995562952, "grad_norm": 0.20707818865776062, "learning_rate": 4.613989002298133e-07, "loss": 0.4637, "step": 6596 }, { "epoch": 1.829450915141431, "grad_norm": 0.21716617047786713, "learning_rate": 4.599137240667864e-07, "loss": 0.4596, "step": 6597 }, { "epoch": 1.829728230726567, "grad_norm": 0.21836552023887634, "learning_rate": 4.5843089724358913e-07, "loss": 0.4907, "step": 6598 }, { "epoch": 1.8300055463117029, "grad_norm": 0.20570078492164612, "learning_rate": 4.5695042004955943e-07, "loss": 0.4854, "step": 6599 }, { "epoch": 1.8302828618968388, "grad_norm": 0.21633854508399963, "learning_rate": 4.554722927735747e-07, "loss": 0.517, "step": 6600 }, { "epoch": 1.8305601774819746, "grad_norm": 0.20889052748680115, "learning_rate": 4.53996515704061e-07, "loss": 0.5119, "step": 6601 }, { "epoch": 1.8308374930671105, "grad_norm": 0.21190080046653748, "learning_rate": 4.5252308912897973e-07, "loss": 0.492, "step": 6602 }, { "epoch": 1.8311148086522464, "grad_norm": 0.19486699998378754, "learning_rate": 4.5105201333583565e-07, "loss": 0.4603, "step": 6603 }, { "epoch": 1.8313921242373823, "grad_norm": 0.20805886387825012, "learning_rate": 4.495832886116741e-07, "loss": 0.5107, "step": 6604 }, { "epoch": 1.8316694398225182, "grad_norm": 0.2246580719947815, "learning_rate": 4.481169152430839e-07, "loss": 0.5245, "step": 6605 }, { "epoch": 1.831946755407654, "grad_norm": 0.2717232406139374, "learning_rate": 4.466528935161918e-07, "loss": 0.5234, "step": 6606 }, { "epoch": 1.83222407099279, "grad_norm": 0.2078229784965515, "learning_rate": 4.451912237166664e-07, "loss": 0.4893, "step": 6607 }, { "epoch": 1.8325013865779258, "grad_norm": 0.20676632225513458, "learning_rate": 4.4373190612971986e-07, "loss": 0.5072, "step": 6608 }, { "epoch": 1.8327787021630617, "grad_norm": 0.21076683700084686, "learning_rate": 4.4227494104010503e-07, "loss": 0.5058, "step": 6609 }, { "epoch": 1.8330560177481976, "grad_norm": 0.20716801285743713, "learning_rate": 4.408203287321111e-07, "loss": 0.4752, "step": 6610 }, { "epoch": 1.8333333333333335, "grad_norm": 0.20303776860237122, "learning_rate": 4.3936806948957354e-07, "loss": 0.4991, "step": 6611 }, { "epoch": 1.8336106489184694, "grad_norm": 0.2012963891029358, "learning_rate": 4.379181635958643e-07, "loss": 0.486, "step": 6612 }, { "epoch": 1.8338879645036053, "grad_norm": 0.20308934152126312, "learning_rate": 4.3647061133390286e-07, "loss": 0.506, "step": 6613 }, { "epoch": 1.8341652800887411, "grad_norm": 0.20220200717449188, "learning_rate": 4.3502541298613977e-07, "loss": 0.4809, "step": 6614 }, { "epoch": 1.834442595673877, "grad_norm": 0.20746108889579773, "learning_rate": 4.335825688345743e-07, "loss": 0.4813, "step": 6615 }, { "epoch": 1.834719911259013, "grad_norm": 0.22002127766609192, "learning_rate": 4.321420791607453e-07, "loss": 0.5233, "step": 6616 }, { "epoch": 1.8349972268441488, "grad_norm": 0.20879393815994263, "learning_rate": 4.307039442457278e-07, "loss": 0.4956, "step": 6617 }, { "epoch": 1.8352745424292847, "grad_norm": 0.21325801312923431, "learning_rate": 4.2926816437014047e-07, "loss": 0.4913, "step": 6618 }, { "epoch": 1.8355518580144206, "grad_norm": 0.19979670643806458, "learning_rate": 4.278347398141411e-07, "loss": 0.5124, "step": 6619 }, { "epoch": 1.8358291735995564, "grad_norm": 0.21641233563423157, "learning_rate": 4.264036708574323e-07, "loss": 0.505, "step": 6620 }, { "epoch": 1.8361064891846923, "grad_norm": 0.2334747463464737, "learning_rate": 4.249749577792492e-07, "loss": 0.5114, "step": 6621 }, { "epoch": 1.8363838047698282, "grad_norm": 0.19688747823238373, "learning_rate": 4.2354860085837414e-07, "loss": 0.5072, "step": 6622 }, { "epoch": 1.836661120354964, "grad_norm": 0.21270081400871277, "learning_rate": 4.2212460037312636e-07, "loss": 0.5024, "step": 6623 }, { "epoch": 1.8369384359401, "grad_norm": 0.21791474521160126, "learning_rate": 4.2070295660136817e-07, "loss": 0.5123, "step": 6624 }, { "epoch": 1.8372157515252359, "grad_norm": 0.1985998898744583, "learning_rate": 4.192836698204958e-07, "loss": 0.4741, "step": 6625 }, { "epoch": 1.8374930671103717, "grad_norm": 0.2119298279285431, "learning_rate": 4.1786674030745295e-07, "loss": 0.4941, "step": 6626 }, { "epoch": 1.8377703826955076, "grad_norm": 0.20587483048439026, "learning_rate": 4.164521683387185e-07, "loss": 0.5002, "step": 6627 }, { "epoch": 1.8380476982806435, "grad_norm": 0.19921058416366577, "learning_rate": 4.1503995419031325e-07, "loss": 0.4908, "step": 6628 }, { "epoch": 1.8383250138657794, "grad_norm": 0.2109578251838684, "learning_rate": 4.1363009813780026e-07, "loss": 0.4987, "step": 6629 }, { "epoch": 1.8386023294509153, "grad_norm": 0.21179182827472687, "learning_rate": 4.122226004562746e-07, "loss": 0.5024, "step": 6630 }, { "epoch": 1.8388796450360512, "grad_norm": 0.20858225226402283, "learning_rate": 4.108174614203819e-07, "loss": 0.517, "step": 6631 }, { "epoch": 1.839156960621187, "grad_norm": 0.2038223147392273, "learning_rate": 4.094146813042973e-07, "loss": 0.448, "step": 6632 }, { "epoch": 1.839434276206323, "grad_norm": 0.21669189631938934, "learning_rate": 4.0801426038174357e-07, "loss": 0.4757, "step": 6633 }, { "epoch": 1.8397115917914588, "grad_norm": 0.204112246632576, "learning_rate": 4.06616198925977e-07, "loss": 0.4729, "step": 6634 }, { "epoch": 1.8399889073765947, "grad_norm": 0.22220736742019653, "learning_rate": 4.052204972097989e-07, "loss": 0.5062, "step": 6635 }, { "epoch": 1.8402662229617306, "grad_norm": 0.2096226066350937, "learning_rate": 4.0382715550554837e-07, "loss": 0.4841, "step": 6636 }, { "epoch": 1.8405435385468665, "grad_norm": 0.20454815030097961, "learning_rate": 4.024361740851024e-07, "loss": 0.4883, "step": 6637 }, { "epoch": 1.8408208541320024, "grad_norm": 0.20967929065227509, "learning_rate": 4.010475532198757e-07, "loss": 0.5155, "step": 6638 }, { "epoch": 1.8410981697171382, "grad_norm": 0.22011229395866394, "learning_rate": 3.996612931808266e-07, "loss": 0.5232, "step": 6639 }, { "epoch": 1.8413754853022741, "grad_norm": 0.22266606986522675, "learning_rate": 3.9827739423845265e-07, "loss": 0.5071, "step": 6640 }, { "epoch": 1.84165280088741, "grad_norm": 0.20759738981723785, "learning_rate": 3.9689585666278784e-07, "loss": 0.4825, "step": 6641 }, { "epoch": 1.841930116472546, "grad_norm": 0.2150697559118271, "learning_rate": 3.9551668072340675e-07, "loss": 0.511, "step": 6642 }, { "epoch": 1.8422074320576818, "grad_norm": 0.21445181965827942, "learning_rate": 3.9413986668942473e-07, "loss": 0.4903, "step": 6643 }, { "epoch": 1.8424847476428177, "grad_norm": 0.2114182561635971, "learning_rate": 3.9276541482949347e-07, "loss": 0.51, "step": 6644 }, { "epoch": 1.8427620632279536, "grad_norm": 0.2046855241060257, "learning_rate": 3.913933254118041e-07, "loss": 0.5055, "step": 6645 }, { "epoch": 1.8430393788130894, "grad_norm": 0.20602397620677948, "learning_rate": 3.9002359870408817e-07, "loss": 0.5024, "step": 6646 }, { "epoch": 1.8433166943982253, "grad_norm": 0.21433816850185394, "learning_rate": 3.886562349736167e-07, "loss": 0.4955, "step": 6647 }, { "epoch": 1.8435940099833612, "grad_norm": 0.21116018295288086, "learning_rate": 3.872912344871985e-07, "loss": 0.4696, "step": 6648 }, { "epoch": 1.843871325568497, "grad_norm": 0.20629142224788666, "learning_rate": 3.8592859751117873e-07, "loss": 0.4636, "step": 6649 }, { "epoch": 1.844148641153633, "grad_norm": 0.2071436494588852, "learning_rate": 3.845683243114462e-07, "loss": 0.5121, "step": 6650 }, { "epoch": 1.8444259567387689, "grad_norm": 0.20660892128944397, "learning_rate": 3.8321041515342744e-07, "loss": 0.5133, "step": 6651 }, { "epoch": 1.8447032723239047, "grad_norm": 0.19560420513153076, "learning_rate": 3.818548703020841e-07, "loss": 0.5027, "step": 6652 }, { "epoch": 1.8449805879090406, "grad_norm": 0.20409871637821198, "learning_rate": 3.805016900219172e-07, "loss": 0.507, "step": 6653 }, { "epoch": 1.8452579034941765, "grad_norm": 0.21331243216991425, "learning_rate": 3.791508745769737e-07, "loss": 0.5273, "step": 6654 }, { "epoch": 1.8455352190793124, "grad_norm": 0.2074180394411087, "learning_rate": 3.7780242423083036e-07, "loss": 0.4921, "step": 6655 }, { "epoch": 1.8458125346644483, "grad_norm": 0.22154366970062256, "learning_rate": 3.7645633924660446e-07, "loss": 0.5068, "step": 6656 }, { "epoch": 1.8460898502495842, "grad_norm": 0.20785929262638092, "learning_rate": 3.7511261988695393e-07, "loss": 0.4947, "step": 6657 }, { "epoch": 1.84636716583472, "grad_norm": 0.22468653321266174, "learning_rate": 3.737712664140747e-07, "loss": 0.5096, "step": 6658 }, { "epoch": 1.846644481419856, "grad_norm": 0.2090657502412796, "learning_rate": 3.724322790897003e-07, "loss": 0.5105, "step": 6659 }, { "epoch": 1.8469217970049918, "grad_norm": 0.20280075073242188, "learning_rate": 3.71095658175101e-07, "loss": 0.4872, "step": 6660 }, { "epoch": 1.8471991125901277, "grad_norm": 0.2233634740114212, "learning_rate": 3.697614039310876e-07, "loss": 0.506, "step": 6661 }, { "epoch": 1.8474764281752636, "grad_norm": 0.2086162120103836, "learning_rate": 3.684295166180102e-07, "loss": 0.5089, "step": 6662 }, { "epoch": 1.8477537437603995, "grad_norm": 0.20018382370471954, "learning_rate": 3.6709999649575386e-07, "loss": 0.5051, "step": 6663 }, { "epoch": 1.8480310593455354, "grad_norm": 0.21848781406879425, "learning_rate": 3.6577284382374316e-07, "loss": 0.497, "step": 6664 }, { "epoch": 1.8483083749306712, "grad_norm": 0.24338483810424805, "learning_rate": 3.644480588609403e-07, "loss": 0.5335, "step": 6665 }, { "epoch": 1.8485856905158071, "grad_norm": 0.21215735375881195, "learning_rate": 3.6312564186584826e-07, "loss": 0.5265, "step": 6666 }, { "epoch": 1.848863006100943, "grad_norm": 0.22368744015693665, "learning_rate": 3.6180559309650086e-07, "loss": 0.5033, "step": 6667 }, { "epoch": 1.849140321686079, "grad_norm": 0.20563702285289764, "learning_rate": 3.6048791281047963e-07, "loss": 0.4938, "step": 6668 }, { "epoch": 1.8494176372712148, "grad_norm": 0.21737602353096008, "learning_rate": 3.5917260126489687e-07, "loss": 0.5033, "step": 6669 }, { "epoch": 1.8496949528563507, "grad_norm": 0.32653746008872986, "learning_rate": 3.578596587164043e-07, "loss": 0.517, "step": 6670 }, { "epoch": 1.8499722684414865, "grad_norm": 0.45970603823661804, "learning_rate": 3.565490854211928e-07, "loss": 0.5046, "step": 6671 }, { "epoch": 1.8502495840266224, "grad_norm": 0.20332348346710205, "learning_rate": 3.552408816349884e-07, "loss": 0.4842, "step": 6672 }, { "epoch": 1.8505268996117583, "grad_norm": 0.20179514586925507, "learning_rate": 3.5393504761305903e-07, "loss": 0.4844, "step": 6673 }, { "epoch": 1.8508042151968942, "grad_norm": 0.20190241932868958, "learning_rate": 3.5263158361020373e-07, "loss": 0.4805, "step": 6674 }, { "epoch": 1.85108153078203, "grad_norm": 0.2159728854894638, "learning_rate": 3.513304898807676e-07, "loss": 0.5239, "step": 6675 }, { "epoch": 1.851358846367166, "grad_norm": 0.20452584326267242, "learning_rate": 3.5003176667862265e-07, "loss": 0.4694, "step": 6676 }, { "epoch": 1.8516361619523019, "grad_norm": 0.19299055635929108, "learning_rate": 3.487354142571883e-07, "loss": 0.5076, "step": 6677 }, { "epoch": 1.8519134775374377, "grad_norm": 0.20924656093120575, "learning_rate": 3.474414328694178e-07, "loss": 0.502, "step": 6678 }, { "epoch": 1.8521907931225736, "grad_norm": 1.066011905670166, "learning_rate": 3.4614982276779953e-07, "loss": 0.4964, "step": 6679 }, { "epoch": 1.8524681087077095, "grad_norm": 0.21032501757144928, "learning_rate": 3.448605842043581e-07, "loss": 0.4718, "step": 6680 }, { "epoch": 1.8527454242928454, "grad_norm": 0.216018944978714, "learning_rate": 3.435737174306633e-07, "loss": 0.5178, "step": 6681 }, { "epoch": 1.8530227398779813, "grad_norm": 0.2162523716688156, "learning_rate": 3.4228922269781556e-07, "loss": 0.4962, "step": 6682 }, { "epoch": 1.8533000554631172, "grad_norm": 0.20311126112937927, "learning_rate": 3.410071002564519e-07, "loss": 0.5093, "step": 6683 }, { "epoch": 1.853577371048253, "grad_norm": 0.21295960247516632, "learning_rate": 3.397273503567486e-07, "loss": 0.4753, "step": 6684 }, { "epoch": 1.853854686633389, "grad_norm": 0.21265694499015808, "learning_rate": 3.3844997324842113e-07, "loss": 0.4974, "step": 6685 }, { "epoch": 1.8541320022185248, "grad_norm": 0.21514546871185303, "learning_rate": 3.3717496918071746e-07, "loss": 0.4879, "step": 6686 }, { "epoch": 1.8544093178036607, "grad_norm": 0.23401249945163727, "learning_rate": 3.3590233840242455e-07, "loss": 0.5049, "step": 6687 }, { "epoch": 1.8546866333887966, "grad_norm": 0.20478412508964539, "learning_rate": 3.346320811618675e-07, "loss": 0.5053, "step": 6688 }, { "epoch": 1.8549639489739325, "grad_norm": 0.21184539794921875, "learning_rate": 3.333641977069077e-07, "loss": 0.528, "step": 6689 }, { "epoch": 1.8552412645590683, "grad_norm": 0.1997302621603012, "learning_rate": 3.320986882849417e-07, "loss": 0.4981, "step": 6690 }, { "epoch": 1.8555185801442042, "grad_norm": 0.21800538897514343, "learning_rate": 3.308355531429011e-07, "loss": 0.5069, "step": 6691 }, { "epoch": 1.8557958957293401, "grad_norm": 0.2156432867050171, "learning_rate": 3.295747925272638e-07, "loss": 0.5039, "step": 6692 }, { "epoch": 1.856073211314476, "grad_norm": 0.2117234319448471, "learning_rate": 3.28316406684033e-07, "loss": 0.4791, "step": 6693 }, { "epoch": 1.8563505268996119, "grad_norm": 0.48360639810562134, "learning_rate": 3.2706039585875257e-07, "loss": 0.4949, "step": 6694 }, { "epoch": 1.8566278424847478, "grad_norm": 0.20971214771270752, "learning_rate": 3.2580676029650566e-07, "loss": 0.5, "step": 6695 }, { "epoch": 1.8569051580698837, "grad_norm": 0.20673950016498566, "learning_rate": 3.24555500241909e-07, "loss": 0.5199, "step": 6696 }, { "epoch": 1.8571824736550195, "grad_norm": 0.20716223120689392, "learning_rate": 3.233066159391174e-07, "loss": 0.5073, "step": 6697 }, { "epoch": 1.8574597892401554, "grad_norm": 0.2109098881483078, "learning_rate": 3.2206010763181916e-07, "loss": 0.5055, "step": 6698 }, { "epoch": 1.8577371048252913, "grad_norm": 0.20620295405387878, "learning_rate": 3.20815975563242e-07, "loss": 0.5266, "step": 6699 }, { "epoch": 1.8580144204104272, "grad_norm": 0.22776873409748077, "learning_rate": 3.1957421997615004e-07, "loss": 0.5076, "step": 6700 }, { "epoch": 1.858291735995563, "grad_norm": 0.2032913714647293, "learning_rate": 3.1833484111284115e-07, "loss": 0.5065, "step": 6701 }, { "epoch": 1.858569051580699, "grad_norm": 0.2074134647846222, "learning_rate": 3.1709783921515383e-07, "loss": 0.5079, "step": 6702 }, { "epoch": 1.8588463671658348, "grad_norm": 0.20687542855739594, "learning_rate": 3.158632145244561e-07, "loss": 0.5004, "step": 6703 }, { "epoch": 1.8591236827509707, "grad_norm": 0.21574708819389343, "learning_rate": 3.1463096728165944e-07, "loss": 0.5162, "step": 6704 }, { "epoch": 1.8594009983361066, "grad_norm": 0.2129552662372589, "learning_rate": 3.134010977272048e-07, "loss": 0.505, "step": 6705 }, { "epoch": 1.8596783139212425, "grad_norm": 0.2198726236820221, "learning_rate": 3.121736061010738e-07, "loss": 0.5004, "step": 6706 }, { "epoch": 1.8599556295063784, "grad_norm": 0.21903812885284424, "learning_rate": 3.109484926427847e-07, "loss": 0.4909, "step": 6707 }, { "epoch": 1.8602329450915143, "grad_norm": 0.2174917459487915, "learning_rate": 3.0972575759138503e-07, "loss": 0.4957, "step": 6708 }, { "epoch": 1.8605102606766502, "grad_norm": 0.2153944969177246, "learning_rate": 3.085054011854674e-07, "loss": 0.4964, "step": 6709 }, { "epoch": 1.860787576261786, "grad_norm": 0.22760117053985596, "learning_rate": 3.0728742366315233e-07, "loss": 0.4961, "step": 6710 }, { "epoch": 1.861064891846922, "grad_norm": 0.20543061196804047, "learning_rate": 3.0607182526210115e-07, "loss": 0.4923, "step": 6711 }, { "epoch": 1.8613422074320578, "grad_norm": 0.2019956409931183, "learning_rate": 3.0485860621950887e-07, "loss": 0.5033, "step": 6712 }, { "epoch": 1.8616195230171937, "grad_norm": 0.2030191272497177, "learning_rate": 3.036477667721069e-07, "loss": 0.4962, "step": 6713 }, { "epoch": 1.8618968386023296, "grad_norm": 0.20309413969516754, "learning_rate": 3.024393071561604e-07, "loss": 0.5137, "step": 6714 }, { "epoch": 1.8621741541874655, "grad_norm": 0.20694345235824585, "learning_rate": 3.0123322760747394e-07, "loss": 0.4906, "step": 6715 }, { "epoch": 1.8624514697726013, "grad_norm": 0.21089065074920654, "learning_rate": 3.000295283613869e-07, "loss": 0.4833, "step": 6716 }, { "epoch": 1.8627287853577372, "grad_norm": 0.20231840014457703, "learning_rate": 2.9882820965276975e-07, "loss": 0.5002, "step": 6717 }, { "epoch": 1.8630061009428731, "grad_norm": 0.20089836418628693, "learning_rate": 2.9762927171603226e-07, "loss": 0.4721, "step": 6718 }, { "epoch": 1.863283416528009, "grad_norm": 0.25077909231185913, "learning_rate": 2.9643271478511925e-07, "loss": 0.5162, "step": 6719 }, { "epoch": 1.8635607321131449, "grad_norm": 0.22070035338401794, "learning_rate": 2.952385390935133e-07, "loss": 0.482, "step": 6720 }, { "epoch": 1.8638380476982808, "grad_norm": 0.21285070478916168, "learning_rate": 2.94046744874224e-07, "loss": 0.5105, "step": 6721 }, { "epoch": 1.8641153632834166, "grad_norm": 0.20452368259429932, "learning_rate": 2.928573323598069e-07, "loss": 0.4942, "step": 6722 }, { "epoch": 1.8643926788685525, "grad_norm": 0.21610909700393677, "learning_rate": 2.9167030178234853e-07, "loss": 0.5066, "step": 6723 }, { "epoch": 1.8646699944536884, "grad_norm": 0.2100083827972412, "learning_rate": 2.904856533734665e-07, "loss": 0.486, "step": 6724 }, { "epoch": 1.8649473100388243, "grad_norm": 0.211506187915802, "learning_rate": 2.893033873643175e-07, "loss": 0.4986, "step": 6725 }, { "epoch": 1.8652246256239602, "grad_norm": 0.2029467523097992, "learning_rate": 2.881235039855934e-07, "loss": 0.4879, "step": 6726 }, { "epoch": 1.865501941209096, "grad_norm": 0.21474742889404297, "learning_rate": 2.8694600346752255e-07, "loss": 0.5127, "step": 6727 }, { "epoch": 1.865779256794232, "grad_norm": 0.20987072587013245, "learning_rate": 2.857708860398656e-07, "loss": 0.5121, "step": 6728 }, { "epoch": 1.8660565723793678, "grad_norm": 0.2003088742494583, "learning_rate": 2.845981519319169e-07, "loss": 0.4814, "step": 6729 }, { "epoch": 1.8663338879645037, "grad_norm": 0.20292720198631287, "learning_rate": 2.834278013725114e-07, "loss": 0.4883, "step": 6730 }, { "epoch": 1.8666112035496396, "grad_norm": 0.2072405070066452, "learning_rate": 2.8225983459001374e-07, "loss": 0.5012, "step": 6731 }, { "epoch": 1.8668885191347755, "grad_norm": 0.22093196213245392, "learning_rate": 2.81094251812325e-07, "loss": 0.5124, "step": 6732 }, { "epoch": 1.8671658347199114, "grad_norm": 0.21116647124290466, "learning_rate": 2.7993105326687975e-07, "loss": 0.4803, "step": 6733 }, { "epoch": 1.8674431503050473, "grad_norm": 0.20909181237220764, "learning_rate": 2.7877023918065346e-07, "loss": 0.5075, "step": 6734 }, { "epoch": 1.8677204658901831, "grad_norm": 0.2065388560295105, "learning_rate": 2.776118097801483e-07, "loss": 0.4884, "step": 6735 }, { "epoch": 1.867997781475319, "grad_norm": 0.2007947862148285, "learning_rate": 2.764557652914029e-07, "loss": 0.4811, "step": 6736 }, { "epoch": 1.868275097060455, "grad_norm": 0.21440117061138153, "learning_rate": 2.753021059399952e-07, "loss": 0.4785, "step": 6737 }, { "epoch": 1.8685524126455908, "grad_norm": 0.2125922590494156, "learning_rate": 2.7415083195103525e-07, "loss": 0.473, "step": 6738 }, { "epoch": 1.8688297282307267, "grad_norm": 0.21997934579849243, "learning_rate": 2.730019435491657e-07, "loss": 0.4944, "step": 6739 }, { "epoch": 1.8691070438158626, "grad_norm": 0.19915248453617096, "learning_rate": 2.7185544095856413e-07, "loss": 0.4901, "step": 6740 }, { "epoch": 1.8693843594009985, "grad_norm": 0.21164058148860931, "learning_rate": 2.7071132440294464e-07, "loss": 0.4929, "step": 6741 }, { "epoch": 1.8696616749861343, "grad_norm": 0.1981211006641388, "learning_rate": 2.695695941055551e-07, "loss": 0.4817, "step": 6742 }, { "epoch": 1.8699389905712702, "grad_norm": 0.21673041582107544, "learning_rate": 2.68430250289177e-07, "loss": 0.5048, "step": 6743 }, { "epoch": 1.870216306156406, "grad_norm": 0.19340240955352783, "learning_rate": 2.6729329317612545e-07, "loss": 0.5013, "step": 6744 }, { "epoch": 1.870493621741542, "grad_norm": 0.20828621089458466, "learning_rate": 2.661587229882537e-07, "loss": 0.4879, "step": 6745 }, { "epoch": 1.8707709373266779, "grad_norm": 0.2040943205356598, "learning_rate": 2.650265399469429e-07, "loss": 0.5113, "step": 6746 }, { "epoch": 1.8710482529118138, "grad_norm": 0.2092759907245636, "learning_rate": 2.6389674427311494e-07, "loss": 0.5067, "step": 6747 }, { "epoch": 1.8713255684969496, "grad_norm": 0.20813149213790894, "learning_rate": 2.6276933618721995e-07, "loss": 0.5074, "step": 6748 }, { "epoch": 1.8716028840820855, "grad_norm": 0.2012152075767517, "learning_rate": 2.6164431590924856e-07, "loss": 0.5058, "step": 6749 }, { "epoch": 1.8718801996672214, "grad_norm": 0.2121763676404953, "learning_rate": 2.605216836587182e-07, "loss": 0.508, "step": 6750 }, { "epoch": 1.8721575152523573, "grad_norm": 0.20884345471858978, "learning_rate": 2.5940143965468843e-07, "loss": 0.5022, "step": 6751 }, { "epoch": 1.8724348308374932, "grad_norm": 0.21974632143974304, "learning_rate": 2.582835841157441e-07, "loss": 0.4794, "step": 6752 }, { "epoch": 1.872712146422629, "grad_norm": 0.22024495899677277, "learning_rate": 2.5716811726001213e-07, "loss": 0.5149, "step": 6753 }, { "epoch": 1.872989462007765, "grad_norm": 0.20395393669605255, "learning_rate": 2.560550393051475e-07, "loss": 0.4796, "step": 6754 }, { "epoch": 1.8732667775929008, "grad_norm": 0.2120208889245987, "learning_rate": 2.5494435046834324e-07, "loss": 0.5186, "step": 6755 }, { "epoch": 1.8735440931780367, "grad_norm": 0.20500154793262482, "learning_rate": 2.538360509663218e-07, "loss": 0.4926, "step": 6756 }, { "epoch": 1.8738214087631726, "grad_norm": 0.20315538346767426, "learning_rate": 2.527301410153421e-07, "loss": 0.4664, "step": 6757 }, { "epoch": 1.8740987243483085, "grad_norm": 0.19243794679641724, "learning_rate": 2.516266208311982e-07, "loss": 0.4876, "step": 6758 }, { "epoch": 1.8743760399334444, "grad_norm": 0.2194959968328476, "learning_rate": 2.505254906292151e-07, "loss": 0.5032, "step": 6759 }, { "epoch": 1.8746533555185803, "grad_norm": 0.21170787513256073, "learning_rate": 2.4942675062425147e-07, "loss": 0.4849, "step": 6760 }, { "epoch": 1.8749306711037161, "grad_norm": 0.2141827791929245, "learning_rate": 2.483304010307025e-07, "loss": 0.5182, "step": 6761 }, { "epoch": 1.875207986688852, "grad_norm": 0.21987125277519226, "learning_rate": 2.4723644206249424e-07, "loss": 0.4889, "step": 6762 }, { "epoch": 1.875485302273988, "grad_norm": 0.2075456976890564, "learning_rate": 2.4614487393308657e-07, "loss": 0.5282, "step": 6763 }, { "epoch": 1.8757626178591238, "grad_norm": 0.20293202996253967, "learning_rate": 2.450556968554743e-07, "loss": 0.4726, "step": 6764 }, { "epoch": 1.8760399334442597, "grad_norm": 0.2271379679441452, "learning_rate": 2.4396891104218335e-07, "loss": 0.5321, "step": 6765 }, { "epoch": 1.8763172490293956, "grad_norm": 0.206784188747406, "learning_rate": 2.4288451670527736e-07, "loss": 0.5207, "step": 6766 }, { "epoch": 1.8765945646145314, "grad_norm": 0.21522942185401917, "learning_rate": 2.41802514056344e-07, "loss": 0.5015, "step": 6767 }, { "epoch": 1.8768718801996673, "grad_norm": 0.2123783975839615, "learning_rate": 2.4072290330651867e-07, "loss": 0.488, "step": 6768 }, { "epoch": 1.8771491957848032, "grad_norm": 0.2079179435968399, "learning_rate": 2.396456846664577e-07, "loss": 0.4738, "step": 6769 }, { "epoch": 1.877426511369939, "grad_norm": 0.21037521958351135, "learning_rate": 2.3857085834635557e-07, "loss": 0.5059, "step": 6770 }, { "epoch": 1.877703826955075, "grad_norm": 0.21396882832050323, "learning_rate": 2.374984245559375e-07, "loss": 0.5156, "step": 6771 }, { "epoch": 1.8779811425402109, "grad_norm": 0.21056626737117767, "learning_rate": 2.3642838350446812e-07, "loss": 0.5154, "step": 6772 }, { "epoch": 1.8782584581253468, "grad_norm": 0.2141093611717224, "learning_rate": 2.3536073540073727e-07, "loss": 0.5053, "step": 6773 }, { "epoch": 1.8785357737104826, "grad_norm": 0.2114919126033783, "learning_rate": 2.342954804530728e-07, "loss": 0.5151, "step": 6774 }, { "epoch": 1.8788130892956185, "grad_norm": 0.2190243899822235, "learning_rate": 2.3323261886933344e-07, "loss": 0.5099, "step": 6775 }, { "epoch": 1.8790904048807544, "grad_norm": 0.22468531131744385, "learning_rate": 2.3217215085691164e-07, "loss": 0.5139, "step": 6776 }, { "epoch": 1.8793677204658903, "grad_norm": 0.20503219962120056, "learning_rate": 2.311140766227349e-07, "loss": 0.5068, "step": 6777 }, { "epoch": 1.8796450360510262, "grad_norm": 0.22132523357868195, "learning_rate": 2.300583963732575e-07, "loss": 0.5136, "step": 6778 }, { "epoch": 1.879922351636162, "grad_norm": 0.21017663180828094, "learning_rate": 2.2900511031447303e-07, "loss": 0.4947, "step": 6779 }, { "epoch": 1.880199667221298, "grad_norm": 0.20506809651851654, "learning_rate": 2.2795421865190737e-07, "loss": 0.5036, "step": 6780 }, { "epoch": 1.8804769828064338, "grad_norm": 0.2145530879497528, "learning_rate": 2.2690572159061453e-07, "loss": 0.5016, "step": 6781 }, { "epoch": 1.8807542983915697, "grad_norm": 0.20361502468585968, "learning_rate": 2.2585961933518362e-07, "loss": 0.4867, "step": 6782 }, { "epoch": 1.8810316139767056, "grad_norm": 0.2013719379901886, "learning_rate": 2.248159120897403e-07, "loss": 0.4996, "step": 6783 }, { "epoch": 1.8813089295618415, "grad_norm": 0.21445105969905853, "learning_rate": 2.2377460005793688e-07, "loss": 0.4938, "step": 6784 }, { "epoch": 1.8815862451469774, "grad_norm": 0.2540777027606964, "learning_rate": 2.2273568344295948e-07, "loss": 0.4668, "step": 6785 }, { "epoch": 1.8818635607321132, "grad_norm": 0.21935346722602844, "learning_rate": 2.2169916244753204e-07, "loss": 0.4713, "step": 6786 }, { "epoch": 1.8821408763172491, "grad_norm": 0.20928525924682617, "learning_rate": 2.2066503727390526e-07, "loss": 0.492, "step": 6787 }, { "epoch": 1.882418191902385, "grad_norm": 0.21874678134918213, "learning_rate": 2.1963330812386356e-07, "loss": 0.4754, "step": 6788 }, { "epoch": 1.882695507487521, "grad_norm": 0.22388552129268646, "learning_rate": 2.186039751987265e-07, "loss": 0.5055, "step": 6789 }, { "epoch": 1.8829728230726568, "grad_norm": 0.20376282930374146, "learning_rate": 2.175770386993431e-07, "loss": 0.4799, "step": 6790 }, { "epoch": 1.8832501386577927, "grad_norm": 0.22341391444206238, "learning_rate": 2.1655249882609612e-07, "loss": 0.5089, "step": 6791 }, { "epoch": 1.8835274542429286, "grad_norm": 0.20708690583705902, "learning_rate": 2.1553035577890068e-07, "loss": 0.4717, "step": 6792 }, { "epoch": 1.8838047698280644, "grad_norm": 0.20336076617240906, "learning_rate": 2.1451060975720417e-07, "loss": 0.4888, "step": 6793 }, { "epoch": 1.8840820854132003, "grad_norm": 0.23210936784744263, "learning_rate": 2.134932609599849e-07, "loss": 0.4947, "step": 6794 }, { "epoch": 1.8843594009983362, "grad_norm": 0.2158088982105255, "learning_rate": 2.1247830958575493e-07, "loss": 0.5283, "step": 6795 }, { "epoch": 1.884636716583472, "grad_norm": 0.21517014503479004, "learning_rate": 2.1146575583255862e-07, "loss": 0.4747, "step": 6796 }, { "epoch": 1.884914032168608, "grad_norm": 0.20993436872959137, "learning_rate": 2.1045559989797125e-07, "loss": 0.4945, "step": 6797 }, { "epoch": 1.8851913477537439, "grad_norm": 0.210999995470047, "learning_rate": 2.0944784197910321e-07, "loss": 0.4993, "step": 6798 }, { "epoch": 1.8854686633388797, "grad_norm": 0.2413627654314041, "learning_rate": 2.0844248227259305e-07, "loss": 0.4893, "step": 6799 }, { "epoch": 1.8857459789240156, "grad_norm": 0.20545423030853271, "learning_rate": 2.07439520974613e-07, "loss": 0.5073, "step": 6800 }, { "epoch": 1.8860232945091515, "grad_norm": 0.20195883512496948, "learning_rate": 2.0643895828086625e-07, "loss": 0.4633, "step": 6801 }, { "epoch": 1.8863006100942874, "grad_norm": 0.21203799545764923, "learning_rate": 2.0544079438659242e-07, "loss": 0.5121, "step": 6802 }, { "epoch": 1.8865779256794233, "grad_norm": 0.2213236391544342, "learning_rate": 2.0444502948655658e-07, "loss": 0.4984, "step": 6803 }, { "epoch": 1.8868552412645592, "grad_norm": 0.2082570642232895, "learning_rate": 2.0345166377506165e-07, "loss": 0.5068, "step": 6804 }, { "epoch": 1.887132556849695, "grad_norm": 0.20740516483783722, "learning_rate": 2.024606974459359e-07, "loss": 0.4746, "step": 6805 }, { "epoch": 1.887409872434831, "grad_norm": 0.20049674808979034, "learning_rate": 2.0147213069254683e-07, "loss": 0.478, "step": 6806 }, { "epoch": 1.8876871880199668, "grad_norm": 0.1955636441707611, "learning_rate": 2.0048596370778744e-07, "loss": 0.4785, "step": 6807 }, { "epoch": 1.8879645036051027, "grad_norm": 0.21246939897537231, "learning_rate": 1.995021966840871e-07, "loss": 0.4925, "step": 6808 }, { "epoch": 1.8882418191902386, "grad_norm": 0.20627954602241516, "learning_rate": 1.9852082981340198e-07, "loss": 0.4882, "step": 6809 }, { "epoch": 1.8885191347753745, "grad_norm": 0.20891375839710236, "learning_rate": 1.9754186328722614e-07, "loss": 0.5049, "step": 6810 }, { "epoch": 1.8887964503605104, "grad_norm": 0.2203957885503769, "learning_rate": 1.9656529729658036e-07, "loss": 0.4642, "step": 6811 }, { "epoch": 1.8890737659456462, "grad_norm": 0.2100536823272705, "learning_rate": 1.955911320320164e-07, "loss": 0.4995, "step": 6812 }, { "epoch": 1.8893510815307821, "grad_norm": 0.20311182737350464, "learning_rate": 1.946193676836225e-07, "loss": 0.525, "step": 6813 }, { "epoch": 1.889628397115918, "grad_norm": 0.2031848132610321, "learning_rate": 1.936500044410164e-07, "loss": 0.519, "step": 6814 }, { "epoch": 1.889905712701054, "grad_norm": 0.19998584687709808, "learning_rate": 1.9268304249334402e-07, "loss": 0.4888, "step": 6815 }, { "epoch": 1.8901830282861898, "grad_norm": 0.20453502237796783, "learning_rate": 1.9171848202928637e-07, "loss": 0.4936, "step": 6816 }, { "epoch": 1.8904603438713257, "grad_norm": 0.1982734203338623, "learning_rate": 1.9075632323705405e-07, "loss": 0.4986, "step": 6817 }, { "epoch": 1.8907376594564616, "grad_norm": 0.21546244621276855, "learning_rate": 1.897965663043913e-07, "loss": 0.5138, "step": 6818 }, { "epoch": 1.8910149750415974, "grad_norm": 0.20453289151191711, "learning_rate": 1.8883921141857053e-07, "loss": 0.4685, "step": 6819 }, { "epoch": 1.8912922906267333, "grad_norm": 0.2022780179977417, "learning_rate": 1.8788425876639647e-07, "loss": 0.4743, "step": 6820 }, { "epoch": 1.8915696062118692, "grad_norm": 0.20572780072689056, "learning_rate": 1.8693170853420898e-07, "loss": 0.4792, "step": 6821 }, { "epoch": 1.891846921797005, "grad_norm": 0.2061588168144226, "learning_rate": 1.8598156090787328e-07, "loss": 0.491, "step": 6822 }, { "epoch": 1.892124237382141, "grad_norm": 0.21832877397537231, "learning_rate": 1.8503381607278825e-07, "loss": 0.5126, "step": 6823 }, { "epoch": 1.8924015529672769, "grad_norm": 0.21814782917499542, "learning_rate": 1.8408847421388376e-07, "loss": 0.4833, "step": 6824 }, { "epoch": 1.8926788685524127, "grad_norm": 0.2065282016992569, "learning_rate": 1.8314553551562336e-07, "loss": 0.5193, "step": 6825 }, { "epoch": 1.8929561841375486, "grad_norm": 0.21190817654132843, "learning_rate": 1.8220500016199736e-07, "loss": 0.5011, "step": 6826 }, { "epoch": 1.8932334997226845, "grad_norm": 0.3020632266998291, "learning_rate": 1.8126686833652984e-07, "loss": 0.4953, "step": 6827 }, { "epoch": 1.8935108153078204, "grad_norm": 0.2020588517189026, "learning_rate": 1.8033114022227432e-07, "loss": 0.4876, "step": 6828 }, { "epoch": 1.8937881308929563, "grad_norm": 0.21000345051288605, "learning_rate": 1.793978160018181e-07, "loss": 0.4933, "step": 6829 }, { "epoch": 1.8940654464780922, "grad_norm": 0.20526744425296783, "learning_rate": 1.784668958572752e-07, "loss": 0.5048, "step": 6830 }, { "epoch": 1.894342762063228, "grad_norm": 0.20586876571178436, "learning_rate": 1.7753837997029343e-07, "loss": 0.4861, "step": 6831 }, { "epoch": 1.894620077648364, "grad_norm": 0.22010831534862518, "learning_rate": 1.7661226852205143e-07, "loss": 0.5143, "step": 6832 }, { "epoch": 1.8948973932334998, "grad_norm": 0.21001818776130676, "learning_rate": 1.7568856169325743e-07, "loss": 0.4987, "step": 6833 }, { "epoch": 1.8951747088186357, "grad_norm": 0.1980331391096115, "learning_rate": 1.7476725966415335e-07, "loss": 0.4786, "step": 6834 }, { "epoch": 1.8954520244037716, "grad_norm": 0.20589183270931244, "learning_rate": 1.7384836261450655e-07, "loss": 0.5138, "step": 6835 }, { "epoch": 1.8957293399889075, "grad_norm": 0.20626741647720337, "learning_rate": 1.7293187072361938e-07, "loss": 0.489, "step": 6836 }, { "epoch": 1.8960066555740434, "grad_norm": 0.20527756214141846, "learning_rate": 1.7201778417032383e-07, "loss": 0.4767, "step": 6837 }, { "epoch": 1.8962839711591792, "grad_norm": 0.21390627324581146, "learning_rate": 1.7110610313298274e-07, "loss": 0.4862, "step": 6838 }, { "epoch": 1.8965612867443151, "grad_norm": 0.205204576253891, "learning_rate": 1.701968277894872e-07, "loss": 0.487, "step": 6839 }, { "epoch": 1.896838602329451, "grad_norm": 0.20338958501815796, "learning_rate": 1.692899583172633e-07, "loss": 0.492, "step": 6840 }, { "epoch": 1.897115917914587, "grad_norm": 0.2076566219329834, "learning_rate": 1.6838549489326533e-07, "loss": 0.5127, "step": 6841 }, { "epoch": 1.8973932334997228, "grad_norm": 0.21550370752811432, "learning_rate": 1.6748343769397713e-07, "loss": 0.5049, "step": 6842 }, { "epoch": 1.8976705490848587, "grad_norm": 0.21209217607975006, "learning_rate": 1.6658378689541343e-07, "loss": 0.5149, "step": 6843 }, { "epoch": 1.8979478646699945, "grad_norm": 0.2269715517759323, "learning_rate": 1.6568654267312133e-07, "loss": 0.4871, "step": 6844 }, { "epoch": 1.8982251802551304, "grad_norm": 0.2141071856021881, "learning_rate": 1.6479170520217607e-07, "loss": 0.4873, "step": 6845 }, { "epoch": 1.8985024958402663, "grad_norm": 0.21018719673156738, "learning_rate": 1.638992746571852e-07, "loss": 0.5122, "step": 6846 }, { "epoch": 1.8987798114254022, "grad_norm": 0.21268904209136963, "learning_rate": 1.630092512122816e-07, "loss": 0.5282, "step": 6847 }, { "epoch": 1.899057127010538, "grad_norm": 0.21051792800426483, "learning_rate": 1.6212163504113753e-07, "loss": 0.4921, "step": 6848 }, { "epoch": 1.899334442595674, "grad_norm": 0.20133888721466064, "learning_rate": 1.6123642631694913e-07, "loss": 0.4807, "step": 6849 }, { "epoch": 1.8996117581808099, "grad_norm": 0.19789256155490875, "learning_rate": 1.6035362521244213e-07, "loss": 0.5019, "step": 6850 }, { "epoch": 1.8998890737659457, "grad_norm": 0.19800089299678802, "learning_rate": 1.5947323189987595e-07, "loss": 0.4618, "step": 6851 }, { "epoch": 1.9001663893510816, "grad_norm": 0.20594573020935059, "learning_rate": 1.585952465510382e-07, "loss": 0.4941, "step": 6852 }, { "epoch": 1.9004437049362175, "grad_norm": 0.18927329778671265, "learning_rate": 1.5771966933724736e-07, "loss": 0.4925, "step": 6853 }, { "epoch": 1.9007210205213534, "grad_norm": 0.2202519327402115, "learning_rate": 1.568465004293515e-07, "loss": 0.5087, "step": 6854 }, { "epoch": 1.9009983361064893, "grad_norm": 0.2147960215806961, "learning_rate": 1.5597573999772823e-07, "loss": 0.516, "step": 6855 }, { "epoch": 1.9012756516916252, "grad_norm": 0.21644139289855957, "learning_rate": 1.5510738821228887e-07, "loss": 0.4624, "step": 6856 }, { "epoch": 1.901552967276761, "grad_norm": 0.2117416262626648, "learning_rate": 1.542414452424687e-07, "loss": 0.4926, "step": 6857 }, { "epoch": 1.901830282861897, "grad_norm": 0.2076312005519867, "learning_rate": 1.533779112572367e-07, "loss": 0.514, "step": 6858 }, { "epoch": 1.9021075984470328, "grad_norm": 0.21172620356082916, "learning_rate": 1.5251678642509286e-07, "loss": 0.4708, "step": 6859 }, { "epoch": 1.9023849140321687, "grad_norm": 0.2097117304801941, "learning_rate": 1.5165807091406386e-07, "loss": 0.5145, "step": 6860 }, { "epoch": 1.9026622296173046, "grad_norm": 0.21858976781368256, "learning_rate": 1.5080176489170734e-07, "loss": 0.4811, "step": 6861 }, { "epoch": 1.9029395452024405, "grad_norm": 0.1994626671075821, "learning_rate": 1.4994786852511322e-07, "loss": 0.4825, "step": 6862 }, { "epoch": 1.9032168607875763, "grad_norm": 0.20749947428703308, "learning_rate": 1.4909638198089966e-07, "loss": 0.5112, "step": 6863 }, { "epoch": 1.9034941763727122, "grad_norm": 0.20840351283550262, "learning_rate": 1.4824730542521148e-07, "loss": 0.4832, "step": 6864 }, { "epoch": 1.9037714919578481, "grad_norm": 0.20934903621673584, "learning_rate": 1.474006390237273e-07, "loss": 0.5057, "step": 6865 }, { "epoch": 1.904048807542984, "grad_norm": 0.21367427706718445, "learning_rate": 1.4655638294165525e-07, "loss": 0.5018, "step": 6866 }, { "epoch": 1.9043261231281199, "grad_norm": 0.21489764750003815, "learning_rate": 1.4571453734373157e-07, "loss": 0.5019, "step": 6867 }, { "epoch": 1.9046034387132558, "grad_norm": 0.20891396701335907, "learning_rate": 1.4487510239422076e-07, "loss": 0.4948, "step": 6868 }, { "epoch": 1.9048807542983917, "grad_norm": 0.20342187583446503, "learning_rate": 1.440380782569209e-07, "loss": 0.4613, "step": 6869 }, { "epoch": 1.9051580698835275, "grad_norm": 0.20930233597755432, "learning_rate": 1.432034650951569e-07, "loss": 0.5102, "step": 6870 }, { "epoch": 1.9054353854686634, "grad_norm": 0.19923752546310425, "learning_rate": 1.4237126307178467e-07, "loss": 0.4857, "step": 6871 }, { "epoch": 1.9057127010537993, "grad_norm": 0.2359122633934021, "learning_rate": 1.4154147234918814e-07, "loss": 0.4877, "step": 6872 }, { "epoch": 1.9059900166389352, "grad_norm": 0.19613344967365265, "learning_rate": 1.4071409308928086e-07, "loss": 0.4993, "step": 6873 }, { "epoch": 1.906267332224071, "grad_norm": 0.22320273518562317, "learning_rate": 1.398891254535073e-07, "loss": 0.5018, "step": 6874 }, { "epoch": 1.906544647809207, "grad_norm": 0.21227893233299255, "learning_rate": 1.3906656960284008e-07, "loss": 0.5012, "step": 6875 }, { "epoch": 1.9068219633943428, "grad_norm": 0.2211824506521225, "learning_rate": 1.382464256977828e-07, "loss": 0.4604, "step": 6876 }, { "epoch": 1.9070992789794787, "grad_norm": 0.20455965399742126, "learning_rate": 1.3742869389836572e-07, "loss": 0.4759, "step": 6877 }, { "epoch": 1.9073765945646146, "grad_norm": 0.20828378200531006, "learning_rate": 1.3661337436415012e-07, "loss": 0.5047, "step": 6878 }, { "epoch": 1.9076539101497505, "grad_norm": 0.2089402675628662, "learning_rate": 1.358004672542282e-07, "loss": 0.4886, "step": 6879 }, { "epoch": 1.9079312257348864, "grad_norm": 0.23213088512420654, "learning_rate": 1.3498997272721886e-07, "loss": 0.513, "step": 6880 }, { "epoch": 1.9082085413200223, "grad_norm": 0.22040383517742157, "learning_rate": 1.3418189094126926e-07, "loss": 0.5072, "step": 6881 }, { "epoch": 1.9084858569051582, "grad_norm": 0.22452102601528168, "learning_rate": 1.333762220540588e-07, "loss": 0.5167, "step": 6882 }, { "epoch": 1.908763172490294, "grad_norm": 0.20027916133403778, "learning_rate": 1.325729662227951e-07, "loss": 0.4924, "step": 6883 }, { "epoch": 1.90904048807543, "grad_norm": 0.20974531769752502, "learning_rate": 1.317721236042152e-07, "loss": 0.4752, "step": 6884 }, { "epoch": 1.9093178036605658, "grad_norm": 0.22228215634822845, "learning_rate": 1.3097369435458167e-07, "loss": 0.4951, "step": 6885 }, { "epoch": 1.9095951192457017, "grad_norm": 0.21499434113502502, "learning_rate": 1.3017767862969487e-07, "loss": 0.507, "step": 6886 }, { "epoch": 1.9098724348308376, "grad_norm": 0.1968400925397873, "learning_rate": 1.293840765848736e-07, "loss": 0.4698, "step": 6887 }, { "epoch": 1.9101497504159735, "grad_norm": 0.20211312174797058, "learning_rate": 1.285928883749718e-07, "loss": 0.4916, "step": 6888 }, { "epoch": 1.9104270660011093, "grad_norm": 0.20148654282093048, "learning_rate": 1.2780411415437148e-07, "loss": 0.4794, "step": 6889 }, { "epoch": 1.9107043815862452, "grad_norm": 0.20846286416053772, "learning_rate": 1.2701775407698567e-07, "loss": 0.4847, "step": 6890 }, { "epoch": 1.910981697171381, "grad_norm": 0.2036297619342804, "learning_rate": 1.2623380829624997e-07, "loss": 0.4813, "step": 6891 }, { "epoch": 1.911259012756517, "grad_norm": 0.21009089052677155, "learning_rate": 1.2545227696513644e-07, "loss": 0.5258, "step": 6892 }, { "epoch": 1.9115363283416529, "grad_norm": 0.21154698729515076, "learning_rate": 1.2467316023613978e-07, "loss": 0.5118, "step": 6893 }, { "epoch": 1.9118136439267888, "grad_norm": 0.21194148063659668, "learning_rate": 1.2389645826128836e-07, "loss": 0.4956, "step": 6894 }, { "epoch": 1.9120909595119246, "grad_norm": 0.21466988325119019, "learning_rate": 1.2312217119213737e-07, "loss": 0.5134, "step": 6895 }, { "epoch": 1.9123682750970605, "grad_norm": 0.22028987109661102, "learning_rate": 1.223502991797687e-07, "loss": 0.5057, "step": 6896 }, { "epoch": 1.9126455906821964, "grad_norm": 0.21367661654949188, "learning_rate": 1.215808423747966e-07, "loss": 0.5005, "step": 6897 }, { "epoch": 1.9129229062673323, "grad_norm": 0.2160566747188568, "learning_rate": 1.208138009273621e-07, "loss": 0.5011, "step": 6898 }, { "epoch": 1.9132002218524682, "grad_norm": 0.21020996570587158, "learning_rate": 1.2004917498713576e-07, "loss": 0.5011, "step": 6899 }, { "epoch": 1.913477537437604, "grad_norm": 0.20518991351127625, "learning_rate": 1.1928696470331486e-07, "loss": 0.4979, "step": 6900 }, { "epoch": 1.91375485302274, "grad_norm": 0.20846955478191376, "learning_rate": 1.1852717022463045e-07, "loss": 0.5073, "step": 6901 }, { "epoch": 1.9140321686078758, "grad_norm": 0.21253100037574768, "learning_rate": 1.1776979169933478e-07, "loss": 0.5105, "step": 6902 }, { "epoch": 1.9143094841930117, "grad_norm": 0.22121791541576385, "learning_rate": 1.1701482927521241e-07, "loss": 0.4747, "step": 6903 }, { "epoch": 1.9145867997781476, "grad_norm": 0.20756427943706512, "learning_rate": 1.1626228309957881e-07, "loss": 0.493, "step": 6904 }, { "epoch": 1.9148641153632835, "grad_norm": 0.22023040056228638, "learning_rate": 1.1551215331927489e-07, "loss": 0.5037, "step": 6905 }, { "epoch": 1.9151414309484194, "grad_norm": 0.2466723471879959, "learning_rate": 1.1476444008067105e-07, "loss": 0.5177, "step": 6906 }, { "epoch": 1.9154187465335553, "grad_norm": 0.21187765896320343, "learning_rate": 1.1401914352966447e-07, "loss": 0.4916, "step": 6907 }, { "epoch": 1.9156960621186911, "grad_norm": 0.21347801387310028, "learning_rate": 1.1327626381168466e-07, "loss": 0.511, "step": 6908 }, { "epoch": 1.915973377703827, "grad_norm": 0.20320340991020203, "learning_rate": 1.125358010716851e-07, "loss": 0.5196, "step": 6909 }, { "epoch": 1.916250693288963, "grad_norm": 0.211409792304039, "learning_rate": 1.1179775545415022e-07, "loss": 0.4988, "step": 6910 }, { "epoch": 1.9165280088740988, "grad_norm": 0.20658671855926514, "learning_rate": 1.1106212710309261e-07, "loss": 0.499, "step": 6911 }, { "epoch": 1.9168053244592347, "grad_norm": 0.21737776696681976, "learning_rate": 1.1032891616205299e-07, "loss": 0.5227, "step": 6912 }, { "epoch": 1.9170826400443706, "grad_norm": 0.20637080073356628, "learning_rate": 1.0959812277410025e-07, "loss": 0.4793, "step": 6913 }, { "epoch": 1.9173599556295065, "grad_norm": 0.20913554728031158, "learning_rate": 1.0886974708183007e-07, "loss": 0.5227, "step": 6914 }, { "epoch": 1.9176372712146423, "grad_norm": 0.2132870852947235, "learning_rate": 1.081437892273704e-07, "loss": 0.4907, "step": 6915 }, { "epoch": 1.9179145867997782, "grad_norm": 0.20474767684936523, "learning_rate": 1.0742024935237322e-07, "loss": 0.5073, "step": 6916 }, { "epoch": 1.918191902384914, "grad_norm": 0.2082609385251999, "learning_rate": 1.0669912759802004e-07, "loss": 0.5381, "step": 6917 }, { "epoch": 1.91846921797005, "grad_norm": 0.21400731801986694, "learning_rate": 1.059804241050219e-07, "loss": 0.4944, "step": 6918 }, { "epoch": 1.9187465335551859, "grad_norm": 0.22101816534996033, "learning_rate": 1.0526413901361526e-07, "loss": 0.5161, "step": 6919 }, { "epoch": 1.9190238491403218, "grad_norm": 0.21787089109420776, "learning_rate": 1.0455027246356746e-07, "loss": 0.5062, "step": 6920 }, { "epoch": 1.9193011647254576, "grad_norm": 0.2018042355775833, "learning_rate": 1.0383882459417404e-07, "loss": 0.4633, "step": 6921 }, { "epoch": 1.9195784803105935, "grad_norm": 0.20412792265415192, "learning_rate": 1.031297955442559e-07, "loss": 0.5062, "step": 6922 }, { "epoch": 1.9198557958957294, "grad_norm": 0.214492067694664, "learning_rate": 1.0242318545216207e-07, "loss": 0.5178, "step": 6923 }, { "epoch": 1.9201331114808653, "grad_norm": 0.21588201820850372, "learning_rate": 1.0171899445577393e-07, "loss": 0.5249, "step": 6924 }, { "epoch": 1.9204104270660012, "grad_norm": 0.19817836582660675, "learning_rate": 1.0101722269249547e-07, "loss": 0.4964, "step": 6925 }, { "epoch": 1.920687742651137, "grad_norm": 0.20311301946640015, "learning_rate": 1.003178702992616e-07, "loss": 0.4804, "step": 6926 }, { "epoch": 1.920965058236273, "grad_norm": 0.20916207134723663, "learning_rate": 9.962093741253537e-08, "loss": 0.5013, "step": 6927 }, { "epoch": 1.9212423738214088, "grad_norm": 0.21295547485351562, "learning_rate": 9.892642416830522e-08, "loss": 0.492, "step": 6928 }, { "epoch": 1.9215196894065447, "grad_norm": 0.21146497130393982, "learning_rate": 9.823433070209053e-08, "loss": 0.5107, "step": 6929 }, { "epoch": 1.9217970049916806, "grad_norm": 0.2107129544019699, "learning_rate": 9.754465714893607e-08, "loss": 0.5023, "step": 6930 }, { "epoch": 1.9220743205768165, "grad_norm": 0.21256358921527863, "learning_rate": 9.685740364341611e-08, "loss": 0.4751, "step": 6931 }, { "epoch": 1.9223516361619524, "grad_norm": 0.21741509437561035, "learning_rate": 9.617257031963173e-08, "loss": 0.5264, "step": 6932 }, { "epoch": 1.9226289517470883, "grad_norm": 0.38610130548477173, "learning_rate": 9.549015731121353e-08, "loss": 0.5083, "step": 6933 }, { "epoch": 1.9229062673322241, "grad_norm": 0.2073136270046234, "learning_rate": 9.481016475131472e-08, "loss": 0.4922, "step": 6934 }, { "epoch": 1.92318358291736, "grad_norm": 0.21415603160858154, "learning_rate": 9.41325927726222e-08, "loss": 0.5014, "step": 6935 }, { "epoch": 1.923460898502496, "grad_norm": 0.23321330547332764, "learning_rate": 9.345744150734969e-08, "loss": 0.5141, "step": 6936 }, { "epoch": 1.9237382140876318, "grad_norm": 0.2120734453201294, "learning_rate": 9.278471108723347e-08, "loss": 0.4874, "step": 6937 }, { "epoch": 1.9240155296727677, "grad_norm": 0.2397221326828003, "learning_rate": 9.211440164354351e-08, "loss": 0.4939, "step": 6938 }, { "epoch": 1.9242928452579036, "grad_norm": 0.2101098895072937, "learning_rate": 9.144651330707659e-08, "loss": 0.5111, "step": 6939 }, { "epoch": 1.9245701608430394, "grad_norm": 0.20636944472789764, "learning_rate": 9.078104620815209e-08, "loss": 0.5037, "step": 6940 }, { "epoch": 1.9248474764281753, "grad_norm": 0.22542667388916016, "learning_rate": 9.011800047662028e-08, "loss": 0.5024, "step": 6941 }, { "epoch": 1.9251247920133112, "grad_norm": 0.19407561421394348, "learning_rate": 8.945737624186101e-08, "loss": 0.4894, "step": 6942 }, { "epoch": 1.925402107598447, "grad_norm": 0.21263688802719116, "learning_rate": 8.87991736327795e-08, "loss": 0.513, "step": 6943 }, { "epoch": 1.925679423183583, "grad_norm": 0.20905403792858124, "learning_rate": 8.814339277780636e-08, "loss": 0.512, "step": 6944 }, { "epoch": 1.9259567387687189, "grad_norm": 0.20334313809871674, "learning_rate": 8.74900338049045e-08, "loss": 0.5043, "step": 6945 }, { "epoch": 1.9262340543538548, "grad_norm": 0.2149227261543274, "learning_rate": 8.683909684155944e-08, "loss": 0.4717, "step": 6946 }, { "epoch": 1.9265113699389906, "grad_norm": 0.2087196409702301, "learning_rate": 8.619058201478763e-08, "loss": 0.5005, "step": 6947 }, { "epoch": 1.9267886855241265, "grad_norm": 0.2073797583580017, "learning_rate": 8.554448945113091e-08, "loss": 0.4941, "step": 6948 }, { "epoch": 1.9270660011092624, "grad_norm": 0.21620948612689972, "learning_rate": 8.490081927665927e-08, "loss": 0.4743, "step": 6949 }, { "epoch": 1.9273433166943983, "grad_norm": 0.211389422416687, "learning_rate": 8.425957161696946e-08, "loss": 0.4886, "step": 6950 }, { "epoch": 1.9276206322795342, "grad_norm": 0.20867092907428741, "learning_rate": 8.3620746597185e-08, "loss": 0.4856, "step": 6951 }, { "epoch": 1.92789794786467, "grad_norm": 0.2138075828552246, "learning_rate": 8.298434434196034e-08, "loss": 0.479, "step": 6952 }, { "epoch": 1.928175263449806, "grad_norm": 0.21532325446605682, "learning_rate": 8.235036497547116e-08, "loss": 0.4989, "step": 6953 }, { "epoch": 1.9284525790349418, "grad_norm": 0.19918455183506012, "learning_rate": 8.171880862142683e-08, "loss": 0.5055, "step": 6954 }, { "epoch": 1.9287298946200777, "grad_norm": 0.21148592233657837, "learning_rate": 8.108967540305795e-08, "loss": 0.4639, "step": 6955 }, { "epoch": 1.9290072102052136, "grad_norm": 0.20538489520549774, "learning_rate": 8.046296544312742e-08, "loss": 0.5217, "step": 6956 }, { "epoch": 1.9292845257903495, "grad_norm": 0.19862611591815948, "learning_rate": 7.983867886391938e-08, "loss": 0.4928, "step": 6957 }, { "epoch": 1.9295618413754854, "grad_norm": 0.2005883902311325, "learning_rate": 7.921681578725305e-08, "loss": 0.4719, "step": 6958 }, { "epoch": 1.9298391569606212, "grad_norm": 0.21354445815086365, "learning_rate": 7.859737633446745e-08, "loss": 0.5073, "step": 6959 }, { "epoch": 1.9301164725457571, "grad_norm": 0.20679736137390137, "learning_rate": 7.798036062643399e-08, "loss": 0.4767, "step": 6960 }, { "epoch": 1.930393788130893, "grad_norm": 0.2107972651720047, "learning_rate": 7.736576878354523e-08, "loss": 0.5154, "step": 6961 }, { "epoch": 1.930671103716029, "grad_norm": 0.21988734602928162, "learning_rate": 7.675360092572747e-08, "loss": 0.4996, "step": 6962 }, { "epoch": 1.9309484193011648, "grad_norm": 0.20176535844802856, "learning_rate": 7.6143857172431e-08, "loss": 0.4677, "step": 6963 }, { "epoch": 1.9312257348863007, "grad_norm": 0.21723569929599762, "learning_rate": 7.553653764263008e-08, "loss": 0.5311, "step": 6964 }, { "epoch": 1.9315030504714366, "grad_norm": 0.20594346523284912, "learning_rate": 7.49316424548313e-08, "loss": 0.4978, "step": 6965 }, { "epoch": 1.9317803660565724, "grad_norm": 0.2213924676179886, "learning_rate": 7.432917172706528e-08, "loss": 0.5089, "step": 6966 }, { "epoch": 1.9320576816417083, "grad_norm": 0.2052609771490097, "learning_rate": 7.372912557688933e-08, "loss": 0.4955, "step": 6967 }, { "epoch": 1.9323349972268442, "grad_norm": 0.2087954580783844, "learning_rate": 7.313150412138898e-08, "loss": 0.4674, "step": 6968 }, { "epoch": 1.93261231281198, "grad_norm": 0.20820866525173187, "learning_rate": 7.253630747717648e-08, "loss": 0.4871, "step": 6969 }, { "epoch": 1.932889628397116, "grad_norm": 0.21151137351989746, "learning_rate": 7.194353576038953e-08, "loss": 0.4889, "step": 6970 }, { "epoch": 1.9331669439822519, "grad_norm": 0.21804526448249817, "learning_rate": 7.135318908669392e-08, "loss": 0.4842, "step": 6971 }, { "epoch": 1.9334442595673877, "grad_norm": 0.21454410254955292, "learning_rate": 7.076526757128083e-08, "loss": 0.5052, "step": 6972 }, { "epoch": 1.9337215751525236, "grad_norm": 0.2476005256175995, "learning_rate": 7.0179771328871e-08, "loss": 0.5001, "step": 6973 }, { "epoch": 1.9339988907376595, "grad_norm": 0.20526906847953796, "learning_rate": 6.959670047371053e-08, "loss": 0.4873, "step": 6974 }, { "epoch": 1.9342762063227954, "grad_norm": 0.23170456290245056, "learning_rate": 6.901605511957093e-08, "loss": 0.4773, "step": 6975 }, { "epoch": 1.9345535219079313, "grad_norm": 0.21698127686977386, "learning_rate": 6.843783537974907e-08, "loss": 0.4895, "step": 6976 }, { "epoch": 1.9348308374930672, "grad_norm": 0.2072725147008896, "learning_rate": 6.786204136707691e-08, "loss": 0.5427, "step": 6977 }, { "epoch": 1.935108153078203, "grad_norm": 0.21492376923561096, "learning_rate": 6.728867319390209e-08, "loss": 0.5325, "step": 6978 }, { "epoch": 1.935385468663339, "grad_norm": 0.2331382930278778, "learning_rate": 6.671773097210593e-08, "loss": 0.4782, "step": 6979 }, { "epoch": 1.9356627842484748, "grad_norm": 0.2333361804485321, "learning_rate": 6.614921481309377e-08, "loss": 0.4814, "step": 6980 }, { "epoch": 1.9359400998336107, "grad_norm": 0.20312927663326263, "learning_rate": 6.55831248277991e-08, "loss": 0.4894, "step": 6981 }, { "epoch": 1.9362174154187466, "grad_norm": 0.21046309173107147, "learning_rate": 6.501946112668078e-08, "loss": 0.4759, "step": 6982 }, { "epoch": 1.9364947310038825, "grad_norm": 0.21393108367919922, "learning_rate": 6.445822381972305e-08, "loss": 0.4939, "step": 6983 }, { "epoch": 1.9367720465890184, "grad_norm": 0.20159265398979187, "learning_rate": 6.38994130164397e-08, "loss": 0.508, "step": 6984 }, { "epoch": 1.9370493621741542, "grad_norm": 0.2000395506620407, "learning_rate": 6.33430288258699e-08, "loss": 0.5001, "step": 6985 }, { "epoch": 1.9373266777592901, "grad_norm": 0.21411900222301483, "learning_rate": 6.27890713565768e-08, "loss": 0.4783, "step": 6986 }, { "epoch": 1.937603993344426, "grad_norm": 0.22993165254592896, "learning_rate": 6.22375407166545e-08, "loss": 0.491, "step": 6987 }, { "epoch": 1.937881308929562, "grad_norm": 0.21177081763744354, "learning_rate": 6.168843701371968e-08, "loss": 0.4968, "step": 6988 }, { "epoch": 1.9381586245146978, "grad_norm": 0.21067818999290466, "learning_rate": 6.114176035491859e-08, "loss": 0.5007, "step": 6989 }, { "epoch": 1.9384359400998337, "grad_norm": 0.2169368863105774, "learning_rate": 6.059751084692006e-08, "loss": 0.4761, "step": 6990 }, { "epoch": 1.9387132556849695, "grad_norm": 0.20408381521701813, "learning_rate": 6.005568859592386e-08, "loss": 0.5434, "step": 6991 }, { "epoch": 1.9389905712701054, "grad_norm": 0.2044132947921753, "learning_rate": 5.9516293707652385e-08, "loss": 0.4814, "step": 6992 }, { "epoch": 1.9392678868552413, "grad_norm": 0.2122509479522705, "learning_rate": 5.897932628735614e-08, "loss": 0.4915, "step": 6993 }, { "epoch": 1.9395452024403772, "grad_norm": 0.21428446471691132, "learning_rate": 5.844478643981383e-08, "loss": 0.5069, "step": 6994 }, { "epoch": 1.939822518025513, "grad_norm": 0.20293046534061432, "learning_rate": 5.791267426932395e-08, "loss": 0.5028, "step": 6995 }, { "epoch": 1.940099833610649, "grad_norm": 0.20903684198856354, "learning_rate": 5.7382989879720126e-08, "loss": 0.4971, "step": 6996 }, { "epoch": 1.9403771491957849, "grad_norm": 0.21450695395469666, "learning_rate": 5.6855733374354404e-08, "loss": 0.5173, "step": 6997 }, { "epoch": 1.9406544647809207, "grad_norm": 0.21470417082309723, "learning_rate": 5.633090485611114e-08, "loss": 0.5008, "step": 6998 }, { "epoch": 1.9409317803660566, "grad_norm": 0.20227287709712982, "learning_rate": 5.580850442739732e-08, "loss": 0.4925, "step": 6999 }, { "epoch": 1.9412090959511925, "grad_norm": 0.20225927233695984, "learning_rate": 5.5288532190145294e-08, "loss": 0.4843, "step": 7000 }, { "epoch": 1.9414864115363284, "grad_norm": 0.21021947264671326, "learning_rate": 5.4770988245818336e-08, "loss": 0.5028, "step": 7001 }, { "epoch": 1.9417637271214643, "grad_norm": 0.21199429035186768, "learning_rate": 5.4255872695400946e-08, "loss": 0.5071, "step": 7002 }, { "epoch": 1.9420410427066002, "grad_norm": 0.20802341401576996, "learning_rate": 5.374318563940717e-08, "loss": 0.4943, "step": 7003 }, { "epoch": 1.942318358291736, "grad_norm": 0.20316016674041748, "learning_rate": 5.323292717787504e-08, "loss": 0.491, "step": 7004 }, { "epoch": 1.942595673876872, "grad_norm": 0.20811815559864044, "learning_rate": 5.272509741037074e-08, "loss": 0.495, "step": 7005 }, { "epoch": 1.9428729894620078, "grad_norm": 0.2118585854768753, "learning_rate": 5.221969643598307e-08, "loss": 0.49, "step": 7006 }, { "epoch": 1.9431503050471437, "grad_norm": 0.2077036201953888, "learning_rate": 5.171672435333036e-08, "loss": 0.4795, "step": 7007 }, { "epoch": 1.9434276206322796, "grad_norm": 0.21370601654052734, "learning_rate": 5.121618126055633e-08, "loss": 0.4795, "step": 7008 }, { "epoch": 1.9437049362174155, "grad_norm": 0.20026057958602905, "learning_rate": 5.071806725532868e-08, "loss": 0.4859, "step": 7009 }, { "epoch": 1.9439822518025514, "grad_norm": 0.20249269902706146, "learning_rate": 5.022238243484467e-08, "loss": 0.5196, "step": 7010 }, { "epoch": 1.9442595673876872, "grad_norm": 0.2025049477815628, "learning_rate": 4.972912689582276e-08, "loss": 0.4916, "step": 7011 }, { "epoch": 1.9445368829728231, "grad_norm": 0.21317322552204132, "learning_rate": 4.923830073451374e-08, "loss": 0.4984, "step": 7012 }, { "epoch": 1.944814198557959, "grad_norm": 0.21346881985664368, "learning_rate": 4.8749904046688223e-08, "loss": 0.4878, "step": 7013 }, { "epoch": 1.945091514143095, "grad_norm": 0.21879152953624725, "learning_rate": 4.826393692764636e-08, "loss": 0.5174, "step": 7014 }, { "epoch": 1.9453688297282308, "grad_norm": 0.22700130939483643, "learning_rate": 4.77803994722123e-08, "loss": 0.4503, "step": 7015 }, { "epoch": 1.9456461453133667, "grad_norm": 0.20824943482875824, "learning_rate": 4.729929177473835e-08, "loss": 0.5011, "step": 7016 }, { "epoch": 1.9459234608985025, "grad_norm": 0.1998833268880844, "learning_rate": 4.682061392910081e-08, "loss": 0.4994, "step": 7017 }, { "epoch": 1.9462007764836384, "grad_norm": 0.2063128501176834, "learning_rate": 4.6344366028701346e-08, "loss": 0.5316, "step": 7018 }, { "epoch": 1.9464780920687743, "grad_norm": 0.20899119973182678, "learning_rate": 4.5870548166469796e-08, "loss": 0.4771, "step": 7019 }, { "epoch": 1.9467554076539102, "grad_norm": 0.20648007094860077, "learning_rate": 4.539916043485998e-08, "loss": 0.4892, "step": 7020 }, { "epoch": 1.947032723239046, "grad_norm": 0.2271704226732254, "learning_rate": 4.4930202925852484e-08, "loss": 0.5068, "step": 7021 }, { "epoch": 1.947310038824182, "grad_norm": 0.2094396948814392, "learning_rate": 4.446367573095328e-08, "loss": 0.5027, "step": 7022 }, { "epoch": 1.9475873544093179, "grad_norm": 0.20577369630336761, "learning_rate": 4.3999578941195107e-08, "loss": 0.5092, "step": 7023 }, { "epoch": 1.9478646699944537, "grad_norm": 0.20665308833122253, "learning_rate": 4.3537912647133305e-08, "loss": 0.4884, "step": 7024 }, { "epoch": 1.9481419855795896, "grad_norm": 0.21540111303329468, "learning_rate": 4.3078676938852755e-08, "loss": 0.5022, "step": 7025 }, { "epoch": 1.9484193011647255, "grad_norm": 0.20871970057487488, "learning_rate": 4.262187190596234e-08, "loss": 0.5195, "step": 7026 }, { "epoch": 1.9486966167498614, "grad_norm": 0.2026074379682541, "learning_rate": 4.21674976375977e-08, "loss": 0.5111, "step": 7027 }, { "epoch": 1.9489739323349973, "grad_norm": 0.20567013323307037, "learning_rate": 4.171555422241707e-08, "loss": 0.4755, "step": 7028 }, { "epoch": 1.9492512479201332, "grad_norm": 0.1948745846748352, "learning_rate": 4.1266041748608265e-08, "loss": 0.5021, "step": 7029 }, { "epoch": 1.949528563505269, "grad_norm": 0.2238130271434784, "learning_rate": 4.0818960303881656e-08, "loss": 0.4984, "step": 7030 }, { "epoch": 1.949805879090405, "grad_norm": 0.20423227548599243, "learning_rate": 4.03743099754772e-08, "loss": 0.495, "step": 7031 }, { "epoch": 1.9500831946755408, "grad_norm": 0.2025814950466156, "learning_rate": 3.9932090850156036e-08, "loss": 0.5147, "step": 7032 }, { "epoch": 1.9503605102606767, "grad_norm": 0.20636986196041107, "learning_rate": 3.949230301420609e-08, "loss": 0.496, "step": 7033 }, { "epoch": 1.9506378258458126, "grad_norm": 0.1991264522075653, "learning_rate": 3.905494655344483e-08, "loss": 0.4856, "step": 7034 }, { "epoch": 1.9509151414309485, "grad_norm": 0.21904256939888, "learning_rate": 3.862002155320815e-08, "loss": 0.4892, "step": 7035 }, { "epoch": 1.9511924570160843, "grad_norm": 0.21712666749954224, "learning_rate": 3.818752809836429e-08, "loss": 0.5127, "step": 7036 }, { "epoch": 1.9514697726012202, "grad_norm": 0.22889919579029083, "learning_rate": 3.775746627330268e-08, "loss": 0.4967, "step": 7037 }, { "epoch": 1.9517470881863561, "grad_norm": 0.21116408705711365, "learning_rate": 3.732983616193952e-08, "loss": 0.4953, "step": 7038 }, { "epoch": 1.952024403771492, "grad_norm": 0.2215505987405777, "learning_rate": 3.6904637847719195e-08, "loss": 0.4778, "step": 7039 }, { "epoch": 1.9523017193566279, "grad_norm": 0.21022702753543854, "learning_rate": 3.6481871413605874e-08, "loss": 0.4975, "step": 7040 }, { "epoch": 1.9525790349417638, "grad_norm": 0.20214217901229858, "learning_rate": 3.606153694209608e-08, "loss": 0.5121, "step": 7041 }, { "epoch": 1.9528563505268997, "grad_norm": 0.20192945003509521, "learning_rate": 3.5643634515204747e-08, "loss": 0.4975, "step": 7042 }, { "epoch": 1.9531336661120355, "grad_norm": 0.21496793627738953, "learning_rate": 3.522816421447778e-08, "loss": 0.5, "step": 7043 }, { "epoch": 1.9534109816971714, "grad_norm": 0.20407256484031677, "learning_rate": 3.4815126120983646e-08, "loss": 0.5053, "step": 7044 }, { "epoch": 1.9536882972823073, "grad_norm": 0.2113526612520218, "learning_rate": 3.4404520315316216e-08, "loss": 0.4911, "step": 7045 }, { "epoch": 1.9539656128674432, "grad_norm": 0.2122277021408081, "learning_rate": 3.399634687759751e-08, "loss": 0.5139, "step": 7046 }, { "epoch": 1.954242928452579, "grad_norm": 0.2064533829689026, "learning_rate": 3.359060588747354e-08, "loss": 0.5004, "step": 7047 }, { "epoch": 1.954520244037715, "grad_norm": 0.22271350026130676, "learning_rate": 3.318729742411153e-08, "loss": 0.5052, "step": 7048 }, { "epoch": 1.9547975596228508, "grad_norm": 0.20162583887577057, "learning_rate": 3.278642156620965e-08, "loss": 0.4936, "step": 7049 }, { "epoch": 1.9550748752079867, "grad_norm": 0.20813512802124023, "learning_rate": 3.238797839199143e-08, "loss": 0.4976, "step": 7050 }, { "epoch": 1.9553521907931226, "grad_norm": 0.20408669114112854, "learning_rate": 3.1991967979200235e-08, "loss": 0.4978, "step": 7051 }, { "epoch": 1.9556295063782585, "grad_norm": 0.2017277628183365, "learning_rate": 3.159839040511037e-08, "loss": 0.4948, "step": 7052 }, { "epoch": 1.9559068219633944, "grad_norm": 0.2124013453722, "learning_rate": 3.120724574651873e-08, "loss": 0.4853, "step": 7053 }, { "epoch": 1.9561841375485303, "grad_norm": 0.20675784349441528, "learning_rate": 3.0818534079747606e-08, "loss": 0.4972, "step": 7054 }, { "epoch": 1.9564614531336662, "grad_norm": 0.21523889899253845, "learning_rate": 3.043225548064465e-08, "loss": 0.5126, "step": 7055 }, { "epoch": 1.956738768718802, "grad_norm": 0.20514823496341705, "learning_rate": 3.004841002458431e-08, "loss": 0.5061, "step": 7056 }, { "epoch": 1.957016084303938, "grad_norm": 0.2089594602584839, "learning_rate": 2.966699778646359e-08, "loss": 0.4925, "step": 7057 }, { "epoch": 1.9572933998890738, "grad_norm": 0.21636748313903809, "learning_rate": 2.92880188407077e-08, "loss": 0.4895, "step": 7058 }, { "epoch": 1.9575707154742097, "grad_norm": 0.2242691069841385, "learning_rate": 2.8911473261264423e-08, "loss": 0.4819, "step": 7059 }, { "epoch": 1.9578480310593456, "grad_norm": 0.19989970326423645, "learning_rate": 2.853736112160693e-08, "loss": 0.514, "step": 7060 }, { "epoch": 1.9581253466444815, "grad_norm": 0.20352482795715332, "learning_rate": 2.8165682494736556e-08, "loss": 0.5165, "step": 7061 }, { "epoch": 1.9584026622296173, "grad_norm": 0.21366307139396667, "learning_rate": 2.7796437453177228e-08, "loss": 0.5171, "step": 7062 }, { "epoch": 1.9586799778147532, "grad_norm": 0.20763346552848816, "learning_rate": 2.7429626068976865e-08, "loss": 0.4874, "step": 7063 }, { "epoch": 1.958957293399889, "grad_norm": 0.20051513612270355, "learning_rate": 2.7065248413710166e-08, "loss": 0.4858, "step": 7064 }, { "epoch": 1.959234608985025, "grad_norm": 0.22556112706661224, "learning_rate": 2.6703304558478583e-08, "loss": 0.4879, "step": 7065 }, { "epoch": 1.9595119245701609, "grad_norm": 0.20919561386108398, "learning_rate": 2.634379457390618e-08, "loss": 0.4919, "step": 7066 }, { "epoch": 1.9597892401552968, "grad_norm": 0.20339058339595795, "learning_rate": 2.5986718530142396e-08, "loss": 0.5165, "step": 7067 }, { "epoch": 1.9600665557404326, "grad_norm": 0.19766461849212646, "learning_rate": 2.5632076496862058e-08, "loss": 0.4615, "step": 7068 }, { "epoch": 1.9603438713255685, "grad_norm": 0.20357239246368408, "learning_rate": 2.527986854326675e-08, "loss": 0.4637, "step": 7069 }, { "epoch": 1.9606211869107044, "grad_norm": 0.21048687398433685, "learning_rate": 2.493009473807928e-08, "loss": 0.485, "step": 7070 }, { "epoch": 1.9608985024958403, "grad_norm": 0.21230055391788483, "learning_rate": 2.4582755149551995e-08, "loss": 0.5122, "step": 7071 }, { "epoch": 1.9611758180809762, "grad_norm": 0.21410781145095825, "learning_rate": 2.4237849845459848e-08, "loss": 0.5042, "step": 7072 }, { "epoch": 1.961453133666112, "grad_norm": 0.2122282236814499, "learning_rate": 2.3895378893100394e-08, "loss": 0.5211, "step": 7073 }, { "epoch": 1.961730449251248, "grad_norm": 0.21611341834068298, "learning_rate": 2.3555342359302123e-08, "loss": 0.5021, "step": 7074 }, { "epoch": 1.9620077648363838, "grad_norm": 0.22394365072250366, "learning_rate": 2.321774031041335e-08, "loss": 0.5268, "step": 7075 }, { "epoch": 1.9622850804215197, "grad_norm": 0.21854601800441742, "learning_rate": 2.2882572812309156e-08, "loss": 0.4732, "step": 7076 }, { "epoch": 1.9625623960066556, "grad_norm": 0.21542960405349731, "learning_rate": 2.2549839930390004e-08, "loss": 0.5072, "step": 7077 }, { "epoch": 1.9628397115917915, "grad_norm": 0.2258591204881668, "learning_rate": 2.221954172958174e-08, "loss": 0.517, "step": 7078 }, { "epoch": 1.9631170271769274, "grad_norm": 0.22644679248332977, "learning_rate": 2.1891678274332804e-08, "loss": 0.4841, "step": 7079 }, { "epoch": 1.9633943427620633, "grad_norm": 0.19779692590236664, "learning_rate": 2.1566249628618417e-08, "loss": 0.4987, "step": 7080 }, { "epoch": 1.9636716583471991, "grad_norm": 0.21196384727954865, "learning_rate": 2.124325585593917e-08, "loss": 0.5227, "step": 7081 }, { "epoch": 1.963948973932335, "grad_norm": 0.20542475581169128, "learning_rate": 2.092269701931826e-08, "loss": 0.4849, "step": 7082 }, { "epoch": 1.964226289517471, "grad_norm": 0.1972956657409668, "learning_rate": 2.060457318130704e-08, "loss": 0.4605, "step": 7083 }, { "epoch": 1.9645036051026068, "grad_norm": 0.2096942663192749, "learning_rate": 2.028888440397947e-08, "loss": 0.4854, "step": 7084 }, { "epoch": 1.9647809206877427, "grad_norm": 0.20811447501182556, "learning_rate": 1.9975630748933493e-08, "loss": 0.4842, "step": 7085 }, { "epoch": 1.9650582362728786, "grad_norm": 0.21638718247413635, "learning_rate": 1.9664812277292442e-08, "loss": 0.5213, "step": 7086 }, { "epoch": 1.9653355518580145, "grad_norm": 0.22020205855369568, "learning_rate": 1.935642904970919e-08, "loss": 0.5084, "step": 7087 }, { "epoch": 1.9656128674431503, "grad_norm": 0.2070709615945816, "learning_rate": 1.9050481126353658e-08, "loss": 0.4763, "step": 7088 }, { "epoch": 1.9658901830282862, "grad_norm": 0.20314981043338776, "learning_rate": 1.8746968566926704e-08, "loss": 0.5041, "step": 7089 }, { "epoch": 1.966167498613422, "grad_norm": 0.21381008625030518, "learning_rate": 1.844589143064901e-08, "loss": 0.4862, "step": 7090 }, { "epoch": 1.966444814198558, "grad_norm": 0.2065730094909668, "learning_rate": 1.814724977627219e-08, "loss": 0.5071, "step": 7091 }, { "epoch": 1.9667221297836939, "grad_norm": 0.2038286328315735, "learning_rate": 1.7851043662066302e-08, "loss": 0.4935, "step": 7092 }, { "epoch": 1.9669994453688298, "grad_norm": 0.2133897840976715, "learning_rate": 1.7557273145830943e-08, "loss": 0.5179, "step": 7093 }, { "epoch": 1.9672767609539656, "grad_norm": 0.20114904642105103, "learning_rate": 1.726593828488693e-08, "loss": 0.5107, "step": 7094 }, { "epoch": 1.9675540765391015, "grad_norm": 0.21013131737709045, "learning_rate": 1.697703913608184e-08, "loss": 0.5173, "step": 7095 }, { "epoch": 1.9678313921242374, "grad_norm": 0.2039504200220108, "learning_rate": 1.669057575578864e-08, "loss": 0.4999, "step": 7096 }, { "epoch": 1.9681087077093733, "grad_norm": 0.20330029726028442, "learning_rate": 1.6406548199902893e-08, "loss": 0.5164, "step": 7097 }, { "epoch": 1.9683860232945092, "grad_norm": 0.20391713082790375, "learning_rate": 1.6124956523846934e-08, "loss": 0.505, "step": 7098 }, { "epoch": 1.968663338879645, "grad_norm": 0.2079223245382309, "learning_rate": 1.5845800782564314e-08, "loss": 0.4836, "step": 7099 }, { "epoch": 1.968940654464781, "grad_norm": 0.20631448924541473, "learning_rate": 1.5569081030529507e-08, "loss": 0.4875, "step": 7100 }, { "epoch": 1.9692179700499168, "grad_norm": 0.21116302907466888, "learning_rate": 1.5294797321734057e-08, "loss": 0.5115, "step": 7101 }, { "epoch": 1.9694952856350527, "grad_norm": 0.21221213042736053, "learning_rate": 1.5022949709700417e-08, "loss": 0.495, "step": 7102 }, { "epoch": 1.9697726012201886, "grad_norm": 0.20361442863941193, "learning_rate": 1.4753538247472277e-08, "loss": 0.4937, "step": 7103 }, { "epoch": 1.9700499168053245, "grad_norm": 0.2115209698677063, "learning_rate": 1.448656298761869e-08, "loss": 0.4856, "step": 7104 }, { "epoch": 1.9703272323904604, "grad_norm": 0.20742273330688477, "learning_rate": 1.42220239822341e-08, "loss": 0.4659, "step": 7105 }, { "epoch": 1.9706045479755963, "grad_norm": 0.20554955303668976, "learning_rate": 1.3959921282938327e-08, "loss": 0.5062, "step": 7106 }, { "epoch": 1.9708818635607321, "grad_norm": 0.20452530682086945, "learning_rate": 1.3700254940872404e-08, "loss": 0.4663, "step": 7107 }, { "epoch": 1.971159179145868, "grad_norm": 0.2129167914390564, "learning_rate": 1.3443025006705523e-08, "loss": 0.5172, "step": 7108 }, { "epoch": 1.971436494731004, "grad_norm": 0.21334435045719147, "learning_rate": 1.3188231530628092e-08, "loss": 0.513, "step": 7109 }, { "epoch": 1.9717138103161398, "grad_norm": 0.20458266139030457, "learning_rate": 1.2935874562360062e-08, "loss": 0.4769, "step": 7110 }, { "epoch": 1.9719911259012757, "grad_norm": 0.20730172097682953, "learning_rate": 1.2685954151141211e-08, "loss": 0.5029, "step": 7111 }, { "epoch": 1.9722684414864116, "grad_norm": 0.21345804631710052, "learning_rate": 1.2438470345738085e-08, "loss": 0.4918, "step": 7112 }, { "epoch": 1.9725457570715474, "grad_norm": 0.22697855532169342, "learning_rate": 1.2193423194439835e-08, "loss": 0.5038, "step": 7113 }, { "epoch": 1.9728230726566833, "grad_norm": 0.20521080493927002, "learning_rate": 1.1950812745063766e-08, "loss": 0.4882, "step": 7114 }, { "epoch": 1.9731003882418192, "grad_norm": 0.20702117681503296, "learning_rate": 1.1710639044948401e-08, "loss": 0.4954, "step": 7115 }, { "epoch": 1.973377703826955, "grad_norm": 0.21297289431095123, "learning_rate": 1.1472902140959029e-08, "loss": 0.5015, "step": 7116 }, { "epoch": 1.973655019412091, "grad_norm": 0.20684416592121124, "learning_rate": 1.1237602079483545e-08, "loss": 0.4632, "step": 7117 }, { "epoch": 1.9739323349972269, "grad_norm": 0.20604479312896729, "learning_rate": 1.100473890643522e-08, "loss": 0.495, "step": 7118 }, { "epoch": 1.9742096505823628, "grad_norm": 0.2087058126926422, "learning_rate": 1.0774312667251319e-08, "loss": 0.4967, "step": 7119 }, { "epoch": 1.9744869661674986, "grad_norm": 0.23602424561977386, "learning_rate": 1.0546323406895875e-08, "loss": 0.4913, "step": 7120 }, { "epoch": 1.9747642817526345, "grad_norm": 0.20307756960391998, "learning_rate": 1.0320771169854137e-08, "loss": 0.4753, "step": 7121 }, { "epoch": 1.9750415973377704, "grad_norm": 0.20893272757530212, "learning_rate": 1.0097656000136735e-08, "loss": 0.4845, "step": 7122 }, { "epoch": 1.9753189129229063, "grad_norm": 0.2109754979610443, "learning_rate": 9.876977941282451e-09, "loss": 0.4906, "step": 7123 }, { "epoch": 1.9755962285080422, "grad_norm": 0.20519696176052094, "learning_rate": 9.658737036347121e-09, "loss": 0.496, "step": 7124 }, { "epoch": 1.975873544093178, "grad_norm": 0.21880246698856354, "learning_rate": 9.442933327918902e-09, "loss": 0.517, "step": 7125 }, { "epoch": 1.976150859678314, "grad_norm": 0.21169376373291016, "learning_rate": 9.229566858105777e-09, "loss": 0.4915, "step": 7126 }, { "epoch": 1.9764281752634498, "grad_norm": 0.20725256204605103, "learning_rate": 9.01863766853972e-09, "loss": 0.4964, "step": 7127 }, { "epoch": 1.9767054908485857, "grad_norm": 0.22511859238147736, "learning_rate": 8.810145800379477e-09, "loss": 0.5046, "step": 7128 }, { "epoch": 1.9769828064337216, "grad_norm": 0.22022607922554016, "learning_rate": 8.60409129430917e-09, "loss": 0.4962, "step": 7129 }, { "epoch": 1.9772601220188575, "grad_norm": 0.21660543978214264, "learning_rate": 8.400474190532747e-09, "loss": 0.524, "step": 7130 }, { "epoch": 1.9775374376039934, "grad_norm": 0.21604527533054352, "learning_rate": 8.199294528783707e-09, "loss": 0.499, "step": 7131 }, { "epoch": 1.9778147531891292, "grad_norm": 0.20662792026996613, "learning_rate": 8.000552348315371e-09, "loss": 0.5365, "step": 7132 }, { "epoch": 1.9780920687742651, "grad_norm": 0.20979103446006775, "learning_rate": 7.804247687909216e-09, "loss": 0.4771, "step": 7133 }, { "epoch": 1.978369384359401, "grad_norm": 0.21938779950141907, "learning_rate": 7.610380585867937e-09, "loss": 0.498, "step": 7134 }, { "epoch": 1.978646699944537, "grad_norm": 0.20905134081840515, "learning_rate": 7.418951080020997e-09, "loss": 0.5047, "step": 7135 }, { "epoch": 1.9789240155296728, "grad_norm": 0.2120780348777771, "learning_rate": 7.229959207721848e-09, "loss": 0.492, "step": 7136 }, { "epoch": 1.9792013311148087, "grad_norm": 0.21072250604629517, "learning_rate": 7.043405005847936e-09, "loss": 0.503, "step": 7137 }, { "epoch": 1.9794786466999446, "grad_norm": 0.2067342847585678, "learning_rate": 6.859288510799311e-09, "loss": 0.5092, "step": 7138 }, { "epoch": 1.9797559622850804, "grad_norm": 0.21522685885429382, "learning_rate": 6.67760975850279e-09, "loss": 0.5161, "step": 7139 }, { "epoch": 1.9800332778702163, "grad_norm": 0.20429226756095886, "learning_rate": 6.498368784409181e-09, "loss": 0.5381, "step": 7140 }, { "epoch": 1.9803105934553522, "grad_norm": 0.2123131901025772, "learning_rate": 6.321565623494674e-09, "loss": 0.5146, "step": 7141 }, { "epoch": 1.980587909040488, "grad_norm": 0.21125838160514832, "learning_rate": 6.147200310253898e-09, "loss": 0.4832, "step": 7142 }, { "epoch": 1.980865224625624, "grad_norm": 0.20799988508224487, "learning_rate": 5.9752728787138005e-09, "loss": 0.4841, "step": 7143 }, { "epoch": 1.9811425402107599, "grad_norm": 0.22431087493896484, "learning_rate": 5.805783362421158e-09, "loss": 0.5086, "step": 7144 }, { "epoch": 1.9814198557958957, "grad_norm": 0.2249828279018402, "learning_rate": 5.6387317944481265e-09, "loss": 0.5089, "step": 7145 }, { "epoch": 1.9816971713810316, "grad_norm": 0.22779472172260284, "learning_rate": 5.474118207389467e-09, "loss": 0.5253, "step": 7146 }, { "epoch": 1.9819744869661675, "grad_norm": 0.21490508317947388, "learning_rate": 5.311942633366706e-09, "loss": 0.4918, "step": 7147 }, { "epoch": 1.9822518025513034, "grad_norm": 0.21175016462802887, "learning_rate": 5.152205104023977e-09, "loss": 0.4858, "step": 7148 }, { "epoch": 1.9825291181364393, "grad_norm": 0.22119320929050446, "learning_rate": 4.99490565053079e-09, "loss": 0.4984, "step": 7149 }, { "epoch": 1.9828064337215752, "grad_norm": 0.22225895524024963, "learning_rate": 4.840044303582036e-09, "loss": 0.5394, "step": 7150 }, { "epoch": 1.983083749306711, "grad_norm": 0.20062761008739471, "learning_rate": 4.687621093392436e-09, "loss": 0.4905, "step": 7151 }, { "epoch": 1.983361064891847, "grad_norm": 0.20917369425296783, "learning_rate": 4.537636049704863e-09, "loss": 0.5128, "step": 7152 }, { "epoch": 1.9836383804769828, "grad_norm": 0.21111933887004852, "learning_rate": 4.390089201786185e-09, "loss": 0.4886, "step": 7153 }, { "epoch": 1.9839156960621187, "grad_norm": 0.2019733041524887, "learning_rate": 4.244980578424485e-09, "loss": 0.4908, "step": 7154 }, { "epoch": 1.9841930116472546, "grad_norm": 0.21139049530029297, "learning_rate": 4.1023102079373875e-09, "loss": 0.5261, "step": 7155 }, { "epoch": 1.9844703272323905, "grad_norm": 0.2183750867843628, "learning_rate": 3.962078118162349e-09, "loss": 0.5286, "step": 7156 }, { "epoch": 1.9847476428175264, "grad_norm": 0.21051201224327087, "learning_rate": 3.824284336460815e-09, "loss": 0.4948, "step": 7157 }, { "epoch": 1.9850249584026622, "grad_norm": 0.21117429435253143, "learning_rate": 3.6889288897223872e-09, "loss": 0.4867, "step": 7158 }, { "epoch": 1.9853022739877981, "grad_norm": 0.20643287897109985, "learning_rate": 3.556011804356496e-09, "loss": 0.494, "step": 7159 }, { "epoch": 1.985579589572934, "grad_norm": 0.21045584976673126, "learning_rate": 3.425533106300727e-09, "loss": 0.5114, "step": 7160 }, { "epoch": 1.98585690515807, "grad_norm": 0.20693178474903107, "learning_rate": 3.297492821013881e-09, "loss": 0.5092, "step": 7161 }, { "epoch": 1.9861342207432058, "grad_norm": 0.19891990721225739, "learning_rate": 3.1718909734787526e-09, "loss": 0.4961, "step": 7162 }, { "epoch": 1.9864115363283417, "grad_norm": 0.21904566884040833, "learning_rate": 3.0487275882062906e-09, "loss": 0.4908, "step": 7163 }, { "epoch": 1.9866888519134775, "grad_norm": 0.21359799802303314, "learning_rate": 2.9280026892272715e-09, "loss": 0.4848, "step": 7164 }, { "epoch": 1.9869661674986134, "grad_norm": 0.21357576549053192, "learning_rate": 2.809716300097853e-09, "loss": 0.4776, "step": 7165 }, { "epoch": 1.9872434830837493, "grad_norm": 0.2025846391916275, "learning_rate": 2.6938684439009598e-09, "loss": 0.477, "step": 7166 }, { "epoch": 1.9875207986688852, "grad_norm": 0.20797260105609894, "learning_rate": 2.5804591432393442e-09, "loss": 0.4981, "step": 7167 }, { "epoch": 1.987798114254021, "grad_norm": 0.21296679973602295, "learning_rate": 2.469488420242527e-09, "loss": 0.4845, "step": 7168 }, { "epoch": 1.988075429839157, "grad_norm": 0.20188210904598236, "learning_rate": 2.3609562965654085e-09, "loss": 0.486, "step": 7169 }, { "epoch": 1.9883527454242929, "grad_norm": 0.2065025269985199, "learning_rate": 2.2548627933841047e-09, "loss": 0.4778, "step": 7170 }, { "epoch": 1.9886300610094287, "grad_norm": 0.21647189557552338, "learning_rate": 2.151207931400112e-09, "loss": 0.4972, "step": 7171 }, { "epoch": 1.9889073765945646, "grad_norm": 0.21635204553604126, "learning_rate": 2.0499917308403062e-09, "loss": 0.5197, "step": 7172 }, { "epoch": 1.9891846921797005, "grad_norm": 0.20818372070789337, "learning_rate": 1.951214211452779e-09, "loss": 0.4838, "step": 7173 }, { "epoch": 1.9894620077648364, "grad_norm": 0.1977744996547699, "learning_rate": 1.8548753925137773e-09, "loss": 0.4944, "step": 7174 }, { "epoch": 1.9897393233499723, "grad_norm": 0.38426437973976135, "learning_rate": 1.7609752928207657e-09, "loss": 0.5029, "step": 7175 }, { "epoch": 1.9900166389351082, "grad_norm": 0.22992363572120667, "learning_rate": 1.6695139306965869e-09, "loss": 0.5092, "step": 7176 }, { "epoch": 1.990293954520244, "grad_norm": 0.20251336693763733, "learning_rate": 1.580491323986688e-09, "loss": 0.4982, "step": 7177 }, { "epoch": 1.99057127010538, "grad_norm": 0.21631348133087158, "learning_rate": 1.4939074900618965e-09, "loss": 0.5016, "step": 7178 }, { "epoch": 1.9908485856905158, "grad_norm": 0.21560998260974884, "learning_rate": 1.4097624458184188e-09, "loss": 0.5206, "step": 7179 }, { "epoch": 1.9911259012756517, "grad_norm": 0.2111794650554657, "learning_rate": 1.328056207673678e-09, "loss": 0.5029, "step": 7180 }, { "epoch": 1.9914032168607876, "grad_norm": 0.21028463542461395, "learning_rate": 1.2487887915704766e-09, "loss": 0.5018, "step": 7181 }, { "epoch": 1.9916805324459235, "grad_norm": 0.2057926207780838, "learning_rate": 1.1719602129769968e-09, "loss": 0.5104, "step": 7182 }, { "epoch": 1.9919578480310594, "grad_norm": 0.2165459394454956, "learning_rate": 1.097570486885413e-09, "loss": 0.5189, "step": 7183 }, { "epoch": 1.9922351636161952, "grad_norm": 0.21551057696342468, "learning_rate": 1.0256196278091156e-09, "loss": 0.4608, "step": 7184 }, { "epoch": 1.9925124792013311, "grad_norm": 0.22477814555168152, "learning_rate": 9.561076497882626e-10, "loss": 0.4854, "step": 7185 }, { "epoch": 1.992789794786467, "grad_norm": 0.21325767040252686, "learning_rate": 8.890345663870037e-10, "loss": 0.5055, "step": 7186 }, { "epoch": 1.993067110371603, "grad_norm": 0.20826996862888336, "learning_rate": 8.244003906934806e-10, "loss": 0.5067, "step": 7187 }, { "epoch": 1.9933444259567388, "grad_norm": 0.2099534571170807, "learning_rate": 7.622051353184389e-10, "loss": 0.5104, "step": 7188 }, { "epoch": 1.9936217415418747, "grad_norm": 0.21604932844638824, "learning_rate": 7.024488123980044e-10, "loss": 0.4922, "step": 7189 }, { "epoch": 1.9938990571270105, "grad_norm": 0.21847958862781525, "learning_rate": 6.451314335922942e-10, "loss": 0.4976, "step": 7190 }, { "epoch": 1.9941763727121464, "grad_norm": 0.21057890355587006, "learning_rate": 5.902530100854175e-10, "loss": 0.5255, "step": 7191 }, { "epoch": 1.9944536882972823, "grad_norm": 0.20802158117294312, "learning_rate": 5.378135525868633e-10, "loss": 0.5211, "step": 7192 }, { "epoch": 1.9947310038824182, "grad_norm": 0.2087002694606781, "learning_rate": 4.878130713273365e-10, "loss": 0.5145, "step": 7193 }, { "epoch": 1.995008319467554, "grad_norm": 0.211053267121315, "learning_rate": 4.402515760629222e-10, "loss": 0.5037, "step": 7194 }, { "epoch": 1.99528563505269, "grad_norm": 0.20845288038253784, "learning_rate": 3.9512907607647256e-10, "loss": 0.4992, "step": 7195 }, { "epoch": 1.9955629506378258, "grad_norm": 0.2077481597661972, "learning_rate": 3.524455801706683e-10, "loss": 0.4787, "step": 7196 }, { "epoch": 1.9958402662229617, "grad_norm": 0.21687790751457214, "learning_rate": 3.1220109667357e-10, "loss": 0.488, "step": 7197 }, { "epoch": 1.9961175818080976, "grad_norm": 0.20425012707710266, "learning_rate": 2.743956334400055e-10, "loss": 0.5079, "step": 7198 }, { "epoch": 1.9963948973932335, "grad_norm": 0.19561026990413666, "learning_rate": 2.3902919784601905e-10, "loss": 0.4914, "step": 7199 }, { "epoch": 1.9966722129783694, "grad_norm": 0.20784446597099304, "learning_rate": 2.0610179679164676e-10, "loss": 0.4945, "step": 7200 }, { "epoch": 1.9969495285635053, "grad_norm": 0.19664216041564941, "learning_rate": 1.7561343670230436e-10, "loss": 0.4889, "step": 7201 }, { "epoch": 1.9972268441486412, "grad_norm": 0.20693275332450867, "learning_rate": 1.4756412352878724e-10, "loss": 0.5138, "step": 7202 }, { "epoch": 1.997504159733777, "grad_norm": 0.20792469382286072, "learning_rate": 1.2195386274171938e-10, "loss": 0.5086, "step": 7203 }, { "epoch": 1.997781475318913, "grad_norm": 0.2034110128879547, "learning_rate": 9.878265933987995e-11, "loss": 0.4793, "step": 7204 }, { "epoch": 1.9980587909040488, "grad_norm": 0.2156227082014084, "learning_rate": 7.805051784326445e-11, "loss": 0.5048, "step": 7205 }, { "epoch": 1.9983361064891847, "grad_norm": 0.20901308953762054, "learning_rate": 5.975744230002356e-11, "loss": 0.5019, "step": 7206 }, { "epoch": 1.9986134220743206, "grad_norm": 0.20007333159446716, "learning_rate": 4.3903436276748756e-11, "loss": 0.4837, "step": 7207 }, { "epoch": 1.9988907376594565, "grad_norm": 0.21206532418727875, "learning_rate": 3.048850286679894e-11, "loss": 0.4823, "step": 7208 }, { "epoch": 1.9991680532445923, "grad_norm": 0.2093973159790039, "learning_rate": 1.9512644690300452e-11, "loss": 0.4759, "step": 7209 }, { "epoch": 1.9994453688297282, "grad_norm": 0.2100788652896881, "learning_rate": 1.0975863887208171e-11, "loss": 0.5038, "step": 7210 }, { "epoch": 1.9997226844148641, "grad_norm": 0.2026882767677307, "learning_rate": 4.878162124244412e-12, "loss": 0.4975, "step": 7211 }, { "epoch": 2.0, "grad_norm": 0.23644179105758667, "learning_rate": 1.2195405907355905e-12, "loss": 0.5014, "step": 7212 }, { "epoch": 2.0, "eval_loss": 0.82587730884552, "eval_runtime": 439.0989, "eval_samples_per_second": 93.403, "eval_steps_per_second": 1.46, "step": 7212 } ], "logging_steps": 1, "max_steps": 7212, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 5.531044471940139e+20, "train_batch_size": 8, "trial_name": null, "trial_params": null }