diff --git "a/checkpoint-13572/trainer_state.json" "b/checkpoint-13572/trainer_state.json" new file mode 100644--- /dev/null +++ "b/checkpoint-13572/trainer_state.json" @@ -0,0 +1,95038 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.999901768172888, + "eval_steps": 500, + "global_step": 13572, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.00014734774066797642, + "grad_norm": 3.129801034927368, + "learning_rate": 5.0000000000000004e-08, + "loss": 0.924, + "step": 1 + }, + { + "epoch": 0.00029469548133595285, + "grad_norm": 3.4312047958374023, + "learning_rate": 1.0000000000000001e-07, + "loss": 0.9039, + "step": 2 + }, + { + "epoch": 0.0004420432220039293, + "grad_norm": 3.3184163570404053, + "learning_rate": 1.5000000000000002e-07, + "loss": 0.9394, + "step": 3 + }, + { + "epoch": 0.0005893909626719057, + "grad_norm": 3.8032283782958984, + "learning_rate": 2.0000000000000002e-07, + "loss": 0.9068, + "step": 4 + }, + { + "epoch": 0.0007367387033398821, + "grad_norm": 3.542307138442993, + "learning_rate": 2.5000000000000004e-07, + "loss": 0.9164, + "step": 5 + }, + { + "epoch": 0.0008840864440078585, + "grad_norm": 3.5166819095611572, + "learning_rate": 3.0000000000000004e-07, + "loss": 0.9296, + "step": 6 + }, + { + "epoch": 0.0010314341846758349, + "grad_norm": 3.3350446224212646, + "learning_rate": 3.5000000000000004e-07, + "loss": 0.9083, + "step": 7 + }, + { + "epoch": 0.0011787819253438114, + "grad_norm": 3.431854724884033, + "learning_rate": 4.0000000000000003e-07, + "loss": 0.9346, + "step": 8 + }, + { + "epoch": 0.0013261296660117877, + "grad_norm": 3.666396379470825, + "learning_rate": 4.5000000000000003e-07, + "loss": 0.9706, + "step": 9 + }, + { + "epoch": 0.0014734774066797642, + "grad_norm": 3.2273788452148438, + "learning_rate": 5.000000000000001e-07, + "loss": 0.9169, + "step": 10 + }, + { + "epoch": 0.0016208251473477406, + "grad_norm": 3.055265426635742, + "learning_rate": 5.5e-07, + "loss": 0.9313, + "step": 11 + }, + { + "epoch": 0.001768172888015717, + "grad_norm": 2.6600029468536377, + "learning_rate": 6.000000000000001e-07, + "loss": 0.882, + "step": 12 + }, + { + "epoch": 0.0019155206286836934, + "grad_norm": 2.631992816925049, + "learning_rate": 6.5e-07, + "loss": 0.9102, + "step": 13 + }, + { + "epoch": 0.0020628683693516697, + "grad_norm": 2.394164800643921, + "learning_rate": 7.000000000000001e-07, + "loss": 0.9217, + "step": 14 + }, + { + "epoch": 0.0022102161100196463, + "grad_norm": 2.2560245990753174, + "learning_rate": 7.5e-07, + "loss": 0.9376, + "step": 15 + }, + { + "epoch": 0.002357563850687623, + "grad_norm": 2.1331701278686523, + "learning_rate": 8.000000000000001e-07, + "loss": 0.9334, + "step": 16 + }, + { + "epoch": 0.0025049115913555993, + "grad_norm": 1.7134573459625244, + "learning_rate": 8.500000000000001e-07, + "loss": 0.9007, + "step": 17 + }, + { + "epoch": 0.0026522593320235754, + "grad_norm": 1.681143045425415, + "learning_rate": 9.000000000000001e-07, + "loss": 0.9217, + "step": 18 + }, + { + "epoch": 0.002799607072691552, + "grad_norm": 1.632788896560669, + "learning_rate": 9.500000000000001e-07, + "loss": 0.9042, + "step": 19 + }, + { + "epoch": 0.0029469548133595285, + "grad_norm": 1.3404014110565186, + "learning_rate": 1.0000000000000002e-06, + "loss": 0.8781, + "step": 20 + }, + { + "epoch": 0.003094302554027505, + "grad_norm": 1.320508360862732, + "learning_rate": 1.0500000000000001e-06, + "loss": 0.9114, + "step": 21 + }, + { + "epoch": 0.003241650294695481, + "grad_norm": 1.2331907749176025, + "learning_rate": 1.1e-06, + "loss": 0.8836, + "step": 22 + }, + { + "epoch": 0.0033889980353634577, + "grad_norm": 1.2161420583724976, + "learning_rate": 1.1500000000000002e-06, + "loss": 0.9353, + "step": 23 + }, + { + "epoch": 0.003536345776031434, + "grad_norm": 1.112533688545227, + "learning_rate": 1.2000000000000002e-06, + "loss": 0.8437, + "step": 24 + }, + { + "epoch": 0.0036836935166994107, + "grad_norm": 1.089389443397522, + "learning_rate": 1.25e-06, + "loss": 0.8852, + "step": 25 + }, + { + "epoch": 0.003831041257367387, + "grad_norm": 1.0788263082504272, + "learning_rate": 1.3e-06, + "loss": 0.9046, + "step": 26 + }, + { + "epoch": 0.003978388998035364, + "grad_norm": 1.0677697658538818, + "learning_rate": 1.3500000000000002e-06, + "loss": 0.8639, + "step": 27 + }, + { + "epoch": 0.0041257367387033395, + "grad_norm": 1.0027332305908203, + "learning_rate": 1.4000000000000001e-06, + "loss": 0.8769, + "step": 28 + }, + { + "epoch": 0.004273084479371316, + "grad_norm": 0.9867594242095947, + "learning_rate": 1.45e-06, + "loss": 0.8605, + "step": 29 + }, + { + "epoch": 0.0044204322200392925, + "grad_norm": 0.9173768162727356, + "learning_rate": 1.5e-06, + "loss": 0.8125, + "step": 30 + }, + { + "epoch": 0.004567779960707269, + "grad_norm": 0.9226269125938416, + "learning_rate": 1.5500000000000002e-06, + "loss": 0.8833, + "step": 31 + }, + { + "epoch": 0.004715127701375246, + "grad_norm": 0.8701996803283691, + "learning_rate": 1.6000000000000001e-06, + "loss": 0.8334, + "step": 32 + }, + { + "epoch": 0.004862475442043222, + "grad_norm": 0.9197446703910828, + "learning_rate": 1.6500000000000003e-06, + "loss": 0.8396, + "step": 33 + }, + { + "epoch": 0.005009823182711199, + "grad_norm": 0.8978206515312195, + "learning_rate": 1.7000000000000002e-06, + "loss": 0.8739, + "step": 34 + }, + { + "epoch": 0.005157170923379175, + "grad_norm": 0.8374379873275757, + "learning_rate": 1.75e-06, + "loss": 0.8352, + "step": 35 + }, + { + "epoch": 0.005304518664047151, + "grad_norm": 0.8223899602890015, + "learning_rate": 1.8000000000000001e-06, + "loss": 0.8297, + "step": 36 + }, + { + "epoch": 0.005451866404715127, + "grad_norm": 0.8278210759162903, + "learning_rate": 1.85e-06, + "loss": 0.8114, + "step": 37 + }, + { + "epoch": 0.005599214145383104, + "grad_norm": 0.7506674528121948, + "learning_rate": 1.9000000000000002e-06, + "loss": 0.792, + "step": 38 + }, + { + "epoch": 0.0057465618860510805, + "grad_norm": 0.7657833099365234, + "learning_rate": 1.9500000000000004e-06, + "loss": 0.8208, + "step": 39 + }, + { + "epoch": 0.005893909626719057, + "grad_norm": 0.7247297167778015, + "learning_rate": 2.0000000000000003e-06, + "loss": 0.8092, + "step": 40 + }, + { + "epoch": 0.0060412573673870335, + "grad_norm": 0.7272946834564209, + "learning_rate": 2.05e-06, + "loss": 0.8139, + "step": 41 + }, + { + "epoch": 0.00618860510805501, + "grad_norm": 0.7059181928634644, + "learning_rate": 2.1000000000000002e-06, + "loss": 0.8037, + "step": 42 + }, + { + "epoch": 0.006335952848722987, + "grad_norm": 0.7165780067443848, + "learning_rate": 2.15e-06, + "loss": 0.7998, + "step": 43 + }, + { + "epoch": 0.006483300589390962, + "grad_norm": 0.7106595039367676, + "learning_rate": 2.2e-06, + "loss": 0.821, + "step": 44 + }, + { + "epoch": 0.006630648330058939, + "grad_norm": 0.6854310035705566, + "learning_rate": 2.25e-06, + "loss": 0.7674, + "step": 45 + }, + { + "epoch": 0.006777996070726915, + "grad_norm": 0.7156155705451965, + "learning_rate": 2.3000000000000004e-06, + "loss": 0.7934, + "step": 46 + }, + { + "epoch": 0.006925343811394892, + "grad_norm": 0.7221450805664062, + "learning_rate": 2.35e-06, + "loss": 0.7856, + "step": 47 + }, + { + "epoch": 0.007072691552062868, + "grad_norm": 0.6528695225715637, + "learning_rate": 2.4000000000000003e-06, + "loss": 0.8028, + "step": 48 + }, + { + "epoch": 0.007220039292730845, + "grad_norm": 0.6800763607025146, + "learning_rate": 2.4500000000000003e-06, + "loss": 0.8391, + "step": 49 + }, + { + "epoch": 0.0073673870333988214, + "grad_norm": 0.6818342208862305, + "learning_rate": 2.5e-06, + "loss": 0.7841, + "step": 50 + }, + { + "epoch": 0.007514734774066798, + "grad_norm": 0.6392002105712891, + "learning_rate": 2.55e-06, + "loss": 0.7858, + "step": 51 + }, + { + "epoch": 0.007662082514734774, + "grad_norm": 0.6379413604736328, + "learning_rate": 2.6e-06, + "loss": 0.7654, + "step": 52 + }, + { + "epoch": 0.00780943025540275, + "grad_norm": 0.6733275651931763, + "learning_rate": 2.6500000000000005e-06, + "loss": 0.8016, + "step": 53 + }, + { + "epoch": 0.007956777996070728, + "grad_norm": 0.7060877680778503, + "learning_rate": 2.7000000000000004e-06, + "loss": 0.7617, + "step": 54 + }, + { + "epoch": 0.008104125736738703, + "grad_norm": 0.6240788102149963, + "learning_rate": 2.7500000000000004e-06, + "loss": 0.7905, + "step": 55 + }, + { + "epoch": 0.008251473477406679, + "grad_norm": 0.5977641940116882, + "learning_rate": 2.8000000000000003e-06, + "loss": 0.7507, + "step": 56 + }, + { + "epoch": 0.008398821218074656, + "grad_norm": 0.5974783301353455, + "learning_rate": 2.85e-06, + "loss": 0.7897, + "step": 57 + }, + { + "epoch": 0.008546168958742632, + "grad_norm": 0.6143400073051453, + "learning_rate": 2.9e-06, + "loss": 0.825, + "step": 58 + }, + { + "epoch": 0.00869351669941061, + "grad_norm": 0.6291998624801636, + "learning_rate": 2.95e-06, + "loss": 0.7673, + "step": 59 + }, + { + "epoch": 0.008840864440078585, + "grad_norm": 0.6375567317008972, + "learning_rate": 3e-06, + "loss": 0.7537, + "step": 60 + }, + { + "epoch": 0.008988212180746562, + "grad_norm": 0.5708982348442078, + "learning_rate": 3.05e-06, + "loss": 0.7656, + "step": 61 + }, + { + "epoch": 0.009135559921414538, + "grad_norm": 0.6130401492118835, + "learning_rate": 3.1000000000000004e-06, + "loss": 0.7535, + "step": 62 + }, + { + "epoch": 0.009282907662082516, + "grad_norm": 0.5916008353233337, + "learning_rate": 3.1500000000000003e-06, + "loss": 0.7521, + "step": 63 + }, + { + "epoch": 0.009430255402750491, + "grad_norm": 0.5866997241973877, + "learning_rate": 3.2000000000000003e-06, + "loss": 0.79, + "step": 64 + }, + { + "epoch": 0.009577603143418467, + "grad_norm": 0.590018093585968, + "learning_rate": 3.2500000000000002e-06, + "loss": 0.7725, + "step": 65 + }, + { + "epoch": 0.009724950884086444, + "grad_norm": 0.5969561338424683, + "learning_rate": 3.3000000000000006e-06, + "loss": 0.767, + "step": 66 + }, + { + "epoch": 0.00987229862475442, + "grad_norm": 0.6123926639556885, + "learning_rate": 3.3500000000000005e-06, + "loss": 0.7361, + "step": 67 + }, + { + "epoch": 0.010019646365422397, + "grad_norm": 0.5870373249053955, + "learning_rate": 3.4000000000000005e-06, + "loss": 0.7555, + "step": 68 + }, + { + "epoch": 0.010166994106090373, + "grad_norm": 0.5947421789169312, + "learning_rate": 3.45e-06, + "loss": 0.7642, + "step": 69 + }, + { + "epoch": 0.01031434184675835, + "grad_norm": 0.6133670806884766, + "learning_rate": 3.5e-06, + "loss": 0.7335, + "step": 70 + }, + { + "epoch": 0.010461689587426326, + "grad_norm": 0.5561818480491638, + "learning_rate": 3.5500000000000003e-06, + "loss": 0.7771, + "step": 71 + }, + { + "epoch": 0.010609037328094302, + "grad_norm": 0.5646201968193054, + "learning_rate": 3.6000000000000003e-06, + "loss": 0.7662, + "step": 72 + }, + { + "epoch": 0.010756385068762279, + "grad_norm": 0.6255005598068237, + "learning_rate": 3.65e-06, + "loss": 0.7743, + "step": 73 + }, + { + "epoch": 0.010903732809430255, + "grad_norm": 0.5652551651000977, + "learning_rate": 3.7e-06, + "loss": 0.7437, + "step": 74 + }, + { + "epoch": 0.011051080550098232, + "grad_norm": 0.5587124228477478, + "learning_rate": 3.7500000000000005e-06, + "loss": 0.7049, + "step": 75 + }, + { + "epoch": 0.011198428290766208, + "grad_norm": 0.6032604575157166, + "learning_rate": 3.8000000000000005e-06, + "loss": 0.7595, + "step": 76 + }, + { + "epoch": 0.011345776031434185, + "grad_norm": 0.5800550580024719, + "learning_rate": 3.85e-06, + "loss": 0.7246, + "step": 77 + }, + { + "epoch": 0.011493123772102161, + "grad_norm": 0.5628860592842102, + "learning_rate": 3.900000000000001e-06, + "loss": 0.742, + "step": 78 + }, + { + "epoch": 0.011640471512770138, + "grad_norm": 0.5780375599861145, + "learning_rate": 3.95e-06, + "loss": 0.7323, + "step": 79 + }, + { + "epoch": 0.011787819253438114, + "grad_norm": 0.5557669997215271, + "learning_rate": 4.000000000000001e-06, + "loss": 0.7034, + "step": 80 + }, + { + "epoch": 0.01193516699410609, + "grad_norm": 0.5715268850326538, + "learning_rate": 4.05e-06, + "loss": 0.7385, + "step": 81 + }, + { + "epoch": 0.012082514734774067, + "grad_norm": 0.5524431467056274, + "learning_rate": 4.1e-06, + "loss": 0.714, + "step": 82 + }, + { + "epoch": 0.012229862475442043, + "grad_norm": 0.5942098498344421, + "learning_rate": 4.15e-06, + "loss": 0.7304, + "step": 83 + }, + { + "epoch": 0.01237721021611002, + "grad_norm": 0.6087955236434937, + "learning_rate": 4.2000000000000004e-06, + "loss": 0.7605, + "step": 84 + }, + { + "epoch": 0.012524557956777996, + "grad_norm": 0.5730810761451721, + "learning_rate": 4.25e-06, + "loss": 0.6742, + "step": 85 + }, + { + "epoch": 0.012671905697445973, + "grad_norm": 0.5637152791023254, + "learning_rate": 4.3e-06, + "loss": 0.7474, + "step": 86 + }, + { + "epoch": 0.012819253438113949, + "grad_norm": 0.5650014877319336, + "learning_rate": 4.350000000000001e-06, + "loss": 0.7194, + "step": 87 + }, + { + "epoch": 0.012966601178781925, + "grad_norm": 0.5524791479110718, + "learning_rate": 4.4e-06, + "loss": 0.7678, + "step": 88 + }, + { + "epoch": 0.013113948919449902, + "grad_norm": 0.5611167550086975, + "learning_rate": 4.450000000000001e-06, + "loss": 0.6888, + "step": 89 + }, + { + "epoch": 0.013261296660117878, + "grad_norm": 0.5665087699890137, + "learning_rate": 4.5e-06, + "loss": 0.7424, + "step": 90 + }, + { + "epoch": 0.013408644400785855, + "grad_norm": 0.5724959969520569, + "learning_rate": 4.5500000000000005e-06, + "loss": 0.729, + "step": 91 + }, + { + "epoch": 0.01355599214145383, + "grad_norm": 0.5426538586616516, + "learning_rate": 4.600000000000001e-06, + "loss": 0.7552, + "step": 92 + }, + { + "epoch": 0.013703339882121808, + "grad_norm": 0.5653647780418396, + "learning_rate": 4.65e-06, + "loss": 0.7072, + "step": 93 + }, + { + "epoch": 0.013850687622789784, + "grad_norm": 0.5675098896026611, + "learning_rate": 4.7e-06, + "loss": 0.7142, + "step": 94 + }, + { + "epoch": 0.013998035363457761, + "grad_norm": 0.5661633610725403, + "learning_rate": 4.75e-06, + "loss": 0.7154, + "step": 95 + }, + { + "epoch": 0.014145383104125737, + "grad_norm": 0.5623870491981506, + "learning_rate": 4.800000000000001e-06, + "loss": 0.7508, + "step": 96 + }, + { + "epoch": 0.014292730844793712, + "grad_norm": 0.5721336007118225, + "learning_rate": 4.85e-06, + "loss": 0.7209, + "step": 97 + }, + { + "epoch": 0.01444007858546169, + "grad_norm": 0.5616310834884644, + "learning_rate": 4.9000000000000005e-06, + "loss": 0.7561, + "step": 98 + }, + { + "epoch": 0.014587426326129665, + "grad_norm": 0.5870972275733948, + "learning_rate": 4.95e-06, + "loss": 0.7438, + "step": 99 + }, + { + "epoch": 0.014734774066797643, + "grad_norm": 0.5912125110626221, + "learning_rate": 5e-06, + "loss": 0.6912, + "step": 100 + }, + { + "epoch": 0.014882121807465619, + "grad_norm": 0.5790248513221741, + "learning_rate": 4.999999969938086e-06, + "loss": 0.766, + "step": 101 + }, + { + "epoch": 0.015029469548133596, + "grad_norm": 0.5846655964851379, + "learning_rate": 4.999999879752346e-06, + "loss": 0.6991, + "step": 102 + }, + { + "epoch": 0.015176817288801572, + "grad_norm": 0.5630676746368408, + "learning_rate": 4.999999729442781e-06, + "loss": 0.7367, + "step": 103 + }, + { + "epoch": 0.015324165029469547, + "grad_norm": 0.5507757663726807, + "learning_rate": 4.999999519009395e-06, + "loss": 0.7055, + "step": 104 + }, + { + "epoch": 0.015471512770137525, + "grad_norm": 0.5379103422164917, + "learning_rate": 4.999999248452194e-06, + "loss": 0.7338, + "step": 105 + }, + { + "epoch": 0.0156188605108055, + "grad_norm": 0.5607940554618835, + "learning_rate": 4.999998917771183e-06, + "loss": 0.7319, + "step": 106 + }, + { + "epoch": 0.015766208251473478, + "grad_norm": 0.5536065101623535, + "learning_rate": 4.99999852696637e-06, + "loss": 0.7208, + "step": 107 + }, + { + "epoch": 0.015913555992141455, + "grad_norm": 0.5862002372741699, + "learning_rate": 4.999998076037766e-06, + "loss": 0.7421, + "step": 108 + }, + { + "epoch": 0.01606090373280943, + "grad_norm": 0.5625506043434143, + "learning_rate": 4.99999756498538e-06, + "loss": 0.7341, + "step": 109 + }, + { + "epoch": 0.016208251473477406, + "grad_norm": 0.5777937173843384, + "learning_rate": 4.999996993809226e-06, + "loss": 0.7099, + "step": 110 + }, + { + "epoch": 0.016355599214145384, + "grad_norm": 0.588497519493103, + "learning_rate": 4.9999963625093155e-06, + "loss": 0.7131, + "step": 111 + }, + { + "epoch": 0.016502946954813358, + "grad_norm": 0.5797510743141174, + "learning_rate": 4.999995671085666e-06, + "loss": 0.7377, + "step": 112 + }, + { + "epoch": 0.016650294695481335, + "grad_norm": 0.5680051445960999, + "learning_rate": 4.999994919538293e-06, + "loss": 0.7322, + "step": 113 + }, + { + "epoch": 0.016797642436149313, + "grad_norm": 0.5842293500900269, + "learning_rate": 4.9999941078672155e-06, + "loss": 0.7275, + "step": 114 + }, + { + "epoch": 0.01694499017681729, + "grad_norm": 0.5424667596817017, + "learning_rate": 4.999993236072451e-06, + "loss": 0.7167, + "step": 115 + }, + { + "epoch": 0.017092337917485264, + "grad_norm": 0.5480489134788513, + "learning_rate": 4.999992304154022e-06, + "loss": 0.73, + "step": 116 + }, + { + "epoch": 0.01723968565815324, + "grad_norm": 0.5907907485961914, + "learning_rate": 4.999991312111952e-06, + "loss": 0.7496, + "step": 117 + }, + { + "epoch": 0.01738703339882122, + "grad_norm": 0.5874444842338562, + "learning_rate": 4.999990259946262e-06, + "loss": 0.7366, + "step": 118 + }, + { + "epoch": 0.017534381139489196, + "grad_norm": 0.5477811694145203, + "learning_rate": 4.999989147656979e-06, + "loss": 0.7231, + "step": 119 + }, + { + "epoch": 0.01768172888015717, + "grad_norm": 0.5658272504806519, + "learning_rate": 4.999987975244131e-06, + "loss": 0.7533, + "step": 120 + }, + { + "epoch": 0.017829076620825147, + "grad_norm": 0.5710364580154419, + "learning_rate": 4.999986742707743e-06, + "loss": 0.7374, + "step": 121 + }, + { + "epoch": 0.017976424361493125, + "grad_norm": 0.5666940808296204, + "learning_rate": 4.999985450047847e-06, + "loss": 0.737, + "step": 122 + }, + { + "epoch": 0.0181237721021611, + "grad_norm": 0.5871756672859192, + "learning_rate": 4.999984097264473e-06, + "loss": 0.7126, + "step": 123 + }, + { + "epoch": 0.018271119842829076, + "grad_norm": 0.570758581161499, + "learning_rate": 4.999982684357655e-06, + "loss": 0.7365, + "step": 124 + }, + { + "epoch": 0.018418467583497054, + "grad_norm": 0.5508794784545898, + "learning_rate": 4.999981211327426e-06, + "loss": 0.69, + "step": 125 + }, + { + "epoch": 0.01856581532416503, + "grad_norm": 0.603508710861206, + "learning_rate": 4.999979678173821e-06, + "loss": 0.7486, + "step": 126 + }, + { + "epoch": 0.018713163064833005, + "grad_norm": 0.6261571049690247, + "learning_rate": 4.999978084896877e-06, + "loss": 0.7207, + "step": 127 + }, + { + "epoch": 0.018860510805500982, + "grad_norm": 0.5617467761039734, + "learning_rate": 4.9999764314966325e-06, + "loss": 0.7292, + "step": 128 + }, + { + "epoch": 0.01900785854616896, + "grad_norm": 0.5585967898368835, + "learning_rate": 4.999974717973127e-06, + "loss": 0.6994, + "step": 129 + }, + { + "epoch": 0.019155206286836934, + "grad_norm": 0.5821323394775391, + "learning_rate": 4.999972944326403e-06, + "loss": 0.7334, + "step": 130 + }, + { + "epoch": 0.01930255402750491, + "grad_norm": 0.5791561007499695, + "learning_rate": 4.999971110556503e-06, + "loss": 0.7326, + "step": 131 + }, + { + "epoch": 0.01944990176817289, + "grad_norm": 0.5690702795982361, + "learning_rate": 4.99996921666347e-06, + "loss": 0.7075, + "step": 132 + }, + { + "epoch": 0.019597249508840866, + "grad_norm": 0.550655722618103, + "learning_rate": 4.999967262647348e-06, + "loss": 0.6599, + "step": 133 + }, + { + "epoch": 0.01974459724950884, + "grad_norm": 0.5927152633666992, + "learning_rate": 4.999965248508188e-06, + "loss": 0.6676, + "step": 134 + }, + { + "epoch": 0.019891944990176817, + "grad_norm": 0.5529323816299438, + "learning_rate": 4.999963174246035e-06, + "loss": 0.7222, + "step": 135 + }, + { + "epoch": 0.020039292730844795, + "grad_norm": 0.5389238595962524, + "learning_rate": 4.9999610398609414e-06, + "loss": 0.7044, + "step": 136 + }, + { + "epoch": 0.02018664047151277, + "grad_norm": 0.6048428416252136, + "learning_rate": 4.999958845352957e-06, + "loss": 0.7135, + "step": 137 + }, + { + "epoch": 0.020333988212180746, + "grad_norm": 0.6134109497070312, + "learning_rate": 4.999956590722134e-06, + "loss": 0.7269, + "step": 138 + }, + { + "epoch": 0.020481335952848723, + "grad_norm": 0.5626540780067444, + "learning_rate": 4.999954275968529e-06, + "loss": 0.7225, + "step": 139 + }, + { + "epoch": 0.0206286836935167, + "grad_norm": 0.5344046354293823, + "learning_rate": 4.999951901092195e-06, + "loss": 0.713, + "step": 140 + }, + { + "epoch": 0.020776031434184675, + "grad_norm": 0.5763823390007019, + "learning_rate": 4.99994946609319e-06, + "loss": 0.7369, + "step": 141 + }, + { + "epoch": 0.020923379174852652, + "grad_norm": 0.5818661451339722, + "learning_rate": 4.999946970971574e-06, + "loss": 0.6831, + "step": 142 + }, + { + "epoch": 0.02107072691552063, + "grad_norm": 0.5761791467666626, + "learning_rate": 4.999944415727406e-06, + "loss": 0.6776, + "step": 143 + }, + { + "epoch": 0.021218074656188603, + "grad_norm": 0.600286066532135, + "learning_rate": 4.999941800360747e-06, + "loss": 0.721, + "step": 144 + }, + { + "epoch": 0.02136542239685658, + "grad_norm": 0.5801372528076172, + "learning_rate": 4.99993912487166e-06, + "loss": 0.7065, + "step": 145 + }, + { + "epoch": 0.021512770137524558, + "grad_norm": 0.5994334816932678, + "learning_rate": 4.999936389260209e-06, + "loss": 0.6733, + "step": 146 + }, + { + "epoch": 0.021660117878192536, + "grad_norm": 0.5985910296440125, + "learning_rate": 4.999933593526462e-06, + "loss": 0.7612, + "step": 147 + }, + { + "epoch": 0.02180746561886051, + "grad_norm": 0.5779752731323242, + "learning_rate": 4.999930737670483e-06, + "loss": 0.7007, + "step": 148 + }, + { + "epoch": 0.021954813359528487, + "grad_norm": 0.5668262243270874, + "learning_rate": 4.999927821692343e-06, + "loss": 0.7161, + "step": 149 + }, + { + "epoch": 0.022102161100196464, + "grad_norm": 0.5649608969688416, + "learning_rate": 4.999924845592112e-06, + "loss": 0.7105, + "step": 150 + }, + { + "epoch": 0.022249508840864442, + "grad_norm": 0.5541603565216064, + "learning_rate": 4.9999218093698595e-06, + "loss": 0.7218, + "step": 151 + }, + { + "epoch": 0.022396856581532416, + "grad_norm": 0.5511834025382996, + "learning_rate": 4.9999187130256615e-06, + "loss": 0.712, + "step": 152 + }, + { + "epoch": 0.022544204322200393, + "grad_norm": 0.5935576558113098, + "learning_rate": 4.99991555655959e-06, + "loss": 0.6911, + "step": 153 + }, + { + "epoch": 0.02269155206286837, + "grad_norm": 0.5938119888305664, + "learning_rate": 4.999912339971723e-06, + "loss": 0.7237, + "step": 154 + }, + { + "epoch": 0.022838899803536344, + "grad_norm": 0.5493853092193604, + "learning_rate": 4.999909063262136e-06, + "loss": 0.6927, + "step": 155 + }, + { + "epoch": 0.022986247544204322, + "grad_norm": 0.5935282707214355, + "learning_rate": 4.9999057264309085e-06, + "loss": 0.6668, + "step": 156 + }, + { + "epoch": 0.0231335952848723, + "grad_norm": 0.5724316239356995, + "learning_rate": 4.999902329478121e-06, + "loss": 0.7136, + "step": 157 + }, + { + "epoch": 0.023280943025540277, + "grad_norm": 0.6305427551269531, + "learning_rate": 4.999898872403855e-06, + "loss": 0.7225, + "step": 158 + }, + { + "epoch": 0.02342829076620825, + "grad_norm": 0.5600860118865967, + "learning_rate": 4.9998953552081935e-06, + "loss": 0.7263, + "step": 159 + }, + { + "epoch": 0.023575638506876228, + "grad_norm": 0.6034495234489441, + "learning_rate": 4.999891777891221e-06, + "loss": 0.7187, + "step": 160 + }, + { + "epoch": 0.023722986247544205, + "grad_norm": 0.6008214354515076, + "learning_rate": 4.9998881404530245e-06, + "loss": 0.6936, + "step": 161 + }, + { + "epoch": 0.02387033398821218, + "grad_norm": 0.6007279753684998, + "learning_rate": 4.9998844428936895e-06, + "loss": 0.6857, + "step": 162 + }, + { + "epoch": 0.024017681728880157, + "grad_norm": 0.5924290418624878, + "learning_rate": 4.999880685213308e-06, + "loss": 0.6967, + "step": 163 + }, + { + "epoch": 0.024165029469548134, + "grad_norm": 0.5520642399787903, + "learning_rate": 4.9998768674119666e-06, + "loss": 0.6714, + "step": 164 + }, + { + "epoch": 0.02431237721021611, + "grad_norm": 0.5977151989936829, + "learning_rate": 4.99987298948976e-06, + "loss": 0.6869, + "step": 165 + }, + { + "epoch": 0.024459724950884085, + "grad_norm": 0.5828975439071655, + "learning_rate": 4.999869051446781e-06, + "loss": 0.7089, + "step": 166 + }, + { + "epoch": 0.024607072691552063, + "grad_norm": 0.5520312786102295, + "learning_rate": 4.999865053283124e-06, + "loss": 0.6169, + "step": 167 + }, + { + "epoch": 0.02475442043222004, + "grad_norm": 0.630419909954071, + "learning_rate": 4.999860994998884e-06, + "loss": 0.7253, + "step": 168 + }, + { + "epoch": 0.024901768172888014, + "grad_norm": 0.5869994163513184, + "learning_rate": 4.999856876594159e-06, + "loss": 0.6953, + "step": 169 + }, + { + "epoch": 0.02504911591355599, + "grad_norm": 0.6180671453475952, + "learning_rate": 4.99985269806905e-06, + "loss": 0.7036, + "step": 170 + }, + { + "epoch": 0.02519646365422397, + "grad_norm": 0.5581749677658081, + "learning_rate": 4.999848459423655e-06, + "loss": 0.7101, + "step": 171 + }, + { + "epoch": 0.025343811394891946, + "grad_norm": 0.5623202323913574, + "learning_rate": 4.999844160658079e-06, + "loss": 0.6906, + "step": 172 + }, + { + "epoch": 0.02549115913555992, + "grad_norm": 0.6176874041557312, + "learning_rate": 4.999839801772422e-06, + "loss": 0.64, + "step": 173 + }, + { + "epoch": 0.025638506876227898, + "grad_norm": 0.5548398494720459, + "learning_rate": 4.999835382766789e-06, + "loss": 0.6958, + "step": 174 + }, + { + "epoch": 0.025785854616895875, + "grad_norm": 0.5907695889472961, + "learning_rate": 4.9998309036412894e-06, + "loss": 0.6687, + "step": 175 + }, + { + "epoch": 0.02593320235756385, + "grad_norm": 0.5565664768218994, + "learning_rate": 4.999826364396029e-06, + "loss": 0.6712, + "step": 176 + }, + { + "epoch": 0.026080550098231826, + "grad_norm": 0.6059663891792297, + "learning_rate": 4.999821765031116e-06, + "loss": 0.6998, + "step": 177 + }, + { + "epoch": 0.026227897838899804, + "grad_norm": 0.5943310856819153, + "learning_rate": 4.999817105546663e-06, + "loss": 0.7323, + "step": 178 + }, + { + "epoch": 0.02637524557956778, + "grad_norm": 0.5630499124526978, + "learning_rate": 4.99981238594278e-06, + "loss": 0.7241, + "step": 179 + }, + { + "epoch": 0.026522593320235755, + "grad_norm": 0.5933716893196106, + "learning_rate": 4.999807606219582e-06, + "loss": 0.68, + "step": 180 + }, + { + "epoch": 0.026669941060903733, + "grad_norm": 0.5709980726242065, + "learning_rate": 4.999802766377184e-06, + "loss": 0.7054, + "step": 181 + }, + { + "epoch": 0.02681728880157171, + "grad_norm": 0.6329121589660645, + "learning_rate": 4.999797866415702e-06, + "loss": 0.6808, + "step": 182 + }, + { + "epoch": 0.026964636542239687, + "grad_norm": 0.7589988708496094, + "learning_rate": 4.999792906335253e-06, + "loss": 0.6401, + "step": 183 + }, + { + "epoch": 0.02711198428290766, + "grad_norm": 0.5695599913597107, + "learning_rate": 4.999787886135958e-06, + "loss": 0.6896, + "step": 184 + }, + { + "epoch": 0.02725933202357564, + "grad_norm": 0.5521302223205566, + "learning_rate": 4.999782805817937e-06, + "loss": 0.7041, + "step": 185 + }, + { + "epoch": 0.027406679764243616, + "grad_norm": 0.6021455526351929, + "learning_rate": 4.999777665381311e-06, + "loss": 0.6602, + "step": 186 + }, + { + "epoch": 0.02755402750491159, + "grad_norm": 0.5883948802947998, + "learning_rate": 4.999772464826205e-06, + "loss": 0.7215, + "step": 187 + }, + { + "epoch": 0.027701375245579567, + "grad_norm": 0.5985521078109741, + "learning_rate": 4.999767204152744e-06, + "loss": 0.7098, + "step": 188 + }, + { + "epoch": 0.027848722986247545, + "grad_norm": 0.5609668493270874, + "learning_rate": 4.999761883361054e-06, + "loss": 0.6484, + "step": 189 + }, + { + "epoch": 0.027996070726915522, + "grad_norm": 0.5799615979194641, + "learning_rate": 4.999756502451263e-06, + "loss": 0.6957, + "step": 190 + }, + { + "epoch": 0.028143418467583496, + "grad_norm": 0.6021848917007446, + "learning_rate": 4.9997510614235014e-06, + "loss": 0.686, + "step": 191 + }, + { + "epoch": 0.028290766208251474, + "grad_norm": 0.5806272029876709, + "learning_rate": 4.999745560277898e-06, + "loss": 0.7014, + "step": 192 + }, + { + "epoch": 0.02843811394891945, + "grad_norm": 0.6537670493125916, + "learning_rate": 4.9997399990145864e-06, + "loss": 0.7032, + "step": 193 + }, + { + "epoch": 0.028585461689587425, + "grad_norm": 0.5661720633506775, + "learning_rate": 4.999734377633701e-06, + "loss": 0.7183, + "step": 194 + }, + { + "epoch": 0.028732809430255402, + "grad_norm": 0.5995674133300781, + "learning_rate": 4.999728696135377e-06, + "loss": 0.6833, + "step": 195 + }, + { + "epoch": 0.02888015717092338, + "grad_norm": 0.5943658351898193, + "learning_rate": 4.9997229545197485e-06, + "loss": 0.6934, + "step": 196 + }, + { + "epoch": 0.029027504911591357, + "grad_norm": 0.5916645526885986, + "learning_rate": 4.999717152786956e-06, + "loss": 0.6881, + "step": 197 + }, + { + "epoch": 0.02917485265225933, + "grad_norm": 0.6087749600410461, + "learning_rate": 4.9997112909371385e-06, + "loss": 0.6813, + "step": 198 + }, + { + "epoch": 0.02932220039292731, + "grad_norm": 0.6076786518096924, + "learning_rate": 4.999705368970437e-06, + "loss": 0.7158, + "step": 199 + }, + { + "epoch": 0.029469548133595286, + "grad_norm": 0.6161251664161682, + "learning_rate": 4.999699386886993e-06, + "loss": 0.6986, + "step": 200 + }, + { + "epoch": 0.02961689587426326, + "grad_norm": 0.6111221313476562, + "learning_rate": 4.999693344686952e-06, + "loss": 0.7066, + "step": 201 + }, + { + "epoch": 0.029764243614931237, + "grad_norm": 0.6318661570549011, + "learning_rate": 4.999687242370459e-06, + "loss": 0.6793, + "step": 202 + }, + { + "epoch": 0.029911591355599215, + "grad_norm": 0.5712698698043823, + "learning_rate": 4.99968107993766e-06, + "loss": 0.6775, + "step": 203 + }, + { + "epoch": 0.030058939096267192, + "grad_norm": 0.6100833415985107, + "learning_rate": 4.999674857388703e-06, + "loss": 0.6985, + "step": 204 + }, + { + "epoch": 0.030206286836935166, + "grad_norm": 0.5681303143501282, + "learning_rate": 4.999668574723738e-06, + "loss": 0.6811, + "step": 205 + }, + { + "epoch": 0.030353634577603143, + "grad_norm": 0.6123011112213135, + "learning_rate": 4.999662231942917e-06, + "loss": 0.7284, + "step": 206 + }, + { + "epoch": 0.03050098231827112, + "grad_norm": 0.5969845652580261, + "learning_rate": 4.999655829046391e-06, + "loss": 0.6348, + "step": 207 + }, + { + "epoch": 0.030648330058939095, + "grad_norm": 0.6324526071548462, + "learning_rate": 4.999649366034315e-06, + "loss": 0.718, + "step": 208 + }, + { + "epoch": 0.030795677799607072, + "grad_norm": 0.6196548938751221, + "learning_rate": 4.999642842906843e-06, + "loss": 0.6728, + "step": 209 + }, + { + "epoch": 0.03094302554027505, + "grad_norm": 0.5843265056610107, + "learning_rate": 4.999636259664135e-06, + "loss": 0.7007, + "step": 210 + }, + { + "epoch": 0.031090373280943027, + "grad_norm": 0.5697255730628967, + "learning_rate": 4.999629616306347e-06, + "loss": 0.7152, + "step": 211 + }, + { + "epoch": 0.031237721021611, + "grad_norm": 0.6080254912376404, + "learning_rate": 4.999622912833638e-06, + "loss": 0.6343, + "step": 212 + }, + { + "epoch": 0.03138506876227898, + "grad_norm": 0.5762299299240112, + "learning_rate": 4.99961614924617e-06, + "loss": 0.6731, + "step": 213 + }, + { + "epoch": 0.031532416502946956, + "grad_norm": 0.5736714601516724, + "learning_rate": 4.999609325544107e-06, + "loss": 0.6947, + "step": 214 + }, + { + "epoch": 0.03167976424361493, + "grad_norm": 0.5885283946990967, + "learning_rate": 4.9996024417276125e-06, + "loss": 0.6885, + "step": 215 + }, + { + "epoch": 0.03182711198428291, + "grad_norm": 0.5728049278259277, + "learning_rate": 4.999595497796851e-06, + "loss": 0.7125, + "step": 216 + }, + { + "epoch": 0.03197445972495088, + "grad_norm": 0.5572594404220581, + "learning_rate": 4.99958849375199e-06, + "loss": 0.7158, + "step": 217 + }, + { + "epoch": 0.03212180746561886, + "grad_norm": 0.5644719004631042, + "learning_rate": 4.999581429593199e-06, + "loss": 0.6643, + "step": 218 + }, + { + "epoch": 0.032269155206286836, + "grad_norm": 0.6084421277046204, + "learning_rate": 4.999574305320646e-06, + "loss": 0.722, + "step": 219 + }, + { + "epoch": 0.03241650294695481, + "grad_norm": 0.5983022451400757, + "learning_rate": 4.999567120934504e-06, + "loss": 0.6995, + "step": 220 + }, + { + "epoch": 0.03256385068762279, + "grad_norm": 0.5933680534362793, + "learning_rate": 4.999559876434945e-06, + "loss": 0.6649, + "step": 221 + }, + { + "epoch": 0.03271119842829077, + "grad_norm": 0.5512552857398987, + "learning_rate": 4.999552571822142e-06, + "loss": 0.6824, + "step": 222 + }, + { + "epoch": 0.032858546168958745, + "grad_norm": 0.6012586951255798, + "learning_rate": 4.999545207096274e-06, + "loss": 0.7033, + "step": 223 + }, + { + "epoch": 0.033005893909626716, + "grad_norm": 0.5734738707542419, + "learning_rate": 4.999537782257515e-06, + "loss": 0.6965, + "step": 224 + }, + { + "epoch": 0.03315324165029469, + "grad_norm": 0.5926839113235474, + "learning_rate": 4.999530297306046e-06, + "loss": 0.6424, + "step": 225 + }, + { + "epoch": 0.03330058939096267, + "grad_norm": 0.5874471664428711, + "learning_rate": 4.9995227522420444e-06, + "loss": 0.692, + "step": 226 + }, + { + "epoch": 0.03344793713163065, + "grad_norm": 0.6086294651031494, + "learning_rate": 4.999515147065693e-06, + "loss": 0.6683, + "step": 227 + }, + { + "epoch": 0.033595284872298625, + "grad_norm": 0.5498947501182556, + "learning_rate": 4.999507481777175e-06, + "loss": 0.6661, + "step": 228 + }, + { + "epoch": 0.0337426326129666, + "grad_norm": 0.6392776370048523, + "learning_rate": 4.9994997563766736e-06, + "loss": 0.6942, + "step": 229 + }, + { + "epoch": 0.03388998035363458, + "grad_norm": 0.609695553779602, + "learning_rate": 4.999491970864376e-06, + "loss": 0.6789, + "step": 230 + }, + { + "epoch": 0.03403732809430256, + "grad_norm": 0.5912224650382996, + "learning_rate": 4.999484125240469e-06, + "loss": 0.6688, + "step": 231 + }, + { + "epoch": 0.03418467583497053, + "grad_norm": 0.5829542875289917, + "learning_rate": 4.999476219505141e-06, + "loss": 0.6747, + "step": 232 + }, + { + "epoch": 0.034332023575638505, + "grad_norm": 0.5836968421936035, + "learning_rate": 4.999468253658583e-06, + "loss": 0.6763, + "step": 233 + }, + { + "epoch": 0.03447937131630648, + "grad_norm": 0.5648065209388733, + "learning_rate": 4.999460227700985e-06, + "loss": 0.6451, + "step": 234 + }, + { + "epoch": 0.03462671905697446, + "grad_norm": 0.5822215676307678, + "learning_rate": 4.9994521416325406e-06, + "loss": 0.6793, + "step": 235 + }, + { + "epoch": 0.03477406679764244, + "grad_norm": 0.5781381726264954, + "learning_rate": 4.999443995453445e-06, + "loss": 0.6735, + "step": 236 + }, + { + "epoch": 0.034921414538310415, + "grad_norm": 0.5796432495117188, + "learning_rate": 4.999435789163893e-06, + "loss": 0.6975, + "step": 237 + }, + { + "epoch": 0.03506876227897839, + "grad_norm": 0.5743250250816345, + "learning_rate": 4.9994275227640835e-06, + "loss": 0.6776, + "step": 238 + }, + { + "epoch": 0.03521611001964636, + "grad_norm": 0.6296905875205994, + "learning_rate": 4.999419196254215e-06, + "loss": 0.701, + "step": 239 + }, + { + "epoch": 0.03536345776031434, + "grad_norm": 0.6069874167442322, + "learning_rate": 4.999410809634485e-06, + "loss": 0.7025, + "step": 240 + }, + { + "epoch": 0.03551080550098232, + "grad_norm": 0.585936963558197, + "learning_rate": 4.9994023629050986e-06, + "loss": 0.6878, + "step": 241 + }, + { + "epoch": 0.035658153241650295, + "grad_norm": 0.5681853294372559, + "learning_rate": 4.999393856066257e-06, + "loss": 0.6942, + "step": 242 + }, + { + "epoch": 0.03580550098231827, + "grad_norm": 0.5562339425086975, + "learning_rate": 4.9993852891181664e-06, + "loss": 0.6785, + "step": 243 + }, + { + "epoch": 0.03595284872298625, + "grad_norm": 0.6018003821372986, + "learning_rate": 4.9993766620610315e-06, + "loss": 0.7185, + "step": 244 + }, + { + "epoch": 0.03610019646365423, + "grad_norm": 0.5651373267173767, + "learning_rate": 4.99936797489506e-06, + "loss": 0.6784, + "step": 245 + }, + { + "epoch": 0.0362475442043222, + "grad_norm": 0.5908582806587219, + "learning_rate": 4.999359227620461e-06, + "loss": 0.6624, + "step": 246 + }, + { + "epoch": 0.036394891944990175, + "grad_norm": 0.5711154937744141, + "learning_rate": 4.999350420237445e-06, + "loss": 0.6668, + "step": 247 + }, + { + "epoch": 0.03654223968565815, + "grad_norm": 0.5989816188812256, + "learning_rate": 4.999341552746223e-06, + "loss": 0.62, + "step": 248 + }, + { + "epoch": 0.03668958742632613, + "grad_norm": 0.643564760684967, + "learning_rate": 4.99933262514701e-06, + "loss": 0.6841, + "step": 249 + }, + { + "epoch": 0.03683693516699411, + "grad_norm": 0.5998784899711609, + "learning_rate": 4.999323637440019e-06, + "loss": 0.664, + "step": 250 + }, + { + "epoch": 0.036984282907662085, + "grad_norm": 0.5781920552253723, + "learning_rate": 4.999314589625467e-06, + "loss": 0.677, + "step": 251 + }, + { + "epoch": 0.03713163064833006, + "grad_norm": 0.5951316356658936, + "learning_rate": 4.999305481703571e-06, + "loss": 0.658, + "step": 252 + }, + { + "epoch": 0.03727897838899803, + "grad_norm": 0.5762894749641418, + "learning_rate": 4.999296313674551e-06, + "loss": 0.7223, + "step": 253 + }, + { + "epoch": 0.03742632612966601, + "grad_norm": 0.5917654037475586, + "learning_rate": 4.999287085538626e-06, + "loss": 0.6768, + "step": 254 + }, + { + "epoch": 0.03757367387033399, + "grad_norm": 0.6123010516166687, + "learning_rate": 4.99927779729602e-06, + "loss": 0.6987, + "step": 255 + }, + { + "epoch": 0.037721021611001965, + "grad_norm": 0.5924208760261536, + "learning_rate": 4.999268448946954e-06, + "loss": 0.7195, + "step": 256 + }, + { + "epoch": 0.03786836935166994, + "grad_norm": 0.5820088982582092, + "learning_rate": 4.9992590404916546e-06, + "loss": 0.6448, + "step": 257 + }, + { + "epoch": 0.03801571709233792, + "grad_norm": 0.5957354307174683, + "learning_rate": 4.999249571930348e-06, + "loss": 0.6494, + "step": 258 + }, + { + "epoch": 0.0381630648330059, + "grad_norm": 0.6102159023284912, + "learning_rate": 4.99924004326326e-06, + "loss": 0.6573, + "step": 259 + }, + { + "epoch": 0.03831041257367387, + "grad_norm": 0.5938486456871033, + "learning_rate": 4.999230454490622e-06, + "loss": 0.6987, + "step": 260 + }, + { + "epoch": 0.038457760314341845, + "grad_norm": 0.6142421960830688, + "learning_rate": 4.999220805612664e-06, + "loss": 0.6848, + "step": 261 + }, + { + "epoch": 0.03860510805500982, + "grad_norm": 0.5728561878204346, + "learning_rate": 4.999211096629618e-06, + "loss": 0.6724, + "step": 262 + }, + { + "epoch": 0.0387524557956778, + "grad_norm": 0.6079733371734619, + "learning_rate": 4.999201327541717e-06, + "loss": 0.667, + "step": 263 + }, + { + "epoch": 0.03889980353634578, + "grad_norm": 0.5763253569602966, + "learning_rate": 4.999191498349196e-06, + "loss": 0.7063, + "step": 264 + }, + { + "epoch": 0.039047151277013754, + "grad_norm": 0.583901047706604, + "learning_rate": 4.9991816090522924e-06, + "loss": 0.681, + "step": 265 + }, + { + "epoch": 0.03919449901768173, + "grad_norm": 0.6192635297775269, + "learning_rate": 4.999171659651242e-06, + "loss": 0.668, + "step": 266 + }, + { + "epoch": 0.0393418467583497, + "grad_norm": 0.6041098833084106, + "learning_rate": 4.999161650146287e-06, + "loss": 0.6876, + "step": 267 + }, + { + "epoch": 0.03948919449901768, + "grad_norm": 0.5708252191543579, + "learning_rate": 4.999151580537666e-06, + "loss": 0.6635, + "step": 268 + }, + { + "epoch": 0.03963654223968566, + "grad_norm": 0.5858175754547119, + "learning_rate": 4.999141450825621e-06, + "loss": 0.6737, + "step": 269 + }, + { + "epoch": 0.039783889980353634, + "grad_norm": 0.5576506853103638, + "learning_rate": 4.999131261010396e-06, + "loss": 0.6853, + "step": 270 + }, + { + "epoch": 0.03993123772102161, + "grad_norm": 0.5891148447990417, + "learning_rate": 4.999121011092237e-06, + "loss": 0.694, + "step": 271 + }, + { + "epoch": 0.04007858546168959, + "grad_norm": 0.5862632393836975, + "learning_rate": 4.99911070107139e-06, + "loss": 0.6948, + "step": 272 + }, + { + "epoch": 0.04022593320235757, + "grad_norm": 0.6119864583015442, + "learning_rate": 4.999100330948102e-06, + "loss": 0.7091, + "step": 273 + }, + { + "epoch": 0.04037328094302554, + "grad_norm": 0.593101978302002, + "learning_rate": 4.9990899007226245e-06, + "loss": 0.6682, + "step": 274 + }, + { + "epoch": 0.040520628683693515, + "grad_norm": 0.5942928194999695, + "learning_rate": 4.999079410395206e-06, + "loss": 0.6646, + "step": 275 + }, + { + "epoch": 0.04066797642436149, + "grad_norm": 0.5898528695106506, + "learning_rate": 4.9990688599661e-06, + "loss": 0.7039, + "step": 276 + }, + { + "epoch": 0.04081532416502947, + "grad_norm": 0.5815855264663696, + "learning_rate": 4.99905824943556e-06, + "loss": 0.6695, + "step": 277 + }, + { + "epoch": 0.04096267190569745, + "grad_norm": 0.5760469436645508, + "learning_rate": 4.999047578803841e-06, + "loss": 0.6911, + "step": 278 + }, + { + "epoch": 0.041110019646365424, + "grad_norm": 0.5940120220184326, + "learning_rate": 4.999036848071201e-06, + "loss": 0.6722, + "step": 279 + }, + { + "epoch": 0.0412573673870334, + "grad_norm": 0.560375988483429, + "learning_rate": 4.999026057237896e-06, + "loss": 0.6491, + "step": 280 + }, + { + "epoch": 0.04140471512770137, + "grad_norm": 0.579648494720459, + "learning_rate": 4.999015206304187e-06, + "loss": 0.6931, + "step": 281 + }, + { + "epoch": 0.04155206286836935, + "grad_norm": 0.5718643665313721, + "learning_rate": 4.999004295270335e-06, + "loss": 0.6582, + "step": 282 + }, + { + "epoch": 0.04169941060903733, + "grad_norm": 0.6174735426902771, + "learning_rate": 4.998993324136599e-06, + "loss": 0.699, + "step": 283 + }, + { + "epoch": 0.041846758349705304, + "grad_norm": 0.6282663941383362, + "learning_rate": 4.9989822929032485e-06, + "loss": 0.6814, + "step": 284 + }, + { + "epoch": 0.04199410609037328, + "grad_norm": 0.6392083168029785, + "learning_rate": 4.998971201570545e-06, + "loss": 0.6562, + "step": 285 + }, + { + "epoch": 0.04214145383104126, + "grad_norm": 0.5875017046928406, + "learning_rate": 4.9989600501387555e-06, + "loss": 0.6685, + "step": 286 + }, + { + "epoch": 0.042288801571709236, + "grad_norm": 0.5908867120742798, + "learning_rate": 4.998948838608149e-06, + "loss": 0.6602, + "step": 287 + }, + { + "epoch": 0.04243614931237721, + "grad_norm": 0.605904757976532, + "learning_rate": 4.998937566978996e-06, + "loss": 0.7002, + "step": 288 + }, + { + "epoch": 0.042583497053045184, + "grad_norm": 0.5994600653648376, + "learning_rate": 4.998926235251566e-06, + "loss": 0.6716, + "step": 289 + }, + { + "epoch": 0.04273084479371316, + "grad_norm": 0.5810460448265076, + "learning_rate": 4.998914843426132e-06, + "loss": 0.688, + "step": 290 + }, + { + "epoch": 0.04287819253438114, + "grad_norm": 0.5910282135009766, + "learning_rate": 4.998903391502969e-06, + "loss": 0.6953, + "step": 291 + }, + { + "epoch": 0.043025540275049116, + "grad_norm": 0.5942459106445312, + "learning_rate": 4.99889187948235e-06, + "loss": 0.6643, + "step": 292 + }, + { + "epoch": 0.043172888015717094, + "grad_norm": 0.6052362322807312, + "learning_rate": 4.998880307364555e-06, + "loss": 0.6489, + "step": 293 + }, + { + "epoch": 0.04332023575638507, + "grad_norm": 0.5574461817741394, + "learning_rate": 4.998868675149859e-06, + "loss": 0.6845, + "step": 294 + }, + { + "epoch": 0.04346758349705305, + "grad_norm": 0.5911553502082825, + "learning_rate": 4.9988569828385445e-06, + "loss": 0.6815, + "step": 295 + }, + { + "epoch": 0.04361493123772102, + "grad_norm": 0.5817576050758362, + "learning_rate": 4.998845230430891e-06, + "loss": 0.6991, + "step": 296 + }, + { + "epoch": 0.043762278978388996, + "grad_norm": 0.6104317307472229, + "learning_rate": 4.9988334179271825e-06, + "loss": 0.6839, + "step": 297 + }, + { + "epoch": 0.043909626719056974, + "grad_norm": 0.6044988036155701, + "learning_rate": 4.998821545327702e-06, + "loss": 0.6641, + "step": 298 + }, + { + "epoch": 0.04405697445972495, + "grad_norm": 0.6226392388343811, + "learning_rate": 4.9988096126327345e-06, + "loss": 0.7124, + "step": 299 + }, + { + "epoch": 0.04420432220039293, + "grad_norm": 0.5848937630653381, + "learning_rate": 4.998797619842569e-06, + "loss": 0.6877, + "step": 300 + }, + { + "epoch": 0.044351669941060906, + "grad_norm": 0.5567523241043091, + "learning_rate": 4.998785566957493e-06, + "loss": 0.6986, + "step": 301 + }, + { + "epoch": 0.044499017681728883, + "grad_norm": 0.6181151270866394, + "learning_rate": 4.998773453977794e-06, + "loss": 0.6734, + "step": 302 + }, + { + "epoch": 0.044646365422396854, + "grad_norm": 0.6389240622520447, + "learning_rate": 4.998761280903767e-06, + "loss": 0.6618, + "step": 303 + }, + { + "epoch": 0.04479371316306483, + "grad_norm": 0.5885164141654968, + "learning_rate": 4.998749047735703e-06, + "loss": 0.6928, + "step": 304 + }, + { + "epoch": 0.04494106090373281, + "grad_norm": 0.6291882991790771, + "learning_rate": 4.9987367544738965e-06, + "loss": 0.6441, + "step": 305 + }, + { + "epoch": 0.045088408644400786, + "grad_norm": 0.5940127968788147, + "learning_rate": 4.998724401118643e-06, + "loss": 0.6506, + "step": 306 + }, + { + "epoch": 0.045235756385068764, + "grad_norm": 0.598956823348999, + "learning_rate": 4.99871198767024e-06, + "loss": 0.6537, + "step": 307 + }, + { + "epoch": 0.04538310412573674, + "grad_norm": 0.5739148259162903, + "learning_rate": 4.998699514128985e-06, + "loss": 0.6512, + "step": 308 + }, + { + "epoch": 0.04553045186640472, + "grad_norm": 0.6061959266662598, + "learning_rate": 4.998686980495179e-06, + "loss": 0.6954, + "step": 309 + }, + { + "epoch": 0.04567779960707269, + "grad_norm": 0.5968853831291199, + "learning_rate": 4.998674386769122e-06, + "loss": 0.6832, + "step": 310 + }, + { + "epoch": 0.045825147347740666, + "grad_norm": 0.6077551245689392, + "learning_rate": 4.998661732951119e-06, + "loss": 0.6856, + "step": 311 + }, + { + "epoch": 0.045972495088408644, + "grad_norm": 0.5952416062355042, + "learning_rate": 4.998649019041474e-06, + "loss": 0.6669, + "step": 312 + }, + { + "epoch": 0.04611984282907662, + "grad_norm": 0.5807070136070251, + "learning_rate": 4.99863624504049e-06, + "loss": 0.6884, + "step": 313 + }, + { + "epoch": 0.0462671905697446, + "grad_norm": 0.6058803796768188, + "learning_rate": 4.998623410948478e-06, + "loss": 0.6562, + "step": 314 + }, + { + "epoch": 0.046414538310412576, + "grad_norm": 0.5954038500785828, + "learning_rate": 4.9986105167657435e-06, + "loss": 0.6951, + "step": 315 + }, + { + "epoch": 0.04656188605108055, + "grad_norm": 0.6376572847366333, + "learning_rate": 4.9985975624926e-06, + "loss": 0.6444, + "step": 316 + }, + { + "epoch": 0.046709233791748524, + "grad_norm": 0.6021691560745239, + "learning_rate": 4.998584548129355e-06, + "loss": 0.6543, + "step": 317 + }, + { + "epoch": 0.0468565815324165, + "grad_norm": 0.6379029750823975, + "learning_rate": 4.9985714736763245e-06, + "loss": 0.6467, + "step": 318 + }, + { + "epoch": 0.04700392927308448, + "grad_norm": 0.5899917483329773, + "learning_rate": 4.998558339133822e-06, + "loss": 0.63, + "step": 319 + }, + { + "epoch": 0.047151277013752456, + "grad_norm": 0.5967628359794617, + "learning_rate": 4.998545144502163e-06, + "loss": 0.715, + "step": 320 + }, + { + "epoch": 0.04729862475442043, + "grad_norm": 0.58378666639328, + "learning_rate": 4.998531889781665e-06, + "loss": 0.6539, + "step": 321 + }, + { + "epoch": 0.04744597249508841, + "grad_norm": 0.6415468454360962, + "learning_rate": 4.998518574972647e-06, + "loss": 0.6635, + "step": 322 + }, + { + "epoch": 0.04759332023575639, + "grad_norm": 0.5816906690597534, + "learning_rate": 4.99850520007543e-06, + "loss": 0.6683, + "step": 323 + }, + { + "epoch": 0.04774066797642436, + "grad_norm": 0.5543171763420105, + "learning_rate": 4.9984917650903345e-06, + "loss": 0.61, + "step": 324 + }, + { + "epoch": 0.047888015717092336, + "grad_norm": 0.6226346492767334, + "learning_rate": 4.998478270017683e-06, + "loss": 0.6606, + "step": 325 + }, + { + "epoch": 0.04803536345776031, + "grad_norm": 0.597787082195282, + "learning_rate": 4.9984647148578015e-06, + "loss": 0.6705, + "step": 326 + }, + { + "epoch": 0.04818271119842829, + "grad_norm": 0.6018844842910767, + "learning_rate": 4.998451099611016e-06, + "loss": 0.6584, + "step": 327 + }, + { + "epoch": 0.04833005893909627, + "grad_norm": 0.5828070640563965, + "learning_rate": 4.998437424277652e-06, + "loss": 0.6976, + "step": 328 + }, + { + "epoch": 0.048477406679764246, + "grad_norm": 0.5907812118530273, + "learning_rate": 4.998423688858041e-06, + "loss": 0.7091, + "step": 329 + }, + { + "epoch": 0.04862475442043222, + "grad_norm": 0.6007378697395325, + "learning_rate": 4.998409893352512e-06, + "loss": 0.6848, + "step": 330 + }, + { + "epoch": 0.04877210216110019, + "grad_norm": 0.6577190160751343, + "learning_rate": 4.998396037761397e-06, + "loss": 0.6837, + "step": 331 + }, + { + "epoch": 0.04891944990176817, + "grad_norm": 0.5950509309768677, + "learning_rate": 4.998382122085029e-06, + "loss": 0.6658, + "step": 332 + }, + { + "epoch": 0.04906679764243615, + "grad_norm": 0.6271317005157471, + "learning_rate": 4.9983681463237425e-06, + "loss": 0.6799, + "step": 333 + }, + { + "epoch": 0.049214145383104126, + "grad_norm": 0.6022440791130066, + "learning_rate": 4.998354110477874e-06, + "loss": 0.6681, + "step": 334 + }, + { + "epoch": 0.0493614931237721, + "grad_norm": 0.630835771560669, + "learning_rate": 4.998340014547761e-06, + "loss": 0.641, + "step": 335 + }, + { + "epoch": 0.04950884086444008, + "grad_norm": 0.554517924785614, + "learning_rate": 4.998325858533742e-06, + "loss": 0.6522, + "step": 336 + }, + { + "epoch": 0.04965618860510806, + "grad_norm": 0.5936357378959656, + "learning_rate": 4.998311642436157e-06, + "loss": 0.6461, + "step": 337 + }, + { + "epoch": 0.04980353634577603, + "grad_norm": 0.6474644541740417, + "learning_rate": 4.998297366255352e-06, + "loss": 0.7013, + "step": 338 + }, + { + "epoch": 0.049950884086444006, + "grad_norm": 0.6061030030250549, + "learning_rate": 4.998283029991664e-06, + "loss": 0.6894, + "step": 339 + }, + { + "epoch": 0.05009823182711198, + "grad_norm": 0.7220948934555054, + "learning_rate": 4.998268633645443e-06, + "loss": 0.63, + "step": 340 + }, + { + "epoch": 0.05024557956777996, + "grad_norm": 0.600858211517334, + "learning_rate": 4.998254177217031e-06, + "loss": 0.6666, + "step": 341 + }, + { + "epoch": 0.05039292730844794, + "grad_norm": 0.5898813009262085, + "learning_rate": 4.99823966070678e-06, + "loss": 0.6379, + "step": 342 + }, + { + "epoch": 0.050540275049115915, + "grad_norm": 0.5691763758659363, + "learning_rate": 4.9982250841150345e-06, + "loss": 0.7072, + "step": 343 + }, + { + "epoch": 0.05068762278978389, + "grad_norm": 0.5641574263572693, + "learning_rate": 4.99821044744215e-06, + "loss": 0.6486, + "step": 344 + }, + { + "epoch": 0.05083497053045186, + "grad_norm": 0.6161054968833923, + "learning_rate": 4.998195750688474e-06, + "loss": 0.6635, + "step": 345 + }, + { + "epoch": 0.05098231827111984, + "grad_norm": 0.602353036403656, + "learning_rate": 4.998180993854363e-06, + "loss": 0.6824, + "step": 346 + }, + { + "epoch": 0.05112966601178782, + "grad_norm": 0.589952826499939, + "learning_rate": 4.99816617694017e-06, + "loss": 0.6735, + "step": 347 + }, + { + "epoch": 0.051277013752455795, + "grad_norm": 0.6031582951545715, + "learning_rate": 4.998151299946253e-06, + "loss": 0.69, + "step": 348 + }, + { + "epoch": 0.05142436149312377, + "grad_norm": 0.6330375075340271, + "learning_rate": 4.998136362872968e-06, + "loss": 0.6948, + "step": 349 + }, + { + "epoch": 0.05157170923379175, + "grad_norm": 0.5920854806900024, + "learning_rate": 4.998121365720676e-06, + "loss": 0.6694, + "step": 350 + }, + { + "epoch": 0.05171905697445973, + "grad_norm": 0.613947868347168, + "learning_rate": 4.9981063084897376e-06, + "loss": 0.6642, + "step": 351 + }, + { + "epoch": 0.0518664047151277, + "grad_norm": 0.609421968460083, + "learning_rate": 4.998091191180513e-06, + "loss": 0.6742, + "step": 352 + }, + { + "epoch": 0.052013752455795675, + "grad_norm": 0.6022937297821045, + "learning_rate": 4.998076013793367e-06, + "loss": 0.6631, + "step": 353 + }, + { + "epoch": 0.05216110019646365, + "grad_norm": 0.5838522911071777, + "learning_rate": 4.9980607763286645e-06, + "loss": 0.6826, + "step": 354 + }, + { + "epoch": 0.05230844793713163, + "grad_norm": 0.6003016829490662, + "learning_rate": 4.998045478786772e-06, + "loss": 0.6584, + "step": 355 + }, + { + "epoch": 0.05245579567779961, + "grad_norm": 0.5765702724456787, + "learning_rate": 4.998030121168058e-06, + "loss": 0.6518, + "step": 356 + }, + { + "epoch": 0.052603143418467585, + "grad_norm": 0.575115978717804, + "learning_rate": 4.998014703472891e-06, + "loss": 0.6645, + "step": 357 + }, + { + "epoch": 0.05275049115913556, + "grad_norm": 0.5583890676498413, + "learning_rate": 4.997999225701641e-06, + "loss": 0.6539, + "step": 358 + }, + { + "epoch": 0.05289783889980354, + "grad_norm": 0.5996747016906738, + "learning_rate": 4.9979836878546826e-06, + "loss": 0.681, + "step": 359 + }, + { + "epoch": 0.05304518664047151, + "grad_norm": 0.5891256928443909, + "learning_rate": 4.997968089932387e-06, + "loss": 0.6463, + "step": 360 + }, + { + "epoch": 0.05319253438113949, + "grad_norm": 0.5876609086990356, + "learning_rate": 4.997952431935131e-06, + "loss": 0.6794, + "step": 361 + }, + { + "epoch": 0.053339882121807465, + "grad_norm": 0.5680165886878967, + "learning_rate": 4.997936713863291e-06, + "loss": 0.6314, + "step": 362 + }, + { + "epoch": 0.05348722986247544, + "grad_norm": 0.5790871381759644, + "learning_rate": 4.997920935717244e-06, + "loss": 0.6702, + "step": 363 + }, + { + "epoch": 0.05363457760314342, + "grad_norm": 0.617107093334198, + "learning_rate": 4.997905097497371e-06, + "loss": 0.6552, + "step": 364 + }, + { + "epoch": 0.0537819253438114, + "grad_norm": 0.6010002493858337, + "learning_rate": 4.997889199204051e-06, + "loss": 0.6572, + "step": 365 + }, + { + "epoch": 0.053929273084479375, + "grad_norm": 0.6206546425819397, + "learning_rate": 4.997873240837668e-06, + "loss": 0.658, + "step": 366 + }, + { + "epoch": 0.054076620825147345, + "grad_norm": 0.6228324174880981, + "learning_rate": 4.9978572223986045e-06, + "loss": 0.6516, + "step": 367 + }, + { + "epoch": 0.05422396856581532, + "grad_norm": 0.6169202923774719, + "learning_rate": 4.997841143887246e-06, + "loss": 0.671, + "step": 368 + }, + { + "epoch": 0.0543713163064833, + "grad_norm": 0.6146575808525085, + "learning_rate": 4.997825005303981e-06, + "loss": 0.662, + "step": 369 + }, + { + "epoch": 0.05451866404715128, + "grad_norm": 0.6172051429748535, + "learning_rate": 4.9978088066491955e-06, + "loss": 0.6555, + "step": 370 + }, + { + "epoch": 0.054666011787819255, + "grad_norm": 0.6059294939041138, + "learning_rate": 4.9977925479232795e-06, + "loss": 0.6588, + "step": 371 + }, + { + "epoch": 0.05481335952848723, + "grad_norm": 0.6294935941696167, + "learning_rate": 4.997776229126624e-06, + "loss": 0.7043, + "step": 372 + }, + { + "epoch": 0.05496070726915521, + "grad_norm": 0.5895375609397888, + "learning_rate": 4.997759850259623e-06, + "loss": 0.6544, + "step": 373 + }, + { + "epoch": 0.05510805500982318, + "grad_norm": 0.6043909192085266, + "learning_rate": 4.9977434113226675e-06, + "loss": 0.6721, + "step": 374 + }, + { + "epoch": 0.05525540275049116, + "grad_norm": 0.6365818977355957, + "learning_rate": 4.997726912316156e-06, + "loss": 0.6965, + "step": 375 + }, + { + "epoch": 0.055402750491159135, + "grad_norm": 0.5799818634986877, + "learning_rate": 4.997710353240482e-06, + "loss": 0.677, + "step": 376 + }, + { + "epoch": 0.05555009823182711, + "grad_norm": 0.6589671969413757, + "learning_rate": 4.997693734096047e-06, + "loss": 0.6288, + "step": 377 + }, + { + "epoch": 0.05569744597249509, + "grad_norm": 0.5922634601593018, + "learning_rate": 4.997677054883249e-06, + "loss": 0.6616, + "step": 378 + }, + { + "epoch": 0.05584479371316307, + "grad_norm": 0.6311843991279602, + "learning_rate": 4.997660315602488e-06, + "loss": 0.6818, + "step": 379 + }, + { + "epoch": 0.055992141453831044, + "grad_norm": 0.5927177667617798, + "learning_rate": 4.997643516254169e-06, + "loss": 0.6682, + "step": 380 + }, + { + "epoch": 0.056139489194499015, + "grad_norm": 0.6145800352096558, + "learning_rate": 4.997626656838695e-06, + "loss": 0.6658, + "step": 381 + }, + { + "epoch": 0.05628683693516699, + "grad_norm": 0.5965235233306885, + "learning_rate": 4.9976097373564716e-06, + "loss": 0.6607, + "step": 382 + }, + { + "epoch": 0.05643418467583497, + "grad_norm": 0.6150085926055908, + "learning_rate": 4.997592757807904e-06, + "loss": 0.7027, + "step": 383 + }, + { + "epoch": 0.05658153241650295, + "grad_norm": 0.6739271879196167, + "learning_rate": 4.9975757181934035e-06, + "loss": 0.6444, + "step": 384 + }, + { + "epoch": 0.056728880157170924, + "grad_norm": 0.6161174178123474, + "learning_rate": 4.997558618513378e-06, + "loss": 0.667, + "step": 385 + }, + { + "epoch": 0.0568762278978389, + "grad_norm": 0.6295338869094849, + "learning_rate": 4.997541458768239e-06, + "loss": 0.6667, + "step": 386 + }, + { + "epoch": 0.05702357563850688, + "grad_norm": 0.5898357629776001, + "learning_rate": 4.9975242389584e-06, + "loss": 0.6725, + "step": 387 + }, + { + "epoch": 0.05717092337917485, + "grad_norm": 0.5747357606887817, + "learning_rate": 4.997506959084273e-06, + "loss": 0.6585, + "step": 388 + }, + { + "epoch": 0.05731827111984283, + "grad_norm": 0.5996381044387817, + "learning_rate": 4.997489619146276e-06, + "loss": 0.6568, + "step": 389 + }, + { + "epoch": 0.057465618860510805, + "grad_norm": 0.5946488380432129, + "learning_rate": 4.997472219144826e-06, + "loss": 0.6761, + "step": 390 + }, + { + "epoch": 0.05761296660117878, + "grad_norm": 0.6659935116767883, + "learning_rate": 4.997454759080339e-06, + "loss": 0.63, + "step": 391 + }, + { + "epoch": 0.05776031434184676, + "grad_norm": 0.6300572752952576, + "learning_rate": 4.997437238953237e-06, + "loss": 0.6502, + "step": 392 + }, + { + "epoch": 0.05790766208251474, + "grad_norm": 0.6219966411590576, + "learning_rate": 4.9974196587639404e-06, + "loss": 0.6808, + "step": 393 + }, + { + "epoch": 0.058055009823182714, + "grad_norm": 0.6138851046562195, + "learning_rate": 4.997402018512873e-06, + "loss": 0.6814, + "step": 394 + }, + { + "epoch": 0.058202357563850685, + "grad_norm": 0.6145579814910889, + "learning_rate": 4.997384318200458e-06, + "loss": 0.6329, + "step": 395 + }, + { + "epoch": 0.05834970530451866, + "grad_norm": 0.627988874912262, + "learning_rate": 4.997366557827121e-06, + "loss": 0.6818, + "step": 396 + }, + { + "epoch": 0.05849705304518664, + "grad_norm": 0.6087729930877686, + "learning_rate": 4.997348737393291e-06, + "loss": 0.6736, + "step": 397 + }, + { + "epoch": 0.05864440078585462, + "grad_norm": 0.6030706167221069, + "learning_rate": 4.997330856899394e-06, + "loss": 0.6618, + "step": 398 + }, + { + "epoch": 0.058791748526522594, + "grad_norm": 0.5748167634010315, + "learning_rate": 4.997312916345861e-06, + "loss": 0.6469, + "step": 399 + }, + { + "epoch": 0.05893909626719057, + "grad_norm": 0.5941494703292847, + "learning_rate": 4.997294915733124e-06, + "loss": 0.6678, + "step": 400 + }, + { + "epoch": 0.05908644400785855, + "grad_norm": 0.5932667255401611, + "learning_rate": 4.9972768550616166e-06, + "loss": 0.6579, + "step": 401 + }, + { + "epoch": 0.05923379174852652, + "grad_norm": 0.5876591205596924, + "learning_rate": 4.997258734331771e-06, + "loss": 0.6074, + "step": 402 + }, + { + "epoch": 0.0593811394891945, + "grad_norm": 0.5839720964431763, + "learning_rate": 4.997240553544024e-06, + "loss": 0.6446, + "step": 403 + }, + { + "epoch": 0.059528487229862474, + "grad_norm": 0.5958043932914734, + "learning_rate": 4.997222312698814e-06, + "loss": 0.6614, + "step": 404 + }, + { + "epoch": 0.05967583497053045, + "grad_norm": 0.6068096160888672, + "learning_rate": 4.997204011796578e-06, + "loss": 0.6829, + "step": 405 + }, + { + "epoch": 0.05982318271119843, + "grad_norm": 0.6012946963310242, + "learning_rate": 4.997185650837757e-06, + "loss": 0.6771, + "step": 406 + }, + { + "epoch": 0.059970530451866406, + "grad_norm": 0.5931991338729858, + "learning_rate": 4.9971672298227915e-06, + "loss": 0.6874, + "step": 407 + }, + { + "epoch": 0.060117878192534384, + "grad_norm": 0.5784168243408203, + "learning_rate": 4.9971487487521265e-06, + "loss": 0.6642, + "step": 408 + }, + { + "epoch": 0.060265225933202354, + "grad_norm": 0.5884780287742615, + "learning_rate": 4.9971302076262055e-06, + "loss": 0.6498, + "step": 409 + }, + { + "epoch": 0.06041257367387033, + "grad_norm": 0.5942254662513733, + "learning_rate": 4.997111606445472e-06, + "loss": 0.6573, + "step": 410 + }, + { + "epoch": 0.06055992141453831, + "grad_norm": 0.5765435695648193, + "learning_rate": 4.997092945210378e-06, + "loss": 0.6531, + "step": 411 + }, + { + "epoch": 0.060707269155206287, + "grad_norm": 0.5869132280349731, + "learning_rate": 4.99707422392137e-06, + "loss": 0.68, + "step": 412 + }, + { + "epoch": 0.060854616895874264, + "grad_norm": 0.5849117636680603, + "learning_rate": 4.997055442578896e-06, + "loss": 0.6383, + "step": 413 + }, + { + "epoch": 0.06100196463654224, + "grad_norm": 0.5986029505729675, + "learning_rate": 4.997036601183411e-06, + "loss": 0.6821, + "step": 414 + }, + { + "epoch": 0.06114931237721022, + "grad_norm": 0.5985538363456726, + "learning_rate": 4.997017699735367e-06, + "loss": 0.6637, + "step": 415 + }, + { + "epoch": 0.06129666011787819, + "grad_norm": 0.613088846206665, + "learning_rate": 4.9969987382352186e-06, + "loss": 0.6727, + "step": 416 + }, + { + "epoch": 0.06144400785854617, + "grad_norm": 0.6429189443588257, + "learning_rate": 4.996979716683422e-06, + "loss": 0.6352, + "step": 417 + }, + { + "epoch": 0.061591355599214144, + "grad_norm": 0.6279242634773254, + "learning_rate": 4.996960635080434e-06, + "loss": 0.6414, + "step": 418 + }, + { + "epoch": 0.06173870333988212, + "grad_norm": 0.5868019461631775, + "learning_rate": 4.996941493426713e-06, + "loss": 0.6596, + "step": 419 + }, + { + "epoch": 0.0618860510805501, + "grad_norm": 0.5683985352516174, + "learning_rate": 4.996922291722721e-06, + "loss": 0.6344, + "step": 420 + }, + { + "epoch": 0.062033398821218076, + "grad_norm": 0.6037581562995911, + "learning_rate": 4.9969030299689196e-06, + "loss": 0.6856, + "step": 421 + }, + { + "epoch": 0.062180746561886054, + "grad_norm": 0.6367250084877014, + "learning_rate": 4.9968837081657704e-06, + "loss": 0.683, + "step": 422 + }, + { + "epoch": 0.062328094302554024, + "grad_norm": 0.6199342608451843, + "learning_rate": 4.996864326313739e-06, + "loss": 0.6596, + "step": 423 + }, + { + "epoch": 0.062475442043222, + "grad_norm": 0.6653918027877808, + "learning_rate": 4.996844884413293e-06, + "loss": 0.6852, + "step": 424 + }, + { + "epoch": 0.06262278978388998, + "grad_norm": 0.5910289883613586, + "learning_rate": 4.996825382464897e-06, + "loss": 0.6374, + "step": 425 + }, + { + "epoch": 0.06277013752455796, + "grad_norm": 0.5699657201766968, + "learning_rate": 4.996805820469023e-06, + "loss": 0.65, + "step": 426 + }, + { + "epoch": 0.06291748526522593, + "grad_norm": 0.5861983895301819, + "learning_rate": 4.996786198426139e-06, + "loss": 0.6754, + "step": 427 + }, + { + "epoch": 0.06306483300589391, + "grad_norm": 0.5766420960426331, + "learning_rate": 4.996766516336719e-06, + "loss": 0.6186, + "step": 428 + }, + { + "epoch": 0.06321218074656189, + "grad_norm": 0.590089738368988, + "learning_rate": 4.996746774201235e-06, + "loss": 0.6175, + "step": 429 + }, + { + "epoch": 0.06335952848722987, + "grad_norm": 0.6041563153266907, + "learning_rate": 4.996726972020162e-06, + "loss": 0.6503, + "step": 430 + }, + { + "epoch": 0.06350687622789784, + "grad_norm": 0.593813419342041, + "learning_rate": 4.996707109793976e-06, + "loss": 0.6378, + "step": 431 + }, + { + "epoch": 0.06365422396856582, + "grad_norm": 0.6274111270904541, + "learning_rate": 4.996687187523156e-06, + "loss": 0.6593, + "step": 432 + }, + { + "epoch": 0.0638015717092338, + "grad_norm": 0.5725140571594238, + "learning_rate": 4.996667205208179e-06, + "loss": 0.6147, + "step": 433 + }, + { + "epoch": 0.06394891944990176, + "grad_norm": 0.6207109689712524, + "learning_rate": 4.996647162849527e-06, + "loss": 0.6375, + "step": 434 + }, + { + "epoch": 0.06409626719056974, + "grad_norm": 0.6229543685913086, + "learning_rate": 4.996627060447682e-06, + "loss": 0.6621, + "step": 435 + }, + { + "epoch": 0.06424361493123772, + "grad_norm": 0.6477464437484741, + "learning_rate": 4.996606898003128e-06, + "loss": 0.6882, + "step": 436 + }, + { + "epoch": 0.0643909626719057, + "grad_norm": 0.6218327283859253, + "learning_rate": 4.996586675516348e-06, + "loss": 0.6692, + "step": 437 + }, + { + "epoch": 0.06453831041257367, + "grad_norm": 0.590770423412323, + "learning_rate": 4.996566392987829e-06, + "loss": 0.6621, + "step": 438 + }, + { + "epoch": 0.06468565815324165, + "grad_norm": 0.5729672312736511, + "learning_rate": 4.996546050418061e-06, + "loss": 0.6499, + "step": 439 + }, + { + "epoch": 0.06483300589390963, + "grad_norm": 0.5959722995758057, + "learning_rate": 4.99652564780753e-06, + "loss": 0.6482, + "step": 440 + }, + { + "epoch": 0.0649803536345776, + "grad_norm": 0.5841032862663269, + "learning_rate": 4.996505185156728e-06, + "loss": 0.658, + "step": 441 + }, + { + "epoch": 0.06512770137524558, + "grad_norm": 0.5657989382743835, + "learning_rate": 4.996484662466148e-06, + "loss": 0.6642, + "step": 442 + }, + { + "epoch": 0.06527504911591356, + "grad_norm": 0.5938258767127991, + "learning_rate": 4.996464079736283e-06, + "loss": 0.6703, + "step": 443 + }, + { + "epoch": 0.06542239685658154, + "grad_norm": 0.6052440404891968, + "learning_rate": 4.9964434369676265e-06, + "loss": 0.6619, + "step": 444 + }, + { + "epoch": 0.06556974459724951, + "grad_norm": 0.6550309658050537, + "learning_rate": 4.996422734160678e-06, + "loss": 0.6436, + "step": 445 + }, + { + "epoch": 0.06571709233791749, + "grad_norm": 0.6265039443969727, + "learning_rate": 4.996401971315932e-06, + "loss": 0.6581, + "step": 446 + }, + { + "epoch": 0.06586444007858547, + "grad_norm": 0.6450071930885315, + "learning_rate": 4.99638114843389e-06, + "loss": 0.685, + "step": 447 + }, + { + "epoch": 0.06601178781925343, + "grad_norm": 0.6411169171333313, + "learning_rate": 4.996360265515052e-06, + "loss": 0.6318, + "step": 448 + }, + { + "epoch": 0.06615913555992141, + "grad_norm": 0.672988772392273, + "learning_rate": 4.996339322559921e-06, + "loss": 0.6719, + "step": 449 + }, + { + "epoch": 0.06630648330058939, + "grad_norm": 0.566007137298584, + "learning_rate": 4.996318319569e-06, + "loss": 0.6461, + "step": 450 + }, + { + "epoch": 0.06645383104125736, + "grad_norm": 0.6124153733253479, + "learning_rate": 4.996297256542794e-06, + "loss": 0.6633, + "step": 451 + }, + { + "epoch": 0.06660117878192534, + "grad_norm": 0.580081045627594, + "learning_rate": 4.99627613348181e-06, + "loss": 0.6485, + "step": 452 + }, + { + "epoch": 0.06674852652259332, + "grad_norm": 0.6150937080383301, + "learning_rate": 4.996254950386556e-06, + "loss": 0.6423, + "step": 453 + }, + { + "epoch": 0.0668958742632613, + "grad_norm": 0.6088070273399353, + "learning_rate": 4.996233707257542e-06, + "loss": 0.6877, + "step": 454 + }, + { + "epoch": 0.06704322200392927, + "grad_norm": 0.5807029604911804, + "learning_rate": 4.996212404095276e-06, + "loss": 0.6303, + "step": 455 + }, + { + "epoch": 0.06719056974459725, + "grad_norm": 0.5920756459236145, + "learning_rate": 4.996191040900274e-06, + "loss": 0.6816, + "step": 456 + }, + { + "epoch": 0.06733791748526523, + "grad_norm": 0.5994493961334229, + "learning_rate": 4.996169617673048e-06, + "loss": 0.6411, + "step": 457 + }, + { + "epoch": 0.0674852652259332, + "grad_norm": 0.591630756855011, + "learning_rate": 4.996148134414114e-06, + "loss": 0.6756, + "step": 458 + }, + { + "epoch": 0.06763261296660118, + "grad_norm": 0.6260467171669006, + "learning_rate": 4.996126591123988e-06, + "loss": 0.6131, + "step": 459 + }, + { + "epoch": 0.06777996070726916, + "grad_norm": 0.595569908618927, + "learning_rate": 4.996104987803188e-06, + "loss": 0.6775, + "step": 460 + }, + { + "epoch": 0.06792730844793714, + "grad_norm": 0.6080424785614014, + "learning_rate": 4.996083324452233e-06, + "loss": 0.6677, + "step": 461 + }, + { + "epoch": 0.06807465618860511, + "grad_norm": 0.6022981405258179, + "learning_rate": 4.996061601071646e-06, + "loss": 0.6409, + "step": 462 + }, + { + "epoch": 0.06822200392927308, + "grad_norm": 0.6034508943557739, + "learning_rate": 4.996039817661947e-06, + "loss": 0.643, + "step": 463 + }, + { + "epoch": 0.06836935166994106, + "grad_norm": 0.6168201565742493, + "learning_rate": 4.996017974223661e-06, + "loss": 0.6905, + "step": 464 + }, + { + "epoch": 0.06851669941060903, + "grad_norm": 0.6016014218330383, + "learning_rate": 4.995996070757315e-06, + "loss": 0.6636, + "step": 465 + }, + { + "epoch": 0.06866404715127701, + "grad_norm": 0.5759754180908203, + "learning_rate": 4.9959741072634335e-06, + "loss": 0.6674, + "step": 466 + }, + { + "epoch": 0.06881139489194499, + "grad_norm": 0.5912307500839233, + "learning_rate": 4.995952083742545e-06, + "loss": 0.6802, + "step": 467 + }, + { + "epoch": 0.06895874263261297, + "grad_norm": 0.5812605023384094, + "learning_rate": 4.9959300001951796e-06, + "loss": 0.6449, + "step": 468 + }, + { + "epoch": 0.06910609037328094, + "grad_norm": 0.5848585367202759, + "learning_rate": 4.995907856621869e-06, + "loss": 0.6767, + "step": 469 + }, + { + "epoch": 0.06925343811394892, + "grad_norm": 0.5818460583686829, + "learning_rate": 4.995885653023145e-06, + "loss": 0.6809, + "step": 470 + }, + { + "epoch": 0.0694007858546169, + "grad_norm": 0.6058306694030762, + "learning_rate": 4.99586338939954e-06, + "loss": 0.6749, + "step": 471 + }, + { + "epoch": 0.06954813359528488, + "grad_norm": 0.5630565881729126, + "learning_rate": 4.995841065751593e-06, + "loss": 0.5917, + "step": 472 + }, + { + "epoch": 0.06969548133595285, + "grad_norm": 0.5649656057357788, + "learning_rate": 4.995818682079838e-06, + "loss": 0.6616, + "step": 473 + }, + { + "epoch": 0.06984282907662083, + "grad_norm": 0.589773416519165, + "learning_rate": 4.995796238384815e-06, + "loss": 0.6598, + "step": 474 + }, + { + "epoch": 0.06999017681728881, + "grad_norm": 0.5579890012741089, + "learning_rate": 4.995773734667063e-06, + "loss": 0.6468, + "step": 475 + }, + { + "epoch": 0.07013752455795678, + "grad_norm": 0.5877634882926941, + "learning_rate": 4.9957511709271235e-06, + "loss": 0.6389, + "step": 476 + }, + { + "epoch": 0.07028487229862475, + "grad_norm": 0.5823347568511963, + "learning_rate": 4.995728547165538e-06, + "loss": 0.6591, + "step": 477 + }, + { + "epoch": 0.07043222003929273, + "grad_norm": 0.5953249335289001, + "learning_rate": 4.995705863382852e-06, + "loss": 0.6676, + "step": 478 + }, + { + "epoch": 0.0705795677799607, + "grad_norm": 0.6276386976242065, + "learning_rate": 4.995683119579611e-06, + "loss": 0.6685, + "step": 479 + }, + { + "epoch": 0.07072691552062868, + "grad_norm": 0.5877466797828674, + "learning_rate": 4.995660315756361e-06, + "loss": 0.6268, + "step": 480 + }, + { + "epoch": 0.07087426326129666, + "grad_norm": 0.5890880227088928, + "learning_rate": 4.995637451913651e-06, + "loss": 0.6594, + "step": 481 + }, + { + "epoch": 0.07102161100196464, + "grad_norm": 0.5890219211578369, + "learning_rate": 4.995614528052031e-06, + "loss": 0.6483, + "step": 482 + }, + { + "epoch": 0.07116895874263261, + "grad_norm": 0.6095407009124756, + "learning_rate": 4.995591544172052e-06, + "loss": 0.6425, + "step": 483 + }, + { + "epoch": 0.07131630648330059, + "grad_norm": 0.5910885334014893, + "learning_rate": 4.9955685002742665e-06, + "loss": 0.6637, + "step": 484 + }, + { + "epoch": 0.07146365422396857, + "grad_norm": 0.5859991908073425, + "learning_rate": 4.995545396359229e-06, + "loss": 0.6582, + "step": 485 + }, + { + "epoch": 0.07161100196463654, + "grad_norm": 0.6217613220214844, + "learning_rate": 4.995522232427496e-06, + "loss": 0.6321, + "step": 486 + }, + { + "epoch": 0.07175834970530452, + "grad_norm": 0.6090859174728394, + "learning_rate": 4.995499008479623e-06, + "loss": 0.6942, + "step": 487 + }, + { + "epoch": 0.0719056974459725, + "grad_norm": 0.5915296673774719, + "learning_rate": 4.99547572451617e-06, + "loss": 0.6393, + "step": 488 + }, + { + "epoch": 0.07205304518664048, + "grad_norm": 0.6014181971549988, + "learning_rate": 4.995452380537695e-06, + "loss": 0.6715, + "step": 489 + }, + { + "epoch": 0.07220039292730845, + "grad_norm": 0.5956104397773743, + "learning_rate": 4.995428976544762e-06, + "loss": 0.7121, + "step": 490 + }, + { + "epoch": 0.07234774066797642, + "grad_norm": 0.6248080134391785, + "learning_rate": 4.995405512537932e-06, + "loss": 0.6406, + "step": 491 + }, + { + "epoch": 0.0724950884086444, + "grad_norm": 0.6609901189804077, + "learning_rate": 4.99538198851777e-06, + "loss": 0.6808, + "step": 492 + }, + { + "epoch": 0.07264243614931237, + "grad_norm": 0.6172043681144714, + "learning_rate": 4.9953584044848425e-06, + "loss": 0.6492, + "step": 493 + }, + { + "epoch": 0.07278978388998035, + "grad_norm": 0.5769622921943665, + "learning_rate": 4.995334760439714e-06, + "loss": 0.6364, + "step": 494 + }, + { + "epoch": 0.07293713163064833, + "grad_norm": 0.5988051891326904, + "learning_rate": 4.995311056382956e-06, + "loss": 0.6569, + "step": 495 + }, + { + "epoch": 0.0730844793713163, + "grad_norm": 0.6307658553123474, + "learning_rate": 4.995287292315137e-06, + "loss": 0.6836, + "step": 496 + }, + { + "epoch": 0.07323182711198428, + "grad_norm": 0.6066113710403442, + "learning_rate": 4.99526346823683e-06, + "loss": 0.6284, + "step": 497 + }, + { + "epoch": 0.07337917485265226, + "grad_norm": 0.5992991328239441, + "learning_rate": 4.995239584148606e-06, + "loss": 0.6443, + "step": 498 + }, + { + "epoch": 0.07352652259332024, + "grad_norm": 0.584589958190918, + "learning_rate": 4.99521564005104e-06, + "loss": 0.6597, + "step": 499 + }, + { + "epoch": 0.07367387033398821, + "grad_norm": 0.636104941368103, + "learning_rate": 4.995191635944708e-06, + "loss": 0.6719, + "step": 500 + }, + { + "epoch": 0.07382121807465619, + "grad_norm": 0.6063581109046936, + "learning_rate": 4.995167571830188e-06, + "loss": 0.6397, + "step": 501 + }, + { + "epoch": 0.07396856581532417, + "grad_norm": 0.6483521461486816, + "learning_rate": 4.995143447708059e-06, + "loss": 0.637, + "step": 502 + }, + { + "epoch": 0.07411591355599215, + "grad_norm": 0.613595187664032, + "learning_rate": 4.9951192635788995e-06, + "loss": 0.6443, + "step": 503 + }, + { + "epoch": 0.07426326129666012, + "grad_norm": 0.5998794436454773, + "learning_rate": 4.995095019443293e-06, + "loss": 0.6255, + "step": 504 + }, + { + "epoch": 0.07441060903732809, + "grad_norm": 0.6016404032707214, + "learning_rate": 4.99507071530182e-06, + "loss": 0.6658, + "step": 505 + }, + { + "epoch": 0.07455795677799607, + "grad_norm": 0.6346244215965271, + "learning_rate": 4.995046351155067e-06, + "loss": 0.6934, + "step": 506 + }, + { + "epoch": 0.07470530451866404, + "grad_norm": 0.58289635181427, + "learning_rate": 4.99502192700362e-06, + "loss": 0.654, + "step": 507 + }, + { + "epoch": 0.07485265225933202, + "grad_norm": 0.6151940226554871, + "learning_rate": 4.994997442848066e-06, + "loss": 0.6557, + "step": 508 + }, + { + "epoch": 0.075, + "grad_norm": 0.6002930998802185, + "learning_rate": 4.994972898688994e-06, + "loss": 0.6451, + "step": 509 + }, + { + "epoch": 0.07514734774066797, + "grad_norm": 0.608546257019043, + "learning_rate": 4.994948294526992e-06, + "loss": 0.6735, + "step": 510 + }, + { + "epoch": 0.07529469548133595, + "grad_norm": 0.5820083022117615, + "learning_rate": 4.994923630362656e-06, + "loss": 0.6364, + "step": 511 + }, + { + "epoch": 0.07544204322200393, + "grad_norm": 0.5756909251213074, + "learning_rate": 4.9948989061965755e-06, + "loss": 0.6595, + "step": 512 + }, + { + "epoch": 0.0755893909626719, + "grad_norm": 0.6137067079544067, + "learning_rate": 4.994874122029347e-06, + "loss": 0.6875, + "step": 513 + }, + { + "epoch": 0.07573673870333988, + "grad_norm": 0.5818147659301758, + "learning_rate": 4.994849277861566e-06, + "loss": 0.6554, + "step": 514 + }, + { + "epoch": 0.07588408644400786, + "grad_norm": 0.5998055934906006, + "learning_rate": 4.99482437369383e-06, + "loss": 0.6374, + "step": 515 + }, + { + "epoch": 0.07603143418467584, + "grad_norm": 0.5943475365638733, + "learning_rate": 4.994799409526738e-06, + "loss": 0.6555, + "step": 516 + }, + { + "epoch": 0.07617878192534382, + "grad_norm": 0.6041486859321594, + "learning_rate": 4.994774385360891e-06, + "loss": 0.6688, + "step": 517 + }, + { + "epoch": 0.0763261296660118, + "grad_norm": 0.6238055229187012, + "learning_rate": 4.994749301196889e-06, + "loss": 0.6896, + "step": 518 + }, + { + "epoch": 0.07647347740667976, + "grad_norm": 0.6532991528511047, + "learning_rate": 4.994724157035337e-06, + "loss": 0.6702, + "step": 519 + }, + { + "epoch": 0.07662082514734773, + "grad_norm": 0.6165205836296082, + "learning_rate": 4.9946989528768386e-06, + "loss": 0.6347, + "step": 520 + }, + { + "epoch": 0.07676817288801571, + "grad_norm": 0.5950798988342285, + "learning_rate": 4.994673688722002e-06, + "loss": 0.6369, + "step": 521 + }, + { + "epoch": 0.07691552062868369, + "grad_norm": 0.6032286286354065, + "learning_rate": 4.994648364571431e-06, + "loss": 0.6854, + "step": 522 + }, + { + "epoch": 0.07706286836935167, + "grad_norm": 0.590121865272522, + "learning_rate": 4.994622980425739e-06, + "loss": 0.6345, + "step": 523 + }, + { + "epoch": 0.07721021611001964, + "grad_norm": 0.6265715956687927, + "learning_rate": 4.994597536285532e-06, + "loss": 0.6553, + "step": 524 + }, + { + "epoch": 0.07735756385068762, + "grad_norm": 0.5895006656646729, + "learning_rate": 4.994572032151426e-06, + "loss": 0.6734, + "step": 525 + }, + { + "epoch": 0.0775049115913556, + "grad_norm": 0.6254992485046387, + "learning_rate": 4.994546468024033e-06, + "loss": 0.673, + "step": 526 + }, + { + "epoch": 0.07765225933202358, + "grad_norm": 0.5966036319732666, + "learning_rate": 4.994520843903967e-06, + "loss": 0.6636, + "step": 527 + }, + { + "epoch": 0.07779960707269155, + "grad_norm": 0.6553522348403931, + "learning_rate": 4.994495159791844e-06, + "loss": 0.6679, + "step": 528 + }, + { + "epoch": 0.07794695481335953, + "grad_norm": 0.6310304403305054, + "learning_rate": 4.994469415688283e-06, + "loss": 0.6779, + "step": 529 + }, + { + "epoch": 0.07809430255402751, + "grad_norm": 0.6421753168106079, + "learning_rate": 4.994443611593902e-06, + "loss": 0.6287, + "step": 530 + }, + { + "epoch": 0.07824165029469549, + "grad_norm": 0.6036988496780396, + "learning_rate": 4.994417747509323e-06, + "loss": 0.6541, + "step": 531 + }, + { + "epoch": 0.07838899803536346, + "grad_norm": 0.5869317054748535, + "learning_rate": 4.994391823435166e-06, + "loss": 0.6215, + "step": 532 + }, + { + "epoch": 0.07853634577603144, + "grad_norm": 0.6205430626869202, + "learning_rate": 4.994365839372057e-06, + "loss": 0.6763, + "step": 533 + }, + { + "epoch": 0.0786836935166994, + "grad_norm": 0.5994263291358948, + "learning_rate": 4.994339795320618e-06, + "loss": 0.6511, + "step": 534 + }, + { + "epoch": 0.07883104125736738, + "grad_norm": 0.6151286363601685, + "learning_rate": 4.994313691281478e-06, + "loss": 0.626, + "step": 535 + }, + { + "epoch": 0.07897838899803536, + "grad_norm": 0.5853347182273865, + "learning_rate": 4.9942875272552635e-06, + "loss": 0.6729, + "step": 536 + }, + { + "epoch": 0.07912573673870334, + "grad_norm": 0.6534618139266968, + "learning_rate": 4.9942613032426035e-06, + "loss": 0.6837, + "step": 537 + }, + { + "epoch": 0.07927308447937131, + "grad_norm": 0.5910976529121399, + "learning_rate": 4.994235019244129e-06, + "loss": 0.6816, + "step": 538 + }, + { + "epoch": 0.07942043222003929, + "grad_norm": 0.6287471652030945, + "learning_rate": 4.994208675260473e-06, + "loss": 0.7, + "step": 539 + }, + { + "epoch": 0.07956777996070727, + "grad_norm": 0.6241965889930725, + "learning_rate": 4.994182271292266e-06, + "loss": 0.6249, + "step": 540 + }, + { + "epoch": 0.07971512770137525, + "grad_norm": 0.6212645769119263, + "learning_rate": 4.994155807340147e-06, + "loss": 0.6536, + "step": 541 + }, + { + "epoch": 0.07986247544204322, + "grad_norm": 0.6011468768119812, + "learning_rate": 4.99412928340475e-06, + "loss": 0.675, + "step": 542 + }, + { + "epoch": 0.0800098231827112, + "grad_norm": 0.599738359451294, + "learning_rate": 4.994102699486714e-06, + "loss": 0.6331, + "step": 543 + }, + { + "epoch": 0.08015717092337918, + "grad_norm": 0.5969452261924744, + "learning_rate": 4.994076055586678e-06, + "loss": 0.661, + "step": 544 + }, + { + "epoch": 0.08030451866404716, + "grad_norm": 0.6413594484329224, + "learning_rate": 4.994049351705283e-06, + "loss": 0.6536, + "step": 545 + }, + { + "epoch": 0.08045186640471513, + "grad_norm": 0.5992808938026428, + "learning_rate": 4.99402258784317e-06, + "loss": 0.6986, + "step": 546 + }, + { + "epoch": 0.08059921414538311, + "grad_norm": 0.6145942807197571, + "learning_rate": 4.993995764000984e-06, + "loss": 0.6616, + "step": 547 + }, + { + "epoch": 0.08074656188605107, + "grad_norm": 0.6289473176002502, + "learning_rate": 4.993968880179369e-06, + "loss": 0.674, + "step": 548 + }, + { + "epoch": 0.08089390962671905, + "grad_norm": 0.5970988869667053, + "learning_rate": 4.993941936378973e-06, + "loss": 0.6269, + "step": 549 + }, + { + "epoch": 0.08104125736738703, + "grad_norm": 0.5851086378097534, + "learning_rate": 4.993914932600442e-06, + "loss": 0.6345, + "step": 550 + }, + { + "epoch": 0.081188605108055, + "grad_norm": 0.6499071717262268, + "learning_rate": 4.9938878688444285e-06, + "loss": 0.6613, + "step": 551 + }, + { + "epoch": 0.08133595284872298, + "grad_norm": 0.6321249604225159, + "learning_rate": 4.99386074511158e-06, + "loss": 0.6245, + "step": 552 + }, + { + "epoch": 0.08148330058939096, + "grad_norm": 0.6099115014076233, + "learning_rate": 4.99383356140255e-06, + "loss": 0.6321, + "step": 553 + }, + { + "epoch": 0.08163064833005894, + "grad_norm": 0.6063461899757385, + "learning_rate": 4.993806317717994e-06, + "loss": 0.6397, + "step": 554 + }, + { + "epoch": 0.08177799607072692, + "grad_norm": 0.6329178810119629, + "learning_rate": 4.9937790140585645e-06, + "loss": 0.6349, + "step": 555 + }, + { + "epoch": 0.0819253438113949, + "grad_norm": 0.6260061860084534, + "learning_rate": 4.993751650424919e-06, + "loss": 0.6554, + "step": 556 + }, + { + "epoch": 0.08207269155206287, + "grad_norm": 0.6369381546974182, + "learning_rate": 4.993724226817718e-06, + "loss": 0.6543, + "step": 557 + }, + { + "epoch": 0.08222003929273085, + "grad_norm": 0.6073974370956421, + "learning_rate": 4.993696743237617e-06, + "loss": 0.6707, + "step": 558 + }, + { + "epoch": 0.08236738703339883, + "grad_norm": 0.6221438646316528, + "learning_rate": 4.993669199685279e-06, + "loss": 0.6747, + "step": 559 + }, + { + "epoch": 0.0825147347740668, + "grad_norm": 0.6405483484268188, + "learning_rate": 4.993641596161368e-06, + "loss": 0.6337, + "step": 560 + }, + { + "epoch": 0.08266208251473478, + "grad_norm": 0.5582924485206604, + "learning_rate": 4.993613932666545e-06, + "loss": 0.6072, + "step": 561 + }, + { + "epoch": 0.08280943025540274, + "grad_norm": 0.6025072336196899, + "learning_rate": 4.993586209201476e-06, + "loss": 0.6296, + "step": 562 + }, + { + "epoch": 0.08295677799607072, + "grad_norm": 0.6266443729400635, + "learning_rate": 4.993558425766829e-06, + "loss": 0.6995, + "step": 563 + }, + { + "epoch": 0.0831041257367387, + "grad_norm": 0.6297134757041931, + "learning_rate": 4.993530582363272e-06, + "loss": 0.6682, + "step": 564 + }, + { + "epoch": 0.08325147347740668, + "grad_norm": 0.604738175868988, + "learning_rate": 4.993502678991473e-06, + "loss": 0.6674, + "step": 565 + }, + { + "epoch": 0.08339882121807465, + "grad_norm": 0.5786380767822266, + "learning_rate": 4.993474715652104e-06, + "loss": 0.6234, + "step": 566 + }, + { + "epoch": 0.08354616895874263, + "grad_norm": 0.6647732853889465, + "learning_rate": 4.993446692345838e-06, + "loss": 0.6493, + "step": 567 + }, + { + "epoch": 0.08369351669941061, + "grad_norm": 0.6095333695411682, + "learning_rate": 4.993418609073348e-06, + "loss": 0.6655, + "step": 568 + }, + { + "epoch": 0.08384086444007859, + "grad_norm": 0.6278440952301025, + "learning_rate": 4.993390465835311e-06, + "loss": 0.633, + "step": 569 + }, + { + "epoch": 0.08398821218074656, + "grad_norm": 0.5947677493095398, + "learning_rate": 4.9933622626324024e-06, + "loss": 0.6701, + "step": 570 + }, + { + "epoch": 0.08413555992141454, + "grad_norm": 0.6313149929046631, + "learning_rate": 4.993333999465301e-06, + "loss": 0.6447, + "step": 571 + }, + { + "epoch": 0.08428290766208252, + "grad_norm": 0.6273631453514099, + "learning_rate": 4.9933056763346855e-06, + "loss": 0.6293, + "step": 572 + }, + { + "epoch": 0.0844302554027505, + "grad_norm": 0.6361114978790283, + "learning_rate": 4.993277293241239e-06, + "loss": 0.6566, + "step": 573 + }, + { + "epoch": 0.08457760314341847, + "grad_norm": 0.6531082391738892, + "learning_rate": 4.993248850185642e-06, + "loss": 0.6746, + "step": 574 + }, + { + "epoch": 0.08472495088408645, + "grad_norm": 0.6308591365814209, + "learning_rate": 4.99322034716858e-06, + "loss": 0.6411, + "step": 575 + }, + { + "epoch": 0.08487229862475441, + "grad_norm": 0.6371141672134399, + "learning_rate": 4.993191784190739e-06, + "loss": 0.6768, + "step": 576 + }, + { + "epoch": 0.08501964636542239, + "grad_norm": 0.6175285577774048, + "learning_rate": 4.993163161252803e-06, + "loss": 0.6427, + "step": 577 + }, + { + "epoch": 0.08516699410609037, + "grad_norm": 0.5966286659240723, + "learning_rate": 4.993134478355464e-06, + "loss": 0.6684, + "step": 578 + }, + { + "epoch": 0.08531434184675835, + "grad_norm": 0.5820363163948059, + "learning_rate": 4.99310573549941e-06, + "loss": 0.647, + "step": 579 + }, + { + "epoch": 0.08546168958742632, + "grad_norm": 0.5643455386161804, + "learning_rate": 4.993076932685331e-06, + "loss": 0.6349, + "step": 580 + }, + { + "epoch": 0.0856090373280943, + "grad_norm": 0.6097249388694763, + "learning_rate": 4.9930480699139215e-06, + "loss": 0.612, + "step": 581 + }, + { + "epoch": 0.08575638506876228, + "grad_norm": 0.6453356146812439, + "learning_rate": 4.993019147185877e-06, + "loss": 0.663, + "step": 582 + }, + { + "epoch": 0.08590373280943026, + "grad_norm": 0.5898832678794861, + "learning_rate": 4.99299016450189e-06, + "loss": 0.671, + "step": 583 + }, + { + "epoch": 0.08605108055009823, + "grad_norm": 0.6105568408966064, + "learning_rate": 4.992961121862658e-06, + "loss": 0.6252, + "step": 584 + }, + { + "epoch": 0.08619842829076621, + "grad_norm": 0.6293426752090454, + "learning_rate": 4.992932019268882e-06, + "loss": 0.6343, + "step": 585 + }, + { + "epoch": 0.08634577603143419, + "grad_norm": 0.6442444324493408, + "learning_rate": 4.992902856721259e-06, + "loss": 0.6332, + "step": 586 + }, + { + "epoch": 0.08649312377210217, + "grad_norm": 0.6154413223266602, + "learning_rate": 4.992873634220491e-06, + "loss": 0.6652, + "step": 587 + }, + { + "epoch": 0.08664047151277014, + "grad_norm": 0.6593888401985168, + "learning_rate": 4.9928443517672834e-06, + "loss": 0.6694, + "step": 588 + }, + { + "epoch": 0.08678781925343812, + "grad_norm": 0.6030516624450684, + "learning_rate": 4.9928150093623366e-06, + "loss": 0.6176, + "step": 589 + }, + { + "epoch": 0.0869351669941061, + "grad_norm": 0.6003717184066772, + "learning_rate": 4.9927856070063585e-06, + "loss": 0.6625, + "step": 590 + }, + { + "epoch": 0.08708251473477406, + "grad_norm": 0.6075407266616821, + "learning_rate": 4.992756144700056e-06, + "loss": 0.6347, + "step": 591 + }, + { + "epoch": 0.08722986247544204, + "grad_norm": 0.620564341545105, + "learning_rate": 4.992726622444137e-06, + "loss": 0.6389, + "step": 592 + }, + { + "epoch": 0.08737721021611002, + "grad_norm": 0.6192676424980164, + "learning_rate": 4.992697040239312e-06, + "loss": 0.6091, + "step": 593 + }, + { + "epoch": 0.08752455795677799, + "grad_norm": 0.6559770107269287, + "learning_rate": 4.9926673980862925e-06, + "loss": 0.6355, + "step": 594 + }, + { + "epoch": 0.08767190569744597, + "grad_norm": 0.6690354347229004, + "learning_rate": 4.992637695985792e-06, + "loss": 0.6525, + "step": 595 + }, + { + "epoch": 0.08781925343811395, + "grad_norm": 0.6058727502822876, + "learning_rate": 4.992607933938523e-06, + "loss": 0.6097, + "step": 596 + }, + { + "epoch": 0.08796660117878193, + "grad_norm": 0.5796279311180115, + "learning_rate": 4.9925781119452025e-06, + "loss": 0.6421, + "step": 597 + }, + { + "epoch": 0.0881139489194499, + "grad_norm": 0.620445966720581, + "learning_rate": 4.992548230006547e-06, + "loss": 0.6617, + "step": 598 + }, + { + "epoch": 0.08826129666011788, + "grad_norm": 0.5861567258834839, + "learning_rate": 4.992518288123276e-06, + "loss": 0.6776, + "step": 599 + }, + { + "epoch": 0.08840864440078586, + "grad_norm": 0.5894278287887573, + "learning_rate": 4.992488286296109e-06, + "loss": 0.6547, + "step": 600 + }, + { + "epoch": 0.08855599214145383, + "grad_norm": 0.6295183897018433, + "learning_rate": 4.992458224525768e-06, + "loss": 0.6463, + "step": 601 + }, + { + "epoch": 0.08870333988212181, + "grad_norm": 0.5993700623512268, + "learning_rate": 4.992428102812976e-06, + "loss": 0.6222, + "step": 602 + }, + { + "epoch": 0.08885068762278979, + "grad_norm": 0.6060412526130676, + "learning_rate": 4.992397921158457e-06, + "loss": 0.6786, + "step": 603 + }, + { + "epoch": 0.08899803536345777, + "grad_norm": 0.6075387597084045, + "learning_rate": 4.992367679562937e-06, + "loss": 0.6489, + "step": 604 + }, + { + "epoch": 0.08914538310412573, + "grad_norm": 0.6133982539176941, + "learning_rate": 4.9923373780271435e-06, + "loss": 0.6407, + "step": 605 + }, + { + "epoch": 0.08929273084479371, + "grad_norm": 0.6603361964225769, + "learning_rate": 4.992307016551805e-06, + "loss": 0.6516, + "step": 606 + }, + { + "epoch": 0.08944007858546169, + "grad_norm": 0.6534843444824219, + "learning_rate": 4.992276595137652e-06, + "loss": 0.6591, + "step": 607 + }, + { + "epoch": 0.08958742632612966, + "grad_norm": 0.598167359828949, + "learning_rate": 4.992246113785415e-06, + "loss": 0.6566, + "step": 608 + }, + { + "epoch": 0.08973477406679764, + "grad_norm": 0.592099666595459, + "learning_rate": 4.992215572495828e-06, + "loss": 0.6478, + "step": 609 + }, + { + "epoch": 0.08988212180746562, + "grad_norm": 0.6012969017028809, + "learning_rate": 4.992184971269626e-06, + "loss": 0.6524, + "step": 610 + }, + { + "epoch": 0.0900294695481336, + "grad_norm": 0.6169747114181519, + "learning_rate": 4.9921543101075434e-06, + "loss": 0.6717, + "step": 611 + }, + { + "epoch": 0.09017681728880157, + "grad_norm": 0.5817141532897949, + "learning_rate": 4.992123589010319e-06, + "loss": 0.6598, + "step": 612 + }, + { + "epoch": 0.09032416502946955, + "grad_norm": 0.6634376049041748, + "learning_rate": 4.992092807978691e-06, + "loss": 0.6539, + "step": 613 + }, + { + "epoch": 0.09047151277013753, + "grad_norm": 0.6159392595291138, + "learning_rate": 4.992061967013401e-06, + "loss": 0.6384, + "step": 614 + }, + { + "epoch": 0.0906188605108055, + "grad_norm": 0.6568448543548584, + "learning_rate": 4.9920310661151885e-06, + "loss": 0.6611, + "step": 615 + }, + { + "epoch": 0.09076620825147348, + "grad_norm": 0.6008943319320679, + "learning_rate": 4.9920001052847975e-06, + "loss": 0.6519, + "step": 616 + }, + { + "epoch": 0.09091355599214146, + "grad_norm": 0.5854132175445557, + "learning_rate": 4.991969084522973e-06, + "loss": 0.6506, + "step": 617 + }, + { + "epoch": 0.09106090373280944, + "grad_norm": 0.6005224585533142, + "learning_rate": 4.991938003830462e-06, + "loss": 0.6371, + "step": 618 + }, + { + "epoch": 0.0912082514734774, + "grad_norm": 0.6364739537239075, + "learning_rate": 4.991906863208009e-06, + "loss": 0.6252, + "step": 619 + }, + { + "epoch": 0.09135559921414538, + "grad_norm": 0.676878035068512, + "learning_rate": 4.9918756626563654e-06, + "loss": 0.649, + "step": 620 + }, + { + "epoch": 0.09150294695481336, + "grad_norm": 0.650751531124115, + "learning_rate": 4.991844402176281e-06, + "loss": 0.6234, + "step": 621 + }, + { + "epoch": 0.09165029469548133, + "grad_norm": 0.6206580996513367, + "learning_rate": 4.991813081768508e-06, + "loss": 0.6201, + "step": 622 + }, + { + "epoch": 0.09179764243614931, + "grad_norm": 0.6371481418609619, + "learning_rate": 4.991781701433798e-06, + "loss": 0.6107, + "step": 623 + }, + { + "epoch": 0.09194499017681729, + "grad_norm": 0.6326432824134827, + "learning_rate": 4.9917502611729075e-06, + "loss": 0.6724, + "step": 624 + }, + { + "epoch": 0.09209233791748526, + "grad_norm": 0.6144717335700989, + "learning_rate": 4.991718760986592e-06, + "loss": 0.6435, + "step": 625 + }, + { + "epoch": 0.09223968565815324, + "grad_norm": 0.5820417404174805, + "learning_rate": 4.991687200875609e-06, + "loss": 0.6248, + "step": 626 + }, + { + "epoch": 0.09238703339882122, + "grad_norm": 0.5949105620384216, + "learning_rate": 4.991655580840717e-06, + "loss": 0.62, + "step": 627 + }, + { + "epoch": 0.0925343811394892, + "grad_norm": 0.5833020210266113, + "learning_rate": 4.9916239008826774e-06, + "loss": 0.6244, + "step": 628 + }, + { + "epoch": 0.09268172888015717, + "grad_norm": 0.5961763858795166, + "learning_rate": 4.991592161002251e-06, + "loss": 0.617, + "step": 629 + }, + { + "epoch": 0.09282907662082515, + "grad_norm": 0.6066949367523193, + "learning_rate": 4.9915603612002025e-06, + "loss": 0.6225, + "step": 630 + }, + { + "epoch": 0.09297642436149313, + "grad_norm": 0.5680168271064758, + "learning_rate": 4.991528501477295e-06, + "loss": 0.653, + "step": 631 + }, + { + "epoch": 0.0931237721021611, + "grad_norm": 0.590671718120575, + "learning_rate": 4.991496581834297e-06, + "loss": 0.6592, + "step": 632 + }, + { + "epoch": 0.09327111984282907, + "grad_norm": 0.595911979675293, + "learning_rate": 4.9914646022719735e-06, + "loss": 0.6397, + "step": 633 + }, + { + "epoch": 0.09341846758349705, + "grad_norm": 0.608792781829834, + "learning_rate": 4.991432562791095e-06, + "loss": 0.6756, + "step": 634 + }, + { + "epoch": 0.09356581532416502, + "grad_norm": 0.5708727240562439, + "learning_rate": 4.991400463392433e-06, + "loss": 0.6244, + "step": 635 + }, + { + "epoch": 0.093713163064833, + "grad_norm": 0.6144192814826965, + "learning_rate": 4.991368304076758e-06, + "loss": 0.6625, + "step": 636 + }, + { + "epoch": 0.09386051080550098, + "grad_norm": 0.6002987027168274, + "learning_rate": 4.991336084844843e-06, + "loss": 0.5809, + "step": 637 + }, + { + "epoch": 0.09400785854616896, + "grad_norm": 0.5959497690200806, + "learning_rate": 4.991303805697464e-06, + "loss": 0.6527, + "step": 638 + }, + { + "epoch": 0.09415520628683693, + "grad_norm": 0.6001700162887573, + "learning_rate": 4.991271466635397e-06, + "loss": 0.6487, + "step": 639 + }, + { + "epoch": 0.09430255402750491, + "grad_norm": 0.5984620451927185, + "learning_rate": 4.99123906765942e-06, + "loss": 0.6156, + "step": 640 + }, + { + "epoch": 0.09444990176817289, + "grad_norm": 0.5954095125198364, + "learning_rate": 4.991206608770311e-06, + "loss": 0.627, + "step": 641 + }, + { + "epoch": 0.09459724950884087, + "grad_norm": 0.6021104454994202, + "learning_rate": 4.991174089968853e-06, + "loss": 0.6304, + "step": 642 + }, + { + "epoch": 0.09474459724950884, + "grad_norm": 0.6257914304733276, + "learning_rate": 4.991141511255826e-06, + "loss": 0.645, + "step": 643 + }, + { + "epoch": 0.09489194499017682, + "grad_norm": 0.6502537131309509, + "learning_rate": 4.991108872632013e-06, + "loss": 0.6959, + "step": 644 + }, + { + "epoch": 0.0950392927308448, + "grad_norm": 0.6078796982765198, + "learning_rate": 4.991076174098201e-06, + "loss": 0.6275, + "step": 645 + }, + { + "epoch": 0.09518664047151278, + "grad_norm": 0.6105157732963562, + "learning_rate": 4.9910434156551755e-06, + "loss": 0.6702, + "step": 646 + }, + { + "epoch": 0.09533398821218074, + "grad_norm": 0.6221362352371216, + "learning_rate": 4.991010597303724e-06, + "loss": 0.6405, + "step": 647 + }, + { + "epoch": 0.09548133595284872, + "grad_norm": 0.6187402009963989, + "learning_rate": 4.9909777190446364e-06, + "loss": 0.6293, + "step": 648 + }, + { + "epoch": 0.0956286836935167, + "grad_norm": 0.6071860790252686, + "learning_rate": 4.990944780878703e-06, + "loss": 0.6291, + "step": 649 + }, + { + "epoch": 0.09577603143418467, + "grad_norm": 0.6227055788040161, + "learning_rate": 4.9909117828067156e-06, + "loss": 0.6475, + "step": 650 + }, + { + "epoch": 0.09592337917485265, + "grad_norm": 0.6024603843688965, + "learning_rate": 4.9908787248294674e-06, + "loss": 0.6509, + "step": 651 + }, + { + "epoch": 0.09607072691552063, + "grad_norm": 0.5946162939071655, + "learning_rate": 4.9908456069477555e-06, + "loss": 0.6113, + "step": 652 + }, + { + "epoch": 0.0962180746561886, + "grad_norm": 0.6044866442680359, + "learning_rate": 4.9908124291623735e-06, + "loss": 0.6366, + "step": 653 + }, + { + "epoch": 0.09636542239685658, + "grad_norm": 0.6567986607551575, + "learning_rate": 4.990779191474122e-06, + "loss": 0.6191, + "step": 654 + }, + { + "epoch": 0.09651277013752456, + "grad_norm": 0.5973286628723145, + "learning_rate": 4.990745893883799e-06, + "loss": 0.673, + "step": 655 + }, + { + "epoch": 0.09666011787819254, + "grad_norm": 0.5799248218536377, + "learning_rate": 4.990712536392205e-06, + "loss": 0.645, + "step": 656 + }, + { + "epoch": 0.09680746561886051, + "grad_norm": 0.6372923254966736, + "learning_rate": 4.990679119000144e-06, + "loss": 0.6004, + "step": 657 + }, + { + "epoch": 0.09695481335952849, + "grad_norm": 0.5822020769119263, + "learning_rate": 4.990645641708417e-06, + "loss": 0.6301, + "step": 658 + }, + { + "epoch": 0.09710216110019647, + "grad_norm": 0.5995962023735046, + "learning_rate": 4.990612104517831e-06, + "loss": 0.6455, + "step": 659 + }, + { + "epoch": 0.09724950884086445, + "grad_norm": 0.6065372824668884, + "learning_rate": 4.990578507429192e-06, + "loss": 0.6582, + "step": 660 + }, + { + "epoch": 0.09739685658153242, + "grad_norm": 0.7086186408996582, + "learning_rate": 4.990544850443308e-06, + "loss": 0.6271, + "step": 661 + }, + { + "epoch": 0.09754420432220039, + "grad_norm": 0.6229815483093262, + "learning_rate": 4.990511133560989e-06, + "loss": 0.5977, + "step": 662 + }, + { + "epoch": 0.09769155206286836, + "grad_norm": 0.6118787527084351, + "learning_rate": 4.990477356783044e-06, + "loss": 0.6285, + "step": 663 + }, + { + "epoch": 0.09783889980353634, + "grad_norm": 0.5844835638999939, + "learning_rate": 4.990443520110288e-06, + "loss": 0.6488, + "step": 664 + }, + { + "epoch": 0.09798624754420432, + "grad_norm": 0.635471761226654, + "learning_rate": 4.990409623543533e-06, + "loss": 0.6108, + "step": 665 + }, + { + "epoch": 0.0981335952848723, + "grad_norm": 0.6328785419464111, + "learning_rate": 4.990375667083594e-06, + "loss": 0.6708, + "step": 666 + }, + { + "epoch": 0.09828094302554027, + "grad_norm": 0.5935143828392029, + "learning_rate": 4.99034165073129e-06, + "loss": 0.6462, + "step": 667 + }, + { + "epoch": 0.09842829076620825, + "grad_norm": 0.627528965473175, + "learning_rate": 4.990307574487435e-06, + "loss": 0.6304, + "step": 668 + }, + { + "epoch": 0.09857563850687623, + "grad_norm": 0.5901961922645569, + "learning_rate": 4.990273438352853e-06, + "loss": 0.6266, + "step": 669 + }, + { + "epoch": 0.0987229862475442, + "grad_norm": 0.5855931043624878, + "learning_rate": 4.9902392423283616e-06, + "loss": 0.621, + "step": 670 + }, + { + "epoch": 0.09887033398821218, + "grad_norm": 0.6360809803009033, + "learning_rate": 4.990204986414785e-06, + "loss": 0.6522, + "step": 671 + }, + { + "epoch": 0.09901768172888016, + "grad_norm": 0.6060152053833008, + "learning_rate": 4.990170670612946e-06, + "loss": 0.6244, + "step": 672 + }, + { + "epoch": 0.09916502946954814, + "grad_norm": 0.6046375632286072, + "learning_rate": 4.990136294923671e-06, + "loss": 0.652, + "step": 673 + }, + { + "epoch": 0.09931237721021612, + "grad_norm": 0.5844109058380127, + "learning_rate": 4.990101859347785e-06, + "loss": 0.639, + "step": 674 + }, + { + "epoch": 0.09945972495088409, + "grad_norm": 0.6226914525032043, + "learning_rate": 4.990067363886118e-06, + "loss": 0.6449, + "step": 675 + }, + { + "epoch": 0.09960707269155206, + "grad_norm": 0.5682400465011597, + "learning_rate": 4.990032808539499e-06, + "loss": 0.6068, + "step": 676 + }, + { + "epoch": 0.09975442043222003, + "grad_norm": 0.6099737882614136, + "learning_rate": 4.989998193308758e-06, + "loss": 0.6319, + "step": 677 + }, + { + "epoch": 0.09990176817288801, + "grad_norm": 0.620363175868988, + "learning_rate": 4.989963518194729e-06, + "loss": 0.6456, + "step": 678 + }, + { + "epoch": 0.10004911591355599, + "grad_norm": 0.577392578125, + "learning_rate": 4.989928783198245e-06, + "loss": 0.6378, + "step": 679 + }, + { + "epoch": 0.10019646365422397, + "grad_norm": 0.6067952513694763, + "learning_rate": 4.989893988320141e-06, + "loss": 0.6397, + "step": 680 + }, + { + "epoch": 0.10034381139489194, + "grad_norm": 0.5824027061462402, + "learning_rate": 4.989859133561255e-06, + "loss": 0.6149, + "step": 681 + }, + { + "epoch": 0.10049115913555992, + "grad_norm": 0.6062278151512146, + "learning_rate": 4.989824218922425e-06, + "loss": 0.668, + "step": 682 + }, + { + "epoch": 0.1006385068762279, + "grad_norm": 0.5911549925804138, + "learning_rate": 4.98978924440449e-06, + "loss": 0.6281, + "step": 683 + }, + { + "epoch": 0.10078585461689588, + "grad_norm": 0.6093787550926208, + "learning_rate": 4.989754210008292e-06, + "loss": 0.631, + "step": 684 + }, + { + "epoch": 0.10093320235756385, + "grad_norm": 0.5662335753440857, + "learning_rate": 4.989719115734672e-06, + "loss": 0.6354, + "step": 685 + }, + { + "epoch": 0.10108055009823183, + "grad_norm": 0.6003792881965637, + "learning_rate": 4.989683961584476e-06, + "loss": 0.6656, + "step": 686 + }, + { + "epoch": 0.10122789783889981, + "grad_norm": 0.6164658069610596, + "learning_rate": 4.9896487475585475e-06, + "loss": 0.6434, + "step": 687 + }, + { + "epoch": 0.10137524557956779, + "grad_norm": 0.5994817018508911, + "learning_rate": 4.989613473657735e-06, + "loss": 0.6348, + "step": 688 + }, + { + "epoch": 0.10152259332023576, + "grad_norm": 0.6414754390716553, + "learning_rate": 4.989578139882885e-06, + "loss": 0.6257, + "step": 689 + }, + { + "epoch": 0.10166994106090373, + "grad_norm": 0.5904622077941895, + "learning_rate": 4.98954274623485e-06, + "loss": 0.6557, + "step": 690 + }, + { + "epoch": 0.1018172888015717, + "grad_norm": 0.5870137214660645, + "learning_rate": 4.989507292714479e-06, + "loss": 0.6645, + "step": 691 + }, + { + "epoch": 0.10196463654223968, + "grad_norm": 0.6198242902755737, + "learning_rate": 4.989471779322625e-06, + "loss": 0.6385, + "step": 692 + }, + { + "epoch": 0.10211198428290766, + "grad_norm": 0.592327892780304, + "learning_rate": 4.989436206060143e-06, + "loss": 0.6432, + "step": 693 + }, + { + "epoch": 0.10225933202357564, + "grad_norm": 0.5972097516059875, + "learning_rate": 4.989400572927887e-06, + "loss": 0.6146, + "step": 694 + }, + { + "epoch": 0.10240667976424361, + "grad_norm": 0.5964189767837524, + "learning_rate": 4.989364879926716e-06, + "loss": 0.6353, + "step": 695 + }, + { + "epoch": 0.10255402750491159, + "grad_norm": 0.6221606731414795, + "learning_rate": 4.9893291270574866e-06, + "loss": 0.6481, + "step": 696 + }, + { + "epoch": 0.10270137524557957, + "grad_norm": 0.6011464595794678, + "learning_rate": 4.989293314321059e-06, + "loss": 0.6538, + "step": 697 + }, + { + "epoch": 0.10284872298624755, + "grad_norm": 0.5836980938911438, + "learning_rate": 4.989257441718295e-06, + "loss": 0.6429, + "step": 698 + }, + { + "epoch": 0.10299607072691552, + "grad_norm": 0.6195159554481506, + "learning_rate": 4.989221509250057e-06, + "loss": 0.6389, + "step": 699 + }, + { + "epoch": 0.1031434184675835, + "grad_norm": 0.5972622632980347, + "learning_rate": 4.989185516917209e-06, + "loss": 0.647, + "step": 700 + }, + { + "epoch": 0.10329076620825148, + "grad_norm": 0.5893787741661072, + "learning_rate": 4.989149464720618e-06, + "loss": 0.6409, + "step": 701 + }, + { + "epoch": 0.10343811394891946, + "grad_norm": 0.5762535929679871, + "learning_rate": 4.989113352661148e-06, + "loss": 0.6206, + "step": 702 + }, + { + "epoch": 0.10358546168958743, + "grad_norm": 0.5874776244163513, + "learning_rate": 4.98907718073967e-06, + "loss": 0.6012, + "step": 703 + }, + { + "epoch": 0.1037328094302554, + "grad_norm": 0.6217293739318848, + "learning_rate": 4.989040948957053e-06, + "loss": 0.6413, + "step": 704 + }, + { + "epoch": 0.10388015717092337, + "grad_norm": 0.593628466129303, + "learning_rate": 4.989004657314169e-06, + "loss": 0.6409, + "step": 705 + }, + { + "epoch": 0.10402750491159135, + "grad_norm": 0.6072261333465576, + "learning_rate": 4.988968305811891e-06, + "loss": 0.6514, + "step": 706 + }, + { + "epoch": 0.10417485265225933, + "grad_norm": 0.6202991008758545, + "learning_rate": 4.988931894451092e-06, + "loss": 0.6427, + "step": 707 + }, + { + "epoch": 0.1043222003929273, + "grad_norm": 0.6071133017539978, + "learning_rate": 4.988895423232648e-06, + "loss": 0.6325, + "step": 708 + }, + { + "epoch": 0.10446954813359528, + "grad_norm": 0.5788361430168152, + "learning_rate": 4.988858892157437e-06, + "loss": 0.6155, + "step": 709 + }, + { + "epoch": 0.10461689587426326, + "grad_norm": 0.6457369327545166, + "learning_rate": 4.988822301226337e-06, + "loss": 0.6662, + "step": 710 + }, + { + "epoch": 0.10476424361493124, + "grad_norm": 0.5848075151443481, + "learning_rate": 4.988785650440228e-06, + "loss": 0.6413, + "step": 711 + }, + { + "epoch": 0.10491159135559922, + "grad_norm": 0.6248124241828918, + "learning_rate": 4.988748939799991e-06, + "loss": 0.6868, + "step": 712 + }, + { + "epoch": 0.10505893909626719, + "grad_norm": 0.5965239405632019, + "learning_rate": 4.98871216930651e-06, + "loss": 0.655, + "step": 713 + }, + { + "epoch": 0.10520628683693517, + "grad_norm": 0.5697264671325684, + "learning_rate": 4.988675338960668e-06, + "loss": 0.6393, + "step": 714 + }, + { + "epoch": 0.10535363457760315, + "grad_norm": 0.5813064575195312, + "learning_rate": 4.988638448763352e-06, + "loss": 0.6429, + "step": 715 + }, + { + "epoch": 0.10550098231827112, + "grad_norm": 0.6059949994087219, + "learning_rate": 4.988601498715447e-06, + "loss": 0.6695, + "step": 716 + }, + { + "epoch": 0.1056483300589391, + "grad_norm": 0.606278121471405, + "learning_rate": 4.988564488817845e-06, + "loss": 0.6472, + "step": 717 + }, + { + "epoch": 0.10579567779960708, + "grad_norm": 0.6361944675445557, + "learning_rate": 4.988527419071433e-06, + "loss": 0.6432, + "step": 718 + }, + { + "epoch": 0.10594302554027504, + "grad_norm": 0.5772610902786255, + "learning_rate": 4.9884902894771044e-06, + "loss": 0.6585, + "step": 719 + }, + { + "epoch": 0.10609037328094302, + "grad_norm": 0.5951732993125916, + "learning_rate": 4.988453100035752e-06, + "loss": 0.641, + "step": 720 + }, + { + "epoch": 0.106237721021611, + "grad_norm": 0.5946915745735168, + "learning_rate": 4.988415850748268e-06, + "loss": 0.6521, + "step": 721 + }, + { + "epoch": 0.10638506876227898, + "grad_norm": 0.6204200983047485, + "learning_rate": 4.988378541615552e-06, + "loss": 0.6518, + "step": 722 + }, + { + "epoch": 0.10653241650294695, + "grad_norm": 0.5973379611968994, + "learning_rate": 4.988341172638498e-06, + "loss": 0.6444, + "step": 723 + }, + { + "epoch": 0.10667976424361493, + "grad_norm": 0.6331665515899658, + "learning_rate": 4.988303743818005e-06, + "loss": 0.6073, + "step": 724 + }, + { + "epoch": 0.10682711198428291, + "grad_norm": 0.5919360518455505, + "learning_rate": 4.988266255154975e-06, + "loss": 0.6263, + "step": 725 + }, + { + "epoch": 0.10697445972495088, + "grad_norm": 0.6058592200279236, + "learning_rate": 4.988228706650309e-06, + "loss": 0.6194, + "step": 726 + }, + { + "epoch": 0.10712180746561886, + "grad_norm": 0.6152552962303162, + "learning_rate": 4.9881910983049095e-06, + "loss": 0.6584, + "step": 727 + }, + { + "epoch": 0.10726915520628684, + "grad_norm": 0.6368682980537415, + "learning_rate": 4.988153430119681e-06, + "loss": 0.6399, + "step": 728 + }, + { + "epoch": 0.10741650294695482, + "grad_norm": 0.5861819386482239, + "learning_rate": 4.988115702095529e-06, + "loss": 0.6504, + "step": 729 + }, + { + "epoch": 0.1075638506876228, + "grad_norm": 0.6063957214355469, + "learning_rate": 4.9880779142333625e-06, + "loss": 0.6377, + "step": 730 + }, + { + "epoch": 0.10771119842829077, + "grad_norm": 0.6544402241706848, + "learning_rate": 4.988040066534089e-06, + "loss": 0.6269, + "step": 731 + }, + { + "epoch": 0.10785854616895875, + "grad_norm": 0.6460950970649719, + "learning_rate": 4.988002158998618e-06, + "loss": 0.6045, + "step": 732 + }, + { + "epoch": 0.10800589390962671, + "grad_norm": 0.6288785338401794, + "learning_rate": 4.987964191627862e-06, + "loss": 0.587, + "step": 733 + }, + { + "epoch": 0.10815324165029469, + "grad_norm": 0.6105619668960571, + "learning_rate": 4.987926164422735e-06, + "loss": 0.6374, + "step": 734 + }, + { + "epoch": 0.10830058939096267, + "grad_norm": 0.6011467576026917, + "learning_rate": 4.98788807738415e-06, + "loss": 0.6671, + "step": 735 + }, + { + "epoch": 0.10844793713163065, + "grad_norm": 0.6194849014282227, + "learning_rate": 4.987849930513024e-06, + "loss": 0.624, + "step": 736 + }, + { + "epoch": 0.10859528487229862, + "grad_norm": 0.6626009941101074, + "learning_rate": 4.9878117238102745e-06, + "loss": 0.647, + "step": 737 + }, + { + "epoch": 0.1087426326129666, + "grad_norm": 0.6058076620101929, + "learning_rate": 4.9877734572768185e-06, + "loss": 0.5902, + "step": 738 + }, + { + "epoch": 0.10888998035363458, + "grad_norm": 0.628951907157898, + "learning_rate": 4.987735130913578e-06, + "loss": 0.6482, + "step": 739 + }, + { + "epoch": 0.10903732809430255, + "grad_norm": 0.5985690355300903, + "learning_rate": 4.987696744721475e-06, + "loss": 0.6071, + "step": 740 + }, + { + "epoch": 0.10918467583497053, + "grad_norm": 0.6156362295150757, + "learning_rate": 4.987658298701432e-06, + "loss": 0.6207, + "step": 741 + }, + { + "epoch": 0.10933202357563851, + "grad_norm": 0.5943852066993713, + "learning_rate": 4.987619792854374e-06, + "loss": 0.6165, + "step": 742 + }, + { + "epoch": 0.10947937131630649, + "grad_norm": 0.639491856098175, + "learning_rate": 4.987581227181226e-06, + "loss": 0.6048, + "step": 743 + }, + { + "epoch": 0.10962671905697446, + "grad_norm": 0.6356396675109863, + "learning_rate": 4.987542601682917e-06, + "loss": 0.6699, + "step": 744 + }, + { + "epoch": 0.10977406679764244, + "grad_norm": 0.5979402661323547, + "learning_rate": 4.987503916360375e-06, + "loss": 0.6242, + "step": 745 + }, + { + "epoch": 0.10992141453831042, + "grad_norm": 0.6376111507415771, + "learning_rate": 4.98746517121453e-06, + "loss": 0.6451, + "step": 746 + }, + { + "epoch": 0.11006876227897838, + "grad_norm": 0.6607041954994202, + "learning_rate": 4.987426366246315e-06, + "loss": 0.6308, + "step": 747 + }, + { + "epoch": 0.11021611001964636, + "grad_norm": 0.5964421033859253, + "learning_rate": 4.987387501456662e-06, + "loss": 0.6459, + "step": 748 + }, + { + "epoch": 0.11036345776031434, + "grad_norm": 0.6136065125465393, + "learning_rate": 4.987348576846507e-06, + "loss": 0.6549, + "step": 749 + }, + { + "epoch": 0.11051080550098231, + "grad_norm": 0.6105695366859436, + "learning_rate": 4.987309592416784e-06, + "loss": 0.6345, + "step": 750 + }, + { + "epoch": 0.11065815324165029, + "grad_norm": 0.6183786988258362, + "learning_rate": 4.9872705481684325e-06, + "loss": 0.6327, + "step": 751 + }, + { + "epoch": 0.11080550098231827, + "grad_norm": 0.5961651802062988, + "learning_rate": 4.9872314441023915e-06, + "loss": 0.6272, + "step": 752 + }, + { + "epoch": 0.11095284872298625, + "grad_norm": 0.6030038595199585, + "learning_rate": 4.9871922802196e-06, + "loss": 0.6053, + "step": 753 + }, + { + "epoch": 0.11110019646365422, + "grad_norm": 0.6321914196014404, + "learning_rate": 4.987153056521002e-06, + "loss": 0.6515, + "step": 754 + }, + { + "epoch": 0.1112475442043222, + "grad_norm": 0.6013494729995728, + "learning_rate": 4.987113773007538e-06, + "loss": 0.6521, + "step": 755 + }, + { + "epoch": 0.11139489194499018, + "grad_norm": 0.6036097407341003, + "learning_rate": 4.987074429680155e-06, + "loss": 0.6091, + "step": 756 + }, + { + "epoch": 0.11154223968565816, + "grad_norm": 0.6027313470840454, + "learning_rate": 4.9870350265397985e-06, + "loss": 0.6618, + "step": 757 + }, + { + "epoch": 0.11168958742632613, + "grad_norm": 0.5730757713317871, + "learning_rate": 4.9869955635874165e-06, + "loss": 0.6121, + "step": 758 + }, + { + "epoch": 0.11183693516699411, + "grad_norm": 0.6118531823158264, + "learning_rate": 4.986956040823957e-06, + "loss": 0.6014, + "step": 759 + }, + { + "epoch": 0.11198428290766209, + "grad_norm": 0.614724338054657, + "learning_rate": 4.986916458250372e-06, + "loss": 0.6137, + "step": 760 + }, + { + "epoch": 0.11213163064833005, + "grad_norm": 0.6175045967102051, + "learning_rate": 4.9868768158676115e-06, + "loss": 0.6732, + "step": 761 + }, + { + "epoch": 0.11227897838899803, + "grad_norm": 0.6175711750984192, + "learning_rate": 4.986837113676631e-06, + "loss": 0.6184, + "step": 762 + }, + { + "epoch": 0.11242632612966601, + "grad_norm": 0.6134110689163208, + "learning_rate": 4.986797351678383e-06, + "loss": 0.6132, + "step": 763 + }, + { + "epoch": 0.11257367387033398, + "grad_norm": 0.6320343613624573, + "learning_rate": 4.986757529873827e-06, + "loss": 0.6579, + "step": 764 + }, + { + "epoch": 0.11272102161100196, + "grad_norm": 0.5930683016777039, + "learning_rate": 4.986717648263917e-06, + "loss": 0.6682, + "step": 765 + }, + { + "epoch": 0.11286836935166994, + "grad_norm": 0.6083317399024963, + "learning_rate": 4.986677706849615e-06, + "loss": 0.6295, + "step": 766 + }, + { + "epoch": 0.11301571709233792, + "grad_norm": 0.6010107398033142, + "learning_rate": 4.9866377056318795e-06, + "loss": 0.6168, + "step": 767 + }, + { + "epoch": 0.1131630648330059, + "grad_norm": 0.6261181831359863, + "learning_rate": 4.986597644611675e-06, + "loss": 0.608, + "step": 768 + }, + { + "epoch": 0.11331041257367387, + "grad_norm": 0.6035236120223999, + "learning_rate": 4.986557523789962e-06, + "loss": 0.6547, + "step": 769 + }, + { + "epoch": 0.11345776031434185, + "grad_norm": 0.6091105937957764, + "learning_rate": 4.986517343167707e-06, + "loss": 0.5821, + "step": 770 + }, + { + "epoch": 0.11360510805500983, + "grad_norm": 0.6440032720565796, + "learning_rate": 4.986477102745877e-06, + "loss": 0.6448, + "step": 771 + }, + { + "epoch": 0.1137524557956778, + "grad_norm": 0.6122448444366455, + "learning_rate": 4.986436802525439e-06, + "loss": 0.5984, + "step": 772 + }, + { + "epoch": 0.11389980353634578, + "grad_norm": 0.5637643933296204, + "learning_rate": 4.986396442507361e-06, + "loss": 0.6039, + "step": 773 + }, + { + "epoch": 0.11404715127701376, + "grad_norm": 0.6586242318153381, + "learning_rate": 4.986356022692615e-06, + "loss": 0.6621, + "step": 774 + }, + { + "epoch": 0.11419449901768172, + "grad_norm": 0.5944536328315735, + "learning_rate": 4.986315543082173e-06, + "loss": 0.6442, + "step": 775 + }, + { + "epoch": 0.1143418467583497, + "grad_norm": 0.6115134358406067, + "learning_rate": 4.986275003677008e-06, + "loss": 0.6541, + "step": 776 + }, + { + "epoch": 0.11448919449901768, + "grad_norm": 0.5872041583061218, + "learning_rate": 4.9862344044780955e-06, + "loss": 0.6644, + "step": 777 + }, + { + "epoch": 0.11463654223968565, + "grad_norm": 0.6199164390563965, + "learning_rate": 4.9861937454864115e-06, + "loss": 0.6205, + "step": 778 + }, + { + "epoch": 0.11478388998035363, + "grad_norm": 0.5738059282302856, + "learning_rate": 4.986153026702935e-06, + "loss": 0.6041, + "step": 779 + }, + { + "epoch": 0.11493123772102161, + "grad_norm": 0.6181480288505554, + "learning_rate": 4.986112248128644e-06, + "loss": 0.6373, + "step": 780 + }, + { + "epoch": 0.11507858546168959, + "grad_norm": 0.6062254309654236, + "learning_rate": 4.986071409764519e-06, + "loss": 0.5962, + "step": 781 + }, + { + "epoch": 0.11522593320235756, + "grad_norm": 0.6333271265029907, + "learning_rate": 4.986030511611543e-06, + "loss": 0.6344, + "step": 782 + }, + { + "epoch": 0.11537328094302554, + "grad_norm": 0.6103214621543884, + "learning_rate": 4.985989553670698e-06, + "loss": 0.6159, + "step": 783 + }, + { + "epoch": 0.11552062868369352, + "grad_norm": 0.5975281596183777, + "learning_rate": 4.985948535942972e-06, + "loss": 0.6215, + "step": 784 + }, + { + "epoch": 0.1156679764243615, + "grad_norm": 0.6461109519004822, + "learning_rate": 4.985907458429349e-06, + "loss": 0.6679, + "step": 785 + }, + { + "epoch": 0.11581532416502947, + "grad_norm": 0.5714089870452881, + "learning_rate": 4.9858663211308176e-06, + "loss": 0.6574, + "step": 786 + }, + { + "epoch": 0.11596267190569745, + "grad_norm": 0.6272205710411072, + "learning_rate": 4.985825124048367e-06, + "loss": 0.653, + "step": 787 + }, + { + "epoch": 0.11611001964636543, + "grad_norm": 0.625817060470581, + "learning_rate": 4.985783867182988e-06, + "loss": 0.6478, + "step": 788 + }, + { + "epoch": 0.1162573673870334, + "grad_norm": 0.6118309497833252, + "learning_rate": 4.985742550535673e-06, + "loss": 0.6402, + "step": 789 + }, + { + "epoch": 0.11640471512770137, + "grad_norm": 0.6215218901634216, + "learning_rate": 4.985701174107415e-06, + "loss": 0.6667, + "step": 790 + }, + { + "epoch": 0.11655206286836935, + "grad_norm": 0.6284427046775818, + "learning_rate": 4.9856597378992105e-06, + "loss": 0.6475, + "step": 791 + }, + { + "epoch": 0.11669941060903732, + "grad_norm": 0.597936749458313, + "learning_rate": 4.985618241912054e-06, + "loss": 0.6514, + "step": 792 + }, + { + "epoch": 0.1168467583497053, + "grad_norm": 0.5990316271781921, + "learning_rate": 4.985576686146946e-06, + "loss": 0.6377, + "step": 793 + }, + { + "epoch": 0.11699410609037328, + "grad_norm": 0.6392074823379517, + "learning_rate": 4.985535070604884e-06, + "loss": 0.6279, + "step": 794 + }, + { + "epoch": 0.11714145383104126, + "grad_norm": 0.6042420268058777, + "learning_rate": 4.9854933952868695e-06, + "loss": 0.6131, + "step": 795 + }, + { + "epoch": 0.11728880157170923, + "grad_norm": 0.6368290185928345, + "learning_rate": 4.985451660193904e-06, + "loss": 0.6632, + "step": 796 + }, + { + "epoch": 0.11743614931237721, + "grad_norm": 0.6070185899734497, + "learning_rate": 4.985409865326992e-06, + "loss": 0.699, + "step": 797 + }, + { + "epoch": 0.11758349705304519, + "grad_norm": 0.6090782880783081, + "learning_rate": 4.98536801068714e-06, + "loss": 0.648, + "step": 798 + }, + { + "epoch": 0.11773084479371317, + "grad_norm": 0.576924204826355, + "learning_rate": 4.985326096275351e-06, + "loss": 0.607, + "step": 799 + }, + { + "epoch": 0.11787819253438114, + "grad_norm": 0.6236814260482788, + "learning_rate": 4.985284122092636e-06, + "loss": 0.6368, + "step": 800 + }, + { + "epoch": 0.11802554027504912, + "grad_norm": 0.6224692463874817, + "learning_rate": 4.985242088140003e-06, + "loss": 0.6583, + "step": 801 + }, + { + "epoch": 0.1181728880157171, + "grad_norm": 0.5900741815567017, + "learning_rate": 4.985199994418464e-06, + "loss": 0.619, + "step": 802 + }, + { + "epoch": 0.11832023575638508, + "grad_norm": 0.6400871872901917, + "learning_rate": 4.9851578409290305e-06, + "loss": 0.6402, + "step": 803 + }, + { + "epoch": 0.11846758349705304, + "grad_norm": 0.6701797842979431, + "learning_rate": 4.985115627672716e-06, + "loss": 0.6272, + "step": 804 + }, + { + "epoch": 0.11861493123772102, + "grad_norm": 0.5970591306686401, + "learning_rate": 4.985073354650537e-06, + "loss": 0.5963, + "step": 805 + }, + { + "epoch": 0.118762278978389, + "grad_norm": 0.5993068218231201, + "learning_rate": 4.98503102186351e-06, + "loss": 0.6536, + "step": 806 + }, + { + "epoch": 0.11890962671905697, + "grad_norm": 0.5948344469070435, + "learning_rate": 4.9849886293126516e-06, + "loss": 0.6566, + "step": 807 + }, + { + "epoch": 0.11905697445972495, + "grad_norm": 0.5991151332855225, + "learning_rate": 4.9849461769989815e-06, + "loss": 0.6402, + "step": 808 + }, + { + "epoch": 0.11920432220039293, + "grad_norm": 0.603520393371582, + "learning_rate": 4.984903664923523e-06, + "loss": 0.6261, + "step": 809 + }, + { + "epoch": 0.1193516699410609, + "grad_norm": 0.5883390307426453, + "learning_rate": 4.984861093087296e-06, + "loss": 0.6498, + "step": 810 + }, + { + "epoch": 0.11949901768172888, + "grad_norm": 0.5898233652114868, + "learning_rate": 4.984818461491324e-06, + "loss": 0.6205, + "step": 811 + }, + { + "epoch": 0.11964636542239686, + "grad_norm": 0.6320998072624207, + "learning_rate": 4.984775770136635e-06, + "loss": 0.6191, + "step": 812 + }, + { + "epoch": 0.11979371316306484, + "grad_norm": 0.6036606431007385, + "learning_rate": 4.9847330190242534e-06, + "loss": 0.6311, + "step": 813 + }, + { + "epoch": 0.11994106090373281, + "grad_norm": 0.591684103012085, + "learning_rate": 4.9846902081552086e-06, + "loss": 0.6393, + "step": 814 + }, + { + "epoch": 0.12008840864440079, + "grad_norm": 0.5882051587104797, + "learning_rate": 4.9846473375305295e-06, + "loss": 0.6191, + "step": 815 + }, + { + "epoch": 0.12023575638506877, + "grad_norm": 0.6225761771202087, + "learning_rate": 4.984604407151248e-06, + "loss": 0.6297, + "step": 816 + }, + { + "epoch": 0.12038310412573675, + "grad_norm": 0.6148713231086731, + "learning_rate": 4.984561417018395e-06, + "loss": 0.6531, + "step": 817 + }, + { + "epoch": 0.12053045186640471, + "grad_norm": 0.6093199849128723, + "learning_rate": 4.984518367133006e-06, + "loss": 0.6401, + "step": 818 + }, + { + "epoch": 0.12067779960707269, + "grad_norm": 0.6269418001174927, + "learning_rate": 4.984475257496114e-06, + "loss": 0.633, + "step": 819 + }, + { + "epoch": 0.12082514734774066, + "grad_norm": 0.5972291231155396, + "learning_rate": 4.984432088108759e-06, + "loss": 0.6812, + "step": 820 + }, + { + "epoch": 0.12097249508840864, + "grad_norm": 0.6368165612220764, + "learning_rate": 4.984388858971977e-06, + "loss": 0.6494, + "step": 821 + }, + { + "epoch": 0.12111984282907662, + "grad_norm": 0.5930677652359009, + "learning_rate": 4.984345570086808e-06, + "loss": 0.6289, + "step": 822 + }, + { + "epoch": 0.1212671905697446, + "grad_norm": 0.6045053005218506, + "learning_rate": 4.984302221454293e-06, + "loss": 0.6756, + "step": 823 + }, + { + "epoch": 0.12141453831041257, + "grad_norm": 0.6216995120048523, + "learning_rate": 4.984258813075475e-06, + "loss": 0.6338, + "step": 824 + }, + { + "epoch": 0.12156188605108055, + "grad_norm": 0.6033307909965515, + "learning_rate": 4.984215344951398e-06, + "loss": 0.6401, + "step": 825 + }, + { + "epoch": 0.12170923379174853, + "grad_norm": 0.6186839938163757, + "learning_rate": 4.984171817083107e-06, + "loss": 0.6535, + "step": 826 + }, + { + "epoch": 0.1218565815324165, + "grad_norm": 0.6200135350227356, + "learning_rate": 4.984128229471649e-06, + "loss": 0.6093, + "step": 827 + }, + { + "epoch": 0.12200392927308448, + "grad_norm": 0.6195905208587646, + "learning_rate": 4.984084582118073e-06, + "loss": 0.6853, + "step": 828 + }, + { + "epoch": 0.12215127701375246, + "grad_norm": 0.6320921778678894, + "learning_rate": 4.984040875023427e-06, + "loss": 0.5988, + "step": 829 + }, + { + "epoch": 0.12229862475442044, + "grad_norm": 0.6552107930183411, + "learning_rate": 4.983997108188763e-06, + "loss": 0.6058, + "step": 830 + }, + { + "epoch": 0.12244597249508841, + "grad_norm": 0.5614131093025208, + "learning_rate": 4.983953281615133e-06, + "loss": 0.6421, + "step": 831 + }, + { + "epoch": 0.12259332023575638, + "grad_norm": 0.646501898765564, + "learning_rate": 4.983909395303592e-06, + "loss": 0.6453, + "step": 832 + }, + { + "epoch": 0.12274066797642436, + "grad_norm": 0.5983470678329468, + "learning_rate": 4.983865449255196e-06, + "loss": 0.6587, + "step": 833 + }, + { + "epoch": 0.12288801571709233, + "grad_norm": 0.6157034039497375, + "learning_rate": 4.983821443471e-06, + "loss": 0.6408, + "step": 834 + }, + { + "epoch": 0.12303536345776031, + "grad_norm": 0.5964518785476685, + "learning_rate": 4.983777377952063e-06, + "loss": 0.658, + "step": 835 + }, + { + "epoch": 0.12318271119842829, + "grad_norm": 0.6323977112770081, + "learning_rate": 4.983733252699446e-06, + "loss": 0.6484, + "step": 836 + }, + { + "epoch": 0.12333005893909627, + "grad_norm": 0.6068468689918518, + "learning_rate": 4.983689067714209e-06, + "loss": 0.6343, + "step": 837 + }, + { + "epoch": 0.12347740667976424, + "grad_norm": 0.5938650965690613, + "learning_rate": 4.983644822997415e-06, + "loss": 0.6241, + "step": 838 + }, + { + "epoch": 0.12362475442043222, + "grad_norm": 0.6053922772407532, + "learning_rate": 4.983600518550128e-06, + "loss": 0.6261, + "step": 839 + }, + { + "epoch": 0.1237721021611002, + "grad_norm": 0.6630250811576843, + "learning_rate": 4.983556154373413e-06, + "loss": 0.6085, + "step": 840 + }, + { + "epoch": 0.12391944990176817, + "grad_norm": 0.5925431251525879, + "learning_rate": 4.983511730468337e-06, + "loss": 0.5972, + "step": 841 + }, + { + "epoch": 0.12406679764243615, + "grad_norm": 0.5895491242408752, + "learning_rate": 4.983467246835969e-06, + "loss": 0.6027, + "step": 842 + }, + { + "epoch": 0.12421414538310413, + "grad_norm": 0.591998279094696, + "learning_rate": 4.983422703477379e-06, + "loss": 0.6223, + "step": 843 + }, + { + "epoch": 0.12436149312377211, + "grad_norm": 0.6057637333869934, + "learning_rate": 4.9833781003936376e-06, + "loss": 0.6443, + "step": 844 + }, + { + "epoch": 0.12450884086444008, + "grad_norm": 0.606509268283844, + "learning_rate": 4.983333437585818e-06, + "loss": 0.6358, + "step": 845 + }, + { + "epoch": 0.12465618860510805, + "grad_norm": 0.6238833665847778, + "learning_rate": 4.983288715054993e-06, + "loss": 0.6387, + "step": 846 + }, + { + "epoch": 0.12480353634577603, + "grad_norm": 0.5875282287597656, + "learning_rate": 4.983243932802239e-06, + "loss": 0.6294, + "step": 847 + }, + { + "epoch": 0.124950884086444, + "grad_norm": 0.6304488182067871, + "learning_rate": 4.983199090828634e-06, + "loss": 0.6199, + "step": 848 + }, + { + "epoch": 0.125098231827112, + "grad_norm": 0.5945996642112732, + "learning_rate": 4.983154189135256e-06, + "loss": 0.651, + "step": 849 + }, + { + "epoch": 0.12524557956777996, + "grad_norm": 0.6083250045776367, + "learning_rate": 4.983109227723183e-06, + "loss": 0.6535, + "step": 850 + }, + { + "epoch": 0.12539292730844795, + "grad_norm": 0.5812260508537292, + "learning_rate": 4.983064206593498e-06, + "loss": 0.669, + "step": 851 + }, + { + "epoch": 0.1255402750491159, + "grad_norm": 0.5825611352920532, + "learning_rate": 4.983019125747284e-06, + "loss": 0.6198, + "step": 852 + }, + { + "epoch": 0.1256876227897839, + "grad_norm": 0.5946929454803467, + "learning_rate": 4.982973985185624e-06, + "loss": 0.6795, + "step": 853 + }, + { + "epoch": 0.12583497053045187, + "grad_norm": 0.6095932126045227, + "learning_rate": 4.982928784909605e-06, + "loss": 0.6602, + "step": 854 + }, + { + "epoch": 0.12598231827111983, + "grad_norm": 0.5746783018112183, + "learning_rate": 4.982883524920313e-06, + "loss": 0.6135, + "step": 855 + }, + { + "epoch": 0.12612966601178782, + "grad_norm": 0.6096512675285339, + "learning_rate": 4.982838205218837e-06, + "loss": 0.6234, + "step": 856 + }, + { + "epoch": 0.12627701375245579, + "grad_norm": 0.6267403364181519, + "learning_rate": 4.982792825806267e-06, + "loss": 0.6221, + "step": 857 + }, + { + "epoch": 0.12642436149312378, + "grad_norm": 0.6095855832099915, + "learning_rate": 4.982747386683693e-06, + "loss": 0.6149, + "step": 858 + }, + { + "epoch": 0.12657170923379174, + "grad_norm": 0.6177636384963989, + "learning_rate": 4.9827018878522095e-06, + "loss": 0.6443, + "step": 859 + }, + { + "epoch": 0.12671905697445973, + "grad_norm": 0.6033649444580078, + "learning_rate": 4.98265632931291e-06, + "loss": 0.6243, + "step": 860 + }, + { + "epoch": 0.1268664047151277, + "grad_norm": 0.6118965744972229, + "learning_rate": 4.98261071106689e-06, + "loss": 0.6418, + "step": 861 + }, + { + "epoch": 0.1270137524557957, + "grad_norm": 0.5888882875442505, + "learning_rate": 4.9825650331152465e-06, + "loss": 0.6226, + "step": 862 + }, + { + "epoch": 0.12716110019646365, + "grad_norm": 0.592221200466156, + "learning_rate": 4.982519295459079e-06, + "loss": 0.5988, + "step": 863 + }, + { + "epoch": 0.12730844793713164, + "grad_norm": 0.6357391476631165, + "learning_rate": 4.982473498099487e-06, + "loss": 0.6593, + "step": 864 + }, + { + "epoch": 0.1274557956777996, + "grad_norm": 0.625713586807251, + "learning_rate": 4.982427641037571e-06, + "loss": 0.6115, + "step": 865 + }, + { + "epoch": 0.1276031434184676, + "grad_norm": 0.5800497531890869, + "learning_rate": 4.9823817242744346e-06, + "loss": 0.6331, + "step": 866 + }, + { + "epoch": 0.12775049115913556, + "grad_norm": 0.6052739024162292, + "learning_rate": 4.982335747811182e-06, + "loss": 0.6305, + "step": 867 + }, + { + "epoch": 0.12789783889980352, + "grad_norm": 0.5877625942230225, + "learning_rate": 4.98228971164892e-06, + "loss": 0.6505, + "step": 868 + }, + { + "epoch": 0.12804518664047151, + "grad_norm": 0.6004683971405029, + "learning_rate": 4.9822436157887535e-06, + "loss": 0.6429, + "step": 869 + }, + { + "epoch": 0.12819253438113948, + "grad_norm": 0.5980567932128906, + "learning_rate": 4.982197460231793e-06, + "loss": 0.6577, + "step": 870 + }, + { + "epoch": 0.12833988212180747, + "grad_norm": 0.6305035948753357, + "learning_rate": 4.982151244979147e-06, + "loss": 0.6285, + "step": 871 + }, + { + "epoch": 0.12848722986247543, + "grad_norm": 0.5693764090538025, + "learning_rate": 4.982104970031929e-06, + "loss": 0.6361, + "step": 872 + }, + { + "epoch": 0.12863457760314342, + "grad_norm": 0.6248660683631897, + "learning_rate": 4.98205863539125e-06, + "loss": 0.6437, + "step": 873 + }, + { + "epoch": 0.1287819253438114, + "grad_norm": 0.6157057881355286, + "learning_rate": 4.982012241058225e-06, + "loss": 0.6574, + "step": 874 + }, + { + "epoch": 0.12892927308447938, + "grad_norm": 0.6380220651626587, + "learning_rate": 4.981965787033969e-06, + "loss": 0.6384, + "step": 875 + }, + { + "epoch": 0.12907662082514734, + "grad_norm": 0.6251410841941833, + "learning_rate": 4.981919273319601e-06, + "loss": 0.6482, + "step": 876 + }, + { + "epoch": 0.12922396856581533, + "grad_norm": 0.614137589931488, + "learning_rate": 4.981872699916238e-06, + "loss": 0.6452, + "step": 877 + }, + { + "epoch": 0.1293713163064833, + "grad_norm": 0.5951417684555054, + "learning_rate": 4.9818260668250006e-06, + "loss": 0.6618, + "step": 878 + }, + { + "epoch": 0.1295186640471513, + "grad_norm": 0.64459627866745, + "learning_rate": 4.981779374047011e-06, + "loss": 0.6464, + "step": 879 + }, + { + "epoch": 0.12966601178781925, + "grad_norm": 0.6488474607467651, + "learning_rate": 4.98173262158339e-06, + "loss": 0.6561, + "step": 880 + }, + { + "epoch": 0.12981335952848724, + "grad_norm": 0.6025131940841675, + "learning_rate": 4.9816858094352646e-06, + "loss": 0.6559, + "step": 881 + }, + { + "epoch": 0.1299607072691552, + "grad_norm": 0.6188545823097229, + "learning_rate": 4.98163893760376e-06, + "loss": 0.6242, + "step": 882 + }, + { + "epoch": 0.13010805500982317, + "grad_norm": 0.6789697408676147, + "learning_rate": 4.981592006090002e-06, + "loss": 0.626, + "step": 883 + }, + { + "epoch": 0.13025540275049116, + "grad_norm": 0.6176579594612122, + "learning_rate": 4.98154501489512e-06, + "loss": 0.6379, + "step": 884 + }, + { + "epoch": 0.13040275049115913, + "grad_norm": 0.615065336227417, + "learning_rate": 4.981497964020245e-06, + "loss": 0.6256, + "step": 885 + }, + { + "epoch": 0.13055009823182712, + "grad_norm": 0.6152657866477966, + "learning_rate": 4.981450853466508e-06, + "loss": 0.6441, + "step": 886 + }, + { + "epoch": 0.13069744597249508, + "grad_norm": 0.6051253080368042, + "learning_rate": 4.981403683235042e-06, + "loss": 0.6162, + "step": 887 + }, + { + "epoch": 0.13084479371316307, + "grad_norm": 0.6215437650680542, + "learning_rate": 4.98135645332698e-06, + "loss": 0.6089, + "step": 888 + }, + { + "epoch": 0.13099214145383103, + "grad_norm": 0.5881799459457397, + "learning_rate": 4.98130916374346e-06, + "loss": 0.6517, + "step": 889 + }, + { + "epoch": 0.13113948919449903, + "grad_norm": 0.6118667125701904, + "learning_rate": 4.981261814485618e-06, + "loss": 0.6639, + "step": 890 + }, + { + "epoch": 0.131286836935167, + "grad_norm": 0.6376328468322754, + "learning_rate": 4.9812144055545935e-06, + "loss": 0.597, + "step": 891 + }, + { + "epoch": 0.13143418467583498, + "grad_norm": 0.6011861562728882, + "learning_rate": 4.981166936951526e-06, + "loss": 0.6128, + "step": 892 + }, + { + "epoch": 0.13158153241650294, + "grad_norm": 0.596449077129364, + "learning_rate": 4.981119408677558e-06, + "loss": 0.6013, + "step": 893 + }, + { + "epoch": 0.13172888015717094, + "grad_norm": 0.6130931973457336, + "learning_rate": 4.9810718207338315e-06, + "loss": 0.6292, + "step": 894 + }, + { + "epoch": 0.1318762278978389, + "grad_norm": 0.6056917309761047, + "learning_rate": 4.981024173121491e-06, + "loss": 0.657, + "step": 895 + }, + { + "epoch": 0.13202357563850686, + "grad_norm": 0.5847144722938538, + "learning_rate": 4.980976465841684e-06, + "loss": 0.605, + "step": 896 + }, + { + "epoch": 0.13217092337917485, + "grad_norm": 0.5788822174072266, + "learning_rate": 4.980928698895555e-06, + "loss": 0.6322, + "step": 897 + }, + { + "epoch": 0.13231827111984282, + "grad_norm": 0.5910406708717346, + "learning_rate": 4.980880872284256e-06, + "loss": 0.6366, + "step": 898 + }, + { + "epoch": 0.1324656188605108, + "grad_norm": 0.5960266590118408, + "learning_rate": 4.980832986008934e-06, + "loss": 0.6233, + "step": 899 + }, + { + "epoch": 0.13261296660117877, + "grad_norm": 0.6728135943412781, + "learning_rate": 4.980785040070744e-06, + "loss": 0.6026, + "step": 900 + }, + { + "epoch": 0.13276031434184676, + "grad_norm": 0.5968371033668518, + "learning_rate": 4.980737034470836e-06, + "loss": 0.605, + "step": 901 + }, + { + "epoch": 0.13290766208251473, + "grad_norm": 0.614309549331665, + "learning_rate": 4.980688969210366e-06, + "loss": 0.6141, + "step": 902 + }, + { + "epoch": 0.13305500982318272, + "grad_norm": 0.6697075963020325, + "learning_rate": 4.9806408442904905e-06, + "loss": 0.6366, + "step": 903 + }, + { + "epoch": 0.13320235756385068, + "grad_norm": 0.6114403605461121, + "learning_rate": 4.9805926597123665e-06, + "loss": 0.6363, + "step": 904 + }, + { + "epoch": 0.13334970530451867, + "grad_norm": 0.5777339935302734, + "learning_rate": 4.980544415477152e-06, + "loss": 0.626, + "step": 905 + }, + { + "epoch": 0.13349705304518664, + "grad_norm": 0.611530065536499, + "learning_rate": 4.980496111586007e-06, + "loss": 0.6377, + "step": 906 + }, + { + "epoch": 0.13364440078585463, + "grad_norm": 0.5731481313705444, + "learning_rate": 4.980447748040095e-06, + "loss": 0.6406, + "step": 907 + }, + { + "epoch": 0.1337917485265226, + "grad_norm": 0.6394562125205994, + "learning_rate": 4.980399324840578e-06, + "loss": 0.6805, + "step": 908 + }, + { + "epoch": 0.13393909626719058, + "grad_norm": 0.6083534955978394, + "learning_rate": 4.98035084198862e-06, + "loss": 0.6335, + "step": 909 + }, + { + "epoch": 0.13408644400785855, + "grad_norm": 0.6065772771835327, + "learning_rate": 4.9803022994853875e-06, + "loss": 0.6579, + "step": 910 + }, + { + "epoch": 0.1342337917485265, + "grad_norm": 0.6025118827819824, + "learning_rate": 4.980253697332048e-06, + "loss": 0.6465, + "step": 911 + }, + { + "epoch": 0.1343811394891945, + "grad_norm": 0.6094956994056702, + "learning_rate": 4.980205035529772e-06, + "loss": 0.6332, + "step": 912 + }, + { + "epoch": 0.13452848722986246, + "grad_norm": 0.5950258374214172, + "learning_rate": 4.980156314079727e-06, + "loss": 0.6742, + "step": 913 + }, + { + "epoch": 0.13467583497053046, + "grad_norm": 0.5714655518531799, + "learning_rate": 4.980107532983085e-06, + "loss": 0.6366, + "step": 914 + }, + { + "epoch": 0.13482318271119842, + "grad_norm": 0.5975852608680725, + "learning_rate": 4.980058692241021e-06, + "loss": 0.6436, + "step": 915 + }, + { + "epoch": 0.1349705304518664, + "grad_norm": 0.6084606051445007, + "learning_rate": 4.980009791854709e-06, + "loss": 0.6296, + "step": 916 + }, + { + "epoch": 0.13511787819253437, + "grad_norm": 0.5953097939491272, + "learning_rate": 4.979960831825325e-06, + "loss": 0.6416, + "step": 917 + }, + { + "epoch": 0.13526522593320237, + "grad_norm": 0.5766756534576416, + "learning_rate": 4.979911812154044e-06, + "loss": 0.6509, + "step": 918 + }, + { + "epoch": 0.13541257367387033, + "grad_norm": 0.5935989022254944, + "learning_rate": 4.979862732842049e-06, + "loss": 0.6412, + "step": 919 + }, + { + "epoch": 0.13555992141453832, + "grad_norm": 0.5963611006736755, + "learning_rate": 4.979813593890518e-06, + "loss": 0.5995, + "step": 920 + }, + { + "epoch": 0.13570726915520628, + "grad_norm": 0.6338916420936584, + "learning_rate": 4.979764395300634e-06, + "loss": 0.6046, + "step": 921 + }, + { + "epoch": 0.13585461689587428, + "grad_norm": 0.5820133686065674, + "learning_rate": 4.979715137073578e-06, + "loss": 0.6368, + "step": 922 + }, + { + "epoch": 0.13600196463654224, + "grad_norm": 0.6245938539505005, + "learning_rate": 4.9796658192105365e-06, + "loss": 0.6197, + "step": 923 + }, + { + "epoch": 0.13614931237721023, + "grad_norm": 0.5824450850486755, + "learning_rate": 4.979616441712695e-06, + "loss": 0.6404, + "step": 924 + }, + { + "epoch": 0.1362966601178782, + "grad_norm": 0.6389758586883545, + "learning_rate": 4.979567004581241e-06, + "loss": 0.6638, + "step": 925 + }, + { + "epoch": 0.13644400785854616, + "grad_norm": 0.6089726090431213, + "learning_rate": 4.9795175078173635e-06, + "loss": 0.6204, + "step": 926 + }, + { + "epoch": 0.13659135559921415, + "grad_norm": 0.6075071692466736, + "learning_rate": 4.979467951422253e-06, + "loss": 0.6453, + "step": 927 + }, + { + "epoch": 0.1367387033398821, + "grad_norm": 0.5840855836868286, + "learning_rate": 4.979418335397101e-06, + "loss": 0.6387, + "step": 928 + }, + { + "epoch": 0.1368860510805501, + "grad_norm": 0.6046027541160583, + "learning_rate": 4.979368659743102e-06, + "loss": 0.6222, + "step": 929 + }, + { + "epoch": 0.13703339882121807, + "grad_norm": 0.5927652716636658, + "learning_rate": 4.979318924461449e-06, + "loss": 0.6317, + "step": 930 + }, + { + "epoch": 0.13718074656188606, + "grad_norm": 0.61641526222229, + "learning_rate": 4.979269129553338e-06, + "loss": 0.6196, + "step": 931 + }, + { + "epoch": 0.13732809430255402, + "grad_norm": 0.590711772441864, + "learning_rate": 4.979219275019968e-06, + "loss": 0.5912, + "step": 932 + }, + { + "epoch": 0.137475442043222, + "grad_norm": 0.6354338526725769, + "learning_rate": 4.979169360862537e-06, + "loss": 0.63, + "step": 933 + }, + { + "epoch": 0.13762278978388998, + "grad_norm": 0.596445620059967, + "learning_rate": 4.9791193870822465e-06, + "loss": 0.6501, + "step": 934 + }, + { + "epoch": 0.13777013752455797, + "grad_norm": 0.6093775629997253, + "learning_rate": 4.979069353680297e-06, + "loss": 0.6151, + "step": 935 + }, + { + "epoch": 0.13791748526522593, + "grad_norm": 0.6995539665222168, + "learning_rate": 4.9790192606578914e-06, + "loss": 0.6255, + "step": 936 + }, + { + "epoch": 0.13806483300589392, + "grad_norm": 0.6123045086860657, + "learning_rate": 4.978969108016236e-06, + "loss": 0.5909, + "step": 937 + }, + { + "epoch": 0.13821218074656189, + "grad_norm": 0.5978010892868042, + "learning_rate": 4.978918895756536e-06, + "loss": 0.5845, + "step": 938 + }, + { + "epoch": 0.13835952848722985, + "grad_norm": 0.6293134093284607, + "learning_rate": 4.9788686238799996e-06, + "loss": 0.652, + "step": 939 + }, + { + "epoch": 0.13850687622789784, + "grad_norm": 0.577775776386261, + "learning_rate": 4.978818292387836e-06, + "loss": 0.6273, + "step": 940 + }, + { + "epoch": 0.1386542239685658, + "grad_norm": 0.6014806628227234, + "learning_rate": 4.978767901281255e-06, + "loss": 0.6248, + "step": 941 + }, + { + "epoch": 0.1388015717092338, + "grad_norm": 0.6091002821922302, + "learning_rate": 4.9787174505614675e-06, + "loss": 0.6366, + "step": 942 + }, + { + "epoch": 0.13894891944990176, + "grad_norm": 0.5748556852340698, + "learning_rate": 4.9786669402296885e-06, + "loss": 0.6203, + "step": 943 + }, + { + "epoch": 0.13909626719056975, + "grad_norm": 0.6209672093391418, + "learning_rate": 4.978616370287132e-06, + "loss": 0.6237, + "step": 944 + }, + { + "epoch": 0.1392436149312377, + "grad_norm": 0.6001598238945007, + "learning_rate": 4.978565740735014e-06, + "loss": 0.6384, + "step": 945 + }, + { + "epoch": 0.1393909626719057, + "grad_norm": 0.5860715508460999, + "learning_rate": 4.978515051574554e-06, + "loss": 0.621, + "step": 946 + }, + { + "epoch": 0.13953831041257367, + "grad_norm": 0.6240447163581848, + "learning_rate": 4.978464302806968e-06, + "loss": 0.6183, + "step": 947 + }, + { + "epoch": 0.13968565815324166, + "grad_norm": 0.6172810792922974, + "learning_rate": 4.978413494433478e-06, + "loss": 0.6369, + "step": 948 + }, + { + "epoch": 0.13983300589390962, + "grad_norm": 0.6129052042961121, + "learning_rate": 4.978362626455307e-06, + "loss": 0.6469, + "step": 949 + }, + { + "epoch": 0.13998035363457761, + "grad_norm": 0.5770387053489685, + "learning_rate": 4.9783116988736765e-06, + "loss": 0.6329, + "step": 950 + }, + { + "epoch": 0.14012770137524558, + "grad_norm": 0.631916344165802, + "learning_rate": 4.9782607116898115e-06, + "loss": 0.6269, + "step": 951 + }, + { + "epoch": 0.14027504911591357, + "grad_norm": 0.5906457304954529, + "learning_rate": 4.97820966490494e-06, + "loss": 0.601, + "step": 952 + }, + { + "epoch": 0.14042239685658153, + "grad_norm": 0.5898656249046326, + "learning_rate": 4.978158558520288e-06, + "loss": 0.6583, + "step": 953 + }, + { + "epoch": 0.1405697445972495, + "grad_norm": 0.604338526725769, + "learning_rate": 4.9781073925370845e-06, + "loss": 0.651, + "step": 954 + }, + { + "epoch": 0.1407170923379175, + "grad_norm": 0.5860520005226135, + "learning_rate": 4.9780561669565605e-06, + "loss": 0.6559, + "step": 955 + }, + { + "epoch": 0.14086444007858545, + "grad_norm": 0.607391357421875, + "learning_rate": 4.978004881779949e-06, + "loss": 0.6393, + "step": 956 + }, + { + "epoch": 0.14101178781925344, + "grad_norm": 0.6195008158683777, + "learning_rate": 4.977953537008481e-06, + "loss": 0.6309, + "step": 957 + }, + { + "epoch": 0.1411591355599214, + "grad_norm": 0.6062892079353333, + "learning_rate": 4.9779021326433934e-06, + "loss": 0.6127, + "step": 958 + }, + { + "epoch": 0.1413064833005894, + "grad_norm": 0.5798046588897705, + "learning_rate": 4.977850668685922e-06, + "loss": 0.595, + "step": 959 + }, + { + "epoch": 0.14145383104125736, + "grad_norm": 0.5814330577850342, + "learning_rate": 4.977799145137304e-06, + "loss": 0.624, + "step": 960 + }, + { + "epoch": 0.14160117878192535, + "grad_norm": 0.6803590655326843, + "learning_rate": 4.977747561998778e-06, + "loss": 0.6306, + "step": 961 + }, + { + "epoch": 0.14174852652259332, + "grad_norm": 0.5722382068634033, + "learning_rate": 4.977695919271586e-06, + "loss": 0.6422, + "step": 962 + }, + { + "epoch": 0.1418958742632613, + "grad_norm": 0.5968260765075684, + "learning_rate": 4.977644216956969e-06, + "loss": 0.6326, + "step": 963 + }, + { + "epoch": 0.14204322200392927, + "grad_norm": 0.6105520725250244, + "learning_rate": 4.977592455056171e-06, + "loss": 0.6375, + "step": 964 + }, + { + "epoch": 0.14219056974459726, + "grad_norm": 0.60184246301651, + "learning_rate": 4.977540633570436e-06, + "loss": 0.6567, + "step": 965 + }, + { + "epoch": 0.14233791748526523, + "grad_norm": 0.6044562458992004, + "learning_rate": 4.977488752501011e-06, + "loss": 0.6362, + "step": 966 + }, + { + "epoch": 0.1424852652259332, + "grad_norm": 0.6089996695518494, + "learning_rate": 4.9774368118491435e-06, + "loss": 0.6574, + "step": 967 + }, + { + "epoch": 0.14263261296660118, + "grad_norm": 0.5903562903404236, + "learning_rate": 4.977384811616083e-06, + "loss": 0.6605, + "step": 968 + }, + { + "epoch": 0.14277996070726914, + "grad_norm": 0.5687375068664551, + "learning_rate": 4.977332751803079e-06, + "loss": 0.6474, + "step": 969 + }, + { + "epoch": 0.14292730844793713, + "grad_norm": 0.6095196008682251, + "learning_rate": 4.977280632411385e-06, + "loss": 0.5983, + "step": 970 + }, + { + "epoch": 0.1430746561886051, + "grad_norm": 0.5911668539047241, + "learning_rate": 4.977228453442253e-06, + "loss": 0.6141, + "step": 971 + }, + { + "epoch": 0.1432220039292731, + "grad_norm": 0.5993618369102478, + "learning_rate": 4.977176214896939e-06, + "loss": 0.6305, + "step": 972 + }, + { + "epoch": 0.14336935166994105, + "grad_norm": 0.5818612575531006, + "learning_rate": 4.9771239167767e-06, + "loss": 0.6002, + "step": 973 + }, + { + "epoch": 0.14351669941060904, + "grad_norm": 0.6325139999389648, + "learning_rate": 4.977071559082791e-06, + "loss": 0.6008, + "step": 974 + }, + { + "epoch": 0.143664047151277, + "grad_norm": 0.5868161916732788, + "learning_rate": 4.977019141816473e-06, + "loss": 0.6417, + "step": 975 + }, + { + "epoch": 0.143811394891945, + "grad_norm": 0.6134438514709473, + "learning_rate": 4.976966664979007e-06, + "loss": 0.6093, + "step": 976 + }, + { + "epoch": 0.14395874263261296, + "grad_norm": 0.6413354873657227, + "learning_rate": 4.976914128571653e-06, + "loss": 0.6179, + "step": 977 + }, + { + "epoch": 0.14410609037328095, + "grad_norm": 0.5753635168075562, + "learning_rate": 4.976861532595678e-06, + "loss": 0.6239, + "step": 978 + }, + { + "epoch": 0.14425343811394892, + "grad_norm": 0.5788527727127075, + "learning_rate": 4.976808877052344e-06, + "loss": 0.6691, + "step": 979 + }, + { + "epoch": 0.1444007858546169, + "grad_norm": 0.6535327434539795, + "learning_rate": 4.976756161942918e-06, + "loss": 0.6324, + "step": 980 + }, + { + "epoch": 0.14454813359528487, + "grad_norm": 0.6348285675048828, + "learning_rate": 4.9767033872686685e-06, + "loss": 0.6117, + "step": 981 + }, + { + "epoch": 0.14469548133595284, + "grad_norm": 0.6196520924568176, + "learning_rate": 4.976650553030864e-06, + "loss": 0.6512, + "step": 982 + }, + { + "epoch": 0.14484282907662083, + "grad_norm": 0.5863232016563416, + "learning_rate": 4.976597659230775e-06, + "loss": 0.6056, + "step": 983 + }, + { + "epoch": 0.1449901768172888, + "grad_norm": 0.5951036214828491, + "learning_rate": 4.9765447058696745e-06, + "loss": 0.6267, + "step": 984 + }, + { + "epoch": 0.14513752455795678, + "grad_norm": 0.6085488796234131, + "learning_rate": 4.976491692948835e-06, + "loss": 0.6395, + "step": 985 + }, + { + "epoch": 0.14528487229862475, + "grad_norm": 0.6360968351364136, + "learning_rate": 4.976438620469532e-06, + "loss": 0.6703, + "step": 986 + }, + { + "epoch": 0.14543222003929274, + "grad_norm": 0.5790683031082153, + "learning_rate": 4.9763854884330416e-06, + "loss": 0.5763, + "step": 987 + }, + { + "epoch": 0.1455795677799607, + "grad_norm": 0.6134780049324036, + "learning_rate": 4.976332296840642e-06, + "loss": 0.6156, + "step": 988 + }, + { + "epoch": 0.1457269155206287, + "grad_norm": 0.6717028021812439, + "learning_rate": 4.976279045693612e-06, + "loss": 0.5857, + "step": 989 + }, + { + "epoch": 0.14587426326129665, + "grad_norm": 0.5734598636627197, + "learning_rate": 4.976225734993231e-06, + "loss": 0.6547, + "step": 990 + }, + { + "epoch": 0.14602161100196465, + "grad_norm": 0.6468273997306824, + "learning_rate": 4.9761723647407845e-06, + "loss": 0.6708, + "step": 991 + }, + { + "epoch": 0.1461689587426326, + "grad_norm": 0.6045759320259094, + "learning_rate": 4.976118934937553e-06, + "loss": 0.6098, + "step": 992 + }, + { + "epoch": 0.1463163064833006, + "grad_norm": 0.6020183563232422, + "learning_rate": 4.9760654455848225e-06, + "loss": 0.6388, + "step": 993 + }, + { + "epoch": 0.14646365422396856, + "grad_norm": 0.6027597784996033, + "learning_rate": 4.976011896683879e-06, + "loss": 0.6212, + "step": 994 + }, + { + "epoch": 0.14661100196463656, + "grad_norm": 0.5785106420516968, + "learning_rate": 4.975958288236011e-06, + "loss": 0.6157, + "step": 995 + }, + { + "epoch": 0.14675834970530452, + "grad_norm": 0.6027930974960327, + "learning_rate": 4.975904620242508e-06, + "loss": 0.6707, + "step": 996 + }, + { + "epoch": 0.14690569744597248, + "grad_norm": 0.6017693281173706, + "learning_rate": 4.97585089270466e-06, + "loss": 0.6419, + "step": 997 + }, + { + "epoch": 0.14705304518664047, + "grad_norm": 0.606748104095459, + "learning_rate": 4.97579710562376e-06, + "loss": 0.6263, + "step": 998 + }, + { + "epoch": 0.14720039292730844, + "grad_norm": 0.5958097577095032, + "learning_rate": 4.975743259001099e-06, + "loss": 0.5876, + "step": 999 + }, + { + "epoch": 0.14734774066797643, + "grad_norm": 0.6290190815925598, + "learning_rate": 4.975689352837975e-06, + "loss": 0.6276, + "step": 1000 + }, + { + "epoch": 0.1474950884086444, + "grad_norm": 0.6086926460266113, + "learning_rate": 4.975635387135684e-06, + "loss": 0.6057, + "step": 1001 + }, + { + "epoch": 0.14764243614931238, + "grad_norm": 0.5708885192871094, + "learning_rate": 4.975581361895521e-06, + "loss": 0.6284, + "step": 1002 + }, + { + "epoch": 0.14778978388998035, + "grad_norm": 0.5520373582839966, + "learning_rate": 4.975527277118789e-06, + "loss": 0.5968, + "step": 1003 + }, + { + "epoch": 0.14793713163064834, + "grad_norm": 0.5767863392829895, + "learning_rate": 4.975473132806787e-06, + "loss": 0.6215, + "step": 1004 + }, + { + "epoch": 0.1480844793713163, + "grad_norm": 0.6098321080207825, + "learning_rate": 4.9754189289608165e-06, + "loss": 0.6279, + "step": 1005 + }, + { + "epoch": 0.1482318271119843, + "grad_norm": 0.6349608898162842, + "learning_rate": 4.975364665582182e-06, + "loss": 0.591, + "step": 1006 + }, + { + "epoch": 0.14837917485265226, + "grad_norm": 0.6272415518760681, + "learning_rate": 4.9753103426721885e-06, + "loss": 0.6181, + "step": 1007 + }, + { + "epoch": 0.14852652259332025, + "grad_norm": 0.6054591536521912, + "learning_rate": 4.975255960232141e-06, + "loss": 0.6298, + "step": 1008 + }, + { + "epoch": 0.1486738703339882, + "grad_norm": 0.6217292547225952, + "learning_rate": 4.975201518263351e-06, + "loss": 0.6276, + "step": 1009 + }, + { + "epoch": 0.14882121807465618, + "grad_norm": 0.6061639785766602, + "learning_rate": 4.975147016767123e-06, + "loss": 0.6279, + "step": 1010 + }, + { + "epoch": 0.14896856581532417, + "grad_norm": 0.6465947031974792, + "learning_rate": 4.975092455744772e-06, + "loss": 0.5987, + "step": 1011 + }, + { + "epoch": 0.14911591355599213, + "grad_norm": 0.5874618291854858, + "learning_rate": 4.975037835197608e-06, + "loss": 0.6403, + "step": 1012 + }, + { + "epoch": 0.14926326129666012, + "grad_norm": 0.6129521727561951, + "learning_rate": 4.974983155126945e-06, + "loss": 0.6434, + "step": 1013 + }, + { + "epoch": 0.14941060903732808, + "grad_norm": 0.5913174152374268, + "learning_rate": 4.974928415534097e-06, + "loss": 0.6051, + "step": 1014 + }, + { + "epoch": 0.14955795677799608, + "grad_norm": 0.5770914554595947, + "learning_rate": 4.974873616420381e-06, + "loss": 0.6587, + "step": 1015 + }, + { + "epoch": 0.14970530451866404, + "grad_norm": 0.5744360685348511, + "learning_rate": 4.974818757787117e-06, + "loss": 0.6228, + "step": 1016 + }, + { + "epoch": 0.14985265225933203, + "grad_norm": 0.6097941994667053, + "learning_rate": 4.974763839635622e-06, + "loss": 0.641, + "step": 1017 + }, + { + "epoch": 0.15, + "grad_norm": 0.598760724067688, + "learning_rate": 4.974708861967217e-06, + "loss": 0.6319, + "step": 1018 + }, + { + "epoch": 0.15014734774066799, + "grad_norm": 0.5596571564674377, + "learning_rate": 4.974653824783225e-06, + "loss": 0.6018, + "step": 1019 + }, + { + "epoch": 0.15029469548133595, + "grad_norm": 0.5799163579940796, + "learning_rate": 4.9745987280849695e-06, + "loss": 0.611, + "step": 1020 + }, + { + "epoch": 0.15044204322200394, + "grad_norm": 0.6585241556167603, + "learning_rate": 4.974543571873775e-06, + "loss": 0.6518, + "step": 1021 + }, + { + "epoch": 0.1505893909626719, + "grad_norm": 0.5956674814224243, + "learning_rate": 4.974488356150967e-06, + "loss": 0.6268, + "step": 1022 + }, + { + "epoch": 0.1507367387033399, + "grad_norm": 0.6177791357040405, + "learning_rate": 4.974433080917876e-06, + "loss": 0.5742, + "step": 1023 + }, + { + "epoch": 0.15088408644400786, + "grad_norm": 0.590190589427948, + "learning_rate": 4.974377746175829e-06, + "loss": 0.6436, + "step": 1024 + }, + { + "epoch": 0.15103143418467582, + "grad_norm": 0.6481848359107971, + "learning_rate": 4.974322351926159e-06, + "loss": 0.6308, + "step": 1025 + }, + { + "epoch": 0.1511787819253438, + "grad_norm": 0.5816704034805298, + "learning_rate": 4.974266898170197e-06, + "loss": 0.5969, + "step": 1026 + }, + { + "epoch": 0.15132612966601178, + "grad_norm": 0.5859255194664001, + "learning_rate": 4.974211384909276e-06, + "loss": 0.6612, + "step": 1027 + }, + { + "epoch": 0.15147347740667977, + "grad_norm": 0.6180488467216492, + "learning_rate": 4.974155812144731e-06, + "loss": 0.6278, + "step": 1028 + }, + { + "epoch": 0.15162082514734773, + "grad_norm": 0.6264434456825256, + "learning_rate": 4.9741001798779e-06, + "loss": 0.6389, + "step": 1029 + }, + { + "epoch": 0.15176817288801572, + "grad_norm": 0.5886472463607788, + "learning_rate": 4.97404448811012e-06, + "loss": 0.6337, + "step": 1030 + }, + { + "epoch": 0.1519155206286837, + "grad_norm": 0.6102797985076904, + "learning_rate": 4.97398873684273e-06, + "loss": 0.6495, + "step": 1031 + }, + { + "epoch": 0.15206286836935168, + "grad_norm": 0.6115193367004395, + "learning_rate": 4.973932926077072e-06, + "loss": 0.6388, + "step": 1032 + }, + { + "epoch": 0.15221021611001964, + "grad_norm": 0.6223381161689758, + "learning_rate": 4.973877055814487e-06, + "loss": 0.6282, + "step": 1033 + }, + { + "epoch": 0.15235756385068763, + "grad_norm": 0.6050751805305481, + "learning_rate": 4.9738211260563195e-06, + "loss": 0.6288, + "step": 1034 + }, + { + "epoch": 0.1525049115913556, + "grad_norm": 0.6015203595161438, + "learning_rate": 4.973765136803914e-06, + "loss": 0.6246, + "step": 1035 + }, + { + "epoch": 0.1526522593320236, + "grad_norm": 0.6330953240394592, + "learning_rate": 4.973709088058617e-06, + "loss": 0.6395, + "step": 1036 + }, + { + "epoch": 0.15279960707269155, + "grad_norm": 0.5864235162734985, + "learning_rate": 4.973652979821777e-06, + "loss": 0.6076, + "step": 1037 + }, + { + "epoch": 0.15294695481335951, + "grad_norm": 0.5996755957603455, + "learning_rate": 4.973596812094742e-06, + "loss": 0.6185, + "step": 1038 + }, + { + "epoch": 0.1530943025540275, + "grad_norm": 0.582129716873169, + "learning_rate": 4.973540584878865e-06, + "loss": 0.6054, + "step": 1039 + }, + { + "epoch": 0.15324165029469547, + "grad_norm": 0.5914654731750488, + "learning_rate": 4.973484298175497e-06, + "loss": 0.6074, + "step": 1040 + }, + { + "epoch": 0.15338899803536346, + "grad_norm": 0.6030542254447937, + "learning_rate": 4.973427951985992e-06, + "loss": 0.6305, + "step": 1041 + }, + { + "epoch": 0.15353634577603142, + "grad_norm": 0.5978639721870422, + "learning_rate": 4.9733715463117035e-06, + "loss": 0.6405, + "step": 1042 + }, + { + "epoch": 0.15368369351669942, + "grad_norm": 0.5960456728935242, + "learning_rate": 4.973315081153991e-06, + "loss": 0.661, + "step": 1043 + }, + { + "epoch": 0.15383104125736738, + "grad_norm": 0.6542711853981018, + "learning_rate": 4.97325855651421e-06, + "loss": 0.6437, + "step": 1044 + }, + { + "epoch": 0.15397838899803537, + "grad_norm": 0.6215084195137024, + "learning_rate": 4.9732019723937204e-06, + "loss": 0.5812, + "step": 1045 + }, + { + "epoch": 0.15412573673870333, + "grad_norm": 0.5918387174606323, + "learning_rate": 4.973145328793885e-06, + "loss": 0.6051, + "step": 1046 + }, + { + "epoch": 0.15427308447937133, + "grad_norm": 0.6318201422691345, + "learning_rate": 4.973088625716063e-06, + "loss": 0.6475, + "step": 1047 + }, + { + "epoch": 0.1544204322200393, + "grad_norm": 0.5975579619407654, + "learning_rate": 4.9730318631616195e-06, + "loss": 0.6331, + "step": 1048 + }, + { + "epoch": 0.15456777996070728, + "grad_norm": 0.5830935835838318, + "learning_rate": 4.97297504113192e-06, + "loss": 0.6205, + "step": 1049 + }, + { + "epoch": 0.15471512770137524, + "grad_norm": 0.6209080219268799, + "learning_rate": 4.972918159628331e-06, + "loss": 0.6496, + "step": 1050 + }, + { + "epoch": 0.15486247544204323, + "grad_norm": 0.6268972158432007, + "learning_rate": 4.97286121865222e-06, + "loss": 0.6524, + "step": 1051 + }, + { + "epoch": 0.1550098231827112, + "grad_norm": 0.627147912979126, + "learning_rate": 4.9728042182049565e-06, + "loss": 0.6447, + "step": 1052 + }, + { + "epoch": 0.15515717092337916, + "grad_norm": 0.6160210371017456, + "learning_rate": 4.972747158287911e-06, + "loss": 0.6399, + "step": 1053 + }, + { + "epoch": 0.15530451866404715, + "grad_norm": 0.6298843622207642, + "learning_rate": 4.9726900389024555e-06, + "loss": 0.6252, + "step": 1054 + }, + { + "epoch": 0.15545186640471512, + "grad_norm": 0.6481191515922546, + "learning_rate": 4.9726328600499654e-06, + "loss": 0.6032, + "step": 1055 + }, + { + "epoch": 0.1555992141453831, + "grad_norm": 0.5910577774047852, + "learning_rate": 4.9725756217318145e-06, + "loss": 0.6288, + "step": 1056 + }, + { + "epoch": 0.15574656188605107, + "grad_norm": 0.5964633822441101, + "learning_rate": 4.972518323949379e-06, + "loss": 0.6302, + "step": 1057 + }, + { + "epoch": 0.15589390962671906, + "grad_norm": 0.5897382497787476, + "learning_rate": 4.972460966704039e-06, + "loss": 0.6377, + "step": 1058 + }, + { + "epoch": 0.15604125736738703, + "grad_norm": 0.6096847653388977, + "learning_rate": 4.972403549997171e-06, + "loss": 0.6258, + "step": 1059 + }, + { + "epoch": 0.15618860510805502, + "grad_norm": 0.6179301738739014, + "learning_rate": 4.972346073830158e-06, + "loss": 0.6272, + "step": 1060 + }, + { + "epoch": 0.15633595284872298, + "grad_norm": 0.6067973971366882, + "learning_rate": 4.972288538204381e-06, + "loss": 0.6523, + "step": 1061 + }, + { + "epoch": 0.15648330058939097, + "grad_norm": 0.6187566518783569, + "learning_rate": 4.9722309431212245e-06, + "loss": 0.6289, + "step": 1062 + }, + { + "epoch": 0.15663064833005894, + "grad_norm": 0.5844240784645081, + "learning_rate": 4.972173288582073e-06, + "loss": 0.6144, + "step": 1063 + }, + { + "epoch": 0.15677799607072693, + "grad_norm": 0.6197131872177124, + "learning_rate": 4.972115574588314e-06, + "loss": 0.607, + "step": 1064 + }, + { + "epoch": 0.1569253438113949, + "grad_norm": 0.6088480949401855, + "learning_rate": 4.972057801141334e-06, + "loss": 0.6255, + "step": 1065 + }, + { + "epoch": 0.15707269155206288, + "grad_norm": 0.6032307744026184, + "learning_rate": 4.971999968242525e-06, + "loss": 0.6353, + "step": 1066 + }, + { + "epoch": 0.15722003929273085, + "grad_norm": 0.6515870094299316, + "learning_rate": 4.971942075893274e-06, + "loss": 0.6408, + "step": 1067 + }, + { + "epoch": 0.1573673870333988, + "grad_norm": 0.5988897681236267, + "learning_rate": 4.971884124094977e-06, + "loss": 0.6287, + "step": 1068 + }, + { + "epoch": 0.1575147347740668, + "grad_norm": 0.5980631113052368, + "learning_rate": 4.971826112849026e-06, + "loss": 0.6461, + "step": 1069 + }, + { + "epoch": 0.15766208251473476, + "grad_norm": 0.5907476544380188, + "learning_rate": 4.971768042156817e-06, + "loss": 0.6314, + "step": 1070 + }, + { + "epoch": 0.15780943025540276, + "grad_norm": 0.5962628126144409, + "learning_rate": 4.971709912019744e-06, + "loss": 0.5995, + "step": 1071 + }, + { + "epoch": 0.15795677799607072, + "grad_norm": 0.6018052697181702, + "learning_rate": 4.971651722439209e-06, + "loss": 0.6197, + "step": 1072 + }, + { + "epoch": 0.1581041257367387, + "grad_norm": 0.6338629126548767, + "learning_rate": 4.971593473416608e-06, + "loss": 0.6398, + "step": 1073 + }, + { + "epoch": 0.15825147347740667, + "grad_norm": 0.6405065059661865, + "learning_rate": 4.971535164953344e-06, + "loss": 0.6549, + "step": 1074 + }, + { + "epoch": 0.15839882121807466, + "grad_norm": 0.6006082892417908, + "learning_rate": 4.971476797050818e-06, + "loss": 0.5978, + "step": 1075 + }, + { + "epoch": 0.15854616895874263, + "grad_norm": 0.6042784452438354, + "learning_rate": 4.971418369710434e-06, + "loss": 0.6576, + "step": 1076 + }, + { + "epoch": 0.15869351669941062, + "grad_norm": 0.6117986440658569, + "learning_rate": 4.971359882933598e-06, + "loss": 0.597, + "step": 1077 + }, + { + "epoch": 0.15884086444007858, + "grad_norm": 0.6443344950675964, + "learning_rate": 4.971301336721715e-06, + "loss": 0.6294, + "step": 1078 + }, + { + "epoch": 0.15898821218074657, + "grad_norm": 0.5946574807167053, + "learning_rate": 4.971242731076195e-06, + "loss": 0.6471, + "step": 1079 + }, + { + "epoch": 0.15913555992141454, + "grad_norm": 0.627579391002655, + "learning_rate": 4.971184065998446e-06, + "loss": 0.6437, + "step": 1080 + }, + { + "epoch": 0.1592829076620825, + "grad_norm": 0.6069404482841492, + "learning_rate": 4.97112534148988e-06, + "loss": 0.6301, + "step": 1081 + }, + { + "epoch": 0.1594302554027505, + "grad_norm": 0.6249756813049316, + "learning_rate": 4.9710665575519075e-06, + "loss": 0.6234, + "step": 1082 + }, + { + "epoch": 0.15957760314341846, + "grad_norm": 0.589683473110199, + "learning_rate": 4.971007714185943e-06, + "loss": 0.6543, + "step": 1083 + }, + { + "epoch": 0.15972495088408645, + "grad_norm": 0.5745710134506226, + "learning_rate": 4.970948811393404e-06, + "loss": 0.6292, + "step": 1084 + }, + { + "epoch": 0.1598722986247544, + "grad_norm": 0.6294600963592529, + "learning_rate": 4.970889849175703e-06, + "loss": 0.6487, + "step": 1085 + }, + { + "epoch": 0.1600196463654224, + "grad_norm": 0.5942264795303345, + "learning_rate": 4.97083082753426e-06, + "loss": 0.6301, + "step": 1086 + }, + { + "epoch": 0.16016699410609037, + "grad_norm": 0.6096281409263611, + "learning_rate": 4.970771746470495e-06, + "loss": 0.6509, + "step": 1087 + }, + { + "epoch": 0.16031434184675836, + "grad_norm": 0.6213005781173706, + "learning_rate": 4.970712605985829e-06, + "loss": 0.6362, + "step": 1088 + }, + { + "epoch": 0.16046168958742632, + "grad_norm": 0.584473192691803, + "learning_rate": 4.970653406081684e-06, + "loss": 0.6568, + "step": 1089 + }, + { + "epoch": 0.1606090373280943, + "grad_norm": 0.6391384601593018, + "learning_rate": 4.970594146759482e-06, + "loss": 0.6412, + "step": 1090 + }, + { + "epoch": 0.16075638506876228, + "grad_norm": 0.5879412889480591, + "learning_rate": 4.97053482802065e-06, + "loss": 0.6168, + "step": 1091 + }, + { + "epoch": 0.16090373280943027, + "grad_norm": 0.5910617113113403, + "learning_rate": 4.970475449866614e-06, + "loss": 0.6119, + "step": 1092 + }, + { + "epoch": 0.16105108055009823, + "grad_norm": 0.59885174036026, + "learning_rate": 4.970416012298803e-06, + "loss": 0.5507, + "step": 1093 + }, + { + "epoch": 0.16119842829076622, + "grad_norm": 0.5764373540878296, + "learning_rate": 4.970356515318645e-06, + "loss": 0.6375, + "step": 1094 + }, + { + "epoch": 0.16134577603143418, + "grad_norm": 0.5786505937576294, + "learning_rate": 4.970296958927571e-06, + "loss": 0.5703, + "step": 1095 + }, + { + "epoch": 0.16149312377210215, + "grad_norm": 0.5879568457603455, + "learning_rate": 4.970237343127014e-06, + "loss": 0.6163, + "step": 1096 + }, + { + "epoch": 0.16164047151277014, + "grad_norm": 0.654149055480957, + "learning_rate": 4.970177667918408e-06, + "loss": 0.6194, + "step": 1097 + }, + { + "epoch": 0.1617878192534381, + "grad_norm": 0.5936648845672607, + "learning_rate": 4.970117933303188e-06, + "loss": 0.646, + "step": 1098 + }, + { + "epoch": 0.1619351669941061, + "grad_norm": 0.6259503960609436, + "learning_rate": 4.9700581392827895e-06, + "loss": 0.6368, + "step": 1099 + }, + { + "epoch": 0.16208251473477406, + "grad_norm": 0.5986748933792114, + "learning_rate": 4.969998285858651e-06, + "loss": 0.6078, + "step": 1100 + }, + { + "epoch": 0.16222986247544205, + "grad_norm": 0.5991204977035522, + "learning_rate": 4.969938373032213e-06, + "loss": 0.6348, + "step": 1101 + }, + { + "epoch": 0.16237721021611, + "grad_norm": 0.6032591462135315, + "learning_rate": 4.969878400804915e-06, + "loss": 0.6706, + "step": 1102 + }, + { + "epoch": 0.162524557956778, + "grad_norm": 0.6095359921455383, + "learning_rate": 4.969818369178201e-06, + "loss": 0.5936, + "step": 1103 + }, + { + "epoch": 0.16267190569744597, + "grad_norm": 0.6307313442230225, + "learning_rate": 4.969758278153513e-06, + "loss": 0.6083, + "step": 1104 + }, + { + "epoch": 0.16281925343811396, + "grad_norm": 0.6188304424285889, + "learning_rate": 4.969698127732296e-06, + "loss": 0.6225, + "step": 1105 + }, + { + "epoch": 0.16296660117878192, + "grad_norm": 0.5986102223396301, + "learning_rate": 4.969637917915998e-06, + "loss": 0.6242, + "step": 1106 + }, + { + "epoch": 0.1631139489194499, + "grad_norm": 0.6025019288063049, + "learning_rate": 4.969577648706066e-06, + "loss": 0.6411, + "step": 1107 + }, + { + "epoch": 0.16326129666011788, + "grad_norm": 0.6186534762382507, + "learning_rate": 4.969517320103951e-06, + "loss": 0.6609, + "step": 1108 + }, + { + "epoch": 0.16340864440078584, + "grad_norm": 0.6170161366462708, + "learning_rate": 4.9694569321111016e-06, + "loss": 0.6179, + "step": 1109 + }, + { + "epoch": 0.16355599214145383, + "grad_norm": 0.5876601338386536, + "learning_rate": 4.969396484728972e-06, + "loss": 0.6158, + "step": 1110 + }, + { + "epoch": 0.1637033398821218, + "grad_norm": 0.6012449264526367, + "learning_rate": 4.9693359779590145e-06, + "loss": 0.6434, + "step": 1111 + }, + { + "epoch": 0.1638506876227898, + "grad_norm": 0.5883602499961853, + "learning_rate": 4.9692754118026855e-06, + "loss": 0.6313, + "step": 1112 + }, + { + "epoch": 0.16399803536345775, + "grad_norm": 0.5691156983375549, + "learning_rate": 4.969214786261441e-06, + "loss": 0.6413, + "step": 1113 + }, + { + "epoch": 0.16414538310412574, + "grad_norm": 0.6026689410209656, + "learning_rate": 4.969154101336738e-06, + "loss": 0.6711, + "step": 1114 + }, + { + "epoch": 0.1642927308447937, + "grad_norm": 0.6108425259590149, + "learning_rate": 4.969093357030038e-06, + "loss": 0.6485, + "step": 1115 + }, + { + "epoch": 0.1644400785854617, + "grad_norm": 0.599412739276886, + "learning_rate": 4.969032553342801e-06, + "loss": 0.6339, + "step": 1116 + }, + { + "epoch": 0.16458742632612966, + "grad_norm": 0.6033245921134949, + "learning_rate": 4.968971690276488e-06, + "loss": 0.6368, + "step": 1117 + }, + { + "epoch": 0.16473477406679765, + "grad_norm": 0.6081422567367554, + "learning_rate": 4.9689107678325645e-06, + "loss": 0.6115, + "step": 1118 + }, + { + "epoch": 0.16488212180746561, + "grad_norm": 0.6131630539894104, + "learning_rate": 4.968849786012495e-06, + "loss": 0.6372, + "step": 1119 + }, + { + "epoch": 0.1650294695481336, + "grad_norm": 0.5969972610473633, + "learning_rate": 4.968788744817746e-06, + "loss": 0.6619, + "step": 1120 + }, + { + "epoch": 0.16517681728880157, + "grad_norm": 0.6143292784690857, + "learning_rate": 4.968727644249787e-06, + "loss": 0.6558, + "step": 1121 + }, + { + "epoch": 0.16532416502946956, + "grad_norm": 0.6255223155021667, + "learning_rate": 4.968666484310084e-06, + "loss": 0.5844, + "step": 1122 + }, + { + "epoch": 0.16547151277013752, + "grad_norm": 0.5959665775299072, + "learning_rate": 4.968605265000111e-06, + "loss": 0.6426, + "step": 1123 + }, + { + "epoch": 0.1656188605108055, + "grad_norm": 0.6134980916976929, + "learning_rate": 4.968543986321339e-06, + "loss": 0.6105, + "step": 1124 + }, + { + "epoch": 0.16576620825147348, + "grad_norm": 0.5855924487113953, + "learning_rate": 4.968482648275242e-06, + "loss": 0.6491, + "step": 1125 + }, + { + "epoch": 0.16591355599214144, + "grad_norm": 0.6217854022979736, + "learning_rate": 4.968421250863295e-06, + "loss": 0.6068, + "step": 1126 + }, + { + "epoch": 0.16606090373280943, + "grad_norm": 0.6047441959381104, + "learning_rate": 4.968359794086974e-06, + "loss": 0.6308, + "step": 1127 + }, + { + "epoch": 0.1662082514734774, + "grad_norm": 0.6074826121330261, + "learning_rate": 4.968298277947759e-06, + "loss": 0.6567, + "step": 1128 + }, + { + "epoch": 0.1663555992141454, + "grad_norm": 0.6084855794906616, + "learning_rate": 4.968236702447128e-06, + "loss": 0.6275, + "step": 1129 + }, + { + "epoch": 0.16650294695481335, + "grad_norm": 0.6043924689292908, + "learning_rate": 4.9681750675865614e-06, + "loss": 0.6633, + "step": 1130 + }, + { + "epoch": 0.16665029469548134, + "grad_norm": 0.6216224431991577, + "learning_rate": 4.968113373367542e-06, + "loss": 0.6118, + "step": 1131 + }, + { + "epoch": 0.1667976424361493, + "grad_norm": 0.6275046467781067, + "learning_rate": 4.968051619791554e-06, + "loss": 0.6023, + "step": 1132 + }, + { + "epoch": 0.1669449901768173, + "grad_norm": 0.6193328499794006, + "learning_rate": 4.967989806860082e-06, + "loss": 0.5966, + "step": 1133 + }, + { + "epoch": 0.16709233791748526, + "grad_norm": 0.6500139832496643, + "learning_rate": 4.967927934574612e-06, + "loss": 0.6542, + "step": 1134 + }, + { + "epoch": 0.16723968565815325, + "grad_norm": 0.6140546798706055, + "learning_rate": 4.967866002936633e-06, + "loss": 0.6317, + "step": 1135 + }, + { + "epoch": 0.16738703339882122, + "grad_norm": 0.6086215972900391, + "learning_rate": 4.9678040119476346e-06, + "loss": 0.6489, + "step": 1136 + }, + { + "epoch": 0.1675343811394892, + "grad_norm": 0.6497405767440796, + "learning_rate": 4.967741961609107e-06, + "loss": 0.6176, + "step": 1137 + }, + { + "epoch": 0.16768172888015717, + "grad_norm": 0.613511323928833, + "learning_rate": 4.967679851922543e-06, + "loss": 0.6188, + "step": 1138 + }, + { + "epoch": 0.16782907662082514, + "grad_norm": 0.6169940829277039, + "learning_rate": 4.9676176828894355e-06, + "loss": 0.6455, + "step": 1139 + }, + { + "epoch": 0.16797642436149313, + "grad_norm": 0.6004725098609924, + "learning_rate": 4.96755545451128e-06, + "loss": 0.6107, + "step": 1140 + }, + { + "epoch": 0.1681237721021611, + "grad_norm": 0.591448187828064, + "learning_rate": 4.967493166789574e-06, + "loss": 0.6252, + "step": 1141 + }, + { + "epoch": 0.16827111984282908, + "grad_norm": 0.6024001240730286, + "learning_rate": 4.967430819725813e-06, + "loss": 0.6287, + "step": 1142 + }, + { + "epoch": 0.16841846758349704, + "grad_norm": 0.6010298728942871, + "learning_rate": 4.9673684133215e-06, + "loss": 0.6309, + "step": 1143 + }, + { + "epoch": 0.16856581532416504, + "grad_norm": 0.5904011130332947, + "learning_rate": 4.9673059475781325e-06, + "loss": 0.5958, + "step": 1144 + }, + { + "epoch": 0.168713163064833, + "grad_norm": 0.6683719754219055, + "learning_rate": 4.967243422497216e-06, + "loss": 0.656, + "step": 1145 + }, + { + "epoch": 0.168860510805501, + "grad_norm": 0.6318342685699463, + "learning_rate": 4.96718083808025e-06, + "loss": 0.6416, + "step": 1146 + }, + { + "epoch": 0.16900785854616895, + "grad_norm": 0.6373676061630249, + "learning_rate": 4.967118194328744e-06, + "loss": 0.6377, + "step": 1147 + }, + { + "epoch": 0.16915520628683695, + "grad_norm": 0.6170668601989746, + "learning_rate": 4.967055491244201e-06, + "loss": 0.6466, + "step": 1148 + }, + { + "epoch": 0.1693025540275049, + "grad_norm": 0.6159094572067261, + "learning_rate": 4.966992728828131e-06, + "loss": 0.6483, + "step": 1149 + }, + { + "epoch": 0.1694499017681729, + "grad_norm": 0.6051827669143677, + "learning_rate": 4.9669299070820445e-06, + "loss": 0.6382, + "step": 1150 + }, + { + "epoch": 0.16959724950884086, + "grad_norm": 0.6146190166473389, + "learning_rate": 4.966867026007449e-06, + "loss": 0.6149, + "step": 1151 + }, + { + "epoch": 0.16974459724950883, + "grad_norm": 0.6414499878883362, + "learning_rate": 4.96680408560586e-06, + "loss": 0.6294, + "step": 1152 + }, + { + "epoch": 0.16989194499017682, + "grad_norm": 0.6096742749214172, + "learning_rate": 4.966741085878788e-06, + "loss": 0.6293, + "step": 1153 + }, + { + "epoch": 0.17003929273084478, + "grad_norm": 0.5975481867790222, + "learning_rate": 4.966678026827751e-06, + "loss": 0.6091, + "step": 1154 + }, + { + "epoch": 0.17018664047151277, + "grad_norm": 0.5877688527107239, + "learning_rate": 4.966614908454264e-06, + "loss": 0.6142, + "step": 1155 + }, + { + "epoch": 0.17033398821218074, + "grad_norm": 0.6068018078804016, + "learning_rate": 4.966551730759845e-06, + "loss": 0.6509, + "step": 1156 + }, + { + "epoch": 0.17048133595284873, + "grad_norm": 0.6016820669174194, + "learning_rate": 4.966488493746014e-06, + "loss": 0.5794, + "step": 1157 + }, + { + "epoch": 0.1706286836935167, + "grad_norm": 0.6211217045783997, + "learning_rate": 4.966425197414293e-06, + "loss": 0.6011, + "step": 1158 + }, + { + "epoch": 0.17077603143418468, + "grad_norm": 0.6153913140296936, + "learning_rate": 4.966361841766201e-06, + "loss": 0.6273, + "step": 1159 + }, + { + "epoch": 0.17092337917485265, + "grad_norm": 0.5989111661911011, + "learning_rate": 4.9662984268032644e-06, + "loss": 0.6152, + "step": 1160 + }, + { + "epoch": 0.17107072691552064, + "grad_norm": 0.568160355091095, + "learning_rate": 4.966234952527007e-06, + "loss": 0.5862, + "step": 1161 + }, + { + "epoch": 0.1712180746561886, + "grad_norm": 0.5935263633728027, + "learning_rate": 4.966171418938957e-06, + "loss": 0.5662, + "step": 1162 + }, + { + "epoch": 0.1713654223968566, + "grad_norm": 0.576259434223175, + "learning_rate": 4.966107826040639e-06, + "loss": 0.6122, + "step": 1163 + }, + { + "epoch": 0.17151277013752456, + "grad_norm": 0.6024925112724304, + "learning_rate": 4.966044173833585e-06, + "loss": 0.6104, + "step": 1164 + }, + { + "epoch": 0.17166011787819255, + "grad_norm": 0.6071736216545105, + "learning_rate": 4.965980462319326e-06, + "loss": 0.6025, + "step": 1165 + }, + { + "epoch": 0.1718074656188605, + "grad_norm": 0.6114234924316406, + "learning_rate": 4.9659166914993935e-06, + "loss": 0.6191, + "step": 1166 + }, + { + "epoch": 0.17195481335952847, + "grad_norm": 0.5989375114440918, + "learning_rate": 4.965852861375321e-06, + "loss": 0.604, + "step": 1167 + }, + { + "epoch": 0.17210216110019647, + "grad_norm": 0.594012975692749, + "learning_rate": 4.965788971948643e-06, + "loss": 0.5785, + "step": 1168 + }, + { + "epoch": 0.17224950884086443, + "grad_norm": 0.6004170179367065, + "learning_rate": 4.965725023220897e-06, + "loss": 0.6276, + "step": 1169 + }, + { + "epoch": 0.17239685658153242, + "grad_norm": 0.5789831280708313, + "learning_rate": 4.965661015193621e-06, + "loss": 0.5982, + "step": 1170 + }, + { + "epoch": 0.17254420432220038, + "grad_norm": 0.5989968776702881, + "learning_rate": 4.965596947868355e-06, + "loss": 0.6308, + "step": 1171 + }, + { + "epoch": 0.17269155206286838, + "grad_norm": 0.6376698613166809, + "learning_rate": 4.965532821246638e-06, + "loss": 0.6325, + "step": 1172 + }, + { + "epoch": 0.17283889980353634, + "grad_norm": 0.5954023003578186, + "learning_rate": 4.9654686353300125e-06, + "loss": 0.6272, + "step": 1173 + }, + { + "epoch": 0.17298624754420433, + "grad_norm": 0.6179423332214355, + "learning_rate": 4.965404390120024e-06, + "loss": 0.5921, + "step": 1174 + }, + { + "epoch": 0.1731335952848723, + "grad_norm": 0.5981164574623108, + "learning_rate": 4.965340085618214e-06, + "loss": 0.6255, + "step": 1175 + }, + { + "epoch": 0.17328094302554028, + "grad_norm": 0.5905123353004456, + "learning_rate": 4.965275721826133e-06, + "loss": 0.6494, + "step": 1176 + }, + { + "epoch": 0.17342829076620825, + "grad_norm": 0.5973097085952759, + "learning_rate": 4.965211298745327e-06, + "loss": 0.6196, + "step": 1177 + }, + { + "epoch": 0.17357563850687624, + "grad_norm": 0.5918458700180054, + "learning_rate": 4.965146816377345e-06, + "loss": 0.6058, + "step": 1178 + }, + { + "epoch": 0.1737229862475442, + "grad_norm": 0.6084218621253967, + "learning_rate": 4.96508227472374e-06, + "loss": 0.6297, + "step": 1179 + }, + { + "epoch": 0.1738703339882122, + "grad_norm": 0.5888078212738037, + "learning_rate": 4.96501767378606e-06, + "loss": 0.5701, + "step": 1180 + }, + { + "epoch": 0.17401768172888016, + "grad_norm": 0.6032836437225342, + "learning_rate": 4.9649530135658625e-06, + "loss": 0.6103, + "step": 1181 + }, + { + "epoch": 0.17416502946954812, + "grad_norm": 0.614061713218689, + "learning_rate": 4.964888294064701e-06, + "loss": 0.6045, + "step": 1182 + }, + { + "epoch": 0.1743123772102161, + "grad_norm": 0.6312205791473389, + "learning_rate": 4.9648235152841314e-06, + "loss": 0.6173, + "step": 1183 + }, + { + "epoch": 0.17445972495088408, + "grad_norm": 0.6035560965538025, + "learning_rate": 4.964758677225713e-06, + "loss": 0.6442, + "step": 1184 + }, + { + "epoch": 0.17460707269155207, + "grad_norm": 0.7031545639038086, + "learning_rate": 4.964693779891005e-06, + "loss": 0.5896, + "step": 1185 + }, + { + "epoch": 0.17475442043222003, + "grad_norm": 0.6026126146316528, + "learning_rate": 4.964628823281566e-06, + "loss": 0.6324, + "step": 1186 + }, + { + "epoch": 0.17490176817288802, + "grad_norm": 0.5955009460449219, + "learning_rate": 4.964563807398961e-06, + "loss": 0.6077, + "step": 1187 + }, + { + "epoch": 0.17504911591355599, + "grad_norm": 0.6012455224990845, + "learning_rate": 4.964498732244752e-06, + "loss": 0.6052, + "step": 1188 + }, + { + "epoch": 0.17519646365422398, + "grad_norm": 0.5953582525253296, + "learning_rate": 4.964433597820505e-06, + "loss": 0.6188, + "step": 1189 + }, + { + "epoch": 0.17534381139489194, + "grad_norm": 0.621030330657959, + "learning_rate": 4.9643684041277855e-06, + "loss": 0.5993, + "step": 1190 + }, + { + "epoch": 0.17549115913555993, + "grad_norm": 0.602660059928894, + "learning_rate": 4.964303151168162e-06, + "loss": 0.5972, + "step": 1191 + }, + { + "epoch": 0.1756385068762279, + "grad_norm": 0.6288620233535767, + "learning_rate": 4.9642378389432035e-06, + "loss": 0.6256, + "step": 1192 + }, + { + "epoch": 0.1757858546168959, + "grad_norm": 0.5935959219932556, + "learning_rate": 4.96417246745448e-06, + "loss": 0.6023, + "step": 1193 + }, + { + "epoch": 0.17593320235756385, + "grad_norm": 0.6180009245872498, + "learning_rate": 4.964107036703565e-06, + "loss": 0.615, + "step": 1194 + }, + { + "epoch": 0.17608055009823181, + "grad_norm": 0.6029854416847229, + "learning_rate": 4.964041546692031e-06, + "loss": 0.5548, + "step": 1195 + }, + { + "epoch": 0.1762278978388998, + "grad_norm": 0.6314036250114441, + "learning_rate": 4.963975997421454e-06, + "loss": 0.6409, + "step": 1196 + }, + { + "epoch": 0.17637524557956777, + "grad_norm": 0.5733265280723572, + "learning_rate": 4.9639103888934106e-06, + "loss": 0.6054, + "step": 1197 + }, + { + "epoch": 0.17652259332023576, + "grad_norm": 0.6167635917663574, + "learning_rate": 4.963844721109477e-06, + "loss": 0.6278, + "step": 1198 + }, + { + "epoch": 0.17666994106090372, + "grad_norm": 0.5923928618431091, + "learning_rate": 4.963778994071233e-06, + "loss": 0.6392, + "step": 1199 + }, + { + "epoch": 0.17681728880157171, + "grad_norm": 0.6009294390678406, + "learning_rate": 4.963713207780261e-06, + "loss": 0.6165, + "step": 1200 + }, + { + "epoch": 0.17696463654223968, + "grad_norm": 0.6028009057044983, + "learning_rate": 4.9636473622381415e-06, + "loss": 0.6209, + "step": 1201 + }, + { + "epoch": 0.17711198428290767, + "grad_norm": 0.5907905697822571, + "learning_rate": 4.963581457446458e-06, + "loss": 0.6219, + "step": 1202 + }, + { + "epoch": 0.17725933202357563, + "grad_norm": 0.6188703179359436, + "learning_rate": 4.963515493406796e-06, + "loss": 0.6228, + "step": 1203 + }, + { + "epoch": 0.17740667976424362, + "grad_norm": 0.6523192524909973, + "learning_rate": 4.963449470120743e-06, + "loss": 0.6405, + "step": 1204 + }, + { + "epoch": 0.1775540275049116, + "grad_norm": 0.6339799165725708, + "learning_rate": 4.9633833875898845e-06, + "loss": 0.6068, + "step": 1205 + }, + { + "epoch": 0.17770137524557958, + "grad_norm": 0.6293594241142273, + "learning_rate": 4.9633172458158115e-06, + "loss": 0.6449, + "step": 1206 + }, + { + "epoch": 0.17784872298624754, + "grad_norm": 0.5806838870048523, + "learning_rate": 4.9632510448001135e-06, + "loss": 0.6113, + "step": 1207 + }, + { + "epoch": 0.17799607072691553, + "grad_norm": 0.5843467712402344, + "learning_rate": 4.963184784544384e-06, + "loss": 0.6518, + "step": 1208 + }, + { + "epoch": 0.1781434184675835, + "grad_norm": 0.6183896064758301, + "learning_rate": 4.963118465050215e-06, + "loss": 0.6181, + "step": 1209 + }, + { + "epoch": 0.17829076620825146, + "grad_norm": 0.6421794295310974, + "learning_rate": 4.963052086319202e-06, + "loss": 0.6087, + "step": 1210 + }, + { + "epoch": 0.17843811394891945, + "grad_norm": 0.6108830571174622, + "learning_rate": 4.962985648352943e-06, + "loss": 0.63, + "step": 1211 + }, + { + "epoch": 0.17858546168958742, + "grad_norm": 0.6043266654014587, + "learning_rate": 4.962919151153033e-06, + "loss": 0.6186, + "step": 1212 + }, + { + "epoch": 0.1787328094302554, + "grad_norm": 0.5608899593353271, + "learning_rate": 4.9628525947210725e-06, + "loss": 0.6469, + "step": 1213 + }, + { + "epoch": 0.17888015717092337, + "grad_norm": 0.5806758999824524, + "learning_rate": 4.962785979058663e-06, + "loss": 0.6386, + "step": 1214 + }, + { + "epoch": 0.17902750491159136, + "grad_norm": 0.6170945763587952, + "learning_rate": 4.962719304167406e-06, + "loss": 0.6406, + "step": 1215 + }, + { + "epoch": 0.17917485265225933, + "grad_norm": 0.5934296250343323, + "learning_rate": 4.962652570048904e-06, + "loss": 0.6403, + "step": 1216 + }, + { + "epoch": 0.17932220039292732, + "grad_norm": 0.6104062795639038, + "learning_rate": 4.962585776704763e-06, + "loss": 0.6091, + "step": 1217 + }, + { + "epoch": 0.17946954813359528, + "grad_norm": 0.6242760419845581, + "learning_rate": 4.96251892413659e-06, + "loss": 0.6036, + "step": 1218 + }, + { + "epoch": 0.17961689587426327, + "grad_norm": 0.6175158023834229, + "learning_rate": 4.96245201234599e-06, + "loss": 0.5828, + "step": 1219 + }, + { + "epoch": 0.17976424361493124, + "grad_norm": 0.6388654708862305, + "learning_rate": 4.962385041334575e-06, + "loss": 0.6412, + "step": 1220 + }, + { + "epoch": 0.17991159135559923, + "grad_norm": 0.6134159564971924, + "learning_rate": 4.962318011103956e-06, + "loss": 0.6515, + "step": 1221 + }, + { + "epoch": 0.1800589390962672, + "grad_norm": 0.5959429144859314, + "learning_rate": 4.9622509216557425e-06, + "loss": 0.6502, + "step": 1222 + }, + { + "epoch": 0.18020628683693515, + "grad_norm": 0.6211436986923218, + "learning_rate": 4.962183772991549e-06, + "loss": 0.6219, + "step": 1223 + }, + { + "epoch": 0.18035363457760314, + "grad_norm": 0.6549066305160522, + "learning_rate": 4.962116565112991e-06, + "loss": 0.6007, + "step": 1224 + }, + { + "epoch": 0.1805009823182711, + "grad_norm": 0.6244359612464905, + "learning_rate": 4.962049298021684e-06, + "loss": 0.6196, + "step": 1225 + }, + { + "epoch": 0.1806483300589391, + "grad_norm": 0.620944082736969, + "learning_rate": 4.961981971719246e-06, + "loss": 0.6368, + "step": 1226 + }, + { + "epoch": 0.18079567779960706, + "grad_norm": 0.615524411201477, + "learning_rate": 4.961914586207297e-06, + "loss": 0.6095, + "step": 1227 + }, + { + "epoch": 0.18094302554027505, + "grad_norm": 0.6622191071510315, + "learning_rate": 4.961847141487457e-06, + "loss": 0.5916, + "step": 1228 + }, + { + "epoch": 0.18109037328094302, + "grad_norm": 0.6060539484024048, + "learning_rate": 4.961779637561347e-06, + "loss": 0.6282, + "step": 1229 + }, + { + "epoch": 0.181237721021611, + "grad_norm": 0.6023911237716675, + "learning_rate": 4.961712074430592e-06, + "loss": 0.611, + "step": 1230 + }, + { + "epoch": 0.18138506876227897, + "grad_norm": 0.6261144280433655, + "learning_rate": 4.961644452096817e-06, + "loss": 0.6328, + "step": 1231 + }, + { + "epoch": 0.18153241650294696, + "grad_norm": 0.603447437286377, + "learning_rate": 4.961576770561646e-06, + "loss": 0.5978, + "step": 1232 + }, + { + "epoch": 0.18167976424361493, + "grad_norm": 0.5897770524024963, + "learning_rate": 4.961509029826709e-06, + "loss": 0.601, + "step": 1233 + }, + { + "epoch": 0.18182711198428292, + "grad_norm": 0.5914242267608643, + "learning_rate": 4.961441229893634e-06, + "loss": 0.6159, + "step": 1234 + }, + { + "epoch": 0.18197445972495088, + "grad_norm": 0.5973886847496033, + "learning_rate": 4.961373370764052e-06, + "loss": 0.6237, + "step": 1235 + }, + { + "epoch": 0.18212180746561887, + "grad_norm": 0.6382995247840881, + "learning_rate": 4.961305452439595e-06, + "loss": 0.5997, + "step": 1236 + }, + { + "epoch": 0.18226915520628684, + "grad_norm": 0.6072067618370056, + "learning_rate": 4.9612374749218965e-06, + "loss": 0.6192, + "step": 1237 + }, + { + "epoch": 0.1824165029469548, + "grad_norm": 0.6078459620475769, + "learning_rate": 4.96116943821259e-06, + "loss": 0.6064, + "step": 1238 + }, + { + "epoch": 0.1825638506876228, + "grad_norm": 0.703377366065979, + "learning_rate": 4.961101342313315e-06, + "loss": 0.5952, + "step": 1239 + }, + { + "epoch": 0.18271119842829076, + "grad_norm": 0.6318250298500061, + "learning_rate": 4.961033187225705e-06, + "loss": 0.6304, + "step": 1240 + }, + { + "epoch": 0.18285854616895875, + "grad_norm": 0.6167752146720886, + "learning_rate": 4.960964972951403e-06, + "loss": 0.631, + "step": 1241 + }, + { + "epoch": 0.1830058939096267, + "grad_norm": 0.591474711894989, + "learning_rate": 4.960896699492046e-06, + "loss": 0.6013, + "step": 1242 + }, + { + "epoch": 0.1831532416502947, + "grad_norm": 0.574491560459137, + "learning_rate": 4.960828366849278e-06, + "loss": 0.6023, + "step": 1243 + }, + { + "epoch": 0.18330058939096266, + "grad_norm": 0.6029043197631836, + "learning_rate": 4.960759975024743e-06, + "loss": 0.6312, + "step": 1244 + }, + { + "epoch": 0.18344793713163066, + "grad_norm": 0.6021742224693298, + "learning_rate": 4.960691524020084e-06, + "loss": 0.6273, + "step": 1245 + }, + { + "epoch": 0.18359528487229862, + "grad_norm": 0.6042230725288391, + "learning_rate": 4.960623013836947e-06, + "loss": 0.6272, + "step": 1246 + }, + { + "epoch": 0.1837426326129666, + "grad_norm": 0.5772675275802612, + "learning_rate": 4.960554444476983e-06, + "loss": 0.6339, + "step": 1247 + }, + { + "epoch": 0.18388998035363457, + "grad_norm": 0.5951935648918152, + "learning_rate": 4.960485815941836e-06, + "loss": 0.6463, + "step": 1248 + }, + { + "epoch": 0.18403732809430257, + "grad_norm": 0.5720517039299011, + "learning_rate": 4.960417128233161e-06, + "loss": 0.6377, + "step": 1249 + }, + { + "epoch": 0.18418467583497053, + "grad_norm": 0.6018413305282593, + "learning_rate": 4.960348381352608e-06, + "loss": 0.6054, + "step": 1250 + }, + { + "epoch": 0.18433202357563852, + "grad_norm": 0.6145224571228027, + "learning_rate": 4.96027957530183e-06, + "loss": 0.6544, + "step": 1251 + }, + { + "epoch": 0.18447937131630648, + "grad_norm": 0.6213505268096924, + "learning_rate": 4.960210710082481e-06, + "loss": 0.6283, + "step": 1252 + }, + { + "epoch": 0.18462671905697445, + "grad_norm": 0.6686877608299255, + "learning_rate": 4.96014178569622e-06, + "loss": 0.6522, + "step": 1253 + }, + { + "epoch": 0.18477406679764244, + "grad_norm": 0.5984253287315369, + "learning_rate": 4.9600728021447016e-06, + "loss": 0.6071, + "step": 1254 + }, + { + "epoch": 0.1849214145383104, + "grad_norm": 0.6004149317741394, + "learning_rate": 4.960003759429588e-06, + "loss": 0.6064, + "step": 1255 + }, + { + "epoch": 0.1850687622789784, + "grad_norm": 0.5770591497421265, + "learning_rate": 4.959934657552536e-06, + "loss": 0.6105, + "step": 1256 + }, + { + "epoch": 0.18521611001964636, + "grad_norm": 0.5695452690124512, + "learning_rate": 4.95986549651521e-06, + "loss": 0.6078, + "step": 1257 + }, + { + "epoch": 0.18536345776031435, + "grad_norm": 0.5967349410057068, + "learning_rate": 4.959796276319273e-06, + "loss": 0.6218, + "step": 1258 + }, + { + "epoch": 0.1855108055009823, + "grad_norm": 0.6093056201934814, + "learning_rate": 4.959726996966389e-06, + "loss": 0.6253, + "step": 1259 + }, + { + "epoch": 0.1856581532416503, + "grad_norm": 0.5877696871757507, + "learning_rate": 4.959657658458223e-06, + "loss": 0.6268, + "step": 1260 + }, + { + "epoch": 0.18580550098231827, + "grad_norm": 0.6013649702072144, + "learning_rate": 4.959588260796446e-06, + "loss": 0.6253, + "step": 1261 + }, + { + "epoch": 0.18595284872298626, + "grad_norm": 0.6110309362411499, + "learning_rate": 4.959518803982724e-06, + "loss": 0.6221, + "step": 1262 + }, + { + "epoch": 0.18610019646365422, + "grad_norm": 0.6336286067962646, + "learning_rate": 4.959449288018728e-06, + "loss": 0.6471, + "step": 1263 + }, + { + "epoch": 0.1862475442043222, + "grad_norm": 0.6089473366737366, + "learning_rate": 4.959379712906131e-06, + "loss": 0.605, + "step": 1264 + }, + { + "epoch": 0.18639489194499018, + "grad_norm": 0.5584856271743774, + "learning_rate": 4.959310078646605e-06, + "loss": 0.6196, + "step": 1265 + }, + { + "epoch": 0.18654223968565814, + "grad_norm": 0.6072677373886108, + "learning_rate": 4.959240385241825e-06, + "loss": 0.6129, + "step": 1266 + }, + { + "epoch": 0.18668958742632613, + "grad_norm": 0.6222429871559143, + "learning_rate": 4.959170632693468e-06, + "loss": 0.6153, + "step": 1267 + }, + { + "epoch": 0.1868369351669941, + "grad_norm": 0.6503329873085022, + "learning_rate": 4.959100821003209e-06, + "loss": 0.6304, + "step": 1268 + }, + { + "epoch": 0.18698428290766209, + "grad_norm": 0.5849472880363464, + "learning_rate": 4.95903095017273e-06, + "loss": 0.6519, + "step": 1269 + }, + { + "epoch": 0.18713163064833005, + "grad_norm": 0.5943260192871094, + "learning_rate": 4.95896102020371e-06, + "loss": 0.6062, + "step": 1270 + }, + { + "epoch": 0.18727897838899804, + "grad_norm": 0.6213210821151733, + "learning_rate": 4.95889103109783e-06, + "loss": 0.6379, + "step": 1271 + }, + { + "epoch": 0.187426326129666, + "grad_norm": 0.6245825886726379, + "learning_rate": 4.958820982856775e-06, + "loss": 0.5927, + "step": 1272 + }, + { + "epoch": 0.187573673870334, + "grad_norm": 0.6015775203704834, + "learning_rate": 4.958750875482228e-06, + "loss": 0.6022, + "step": 1273 + }, + { + "epoch": 0.18772102161100196, + "grad_norm": 0.5743094682693481, + "learning_rate": 4.958680708975875e-06, + "loss": 0.6055, + "step": 1274 + }, + { + "epoch": 0.18786836935166995, + "grad_norm": 0.5995045900344849, + "learning_rate": 4.958610483339405e-06, + "loss": 0.6188, + "step": 1275 + }, + { + "epoch": 0.18801571709233791, + "grad_norm": 0.6292736530303955, + "learning_rate": 4.958540198574506e-06, + "loss": 0.6079, + "step": 1276 + }, + { + "epoch": 0.1881630648330059, + "grad_norm": 0.614361584186554, + "learning_rate": 4.958469854682868e-06, + "loss": 0.6051, + "step": 1277 + }, + { + "epoch": 0.18831041257367387, + "grad_norm": 0.6038268804550171, + "learning_rate": 4.958399451666183e-06, + "loss": 0.6005, + "step": 1278 + }, + { + "epoch": 0.18845776031434186, + "grad_norm": 0.6071839928627014, + "learning_rate": 4.958328989526145e-06, + "loss": 0.6318, + "step": 1279 + }, + { + "epoch": 0.18860510805500982, + "grad_norm": 0.5858475565910339, + "learning_rate": 4.958258468264447e-06, + "loss": 0.6188, + "step": 1280 + }, + { + "epoch": 0.1887524557956778, + "grad_norm": 0.5754815936088562, + "learning_rate": 4.958187887882786e-06, + "loss": 0.5887, + "step": 1281 + }, + { + "epoch": 0.18889980353634578, + "grad_norm": 0.5743491053581238, + "learning_rate": 4.95811724838286e-06, + "loss": 0.6297, + "step": 1282 + }, + { + "epoch": 0.18904715127701374, + "grad_norm": 0.5912784337997437, + "learning_rate": 4.958046549766367e-06, + "loss": 0.5994, + "step": 1283 + }, + { + "epoch": 0.18919449901768173, + "grad_norm": 0.632948637008667, + "learning_rate": 4.957975792035007e-06, + "loss": 0.6257, + "step": 1284 + }, + { + "epoch": 0.1893418467583497, + "grad_norm": 0.6816422343254089, + "learning_rate": 4.957904975190482e-06, + "loss": 0.6465, + "step": 1285 + }, + { + "epoch": 0.1894891944990177, + "grad_norm": 0.6088347434997559, + "learning_rate": 4.9578340992344955e-06, + "loss": 0.6304, + "step": 1286 + }, + { + "epoch": 0.18963654223968565, + "grad_norm": 0.6259744763374329, + "learning_rate": 4.957763164168751e-06, + "loss": 0.6127, + "step": 1287 + }, + { + "epoch": 0.18978388998035364, + "grad_norm": 0.6279564499855042, + "learning_rate": 4.957692169994955e-06, + "loss": 0.5622, + "step": 1288 + }, + { + "epoch": 0.1899312377210216, + "grad_norm": 0.5909742712974548, + "learning_rate": 4.957621116714816e-06, + "loss": 0.6596, + "step": 1289 + }, + { + "epoch": 0.1900785854616896, + "grad_norm": 0.6104810833930969, + "learning_rate": 4.957550004330041e-06, + "loss": 0.6364, + "step": 1290 + }, + { + "epoch": 0.19022593320235756, + "grad_norm": 0.5897259712219238, + "learning_rate": 4.957478832842342e-06, + "loss": 0.6477, + "step": 1291 + }, + { + "epoch": 0.19037328094302555, + "grad_norm": 0.60964035987854, + "learning_rate": 4.957407602253429e-06, + "loss": 0.6525, + "step": 1292 + }, + { + "epoch": 0.19052062868369352, + "grad_norm": 0.6052617430686951, + "learning_rate": 4.957336312565016e-06, + "loss": 0.6046, + "step": 1293 + }, + { + "epoch": 0.19066797642436148, + "grad_norm": 0.5937590003013611, + "learning_rate": 4.957264963778817e-06, + "loss": 0.6386, + "step": 1294 + }, + { + "epoch": 0.19081532416502947, + "grad_norm": 0.6041482090950012, + "learning_rate": 4.957193555896549e-06, + "loss": 0.609, + "step": 1295 + }, + { + "epoch": 0.19096267190569743, + "grad_norm": 0.6236125826835632, + "learning_rate": 4.957122088919928e-06, + "loss": 0.5948, + "step": 1296 + }, + { + "epoch": 0.19111001964636543, + "grad_norm": 0.6406474113464355, + "learning_rate": 4.957050562850673e-06, + "loss": 0.6013, + "step": 1297 + }, + { + "epoch": 0.1912573673870334, + "grad_norm": 0.6186324954032898, + "learning_rate": 4.956978977690504e-06, + "loss": 0.6073, + "step": 1298 + }, + { + "epoch": 0.19140471512770138, + "grad_norm": 0.5788116455078125, + "learning_rate": 4.956907333441143e-06, + "loss": 0.5951, + "step": 1299 + }, + { + "epoch": 0.19155206286836934, + "grad_norm": 0.6113336682319641, + "learning_rate": 4.956835630104314e-06, + "loss": 0.6287, + "step": 1300 + }, + { + "epoch": 0.19169941060903734, + "grad_norm": 0.5926626324653625, + "learning_rate": 4.95676386768174e-06, + "loss": 0.6069, + "step": 1301 + }, + { + "epoch": 0.1918467583497053, + "grad_norm": 0.59721839427948, + "learning_rate": 4.9566920461751466e-06, + "loss": 0.6331, + "step": 1302 + }, + { + "epoch": 0.1919941060903733, + "grad_norm": 0.6379821300506592, + "learning_rate": 4.956620165586262e-06, + "loss": 0.6106, + "step": 1303 + }, + { + "epoch": 0.19214145383104125, + "grad_norm": 0.5988343954086304, + "learning_rate": 4.9565482259168156e-06, + "loss": 0.6254, + "step": 1304 + }, + { + "epoch": 0.19228880157170924, + "grad_norm": 0.5958719253540039, + "learning_rate": 4.956476227168535e-06, + "loss": 0.6357, + "step": 1305 + }, + { + "epoch": 0.1924361493123772, + "grad_norm": 0.581247866153717, + "learning_rate": 4.956404169343155e-06, + "loss": 0.5926, + "step": 1306 + }, + { + "epoch": 0.1925834970530452, + "grad_norm": 0.5646694302558899, + "learning_rate": 4.956332052442405e-06, + "loss": 0.6136, + "step": 1307 + }, + { + "epoch": 0.19273084479371316, + "grad_norm": 0.614494800567627, + "learning_rate": 4.956259876468022e-06, + "loss": 0.612, + "step": 1308 + }, + { + "epoch": 0.19287819253438113, + "grad_norm": 0.6332164406776428, + "learning_rate": 4.9561876414217416e-06, + "loss": 0.6352, + "step": 1309 + }, + { + "epoch": 0.19302554027504912, + "grad_norm": 0.6229962706565857, + "learning_rate": 4.9561153473053e-06, + "loss": 0.6296, + "step": 1310 + }, + { + "epoch": 0.19317288801571708, + "grad_norm": 0.5970346331596375, + "learning_rate": 4.956042994120437e-06, + "loss": 0.637, + "step": 1311 + }, + { + "epoch": 0.19332023575638507, + "grad_norm": 0.6044877767562866, + "learning_rate": 4.955970581868891e-06, + "loss": 0.6387, + "step": 1312 + }, + { + "epoch": 0.19346758349705304, + "grad_norm": 0.6302936673164368, + "learning_rate": 4.955898110552405e-06, + "loss": 0.65, + "step": 1313 + }, + { + "epoch": 0.19361493123772103, + "grad_norm": 0.6416610479354858, + "learning_rate": 4.95582558017272e-06, + "loss": 0.5923, + "step": 1314 + }, + { + "epoch": 0.193762278978389, + "grad_norm": 0.6210614442825317, + "learning_rate": 4.955752990731584e-06, + "loss": 0.6063, + "step": 1315 + }, + { + "epoch": 0.19390962671905698, + "grad_norm": 0.6168908476829529, + "learning_rate": 4.9556803422307384e-06, + "loss": 0.6153, + "step": 1316 + }, + { + "epoch": 0.19405697445972495, + "grad_norm": 0.6443777084350586, + "learning_rate": 4.9556076346719334e-06, + "loss": 0.6339, + "step": 1317 + }, + { + "epoch": 0.19420432220039294, + "grad_norm": 0.5919657349586487, + "learning_rate": 4.955534868056916e-06, + "loss": 0.5806, + "step": 1318 + }, + { + "epoch": 0.1943516699410609, + "grad_norm": 0.6103394031524658, + "learning_rate": 4.9554620423874375e-06, + "loss": 0.6163, + "step": 1319 + }, + { + "epoch": 0.1944990176817289, + "grad_norm": 0.5915734171867371, + "learning_rate": 4.955389157665248e-06, + "loss": 0.6028, + "step": 1320 + }, + { + "epoch": 0.19464636542239686, + "grad_norm": 0.5771839618682861, + "learning_rate": 4.955316213892101e-06, + "loss": 0.6211, + "step": 1321 + }, + { + "epoch": 0.19479371316306485, + "grad_norm": 0.5840791463851929, + "learning_rate": 4.95524321106975e-06, + "loss": 0.5788, + "step": 1322 + }, + { + "epoch": 0.1949410609037328, + "grad_norm": 0.5982716083526611, + "learning_rate": 4.955170149199952e-06, + "loss": 0.5735, + "step": 1323 + }, + { + "epoch": 0.19508840864440077, + "grad_norm": 0.6198409199714661, + "learning_rate": 4.955097028284463e-06, + "loss": 0.6256, + "step": 1324 + }, + { + "epoch": 0.19523575638506876, + "grad_norm": 0.6183931231498718, + "learning_rate": 4.955023848325043e-06, + "loss": 0.6314, + "step": 1325 + }, + { + "epoch": 0.19538310412573673, + "grad_norm": 0.5809248089790344, + "learning_rate": 4.95495060932345e-06, + "loss": 0.6292, + "step": 1326 + }, + { + "epoch": 0.19553045186640472, + "grad_norm": 0.5932889580726624, + "learning_rate": 4.954877311281446e-06, + "loss": 0.6247, + "step": 1327 + }, + { + "epoch": 0.19567779960707268, + "grad_norm": 0.5672548413276672, + "learning_rate": 4.954803954200795e-06, + "loss": 0.598, + "step": 1328 + }, + { + "epoch": 0.19582514734774067, + "grad_norm": 0.5893679261207581, + "learning_rate": 4.95473053808326e-06, + "loss": 0.6339, + "step": 1329 + }, + { + "epoch": 0.19597249508840864, + "grad_norm": 0.6303575038909912, + "learning_rate": 4.954657062930607e-06, + "loss": 0.6195, + "step": 1330 + }, + { + "epoch": 0.19611984282907663, + "grad_norm": 0.5667849779129028, + "learning_rate": 4.9545835287446025e-06, + "loss": 0.608, + "step": 1331 + }, + { + "epoch": 0.1962671905697446, + "grad_norm": 0.5866913199424744, + "learning_rate": 4.954509935527016e-06, + "loss": 0.6165, + "step": 1332 + }, + { + "epoch": 0.19641453831041258, + "grad_norm": 0.5904062390327454, + "learning_rate": 4.9544362832796165e-06, + "loss": 0.6503, + "step": 1333 + }, + { + "epoch": 0.19656188605108055, + "grad_norm": 0.6189970970153809, + "learning_rate": 4.9543625720041756e-06, + "loss": 0.5896, + "step": 1334 + }, + { + "epoch": 0.19670923379174854, + "grad_norm": 0.6123439073562622, + "learning_rate": 4.954288801702466e-06, + "loss": 0.6054, + "step": 1335 + }, + { + "epoch": 0.1968565815324165, + "grad_norm": 0.63816237449646, + "learning_rate": 4.954214972376261e-06, + "loss": 0.6234, + "step": 1336 + }, + { + "epoch": 0.19700392927308447, + "grad_norm": 0.6060817241668701, + "learning_rate": 4.954141084027339e-06, + "loss": 0.6264, + "step": 1337 + }, + { + "epoch": 0.19715127701375246, + "grad_norm": 0.6172388792037964, + "learning_rate": 4.954067136657474e-06, + "loss": 0.5989, + "step": 1338 + }, + { + "epoch": 0.19729862475442042, + "grad_norm": 0.6298989653587341, + "learning_rate": 4.953993130268444e-06, + "loss": 0.6036, + "step": 1339 + }, + { + "epoch": 0.1974459724950884, + "grad_norm": 0.6043915748596191, + "learning_rate": 4.953919064862032e-06, + "loss": 0.6279, + "step": 1340 + }, + { + "epoch": 0.19759332023575638, + "grad_norm": 0.5761882066726685, + "learning_rate": 4.9538449404400165e-06, + "loss": 0.5836, + "step": 1341 + }, + { + "epoch": 0.19774066797642437, + "grad_norm": 0.5667174458503723, + "learning_rate": 4.953770757004181e-06, + "loss": 0.5959, + "step": 1342 + }, + { + "epoch": 0.19788801571709233, + "grad_norm": 0.6644392013549805, + "learning_rate": 4.9536965145563104e-06, + "loss": 0.6093, + "step": 1343 + }, + { + "epoch": 0.19803536345776032, + "grad_norm": 0.5749967098236084, + "learning_rate": 4.953622213098189e-06, + "loss": 0.6006, + "step": 1344 + }, + { + "epoch": 0.19818271119842829, + "grad_norm": 0.6360700130462646, + "learning_rate": 4.953547852631604e-06, + "loss": 0.6213, + "step": 1345 + }, + { + "epoch": 0.19833005893909628, + "grad_norm": 0.6129010319709778, + "learning_rate": 4.953473433158344e-06, + "loss": 0.6191, + "step": 1346 + }, + { + "epoch": 0.19847740667976424, + "grad_norm": 0.5830097794532776, + "learning_rate": 4.9533989546802e-06, + "loss": 0.5889, + "step": 1347 + }, + { + "epoch": 0.19862475442043223, + "grad_norm": 0.5595689415931702, + "learning_rate": 4.953324417198961e-06, + "loss": 0.6396, + "step": 1348 + }, + { + "epoch": 0.1987721021611002, + "grad_norm": 0.59440678358078, + "learning_rate": 4.953249820716421e-06, + "loss": 0.6478, + "step": 1349 + }, + { + "epoch": 0.19891944990176819, + "grad_norm": 0.5922817587852478, + "learning_rate": 4.953175165234372e-06, + "loss": 0.6177, + "step": 1350 + }, + { + "epoch": 0.19906679764243615, + "grad_norm": 0.625850260257721, + "learning_rate": 4.953100450754611e-06, + "loss": 0.6199, + "step": 1351 + }, + { + "epoch": 0.1992141453831041, + "grad_norm": 0.6018819808959961, + "learning_rate": 4.953025677278935e-06, + "loss": 0.5766, + "step": 1352 + }, + { + "epoch": 0.1993614931237721, + "grad_norm": 0.629694402217865, + "learning_rate": 4.952950844809143e-06, + "loss": 0.6505, + "step": 1353 + }, + { + "epoch": 0.19950884086444007, + "grad_norm": 0.6308799386024475, + "learning_rate": 4.952875953347033e-06, + "loss": 0.6119, + "step": 1354 + }, + { + "epoch": 0.19965618860510806, + "grad_norm": 0.6035031080245972, + "learning_rate": 4.952801002894407e-06, + "loss": 0.6284, + "step": 1355 + }, + { + "epoch": 0.19980353634577602, + "grad_norm": 0.5629380941390991, + "learning_rate": 4.952725993453067e-06, + "loss": 0.5953, + "step": 1356 + }, + { + "epoch": 0.19995088408644401, + "grad_norm": 0.6119502186775208, + "learning_rate": 4.952650925024818e-06, + "loss": 0.6109, + "step": 1357 + }, + { + "epoch": 0.20009823182711198, + "grad_norm": 0.6070235371589661, + "learning_rate": 4.952575797611464e-06, + "loss": 0.6118, + "step": 1358 + }, + { + "epoch": 0.20024557956777997, + "grad_norm": 0.6002263426780701, + "learning_rate": 4.952500611214812e-06, + "loss": 0.6208, + "step": 1359 + }, + { + "epoch": 0.20039292730844793, + "grad_norm": 0.6670234799385071, + "learning_rate": 4.952425365836671e-06, + "loss": 0.5993, + "step": 1360 + }, + { + "epoch": 0.20054027504911592, + "grad_norm": 0.6273658871650696, + "learning_rate": 4.95235006147885e-06, + "loss": 0.6141, + "step": 1361 + }, + { + "epoch": 0.2006876227897839, + "grad_norm": 0.610058605670929, + "learning_rate": 4.952274698143161e-06, + "loss": 0.6268, + "step": 1362 + }, + { + "epoch": 0.20083497053045188, + "grad_norm": 0.6109257936477661, + "learning_rate": 4.952199275831414e-06, + "loss": 0.6257, + "step": 1363 + }, + { + "epoch": 0.20098231827111984, + "grad_norm": 0.6110548377037048, + "learning_rate": 4.952123794545427e-06, + "loss": 0.6182, + "step": 1364 + }, + { + "epoch": 0.2011296660117878, + "grad_norm": 0.6145288944244385, + "learning_rate": 4.9520482542870104e-06, + "loss": 0.6464, + "step": 1365 + }, + { + "epoch": 0.2012770137524558, + "grad_norm": 0.573418915271759, + "learning_rate": 4.951972655057985e-06, + "loss": 0.6323, + "step": 1366 + }, + { + "epoch": 0.20142436149312376, + "grad_norm": 0.5866599678993225, + "learning_rate": 4.951896996860167e-06, + "loss": 0.6327, + "step": 1367 + }, + { + "epoch": 0.20157170923379175, + "grad_norm": 0.5788896083831787, + "learning_rate": 4.951821279695376e-06, + "loss": 0.6207, + "step": 1368 + }, + { + "epoch": 0.20171905697445972, + "grad_norm": 0.5986254811286926, + "learning_rate": 4.951745503565433e-06, + "loss": 0.5844, + "step": 1369 + }, + { + "epoch": 0.2018664047151277, + "grad_norm": 0.5967018604278564, + "learning_rate": 4.951669668472161e-06, + "loss": 0.5981, + "step": 1370 + }, + { + "epoch": 0.20201375245579567, + "grad_norm": 0.5695437788963318, + "learning_rate": 4.951593774417384e-06, + "loss": 0.6481, + "step": 1371 + }, + { + "epoch": 0.20216110019646366, + "grad_norm": 0.6180761456489563, + "learning_rate": 4.951517821402925e-06, + "loss": 0.6296, + "step": 1372 + }, + { + "epoch": 0.20230844793713162, + "grad_norm": 0.6345153450965881, + "learning_rate": 4.951441809430613e-06, + "loss": 0.6227, + "step": 1373 + }, + { + "epoch": 0.20245579567779962, + "grad_norm": 0.584170937538147, + "learning_rate": 4.951365738502276e-06, + "loss": 0.5887, + "step": 1374 + }, + { + "epoch": 0.20260314341846758, + "grad_norm": 0.599202573299408, + "learning_rate": 4.951289608619742e-06, + "loss": 0.6198, + "step": 1375 + }, + { + "epoch": 0.20275049115913557, + "grad_norm": 0.6160393357276917, + "learning_rate": 4.951213419784843e-06, + "loss": 0.6232, + "step": 1376 + }, + { + "epoch": 0.20289783889980353, + "grad_norm": 0.6053966879844666, + "learning_rate": 4.95113717199941e-06, + "loss": 0.6484, + "step": 1377 + }, + { + "epoch": 0.20304518664047153, + "grad_norm": 0.6325736045837402, + "learning_rate": 4.951060865265278e-06, + "loss": 0.6245, + "step": 1378 + }, + { + "epoch": 0.2031925343811395, + "grad_norm": 0.6281895041465759, + "learning_rate": 4.950984499584283e-06, + "loss": 0.6048, + "step": 1379 + }, + { + "epoch": 0.20333988212180745, + "grad_norm": 0.5961670875549316, + "learning_rate": 4.950908074958259e-06, + "loss": 0.6157, + "step": 1380 + }, + { + "epoch": 0.20348722986247544, + "grad_norm": 0.60648113489151, + "learning_rate": 4.950831591389046e-06, + "loss": 0.6151, + "step": 1381 + }, + { + "epoch": 0.2036345776031434, + "grad_norm": 0.6144936084747314, + "learning_rate": 4.950755048878482e-06, + "loss": 0.6374, + "step": 1382 + }, + { + "epoch": 0.2037819253438114, + "grad_norm": 0.5771678686141968, + "learning_rate": 4.95067844742841e-06, + "loss": 0.6285, + "step": 1383 + }, + { + "epoch": 0.20392927308447936, + "grad_norm": 0.584593653678894, + "learning_rate": 4.95060178704067e-06, + "loss": 0.641, + "step": 1384 + }, + { + "epoch": 0.20407662082514735, + "grad_norm": 0.5954614877700806, + "learning_rate": 4.9505250677171056e-06, + "loss": 0.6137, + "step": 1385 + }, + { + "epoch": 0.20422396856581532, + "grad_norm": 0.6073674559593201, + "learning_rate": 4.950448289459564e-06, + "loss": 0.6133, + "step": 1386 + }, + { + "epoch": 0.2043713163064833, + "grad_norm": 0.596467912197113, + "learning_rate": 4.9503714522698895e-06, + "loss": 0.6084, + "step": 1387 + }, + { + "epoch": 0.20451866404715127, + "grad_norm": 0.6756178736686707, + "learning_rate": 4.950294556149931e-06, + "loss": 0.6226, + "step": 1388 + }, + { + "epoch": 0.20466601178781926, + "grad_norm": 0.6493410468101501, + "learning_rate": 4.950217601101538e-06, + "loss": 0.6253, + "step": 1389 + }, + { + "epoch": 0.20481335952848723, + "grad_norm": 0.6484753489494324, + "learning_rate": 4.9501405871265605e-06, + "loss": 0.5698, + "step": 1390 + }, + { + "epoch": 0.20496070726915522, + "grad_norm": 0.6179805397987366, + "learning_rate": 4.950063514226851e-06, + "loss": 0.5825, + "step": 1391 + }, + { + "epoch": 0.20510805500982318, + "grad_norm": 0.6517780423164368, + "learning_rate": 4.949986382404263e-06, + "loss": 0.6024, + "step": 1392 + }, + { + "epoch": 0.20525540275049117, + "grad_norm": 0.6172077655792236, + "learning_rate": 4.9499091916606525e-06, + "loss": 0.6264, + "step": 1393 + }, + { + "epoch": 0.20540275049115914, + "grad_norm": 0.6155092120170593, + "learning_rate": 4.9498319419978744e-06, + "loss": 0.6299, + "step": 1394 + }, + { + "epoch": 0.2055500982318271, + "grad_norm": 0.6766539812088013, + "learning_rate": 4.949754633417788e-06, + "loss": 0.6423, + "step": 1395 + }, + { + "epoch": 0.2056974459724951, + "grad_norm": 0.5751426219940186, + "learning_rate": 4.9496772659222506e-06, + "loss": 0.6332, + "step": 1396 + }, + { + "epoch": 0.20584479371316305, + "grad_norm": 0.6194370985031128, + "learning_rate": 4.949599839513124e-06, + "loss": 0.6393, + "step": 1397 + }, + { + "epoch": 0.20599214145383105, + "grad_norm": 0.6059185862541199, + "learning_rate": 4.949522354192271e-06, + "loss": 0.6147, + "step": 1398 + }, + { + "epoch": 0.206139489194499, + "grad_norm": 0.6088065505027771, + "learning_rate": 4.949444809961553e-06, + "loss": 0.6077, + "step": 1399 + }, + { + "epoch": 0.206286836935167, + "grad_norm": 0.5985973477363586, + "learning_rate": 4.949367206822837e-06, + "loss": 0.6051, + "step": 1400 + }, + { + "epoch": 0.20643418467583496, + "grad_norm": 0.6104844808578491, + "learning_rate": 4.949289544777989e-06, + "loss": 0.6213, + "step": 1401 + }, + { + "epoch": 0.20658153241650296, + "grad_norm": 0.5751961469650269, + "learning_rate": 4.949211823828875e-06, + "loss": 0.6388, + "step": 1402 + }, + { + "epoch": 0.20672888015717092, + "grad_norm": 0.5984809994697571, + "learning_rate": 4.9491340439773664e-06, + "loss": 0.5891, + "step": 1403 + }, + { + "epoch": 0.2068762278978389, + "grad_norm": 0.6540998220443726, + "learning_rate": 4.949056205225333e-06, + "loss": 0.6218, + "step": 1404 + }, + { + "epoch": 0.20702357563850687, + "grad_norm": 0.6012822985649109, + "learning_rate": 4.9489783075746455e-06, + "loss": 0.5903, + "step": 1405 + }, + { + "epoch": 0.20717092337917486, + "grad_norm": 0.5940998792648315, + "learning_rate": 4.948900351027179e-06, + "loss": 0.63, + "step": 1406 + }, + { + "epoch": 0.20731827111984283, + "grad_norm": 0.5843308568000793, + "learning_rate": 4.948822335584808e-06, + "loss": 0.6379, + "step": 1407 + }, + { + "epoch": 0.2074656188605108, + "grad_norm": 0.6440520882606506, + "learning_rate": 4.948744261249407e-06, + "loss": 0.6384, + "step": 1408 + }, + { + "epoch": 0.20761296660117878, + "grad_norm": 0.5917844772338867, + "learning_rate": 4.948666128022856e-06, + "loss": 0.6368, + "step": 1409 + }, + { + "epoch": 0.20776031434184675, + "grad_norm": 0.5737636089324951, + "learning_rate": 4.948587935907033e-06, + "loss": 0.6102, + "step": 1410 + }, + { + "epoch": 0.20790766208251474, + "grad_norm": 0.6047429442405701, + "learning_rate": 4.948509684903818e-06, + "loss": 0.615, + "step": 1411 + }, + { + "epoch": 0.2080550098231827, + "grad_norm": 0.5793607831001282, + "learning_rate": 4.948431375015095e-06, + "loss": 0.5967, + "step": 1412 + }, + { + "epoch": 0.2082023575638507, + "grad_norm": 0.6292081475257874, + "learning_rate": 4.948353006242744e-06, + "loss": 0.632, + "step": 1413 + }, + { + "epoch": 0.20834970530451866, + "grad_norm": 0.5944942235946655, + "learning_rate": 4.948274578588654e-06, + "loss": 0.6287, + "step": 1414 + }, + { + "epoch": 0.20849705304518665, + "grad_norm": 0.5952387452125549, + "learning_rate": 4.948196092054707e-06, + "loss": 0.6254, + "step": 1415 + }, + { + "epoch": 0.2086444007858546, + "grad_norm": 0.6000376343727112, + "learning_rate": 4.948117546642791e-06, + "loss": 0.634, + "step": 1416 + }, + { + "epoch": 0.2087917485265226, + "grad_norm": 0.6141393780708313, + "learning_rate": 4.948038942354798e-06, + "loss": 0.6423, + "step": 1417 + }, + { + "epoch": 0.20893909626719057, + "grad_norm": 0.607109010219574, + "learning_rate": 4.9479602791926165e-06, + "loss": 0.6397, + "step": 1418 + }, + { + "epoch": 0.20908644400785856, + "grad_norm": 0.6227192282676697, + "learning_rate": 4.947881557158138e-06, + "loss": 0.6001, + "step": 1419 + }, + { + "epoch": 0.20923379174852652, + "grad_norm": 0.600243866443634, + "learning_rate": 4.947802776253256e-06, + "loss": 0.6327, + "step": 1420 + }, + { + "epoch": 0.2093811394891945, + "grad_norm": 0.5986863970756531, + "learning_rate": 4.947723936479867e-06, + "loss": 0.5855, + "step": 1421 + }, + { + "epoch": 0.20952848722986248, + "grad_norm": 0.6404968500137329, + "learning_rate": 4.947645037839863e-06, + "loss": 0.6304, + "step": 1422 + }, + { + "epoch": 0.20967583497053044, + "grad_norm": 0.6043936610221863, + "learning_rate": 4.947566080335145e-06, + "loss": 0.6179, + "step": 1423 + }, + { + "epoch": 0.20982318271119843, + "grad_norm": 0.6380194425582886, + "learning_rate": 4.947487063967611e-06, + "loss": 0.6203, + "step": 1424 + }, + { + "epoch": 0.2099705304518664, + "grad_norm": 0.5831424593925476, + "learning_rate": 4.947407988739162e-06, + "loss": 0.6282, + "step": 1425 + }, + { + "epoch": 0.21011787819253439, + "grad_norm": 0.5707808136940002, + "learning_rate": 4.947328854651697e-06, + "loss": 0.604, + "step": 1426 + }, + { + "epoch": 0.21026522593320235, + "grad_norm": 0.612095832824707, + "learning_rate": 4.947249661707121e-06, + "loss": 0.6324, + "step": 1427 + }, + { + "epoch": 0.21041257367387034, + "grad_norm": 0.5958979725837708, + "learning_rate": 4.9471704099073395e-06, + "loss": 0.5802, + "step": 1428 + }, + { + "epoch": 0.2105599214145383, + "grad_norm": 0.6034339666366577, + "learning_rate": 4.947091099254256e-06, + "loss": 0.5904, + "step": 1429 + }, + { + "epoch": 0.2107072691552063, + "grad_norm": 0.5782062411308289, + "learning_rate": 4.947011729749781e-06, + "loss": 0.639, + "step": 1430 + }, + { + "epoch": 0.21085461689587426, + "grad_norm": 0.5611622929573059, + "learning_rate": 4.94693230139582e-06, + "loss": 0.5859, + "step": 1431 + }, + { + "epoch": 0.21100196463654225, + "grad_norm": 0.5955947637557983, + "learning_rate": 4.946852814194286e-06, + "loss": 0.5987, + "step": 1432 + }, + { + "epoch": 0.2111493123772102, + "grad_norm": 0.6219133734703064, + "learning_rate": 4.94677326814709e-06, + "loss": 0.6133, + "step": 1433 + }, + { + "epoch": 0.2112966601178782, + "grad_norm": 0.6327332258224487, + "learning_rate": 4.946693663256143e-06, + "loss": 0.6326, + "step": 1434 + }, + { + "epoch": 0.21144400785854617, + "grad_norm": 0.6115903854370117, + "learning_rate": 4.946613999523361e-06, + "loss": 0.6227, + "step": 1435 + }, + { + "epoch": 0.21159135559921416, + "grad_norm": 0.5698453187942505, + "learning_rate": 4.946534276950661e-06, + "loss": 0.6079, + "step": 1436 + }, + { + "epoch": 0.21173870333988212, + "grad_norm": 0.613233745098114, + "learning_rate": 4.946454495539959e-06, + "loss": 0.6296, + "step": 1437 + }, + { + "epoch": 0.2118860510805501, + "grad_norm": 0.5867412686347961, + "learning_rate": 4.946374655293172e-06, + "loss": 0.6142, + "step": 1438 + }, + { + "epoch": 0.21203339882121808, + "grad_norm": 0.6177881956100464, + "learning_rate": 4.946294756212224e-06, + "loss": 0.6125, + "step": 1439 + }, + { + "epoch": 0.21218074656188604, + "grad_norm": 0.5978368520736694, + "learning_rate": 4.946214798299034e-06, + "loss": 0.6441, + "step": 1440 + }, + { + "epoch": 0.21232809430255403, + "grad_norm": 0.5822241306304932, + "learning_rate": 4.946134781555525e-06, + "loss": 0.6126, + "step": 1441 + }, + { + "epoch": 0.212475442043222, + "grad_norm": 0.5774044394493103, + "learning_rate": 4.9460547059836215e-06, + "loss": 0.6305, + "step": 1442 + }, + { + "epoch": 0.21262278978389, + "grad_norm": 0.6271365284919739, + "learning_rate": 4.9459745715852506e-06, + "loss": 0.5995, + "step": 1443 + }, + { + "epoch": 0.21277013752455795, + "grad_norm": 0.5703158378601074, + "learning_rate": 4.945894378362338e-06, + "loss": 0.5861, + "step": 1444 + }, + { + "epoch": 0.21291748526522594, + "grad_norm": 0.5950847864151001, + "learning_rate": 4.945814126316813e-06, + "loss": 0.639, + "step": 1445 + }, + { + "epoch": 0.2130648330058939, + "grad_norm": 0.6100016236305237, + "learning_rate": 4.945733815450605e-06, + "loss": 0.6245, + "step": 1446 + }, + { + "epoch": 0.2132121807465619, + "grad_norm": 0.577850878238678, + "learning_rate": 4.945653445765646e-06, + "loss": 0.6356, + "step": 1447 + }, + { + "epoch": 0.21335952848722986, + "grad_norm": 0.5909193754196167, + "learning_rate": 4.94557301726387e-06, + "loss": 0.623, + "step": 1448 + }, + { + "epoch": 0.21350687622789785, + "grad_norm": 0.6027027368545532, + "learning_rate": 4.945492529947208e-06, + "loss": 0.644, + "step": 1449 + }, + { + "epoch": 0.21365422396856582, + "grad_norm": 0.5744301676750183, + "learning_rate": 4.945411983817599e-06, + "loss": 0.6133, + "step": 1450 + }, + { + "epoch": 0.21380157170923378, + "grad_norm": 0.6128263473510742, + "learning_rate": 4.945331378876977e-06, + "loss": 0.622, + "step": 1451 + }, + { + "epoch": 0.21394891944990177, + "grad_norm": 0.5850877165794373, + "learning_rate": 4.9452507151272844e-06, + "loss": 0.6123, + "step": 1452 + }, + { + "epoch": 0.21409626719056973, + "grad_norm": 0.5896425247192383, + "learning_rate": 4.945169992570457e-06, + "loss": 0.5853, + "step": 1453 + }, + { + "epoch": 0.21424361493123772, + "grad_norm": 0.6142973303794861, + "learning_rate": 4.945089211208441e-06, + "loss": 0.6263, + "step": 1454 + }, + { + "epoch": 0.2143909626719057, + "grad_norm": 0.6004498600959778, + "learning_rate": 4.945008371043174e-06, + "loss": 0.6302, + "step": 1455 + }, + { + "epoch": 0.21453831041257368, + "grad_norm": 0.6166588068008423, + "learning_rate": 4.944927472076603e-06, + "loss": 0.6206, + "step": 1456 + }, + { + "epoch": 0.21468565815324164, + "grad_norm": 0.5939904451370239, + "learning_rate": 4.944846514310673e-06, + "loss": 0.6098, + "step": 1457 + }, + { + "epoch": 0.21483300589390963, + "grad_norm": 0.6076056361198425, + "learning_rate": 4.9447654977473306e-06, + "loss": 0.6182, + "step": 1458 + }, + { + "epoch": 0.2149803536345776, + "grad_norm": 0.6488870978355408, + "learning_rate": 4.944684422388525e-06, + "loss": 0.6187, + "step": 1459 + }, + { + "epoch": 0.2151277013752456, + "grad_norm": 0.6299635171890259, + "learning_rate": 4.944603288236206e-06, + "loss": 0.6261, + "step": 1460 + }, + { + "epoch": 0.21527504911591355, + "grad_norm": 0.6136243343353271, + "learning_rate": 4.944522095292325e-06, + "loss": 0.6351, + "step": 1461 + }, + { + "epoch": 0.21542239685658154, + "grad_norm": 0.5753845572471619, + "learning_rate": 4.944440843558832e-06, + "loss": 0.6122, + "step": 1462 + }, + { + "epoch": 0.2155697445972495, + "grad_norm": 0.6405686140060425, + "learning_rate": 4.944359533037685e-06, + "loss": 0.6346, + "step": 1463 + }, + { + "epoch": 0.2157170923379175, + "grad_norm": 0.5940660834312439, + "learning_rate": 4.944278163730838e-06, + "loss": 0.6083, + "step": 1464 + }, + { + "epoch": 0.21586444007858546, + "grad_norm": 0.6080908179283142, + "learning_rate": 4.9441967356402465e-06, + "loss": 0.5905, + "step": 1465 + }, + { + "epoch": 0.21601178781925343, + "grad_norm": 0.6131464838981628, + "learning_rate": 4.94411524876787e-06, + "loss": 0.6291, + "step": 1466 + }, + { + "epoch": 0.21615913555992142, + "grad_norm": 0.6049478650093079, + "learning_rate": 4.944033703115669e-06, + "loss": 0.6378, + "step": 1467 + }, + { + "epoch": 0.21630648330058938, + "grad_norm": 0.657967209815979, + "learning_rate": 4.943952098685603e-06, + "loss": 0.6168, + "step": 1468 + }, + { + "epoch": 0.21645383104125737, + "grad_norm": 0.5719718337059021, + "learning_rate": 4.943870435479636e-06, + "loss": 0.6211, + "step": 1469 + }, + { + "epoch": 0.21660117878192534, + "grad_norm": 0.6279664635658264, + "learning_rate": 4.94378871349973e-06, + "loss": 0.6378, + "step": 1470 + }, + { + "epoch": 0.21674852652259333, + "grad_norm": 0.6585307121276855, + "learning_rate": 4.943706932747853e-06, + "loss": 0.6207, + "step": 1471 + }, + { + "epoch": 0.2168958742632613, + "grad_norm": 0.6101717352867126, + "learning_rate": 4.94362509322597e-06, + "loss": 0.592, + "step": 1472 + }, + { + "epoch": 0.21704322200392928, + "grad_norm": 0.6098536849021912, + "learning_rate": 4.943543194936049e-06, + "loss": 0.6089, + "step": 1473 + }, + { + "epoch": 0.21719056974459724, + "grad_norm": 0.620525062084198, + "learning_rate": 4.9434612378800605e-06, + "loss": 0.6554, + "step": 1474 + }, + { + "epoch": 0.21733791748526524, + "grad_norm": 0.6023558378219604, + "learning_rate": 4.943379222059976e-06, + "loss": 0.5939, + "step": 1475 + }, + { + "epoch": 0.2174852652259332, + "grad_norm": 0.6017934083938599, + "learning_rate": 4.943297147477767e-06, + "loss": 0.6186, + "step": 1476 + }, + { + "epoch": 0.2176326129666012, + "grad_norm": 0.599224328994751, + "learning_rate": 4.9432150141354075e-06, + "loss": 0.6423, + "step": 1477 + }, + { + "epoch": 0.21777996070726915, + "grad_norm": 0.6337676048278809, + "learning_rate": 4.943132822034872e-06, + "loss": 0.6167, + "step": 1478 + }, + { + "epoch": 0.21792730844793712, + "grad_norm": 0.5892879962921143, + "learning_rate": 4.943050571178139e-06, + "loss": 0.6314, + "step": 1479 + }, + { + "epoch": 0.2180746561886051, + "grad_norm": 0.614597499370575, + "learning_rate": 4.942968261567185e-06, + "loss": 0.6247, + "step": 1480 + }, + { + "epoch": 0.21822200392927307, + "grad_norm": 0.59217369556427, + "learning_rate": 4.942885893203991e-06, + "loss": 0.5874, + "step": 1481 + }, + { + "epoch": 0.21836935166994106, + "grad_norm": 0.5998961925506592, + "learning_rate": 4.942803466090537e-06, + "loss": 0.5908, + "step": 1482 + }, + { + "epoch": 0.21851669941060903, + "grad_norm": 0.6185607314109802, + "learning_rate": 4.942720980228804e-06, + "loss": 0.6209, + "step": 1483 + }, + { + "epoch": 0.21866404715127702, + "grad_norm": 0.6066619753837585, + "learning_rate": 4.942638435620778e-06, + "loss": 0.6135, + "step": 1484 + }, + { + "epoch": 0.21881139489194498, + "grad_norm": 0.5923621654510498, + "learning_rate": 4.942555832268444e-06, + "loss": 0.6116, + "step": 1485 + }, + { + "epoch": 0.21895874263261297, + "grad_norm": 0.6481506824493408, + "learning_rate": 4.942473170173787e-06, + "loss": 0.6099, + "step": 1486 + }, + { + "epoch": 0.21910609037328094, + "grad_norm": 0.5960515737533569, + "learning_rate": 4.942390449338796e-06, + "loss": 0.5823, + "step": 1487 + }, + { + "epoch": 0.21925343811394893, + "grad_norm": 0.6239160299301147, + "learning_rate": 4.94230766976546e-06, + "loss": 0.6214, + "step": 1488 + }, + { + "epoch": 0.2194007858546169, + "grad_norm": 0.6266829967498779, + "learning_rate": 4.9422248314557705e-06, + "loss": 0.6063, + "step": 1489 + }, + { + "epoch": 0.21954813359528488, + "grad_norm": 0.6178009510040283, + "learning_rate": 4.942141934411719e-06, + "loss": 0.5953, + "step": 1490 + }, + { + "epoch": 0.21969548133595285, + "grad_norm": 0.5730262994766235, + "learning_rate": 4.9420589786353e-06, + "loss": 0.6074, + "step": 1491 + }, + { + "epoch": 0.21984282907662084, + "grad_norm": 0.6464042663574219, + "learning_rate": 4.941975964128507e-06, + "loss": 0.6011, + "step": 1492 + }, + { + "epoch": 0.2199901768172888, + "grad_norm": 0.609715461730957, + "learning_rate": 4.941892890893338e-06, + "loss": 0.6202, + "step": 1493 + }, + { + "epoch": 0.22013752455795677, + "grad_norm": 0.6089447736740112, + "learning_rate": 4.94180975893179e-06, + "loss": 0.6236, + "step": 1494 + }, + { + "epoch": 0.22028487229862476, + "grad_norm": 0.6006873250007629, + "learning_rate": 4.9417265682458635e-06, + "loss": 0.6191, + "step": 1495 + }, + { + "epoch": 0.22043222003929272, + "grad_norm": 0.605165958404541, + "learning_rate": 4.941643318837557e-06, + "loss": 0.6493, + "step": 1496 + }, + { + "epoch": 0.2205795677799607, + "grad_norm": 0.639488160610199, + "learning_rate": 4.941560010708875e-06, + "loss": 0.6515, + "step": 1497 + }, + { + "epoch": 0.22072691552062867, + "grad_norm": 0.6495417952537537, + "learning_rate": 4.941476643861819e-06, + "loss": 0.6316, + "step": 1498 + }, + { + "epoch": 0.22087426326129667, + "grad_norm": 0.6293731927871704, + "learning_rate": 4.941393218298396e-06, + "loss": 0.624, + "step": 1499 + }, + { + "epoch": 0.22102161100196463, + "grad_norm": 0.5684942603111267, + "learning_rate": 4.9413097340206096e-06, + "loss": 0.635, + "step": 1500 + }, + { + "epoch": 0.22116895874263262, + "grad_norm": 0.6038397550582886, + "learning_rate": 4.9412261910304695e-06, + "loss": 0.6294, + "step": 1501 + }, + { + "epoch": 0.22131630648330058, + "grad_norm": 0.6161315441131592, + "learning_rate": 4.941142589329986e-06, + "loss": 0.6061, + "step": 1502 + }, + { + "epoch": 0.22146365422396858, + "grad_norm": 0.629790723323822, + "learning_rate": 4.941058928921166e-06, + "loss": 0.5787, + "step": 1503 + }, + { + "epoch": 0.22161100196463654, + "grad_norm": 0.6197001338005066, + "learning_rate": 4.940975209806025e-06, + "loss": 0.596, + "step": 1504 + }, + { + "epoch": 0.22175834970530453, + "grad_norm": 0.5836420059204102, + "learning_rate": 4.940891431986575e-06, + "loss": 0.6394, + "step": 1505 + }, + { + "epoch": 0.2219056974459725, + "grad_norm": 0.5836131572723389, + "learning_rate": 4.94080759546483e-06, + "loss": 0.6026, + "step": 1506 + }, + { + "epoch": 0.22205304518664049, + "grad_norm": 0.6351119875907898, + "learning_rate": 4.940723700242809e-06, + "loss": 0.6003, + "step": 1507 + }, + { + "epoch": 0.22220039292730845, + "grad_norm": 0.6436312794685364, + "learning_rate": 4.940639746322526e-06, + "loss": 0.6003, + "step": 1508 + }, + { + "epoch": 0.2223477406679764, + "grad_norm": 0.6154443621635437, + "learning_rate": 4.940555733706003e-06, + "loss": 0.6329, + "step": 1509 + }, + { + "epoch": 0.2224950884086444, + "grad_norm": 0.6075474619865417, + "learning_rate": 4.940471662395259e-06, + "loss": 0.6401, + "step": 1510 + }, + { + "epoch": 0.22264243614931237, + "grad_norm": 0.5908315181732178, + "learning_rate": 4.940387532392315e-06, + "loss": 0.6098, + "step": 1511 + }, + { + "epoch": 0.22278978388998036, + "grad_norm": 0.6154339909553528, + "learning_rate": 4.940303343699197e-06, + "loss": 0.6355, + "step": 1512 + }, + { + "epoch": 0.22293713163064832, + "grad_norm": 0.600378155708313, + "learning_rate": 4.9402190963179274e-06, + "loss": 0.6145, + "step": 1513 + }, + { + "epoch": 0.2230844793713163, + "grad_norm": 0.6233968138694763, + "learning_rate": 4.9401347902505334e-06, + "loss": 0.6448, + "step": 1514 + }, + { + "epoch": 0.22323182711198428, + "grad_norm": 0.616431474685669, + "learning_rate": 4.940050425499043e-06, + "loss": 0.5825, + "step": 1515 + }, + { + "epoch": 0.22337917485265227, + "grad_norm": 0.6061640977859497, + "learning_rate": 4.939966002065483e-06, + "loss": 0.6484, + "step": 1516 + }, + { + "epoch": 0.22352652259332023, + "grad_norm": 0.5968467593193054, + "learning_rate": 4.939881519951885e-06, + "loss": 0.586, + "step": 1517 + }, + { + "epoch": 0.22367387033398822, + "grad_norm": 0.6528064608573914, + "learning_rate": 4.939796979160281e-06, + "loss": 0.635, + "step": 1518 + }, + { + "epoch": 0.2238212180746562, + "grad_norm": 0.6064465641975403, + "learning_rate": 4.939712379692705e-06, + "loss": 0.6137, + "step": 1519 + }, + { + "epoch": 0.22396856581532418, + "grad_norm": 0.6068639159202576, + "learning_rate": 4.939627721551189e-06, + "loss": 0.6136, + "step": 1520 + }, + { + "epoch": 0.22411591355599214, + "grad_norm": 0.5941352248191833, + "learning_rate": 4.939543004737772e-06, + "loss": 0.5853, + "step": 1521 + }, + { + "epoch": 0.2242632612966601, + "grad_norm": 0.6113271713256836, + "learning_rate": 4.939458229254489e-06, + "loss": 0.6162, + "step": 1522 + }, + { + "epoch": 0.2244106090373281, + "grad_norm": 0.5868501663208008, + "learning_rate": 4.93937339510338e-06, + "loss": 0.5974, + "step": 1523 + }, + { + "epoch": 0.22455795677799606, + "grad_norm": 0.5741041302680969, + "learning_rate": 4.939288502286485e-06, + "loss": 0.5541, + "step": 1524 + }, + { + "epoch": 0.22470530451866405, + "grad_norm": 0.6334990859031677, + "learning_rate": 4.939203550805846e-06, + "loss": 0.6334, + "step": 1525 + }, + { + "epoch": 0.22485265225933201, + "grad_norm": 0.6011141538619995, + "learning_rate": 4.939118540663505e-06, + "loss": 0.6347, + "step": 1526 + }, + { + "epoch": 0.225, + "grad_norm": 0.5919383764266968, + "learning_rate": 4.9390334718615075e-06, + "loss": 0.6172, + "step": 1527 + }, + { + "epoch": 0.22514734774066797, + "grad_norm": 0.5821806788444519, + "learning_rate": 4.9389483444019e-06, + "loss": 0.6363, + "step": 1528 + }, + { + "epoch": 0.22529469548133596, + "grad_norm": 0.6391839385032654, + "learning_rate": 4.938863158286726e-06, + "loss": 0.6095, + "step": 1529 + }, + { + "epoch": 0.22544204322200392, + "grad_norm": 0.602898120880127, + "learning_rate": 4.938777913518039e-06, + "loss": 0.5978, + "step": 1530 + }, + { + "epoch": 0.22558939096267192, + "grad_norm": 0.5905638337135315, + "learning_rate": 4.938692610097887e-06, + "loss": 0.6262, + "step": 1531 + }, + { + "epoch": 0.22573673870333988, + "grad_norm": 0.6427717208862305, + "learning_rate": 4.938607248028321e-06, + "loss": 0.6005, + "step": 1532 + }, + { + "epoch": 0.22588408644400787, + "grad_norm": 0.5912647843360901, + "learning_rate": 4.938521827311395e-06, + "loss": 0.6165, + "step": 1533 + }, + { + "epoch": 0.22603143418467583, + "grad_norm": 0.6216715574264526, + "learning_rate": 4.938436347949162e-06, + "loss": 0.6244, + "step": 1534 + }, + { + "epoch": 0.22617878192534382, + "grad_norm": 0.5775299668312073, + "learning_rate": 4.938350809943679e-06, + "loss": 0.6072, + "step": 1535 + }, + { + "epoch": 0.2263261296660118, + "grad_norm": 0.624480128288269, + "learning_rate": 4.9382652132970025e-06, + "loss": 0.6509, + "step": 1536 + }, + { + "epoch": 0.22647347740667975, + "grad_norm": 0.5846396684646606, + "learning_rate": 4.938179558011191e-06, + "loss": 0.6134, + "step": 1537 + }, + { + "epoch": 0.22662082514734774, + "grad_norm": 0.5812880992889404, + "learning_rate": 4.938093844088305e-06, + "loss": 0.5797, + "step": 1538 + }, + { + "epoch": 0.2267681728880157, + "grad_norm": 0.6022025942802429, + "learning_rate": 4.938008071530406e-06, + "loss": 0.6053, + "step": 1539 + }, + { + "epoch": 0.2269155206286837, + "grad_norm": 0.5889886021614075, + "learning_rate": 4.937922240339557e-06, + "loss": 0.6453, + "step": 1540 + }, + { + "epoch": 0.22706286836935166, + "grad_norm": 0.5870588421821594, + "learning_rate": 4.9378363505178195e-06, + "loss": 0.6142, + "step": 1541 + }, + { + "epoch": 0.22721021611001965, + "grad_norm": 0.5807991027832031, + "learning_rate": 4.937750402067263e-06, + "loss": 0.5977, + "step": 1542 + }, + { + "epoch": 0.22735756385068762, + "grad_norm": 0.5898467302322388, + "learning_rate": 4.9376643949899515e-06, + "loss": 0.6178, + "step": 1543 + }, + { + "epoch": 0.2275049115913556, + "grad_norm": 0.6416104435920715, + "learning_rate": 4.937578329287955e-06, + "loss": 0.6165, + "step": 1544 + }, + { + "epoch": 0.22765225933202357, + "grad_norm": 0.5846979022026062, + "learning_rate": 4.937492204963343e-06, + "loss": 0.6423, + "step": 1545 + }, + { + "epoch": 0.22779960707269156, + "grad_norm": 0.5811700224876404, + "learning_rate": 4.937406022018187e-06, + "loss": 0.6337, + "step": 1546 + }, + { + "epoch": 0.22794695481335953, + "grad_norm": 0.5653330087661743, + "learning_rate": 4.937319780454559e-06, + "loss": 0.6147, + "step": 1547 + }, + { + "epoch": 0.22809430255402752, + "grad_norm": 0.550447404384613, + "learning_rate": 4.937233480274534e-06, + "loss": 0.6253, + "step": 1548 + }, + { + "epoch": 0.22824165029469548, + "grad_norm": 0.5680750608444214, + "learning_rate": 4.937147121480187e-06, + "loss": 0.6405, + "step": 1549 + }, + { + "epoch": 0.22838899803536344, + "grad_norm": 0.6348654627799988, + "learning_rate": 4.937060704073594e-06, + "loss": 0.622, + "step": 1550 + }, + { + "epoch": 0.22853634577603144, + "grad_norm": 0.6056029796600342, + "learning_rate": 4.936974228056835e-06, + "loss": 0.6413, + "step": 1551 + }, + { + "epoch": 0.2286836935166994, + "grad_norm": 0.6095244288444519, + "learning_rate": 4.936887693431988e-06, + "loss": 0.5777, + "step": 1552 + }, + { + "epoch": 0.2288310412573674, + "grad_norm": 0.6216515898704529, + "learning_rate": 4.936801100201135e-06, + "loss": 0.6147, + "step": 1553 + }, + { + "epoch": 0.22897838899803535, + "grad_norm": 0.640612006187439, + "learning_rate": 4.936714448366359e-06, + "loss": 0.6043, + "step": 1554 + }, + { + "epoch": 0.22912573673870335, + "grad_norm": 0.5741074681282043, + "learning_rate": 4.936627737929744e-06, + "loss": 0.6151, + "step": 1555 + }, + { + "epoch": 0.2292730844793713, + "grad_norm": 0.6110879778862, + "learning_rate": 4.936540968893373e-06, + "loss": 0.6513, + "step": 1556 + }, + { + "epoch": 0.2294204322200393, + "grad_norm": 0.5998510122299194, + "learning_rate": 4.936454141259336e-06, + "loss": 0.6332, + "step": 1557 + }, + { + "epoch": 0.22956777996070726, + "grad_norm": 0.6052153706550598, + "learning_rate": 4.936367255029718e-06, + "loss": 0.5929, + "step": 1558 + }, + { + "epoch": 0.22971512770137525, + "grad_norm": 0.6266438364982605, + "learning_rate": 4.936280310206612e-06, + "loss": 0.5884, + "step": 1559 + }, + { + "epoch": 0.22986247544204322, + "grad_norm": 0.6068642139434814, + "learning_rate": 4.936193306792107e-06, + "loss": 0.5827, + "step": 1560 + }, + { + "epoch": 0.2300098231827112, + "grad_norm": 0.5947786569595337, + "learning_rate": 4.936106244788295e-06, + "loss": 0.6039, + "step": 1561 + }, + { + "epoch": 0.23015717092337917, + "grad_norm": 0.6012387275695801, + "learning_rate": 4.93601912419727e-06, + "loss": 0.6064, + "step": 1562 + }, + { + "epoch": 0.23030451866404716, + "grad_norm": 0.6003574132919312, + "learning_rate": 4.935931945021128e-06, + "loss": 0.6234, + "step": 1563 + }, + { + "epoch": 0.23045186640471513, + "grad_norm": 0.6260384917259216, + "learning_rate": 4.935844707261966e-06, + "loss": 0.6012, + "step": 1564 + }, + { + "epoch": 0.2305992141453831, + "grad_norm": 0.6095635294914246, + "learning_rate": 4.935757410921881e-06, + "loss": 0.6174, + "step": 1565 + }, + { + "epoch": 0.23074656188605108, + "grad_norm": 0.5968328714370728, + "learning_rate": 4.935670056002972e-06, + "loss": 0.5779, + "step": 1566 + }, + { + "epoch": 0.23089390962671905, + "grad_norm": 0.6170848608016968, + "learning_rate": 4.935582642507341e-06, + "loss": 0.6025, + "step": 1567 + }, + { + "epoch": 0.23104125736738704, + "grad_norm": 0.6059722304344177, + "learning_rate": 4.93549517043709e-06, + "loss": 0.6144, + "step": 1568 + }, + { + "epoch": 0.231188605108055, + "grad_norm": 0.6095712184906006, + "learning_rate": 4.935407639794322e-06, + "loss": 0.6193, + "step": 1569 + }, + { + "epoch": 0.231335952848723, + "grad_norm": 0.6033074855804443, + "learning_rate": 4.935320050581143e-06, + "loss": 0.6319, + "step": 1570 + }, + { + "epoch": 0.23148330058939096, + "grad_norm": 0.6033844351768494, + "learning_rate": 4.935232402799659e-06, + "loss": 0.616, + "step": 1571 + }, + { + "epoch": 0.23163064833005895, + "grad_norm": 0.5762119293212891, + "learning_rate": 4.9351446964519775e-06, + "loss": 0.6324, + "step": 1572 + }, + { + "epoch": 0.2317779960707269, + "grad_norm": 0.59881991147995, + "learning_rate": 4.935056931540209e-06, + "loss": 0.618, + "step": 1573 + }, + { + "epoch": 0.2319253438113949, + "grad_norm": 0.5860435366630554, + "learning_rate": 4.934969108066462e-06, + "loss": 0.625, + "step": 1574 + }, + { + "epoch": 0.23207269155206287, + "grad_norm": 0.6004811525344849, + "learning_rate": 4.934881226032851e-06, + "loss": 0.5979, + "step": 1575 + }, + { + "epoch": 0.23222003929273086, + "grad_norm": 0.5736439228057861, + "learning_rate": 4.934793285441488e-06, + "loss": 0.5856, + "step": 1576 + }, + { + "epoch": 0.23236738703339882, + "grad_norm": 0.5916290879249573, + "learning_rate": 4.934705286294489e-06, + "loss": 0.6366, + "step": 1577 + }, + { + "epoch": 0.2325147347740668, + "grad_norm": 0.5608088374137878, + "learning_rate": 4.9346172285939695e-06, + "loss": 0.6055, + "step": 1578 + }, + { + "epoch": 0.23266208251473477, + "grad_norm": 0.6131484508514404, + "learning_rate": 4.934529112342048e-06, + "loss": 0.6165, + "step": 1579 + }, + { + "epoch": 0.23280943025540274, + "grad_norm": 0.6057609915733337, + "learning_rate": 4.934440937540843e-06, + "loss": 0.6072, + "step": 1580 + }, + { + "epoch": 0.23295677799607073, + "grad_norm": 0.6102911233901978, + "learning_rate": 4.934352704192476e-06, + "loss": 0.6518, + "step": 1581 + }, + { + "epoch": 0.2331041257367387, + "grad_norm": 0.5833301544189453, + "learning_rate": 4.934264412299067e-06, + "loss": 0.5728, + "step": 1582 + }, + { + "epoch": 0.23325147347740668, + "grad_norm": 0.6030455231666565, + "learning_rate": 4.934176061862741e-06, + "loss": 0.6218, + "step": 1583 + }, + { + "epoch": 0.23339882121807465, + "grad_norm": 0.59686678647995, + "learning_rate": 4.934087652885622e-06, + "loss": 0.6182, + "step": 1584 + }, + { + "epoch": 0.23354616895874264, + "grad_norm": 0.6118378639221191, + "learning_rate": 4.933999185369838e-06, + "loss": 0.5853, + "step": 1585 + }, + { + "epoch": 0.2336935166994106, + "grad_norm": 0.6091147065162659, + "learning_rate": 4.933910659317514e-06, + "loss": 0.6182, + "step": 1586 + }, + { + "epoch": 0.2338408644400786, + "grad_norm": 0.5965601801872253, + "learning_rate": 4.9338220747307806e-06, + "loss": 0.5996, + "step": 1587 + }, + { + "epoch": 0.23398821218074656, + "grad_norm": 0.5953848958015442, + "learning_rate": 4.9337334316117676e-06, + "loss": 0.5991, + "step": 1588 + }, + { + "epoch": 0.23413555992141455, + "grad_norm": 0.5732067823410034, + "learning_rate": 4.933644729962607e-06, + "loss": 0.6292, + "step": 1589 + }, + { + "epoch": 0.2342829076620825, + "grad_norm": 0.5870921015739441, + "learning_rate": 4.933555969785432e-06, + "loss": 0.6142, + "step": 1590 + }, + { + "epoch": 0.2344302554027505, + "grad_norm": 0.5913054943084717, + "learning_rate": 4.933467151082378e-06, + "loss": 0.6157, + "step": 1591 + }, + { + "epoch": 0.23457760314341847, + "grad_norm": 0.6443929076194763, + "learning_rate": 4.93337827385558e-06, + "loss": 0.6078, + "step": 1592 + }, + { + "epoch": 0.23472495088408643, + "grad_norm": 0.5758781433105469, + "learning_rate": 4.933289338107176e-06, + "loss": 0.644, + "step": 1593 + }, + { + "epoch": 0.23487229862475442, + "grad_norm": 0.616238534450531, + "learning_rate": 4.933200343839304e-06, + "loss": 0.5719, + "step": 1594 + }, + { + "epoch": 0.23501964636542239, + "grad_norm": 0.6010088920593262, + "learning_rate": 4.933111291054106e-06, + "loss": 0.6254, + "step": 1595 + }, + { + "epoch": 0.23516699410609038, + "grad_norm": 0.5954583883285522, + "learning_rate": 4.933022179753722e-06, + "loss": 0.5999, + "step": 1596 + }, + { + "epoch": 0.23531434184675834, + "grad_norm": 0.6265577673912048, + "learning_rate": 4.932933009940296e-06, + "loss": 0.6188, + "step": 1597 + }, + { + "epoch": 0.23546168958742633, + "grad_norm": 0.597862958908081, + "learning_rate": 4.932843781615972e-06, + "loss": 0.6042, + "step": 1598 + }, + { + "epoch": 0.2356090373280943, + "grad_norm": 0.6383451223373413, + "learning_rate": 4.932754494782896e-06, + "loss": 0.5938, + "step": 1599 + }, + { + "epoch": 0.2357563850687623, + "grad_norm": 0.5959359407424927, + "learning_rate": 4.932665149443216e-06, + "loss": 0.6126, + "step": 1600 + }, + { + "epoch": 0.23590373280943025, + "grad_norm": 0.5773515105247498, + "learning_rate": 4.93257574559908e-06, + "loss": 0.5937, + "step": 1601 + }, + { + "epoch": 0.23605108055009824, + "grad_norm": 0.5590853691101074, + "learning_rate": 4.932486283252638e-06, + "loss": 0.593, + "step": 1602 + }, + { + "epoch": 0.2361984282907662, + "grad_norm": 0.6241990923881531, + "learning_rate": 4.932396762406042e-06, + "loss": 0.6399, + "step": 1603 + }, + { + "epoch": 0.2363457760314342, + "grad_norm": 0.6156283020973206, + "learning_rate": 4.932307183061444e-06, + "loss": 0.6454, + "step": 1604 + }, + { + "epoch": 0.23649312377210216, + "grad_norm": 0.5956816673278809, + "learning_rate": 4.932217545221e-06, + "loss": 0.6277, + "step": 1605 + }, + { + "epoch": 0.23664047151277015, + "grad_norm": 0.6240442395210266, + "learning_rate": 4.932127848886865e-06, + "loss": 0.5959, + "step": 1606 + }, + { + "epoch": 0.23678781925343811, + "grad_norm": 0.6427688002586365, + "learning_rate": 4.9320380940611955e-06, + "loss": 0.6564, + "step": 1607 + }, + { + "epoch": 0.23693516699410608, + "grad_norm": 0.5648936629295349, + "learning_rate": 4.93194828074615e-06, + "loss": 0.615, + "step": 1608 + }, + { + "epoch": 0.23708251473477407, + "grad_norm": 0.5858781933784485, + "learning_rate": 4.9318584089438895e-06, + "loss": 0.6073, + "step": 1609 + }, + { + "epoch": 0.23722986247544203, + "grad_norm": 0.6053730845451355, + "learning_rate": 4.931768478656574e-06, + "loss": 0.5607, + "step": 1610 + }, + { + "epoch": 0.23737721021611002, + "grad_norm": 0.597616970539093, + "learning_rate": 4.931678489886369e-06, + "loss": 0.6229, + "step": 1611 + }, + { + "epoch": 0.237524557956778, + "grad_norm": 0.5599822998046875, + "learning_rate": 4.931588442635435e-06, + "loss": 0.6383, + "step": 1612 + }, + { + "epoch": 0.23767190569744598, + "grad_norm": 0.6221845149993896, + "learning_rate": 4.931498336905941e-06, + "loss": 0.6093, + "step": 1613 + }, + { + "epoch": 0.23781925343811394, + "grad_norm": 0.6361809372901917, + "learning_rate": 4.931408172700052e-06, + "loss": 0.5956, + "step": 1614 + }, + { + "epoch": 0.23796660117878193, + "grad_norm": 0.6079491972923279, + "learning_rate": 4.9313179500199356e-06, + "loss": 0.6309, + "step": 1615 + }, + { + "epoch": 0.2381139489194499, + "grad_norm": 0.5948631763458252, + "learning_rate": 4.931227668867764e-06, + "loss": 0.6254, + "step": 1616 + }, + { + "epoch": 0.2382612966601179, + "grad_norm": 0.6463460922241211, + "learning_rate": 4.931137329245708e-06, + "loss": 0.6115, + "step": 1617 + }, + { + "epoch": 0.23840864440078585, + "grad_norm": 0.5937040448188782, + "learning_rate": 4.931046931155939e-06, + "loss": 0.6194, + "step": 1618 + }, + { + "epoch": 0.23855599214145384, + "grad_norm": 0.5954758524894714, + "learning_rate": 4.93095647460063e-06, + "loss": 0.5749, + "step": 1619 + }, + { + "epoch": 0.2387033398821218, + "grad_norm": 0.6209570169448853, + "learning_rate": 4.9308659595819594e-06, + "loss": 0.601, + "step": 1620 + }, + { + "epoch": 0.23885068762278977, + "grad_norm": 0.5972459316253662, + "learning_rate": 4.930775386102103e-06, + "loss": 0.616, + "step": 1621 + }, + { + "epoch": 0.23899803536345776, + "grad_norm": 0.654038667678833, + "learning_rate": 4.930684754163237e-06, + "loss": 0.6446, + "step": 1622 + }, + { + "epoch": 0.23914538310412572, + "grad_norm": 0.5975867509841919, + "learning_rate": 4.930594063767544e-06, + "loss": 0.5821, + "step": 1623 + }, + { + "epoch": 0.23929273084479372, + "grad_norm": 0.5917471051216125, + "learning_rate": 4.930503314917204e-06, + "loss": 0.6125, + "step": 1624 + }, + { + "epoch": 0.23944007858546168, + "grad_norm": 0.6416634321212769, + "learning_rate": 4.930412507614399e-06, + "loss": 0.6281, + "step": 1625 + }, + { + "epoch": 0.23958742632612967, + "grad_norm": 0.6094918847084045, + "learning_rate": 4.930321641861312e-06, + "loss": 0.623, + "step": 1626 + }, + { + "epoch": 0.23973477406679763, + "grad_norm": 0.6688728332519531, + "learning_rate": 4.930230717660131e-06, + "loss": 0.622, + "step": 1627 + }, + { + "epoch": 0.23988212180746563, + "grad_norm": 0.5776267647743225, + "learning_rate": 4.93013973501304e-06, + "loss": 0.5941, + "step": 1628 + }, + { + "epoch": 0.2400294695481336, + "grad_norm": 0.5988982915878296, + "learning_rate": 4.930048693922229e-06, + "loss": 0.5736, + "step": 1629 + }, + { + "epoch": 0.24017681728880158, + "grad_norm": 0.5776041150093079, + "learning_rate": 4.929957594389886e-06, + "loss": 0.6256, + "step": 1630 + }, + { + "epoch": 0.24032416502946954, + "grad_norm": 0.5969784259796143, + "learning_rate": 4.929866436418203e-06, + "loss": 0.628, + "step": 1631 + }, + { + "epoch": 0.24047151277013754, + "grad_norm": 0.5940757989883423, + "learning_rate": 4.929775220009372e-06, + "loss": 0.6332, + "step": 1632 + }, + { + "epoch": 0.2406188605108055, + "grad_norm": 0.6270540356636047, + "learning_rate": 4.9296839451655866e-06, + "loss": 0.6001, + "step": 1633 + }, + { + "epoch": 0.2407662082514735, + "grad_norm": 0.5848387479782104, + "learning_rate": 4.929592611889041e-06, + "loss": 0.608, + "step": 1634 + }, + { + "epoch": 0.24091355599214145, + "grad_norm": 0.5901945233345032, + "learning_rate": 4.929501220181934e-06, + "loss": 0.5857, + "step": 1635 + }, + { + "epoch": 0.24106090373280942, + "grad_norm": 0.5776450037956238, + "learning_rate": 4.929409770046461e-06, + "loss": 0.6119, + "step": 1636 + }, + { + "epoch": 0.2412082514734774, + "grad_norm": 0.5824360251426697, + "learning_rate": 4.929318261484823e-06, + "loss": 0.5943, + "step": 1637 + }, + { + "epoch": 0.24135559921414537, + "grad_norm": 0.6449417471885681, + "learning_rate": 4.92922669449922e-06, + "loss": 0.5802, + "step": 1638 + }, + { + "epoch": 0.24150294695481336, + "grad_norm": 0.6008056998252869, + "learning_rate": 4.929135069091854e-06, + "loss": 0.6324, + "step": 1639 + }, + { + "epoch": 0.24165029469548133, + "grad_norm": 0.5941566824913025, + "learning_rate": 4.92904338526493e-06, + "loss": 0.5872, + "step": 1640 + }, + { + "epoch": 0.24179764243614932, + "grad_norm": 0.5702040791511536, + "learning_rate": 4.9289516430206504e-06, + "loss": 0.6111, + "step": 1641 + }, + { + "epoch": 0.24194499017681728, + "grad_norm": 0.563958466053009, + "learning_rate": 4.928859842361224e-06, + "loss": 0.5828, + "step": 1642 + }, + { + "epoch": 0.24209233791748527, + "grad_norm": 0.6007937788963318, + "learning_rate": 4.928767983288858e-06, + "loss": 0.5906, + "step": 1643 + }, + { + "epoch": 0.24223968565815324, + "grad_norm": 0.6031252145767212, + "learning_rate": 4.928676065805761e-06, + "loss": 0.6071, + "step": 1644 + }, + { + "epoch": 0.24238703339882123, + "grad_norm": 0.6222847104072571, + "learning_rate": 4.928584089914143e-06, + "loss": 0.5945, + "step": 1645 + }, + { + "epoch": 0.2425343811394892, + "grad_norm": 0.5733784437179565, + "learning_rate": 4.928492055616217e-06, + "loss": 0.5966, + "step": 1646 + }, + { + "epoch": 0.24268172888015718, + "grad_norm": 0.5622508525848389, + "learning_rate": 4.9283999629141965e-06, + "loss": 0.5947, + "step": 1647 + }, + { + "epoch": 0.24282907662082515, + "grad_norm": 0.5893845558166504, + "learning_rate": 4.928307811810296e-06, + "loss": 0.6048, + "step": 1648 + }, + { + "epoch": 0.24297642436149314, + "grad_norm": 0.6461899876594543, + "learning_rate": 4.92821560230673e-06, + "loss": 0.606, + "step": 1649 + }, + { + "epoch": 0.2431237721021611, + "grad_norm": 0.5699790120124817, + "learning_rate": 4.928123334405719e-06, + "loss": 0.6105, + "step": 1650 + }, + { + "epoch": 0.24327111984282906, + "grad_norm": 0.602398693561554, + "learning_rate": 4.928031008109481e-06, + "loss": 0.6409, + "step": 1651 + }, + { + "epoch": 0.24341846758349706, + "grad_norm": 0.5764282941818237, + "learning_rate": 4.927938623420235e-06, + "loss": 0.6321, + "step": 1652 + }, + { + "epoch": 0.24356581532416502, + "grad_norm": 0.6044312119483948, + "learning_rate": 4.927846180340205e-06, + "loss": 0.5974, + "step": 1653 + }, + { + "epoch": 0.243713163064833, + "grad_norm": 0.5861804485321045, + "learning_rate": 4.9277536788716125e-06, + "loss": 0.6078, + "step": 1654 + }, + { + "epoch": 0.24386051080550097, + "grad_norm": 0.6013644933700562, + "learning_rate": 4.927661119016683e-06, + "loss": 0.5926, + "step": 1655 + }, + { + "epoch": 0.24400785854616897, + "grad_norm": 0.585410475730896, + "learning_rate": 4.927568500777642e-06, + "loss": 0.6054, + "step": 1656 + }, + { + "epoch": 0.24415520628683693, + "grad_norm": 0.6029179692268372, + "learning_rate": 4.9274758241567166e-06, + "loss": 0.6145, + "step": 1657 + }, + { + "epoch": 0.24430255402750492, + "grad_norm": 0.574758768081665, + "learning_rate": 4.9273830891561365e-06, + "loss": 0.6156, + "step": 1658 + }, + { + "epoch": 0.24444990176817288, + "grad_norm": 0.6004466414451599, + "learning_rate": 4.927290295778132e-06, + "loss": 0.6268, + "step": 1659 + }, + { + "epoch": 0.24459724950884087, + "grad_norm": 0.5870362520217896, + "learning_rate": 4.927197444024935e-06, + "loss": 0.6243, + "step": 1660 + }, + { + "epoch": 0.24474459724950884, + "grad_norm": 0.6081448793411255, + "learning_rate": 4.9271045338987765e-06, + "loss": 0.6275, + "step": 1661 + }, + { + "epoch": 0.24489194499017683, + "grad_norm": 0.56627357006073, + "learning_rate": 4.927011565401893e-06, + "loss": 0.6183, + "step": 1662 + }, + { + "epoch": 0.2450392927308448, + "grad_norm": 0.5959356427192688, + "learning_rate": 4.9269185385365195e-06, + "loss": 0.6044, + "step": 1663 + }, + { + "epoch": 0.24518664047151276, + "grad_norm": 0.5904929637908936, + "learning_rate": 4.926825453304894e-06, + "loss": 0.6067, + "step": 1664 + }, + { + "epoch": 0.24533398821218075, + "grad_norm": 0.6403310894966125, + "learning_rate": 4.926732309709254e-06, + "loss": 0.6125, + "step": 1665 + }, + { + "epoch": 0.2454813359528487, + "grad_norm": 0.5583239793777466, + "learning_rate": 4.926639107751841e-06, + "loss": 0.5866, + "step": 1666 + }, + { + "epoch": 0.2456286836935167, + "grad_norm": 0.5874050855636597, + "learning_rate": 4.926545847434894e-06, + "loss": 0.6173, + "step": 1667 + }, + { + "epoch": 0.24577603143418467, + "grad_norm": 0.629978597164154, + "learning_rate": 4.926452528760658e-06, + "loss": 0.611, + "step": 1668 + }, + { + "epoch": 0.24592337917485266, + "grad_norm": 0.5978645086288452, + "learning_rate": 4.926359151731378e-06, + "loss": 0.608, + "step": 1669 + }, + { + "epoch": 0.24607072691552062, + "grad_norm": 0.5864302515983582, + "learning_rate": 4.926265716349297e-06, + "loss": 0.6292, + "step": 1670 + }, + { + "epoch": 0.2462180746561886, + "grad_norm": 0.5927065014839172, + "learning_rate": 4.926172222616664e-06, + "loss": 0.6314, + "step": 1671 + }, + { + "epoch": 0.24636542239685658, + "grad_norm": 0.6044816970825195, + "learning_rate": 4.926078670535728e-06, + "loss": 0.5857, + "step": 1672 + }, + { + "epoch": 0.24651277013752457, + "grad_norm": 0.5857625603675842, + "learning_rate": 4.9259850601087365e-06, + "loss": 0.5869, + "step": 1673 + }, + { + "epoch": 0.24666011787819253, + "grad_norm": 0.5775193572044373, + "learning_rate": 4.925891391337943e-06, + "loss": 0.6234, + "step": 1674 + }, + { + "epoch": 0.24680746561886052, + "grad_norm": 0.6014735102653503, + "learning_rate": 4.925797664225599e-06, + "loss": 0.613, + "step": 1675 + }, + { + "epoch": 0.24695481335952849, + "grad_norm": 0.5920022130012512, + "learning_rate": 4.9257038787739586e-06, + "loss": 0.6321, + "step": 1676 + }, + { + "epoch": 0.24710216110019648, + "grad_norm": 0.5977739691734314, + "learning_rate": 4.925610034985278e-06, + "loss": 0.6302, + "step": 1677 + }, + { + "epoch": 0.24724950884086444, + "grad_norm": 0.6034195423126221, + "learning_rate": 4.925516132861814e-06, + "loss": 0.5989, + "step": 1678 + }, + { + "epoch": 0.2473968565815324, + "grad_norm": 0.6152459979057312, + "learning_rate": 4.9254221724058235e-06, + "loss": 0.6079, + "step": 1679 + }, + { + "epoch": 0.2475442043222004, + "grad_norm": 0.5966836214065552, + "learning_rate": 4.925328153619568e-06, + "loss": 0.5836, + "step": 1680 + }, + { + "epoch": 0.24769155206286836, + "grad_norm": 0.5884940028190613, + "learning_rate": 4.925234076505309e-06, + "loss": 0.5882, + "step": 1681 + }, + { + "epoch": 0.24783889980353635, + "grad_norm": 0.5861109495162964, + "learning_rate": 4.925139941065307e-06, + "loss": 0.5507, + "step": 1682 + }, + { + "epoch": 0.2479862475442043, + "grad_norm": 0.5914342403411865, + "learning_rate": 4.925045747301827e-06, + "loss": 0.6287, + "step": 1683 + }, + { + "epoch": 0.2481335952848723, + "grad_norm": 0.6262138485908508, + "learning_rate": 4.924951495217134e-06, + "loss": 0.6266, + "step": 1684 + }, + { + "epoch": 0.24828094302554027, + "grad_norm": 0.568790853023529, + "learning_rate": 4.924857184813495e-06, + "loss": 0.636, + "step": 1685 + }, + { + "epoch": 0.24842829076620826, + "grad_norm": 0.5761520266532898, + "learning_rate": 4.924762816093178e-06, + "loss": 0.5899, + "step": 1686 + }, + { + "epoch": 0.24857563850687622, + "grad_norm": 0.5928934812545776, + "learning_rate": 4.924668389058453e-06, + "loss": 0.6006, + "step": 1687 + }, + { + "epoch": 0.24872298624754421, + "grad_norm": 0.6217023134231567, + "learning_rate": 4.92457390371159e-06, + "loss": 0.5997, + "step": 1688 + }, + { + "epoch": 0.24887033398821218, + "grad_norm": 0.5857503414154053, + "learning_rate": 4.924479360054862e-06, + "loss": 0.592, + "step": 1689 + }, + { + "epoch": 0.24901768172888017, + "grad_norm": 0.6150176525115967, + "learning_rate": 4.9243847580905425e-06, + "loss": 0.6109, + "step": 1690 + }, + { + "epoch": 0.24916502946954813, + "grad_norm": 0.6203132271766663, + "learning_rate": 4.924290097820907e-06, + "loss": 0.5783, + "step": 1691 + }, + { + "epoch": 0.2493123772102161, + "grad_norm": 0.5573397874832153, + "learning_rate": 4.924195379248231e-06, + "loss": 0.6075, + "step": 1692 + }, + { + "epoch": 0.2494597249508841, + "grad_norm": 0.6367015242576599, + "learning_rate": 4.924100602374794e-06, + "loss": 0.6597, + "step": 1693 + }, + { + "epoch": 0.24960707269155205, + "grad_norm": 0.5958086252212524, + "learning_rate": 4.924005767202874e-06, + "loss": 0.6342, + "step": 1694 + }, + { + "epoch": 0.24975442043222004, + "grad_norm": 0.6051167845726013, + "learning_rate": 4.9239108737347515e-06, + "loss": 0.6297, + "step": 1695 + }, + { + "epoch": 0.249901768172888, + "grad_norm": 0.577907383441925, + "learning_rate": 4.9238159219727104e-06, + "loss": 0.5619, + "step": 1696 + }, + { + "epoch": 0.250049115913556, + "grad_norm": 0.6196224093437195, + "learning_rate": 4.923720911919033e-06, + "loss": 0.6114, + "step": 1697 + }, + { + "epoch": 0.250196463654224, + "grad_norm": 0.6260014772415161, + "learning_rate": 4.923625843576004e-06, + "loss": 0.6103, + "step": 1698 + }, + { + "epoch": 0.2503438113948919, + "grad_norm": 0.5984755754470825, + "learning_rate": 4.923530716945911e-06, + "loss": 0.599, + "step": 1699 + }, + { + "epoch": 0.2504911591355599, + "grad_norm": 0.5708172917366028, + "learning_rate": 4.92343553203104e-06, + "loss": 0.5677, + "step": 1700 + }, + { + "epoch": 0.2506385068762279, + "grad_norm": 0.5739811062812805, + "learning_rate": 4.923340288833682e-06, + "loss": 0.6044, + "step": 1701 + }, + { + "epoch": 0.2507858546168959, + "grad_norm": 0.5958719253540039, + "learning_rate": 4.923244987356127e-06, + "loss": 0.634, + "step": 1702 + }, + { + "epoch": 0.25093320235756383, + "grad_norm": 0.6228737235069275, + "learning_rate": 4.923149627600666e-06, + "loss": 0.6577, + "step": 1703 + }, + { + "epoch": 0.2510805500982318, + "grad_norm": 0.6019788980484009, + "learning_rate": 4.923054209569593e-06, + "loss": 0.6073, + "step": 1704 + }, + { + "epoch": 0.2512278978388998, + "grad_norm": 0.6187698841094971, + "learning_rate": 4.922958733265202e-06, + "loss": 0.6048, + "step": 1705 + }, + { + "epoch": 0.2513752455795678, + "grad_norm": 0.5781245231628418, + "learning_rate": 4.922863198689791e-06, + "loss": 0.6115, + "step": 1706 + }, + { + "epoch": 0.25152259332023574, + "grad_norm": 0.6070923209190369, + "learning_rate": 4.922767605845656e-06, + "loss": 0.6277, + "step": 1707 + }, + { + "epoch": 0.25166994106090373, + "grad_norm": 0.5963754057884216, + "learning_rate": 4.922671954735097e-06, + "loss": 0.6174, + "step": 1708 + }, + { + "epoch": 0.2518172888015717, + "grad_norm": 0.596831202507019, + "learning_rate": 4.922576245360413e-06, + "loss": 0.6388, + "step": 1709 + }, + { + "epoch": 0.25196463654223966, + "grad_norm": 0.6307262778282166, + "learning_rate": 4.922480477723907e-06, + "loss": 0.6402, + "step": 1710 + }, + { + "epoch": 0.25211198428290765, + "grad_norm": 0.6055577397346497, + "learning_rate": 4.9223846518278815e-06, + "loss": 0.5956, + "step": 1711 + }, + { + "epoch": 0.25225933202357564, + "grad_norm": 0.62318354845047, + "learning_rate": 4.922288767674641e-06, + "loss": 0.5999, + "step": 1712 + }, + { + "epoch": 0.25240667976424364, + "grad_norm": 0.6229989528656006, + "learning_rate": 4.922192825266492e-06, + "loss": 0.6163, + "step": 1713 + }, + { + "epoch": 0.25255402750491157, + "grad_norm": 0.6089563965797424, + "learning_rate": 4.922096824605742e-06, + "loss": 0.6084, + "step": 1714 + }, + { + "epoch": 0.25270137524557956, + "grad_norm": 0.5836276412010193, + "learning_rate": 4.922000765694699e-06, + "loss": 0.6358, + "step": 1715 + }, + { + "epoch": 0.25284872298624755, + "grad_norm": 0.5637613534927368, + "learning_rate": 4.921904648535674e-06, + "loss": 0.6021, + "step": 1716 + }, + { + "epoch": 0.25299607072691555, + "grad_norm": 0.623746395111084, + "learning_rate": 4.921808473130978e-06, + "loss": 0.6391, + "step": 1717 + }, + { + "epoch": 0.2531434184675835, + "grad_norm": 0.5909525156021118, + "learning_rate": 4.921712239482923e-06, + "loss": 0.5512, + "step": 1718 + }, + { + "epoch": 0.25329076620825147, + "grad_norm": 0.5901020169258118, + "learning_rate": 4.921615947593826e-06, + "loss": 0.6189, + "step": 1719 + }, + { + "epoch": 0.25343811394891946, + "grad_norm": 0.6183261871337891, + "learning_rate": 4.921519597466e-06, + "loss": 0.6245, + "step": 1720 + }, + { + "epoch": 0.2535854616895874, + "grad_norm": 0.5661899447441101, + "learning_rate": 4.921423189101765e-06, + "loss": 0.5898, + "step": 1721 + }, + { + "epoch": 0.2537328094302554, + "grad_norm": 0.5911903381347656, + "learning_rate": 4.9213267225034365e-06, + "loss": 0.6022, + "step": 1722 + }, + { + "epoch": 0.2538801571709234, + "grad_norm": 0.6187667846679688, + "learning_rate": 4.921230197673337e-06, + "loss": 0.6164, + "step": 1723 + }, + { + "epoch": 0.2540275049115914, + "grad_norm": 0.5763720870018005, + "learning_rate": 4.921133614613787e-06, + "loss": 0.62, + "step": 1724 + }, + { + "epoch": 0.2541748526522593, + "grad_norm": 0.615038275718689, + "learning_rate": 4.921036973327108e-06, + "loss": 0.5869, + "step": 1725 + }, + { + "epoch": 0.2543222003929273, + "grad_norm": 0.6290920376777649, + "learning_rate": 4.920940273815626e-06, + "loss": 0.6664, + "step": 1726 + }, + { + "epoch": 0.2544695481335953, + "grad_norm": 0.6156381368637085, + "learning_rate": 4.920843516081666e-06, + "loss": 0.6104, + "step": 1727 + }, + { + "epoch": 0.2546168958742633, + "grad_norm": 0.5833727121353149, + "learning_rate": 4.920746700127555e-06, + "loss": 0.585, + "step": 1728 + }, + { + "epoch": 0.2547642436149312, + "grad_norm": 0.5865613222122192, + "learning_rate": 4.9206498259556215e-06, + "loss": 0.6199, + "step": 1729 + }, + { + "epoch": 0.2549115913555992, + "grad_norm": 0.6333039999008179, + "learning_rate": 4.920552893568195e-06, + "loss": 0.5993, + "step": 1730 + }, + { + "epoch": 0.2550589390962672, + "grad_norm": 0.5991489291191101, + "learning_rate": 4.920455902967606e-06, + "loss": 0.5968, + "step": 1731 + }, + { + "epoch": 0.2552062868369352, + "grad_norm": 0.6149007081985474, + "learning_rate": 4.920358854156187e-06, + "loss": 0.6054, + "step": 1732 + }, + { + "epoch": 0.25535363457760313, + "grad_norm": 0.6167165040969849, + "learning_rate": 4.9202617471362744e-06, + "loss": 0.613, + "step": 1733 + }, + { + "epoch": 0.2555009823182711, + "grad_norm": 0.6420976519584656, + "learning_rate": 4.920164581910201e-06, + "loss": 0.6258, + "step": 1734 + }, + { + "epoch": 0.2556483300589391, + "grad_norm": 0.5883610844612122, + "learning_rate": 4.920067358480306e-06, + "loss": 0.6025, + "step": 1735 + }, + { + "epoch": 0.25579567779960705, + "grad_norm": 0.5938136577606201, + "learning_rate": 4.919970076848924e-06, + "loss": 0.6368, + "step": 1736 + }, + { + "epoch": 0.25594302554027504, + "grad_norm": 0.5972191691398621, + "learning_rate": 4.919872737018398e-06, + "loss": 0.6186, + "step": 1737 + }, + { + "epoch": 0.25609037328094303, + "grad_norm": 0.6006776094436646, + "learning_rate": 4.919775338991068e-06, + "loss": 0.5972, + "step": 1738 + }, + { + "epoch": 0.256237721021611, + "grad_norm": 0.6021167039871216, + "learning_rate": 4.919677882769275e-06, + "loss": 0.6278, + "step": 1739 + }, + { + "epoch": 0.25638506876227896, + "grad_norm": 0.6392310261726379, + "learning_rate": 4.9195803683553644e-06, + "loss": 0.6283, + "step": 1740 + }, + { + "epoch": 0.25653241650294695, + "grad_norm": 0.6134361624717712, + "learning_rate": 4.919482795751681e-06, + "loss": 0.607, + "step": 1741 + }, + { + "epoch": 0.25667976424361494, + "grad_norm": 0.6122457385063171, + "learning_rate": 4.919385164960571e-06, + "loss": 0.5947, + "step": 1742 + }, + { + "epoch": 0.25682711198428293, + "grad_norm": 0.5905463695526123, + "learning_rate": 4.919287475984382e-06, + "loss": 0.591, + "step": 1743 + }, + { + "epoch": 0.25697445972495087, + "grad_norm": 0.5838282108306885, + "learning_rate": 4.919189728825464e-06, + "loss": 0.6349, + "step": 1744 + }, + { + "epoch": 0.25712180746561886, + "grad_norm": 0.5842326879501343, + "learning_rate": 4.919091923486169e-06, + "loss": 0.5951, + "step": 1745 + }, + { + "epoch": 0.25726915520628685, + "grad_norm": 0.5848163962364197, + "learning_rate": 4.918994059968847e-06, + "loss": 0.5811, + "step": 1746 + }, + { + "epoch": 0.25741650294695484, + "grad_norm": 0.5876306891441345, + "learning_rate": 4.918896138275851e-06, + "loss": 0.5853, + "step": 1747 + }, + { + "epoch": 0.2575638506876228, + "grad_norm": 0.588253915309906, + "learning_rate": 4.918798158409539e-06, + "loss": 0.6154, + "step": 1748 + }, + { + "epoch": 0.25771119842829077, + "grad_norm": 0.5995535254478455, + "learning_rate": 4.918700120372266e-06, + "loss": 0.6259, + "step": 1749 + }, + { + "epoch": 0.25785854616895876, + "grad_norm": 0.5950498580932617, + "learning_rate": 4.918602024166389e-06, + "loss": 0.5975, + "step": 1750 + }, + { + "epoch": 0.2580058939096267, + "grad_norm": 0.6146644949913025, + "learning_rate": 4.918503869794268e-06, + "loss": 0.613, + "step": 1751 + }, + { + "epoch": 0.2581532416502947, + "grad_norm": 0.5995985269546509, + "learning_rate": 4.918405657258263e-06, + "loss": 0.6102, + "step": 1752 + }, + { + "epoch": 0.2583005893909627, + "grad_norm": 0.5710789561271667, + "learning_rate": 4.918307386560736e-06, + "loss": 0.5968, + "step": 1753 + }, + { + "epoch": 0.25844793713163067, + "grad_norm": 0.6377459764480591, + "learning_rate": 4.918209057704051e-06, + "loss": 0.6224, + "step": 1754 + }, + { + "epoch": 0.2585952848722986, + "grad_norm": 0.9128121733665466, + "learning_rate": 4.918110670690573e-06, + "loss": 0.5848, + "step": 1755 + }, + { + "epoch": 0.2587426326129666, + "grad_norm": 0.631233274936676, + "learning_rate": 4.918012225522666e-06, + "loss": 0.6131, + "step": 1756 + }, + { + "epoch": 0.2588899803536346, + "grad_norm": 0.611932635307312, + "learning_rate": 4.9179137222027e-06, + "loss": 0.6013, + "step": 1757 + }, + { + "epoch": 0.2590373280943026, + "grad_norm": 0.6069332957267761, + "learning_rate": 4.917815160733042e-06, + "loss": 0.594, + "step": 1758 + }, + { + "epoch": 0.2591846758349705, + "grad_norm": 0.5892805457115173, + "learning_rate": 4.917716541116065e-06, + "loss": 0.5906, + "step": 1759 + }, + { + "epoch": 0.2593320235756385, + "grad_norm": 0.6169430017471313, + "learning_rate": 4.917617863354138e-06, + "loss": 0.6129, + "step": 1760 + }, + { + "epoch": 0.2594793713163065, + "grad_norm": 0.6141143441200256, + "learning_rate": 4.917519127449636e-06, + "loss": 0.6355, + "step": 1761 + }, + { + "epoch": 0.2596267190569745, + "grad_norm": 0.631628692150116, + "learning_rate": 4.917420333404933e-06, + "loss": 0.5644, + "step": 1762 + }, + { + "epoch": 0.2597740667976424, + "grad_norm": 0.6339155435562134, + "learning_rate": 4.917321481222404e-06, + "loss": 0.6187, + "step": 1763 + }, + { + "epoch": 0.2599214145383104, + "grad_norm": 0.5648977756500244, + "learning_rate": 4.917222570904428e-06, + "loss": 0.5956, + "step": 1764 + }, + { + "epoch": 0.2600687622789784, + "grad_norm": 0.6280184388160706, + "learning_rate": 4.9171236024533835e-06, + "loss": 0.6092, + "step": 1765 + }, + { + "epoch": 0.26021611001964634, + "grad_norm": 0.5928637385368347, + "learning_rate": 4.917024575871648e-06, + "loss": 0.5613, + "step": 1766 + }, + { + "epoch": 0.26036345776031433, + "grad_norm": 0.591191828250885, + "learning_rate": 4.916925491161607e-06, + "loss": 0.6303, + "step": 1767 + }, + { + "epoch": 0.2605108055009823, + "grad_norm": 0.5857988595962524, + "learning_rate": 4.9168263483256405e-06, + "loss": 0.569, + "step": 1768 + }, + { + "epoch": 0.2606581532416503, + "grad_norm": 0.5731964111328125, + "learning_rate": 4.9167271473661336e-06, + "loss": 0.6461, + "step": 1769 + }, + { + "epoch": 0.26080550098231825, + "grad_norm": 0.601102352142334, + "learning_rate": 4.916627888285474e-06, + "loss": 0.6196, + "step": 1770 + }, + { + "epoch": 0.26095284872298624, + "grad_norm": 0.557368278503418, + "learning_rate": 4.916528571086046e-06, + "loss": 0.5849, + "step": 1771 + }, + { + "epoch": 0.26110019646365423, + "grad_norm": 0.5765355825424194, + "learning_rate": 4.916429195770239e-06, + "loss": 0.6176, + "step": 1772 + }, + { + "epoch": 0.2612475442043222, + "grad_norm": 0.5955084562301636, + "learning_rate": 4.916329762340443e-06, + "loss": 0.6145, + "step": 1773 + }, + { + "epoch": 0.26139489194499016, + "grad_norm": 0.5946002006530762, + "learning_rate": 4.916230270799051e-06, + "loss": 0.6108, + "step": 1774 + }, + { + "epoch": 0.26154223968565815, + "grad_norm": 0.593898355960846, + "learning_rate": 4.916130721148453e-06, + "loss": 0.5935, + "step": 1775 + }, + { + "epoch": 0.26168958742632614, + "grad_norm": 0.5853733420372009, + "learning_rate": 4.916031113391044e-06, + "loss": 0.6106, + "step": 1776 + }, + { + "epoch": 0.26183693516699413, + "grad_norm": 0.5759773254394531, + "learning_rate": 4.915931447529221e-06, + "loss": 0.6099, + "step": 1777 + }, + { + "epoch": 0.26198428290766207, + "grad_norm": 0.575660765171051, + "learning_rate": 4.91583172356538e-06, + "loss": 0.58, + "step": 1778 + }, + { + "epoch": 0.26213163064833006, + "grad_norm": 0.5636186003684998, + "learning_rate": 4.915731941501918e-06, + "loss": 0.5762, + "step": 1779 + }, + { + "epoch": 0.26227897838899805, + "grad_norm": 0.6141606569290161, + "learning_rate": 4.915632101341237e-06, + "loss": 0.6017, + "step": 1780 + }, + { + "epoch": 0.262426326129666, + "grad_norm": 0.5652437806129456, + "learning_rate": 4.9155322030857365e-06, + "loss": 0.6269, + "step": 1781 + }, + { + "epoch": 0.262573673870334, + "grad_norm": 0.6126176118850708, + "learning_rate": 4.915432246737819e-06, + "loss": 0.6171, + "step": 1782 + }, + { + "epoch": 0.26272102161100197, + "grad_norm": 0.5675702095031738, + "learning_rate": 4.915332232299889e-06, + "loss": 0.5785, + "step": 1783 + }, + { + "epoch": 0.26286836935166996, + "grad_norm": 0.585924506187439, + "learning_rate": 4.915232159774353e-06, + "loss": 0.6101, + "step": 1784 + }, + { + "epoch": 0.2630157170923379, + "grad_norm": 0.5551459789276123, + "learning_rate": 4.915132029163614e-06, + "loss": 0.5957, + "step": 1785 + }, + { + "epoch": 0.2631630648330059, + "grad_norm": 0.5702390074729919, + "learning_rate": 4.9150318404700845e-06, + "loss": 0.6438, + "step": 1786 + }, + { + "epoch": 0.2633104125736739, + "grad_norm": 0.5815082788467407, + "learning_rate": 4.91493159369617e-06, + "loss": 0.5829, + "step": 1787 + }, + { + "epoch": 0.26345776031434187, + "grad_norm": 0.7063375115394592, + "learning_rate": 4.914831288844286e-06, + "loss": 0.5965, + "step": 1788 + }, + { + "epoch": 0.2636051080550098, + "grad_norm": 0.5944624543190002, + "learning_rate": 4.91473092591684e-06, + "loss": 0.6265, + "step": 1789 + }, + { + "epoch": 0.2637524557956778, + "grad_norm": 0.5922138690948486, + "learning_rate": 4.914630504916249e-06, + "loss": 0.5987, + "step": 1790 + }, + { + "epoch": 0.2638998035363458, + "grad_norm": 0.5875394344329834, + "learning_rate": 4.914530025844926e-06, + "loss": 0.6114, + "step": 1791 + }, + { + "epoch": 0.2640471512770137, + "grad_norm": 0.5823413133621216, + "learning_rate": 4.914429488705289e-06, + "loss": 0.5866, + "step": 1792 + }, + { + "epoch": 0.2641944990176817, + "grad_norm": 0.6063029766082764, + "learning_rate": 4.914328893499755e-06, + "loss": 0.6058, + "step": 1793 + }, + { + "epoch": 0.2643418467583497, + "grad_norm": 0.6015620827674866, + "learning_rate": 4.914228240230743e-06, + "loss": 0.567, + "step": 1794 + }, + { + "epoch": 0.2644891944990177, + "grad_norm": 0.6150606274604797, + "learning_rate": 4.9141275289006755e-06, + "loss": 0.5928, + "step": 1795 + }, + { + "epoch": 0.26463654223968563, + "grad_norm": 0.5875958204269409, + "learning_rate": 4.914026759511971e-06, + "loss": 0.5925, + "step": 1796 + }, + { + "epoch": 0.2647838899803536, + "grad_norm": 0.5949323177337646, + "learning_rate": 4.913925932067057e-06, + "loss": 0.64, + "step": 1797 + }, + { + "epoch": 0.2649312377210216, + "grad_norm": 0.6561916470527649, + "learning_rate": 4.9138250465683564e-06, + "loss": 0.5995, + "step": 1798 + }, + { + "epoch": 0.2650785854616896, + "grad_norm": 0.5882973074913025, + "learning_rate": 4.913724103018296e-06, + "loss": 0.5936, + "step": 1799 + }, + { + "epoch": 0.26522593320235754, + "grad_norm": 0.5918797254562378, + "learning_rate": 4.913623101419301e-06, + "loss": 0.6346, + "step": 1800 + }, + { + "epoch": 0.26537328094302554, + "grad_norm": 0.6286064386367798, + "learning_rate": 4.913522041773805e-06, + "loss": 0.5673, + "step": 1801 + }, + { + "epoch": 0.2655206286836935, + "grad_norm": 0.6306771636009216, + "learning_rate": 4.913420924084234e-06, + "loss": 0.5687, + "step": 1802 + }, + { + "epoch": 0.2656679764243615, + "grad_norm": 0.6328890919685364, + "learning_rate": 4.913319748353022e-06, + "loss": 0.6169, + "step": 1803 + }, + { + "epoch": 0.26581532416502945, + "grad_norm": 0.6232098937034607, + "learning_rate": 4.913218514582603e-06, + "loss": 0.6297, + "step": 1804 + }, + { + "epoch": 0.26596267190569745, + "grad_norm": 0.5898336172103882, + "learning_rate": 4.91311722277541e-06, + "loss": 0.6132, + "step": 1805 + }, + { + "epoch": 0.26611001964636544, + "grad_norm": 0.6101147532463074, + "learning_rate": 4.9130158729338794e-06, + "loss": 0.6076, + "step": 1806 + }, + { + "epoch": 0.2662573673870334, + "grad_norm": 0.6149605512619019, + "learning_rate": 4.912914465060449e-06, + "loss": 0.5794, + "step": 1807 + }, + { + "epoch": 0.26640471512770136, + "grad_norm": 0.6027504205703735, + "learning_rate": 4.912812999157558e-06, + "loss": 0.5967, + "step": 1808 + }, + { + "epoch": 0.26655206286836935, + "grad_norm": 0.5795935988426208, + "learning_rate": 4.912711475227645e-06, + "loss": 0.5638, + "step": 1809 + }, + { + "epoch": 0.26669941060903735, + "grad_norm": 0.6020105481147766, + "learning_rate": 4.912609893273153e-06, + "loss": 0.6446, + "step": 1810 + }, + { + "epoch": 0.2668467583497053, + "grad_norm": 0.6264997124671936, + "learning_rate": 4.912508253296525e-06, + "loss": 0.6087, + "step": 1811 + }, + { + "epoch": 0.2669941060903733, + "grad_norm": 0.5581730008125305, + "learning_rate": 4.9124065553002045e-06, + "loss": 0.5862, + "step": 1812 + }, + { + "epoch": 0.26714145383104126, + "grad_norm": 0.5947370529174805, + "learning_rate": 4.912304799286638e-06, + "loss": 0.6305, + "step": 1813 + }, + { + "epoch": 0.26728880157170926, + "grad_norm": 0.5658950805664062, + "learning_rate": 4.912202985258273e-06, + "loss": 0.5697, + "step": 1814 + }, + { + "epoch": 0.2674361493123772, + "grad_norm": 0.6070843935012817, + "learning_rate": 4.912101113217558e-06, + "loss": 0.6267, + "step": 1815 + }, + { + "epoch": 0.2675834970530452, + "grad_norm": 0.6142455339431763, + "learning_rate": 4.911999183166941e-06, + "loss": 0.6183, + "step": 1816 + }, + { + "epoch": 0.2677308447937132, + "grad_norm": 0.6044833660125732, + "learning_rate": 4.911897195108876e-06, + "loss": 0.6045, + "step": 1817 + }, + { + "epoch": 0.26787819253438117, + "grad_norm": 0.5754340291023254, + "learning_rate": 4.9117951490458145e-06, + "loss": 0.6246, + "step": 1818 + }, + { + "epoch": 0.2680255402750491, + "grad_norm": 0.5618101954460144, + "learning_rate": 4.911693044980212e-06, + "loss": 0.6039, + "step": 1819 + }, + { + "epoch": 0.2681728880157171, + "grad_norm": 0.628834068775177, + "learning_rate": 4.911590882914521e-06, + "loss": 0.6115, + "step": 1820 + }, + { + "epoch": 0.2683202357563851, + "grad_norm": 0.6466172337532043, + "learning_rate": 4.911488662851201e-06, + "loss": 0.6061, + "step": 1821 + }, + { + "epoch": 0.268467583497053, + "grad_norm": 0.5656207203865051, + "learning_rate": 4.91138638479271e-06, + "loss": 0.5818, + "step": 1822 + }, + { + "epoch": 0.268614931237721, + "grad_norm": 0.6251662373542786, + "learning_rate": 4.911284048741507e-06, + "loss": 0.5248, + "step": 1823 + }, + { + "epoch": 0.268762278978389, + "grad_norm": 0.5820587873458862, + "learning_rate": 4.911181654700054e-06, + "loss": 0.5746, + "step": 1824 + }, + { + "epoch": 0.268909626719057, + "grad_norm": 0.6148613095283508, + "learning_rate": 4.911079202670813e-06, + "loss": 0.6146, + "step": 1825 + }, + { + "epoch": 0.26905697445972493, + "grad_norm": 0.6051336526870728, + "learning_rate": 4.910976692656249e-06, + "loss": 0.6362, + "step": 1826 + }, + { + "epoch": 0.2692043222003929, + "grad_norm": 0.6021358966827393, + "learning_rate": 4.910874124658825e-06, + "loss": 0.6172, + "step": 1827 + }, + { + "epoch": 0.2693516699410609, + "grad_norm": 0.6098156571388245, + "learning_rate": 4.91077149868101e-06, + "loss": 0.6067, + "step": 1828 + }, + { + "epoch": 0.2694990176817289, + "grad_norm": 0.6314789652824402, + "learning_rate": 4.91066881472527e-06, + "loss": 0.5898, + "step": 1829 + }, + { + "epoch": 0.26964636542239684, + "grad_norm": 0.6271588802337646, + "learning_rate": 4.910566072794076e-06, + "loss": 0.6002, + "step": 1830 + }, + { + "epoch": 0.26979371316306483, + "grad_norm": 0.6011391282081604, + "learning_rate": 4.910463272889899e-06, + "loss": 0.6049, + "step": 1831 + }, + { + "epoch": 0.2699410609037328, + "grad_norm": 0.5757840275764465, + "learning_rate": 4.910360415015211e-06, + "loss": 0.6079, + "step": 1832 + }, + { + "epoch": 0.2700884086444008, + "grad_norm": 0.5817499756813049, + "learning_rate": 4.910257499172485e-06, + "loss": 0.6022, + "step": 1833 + }, + { + "epoch": 0.27023575638506875, + "grad_norm": 0.6097776293754578, + "learning_rate": 4.910154525364197e-06, + "loss": 0.6192, + "step": 1834 + }, + { + "epoch": 0.27038310412573674, + "grad_norm": 0.62235426902771, + "learning_rate": 4.910051493592823e-06, + "loss": 0.6286, + "step": 1835 + }, + { + "epoch": 0.27053045186640473, + "grad_norm": 0.5774121880531311, + "learning_rate": 4.909948403860841e-06, + "loss": 0.5632, + "step": 1836 + }, + { + "epoch": 0.27067779960707267, + "grad_norm": 0.601817786693573, + "learning_rate": 4.90984525617073e-06, + "loss": 0.6277, + "step": 1837 + }, + { + "epoch": 0.27082514734774066, + "grad_norm": 0.6067691445350647, + "learning_rate": 4.909742050524971e-06, + "loss": 0.6119, + "step": 1838 + }, + { + "epoch": 0.27097249508840865, + "grad_norm": 0.6055246591567993, + "learning_rate": 4.909638786926046e-06, + "loss": 0.638, + "step": 1839 + }, + { + "epoch": 0.27111984282907664, + "grad_norm": 0.6147459149360657, + "learning_rate": 4.90953546537644e-06, + "loss": 0.5751, + "step": 1840 + }, + { + "epoch": 0.2712671905697446, + "grad_norm": 0.6011224389076233, + "learning_rate": 4.909432085878633e-06, + "loss": 0.582, + "step": 1841 + }, + { + "epoch": 0.27141453831041257, + "grad_norm": 0.5974448323249817, + "learning_rate": 4.909328648435117e-06, + "loss": 0.6223, + "step": 1842 + }, + { + "epoch": 0.27156188605108056, + "grad_norm": 0.6125964522361755, + "learning_rate": 4.909225153048376e-06, + "loss": 0.6569, + "step": 1843 + }, + { + "epoch": 0.27170923379174855, + "grad_norm": 0.611558198928833, + "learning_rate": 4.9091215997209e-06, + "loss": 0.5853, + "step": 1844 + }, + { + "epoch": 0.2718565815324165, + "grad_norm": 0.6275449991226196, + "learning_rate": 4.90901798845518e-06, + "loss": 0.6289, + "step": 1845 + }, + { + "epoch": 0.2720039292730845, + "grad_norm": 0.5858194828033447, + "learning_rate": 4.908914319253706e-06, + "loss": 0.6123, + "step": 1846 + }, + { + "epoch": 0.27215127701375247, + "grad_norm": 0.5808798670768738, + "learning_rate": 4.908810592118974e-06, + "loss": 0.6262, + "step": 1847 + }, + { + "epoch": 0.27229862475442046, + "grad_norm": 0.6127000451087952, + "learning_rate": 4.908706807053476e-06, + "loss": 0.6055, + "step": 1848 + }, + { + "epoch": 0.2724459724950884, + "grad_norm": 0.636264443397522, + "learning_rate": 4.90860296405971e-06, + "loss": 0.5926, + "step": 1849 + }, + { + "epoch": 0.2725933202357564, + "grad_norm": 0.6337025165557861, + "learning_rate": 4.908499063140172e-06, + "loss": 0.6191, + "step": 1850 + }, + { + "epoch": 0.2727406679764244, + "grad_norm": 0.6253567934036255, + "learning_rate": 4.908395104297361e-06, + "loss": 0.624, + "step": 1851 + }, + { + "epoch": 0.2728880157170923, + "grad_norm": 0.6202284097671509, + "learning_rate": 4.908291087533777e-06, + "loss": 0.597, + "step": 1852 + }, + { + "epoch": 0.2730353634577603, + "grad_norm": 0.6371239423751831, + "learning_rate": 4.908187012851923e-06, + "loss": 0.5637, + "step": 1853 + }, + { + "epoch": 0.2731827111984283, + "grad_norm": 0.6287308931350708, + "learning_rate": 4.9080828802543e-06, + "loss": 0.6256, + "step": 1854 + }, + { + "epoch": 0.2733300589390963, + "grad_norm": 0.5818671584129333, + "learning_rate": 4.907978689743413e-06, + "loss": 0.5925, + "step": 1855 + }, + { + "epoch": 0.2734774066797642, + "grad_norm": 0.6071414351463318, + "learning_rate": 4.907874441321768e-06, + "loss": 0.6322, + "step": 1856 + }, + { + "epoch": 0.2736247544204322, + "grad_norm": 0.62392258644104, + "learning_rate": 4.907770134991872e-06, + "loss": 0.6306, + "step": 1857 + }, + { + "epoch": 0.2737721021611002, + "grad_norm": 0.6361648440361023, + "learning_rate": 4.907665770756235e-06, + "loss": 0.5997, + "step": 1858 + }, + { + "epoch": 0.2739194499017682, + "grad_norm": 0.5778111219406128, + "learning_rate": 4.9075613486173636e-06, + "loss": 0.5991, + "step": 1859 + }, + { + "epoch": 0.27406679764243613, + "grad_norm": 0.5900107026100159, + "learning_rate": 4.907456868577772e-06, + "loss": 0.6398, + "step": 1860 + }, + { + "epoch": 0.2742141453831041, + "grad_norm": 0.5899320244789124, + "learning_rate": 4.907352330639972e-06, + "loss": 0.6549, + "step": 1861 + }, + { + "epoch": 0.2743614931237721, + "grad_norm": 0.6042764186859131, + "learning_rate": 4.907247734806476e-06, + "loss": 0.5833, + "step": 1862 + }, + { + "epoch": 0.27450884086444005, + "grad_norm": 0.5689100623130798, + "learning_rate": 4.907143081079802e-06, + "loss": 0.6043, + "step": 1863 + }, + { + "epoch": 0.27465618860510804, + "grad_norm": 0.5721964240074158, + "learning_rate": 4.907038369462467e-06, + "loss": 0.5997, + "step": 1864 + }, + { + "epoch": 0.27480353634577603, + "grad_norm": 0.5913596749305725, + "learning_rate": 4.906933599956987e-06, + "loss": 0.6124, + "step": 1865 + }, + { + "epoch": 0.274950884086444, + "grad_norm": 0.5828700065612793, + "learning_rate": 4.906828772565884e-06, + "loss": 0.6016, + "step": 1866 + }, + { + "epoch": 0.27509823182711196, + "grad_norm": 0.5826784372329712, + "learning_rate": 4.9067238872916765e-06, + "loss": 0.5954, + "step": 1867 + }, + { + "epoch": 0.27524557956777995, + "grad_norm": 0.5933745503425598, + "learning_rate": 4.906618944136889e-06, + "loss": 0.5835, + "step": 1868 + }, + { + "epoch": 0.27539292730844794, + "grad_norm": 0.5897332429885864, + "learning_rate": 4.906513943104044e-06, + "loss": 0.6136, + "step": 1869 + }, + { + "epoch": 0.27554027504911593, + "grad_norm": 0.5992917418479919, + "learning_rate": 4.906408884195668e-06, + "loss": 0.6247, + "step": 1870 + }, + { + "epoch": 0.27568762278978387, + "grad_norm": 0.59736567735672, + "learning_rate": 4.906303767414288e-06, + "loss": 0.5895, + "step": 1871 + }, + { + "epoch": 0.27583497053045186, + "grad_norm": 0.5853766202926636, + "learning_rate": 4.906198592762429e-06, + "loss": 0.6419, + "step": 1872 + }, + { + "epoch": 0.27598231827111985, + "grad_norm": 0.5791409611701965, + "learning_rate": 4.906093360242623e-06, + "loss": 0.6328, + "step": 1873 + }, + { + "epoch": 0.27612966601178784, + "grad_norm": 0.5896877646446228, + "learning_rate": 4.905988069857401e-06, + "loss": 0.6213, + "step": 1874 + }, + { + "epoch": 0.2762770137524558, + "grad_norm": 0.605901300907135, + "learning_rate": 4.905882721609294e-06, + "loss": 0.6335, + "step": 1875 + }, + { + "epoch": 0.27642436149312377, + "grad_norm": 0.5796653628349304, + "learning_rate": 4.905777315500836e-06, + "loss": 0.6012, + "step": 1876 + }, + { + "epoch": 0.27657170923379176, + "grad_norm": 0.5728895664215088, + "learning_rate": 4.9056718515345624e-06, + "loss": 0.5613, + "step": 1877 + }, + { + "epoch": 0.2767190569744597, + "grad_norm": 0.5990075469017029, + "learning_rate": 4.905566329713009e-06, + "loss": 0.6378, + "step": 1878 + }, + { + "epoch": 0.2768664047151277, + "grad_norm": 0.5770743489265442, + "learning_rate": 4.905460750038713e-06, + "loss": 0.5898, + "step": 1879 + }, + { + "epoch": 0.2770137524557957, + "grad_norm": 0.598072350025177, + "learning_rate": 4.905355112514214e-06, + "loss": 0.5632, + "step": 1880 + }, + { + "epoch": 0.27716110019646367, + "grad_norm": 0.5668124556541443, + "learning_rate": 4.9052494171420534e-06, + "loss": 0.6507, + "step": 1881 + }, + { + "epoch": 0.2773084479371316, + "grad_norm": 0.615648090839386, + "learning_rate": 4.9051436639247715e-06, + "loss": 0.6339, + "step": 1882 + }, + { + "epoch": 0.2774557956777996, + "grad_norm": 0.5890446305274963, + "learning_rate": 4.905037852864914e-06, + "loss": 0.6051, + "step": 1883 + }, + { + "epoch": 0.2776031434184676, + "grad_norm": 0.5819085836410522, + "learning_rate": 4.904931983965024e-06, + "loss": 0.5748, + "step": 1884 + }, + { + "epoch": 0.2777504911591356, + "grad_norm": 0.5695262551307678, + "learning_rate": 4.904826057227646e-06, + "loss": 0.6165, + "step": 1885 + }, + { + "epoch": 0.2778978388998035, + "grad_norm": 0.6010515093803406, + "learning_rate": 4.904720072655331e-06, + "loss": 0.5956, + "step": 1886 + }, + { + "epoch": 0.2780451866404715, + "grad_norm": 0.6201261281967163, + "learning_rate": 4.904614030250626e-06, + "loss": 0.5778, + "step": 1887 + }, + { + "epoch": 0.2781925343811395, + "grad_norm": 0.6306869387626648, + "learning_rate": 4.90450793001608e-06, + "loss": 0.6501, + "step": 1888 + }, + { + "epoch": 0.2783398821218075, + "grad_norm": 0.6350950002670288, + "learning_rate": 4.9044017719542484e-06, + "loss": 0.6443, + "step": 1889 + }, + { + "epoch": 0.2784872298624754, + "grad_norm": 0.6017818450927734, + "learning_rate": 4.90429555606768e-06, + "loss": 0.6019, + "step": 1890 + }, + { + "epoch": 0.2786345776031434, + "grad_norm": 0.5819234848022461, + "learning_rate": 4.904189282358931e-06, + "loss": 0.5781, + "step": 1891 + }, + { + "epoch": 0.2787819253438114, + "grad_norm": 0.6296589374542236, + "learning_rate": 4.9040829508305585e-06, + "loss": 0.6206, + "step": 1892 + }, + { + "epoch": 0.27892927308447935, + "grad_norm": 0.5965631008148193, + "learning_rate": 4.903976561485119e-06, + "loss": 0.6178, + "step": 1893 + }, + { + "epoch": 0.27907662082514734, + "grad_norm": 0.6043063998222351, + "learning_rate": 4.9038701143251696e-06, + "loss": 0.6027, + "step": 1894 + }, + { + "epoch": 0.27922396856581533, + "grad_norm": 0.6513491272926331, + "learning_rate": 4.903763609353271e-06, + "loss": 0.5791, + "step": 1895 + }, + { + "epoch": 0.2793713163064833, + "grad_norm": 0.5906035900115967, + "learning_rate": 4.9036570465719865e-06, + "loss": 0.6086, + "step": 1896 + }, + { + "epoch": 0.27951866404715126, + "grad_norm": 0.5683687329292297, + "learning_rate": 4.903550425983877e-06, + "loss": 0.6276, + "step": 1897 + }, + { + "epoch": 0.27966601178781925, + "grad_norm": 0.6254912614822388, + "learning_rate": 4.903443747591507e-06, + "loss": 0.5933, + "step": 1898 + }, + { + "epoch": 0.27981335952848724, + "grad_norm": 0.6065893173217773, + "learning_rate": 4.903337011397442e-06, + "loss": 0.6117, + "step": 1899 + }, + { + "epoch": 0.27996070726915523, + "grad_norm": 0.6102060675621033, + "learning_rate": 4.903230217404249e-06, + "loss": 0.5922, + "step": 1900 + }, + { + "epoch": 0.28010805500982316, + "grad_norm": 0.5791845917701721, + "learning_rate": 4.903123365614496e-06, + "loss": 0.6534, + "step": 1901 + }, + { + "epoch": 0.28025540275049116, + "grad_norm": 0.6031619310379028, + "learning_rate": 4.903016456030754e-06, + "loss": 0.5866, + "step": 1902 + }, + { + "epoch": 0.28040275049115915, + "grad_norm": 0.5709341764450073, + "learning_rate": 4.902909488655593e-06, + "loss": 0.5959, + "step": 1903 + }, + { + "epoch": 0.28055009823182714, + "grad_norm": 0.5752018690109253, + "learning_rate": 4.902802463491586e-06, + "loss": 0.5903, + "step": 1904 + }, + { + "epoch": 0.2806974459724951, + "grad_norm": 0.6101030707359314, + "learning_rate": 4.902695380541307e-06, + "loss": 0.5917, + "step": 1905 + }, + { + "epoch": 0.28084479371316307, + "grad_norm": 0.6306458711624146, + "learning_rate": 4.9025882398073305e-06, + "loss": 0.5725, + "step": 1906 + }, + { + "epoch": 0.28099214145383106, + "grad_norm": 0.5803332328796387, + "learning_rate": 4.902481041292234e-06, + "loss": 0.5977, + "step": 1907 + }, + { + "epoch": 0.281139489194499, + "grad_norm": 0.6662495732307434, + "learning_rate": 4.9023737849985956e-06, + "loss": 0.6184, + "step": 1908 + }, + { + "epoch": 0.281286836935167, + "grad_norm": 0.5926188826560974, + "learning_rate": 4.902266470928994e-06, + "loss": 0.5766, + "step": 1909 + }, + { + "epoch": 0.281434184675835, + "grad_norm": 0.5851850509643555, + "learning_rate": 4.9021590990860105e-06, + "loss": 0.5712, + "step": 1910 + }, + { + "epoch": 0.28158153241650297, + "grad_norm": 0.5922086238861084, + "learning_rate": 4.902051669472227e-06, + "loss": 0.582, + "step": 1911 + }, + { + "epoch": 0.2817288801571709, + "grad_norm": 0.5966348648071289, + "learning_rate": 4.901944182090228e-06, + "loss": 0.6301, + "step": 1912 + }, + { + "epoch": 0.2818762278978389, + "grad_norm": 0.6258214712142944, + "learning_rate": 4.9018366369425975e-06, + "loss": 0.6131, + "step": 1913 + }, + { + "epoch": 0.2820235756385069, + "grad_norm": 0.5661602020263672, + "learning_rate": 4.901729034031923e-06, + "loss": 0.6135, + "step": 1914 + }, + { + "epoch": 0.2821709233791749, + "grad_norm": 0.6071001887321472, + "learning_rate": 4.901621373360791e-06, + "loss": 0.6278, + "step": 1915 + }, + { + "epoch": 0.2823182711198428, + "grad_norm": 0.5811814665794373, + "learning_rate": 4.901513654931792e-06, + "loss": 0.6182, + "step": 1916 + }, + { + "epoch": 0.2824656188605108, + "grad_norm": 0.5748955011367798, + "learning_rate": 4.9014058787475165e-06, + "loss": 0.6145, + "step": 1917 + }, + { + "epoch": 0.2826129666011788, + "grad_norm": 0.5843654274940491, + "learning_rate": 4.9012980448105556e-06, + "loss": 0.5722, + "step": 1918 + }, + { + "epoch": 0.2827603143418468, + "grad_norm": 0.5852019190788269, + "learning_rate": 4.9011901531235016e-06, + "loss": 0.6002, + "step": 1919 + }, + { + "epoch": 0.2829076620825147, + "grad_norm": 0.606941282749176, + "learning_rate": 4.901082203688952e-06, + "loss": 0.6239, + "step": 1920 + }, + { + "epoch": 0.2830550098231827, + "grad_norm": 0.5966368317604065, + "learning_rate": 4.9009741965095015e-06, + "loss": 0.6154, + "step": 1921 + }, + { + "epoch": 0.2832023575638507, + "grad_norm": 0.6095646023750305, + "learning_rate": 4.900866131587747e-06, + "loss": 0.6244, + "step": 1922 + }, + { + "epoch": 0.28334970530451864, + "grad_norm": 0.5683894753456116, + "learning_rate": 4.900758008926289e-06, + "loss": 0.6167, + "step": 1923 + }, + { + "epoch": 0.28349705304518663, + "grad_norm": 0.5838152170181274, + "learning_rate": 4.900649828527726e-06, + "loss": 0.5782, + "step": 1924 + }, + { + "epoch": 0.2836444007858546, + "grad_norm": 0.5756011605262756, + "learning_rate": 4.900541590394662e-06, + "loss": 0.6127, + "step": 1925 + }, + { + "epoch": 0.2837917485265226, + "grad_norm": 0.5914430618286133, + "learning_rate": 4.9004332945296975e-06, + "loss": 0.6273, + "step": 1926 + }, + { + "epoch": 0.28393909626719055, + "grad_norm": 0.5824806690216064, + "learning_rate": 4.900324940935438e-06, + "loss": 0.6031, + "step": 1927 + }, + { + "epoch": 0.28408644400785854, + "grad_norm": 0.5975430607795715, + "learning_rate": 4.9002165296144905e-06, + "loss": 0.5736, + "step": 1928 + }, + { + "epoch": 0.28423379174852653, + "grad_norm": 0.6219887733459473, + "learning_rate": 4.900108060569461e-06, + "loss": 0.595, + "step": 1929 + }, + { + "epoch": 0.2843811394891945, + "grad_norm": 0.5940389037132263, + "learning_rate": 4.899999533802958e-06, + "loss": 0.6246, + "step": 1930 + }, + { + "epoch": 0.28452848722986246, + "grad_norm": 0.5700501203536987, + "learning_rate": 4.899890949317592e-06, + "loss": 0.5876, + "step": 1931 + }, + { + "epoch": 0.28467583497053045, + "grad_norm": 0.5801708698272705, + "learning_rate": 4.899782307115974e-06, + "loss": 0.6383, + "step": 1932 + }, + { + "epoch": 0.28482318271119844, + "grad_norm": 0.5822296738624573, + "learning_rate": 4.899673607200717e-06, + "loss": 0.6331, + "step": 1933 + }, + { + "epoch": 0.2849705304518664, + "grad_norm": 0.5796971321105957, + "learning_rate": 4.899564849574435e-06, + "loss": 0.5921, + "step": 1934 + }, + { + "epoch": 0.28511787819253437, + "grad_norm": 0.615541398525238, + "learning_rate": 4.899456034239745e-06, + "loss": 0.59, + "step": 1935 + }, + { + "epoch": 0.28526522593320236, + "grad_norm": 0.587843656539917, + "learning_rate": 4.899347161199261e-06, + "loss": 0.5883, + "step": 1936 + }, + { + "epoch": 0.28541257367387035, + "grad_norm": 0.5717246532440186, + "learning_rate": 4.899238230455605e-06, + "loss": 0.6161, + "step": 1937 + }, + { + "epoch": 0.2855599214145383, + "grad_norm": 0.6125832200050354, + "learning_rate": 4.8991292420113945e-06, + "loss": 0.6097, + "step": 1938 + }, + { + "epoch": 0.2857072691552063, + "grad_norm": 0.6212131977081299, + "learning_rate": 4.89902019586925e-06, + "loss": 0.5903, + "step": 1939 + }, + { + "epoch": 0.28585461689587427, + "grad_norm": 0.5707337856292725, + "learning_rate": 4.8989110920317966e-06, + "loss": 0.5865, + "step": 1940 + }, + { + "epoch": 0.28600196463654226, + "grad_norm": 0.6160897016525269, + "learning_rate": 4.898801930501656e-06, + "loss": 0.5917, + "step": 1941 + }, + { + "epoch": 0.2861493123772102, + "grad_norm": 0.6007860898971558, + "learning_rate": 4.898692711281453e-06, + "loss": 0.606, + "step": 1942 + }, + { + "epoch": 0.2862966601178782, + "grad_norm": 0.5813050866127014, + "learning_rate": 4.898583434373816e-06, + "loss": 0.6048, + "step": 1943 + }, + { + "epoch": 0.2864440078585462, + "grad_norm": 0.5741474032402039, + "learning_rate": 4.898474099781373e-06, + "loss": 0.5777, + "step": 1944 + }, + { + "epoch": 0.28659135559921417, + "grad_norm": 0.5705989599227905, + "learning_rate": 4.898364707506752e-06, + "loss": 0.5862, + "step": 1945 + }, + { + "epoch": 0.2867387033398821, + "grad_norm": 0.5999512672424316, + "learning_rate": 4.898255257552584e-06, + "loss": 0.5859, + "step": 1946 + }, + { + "epoch": 0.2868860510805501, + "grad_norm": 0.6180103421211243, + "learning_rate": 4.898145749921504e-06, + "loss": 0.6186, + "step": 1947 + }, + { + "epoch": 0.2870333988212181, + "grad_norm": 0.5858295559883118, + "learning_rate": 4.898036184616141e-06, + "loss": 0.6422, + "step": 1948 + }, + { + "epoch": 0.287180746561886, + "grad_norm": 0.6122486591339111, + "learning_rate": 4.897926561639135e-06, + "loss": 0.6072, + "step": 1949 + }, + { + "epoch": 0.287328094302554, + "grad_norm": 0.6128506660461426, + "learning_rate": 4.897816880993119e-06, + "loss": 0.5912, + "step": 1950 + }, + { + "epoch": 0.287475442043222, + "grad_norm": 0.5870504379272461, + "learning_rate": 4.897707142680731e-06, + "loss": 0.6018, + "step": 1951 + }, + { + "epoch": 0.28762278978389, + "grad_norm": 0.6105552911758423, + "learning_rate": 4.897597346704612e-06, + "loss": 0.5829, + "step": 1952 + }, + { + "epoch": 0.28777013752455793, + "grad_norm": 0.5861137509346008, + "learning_rate": 4.8974874930674e-06, + "loss": 0.594, + "step": 1953 + }, + { + "epoch": 0.2879174852652259, + "grad_norm": 0.5997482538223267, + "learning_rate": 4.897377581771739e-06, + "loss": 0.6208, + "step": 1954 + }, + { + "epoch": 0.2880648330058939, + "grad_norm": 0.6179654598236084, + "learning_rate": 4.897267612820272e-06, + "loss": 0.5815, + "step": 1955 + }, + { + "epoch": 0.2882121807465619, + "grad_norm": 0.5887936949729919, + "learning_rate": 4.897157586215643e-06, + "loss": 0.6387, + "step": 1956 + }, + { + "epoch": 0.28835952848722984, + "grad_norm": 0.5863364338874817, + "learning_rate": 4.897047501960498e-06, + "loss": 0.5921, + "step": 1957 + }, + { + "epoch": 0.28850687622789783, + "grad_norm": 0.5952617526054382, + "learning_rate": 4.896937360057484e-06, + "loss": 0.6127, + "step": 1958 + }, + { + "epoch": 0.2886542239685658, + "grad_norm": 0.6288694739341736, + "learning_rate": 4.896827160509252e-06, + "loss": 0.6261, + "step": 1959 + }, + { + "epoch": 0.2888015717092338, + "grad_norm": 0.583977997303009, + "learning_rate": 4.8967169033184514e-06, + "loss": 0.6282, + "step": 1960 + }, + { + "epoch": 0.28894891944990175, + "grad_norm": 0.593998372554779, + "learning_rate": 4.896606588487732e-06, + "loss": 0.5701, + "step": 1961 + }, + { + "epoch": 0.28909626719056974, + "grad_norm": 0.6307173371315002, + "learning_rate": 4.89649621601975e-06, + "loss": 0.6255, + "step": 1962 + }, + { + "epoch": 0.28924361493123774, + "grad_norm": 0.5861653089523315, + "learning_rate": 4.896385785917157e-06, + "loss": 0.6195, + "step": 1963 + }, + { + "epoch": 0.28939096267190567, + "grad_norm": 0.6238703727722168, + "learning_rate": 4.896275298182609e-06, + "loss": 0.6179, + "step": 1964 + }, + { + "epoch": 0.28953831041257366, + "grad_norm": 0.6270511150360107, + "learning_rate": 4.896164752818765e-06, + "loss": 0.6653, + "step": 1965 + }, + { + "epoch": 0.28968565815324165, + "grad_norm": 0.5939506888389587, + "learning_rate": 4.896054149828283e-06, + "loss": 0.6145, + "step": 1966 + }, + { + "epoch": 0.28983300589390965, + "grad_norm": 0.5817103385925293, + "learning_rate": 4.895943489213821e-06, + "loss": 0.6297, + "step": 1967 + }, + { + "epoch": 0.2899803536345776, + "grad_norm": 0.5652056932449341, + "learning_rate": 4.8958327709780425e-06, + "loss": 0.5581, + "step": 1968 + }, + { + "epoch": 0.2901277013752456, + "grad_norm": 0.6130415201187134, + "learning_rate": 4.895721995123611e-06, + "loss": 0.6092, + "step": 1969 + }, + { + "epoch": 0.29027504911591356, + "grad_norm": 0.5819131135940552, + "learning_rate": 4.895611161653187e-06, + "loss": 0.577, + "step": 1970 + }, + { + "epoch": 0.29042239685658156, + "grad_norm": 0.5976041555404663, + "learning_rate": 4.89550027056944e-06, + "loss": 0.6141, + "step": 1971 + }, + { + "epoch": 0.2905697445972495, + "grad_norm": 0.619307816028595, + "learning_rate": 4.895389321875034e-06, + "loss": 0.6126, + "step": 1972 + }, + { + "epoch": 0.2907170923379175, + "grad_norm": 0.60357666015625, + "learning_rate": 4.895278315572639e-06, + "loss": 0.6075, + "step": 1973 + }, + { + "epoch": 0.2908644400785855, + "grad_norm": 0.6244373917579651, + "learning_rate": 4.895167251664923e-06, + "loss": 0.6012, + "step": 1974 + }, + { + "epoch": 0.29101178781925346, + "grad_norm": 0.5907578468322754, + "learning_rate": 4.895056130154559e-06, + "loss": 0.6636, + "step": 1975 + }, + { + "epoch": 0.2911591355599214, + "grad_norm": 0.6460451483726501, + "learning_rate": 4.894944951044217e-06, + "loss": 0.5975, + "step": 1976 + }, + { + "epoch": 0.2913064833005894, + "grad_norm": 0.6607404947280884, + "learning_rate": 4.894833714336574e-06, + "loss": 0.5766, + "step": 1977 + }, + { + "epoch": 0.2914538310412574, + "grad_norm": 0.5722405910491943, + "learning_rate": 4.894722420034303e-06, + "loss": 0.6354, + "step": 1978 + }, + { + "epoch": 0.2916011787819253, + "grad_norm": 0.6173529028892517, + "learning_rate": 4.894611068140079e-06, + "loss": 0.6294, + "step": 1979 + }, + { + "epoch": 0.2917485265225933, + "grad_norm": 0.5745513439178467, + "learning_rate": 4.894499658656584e-06, + "loss": 0.6065, + "step": 1980 + }, + { + "epoch": 0.2918958742632613, + "grad_norm": 0.5621792674064636, + "learning_rate": 4.894388191586495e-06, + "loss": 0.6291, + "step": 1981 + }, + { + "epoch": 0.2920432220039293, + "grad_norm": 0.6102096438407898, + "learning_rate": 4.894276666932492e-06, + "loss": 0.6204, + "step": 1982 + }, + { + "epoch": 0.29219056974459723, + "grad_norm": 0.5959808826446533, + "learning_rate": 4.89416508469726e-06, + "loss": 0.5821, + "step": 1983 + }, + { + "epoch": 0.2923379174852652, + "grad_norm": 0.6153296232223511, + "learning_rate": 4.8940534448834795e-06, + "loss": 0.6296, + "step": 1984 + }, + { + "epoch": 0.2924852652259332, + "grad_norm": 0.576022744178772, + "learning_rate": 4.893941747493837e-06, + "loss": 0.5642, + "step": 1985 + }, + { + "epoch": 0.2926326129666012, + "grad_norm": 0.581516444683075, + "learning_rate": 4.893829992531018e-06, + "loss": 0.5806, + "step": 1986 + }, + { + "epoch": 0.29277996070726914, + "grad_norm": 0.5937744379043579, + "learning_rate": 4.893718179997709e-06, + "loss": 0.6013, + "step": 1987 + }, + { + "epoch": 0.29292730844793713, + "grad_norm": 0.5908276438713074, + "learning_rate": 4.893606309896602e-06, + "loss": 0.6143, + "step": 1988 + }, + { + "epoch": 0.2930746561886051, + "grad_norm": 0.5825467109680176, + "learning_rate": 4.893494382230386e-06, + "loss": 0.6223, + "step": 1989 + }, + { + "epoch": 0.2932220039292731, + "grad_norm": 0.6206814050674438, + "learning_rate": 4.893382397001753e-06, + "loss": 0.6146, + "step": 1990 + }, + { + "epoch": 0.29336935166994105, + "grad_norm": 0.6059784293174744, + "learning_rate": 4.893270354213396e-06, + "loss": 0.5889, + "step": 1991 + }, + { + "epoch": 0.29351669941060904, + "grad_norm": 0.5884191989898682, + "learning_rate": 4.893158253868009e-06, + "loss": 0.5875, + "step": 1992 + }, + { + "epoch": 0.29366404715127703, + "grad_norm": 0.6198190450668335, + "learning_rate": 4.893046095968288e-06, + "loss": 0.5969, + "step": 1993 + }, + { + "epoch": 0.29381139489194497, + "grad_norm": 0.5924283862113953, + "learning_rate": 4.892933880516931e-06, + "loss": 0.6406, + "step": 1994 + }, + { + "epoch": 0.29395874263261296, + "grad_norm": 0.6205914616584778, + "learning_rate": 4.892821607516637e-06, + "loss": 0.6299, + "step": 1995 + }, + { + "epoch": 0.29410609037328095, + "grad_norm": 0.6077253222465515, + "learning_rate": 4.892709276970105e-06, + "loss": 0.5685, + "step": 1996 + }, + { + "epoch": 0.29425343811394894, + "grad_norm": 0.6162218451499939, + "learning_rate": 4.892596888880038e-06, + "loss": 0.6059, + "step": 1997 + }, + { + "epoch": 0.2944007858546169, + "grad_norm": 0.6092612147331238, + "learning_rate": 4.892484443249137e-06, + "loss": 0.6054, + "step": 1998 + }, + { + "epoch": 0.29454813359528487, + "grad_norm": 0.5728386044502258, + "learning_rate": 4.8923719400801075e-06, + "loss": 0.6017, + "step": 1999 + }, + { + "epoch": 0.29469548133595286, + "grad_norm": 0.6253140568733215, + "learning_rate": 4.892259379375656e-06, + "loss": 0.603, + "step": 2000 + }, + { + "epoch": 0.29484282907662085, + "grad_norm": 0.5939204096794128, + "learning_rate": 4.892146761138488e-06, + "loss": 0.584, + "step": 2001 + }, + { + "epoch": 0.2949901768172888, + "grad_norm": 0.610161542892456, + "learning_rate": 4.892034085371311e-06, + "loss": 0.5759, + "step": 2002 + }, + { + "epoch": 0.2951375245579568, + "grad_norm": 0.5921528339385986, + "learning_rate": 4.891921352076838e-06, + "loss": 0.6007, + "step": 2003 + }, + { + "epoch": 0.29528487229862477, + "grad_norm": 0.5960596799850464, + "learning_rate": 4.891808561257777e-06, + "loss": 0.61, + "step": 2004 + }, + { + "epoch": 0.2954322200392927, + "grad_norm": 0.5995599627494812, + "learning_rate": 4.891695712916843e-06, + "loss": 0.6192, + "step": 2005 + }, + { + "epoch": 0.2955795677799607, + "grad_norm": 0.5978050827980042, + "learning_rate": 4.8915828070567486e-06, + "loss": 0.6247, + "step": 2006 + }, + { + "epoch": 0.2957269155206287, + "grad_norm": 0.5904756784439087, + "learning_rate": 4.891469843680209e-06, + "loss": 0.5941, + "step": 2007 + }, + { + "epoch": 0.2958742632612967, + "grad_norm": 0.5904873013496399, + "learning_rate": 4.891356822789941e-06, + "loss": 0.5943, + "step": 2008 + }, + { + "epoch": 0.2960216110019646, + "grad_norm": 0.5855686068534851, + "learning_rate": 4.891243744388664e-06, + "loss": 0.6092, + "step": 2009 + }, + { + "epoch": 0.2961689587426326, + "grad_norm": 0.6095072031021118, + "learning_rate": 4.891130608479096e-06, + "loss": 0.5942, + "step": 2010 + }, + { + "epoch": 0.2963163064833006, + "grad_norm": 0.6046180129051208, + "learning_rate": 4.891017415063958e-06, + "loss": 0.6335, + "step": 2011 + }, + { + "epoch": 0.2964636542239686, + "grad_norm": 0.5958187580108643, + "learning_rate": 4.890904164145973e-06, + "loss": 0.588, + "step": 2012 + }, + { + "epoch": 0.2966110019646365, + "grad_norm": 0.5957202911376953, + "learning_rate": 4.890790855727865e-06, + "loss": 0.612, + "step": 2013 + }, + { + "epoch": 0.2967583497053045, + "grad_norm": 0.5652107000350952, + "learning_rate": 4.8906774898123575e-06, + "loss": 0.5798, + "step": 2014 + }, + { + "epoch": 0.2969056974459725, + "grad_norm": 0.5913317799568176, + "learning_rate": 4.890564066402178e-06, + "loss": 0.6105, + "step": 2015 + }, + { + "epoch": 0.2970530451866405, + "grad_norm": 0.5690085291862488, + "learning_rate": 4.890450585500053e-06, + "loss": 0.5861, + "step": 2016 + }, + { + "epoch": 0.29720039292730843, + "grad_norm": 0.5956624746322632, + "learning_rate": 4.890337047108714e-06, + "loss": 0.6352, + "step": 2017 + }, + { + "epoch": 0.2973477406679764, + "grad_norm": 0.593437135219574, + "learning_rate": 4.890223451230889e-06, + "loss": 0.6252, + "step": 2018 + }, + { + "epoch": 0.2974950884086444, + "grad_norm": 0.5832969546318054, + "learning_rate": 4.8901097978693115e-06, + "loss": 0.6125, + "step": 2019 + }, + { + "epoch": 0.29764243614931235, + "grad_norm": 0.6084479093551636, + "learning_rate": 4.889996087026715e-06, + "loss": 0.5957, + "step": 2020 + }, + { + "epoch": 0.29778978388998034, + "grad_norm": 0.5904667973518372, + "learning_rate": 4.889882318705833e-06, + "loss": 0.6069, + "step": 2021 + }, + { + "epoch": 0.29793713163064833, + "grad_norm": 0.606143593788147, + "learning_rate": 4.889768492909402e-06, + "loss": 0.5857, + "step": 2022 + }, + { + "epoch": 0.2980844793713163, + "grad_norm": 0.5954603552818298, + "learning_rate": 4.8896546096401595e-06, + "loss": 0.6037, + "step": 2023 + }, + { + "epoch": 0.29823182711198426, + "grad_norm": 0.6140833497047424, + "learning_rate": 4.889540668900845e-06, + "loss": 0.6021, + "step": 2024 + }, + { + "epoch": 0.29837917485265225, + "grad_norm": 0.5744895339012146, + "learning_rate": 4.889426670694198e-06, + "loss": 0.6141, + "step": 2025 + }, + { + "epoch": 0.29852652259332024, + "grad_norm": 0.5921748876571655, + "learning_rate": 4.88931261502296e-06, + "loss": 0.6082, + "step": 2026 + }, + { + "epoch": 0.29867387033398823, + "grad_norm": 0.6008917689323425, + "learning_rate": 4.889198501889874e-06, + "loss": 0.5779, + "step": 2027 + }, + { + "epoch": 0.29882121807465617, + "grad_norm": 0.5826932191848755, + "learning_rate": 4.889084331297686e-06, + "loss": 0.5789, + "step": 2028 + }, + { + "epoch": 0.29896856581532416, + "grad_norm": 0.5679751038551331, + "learning_rate": 4.888970103249138e-06, + "loss": 0.5866, + "step": 2029 + }, + { + "epoch": 0.29911591355599215, + "grad_norm": 0.6434227228164673, + "learning_rate": 4.888855817746982e-06, + "loss": 0.6025, + "step": 2030 + }, + { + "epoch": 0.29926326129666014, + "grad_norm": 0.5719097852706909, + "learning_rate": 4.8887414747939636e-06, + "loss": 0.5656, + "step": 2031 + }, + { + "epoch": 0.2994106090373281, + "grad_norm": 0.5681674480438232, + "learning_rate": 4.8886270743928325e-06, + "loss": 0.6004, + "step": 2032 + }, + { + "epoch": 0.29955795677799607, + "grad_norm": 0.58720862865448, + "learning_rate": 4.88851261654634e-06, + "loss": 0.584, + "step": 2033 + }, + { + "epoch": 0.29970530451866406, + "grad_norm": 0.5966487526893616, + "learning_rate": 4.88839810125724e-06, + "loss": 0.5565, + "step": 2034 + }, + { + "epoch": 0.299852652259332, + "grad_norm": 0.6166028380393982, + "learning_rate": 4.888283528528286e-06, + "loss": 0.6122, + "step": 2035 + }, + { + "epoch": 0.3, + "grad_norm": 0.589228093624115, + "learning_rate": 4.888168898362233e-06, + "loss": 0.5824, + "step": 2036 + }, + { + "epoch": 0.300147347740668, + "grad_norm": 0.5961123704910278, + "learning_rate": 4.888054210761839e-06, + "loss": 0.5835, + "step": 2037 + }, + { + "epoch": 0.30029469548133597, + "grad_norm": 0.5983691811561584, + "learning_rate": 4.88793946572986e-06, + "loss": 0.6409, + "step": 2038 + }, + { + "epoch": 0.3004420432220039, + "grad_norm": 0.5810449123382568, + "learning_rate": 4.887824663269058e-06, + "loss": 0.608, + "step": 2039 + }, + { + "epoch": 0.3005893909626719, + "grad_norm": 0.5730899572372437, + "learning_rate": 4.887709803382192e-06, + "loss": 0.6007, + "step": 2040 + }, + { + "epoch": 0.3007367387033399, + "grad_norm": 0.5975651144981384, + "learning_rate": 4.887594886072025e-06, + "loss": 0.607, + "step": 2041 + }, + { + "epoch": 0.3008840864440079, + "grad_norm": 0.6396394968032837, + "learning_rate": 4.887479911341321e-06, + "loss": 0.5854, + "step": 2042 + }, + { + "epoch": 0.3010314341846758, + "grad_norm": 0.581659197807312, + "learning_rate": 4.887364879192845e-06, + "loss": 0.6015, + "step": 2043 + }, + { + "epoch": 0.3011787819253438, + "grad_norm": 0.5823763608932495, + "learning_rate": 4.887249789629364e-06, + "loss": 0.5737, + "step": 2044 + }, + { + "epoch": 0.3013261296660118, + "grad_norm": 0.6087318062782288, + "learning_rate": 4.887134642653645e-06, + "loss": 0.5878, + "step": 2045 + }, + { + "epoch": 0.3014734774066798, + "grad_norm": 0.595495343208313, + "learning_rate": 4.887019438268457e-06, + "loss": 0.5748, + "step": 2046 + }, + { + "epoch": 0.3016208251473477, + "grad_norm": 0.6093252897262573, + "learning_rate": 4.886904176476571e-06, + "loss": 0.6197, + "step": 2047 + }, + { + "epoch": 0.3017681728880157, + "grad_norm": 0.5968894958496094, + "learning_rate": 4.88678885728076e-06, + "loss": 0.5882, + "step": 2048 + }, + { + "epoch": 0.3019155206286837, + "grad_norm": 0.5891918540000916, + "learning_rate": 4.886673480683796e-06, + "loss": 0.5849, + "step": 2049 + }, + { + "epoch": 0.30206286836935164, + "grad_norm": 0.6580860614776611, + "learning_rate": 4.886558046688454e-06, + "loss": 0.5786, + "step": 2050 + }, + { + "epoch": 0.30221021611001964, + "grad_norm": 0.61373370885849, + "learning_rate": 4.88644255529751e-06, + "loss": 0.59, + "step": 2051 + }, + { + "epoch": 0.3023575638506876, + "grad_norm": 0.8495502471923828, + "learning_rate": 4.8863270065137425e-06, + "loss": 0.6109, + "step": 2052 + }, + { + "epoch": 0.3025049115913556, + "grad_norm": 0.5789976716041565, + "learning_rate": 4.886211400339929e-06, + "loss": 0.5743, + "step": 2053 + }, + { + "epoch": 0.30265225933202355, + "grad_norm": 0.6140173077583313, + "learning_rate": 4.886095736778852e-06, + "loss": 0.6254, + "step": 2054 + }, + { + "epoch": 0.30279960707269155, + "grad_norm": 0.6338689923286438, + "learning_rate": 4.88598001583329e-06, + "loss": 0.5952, + "step": 2055 + }, + { + "epoch": 0.30294695481335954, + "grad_norm": 0.5813091993331909, + "learning_rate": 4.885864237506029e-06, + "loss": 0.6, + "step": 2056 + }, + { + "epoch": 0.30309430255402753, + "grad_norm": 0.5915930271148682, + "learning_rate": 4.885748401799852e-06, + "loss": 0.6283, + "step": 2057 + }, + { + "epoch": 0.30324165029469546, + "grad_norm": 0.6096917390823364, + "learning_rate": 4.885632508717544e-06, + "loss": 0.5829, + "step": 2058 + }, + { + "epoch": 0.30338899803536346, + "grad_norm": 0.6176514029502869, + "learning_rate": 4.8855165582618935e-06, + "loss": 0.5782, + "step": 2059 + }, + { + "epoch": 0.30353634577603145, + "grad_norm": 0.6078602075576782, + "learning_rate": 4.885400550435689e-06, + "loss": 0.5932, + "step": 2060 + }, + { + "epoch": 0.30368369351669944, + "grad_norm": 0.5910968780517578, + "learning_rate": 4.8852844852417195e-06, + "loss": 0.6191, + "step": 2061 + }, + { + "epoch": 0.3038310412573674, + "grad_norm": 0.5984516739845276, + "learning_rate": 4.885168362682778e-06, + "loss": 0.5886, + "step": 2062 + }, + { + "epoch": 0.30397838899803536, + "grad_norm": 0.6001726984977722, + "learning_rate": 4.885052182761655e-06, + "loss": 0.5824, + "step": 2063 + }, + { + "epoch": 0.30412573673870336, + "grad_norm": 0.5853000283241272, + "learning_rate": 4.884935945481146e-06, + "loss": 0.6277, + "step": 2064 + }, + { + "epoch": 0.3042730844793713, + "grad_norm": 0.5905888080596924, + "learning_rate": 4.884819650844046e-06, + "loss": 0.6033, + "step": 2065 + }, + { + "epoch": 0.3044204322200393, + "grad_norm": 0.5617679953575134, + "learning_rate": 4.884703298853152e-06, + "loss": 0.5994, + "step": 2066 + }, + { + "epoch": 0.3045677799607073, + "grad_norm": 0.5551189184188843, + "learning_rate": 4.884586889511262e-06, + "loss": 0.6197, + "step": 2067 + }, + { + "epoch": 0.30471512770137527, + "grad_norm": 0.5906761288642883, + "learning_rate": 4.884470422821176e-06, + "loss": 0.5969, + "step": 2068 + }, + { + "epoch": 0.3048624754420432, + "grad_norm": 0.6218976378440857, + "learning_rate": 4.884353898785695e-06, + "loss": 0.575, + "step": 2069 + }, + { + "epoch": 0.3050098231827112, + "grad_norm": 0.5968498587608337, + "learning_rate": 4.8842373174076205e-06, + "loss": 0.6214, + "step": 2070 + }, + { + "epoch": 0.3051571709233792, + "grad_norm": 0.5913493633270264, + "learning_rate": 4.884120678689757e-06, + "loss": 0.6066, + "step": 2071 + }, + { + "epoch": 0.3053045186640472, + "grad_norm": 0.6224168539047241, + "learning_rate": 4.884003982634909e-06, + "loss": 0.6211, + "step": 2072 + }, + { + "epoch": 0.3054518664047151, + "grad_norm": 0.6482710838317871, + "learning_rate": 4.883887229245884e-06, + "loss": 0.5718, + "step": 2073 + }, + { + "epoch": 0.3055992141453831, + "grad_norm": 0.5912683606147766, + "learning_rate": 4.883770418525489e-06, + "loss": 0.5515, + "step": 2074 + }, + { + "epoch": 0.3057465618860511, + "grad_norm": 0.609914243221283, + "learning_rate": 4.883653550476534e-06, + "loss": 0.605, + "step": 2075 + }, + { + "epoch": 0.30589390962671903, + "grad_norm": 0.5775960087776184, + "learning_rate": 4.883536625101828e-06, + "loss": 0.5852, + "step": 2076 + }, + { + "epoch": 0.306041257367387, + "grad_norm": 0.592745840549469, + "learning_rate": 4.883419642404185e-06, + "loss": 0.5866, + "step": 2077 + }, + { + "epoch": 0.306188605108055, + "grad_norm": 0.5802726745605469, + "learning_rate": 4.8833026023864175e-06, + "loss": 0.621, + "step": 2078 + }, + { + "epoch": 0.306335952848723, + "grad_norm": 0.6113583445549011, + "learning_rate": 4.88318550505134e-06, + "loss": 0.6036, + "step": 2079 + }, + { + "epoch": 0.30648330058939094, + "grad_norm": 0.6375038623809814, + "learning_rate": 4.8830683504017685e-06, + "loss": 0.6617, + "step": 2080 + }, + { + "epoch": 0.30663064833005893, + "grad_norm": 0.612963080406189, + "learning_rate": 4.882951138440522e-06, + "loss": 0.6072, + "step": 2081 + }, + { + "epoch": 0.3067779960707269, + "grad_norm": 0.5604066252708435, + "learning_rate": 4.882833869170417e-06, + "loss": 0.6065, + "step": 2082 + }, + { + "epoch": 0.3069253438113949, + "grad_norm": 0.5920567512512207, + "learning_rate": 4.882716542594276e-06, + "loss": 0.5944, + "step": 2083 + }, + { + "epoch": 0.30707269155206285, + "grad_norm": 0.5666702389717102, + "learning_rate": 4.882599158714919e-06, + "loss": 0.6026, + "step": 2084 + }, + { + "epoch": 0.30722003929273084, + "grad_norm": 0.5961591005325317, + "learning_rate": 4.8824817175351706e-06, + "loss": 0.6205, + "step": 2085 + }, + { + "epoch": 0.30736738703339883, + "grad_norm": 0.6182242035865784, + "learning_rate": 4.882364219057853e-06, + "loss": 0.6136, + "step": 2086 + }, + { + "epoch": 0.3075147347740668, + "grad_norm": 0.5902482271194458, + "learning_rate": 4.882246663285795e-06, + "loss": 0.6232, + "step": 2087 + }, + { + "epoch": 0.30766208251473476, + "grad_norm": 0.5740540623664856, + "learning_rate": 4.8821290502218205e-06, + "loss": 0.5868, + "step": 2088 + }, + { + "epoch": 0.30780943025540275, + "grad_norm": 0.5773558616638184, + "learning_rate": 4.882011379868761e-06, + "loss": 0.623, + "step": 2089 + }, + { + "epoch": 0.30795677799607074, + "grad_norm": 0.597392201423645, + "learning_rate": 4.8818936522294435e-06, + "loss": 0.6095, + "step": 2090 + }, + { + "epoch": 0.3081041257367387, + "grad_norm": 0.5850422978401184, + "learning_rate": 4.881775867306702e-06, + "loss": 0.6102, + "step": 2091 + }, + { + "epoch": 0.30825147347740667, + "grad_norm": 0.5786942839622498, + "learning_rate": 4.8816580251033675e-06, + "loss": 0.5734, + "step": 2092 + }, + { + "epoch": 0.30839882121807466, + "grad_norm": 0.5938846468925476, + "learning_rate": 4.881540125622275e-06, + "loss": 0.6017, + "step": 2093 + }, + { + "epoch": 0.30854616895874265, + "grad_norm": 0.5683943033218384, + "learning_rate": 4.88142216886626e-06, + "loss": 0.5762, + "step": 2094 + }, + { + "epoch": 0.3086935166994106, + "grad_norm": 0.5995170474052429, + "learning_rate": 4.8813041548381575e-06, + "loss": 0.5984, + "step": 2095 + }, + { + "epoch": 0.3088408644400786, + "grad_norm": 0.572146475315094, + "learning_rate": 4.881186083540807e-06, + "loss": 0.592, + "step": 2096 + }, + { + "epoch": 0.30898821218074657, + "grad_norm": 0.6374772191047668, + "learning_rate": 4.881067954977049e-06, + "loss": 0.6112, + "step": 2097 + }, + { + "epoch": 0.30913555992141456, + "grad_norm": 0.6203997135162354, + "learning_rate": 4.880949769149723e-06, + "loss": 0.5914, + "step": 2098 + }, + { + "epoch": 0.3092829076620825, + "grad_norm": 0.6084123849868774, + "learning_rate": 4.880831526061673e-06, + "loss": 0.5983, + "step": 2099 + }, + { + "epoch": 0.3094302554027505, + "grad_norm": 0.5966019034385681, + "learning_rate": 4.88071322571574e-06, + "loss": 0.5816, + "step": 2100 + }, + { + "epoch": 0.3095776031434185, + "grad_norm": 0.6142233610153198, + "learning_rate": 4.880594868114771e-06, + "loss": 0.5936, + "step": 2101 + }, + { + "epoch": 0.30972495088408647, + "grad_norm": 0.6069533824920654, + "learning_rate": 4.880476453261611e-06, + "loss": 0.6117, + "step": 2102 + }, + { + "epoch": 0.3098722986247544, + "grad_norm": 0.5785217881202698, + "learning_rate": 4.88035798115911e-06, + "loss": 0.5777, + "step": 2103 + }, + { + "epoch": 0.3100196463654224, + "grad_norm": 0.6817132830619812, + "learning_rate": 4.880239451810116e-06, + "loss": 0.5949, + "step": 2104 + }, + { + "epoch": 0.3101669941060904, + "grad_norm": 0.6132467985153198, + "learning_rate": 4.880120865217479e-06, + "loss": 0.614, + "step": 2105 + }, + { + "epoch": 0.3103143418467583, + "grad_norm": 0.5618532299995422, + "learning_rate": 4.880002221384052e-06, + "loss": 0.5953, + "step": 2106 + }, + { + "epoch": 0.3104616895874263, + "grad_norm": 0.5864328742027283, + "learning_rate": 4.879883520312687e-06, + "loss": 0.617, + "step": 2107 + }, + { + "epoch": 0.3106090373280943, + "grad_norm": 0.6022245287895203, + "learning_rate": 4.87976476200624e-06, + "loss": 0.6249, + "step": 2108 + }, + { + "epoch": 0.3107563850687623, + "grad_norm": 0.5710820555686951, + "learning_rate": 4.8796459464675675e-06, + "loss": 0.6062, + "step": 2109 + }, + { + "epoch": 0.31090373280943023, + "grad_norm": 0.6006269454956055, + "learning_rate": 4.879527073699526e-06, + "loss": 0.5931, + "step": 2110 + }, + { + "epoch": 0.3110510805500982, + "grad_norm": 0.6240952610969543, + "learning_rate": 4.8794081437049735e-06, + "loss": 0.5787, + "step": 2111 + }, + { + "epoch": 0.3111984282907662, + "grad_norm": 0.6092895865440369, + "learning_rate": 4.879289156486772e-06, + "loss": 0.5958, + "step": 2112 + }, + { + "epoch": 0.3113457760314342, + "grad_norm": 0.6149207949638367, + "learning_rate": 4.879170112047782e-06, + "loss": 0.5559, + "step": 2113 + }, + { + "epoch": 0.31149312377210214, + "grad_norm": 0.5968666076660156, + "learning_rate": 4.879051010390866e-06, + "loss": 0.5734, + "step": 2114 + }, + { + "epoch": 0.31164047151277013, + "grad_norm": 0.5845307111740112, + "learning_rate": 4.87893185151889e-06, + "loss": 0.5616, + "step": 2115 + }, + { + "epoch": 0.3117878192534381, + "grad_norm": 0.5933300256729126, + "learning_rate": 4.8788126354347185e-06, + "loss": 0.6031, + "step": 2116 + }, + { + "epoch": 0.3119351669941061, + "grad_norm": 0.6250542402267456, + "learning_rate": 4.878693362141218e-06, + "loss": 0.6188, + "step": 2117 + }, + { + "epoch": 0.31208251473477405, + "grad_norm": 0.6197031140327454, + "learning_rate": 4.878574031641259e-06, + "loss": 0.5731, + "step": 2118 + }, + { + "epoch": 0.31222986247544204, + "grad_norm": 0.5680421590805054, + "learning_rate": 4.878454643937709e-06, + "loss": 0.6062, + "step": 2119 + }, + { + "epoch": 0.31237721021611004, + "grad_norm": 0.5958638191223145, + "learning_rate": 4.8783351990334415e-06, + "loss": 0.576, + "step": 2120 + }, + { + "epoch": 0.31252455795677797, + "grad_norm": 0.5942214131355286, + "learning_rate": 4.8782156969313275e-06, + "loss": 0.6225, + "step": 2121 + }, + { + "epoch": 0.31267190569744596, + "grad_norm": 0.5810866355895996, + "learning_rate": 4.878096137634242e-06, + "loss": 0.6104, + "step": 2122 + }, + { + "epoch": 0.31281925343811395, + "grad_norm": 0.5907482504844666, + "learning_rate": 4.877976521145058e-06, + "loss": 0.6013, + "step": 2123 + }, + { + "epoch": 0.31296660117878194, + "grad_norm": 0.6150843501091003, + "learning_rate": 4.877856847466656e-06, + "loss": 0.609, + "step": 2124 + }, + { + "epoch": 0.3131139489194499, + "grad_norm": 0.6019913554191589, + "learning_rate": 4.877737116601911e-06, + "loss": 0.6355, + "step": 2125 + }, + { + "epoch": 0.31326129666011787, + "grad_norm": 0.5821546316146851, + "learning_rate": 4.8776173285537045e-06, + "loss": 0.6157, + "step": 2126 + }, + { + "epoch": 0.31340864440078586, + "grad_norm": 0.5919125080108643, + "learning_rate": 4.877497483324916e-06, + "loss": 0.5653, + "step": 2127 + }, + { + "epoch": 0.31355599214145385, + "grad_norm": 0.6609360575675964, + "learning_rate": 4.877377580918427e-06, + "loss": 0.5669, + "step": 2128 + }, + { + "epoch": 0.3137033398821218, + "grad_norm": 0.5898580551147461, + "learning_rate": 4.8772576213371235e-06, + "loss": 0.5771, + "step": 2129 + }, + { + "epoch": 0.3138506876227898, + "grad_norm": 0.5854485034942627, + "learning_rate": 4.877137604583889e-06, + "loss": 0.5983, + "step": 2130 + }, + { + "epoch": 0.3139980353634578, + "grad_norm": 0.6096594333648682, + "learning_rate": 4.8770175306616106e-06, + "loss": 0.5861, + "step": 2131 + }, + { + "epoch": 0.31414538310412576, + "grad_norm": 0.6211555004119873, + "learning_rate": 4.876897399573175e-06, + "loss": 0.6312, + "step": 2132 + }, + { + "epoch": 0.3142927308447937, + "grad_norm": 0.6407020688056946, + "learning_rate": 4.87677721132147e-06, + "loss": 0.5913, + "step": 2133 + }, + { + "epoch": 0.3144400785854617, + "grad_norm": 0.6118552088737488, + "learning_rate": 4.8766569659093906e-06, + "loss": 0.6012, + "step": 2134 + }, + { + "epoch": 0.3145874263261297, + "grad_norm": 0.5963091254234314, + "learning_rate": 4.876536663339825e-06, + "loss": 0.6086, + "step": 2135 + }, + { + "epoch": 0.3147347740667976, + "grad_norm": 0.5812034010887146, + "learning_rate": 4.876416303615667e-06, + "loss": 0.6053, + "step": 2136 + }, + { + "epoch": 0.3148821218074656, + "grad_norm": 0.5978085994720459, + "learning_rate": 4.876295886739811e-06, + "loss": 0.5741, + "step": 2137 + }, + { + "epoch": 0.3150294695481336, + "grad_norm": 0.6923264265060425, + "learning_rate": 4.876175412715154e-06, + "loss": 0.5944, + "step": 2138 + }, + { + "epoch": 0.3151768172888016, + "grad_norm": 0.5929982662200928, + "learning_rate": 4.876054881544593e-06, + "loss": 0.5861, + "step": 2139 + }, + { + "epoch": 0.3153241650294695, + "grad_norm": 0.5660811066627502, + "learning_rate": 4.875934293231026e-06, + "loss": 0.6011, + "step": 2140 + }, + { + "epoch": 0.3154715127701375, + "grad_norm": 0.5713077187538147, + "learning_rate": 4.875813647777354e-06, + "loss": 0.6213, + "step": 2141 + }, + { + "epoch": 0.3156188605108055, + "grad_norm": 0.576054573059082, + "learning_rate": 4.8756929451864784e-06, + "loss": 0.5935, + "step": 2142 + }, + { + "epoch": 0.3157662082514735, + "grad_norm": 0.584564208984375, + "learning_rate": 4.8755721854613015e-06, + "loss": 0.6263, + "step": 2143 + }, + { + "epoch": 0.31591355599214144, + "grad_norm": 0.5973639488220215, + "learning_rate": 4.875451368604728e-06, + "loss": 0.5899, + "step": 2144 + }, + { + "epoch": 0.31606090373280943, + "grad_norm": 0.6318942308425903, + "learning_rate": 4.875330494619663e-06, + "loss": 0.5956, + "step": 2145 + }, + { + "epoch": 0.3162082514734774, + "grad_norm": 0.5748934149742126, + "learning_rate": 4.875209563509014e-06, + "loss": 0.587, + "step": 2146 + }, + { + "epoch": 0.31635559921414536, + "grad_norm": 0.6286715269088745, + "learning_rate": 4.875088575275688e-06, + "loss": 0.5973, + "step": 2147 + }, + { + "epoch": 0.31650294695481335, + "grad_norm": 0.5757050514221191, + "learning_rate": 4.874967529922596e-06, + "loss": 0.5851, + "step": 2148 + }, + { + "epoch": 0.31665029469548134, + "grad_norm": 0.5941868424415588, + "learning_rate": 4.87484642745265e-06, + "loss": 0.5917, + "step": 2149 + }, + { + "epoch": 0.31679764243614933, + "grad_norm": 0.5962218642234802, + "learning_rate": 4.8747252678687606e-06, + "loss": 0.5865, + "step": 2150 + }, + { + "epoch": 0.31694499017681727, + "grad_norm": 0.5808122158050537, + "learning_rate": 4.874604051173842e-06, + "loss": 0.5653, + "step": 2151 + }, + { + "epoch": 0.31709233791748526, + "grad_norm": 0.5823003053665161, + "learning_rate": 4.87448277737081e-06, + "loss": 0.6059, + "step": 2152 + }, + { + "epoch": 0.31723968565815325, + "grad_norm": 0.5917749404907227, + "learning_rate": 4.874361446462581e-06, + "loss": 0.6155, + "step": 2153 + }, + { + "epoch": 0.31738703339882124, + "grad_norm": 0.5449634194374084, + "learning_rate": 4.874240058452073e-06, + "loss": 0.5634, + "step": 2154 + }, + { + "epoch": 0.3175343811394892, + "grad_norm": 0.5668727159500122, + "learning_rate": 4.874118613342205e-06, + "loss": 0.5974, + "step": 2155 + }, + { + "epoch": 0.31768172888015717, + "grad_norm": 0.5773119330406189, + "learning_rate": 4.873997111135898e-06, + "loss": 0.6101, + "step": 2156 + }, + { + "epoch": 0.31782907662082516, + "grad_norm": 0.5795831084251404, + "learning_rate": 4.873875551836073e-06, + "loss": 0.6085, + "step": 2157 + }, + { + "epoch": 0.31797642436149315, + "grad_norm": 0.5988324880599976, + "learning_rate": 4.8737539354456555e-06, + "loss": 0.5861, + "step": 2158 + }, + { + "epoch": 0.3181237721021611, + "grad_norm": 0.6130087971687317, + "learning_rate": 4.8736322619675685e-06, + "loss": 0.6055, + "step": 2159 + }, + { + "epoch": 0.3182711198428291, + "grad_norm": 0.5748805403709412, + "learning_rate": 4.87351053140474e-06, + "loss": 0.5926, + "step": 2160 + }, + { + "epoch": 0.31841846758349707, + "grad_norm": 0.6096751093864441, + "learning_rate": 4.8733887437600945e-06, + "loss": 0.6045, + "step": 2161 + }, + { + "epoch": 0.318565815324165, + "grad_norm": 0.6100572943687439, + "learning_rate": 4.873266899036565e-06, + "loss": 0.64, + "step": 2162 + }, + { + "epoch": 0.318713163064833, + "grad_norm": 0.6049622297286987, + "learning_rate": 4.873144997237078e-06, + "loss": 0.6059, + "step": 2163 + }, + { + "epoch": 0.318860510805501, + "grad_norm": 0.5845608115196228, + "learning_rate": 4.873023038364569e-06, + "loss": 0.5967, + "step": 2164 + }, + { + "epoch": 0.319007858546169, + "grad_norm": 0.5879038572311401, + "learning_rate": 4.872901022421967e-06, + "loss": 0.6018, + "step": 2165 + }, + { + "epoch": 0.3191552062868369, + "grad_norm": 0.5962762832641602, + "learning_rate": 4.872778949412209e-06, + "loss": 0.5805, + "step": 2166 + }, + { + "epoch": 0.3193025540275049, + "grad_norm": 0.5852685570716858, + "learning_rate": 4.87265681933823e-06, + "loss": 0.6154, + "step": 2167 + }, + { + "epoch": 0.3194499017681729, + "grad_norm": 0.591407835483551, + "learning_rate": 4.8725346322029675e-06, + "loss": 0.6248, + "step": 2168 + }, + { + "epoch": 0.3195972495088409, + "grad_norm": 0.6024535894393921, + "learning_rate": 4.8724123880093595e-06, + "loss": 0.5976, + "step": 2169 + }, + { + "epoch": 0.3197445972495088, + "grad_norm": 0.5779682397842407, + "learning_rate": 4.872290086760347e-06, + "loss": 0.6263, + "step": 2170 + }, + { + "epoch": 0.3198919449901768, + "grad_norm": 0.608805239200592, + "learning_rate": 4.872167728458871e-06, + "loss": 0.6116, + "step": 2171 + }, + { + "epoch": 0.3200392927308448, + "grad_norm": 0.5810257792472839, + "learning_rate": 4.872045313107873e-06, + "loss": 0.6052, + "step": 2172 + }, + { + "epoch": 0.3201866404715128, + "grad_norm": 0.6180810928344727, + "learning_rate": 4.871922840710298e-06, + "loss": 0.5796, + "step": 2173 + }, + { + "epoch": 0.32033398821218073, + "grad_norm": 0.5830985307693481, + "learning_rate": 4.87180031126909e-06, + "loss": 0.5574, + "step": 2174 + }, + { + "epoch": 0.3204813359528487, + "grad_norm": 0.5898669362068176, + "learning_rate": 4.871677724787198e-06, + "loss": 0.5867, + "step": 2175 + }, + { + "epoch": 0.3206286836935167, + "grad_norm": 0.6009849905967712, + "learning_rate": 4.871555081267569e-06, + "loss": 0.5976, + "step": 2176 + }, + { + "epoch": 0.32077603143418465, + "grad_norm": 0.6056637167930603, + "learning_rate": 4.871432380713153e-06, + "loss": 0.5858, + "step": 2177 + }, + { + "epoch": 0.32092337917485264, + "grad_norm": 0.5768091082572937, + "learning_rate": 4.8713096231269e-06, + "loss": 0.5976, + "step": 2178 + }, + { + "epoch": 0.32107072691552063, + "grad_norm": 0.5787760019302368, + "learning_rate": 4.871186808511763e-06, + "loss": 0.5941, + "step": 2179 + }, + { + "epoch": 0.3212180746561886, + "grad_norm": 0.5900270342826843, + "learning_rate": 4.871063936870696e-06, + "loss": 0.5962, + "step": 2180 + }, + { + "epoch": 0.32136542239685656, + "grad_norm": 0.60186767578125, + "learning_rate": 4.870941008206652e-06, + "loss": 0.6157, + "step": 2181 + }, + { + "epoch": 0.32151277013752455, + "grad_norm": 0.6097154021263123, + "learning_rate": 4.87081802252259e-06, + "loss": 0.616, + "step": 2182 + }, + { + "epoch": 0.32166011787819254, + "grad_norm": 0.5955938100814819, + "learning_rate": 4.8706949798214664e-06, + "loss": 0.5793, + "step": 2183 + }, + { + "epoch": 0.32180746561886053, + "grad_norm": 0.5878384113311768, + "learning_rate": 4.87057188010624e-06, + "loss": 0.5999, + "step": 2184 + }, + { + "epoch": 0.32195481335952847, + "grad_norm": 0.5999186038970947, + "learning_rate": 4.870448723379871e-06, + "loss": 0.571, + "step": 2185 + }, + { + "epoch": 0.32210216110019646, + "grad_norm": 0.5920656323432922, + "learning_rate": 4.870325509645324e-06, + "loss": 0.5958, + "step": 2186 + }, + { + "epoch": 0.32224950884086445, + "grad_norm": 0.5821751952171326, + "learning_rate": 4.870202238905559e-06, + "loss": 0.6178, + "step": 2187 + }, + { + "epoch": 0.32239685658153244, + "grad_norm": 0.6013035774230957, + "learning_rate": 4.870078911163541e-06, + "loss": 0.6007, + "step": 2188 + }, + { + "epoch": 0.3225442043222004, + "grad_norm": 0.5865618586540222, + "learning_rate": 4.869955526422239e-06, + "loss": 0.6158, + "step": 2189 + }, + { + "epoch": 0.32269155206286837, + "grad_norm": 0.6007393598556519, + "learning_rate": 4.869832084684617e-06, + "loss": 0.6135, + "step": 2190 + }, + { + "epoch": 0.32283889980353636, + "grad_norm": 0.5889708399772644, + "learning_rate": 4.869708585953645e-06, + "loss": 0.57, + "step": 2191 + }, + { + "epoch": 0.3229862475442043, + "grad_norm": 0.594242513179779, + "learning_rate": 4.869585030232292e-06, + "loss": 0.5959, + "step": 2192 + }, + { + "epoch": 0.3231335952848723, + "grad_norm": 0.5670077204704285, + "learning_rate": 4.869461417523532e-06, + "loss": 0.5583, + "step": 2193 + }, + { + "epoch": 0.3232809430255403, + "grad_norm": 0.6016299724578857, + "learning_rate": 4.869337747830335e-06, + "loss": 0.62, + "step": 2194 + }, + { + "epoch": 0.32342829076620827, + "grad_norm": 0.6176084280014038, + "learning_rate": 4.869214021155677e-06, + "loss": 0.6278, + "step": 2195 + }, + { + "epoch": 0.3235756385068762, + "grad_norm": 0.5978299379348755, + "learning_rate": 4.869090237502533e-06, + "loss": 0.6095, + "step": 2196 + }, + { + "epoch": 0.3237229862475442, + "grad_norm": 0.6096727252006531, + "learning_rate": 4.8689663968738805e-06, + "loss": 0.6159, + "step": 2197 + }, + { + "epoch": 0.3238703339882122, + "grad_norm": 0.6076422929763794, + "learning_rate": 4.868842499272697e-06, + "loss": 0.5834, + "step": 2198 + }, + { + "epoch": 0.3240176817288802, + "grad_norm": 0.6166142225265503, + "learning_rate": 4.868718544701962e-06, + "loss": 0.6367, + "step": 2199 + }, + { + "epoch": 0.3241650294695481, + "grad_norm": 0.5840990543365479, + "learning_rate": 4.868594533164657e-06, + "loss": 0.616, + "step": 2200 + }, + { + "epoch": 0.3243123772102161, + "grad_norm": 0.582911491394043, + "learning_rate": 4.868470464663765e-06, + "loss": 0.6041, + "step": 2201 + }, + { + "epoch": 0.3244597249508841, + "grad_norm": 0.5828586220741272, + "learning_rate": 4.868346339202268e-06, + "loss": 0.6081, + "step": 2202 + }, + { + "epoch": 0.3246070726915521, + "grad_norm": 0.5729673504829407, + "learning_rate": 4.868222156783154e-06, + "loss": 0.5863, + "step": 2203 + }, + { + "epoch": 0.32475442043222, + "grad_norm": 0.6141967177391052, + "learning_rate": 4.868097917409407e-06, + "loss": 0.6073, + "step": 2204 + }, + { + "epoch": 0.324901768172888, + "grad_norm": 0.5930508971214294, + "learning_rate": 4.867973621084015e-06, + "loss": 0.5878, + "step": 2205 + }, + { + "epoch": 0.325049115913556, + "grad_norm": 0.5535299777984619, + "learning_rate": 4.86784926780997e-06, + "loss": 0.559, + "step": 2206 + }, + { + "epoch": 0.32519646365422394, + "grad_norm": 0.5889731645584106, + "learning_rate": 4.867724857590259e-06, + "loss": 0.6259, + "step": 2207 + }, + { + "epoch": 0.32534381139489194, + "grad_norm": 0.5741504430770874, + "learning_rate": 4.867600390427876e-06, + "loss": 0.6025, + "step": 2208 + }, + { + "epoch": 0.3254911591355599, + "grad_norm": 0.5871989727020264, + "learning_rate": 4.8674758663258145e-06, + "loss": 0.6285, + "step": 2209 + }, + { + "epoch": 0.3256385068762279, + "grad_norm": 0.5827222466468811, + "learning_rate": 4.867351285287068e-06, + "loss": 0.5607, + "step": 2210 + }, + { + "epoch": 0.32578585461689585, + "grad_norm": 0.6208155751228333, + "learning_rate": 4.867226647314634e-06, + "loss": 0.6175, + "step": 2211 + }, + { + "epoch": 0.32593320235756384, + "grad_norm": 0.5860605835914612, + "learning_rate": 4.8671019524115105e-06, + "loss": 0.6216, + "step": 2212 + }, + { + "epoch": 0.32608055009823184, + "grad_norm": 0.5968360304832458, + "learning_rate": 4.866977200580694e-06, + "loss": 0.6134, + "step": 2213 + }, + { + "epoch": 0.3262278978388998, + "grad_norm": 0.6182458996772766, + "learning_rate": 4.866852391825187e-06, + "loss": 0.57, + "step": 2214 + }, + { + "epoch": 0.32637524557956776, + "grad_norm": 0.5956223011016846, + "learning_rate": 4.86672752614799e-06, + "loss": 0.6014, + "step": 2215 + }, + { + "epoch": 0.32652259332023575, + "grad_norm": 0.5907906293869019, + "learning_rate": 4.866602603552105e-06, + "loss": 0.622, + "step": 2216 + }, + { + "epoch": 0.32666994106090375, + "grad_norm": 0.6244112253189087, + "learning_rate": 4.866477624040537e-06, + "loss": 0.5802, + "step": 2217 + }, + { + "epoch": 0.3268172888015717, + "grad_norm": 0.5923799872398376, + "learning_rate": 4.866352587616293e-06, + "loss": 0.5932, + "step": 2218 + }, + { + "epoch": 0.3269646365422397, + "grad_norm": 0.5848544836044312, + "learning_rate": 4.86622749428238e-06, + "loss": 0.6009, + "step": 2219 + }, + { + "epoch": 0.32711198428290766, + "grad_norm": 0.5830314755439758, + "learning_rate": 4.866102344041804e-06, + "loss": 0.6145, + "step": 2220 + }, + { + "epoch": 0.32725933202357566, + "grad_norm": 0.6093339920043945, + "learning_rate": 4.865977136897578e-06, + "loss": 0.6028, + "step": 2221 + }, + { + "epoch": 0.3274066797642436, + "grad_norm": 0.5996274948120117, + "learning_rate": 4.865851872852711e-06, + "loss": 0.5853, + "step": 2222 + }, + { + "epoch": 0.3275540275049116, + "grad_norm": 0.5902007222175598, + "learning_rate": 4.865726551910215e-06, + "loss": 0.6049, + "step": 2223 + }, + { + "epoch": 0.3277013752455796, + "grad_norm": 0.6048275232315063, + "learning_rate": 4.865601174073106e-06, + "loss": 0.6309, + "step": 2224 + }, + { + "epoch": 0.32784872298624756, + "grad_norm": 0.6004360914230347, + "learning_rate": 4.865475739344399e-06, + "loss": 0.6175, + "step": 2225 + }, + { + "epoch": 0.3279960707269155, + "grad_norm": 0.616046667098999, + "learning_rate": 4.865350247727109e-06, + "loss": 0.5826, + "step": 2226 + }, + { + "epoch": 0.3281434184675835, + "grad_norm": 0.6074836254119873, + "learning_rate": 4.865224699224255e-06, + "loss": 0.6215, + "step": 2227 + }, + { + "epoch": 0.3282907662082515, + "grad_norm": 0.5952644348144531, + "learning_rate": 4.865099093838855e-06, + "loss": 0.6018, + "step": 2228 + }, + { + "epoch": 0.3284381139489195, + "grad_norm": 0.5951140522956848, + "learning_rate": 4.8649734315739325e-06, + "loss": 0.5963, + "step": 2229 + }, + { + "epoch": 0.3285854616895874, + "grad_norm": 0.5936618447303772, + "learning_rate": 4.864847712432509e-06, + "loss": 0.5826, + "step": 2230 + }, + { + "epoch": 0.3287328094302554, + "grad_norm": 0.5704876780509949, + "learning_rate": 4.864721936417606e-06, + "loss": 0.5895, + "step": 2231 + }, + { + "epoch": 0.3288801571709234, + "grad_norm": 0.6221460103988647, + "learning_rate": 4.86459610353225e-06, + "loss": 0.635, + "step": 2232 + }, + { + "epoch": 0.32902750491159133, + "grad_norm": 0.6128721237182617, + "learning_rate": 4.864470213779467e-06, + "loss": 0.6402, + "step": 2233 + }, + { + "epoch": 0.3291748526522593, + "grad_norm": 0.6576764583587646, + "learning_rate": 4.864344267162285e-06, + "loss": 0.6242, + "step": 2234 + }, + { + "epoch": 0.3293222003929273, + "grad_norm": 0.5772041082382202, + "learning_rate": 4.86421826368373e-06, + "loss": 0.5961, + "step": 2235 + }, + { + "epoch": 0.3294695481335953, + "grad_norm": 0.5967355370521545, + "learning_rate": 4.864092203346837e-06, + "loss": 0.6159, + "step": 2236 + }, + { + "epoch": 0.32961689587426324, + "grad_norm": 0.5861693620681763, + "learning_rate": 4.863966086154633e-06, + "loss": 0.5968, + "step": 2237 + }, + { + "epoch": 0.32976424361493123, + "grad_norm": 0.6095735430717468, + "learning_rate": 4.863839912110156e-06, + "loss": 0.6103, + "step": 2238 + }, + { + "epoch": 0.3299115913555992, + "grad_norm": 0.5851233005523682, + "learning_rate": 4.863713681216436e-06, + "loss": 0.611, + "step": 2239 + }, + { + "epoch": 0.3300589390962672, + "grad_norm": 0.6094527840614319, + "learning_rate": 4.863587393476511e-06, + "loss": 0.5635, + "step": 2240 + }, + { + "epoch": 0.33020628683693515, + "grad_norm": 0.599046528339386, + "learning_rate": 4.8634610488934184e-06, + "loss": 0.6078, + "step": 2241 + }, + { + "epoch": 0.33035363457760314, + "grad_norm": 0.5762603878974915, + "learning_rate": 4.863334647470195e-06, + "loss": 0.6038, + "step": 2242 + }, + { + "epoch": 0.33050098231827113, + "grad_norm": 0.5714776515960693, + "learning_rate": 4.863208189209882e-06, + "loss": 0.5898, + "step": 2243 + }, + { + "epoch": 0.3306483300589391, + "grad_norm": 0.599219799041748, + "learning_rate": 4.863081674115522e-06, + "loss": 0.5865, + "step": 2244 + }, + { + "epoch": 0.33079567779960706, + "grad_norm": 0.5880495309829712, + "learning_rate": 4.8629551021901545e-06, + "loss": 0.6096, + "step": 2245 + }, + { + "epoch": 0.33094302554027505, + "grad_norm": 0.631159245967865, + "learning_rate": 4.862828473436826e-06, + "loss": 0.6023, + "step": 2246 + }, + { + "epoch": 0.33109037328094304, + "grad_norm": 0.5759385228157043, + "learning_rate": 4.86270178785858e-06, + "loss": 0.6181, + "step": 2247 + }, + { + "epoch": 0.331237721021611, + "grad_norm": 0.5966237187385559, + "learning_rate": 4.862575045458465e-06, + "loss": 0.585, + "step": 2248 + }, + { + "epoch": 0.33138506876227897, + "grad_norm": 0.5967074036598206, + "learning_rate": 4.862448246239528e-06, + "loss": 0.6068, + "step": 2249 + }, + { + "epoch": 0.33153241650294696, + "grad_norm": 0.582162082195282, + "learning_rate": 4.862321390204819e-06, + "loss": 0.5657, + "step": 2250 + }, + { + "epoch": 0.33167976424361495, + "grad_norm": 0.5863389372825623, + "learning_rate": 4.862194477357388e-06, + "loss": 0.5902, + "step": 2251 + }, + { + "epoch": 0.3318271119842829, + "grad_norm": 0.5755064487457275, + "learning_rate": 4.862067507700288e-06, + "loss": 0.5789, + "step": 2252 + }, + { + "epoch": 0.3319744597249509, + "grad_norm": 0.5709955096244812, + "learning_rate": 4.861940481236572e-06, + "loss": 0.6154, + "step": 2253 + }, + { + "epoch": 0.33212180746561887, + "grad_norm": 0.5788570642471313, + "learning_rate": 4.861813397969296e-06, + "loss": 0.6078, + "step": 2254 + }, + { + "epoch": 0.33226915520628686, + "grad_norm": 0.6144595146179199, + "learning_rate": 4.861686257901516e-06, + "loss": 0.5927, + "step": 2255 + }, + { + "epoch": 0.3324165029469548, + "grad_norm": 0.6089308261871338, + "learning_rate": 4.8615590610362875e-06, + "loss": 0.6033, + "step": 2256 + }, + { + "epoch": 0.3325638506876228, + "grad_norm": 0.5873708128929138, + "learning_rate": 4.861431807376672e-06, + "loss": 0.5997, + "step": 2257 + }, + { + "epoch": 0.3327111984282908, + "grad_norm": 0.603534460067749, + "learning_rate": 4.86130449692573e-06, + "loss": 0.5901, + "step": 2258 + }, + { + "epoch": 0.33285854616895877, + "grad_norm": 0.5820540189743042, + "learning_rate": 4.861177129686521e-06, + "loss": 0.6, + "step": 2259 + }, + { + "epoch": 0.3330058939096267, + "grad_norm": 0.5798094272613525, + "learning_rate": 4.8610497056621095e-06, + "loss": 0.6203, + "step": 2260 + }, + { + "epoch": 0.3331532416502947, + "grad_norm": 0.6244040131568909, + "learning_rate": 4.860922224855561e-06, + "loss": 0.6167, + "step": 2261 + }, + { + "epoch": 0.3333005893909627, + "grad_norm": 0.6177367568016052, + "learning_rate": 4.860794687269939e-06, + "loss": 0.5342, + "step": 2262 + }, + { + "epoch": 0.3334479371316306, + "grad_norm": 0.5908904671669006, + "learning_rate": 4.860667092908314e-06, + "loss": 0.588, + "step": 2263 + }, + { + "epoch": 0.3335952848722986, + "grad_norm": 0.5939189791679382, + "learning_rate": 4.860539441773751e-06, + "loss": 0.5751, + "step": 2264 + }, + { + "epoch": 0.3337426326129666, + "grad_norm": 0.6087237000465393, + "learning_rate": 4.860411733869323e-06, + "loss": 0.6247, + "step": 2265 + }, + { + "epoch": 0.3338899803536346, + "grad_norm": 0.5814547538757324, + "learning_rate": 4.860283969198099e-06, + "loss": 0.5966, + "step": 2266 + }, + { + "epoch": 0.33403732809430253, + "grad_norm": 0.5931536555290222, + "learning_rate": 4.860156147763152e-06, + "loss": 0.5672, + "step": 2267 + }, + { + "epoch": 0.3341846758349705, + "grad_norm": 0.563262403011322, + "learning_rate": 4.860028269567557e-06, + "loss": 0.6005, + "step": 2268 + }, + { + "epoch": 0.3343320235756385, + "grad_norm": 0.5912147164344788, + "learning_rate": 4.859900334614389e-06, + "loss": 0.6153, + "step": 2269 + }, + { + "epoch": 0.3344793713163065, + "grad_norm": 0.5828366875648499, + "learning_rate": 4.859772342906725e-06, + "loss": 0.588, + "step": 2270 + }, + { + "epoch": 0.33462671905697444, + "grad_norm": 0.6024062633514404, + "learning_rate": 4.859644294447643e-06, + "loss": 0.5999, + "step": 2271 + }, + { + "epoch": 0.33477406679764243, + "grad_norm": 0.5912784337997437, + "learning_rate": 4.859516189240223e-06, + "loss": 0.5753, + "step": 2272 + }, + { + "epoch": 0.3349214145383104, + "grad_norm": 0.5879456400871277, + "learning_rate": 4.859388027287545e-06, + "loss": 0.5746, + "step": 2273 + }, + { + "epoch": 0.3350687622789784, + "grad_norm": 0.6001894474029541, + "learning_rate": 4.859259808592691e-06, + "loss": 0.588, + "step": 2274 + }, + { + "epoch": 0.33521611001964635, + "grad_norm": 0.6218218803405762, + "learning_rate": 4.8591315331587455e-06, + "loss": 0.5833, + "step": 2275 + }, + { + "epoch": 0.33536345776031434, + "grad_norm": 0.5929319262504578, + "learning_rate": 4.859003200988793e-06, + "loss": 0.6154, + "step": 2276 + }, + { + "epoch": 0.33551080550098233, + "grad_norm": 0.5665820837020874, + "learning_rate": 4.858874812085921e-06, + "loss": 0.607, + "step": 2277 + }, + { + "epoch": 0.33565815324165027, + "grad_norm": 0.5998519659042358, + "learning_rate": 4.858746366453215e-06, + "loss": 0.6229, + "step": 2278 + }, + { + "epoch": 0.33580550098231826, + "grad_norm": 0.5910847187042236, + "learning_rate": 4.858617864093766e-06, + "loss": 0.574, + "step": 2279 + }, + { + "epoch": 0.33595284872298625, + "grad_norm": 0.5939415693283081, + "learning_rate": 4.858489305010662e-06, + "loss": 0.601, + "step": 2280 + }, + { + "epoch": 0.33610019646365424, + "grad_norm": 0.5961445569992065, + "learning_rate": 4.858360689206998e-06, + "loss": 0.6087, + "step": 2281 + }, + { + "epoch": 0.3362475442043222, + "grad_norm": 0.5915125608444214, + "learning_rate": 4.858232016685865e-06, + "loss": 0.5944, + "step": 2282 + }, + { + "epoch": 0.33639489194499017, + "grad_norm": 0.6012968420982361, + "learning_rate": 4.858103287450358e-06, + "loss": 0.5836, + "step": 2283 + }, + { + "epoch": 0.33654223968565816, + "grad_norm": 0.6068551540374756, + "learning_rate": 4.8579745015035726e-06, + "loss": 0.5728, + "step": 2284 + }, + { + "epoch": 0.33668958742632615, + "grad_norm": 0.5853287577629089, + "learning_rate": 4.857845658848607e-06, + "loss": 0.595, + "step": 2285 + }, + { + "epoch": 0.3368369351669941, + "grad_norm": 0.6079810857772827, + "learning_rate": 4.85771675948856e-06, + "loss": 0.6265, + "step": 2286 + }, + { + "epoch": 0.3369842829076621, + "grad_norm": 0.5826635956764221, + "learning_rate": 4.857587803426529e-06, + "loss": 0.6163, + "step": 2287 + }, + { + "epoch": 0.33713163064833007, + "grad_norm": 0.5699127912521362, + "learning_rate": 4.857458790665618e-06, + "loss": 0.5686, + "step": 2288 + }, + { + "epoch": 0.33727897838899806, + "grad_norm": 0.5766939520835876, + "learning_rate": 4.857329721208929e-06, + "loss": 0.5478, + "step": 2289 + }, + { + "epoch": 0.337426326129666, + "grad_norm": 0.6062702536582947, + "learning_rate": 4.8572005950595655e-06, + "loss": 0.6296, + "step": 2290 + }, + { + "epoch": 0.337573673870334, + "grad_norm": 0.6307411789894104, + "learning_rate": 4.857071412220634e-06, + "loss": 0.606, + "step": 2291 + }, + { + "epoch": 0.337721021611002, + "grad_norm": 0.6088112592697144, + "learning_rate": 4.85694217269524e-06, + "loss": 0.5927, + "step": 2292 + }, + { + "epoch": 0.3378683693516699, + "grad_norm": 0.600389301776886, + "learning_rate": 4.856812876486492e-06, + "loss": 0.5837, + "step": 2293 + }, + { + "epoch": 0.3380157170923379, + "grad_norm": 0.6373035311698914, + "learning_rate": 4.8566835235975e-06, + "loss": 0.6009, + "step": 2294 + }, + { + "epoch": 0.3381630648330059, + "grad_norm": 0.5996668934822083, + "learning_rate": 4.8565541140313745e-06, + "loss": 0.5756, + "step": 2295 + }, + { + "epoch": 0.3383104125736739, + "grad_norm": 0.6054746508598328, + "learning_rate": 4.856424647791228e-06, + "loss": 0.6313, + "step": 2296 + }, + { + "epoch": 0.3384577603143418, + "grad_norm": 0.5829976797103882, + "learning_rate": 4.856295124880174e-06, + "loss": 0.5953, + "step": 2297 + }, + { + "epoch": 0.3386051080550098, + "grad_norm": 0.5946535468101501, + "learning_rate": 4.856165545301327e-06, + "loss": 0.5711, + "step": 2298 + }, + { + "epoch": 0.3387524557956778, + "grad_norm": 0.572213888168335, + "learning_rate": 4.8560359090578035e-06, + "loss": 0.5984, + "step": 2299 + }, + { + "epoch": 0.3388998035363458, + "grad_norm": 0.5943171977996826, + "learning_rate": 4.855906216152722e-06, + "loss": 0.5884, + "step": 2300 + }, + { + "epoch": 0.33904715127701374, + "grad_norm": 0.5937965512275696, + "learning_rate": 4.855776466589201e-06, + "loss": 0.6329, + "step": 2301 + }, + { + "epoch": 0.3391944990176817, + "grad_norm": 0.5722349286079407, + "learning_rate": 4.855646660370361e-06, + "loss": 0.5832, + "step": 2302 + }, + { + "epoch": 0.3393418467583497, + "grad_norm": 0.6112989187240601, + "learning_rate": 4.8555167974993236e-06, + "loss": 0.6039, + "step": 2303 + }, + { + "epoch": 0.33948919449901765, + "grad_norm": 0.5740420818328857, + "learning_rate": 4.8553868779792114e-06, + "loss": 0.6052, + "step": 2304 + }, + { + "epoch": 0.33963654223968565, + "grad_norm": 0.5960212349891663, + "learning_rate": 4.85525690181315e-06, + "loss": 0.6328, + "step": 2305 + }, + { + "epoch": 0.33978388998035364, + "grad_norm": 0.5969383120536804, + "learning_rate": 4.855126869004265e-06, + "loss": 0.5736, + "step": 2306 + }, + { + "epoch": 0.33993123772102163, + "grad_norm": 0.5959840416908264, + "learning_rate": 4.854996779555684e-06, + "loss": 0.5969, + "step": 2307 + }, + { + "epoch": 0.34007858546168956, + "grad_norm": 0.7208685874938965, + "learning_rate": 4.854866633470534e-06, + "loss": 0.5978, + "step": 2308 + }, + { + "epoch": 0.34022593320235756, + "grad_norm": 0.576832115650177, + "learning_rate": 4.854736430751946e-06, + "loss": 0.5849, + "step": 2309 + }, + { + "epoch": 0.34037328094302555, + "grad_norm": 0.6227792501449585, + "learning_rate": 4.854606171403051e-06, + "loss": 0.587, + "step": 2310 + }, + { + "epoch": 0.34052062868369354, + "grad_norm": 0.5960302948951721, + "learning_rate": 4.854475855426983e-06, + "loss": 0.624, + "step": 2311 + }, + { + "epoch": 0.3406679764243615, + "grad_norm": 0.5821576714515686, + "learning_rate": 4.854345482826874e-06, + "loss": 0.5435, + "step": 2312 + }, + { + "epoch": 0.34081532416502947, + "grad_norm": 0.5881320238113403, + "learning_rate": 4.854215053605861e-06, + "loss": 0.5896, + "step": 2313 + }, + { + "epoch": 0.34096267190569746, + "grad_norm": 0.6148459911346436, + "learning_rate": 4.85408456776708e-06, + "loss": 0.5709, + "step": 2314 + }, + { + "epoch": 0.34111001964636545, + "grad_norm": 0.5824469923973083, + "learning_rate": 4.85395402531367e-06, + "loss": 0.5904, + "step": 2315 + }, + { + "epoch": 0.3412573673870334, + "grad_norm": 0.549801766872406, + "learning_rate": 4.8538234262487686e-06, + "loss": 0.5856, + "step": 2316 + }, + { + "epoch": 0.3414047151277014, + "grad_norm": 0.5930051803588867, + "learning_rate": 4.853692770575518e-06, + "loss": 0.6214, + "step": 2317 + }, + { + "epoch": 0.34155206286836937, + "grad_norm": 0.5858429074287415, + "learning_rate": 4.853562058297061e-06, + "loss": 0.6076, + "step": 2318 + }, + { + "epoch": 0.3416994106090373, + "grad_norm": 0.5926677584648132, + "learning_rate": 4.85343128941654e-06, + "loss": 0.5662, + "step": 2319 + }, + { + "epoch": 0.3418467583497053, + "grad_norm": 0.575274646282196, + "learning_rate": 4.8533004639371e-06, + "loss": 0.5842, + "step": 2320 + }, + { + "epoch": 0.3419941060903733, + "grad_norm": 0.579375684261322, + "learning_rate": 4.853169581861887e-06, + "loss": 0.5612, + "step": 2321 + }, + { + "epoch": 0.3421414538310413, + "grad_norm": 0.564022421836853, + "learning_rate": 4.853038643194051e-06, + "loss": 0.6192, + "step": 2322 + }, + { + "epoch": 0.3422888015717092, + "grad_norm": 0.58941650390625, + "learning_rate": 4.852907647936738e-06, + "loss": 0.5665, + "step": 2323 + }, + { + "epoch": 0.3424361493123772, + "grad_norm": 0.586490273475647, + "learning_rate": 4.8527765960931e-06, + "loss": 0.5698, + "step": 2324 + }, + { + "epoch": 0.3425834970530452, + "grad_norm": 0.5966892838478088, + "learning_rate": 4.852645487666289e-06, + "loss": 0.6012, + "step": 2325 + }, + { + "epoch": 0.3427308447937132, + "grad_norm": 0.5944734215736389, + "learning_rate": 4.8525143226594564e-06, + "loss": 0.5698, + "step": 2326 + }, + { + "epoch": 0.3428781925343811, + "grad_norm": 0.5891017317771912, + "learning_rate": 4.8523831010757585e-06, + "loss": 0.5763, + "step": 2327 + }, + { + "epoch": 0.3430255402750491, + "grad_norm": 0.6619085073471069, + "learning_rate": 4.852251822918349e-06, + "loss": 0.6182, + "step": 2328 + }, + { + "epoch": 0.3431728880157171, + "grad_norm": 0.6150630712509155, + "learning_rate": 4.852120488190388e-06, + "loss": 0.5702, + "step": 2329 + }, + { + "epoch": 0.3433202357563851, + "grad_norm": 0.5857674479484558, + "learning_rate": 4.8519890968950315e-06, + "loss": 0.6219, + "step": 2330 + }, + { + "epoch": 0.34346758349705303, + "grad_norm": 0.6000819802284241, + "learning_rate": 4.851857649035441e-06, + "loss": 0.6061, + "step": 2331 + }, + { + "epoch": 0.343614931237721, + "grad_norm": 0.571377694606781, + "learning_rate": 4.851726144614777e-06, + "loss": 0.6114, + "step": 2332 + }, + { + "epoch": 0.343762278978389, + "grad_norm": 0.6007928252220154, + "learning_rate": 4.851594583636202e-06, + "loss": 0.6144, + "step": 2333 + }, + { + "epoch": 0.34390962671905695, + "grad_norm": 0.5737290382385254, + "learning_rate": 4.85146296610288e-06, + "loss": 0.5829, + "step": 2334 + }, + { + "epoch": 0.34405697445972494, + "grad_norm": 0.5955109596252441, + "learning_rate": 4.8513312920179775e-06, + "loss": 0.617, + "step": 2335 + }, + { + "epoch": 0.34420432220039293, + "grad_norm": 0.5701382160186768, + "learning_rate": 4.8511995613846595e-06, + "loss": 0.5887, + "step": 2336 + }, + { + "epoch": 0.3443516699410609, + "grad_norm": 0.5793296098709106, + "learning_rate": 4.851067774206095e-06, + "loss": 0.5862, + "step": 2337 + }, + { + "epoch": 0.34449901768172886, + "grad_norm": 0.554814338684082, + "learning_rate": 4.850935930485453e-06, + "loss": 0.5954, + "step": 2338 + }, + { + "epoch": 0.34464636542239685, + "grad_norm": 0.6097831726074219, + "learning_rate": 4.850804030225905e-06, + "loss": 0.6216, + "step": 2339 + }, + { + "epoch": 0.34479371316306484, + "grad_norm": 0.5654569864273071, + "learning_rate": 4.850672073430623e-06, + "loss": 0.5865, + "step": 2340 + }, + { + "epoch": 0.34494106090373283, + "grad_norm": 0.586281955242157, + "learning_rate": 4.8505400601027794e-06, + "loss": 0.6029, + "step": 2341 + }, + { + "epoch": 0.34508840864440077, + "grad_norm": 0.5843366980552673, + "learning_rate": 4.85040799024555e-06, + "loss": 0.5831, + "step": 2342 + }, + { + "epoch": 0.34523575638506876, + "grad_norm": 0.6039828658103943, + "learning_rate": 4.850275863862111e-06, + "loss": 0.6055, + "step": 2343 + }, + { + "epoch": 0.34538310412573675, + "grad_norm": 0.6063969731330872, + "learning_rate": 4.85014368095564e-06, + "loss": 0.6125, + "step": 2344 + }, + { + "epoch": 0.34553045186640474, + "grad_norm": 0.5953904986381531, + "learning_rate": 4.850011441529316e-06, + "loss": 0.6083, + "step": 2345 + }, + { + "epoch": 0.3456777996070727, + "grad_norm": 0.5928139090538025, + "learning_rate": 4.849879145586318e-06, + "loss": 0.5521, + "step": 2346 + }, + { + "epoch": 0.34582514734774067, + "grad_norm": 0.6333965063095093, + "learning_rate": 4.849746793129829e-06, + "loss": 0.5827, + "step": 2347 + }, + { + "epoch": 0.34597249508840866, + "grad_norm": 0.6376335620880127, + "learning_rate": 4.8496143841630324e-06, + "loss": 0.6099, + "step": 2348 + }, + { + "epoch": 0.3461198428290766, + "grad_norm": 0.5920227766036987, + "learning_rate": 4.849481918689112e-06, + "loss": 0.6047, + "step": 2349 + }, + { + "epoch": 0.3462671905697446, + "grad_norm": 0.5626212954521179, + "learning_rate": 4.849349396711252e-06, + "loss": 0.5843, + "step": 2350 + }, + { + "epoch": 0.3464145383104126, + "grad_norm": 0.5884197354316711, + "learning_rate": 4.849216818232642e-06, + "loss": 0.6058, + "step": 2351 + }, + { + "epoch": 0.34656188605108057, + "grad_norm": 0.5801972150802612, + "learning_rate": 4.8490841832564695e-06, + "loss": 0.6254, + "step": 2352 + }, + { + "epoch": 0.3467092337917485, + "grad_norm": 0.56622713804245, + "learning_rate": 4.848951491785924e-06, + "loss": 0.6129, + "step": 2353 + }, + { + "epoch": 0.3468565815324165, + "grad_norm": 0.6151859760284424, + "learning_rate": 4.848818743824196e-06, + "loss": 0.6143, + "step": 2354 + }, + { + "epoch": 0.3470039292730845, + "grad_norm": 0.5985469222068787, + "learning_rate": 4.84868593937448e-06, + "loss": 0.6168, + "step": 2355 + }, + { + "epoch": 0.3471512770137525, + "grad_norm": 0.6046620607376099, + "learning_rate": 4.848553078439968e-06, + "loss": 0.6112, + "step": 2356 + }, + { + "epoch": 0.3472986247544204, + "grad_norm": 0.6084101796150208, + "learning_rate": 4.8484201610238565e-06, + "loss": 0.6281, + "step": 2357 + }, + { + "epoch": 0.3474459724950884, + "grad_norm": 0.632718563079834, + "learning_rate": 4.848287187129342e-06, + "loss": 0.59, + "step": 2358 + }, + { + "epoch": 0.3475933202357564, + "grad_norm": 0.6331171989440918, + "learning_rate": 4.8481541567596214e-06, + "loss": 0.6034, + "step": 2359 + }, + { + "epoch": 0.3477406679764244, + "grad_norm": 0.592637300491333, + "learning_rate": 4.848021069917895e-06, + "loss": 0.5625, + "step": 2360 + }, + { + "epoch": 0.3478880157170923, + "grad_norm": 0.5529640913009644, + "learning_rate": 4.847887926607363e-06, + "loss": 0.5865, + "step": 2361 + }, + { + "epoch": 0.3480353634577603, + "grad_norm": 0.5994933247566223, + "learning_rate": 4.847754726831227e-06, + "loss": 0.566, + "step": 2362 + }, + { + "epoch": 0.3481827111984283, + "grad_norm": 0.5616014003753662, + "learning_rate": 4.847621470592692e-06, + "loss": 0.5489, + "step": 2363 + }, + { + "epoch": 0.34833005893909624, + "grad_norm": 0.5534308552742004, + "learning_rate": 4.847488157894961e-06, + "loss": 0.6193, + "step": 2364 + }, + { + "epoch": 0.34847740667976423, + "grad_norm": 0.5980793237686157, + "learning_rate": 4.8473547887412405e-06, + "loss": 0.5914, + "step": 2365 + }, + { + "epoch": 0.3486247544204322, + "grad_norm": 0.6434298753738403, + "learning_rate": 4.84722136313474e-06, + "loss": 0.6075, + "step": 2366 + }, + { + "epoch": 0.3487721021611002, + "grad_norm": 0.6039789915084839, + "learning_rate": 4.847087881078665e-06, + "loss": 0.5658, + "step": 2367 + }, + { + "epoch": 0.34891944990176815, + "grad_norm": 0.5868022441864014, + "learning_rate": 4.846954342576228e-06, + "loss": 0.6115, + "step": 2368 + }, + { + "epoch": 0.34906679764243614, + "grad_norm": 0.6095625162124634, + "learning_rate": 4.84682074763064e-06, + "loss": 0.6005, + "step": 2369 + }, + { + "epoch": 0.34921414538310414, + "grad_norm": 0.5864225029945374, + "learning_rate": 4.846687096245113e-06, + "loss": 0.6091, + "step": 2370 + }, + { + "epoch": 0.3493614931237721, + "grad_norm": 0.569382905960083, + "learning_rate": 4.846553388422863e-06, + "loss": 0.6057, + "step": 2371 + }, + { + "epoch": 0.34950884086444006, + "grad_norm": 0.587532639503479, + "learning_rate": 4.846419624167103e-06, + "loss": 0.5728, + "step": 2372 + }, + { + "epoch": 0.34965618860510805, + "grad_norm": 0.586251974105835, + "learning_rate": 4.846285803481054e-06, + "loss": 0.6175, + "step": 2373 + }, + { + "epoch": 0.34980353634577604, + "grad_norm": 0.584625780582428, + "learning_rate": 4.8461519263679305e-06, + "loss": 0.5672, + "step": 2374 + }, + { + "epoch": 0.349950884086444, + "grad_norm": 0.5770185589790344, + "learning_rate": 4.846017992830953e-06, + "loss": 0.5947, + "step": 2375 + }, + { + "epoch": 0.35009823182711197, + "grad_norm": 0.5853592753410339, + "learning_rate": 4.845884002873343e-06, + "loss": 0.6017, + "step": 2376 + }, + { + "epoch": 0.35024557956777996, + "grad_norm": 0.5528571605682373, + "learning_rate": 4.845749956498324e-06, + "loss": 0.5807, + "step": 2377 + }, + { + "epoch": 0.35039292730844795, + "grad_norm": 0.5952796936035156, + "learning_rate": 4.8456158537091195e-06, + "loss": 0.5873, + "step": 2378 + }, + { + "epoch": 0.3505402750491159, + "grad_norm": 0.5892115235328674, + "learning_rate": 4.845481694508952e-06, + "loss": 0.6147, + "step": 2379 + }, + { + "epoch": 0.3506876227897839, + "grad_norm": 0.5846443176269531, + "learning_rate": 4.845347478901051e-06, + "loss": 0.5714, + "step": 2380 + }, + { + "epoch": 0.3508349705304519, + "grad_norm": 0.6351563930511475, + "learning_rate": 4.845213206888643e-06, + "loss": 0.6141, + "step": 2381 + }, + { + "epoch": 0.35098231827111986, + "grad_norm": 0.6058303117752075, + "learning_rate": 4.845078878474957e-06, + "loss": 0.5757, + "step": 2382 + }, + { + "epoch": 0.3511296660117878, + "grad_norm": 0.5674799680709839, + "learning_rate": 4.844944493663225e-06, + "loss": 0.6135, + "step": 2383 + }, + { + "epoch": 0.3512770137524558, + "grad_norm": 0.5971875786781311, + "learning_rate": 4.844810052456676e-06, + "loss": 0.6114, + "step": 2384 + }, + { + "epoch": 0.3514243614931238, + "grad_norm": 0.5898913145065308, + "learning_rate": 4.844675554858547e-06, + "loss": 0.5943, + "step": 2385 + }, + { + "epoch": 0.3515717092337918, + "grad_norm": 0.5683940052986145, + "learning_rate": 4.844541000872069e-06, + "loss": 0.5945, + "step": 2386 + }, + { + "epoch": 0.3517190569744597, + "grad_norm": 0.580759584903717, + "learning_rate": 4.84440639050048e-06, + "loss": 0.6066, + "step": 2387 + }, + { + "epoch": 0.3518664047151277, + "grad_norm": 0.609781801700592, + "learning_rate": 4.844271723747017e-06, + "loss": 0.6227, + "step": 2388 + }, + { + "epoch": 0.3520137524557957, + "grad_norm": 0.5768178105354309, + "learning_rate": 4.844137000614919e-06, + "loss": 0.6075, + "step": 2389 + }, + { + "epoch": 0.35216110019646363, + "grad_norm": 0.6087360978126526, + "learning_rate": 4.844002221107425e-06, + "loss": 0.5687, + "step": 2390 + }, + { + "epoch": 0.3523084479371316, + "grad_norm": 0.5755907297134399, + "learning_rate": 4.843867385227777e-06, + "loss": 0.6031, + "step": 2391 + }, + { + "epoch": 0.3524557956777996, + "grad_norm": 0.5906873345375061, + "learning_rate": 4.843732492979219e-06, + "loss": 0.5885, + "step": 2392 + }, + { + "epoch": 0.3526031434184676, + "grad_norm": 0.5928394198417664, + "learning_rate": 4.843597544364992e-06, + "loss": 0.5732, + "step": 2393 + }, + { + "epoch": 0.35275049115913554, + "grad_norm": 0.5792456865310669, + "learning_rate": 4.8434625393883435e-06, + "loss": 0.6122, + "step": 2394 + }, + { + "epoch": 0.35289783889980353, + "grad_norm": 0.5856475830078125, + "learning_rate": 4.843327478052521e-06, + "loss": 0.617, + "step": 2395 + }, + { + "epoch": 0.3530451866404715, + "grad_norm": 0.5891130566596985, + "learning_rate": 4.8431923603607715e-06, + "loss": 0.5645, + "step": 2396 + }, + { + "epoch": 0.3531925343811395, + "grad_norm": 0.6004961133003235, + "learning_rate": 4.8430571863163445e-06, + "loss": 0.5681, + "step": 2397 + }, + { + "epoch": 0.35333988212180745, + "grad_norm": 0.6023552417755127, + "learning_rate": 4.842921955922492e-06, + "loss": 0.5715, + "step": 2398 + }, + { + "epoch": 0.35348722986247544, + "grad_norm": 0.5632521510124207, + "learning_rate": 4.842786669182464e-06, + "loss": 0.58, + "step": 2399 + }, + { + "epoch": 0.35363457760314343, + "grad_norm": 0.5617197751998901, + "learning_rate": 4.8426513260995154e-06, + "loss": 0.5894, + "step": 2400 + }, + { + "epoch": 0.3537819253438114, + "grad_norm": 0.5742727518081665, + "learning_rate": 4.842515926676902e-06, + "loss": 0.6001, + "step": 2401 + }, + { + "epoch": 0.35392927308447936, + "grad_norm": 0.6106005907058716, + "learning_rate": 4.842380470917879e-06, + "loss": 0.5958, + "step": 2402 + }, + { + "epoch": 0.35407662082514735, + "grad_norm": 0.6044420599937439, + "learning_rate": 4.8422449588257045e-06, + "loss": 0.5875, + "step": 2403 + }, + { + "epoch": 0.35422396856581534, + "grad_norm": 0.5858578085899353, + "learning_rate": 4.842109390403638e-06, + "loss": 0.643, + "step": 2404 + }, + { + "epoch": 0.3543713163064833, + "grad_norm": 0.5626930594444275, + "learning_rate": 4.8419737656549386e-06, + "loss": 0.5309, + "step": 2405 + }, + { + "epoch": 0.35451866404715127, + "grad_norm": 0.5937252640724182, + "learning_rate": 4.8418380845828676e-06, + "loss": 0.604, + "step": 2406 + }, + { + "epoch": 0.35466601178781926, + "grad_norm": 0.6239109039306641, + "learning_rate": 4.84170234719069e-06, + "loss": 0.6078, + "step": 2407 + }, + { + "epoch": 0.35481335952848725, + "grad_norm": 0.6127679347991943, + "learning_rate": 4.841566553481669e-06, + "loss": 0.5844, + "step": 2408 + }, + { + "epoch": 0.3549607072691552, + "grad_norm": 0.62395840883255, + "learning_rate": 4.841430703459071e-06, + "loss": 0.5759, + "step": 2409 + }, + { + "epoch": 0.3551080550098232, + "grad_norm": 0.5927919149398804, + "learning_rate": 4.841294797126162e-06, + "loss": 0.5746, + "step": 2410 + }, + { + "epoch": 0.35525540275049117, + "grad_norm": 0.5949803590774536, + "learning_rate": 4.841158834486212e-06, + "loss": 0.6014, + "step": 2411 + }, + { + "epoch": 0.35540275049115916, + "grad_norm": 0.5840097665786743, + "learning_rate": 4.84102281554249e-06, + "loss": 0.6187, + "step": 2412 + }, + { + "epoch": 0.3555500982318271, + "grad_norm": 0.600530207157135, + "learning_rate": 4.840886740298268e-06, + "loss": 0.5891, + "step": 2413 + }, + { + "epoch": 0.3556974459724951, + "grad_norm": 0.6307014226913452, + "learning_rate": 4.840750608756815e-06, + "loss": 0.6044, + "step": 2414 + }, + { + "epoch": 0.3558447937131631, + "grad_norm": 0.5923655033111572, + "learning_rate": 4.840614420921411e-06, + "loss": 0.5645, + "step": 2415 + }, + { + "epoch": 0.35599214145383107, + "grad_norm": 0.6151763200759888, + "learning_rate": 4.840478176795327e-06, + "loss": 0.5886, + "step": 2416 + }, + { + "epoch": 0.356139489194499, + "grad_norm": 0.6170153021812439, + "learning_rate": 4.84034187638184e-06, + "loss": 0.5825, + "step": 2417 + }, + { + "epoch": 0.356286836935167, + "grad_norm": 0.6458231210708618, + "learning_rate": 4.840205519684229e-06, + "loss": 0.5944, + "step": 2418 + }, + { + "epoch": 0.356434184675835, + "grad_norm": 0.5811774134635925, + "learning_rate": 4.840069106705772e-06, + "loss": 0.5955, + "step": 2419 + }, + { + "epoch": 0.3565815324165029, + "grad_norm": 0.5619702935218811, + "learning_rate": 4.839932637449752e-06, + "loss": 0.5739, + "step": 2420 + }, + { + "epoch": 0.3567288801571709, + "grad_norm": 0.5925825238227844, + "learning_rate": 4.839796111919449e-06, + "loss": 0.58, + "step": 2421 + }, + { + "epoch": 0.3568762278978389, + "grad_norm": 0.5732433795928955, + "learning_rate": 4.839659530118147e-06, + "loss": 0.5689, + "step": 2422 + }, + { + "epoch": 0.3570235756385069, + "grad_norm": 0.6176435351371765, + "learning_rate": 4.83952289204913e-06, + "loss": 0.598, + "step": 2423 + }, + { + "epoch": 0.35717092337917483, + "grad_norm": 0.6005814671516418, + "learning_rate": 4.839386197715686e-06, + "loss": 0.6289, + "step": 2424 + }, + { + "epoch": 0.3573182711198428, + "grad_norm": 0.5677743554115295, + "learning_rate": 4.8392494471211e-06, + "loss": 0.6047, + "step": 2425 + }, + { + "epoch": 0.3574656188605108, + "grad_norm": 0.5934644341468811, + "learning_rate": 4.839112640268662e-06, + "loss": 0.5645, + "step": 2426 + }, + { + "epoch": 0.3576129666011788, + "grad_norm": 0.5966162085533142, + "learning_rate": 4.838975777161663e-06, + "loss": 0.6112, + "step": 2427 + }, + { + "epoch": 0.35776031434184674, + "grad_norm": 0.5756871700286865, + "learning_rate": 4.838838857803393e-06, + "loss": 0.5865, + "step": 2428 + }, + { + "epoch": 0.35790766208251473, + "grad_norm": 0.5995380282402039, + "learning_rate": 4.838701882197146e-06, + "loss": 0.6179, + "step": 2429 + }, + { + "epoch": 0.3580550098231827, + "grad_norm": 0.5754821300506592, + "learning_rate": 4.8385648503462145e-06, + "loss": 0.5914, + "step": 2430 + }, + { + "epoch": 0.3582023575638507, + "grad_norm": 0.5608173608779907, + "learning_rate": 4.838427762253896e-06, + "loss": 0.5795, + "step": 2431 + }, + { + "epoch": 0.35834970530451865, + "grad_norm": 0.6146678328514099, + "learning_rate": 4.838290617923487e-06, + "loss": 0.5939, + "step": 2432 + }, + { + "epoch": 0.35849705304518664, + "grad_norm": 0.5875523090362549, + "learning_rate": 4.838153417358284e-06, + "loss": 0.6086, + "step": 2433 + }, + { + "epoch": 0.35864440078585463, + "grad_norm": 0.5917184948921204, + "learning_rate": 4.838016160561589e-06, + "loss": 0.6088, + "step": 2434 + }, + { + "epoch": 0.35879174852652257, + "grad_norm": 0.614008903503418, + "learning_rate": 4.837878847536702e-06, + "loss": 0.6117, + "step": 2435 + }, + { + "epoch": 0.35893909626719056, + "grad_norm": 0.5769342184066772, + "learning_rate": 4.837741478286925e-06, + "loss": 0.5883, + "step": 2436 + }, + { + "epoch": 0.35908644400785855, + "grad_norm": 0.5579451322555542, + "learning_rate": 4.837604052815562e-06, + "loss": 0.6286, + "step": 2437 + }, + { + "epoch": 0.35923379174852654, + "grad_norm": 0.575441837310791, + "learning_rate": 4.837466571125918e-06, + "loss": 0.5863, + "step": 2438 + }, + { + "epoch": 0.3593811394891945, + "grad_norm": 0.6299701929092407, + "learning_rate": 4.837329033221299e-06, + "loss": 0.6045, + "step": 2439 + }, + { + "epoch": 0.35952848722986247, + "grad_norm": 0.5723358988761902, + "learning_rate": 4.8371914391050135e-06, + "loss": 0.5899, + "step": 2440 + }, + { + "epoch": 0.35967583497053046, + "grad_norm": 0.5600990653038025, + "learning_rate": 4.83705378878037e-06, + "loss": 0.6051, + "step": 2441 + }, + { + "epoch": 0.35982318271119845, + "grad_norm": 0.5679528713226318, + "learning_rate": 4.8369160822506785e-06, + "loss": 0.5829, + "step": 2442 + }, + { + "epoch": 0.3599705304518664, + "grad_norm": 0.5948792099952698, + "learning_rate": 4.836778319519252e-06, + "loss": 0.6039, + "step": 2443 + }, + { + "epoch": 0.3601178781925344, + "grad_norm": 0.569236695766449, + "learning_rate": 4.836640500589402e-06, + "loss": 0.6207, + "step": 2444 + }, + { + "epoch": 0.36026522593320237, + "grad_norm": 0.5514287948608398, + "learning_rate": 4.836502625464445e-06, + "loss": 0.5873, + "step": 2445 + }, + { + "epoch": 0.3604125736738703, + "grad_norm": 0.6121038794517517, + "learning_rate": 4.836364694147695e-06, + "loss": 0.6185, + "step": 2446 + }, + { + "epoch": 0.3605599214145383, + "grad_norm": 0.597268283367157, + "learning_rate": 4.83622670664247e-06, + "loss": 0.6011, + "step": 2447 + }, + { + "epoch": 0.3607072691552063, + "grad_norm": 0.5687645673751831, + "learning_rate": 4.836088662952089e-06, + "loss": 0.5701, + "step": 2448 + }, + { + "epoch": 0.3608546168958743, + "grad_norm": 0.5788796544075012, + "learning_rate": 4.835950563079871e-06, + "loss": 0.5886, + "step": 2449 + }, + { + "epoch": 0.3610019646365422, + "grad_norm": 0.6370461583137512, + "learning_rate": 4.835812407029137e-06, + "loss": 0.6016, + "step": 2450 + }, + { + "epoch": 0.3611493123772102, + "grad_norm": 0.6011497378349304, + "learning_rate": 4.835674194803211e-06, + "loss": 0.6012, + "step": 2451 + }, + { + "epoch": 0.3612966601178782, + "grad_norm": 0.568520188331604, + "learning_rate": 4.835535926405416e-06, + "loss": 0.6097, + "step": 2452 + }, + { + "epoch": 0.3614440078585462, + "grad_norm": 0.598744809627533, + "learning_rate": 4.835397601839077e-06, + "loss": 0.6398, + "step": 2453 + }, + { + "epoch": 0.3615913555992141, + "grad_norm": 0.5753774046897888, + "learning_rate": 4.835259221107521e-06, + "loss": 0.5854, + "step": 2454 + }, + { + "epoch": 0.3617387033398821, + "grad_norm": 0.5646988153457642, + "learning_rate": 4.835120784214077e-06, + "loss": 0.6119, + "step": 2455 + }, + { + "epoch": 0.3618860510805501, + "grad_norm": 0.5886872410774231, + "learning_rate": 4.834982291162073e-06, + "loss": 0.5821, + "step": 2456 + }, + { + "epoch": 0.3620333988212181, + "grad_norm": 0.601863443851471, + "learning_rate": 4.834843741954839e-06, + "loss": 0.5744, + "step": 2457 + }, + { + "epoch": 0.36218074656188604, + "grad_norm": 0.6082666516304016, + "learning_rate": 4.834705136595709e-06, + "loss": 0.5643, + "step": 2458 + }, + { + "epoch": 0.362328094302554, + "grad_norm": 0.5880180597305298, + "learning_rate": 4.834566475088015e-06, + "loss": 0.6298, + "step": 2459 + }, + { + "epoch": 0.362475442043222, + "grad_norm": 0.585227370262146, + "learning_rate": 4.8344277574350925e-06, + "loss": 0.5807, + "step": 2460 + }, + { + "epoch": 0.36262278978388995, + "grad_norm": 0.5879781246185303, + "learning_rate": 4.834288983640278e-06, + "loss": 0.5819, + "step": 2461 + }, + { + "epoch": 0.36277013752455795, + "grad_norm": 0.5997985601425171, + "learning_rate": 4.834150153706908e-06, + "loss": 0.5756, + "step": 2462 + }, + { + "epoch": 0.36291748526522594, + "grad_norm": 0.6146398186683655, + "learning_rate": 4.834011267638321e-06, + "loss": 0.5755, + "step": 2463 + }, + { + "epoch": 0.3630648330058939, + "grad_norm": 0.6029455065727234, + "learning_rate": 4.833872325437858e-06, + "loss": 0.6222, + "step": 2464 + }, + { + "epoch": 0.36321218074656186, + "grad_norm": 0.6117573380470276, + "learning_rate": 4.83373332710886e-06, + "loss": 0.5777, + "step": 2465 + }, + { + "epoch": 0.36335952848722985, + "grad_norm": 0.5892587900161743, + "learning_rate": 4.833594272654671e-06, + "loss": 0.5745, + "step": 2466 + }, + { + "epoch": 0.36350687622789785, + "grad_norm": 0.5759468674659729, + "learning_rate": 4.833455162078634e-06, + "loss": 0.5732, + "step": 2467 + }, + { + "epoch": 0.36365422396856584, + "grad_norm": 0.5604436993598938, + "learning_rate": 4.8333159953840935e-06, + "loss": 0.5833, + "step": 2468 + }, + { + "epoch": 0.3638015717092338, + "grad_norm": 0.5993325114250183, + "learning_rate": 4.833176772574399e-06, + "loss": 0.6077, + "step": 2469 + }, + { + "epoch": 0.36394891944990176, + "grad_norm": 0.5945647954940796, + "learning_rate": 4.833037493652897e-06, + "loss": 0.5914, + "step": 2470 + }, + { + "epoch": 0.36409626719056976, + "grad_norm": 0.6066462993621826, + "learning_rate": 4.832898158622938e-06, + "loss": 0.6028, + "step": 2471 + }, + { + "epoch": 0.36424361493123775, + "grad_norm": 0.5610077381134033, + "learning_rate": 4.832758767487872e-06, + "loss": 0.5684, + "step": 2472 + }, + { + "epoch": 0.3643909626719057, + "grad_norm": 0.6684325933456421, + "learning_rate": 4.832619320251052e-06, + "loss": 0.614, + "step": 2473 + }, + { + "epoch": 0.3645383104125737, + "grad_norm": 0.5966619849205017, + "learning_rate": 4.832479816915831e-06, + "loss": 0.61, + "step": 2474 + }, + { + "epoch": 0.36468565815324167, + "grad_norm": 0.6365052461624146, + "learning_rate": 4.832340257485565e-06, + "loss": 0.5771, + "step": 2475 + }, + { + "epoch": 0.3648330058939096, + "grad_norm": 0.5952895283699036, + "learning_rate": 4.8322006419636094e-06, + "loss": 0.5983, + "step": 2476 + }, + { + "epoch": 0.3649803536345776, + "grad_norm": 0.6065638065338135, + "learning_rate": 4.832060970353322e-06, + "loss": 0.6073, + "step": 2477 + }, + { + "epoch": 0.3651277013752456, + "grad_norm": 0.5874871015548706, + "learning_rate": 4.8319212426580625e-06, + "loss": 0.5606, + "step": 2478 + }, + { + "epoch": 0.3652750491159136, + "grad_norm": 0.5924184918403625, + "learning_rate": 4.831781458881191e-06, + "loss": 0.6107, + "step": 2479 + }, + { + "epoch": 0.3654223968565815, + "grad_norm": 0.5712376236915588, + "learning_rate": 4.831641619026068e-06, + "loss": 0.6187, + "step": 2480 + }, + { + "epoch": 0.3655697445972495, + "grad_norm": 0.5968480110168457, + "learning_rate": 4.8315017230960584e-06, + "loss": 0.6015, + "step": 2481 + }, + { + "epoch": 0.3657170923379175, + "grad_norm": 0.5711371302604675, + "learning_rate": 4.831361771094527e-06, + "loss": 0.6409, + "step": 2482 + }, + { + "epoch": 0.3658644400785855, + "grad_norm": 0.5748427510261536, + "learning_rate": 4.831221763024837e-06, + "loss": 0.5903, + "step": 2483 + }, + { + "epoch": 0.3660117878192534, + "grad_norm": 0.6009981036186218, + "learning_rate": 4.8310816988903575e-06, + "loss": 0.5596, + "step": 2484 + }, + { + "epoch": 0.3661591355599214, + "grad_norm": 0.5922354459762573, + "learning_rate": 4.830941578694456e-06, + "loss": 0.6052, + "step": 2485 + }, + { + "epoch": 0.3663064833005894, + "grad_norm": 0.626163899898529, + "learning_rate": 4.830801402440504e-06, + "loss": 0.6042, + "step": 2486 + }, + { + "epoch": 0.3664538310412574, + "grad_norm": 0.5850743651390076, + "learning_rate": 4.83066117013187e-06, + "loss": 0.5922, + "step": 2487 + }, + { + "epoch": 0.36660117878192533, + "grad_norm": 0.5844767093658447, + "learning_rate": 4.830520881771929e-06, + "loss": 0.6027, + "step": 2488 + }, + { + "epoch": 0.3667485265225933, + "grad_norm": 0.5713557600975037, + "learning_rate": 4.8303805373640535e-06, + "loss": 0.5783, + "step": 2489 + }, + { + "epoch": 0.3668958742632613, + "grad_norm": 0.6020040512084961, + "learning_rate": 4.830240136911619e-06, + "loss": 0.6028, + "step": 2490 + }, + { + "epoch": 0.36704322200392925, + "grad_norm": 0.5776389241218567, + "learning_rate": 4.8300996804180025e-06, + "loss": 0.6493, + "step": 2491 + }, + { + "epoch": 0.36719056974459724, + "grad_norm": 0.6017436385154724, + "learning_rate": 4.829959167886581e-06, + "loss": 0.6087, + "step": 2492 + }, + { + "epoch": 0.36733791748526523, + "grad_norm": 0.6070947051048279, + "learning_rate": 4.829818599320735e-06, + "loss": 0.5853, + "step": 2493 + }, + { + "epoch": 0.3674852652259332, + "grad_norm": 0.5664854049682617, + "learning_rate": 4.829677974723844e-06, + "loss": 0.5877, + "step": 2494 + }, + { + "epoch": 0.36763261296660116, + "grad_norm": 0.584691047668457, + "learning_rate": 4.829537294099291e-06, + "loss": 0.6142, + "step": 2495 + }, + { + "epoch": 0.36777996070726915, + "grad_norm": 0.5668953657150269, + "learning_rate": 4.829396557450458e-06, + "loss": 0.5899, + "step": 2496 + }, + { + "epoch": 0.36792730844793714, + "grad_norm": 0.6177760362625122, + "learning_rate": 4.82925576478073e-06, + "loss": 0.6093, + "step": 2497 + }, + { + "epoch": 0.36807465618860513, + "grad_norm": 0.5968844890594482, + "learning_rate": 4.829114916093495e-06, + "loss": 0.5687, + "step": 2498 + }, + { + "epoch": 0.36822200392927307, + "grad_norm": 0.5888805985450745, + "learning_rate": 4.828974011392137e-06, + "loss": 0.615, + "step": 2499 + }, + { + "epoch": 0.36836935166994106, + "grad_norm": 0.6197344660758972, + "learning_rate": 4.828833050680048e-06, + "loss": 0.6061, + "step": 2500 + }, + { + "epoch": 0.36851669941060905, + "grad_norm": 0.5995117425918579, + "learning_rate": 4.828692033960614e-06, + "loss": 0.5924, + "step": 2501 + }, + { + "epoch": 0.36866404715127704, + "grad_norm": 0.5865400433540344, + "learning_rate": 4.82855096123723e-06, + "loss": 0.6351, + "step": 2502 + }, + { + "epoch": 0.368811394891945, + "grad_norm": 0.5999864935874939, + "learning_rate": 4.828409832513289e-06, + "loss": 0.5965, + "step": 2503 + }, + { + "epoch": 0.36895874263261297, + "grad_norm": 0.6844099164009094, + "learning_rate": 4.828268647792182e-06, + "loss": 0.6074, + "step": 2504 + }, + { + "epoch": 0.36910609037328096, + "grad_norm": 0.6016273498535156, + "learning_rate": 4.8281274070773066e-06, + "loss": 0.5729, + "step": 2505 + }, + { + "epoch": 0.3692534381139489, + "grad_norm": 0.6074497699737549, + "learning_rate": 4.827986110372058e-06, + "loss": 0.5701, + "step": 2506 + }, + { + "epoch": 0.3694007858546169, + "grad_norm": 0.6024684906005859, + "learning_rate": 4.827844757679837e-06, + "loss": 0.5827, + "step": 2507 + }, + { + "epoch": 0.3695481335952849, + "grad_norm": 0.593451738357544, + "learning_rate": 4.82770334900404e-06, + "loss": 0.5954, + "step": 2508 + }, + { + "epoch": 0.36969548133595287, + "grad_norm": 0.585364043712616, + "learning_rate": 4.827561884348071e-06, + "loss": 0.5805, + "step": 2509 + }, + { + "epoch": 0.3698428290766208, + "grad_norm": 0.6053754687309265, + "learning_rate": 4.827420363715329e-06, + "loss": 0.568, + "step": 2510 + }, + { + "epoch": 0.3699901768172888, + "grad_norm": 0.5677345991134644, + "learning_rate": 4.827278787109219e-06, + "loss": 0.5889, + "step": 2511 + }, + { + "epoch": 0.3701375245579568, + "grad_norm": 0.5822675824165344, + "learning_rate": 4.827137154533146e-06, + "loss": 0.5942, + "step": 2512 + }, + { + "epoch": 0.3702848722986248, + "grad_norm": 0.6161859631538391, + "learning_rate": 4.826995465990515e-06, + "loss": 0.6164, + "step": 2513 + }, + { + "epoch": 0.3704322200392927, + "grad_norm": 0.5791798233985901, + "learning_rate": 4.826853721484735e-06, + "loss": 0.5951, + "step": 2514 + }, + { + "epoch": 0.3705795677799607, + "grad_norm": 0.5733460783958435, + "learning_rate": 4.826711921019215e-06, + "loss": 0.6488, + "step": 2515 + }, + { + "epoch": 0.3707269155206287, + "grad_norm": 0.5991607904434204, + "learning_rate": 4.826570064597364e-06, + "loss": 0.6099, + "step": 2516 + }, + { + "epoch": 0.37087426326129663, + "grad_norm": 0.5812264084815979, + "learning_rate": 4.826428152222594e-06, + "loss": 0.5889, + "step": 2517 + }, + { + "epoch": 0.3710216110019646, + "grad_norm": 0.5810298323631287, + "learning_rate": 4.8262861838983185e-06, + "loss": 0.5679, + "step": 2518 + }, + { + "epoch": 0.3711689587426326, + "grad_norm": 0.6107885241508484, + "learning_rate": 4.826144159627951e-06, + "loss": 0.6042, + "step": 2519 + }, + { + "epoch": 0.3713163064833006, + "grad_norm": 0.594275176525116, + "learning_rate": 4.8260020794149086e-06, + "loss": 0.5967, + "step": 2520 + }, + { + "epoch": 0.37146365422396854, + "grad_norm": 0.6046812534332275, + "learning_rate": 4.825859943262606e-06, + "loss": 0.596, + "step": 2521 + }, + { + "epoch": 0.37161100196463653, + "grad_norm": 0.5875598788261414, + "learning_rate": 4.825717751174463e-06, + "loss": 0.5581, + "step": 2522 + }, + { + "epoch": 0.3717583497053045, + "grad_norm": 0.5848010778427124, + "learning_rate": 4.825575503153899e-06, + "loss": 0.5882, + "step": 2523 + }, + { + "epoch": 0.3719056974459725, + "grad_norm": 0.5771281719207764, + "learning_rate": 4.825433199204334e-06, + "loss": 0.6099, + "step": 2524 + }, + { + "epoch": 0.37205304518664045, + "grad_norm": 0.5739482045173645, + "learning_rate": 4.825290839329193e-06, + "loss": 0.5889, + "step": 2525 + }, + { + "epoch": 0.37220039292730844, + "grad_norm": 0.6160637736320496, + "learning_rate": 4.825148423531897e-06, + "loss": 0.5864, + "step": 2526 + }, + { + "epoch": 0.37234774066797643, + "grad_norm": 0.6067585945129395, + "learning_rate": 4.825005951815872e-06, + "loss": 0.5754, + "step": 2527 + }, + { + "epoch": 0.3724950884086444, + "grad_norm": 0.5550565719604492, + "learning_rate": 4.8248634241845435e-06, + "loss": 0.6112, + "step": 2528 + }, + { + "epoch": 0.37264243614931236, + "grad_norm": 0.897000253200531, + "learning_rate": 4.824720840641341e-06, + "loss": 0.5924, + "step": 2529 + }, + { + "epoch": 0.37278978388998035, + "grad_norm": 0.6096635460853577, + "learning_rate": 4.824578201189693e-06, + "loss": 0.5858, + "step": 2530 + }, + { + "epoch": 0.37293713163064834, + "grad_norm": 0.6040086150169373, + "learning_rate": 4.824435505833029e-06, + "loss": 0.5823, + "step": 2531 + }, + { + "epoch": 0.3730844793713163, + "grad_norm": 0.6497594118118286, + "learning_rate": 4.824292754574782e-06, + "loss": 0.6015, + "step": 2532 + }, + { + "epoch": 0.37323182711198427, + "grad_norm": 0.6523922681808472, + "learning_rate": 4.824149947418384e-06, + "loss": 0.5836, + "step": 2533 + }, + { + "epoch": 0.37337917485265226, + "grad_norm": 0.5935865640640259, + "learning_rate": 4.824007084367269e-06, + "loss": 0.6257, + "step": 2534 + }, + { + "epoch": 0.37352652259332025, + "grad_norm": 0.6268407702445984, + "learning_rate": 4.823864165424874e-06, + "loss": 0.5926, + "step": 2535 + }, + { + "epoch": 0.3736738703339882, + "grad_norm": 0.5948095917701721, + "learning_rate": 4.823721190594637e-06, + "loss": 0.5903, + "step": 2536 + }, + { + "epoch": 0.3738212180746562, + "grad_norm": 0.6375263929367065, + "learning_rate": 4.8235781598799935e-06, + "loss": 0.6139, + "step": 2537 + }, + { + "epoch": 0.37396856581532417, + "grad_norm": 0.6083034873008728, + "learning_rate": 4.823435073284386e-06, + "loss": 0.5662, + "step": 2538 + }, + { + "epoch": 0.37411591355599216, + "grad_norm": 0.6455515623092651, + "learning_rate": 4.8232919308112545e-06, + "loss": 0.612, + "step": 2539 + }, + { + "epoch": 0.3742632612966601, + "grad_norm": 0.6021302342414856, + "learning_rate": 4.8231487324640425e-06, + "loss": 0.6178, + "step": 2540 + }, + { + "epoch": 0.3744106090373281, + "grad_norm": 0.5922404527664185, + "learning_rate": 4.823005478246192e-06, + "loss": 0.5582, + "step": 2541 + }, + { + "epoch": 0.3745579567779961, + "grad_norm": 0.582859992980957, + "learning_rate": 4.8228621681611506e-06, + "loss": 0.6119, + "step": 2542 + }, + { + "epoch": 0.3747053045186641, + "grad_norm": 0.6008878350257874, + "learning_rate": 4.822718802212363e-06, + "loss": 0.6042, + "step": 2543 + }, + { + "epoch": 0.374852652259332, + "grad_norm": 0.6010786294937134, + "learning_rate": 4.822575380403277e-06, + "loss": 0.6373, + "step": 2544 + }, + { + "epoch": 0.375, + "grad_norm": 0.6051074266433716, + "learning_rate": 4.822431902737343e-06, + "loss": 0.6351, + "step": 2545 + }, + { + "epoch": 0.375147347740668, + "grad_norm": 0.6307515501976013, + "learning_rate": 4.82228836921801e-06, + "loss": 0.5904, + "step": 2546 + }, + { + "epoch": 0.3752946954813359, + "grad_norm": 0.6125644445419312, + "learning_rate": 4.822144779848733e-06, + "loss": 0.6094, + "step": 2547 + }, + { + "epoch": 0.3754420432220039, + "grad_norm": 0.5764446258544922, + "learning_rate": 4.822001134632961e-06, + "loss": 0.6024, + "step": 2548 + }, + { + "epoch": 0.3755893909626719, + "grad_norm": 0.5481241345405579, + "learning_rate": 4.8218574335741516e-06, + "loss": 0.5975, + "step": 2549 + }, + { + "epoch": 0.3757367387033399, + "grad_norm": 0.5783992409706116, + "learning_rate": 4.821713676675761e-06, + "loss": 0.5996, + "step": 2550 + }, + { + "epoch": 0.37588408644400784, + "grad_norm": 0.6098543405532837, + "learning_rate": 4.821569863941244e-06, + "loss": 0.558, + "step": 2551 + }, + { + "epoch": 0.37603143418467583, + "grad_norm": 0.6243743896484375, + "learning_rate": 4.821425995374061e-06, + "loss": 0.6091, + "step": 2552 + }, + { + "epoch": 0.3761787819253438, + "grad_norm": 0.6190816760063171, + "learning_rate": 4.821282070977673e-06, + "loss": 0.6144, + "step": 2553 + }, + { + "epoch": 0.3763261296660118, + "grad_norm": 0.6146119832992554, + "learning_rate": 4.821138090755538e-06, + "loss": 0.5868, + "step": 2554 + }, + { + "epoch": 0.37647347740667975, + "grad_norm": 0.6018913984298706, + "learning_rate": 4.820994054711121e-06, + "loss": 0.5943, + "step": 2555 + }, + { + "epoch": 0.37662082514734774, + "grad_norm": 0.5812302231788635, + "learning_rate": 4.820849962847887e-06, + "loss": 0.5825, + "step": 2556 + }, + { + "epoch": 0.37676817288801573, + "grad_norm": 0.5691380500793457, + "learning_rate": 4.820705815169299e-06, + "loss": 0.6223, + "step": 2557 + }, + { + "epoch": 0.3769155206286837, + "grad_norm": 0.5692963600158691, + "learning_rate": 4.820561611678825e-06, + "loss": 0.6162, + "step": 2558 + }, + { + "epoch": 0.37706286836935166, + "grad_norm": 0.566031813621521, + "learning_rate": 4.8204173523799314e-06, + "loss": 0.6196, + "step": 2559 + }, + { + "epoch": 0.37721021611001965, + "grad_norm": 0.5850705504417419, + "learning_rate": 4.82027303727609e-06, + "loss": 0.611, + "step": 2560 + }, + { + "epoch": 0.37735756385068764, + "grad_norm": 0.6220420598983765, + "learning_rate": 4.8201286663707694e-06, + "loss": 0.5854, + "step": 2561 + }, + { + "epoch": 0.3775049115913556, + "grad_norm": 0.5983731746673584, + "learning_rate": 4.819984239667443e-06, + "loss": 0.6034, + "step": 2562 + }, + { + "epoch": 0.37765225933202357, + "grad_norm": 0.5995391011238098, + "learning_rate": 4.819839757169585e-06, + "loss": 0.542, + "step": 2563 + }, + { + "epoch": 0.37779960707269156, + "grad_norm": 0.6042183637619019, + "learning_rate": 4.819695218880667e-06, + "loss": 0.6357, + "step": 2564 + }, + { + "epoch": 0.37794695481335955, + "grad_norm": 0.6309070587158203, + "learning_rate": 4.819550624804168e-06, + "loss": 0.5819, + "step": 2565 + }, + { + "epoch": 0.3780943025540275, + "grad_norm": 0.571044921875, + "learning_rate": 4.819405974943564e-06, + "loss": 0.5932, + "step": 2566 + }, + { + "epoch": 0.3782416502946955, + "grad_norm": 0.5762603878974915, + "learning_rate": 4.819261269302335e-06, + "loss": 0.5754, + "step": 2567 + }, + { + "epoch": 0.37838899803536347, + "grad_norm": 0.600263237953186, + "learning_rate": 4.819116507883959e-06, + "loss": 0.5964, + "step": 2568 + }, + { + "epoch": 0.37853634577603146, + "grad_norm": 0.6055952310562134, + "learning_rate": 4.8189716906919196e-06, + "loss": 0.6034, + "step": 2569 + }, + { + "epoch": 0.3786836935166994, + "grad_norm": 0.5835533142089844, + "learning_rate": 4.818826817729698e-06, + "loss": 0.5682, + "step": 2570 + }, + { + "epoch": 0.3788310412573674, + "grad_norm": 0.6097365617752075, + "learning_rate": 4.81868188900078e-06, + "loss": 0.596, + "step": 2571 + }, + { + "epoch": 0.3789783889980354, + "grad_norm": 0.5760030746459961, + "learning_rate": 4.81853690450865e-06, + "loss": 0.6319, + "step": 2572 + }, + { + "epoch": 0.37912573673870337, + "grad_norm": 0.6167215704917908, + "learning_rate": 4.818391864256794e-06, + "loss": 0.5903, + "step": 2573 + }, + { + "epoch": 0.3792730844793713, + "grad_norm": 0.6100381016731262, + "learning_rate": 4.818246768248702e-06, + "loss": 0.5996, + "step": 2574 + }, + { + "epoch": 0.3794204322200393, + "grad_norm": 0.5775372385978699, + "learning_rate": 4.818101616487862e-06, + "loss": 0.5879, + "step": 2575 + }, + { + "epoch": 0.3795677799607073, + "grad_norm": 0.5601808428764343, + "learning_rate": 4.817956408977765e-06, + "loss": 0.6228, + "step": 2576 + }, + { + "epoch": 0.3797151277013752, + "grad_norm": 0.5965535044670105, + "learning_rate": 4.817811145721905e-06, + "loss": 0.6079, + "step": 2577 + }, + { + "epoch": 0.3798624754420432, + "grad_norm": 0.554373562335968, + "learning_rate": 4.817665826723773e-06, + "loss": 0.5468, + "step": 2578 + }, + { + "epoch": 0.3800098231827112, + "grad_norm": 0.5529343485832214, + "learning_rate": 4.817520451986864e-06, + "loss": 0.587, + "step": 2579 + }, + { + "epoch": 0.3801571709233792, + "grad_norm": 0.5755190849304199, + "learning_rate": 4.8173750215146764e-06, + "loss": 0.5998, + "step": 2580 + }, + { + "epoch": 0.38030451866404713, + "grad_norm": 0.6436457633972168, + "learning_rate": 4.817229535310706e-06, + "loss": 0.6211, + "step": 2581 + }, + { + "epoch": 0.3804518664047151, + "grad_norm": 0.5856518745422363, + "learning_rate": 4.817083993378453e-06, + "loss": 0.6228, + "step": 2582 + }, + { + "epoch": 0.3805992141453831, + "grad_norm": 0.6101407408714294, + "learning_rate": 4.8169383957214155e-06, + "loss": 0.5896, + "step": 2583 + }, + { + "epoch": 0.3807465618860511, + "grad_norm": 0.6388084888458252, + "learning_rate": 4.816792742343097e-06, + "loss": 0.5762, + "step": 2584 + }, + { + "epoch": 0.38089390962671904, + "grad_norm": 0.5940234661102295, + "learning_rate": 4.8166470332469995e-06, + "loss": 0.5935, + "step": 2585 + }, + { + "epoch": 0.38104125736738703, + "grad_norm": 0.6170804500579834, + "learning_rate": 4.816501268436627e-06, + "loss": 0.6219, + "step": 2586 + }, + { + "epoch": 0.381188605108055, + "grad_norm": 0.5874404311180115, + "learning_rate": 4.816355447915486e-06, + "loss": 0.6091, + "step": 2587 + }, + { + "epoch": 0.38133595284872296, + "grad_norm": 0.5728103518486023, + "learning_rate": 4.816209571687083e-06, + "loss": 0.5961, + "step": 2588 + }, + { + "epoch": 0.38148330058939095, + "grad_norm": 0.5940219759941101, + "learning_rate": 4.816063639754927e-06, + "loss": 0.594, + "step": 2589 + }, + { + "epoch": 0.38163064833005894, + "grad_norm": 0.6263928413391113, + "learning_rate": 4.815917652122525e-06, + "loss": 0.5646, + "step": 2590 + }, + { + "epoch": 0.38177799607072693, + "grad_norm": 0.6338996291160583, + "learning_rate": 4.81577160879339e-06, + "loss": 0.6277, + "step": 2591 + }, + { + "epoch": 0.38192534381139487, + "grad_norm": 0.6143360733985901, + "learning_rate": 4.815625509771035e-06, + "loss": 0.5694, + "step": 2592 + }, + { + "epoch": 0.38207269155206286, + "grad_norm": 0.6122021675109863, + "learning_rate": 4.815479355058972e-06, + "loss": 0.5937, + "step": 2593 + }, + { + "epoch": 0.38222003929273085, + "grad_norm": 0.5731049180030823, + "learning_rate": 4.8153331446607164e-06, + "loss": 0.6064, + "step": 2594 + }, + { + "epoch": 0.38236738703339884, + "grad_norm": 0.6447180509567261, + "learning_rate": 4.815186878579785e-06, + "loss": 0.5775, + "step": 2595 + }, + { + "epoch": 0.3825147347740668, + "grad_norm": 0.5666604042053223, + "learning_rate": 4.815040556819695e-06, + "loss": 0.5497, + "step": 2596 + }, + { + "epoch": 0.38266208251473477, + "grad_norm": 0.5845326781272888, + "learning_rate": 4.8148941793839656e-06, + "loss": 0.6084, + "step": 2597 + }, + { + "epoch": 0.38280943025540276, + "grad_norm": 0.5862566232681274, + "learning_rate": 4.814747746276116e-06, + "loss": 0.5938, + "step": 2598 + }, + { + "epoch": 0.38295677799607075, + "grad_norm": 0.5842987895011902, + "learning_rate": 4.81460125749967e-06, + "loss": 0.5838, + "step": 2599 + }, + { + "epoch": 0.3831041257367387, + "grad_norm": 0.5857968926429749, + "learning_rate": 4.8144547130581486e-06, + "loss": 0.5371, + "step": 2600 + }, + { + "epoch": 0.3832514734774067, + "grad_norm": 0.5968043208122253, + "learning_rate": 4.8143081129550775e-06, + "loss": 0.6056, + "step": 2601 + }, + { + "epoch": 0.38339882121807467, + "grad_norm": 0.6282446384429932, + "learning_rate": 4.814161457193981e-06, + "loss": 0.6081, + "step": 2602 + }, + { + "epoch": 0.3835461689587426, + "grad_norm": 0.6072935461997986, + "learning_rate": 4.814014745778387e-06, + "loss": 0.6123, + "step": 2603 + }, + { + "epoch": 0.3836935166994106, + "grad_norm": 0.5664817094802856, + "learning_rate": 4.813867978711824e-06, + "loss": 0.5859, + "step": 2604 + }, + { + "epoch": 0.3838408644400786, + "grad_norm": 0.5960747003555298, + "learning_rate": 4.813721155997822e-06, + "loss": 0.5873, + "step": 2605 + }, + { + "epoch": 0.3839882121807466, + "grad_norm": 0.5809637308120728, + "learning_rate": 4.813574277639909e-06, + "loss": 0.5789, + "step": 2606 + }, + { + "epoch": 0.3841355599214145, + "grad_norm": 0.5675134062767029, + "learning_rate": 4.8134273436416225e-06, + "loss": 0.568, + "step": 2607 + }, + { + "epoch": 0.3842829076620825, + "grad_norm": 0.5811915397644043, + "learning_rate": 4.813280354006492e-06, + "loss": 0.5668, + "step": 2608 + }, + { + "epoch": 0.3844302554027505, + "grad_norm": 0.5891931653022766, + "learning_rate": 4.813133308738055e-06, + "loss": 0.5952, + "step": 2609 + }, + { + "epoch": 0.3845776031434185, + "grad_norm": 0.5727624893188477, + "learning_rate": 4.812986207839846e-06, + "loss": 0.5787, + "step": 2610 + }, + { + "epoch": 0.3847249508840864, + "grad_norm": 0.5738645792007446, + "learning_rate": 4.812839051315403e-06, + "loss": 0.5647, + "step": 2611 + }, + { + "epoch": 0.3848722986247544, + "grad_norm": 0.5898451209068298, + "learning_rate": 4.812691839168267e-06, + "loss": 0.619, + "step": 2612 + }, + { + "epoch": 0.3850196463654224, + "grad_norm": 0.5868536829948425, + "learning_rate": 4.8125445714019765e-06, + "loss": 0.6065, + "step": 2613 + }, + { + "epoch": 0.3851669941060904, + "grad_norm": 0.5470441579818726, + "learning_rate": 4.812397248020073e-06, + "loss": 0.5966, + "step": 2614 + }, + { + "epoch": 0.38531434184675833, + "grad_norm": 0.6287844777107239, + "learning_rate": 4.812249869026101e-06, + "loss": 0.6285, + "step": 2615 + }, + { + "epoch": 0.3854616895874263, + "grad_norm": 0.6049784421920776, + "learning_rate": 4.812102434423605e-06, + "loss": 0.5837, + "step": 2616 + }, + { + "epoch": 0.3856090373280943, + "grad_norm": 0.5979255437850952, + "learning_rate": 4.811954944216129e-06, + "loss": 0.6228, + "step": 2617 + }, + { + "epoch": 0.38575638506876225, + "grad_norm": 0.5727920532226562, + "learning_rate": 4.81180739840722e-06, + "loss": 0.628, + "step": 2618 + }, + { + "epoch": 0.38590373280943024, + "grad_norm": 0.569709837436676, + "learning_rate": 4.811659797000428e-06, + "loss": 0.5514, + "step": 2619 + }, + { + "epoch": 0.38605108055009824, + "grad_norm": 0.574992299079895, + "learning_rate": 4.811512139999303e-06, + "loss": 0.6206, + "step": 2620 + }, + { + "epoch": 0.3861984282907662, + "grad_norm": 0.593732476234436, + "learning_rate": 4.811364427407394e-06, + "loss": 0.591, + "step": 2621 + }, + { + "epoch": 0.38634577603143416, + "grad_norm": 0.586320161819458, + "learning_rate": 4.811216659228255e-06, + "loss": 0.5786, + "step": 2622 + }, + { + "epoch": 0.38649312377210215, + "grad_norm": 0.5758473873138428, + "learning_rate": 4.81106883546544e-06, + "loss": 0.6034, + "step": 2623 + }, + { + "epoch": 0.38664047151277015, + "grad_norm": 0.5793882608413696, + "learning_rate": 4.810920956122502e-06, + "loss": 0.6189, + "step": 2624 + }, + { + "epoch": 0.38678781925343814, + "grad_norm": 0.5995904207229614, + "learning_rate": 4.810773021203e-06, + "loss": 0.5845, + "step": 2625 + }, + { + "epoch": 0.3869351669941061, + "grad_norm": 0.5858675837516785, + "learning_rate": 4.810625030710491e-06, + "loss": 0.5933, + "step": 2626 + }, + { + "epoch": 0.38708251473477406, + "grad_norm": 0.6288282871246338, + "learning_rate": 4.810476984648534e-06, + "loss": 0.5904, + "step": 2627 + }, + { + "epoch": 0.38722986247544205, + "grad_norm": 0.5843484401702881, + "learning_rate": 4.810328883020688e-06, + "loss": 0.6299, + "step": 2628 + }, + { + "epoch": 0.38737721021611005, + "grad_norm": 0.5503814220428467, + "learning_rate": 4.810180725830517e-06, + "loss": 0.5366, + "step": 2629 + }, + { + "epoch": 0.387524557956778, + "grad_norm": 0.6345456838607788, + "learning_rate": 4.810032513081581e-06, + "loss": 0.6338, + "step": 2630 + }, + { + "epoch": 0.387671905697446, + "grad_norm": 0.6262436509132385, + "learning_rate": 4.809884244777449e-06, + "loss": 0.6131, + "step": 2631 + }, + { + "epoch": 0.38781925343811396, + "grad_norm": 0.5610175728797913, + "learning_rate": 4.809735920921683e-06, + "loss": 0.5951, + "step": 2632 + }, + { + "epoch": 0.3879666011787819, + "grad_norm": 0.6043254137039185, + "learning_rate": 4.809587541517852e-06, + "loss": 0.5788, + "step": 2633 + }, + { + "epoch": 0.3881139489194499, + "grad_norm": 0.610184371471405, + "learning_rate": 4.809439106569524e-06, + "loss": 0.6151, + "step": 2634 + }, + { + "epoch": 0.3882612966601179, + "grad_norm": 0.62702876329422, + "learning_rate": 4.809290616080268e-06, + "loss": 0.6145, + "step": 2635 + }, + { + "epoch": 0.3884086444007859, + "grad_norm": 0.5774331092834473, + "learning_rate": 4.809142070053656e-06, + "loss": 0.6244, + "step": 2636 + }, + { + "epoch": 0.3885559921414538, + "grad_norm": 0.5773032903671265, + "learning_rate": 4.808993468493261e-06, + "loss": 0.5737, + "step": 2637 + }, + { + "epoch": 0.3887033398821218, + "grad_norm": 0.5793195366859436, + "learning_rate": 4.808844811402655e-06, + "loss": 0.581, + "step": 2638 + }, + { + "epoch": 0.3888506876227898, + "grad_norm": 0.582912266254425, + "learning_rate": 4.808696098785415e-06, + "loss": 0.6338, + "step": 2639 + }, + { + "epoch": 0.3889980353634578, + "grad_norm": 0.5757652521133423, + "learning_rate": 4.8085473306451165e-06, + "loss": 0.5704, + "step": 2640 + }, + { + "epoch": 0.3891453831041257, + "grad_norm": 0.5812224745750427, + "learning_rate": 4.808398506985338e-06, + "loss": 0.5697, + "step": 2641 + }, + { + "epoch": 0.3892927308447937, + "grad_norm": 0.5577124357223511, + "learning_rate": 4.8082496278096565e-06, + "loss": 0.5938, + "step": 2642 + }, + { + "epoch": 0.3894400785854617, + "grad_norm": 0.6050858497619629, + "learning_rate": 4.808100693121656e-06, + "loss": 0.6194, + "step": 2643 + }, + { + "epoch": 0.3895874263261297, + "grad_norm": 0.6324871778488159, + "learning_rate": 4.807951702924915e-06, + "loss": 0.5875, + "step": 2644 + }, + { + "epoch": 0.38973477406679763, + "grad_norm": 0.6703950762748718, + "learning_rate": 4.8078026572230184e-06, + "loss": 0.6034, + "step": 2645 + }, + { + "epoch": 0.3898821218074656, + "grad_norm": 0.5837351679801941, + "learning_rate": 4.807653556019551e-06, + "loss": 0.5593, + "step": 2646 + }, + { + "epoch": 0.3900294695481336, + "grad_norm": 0.5531430244445801, + "learning_rate": 4.807504399318097e-06, + "loss": 0.5423, + "step": 2647 + }, + { + "epoch": 0.39017681728880155, + "grad_norm": 0.5790143013000488, + "learning_rate": 4.807355187122245e-06, + "loss": 0.5643, + "step": 2648 + }, + { + "epoch": 0.39032416502946954, + "grad_norm": 0.5888893008232117, + "learning_rate": 4.807205919435583e-06, + "loss": 0.5774, + "step": 2649 + }, + { + "epoch": 0.39047151277013753, + "grad_norm": 0.6072431802749634, + "learning_rate": 4.8070565962617e-06, + "loss": 0.6126, + "step": 2650 + }, + { + "epoch": 0.3906188605108055, + "grad_norm": 0.634993851184845, + "learning_rate": 4.8069072176041885e-06, + "loss": 0.5625, + "step": 2651 + }, + { + "epoch": 0.39076620825147346, + "grad_norm": 0.6020274758338928, + "learning_rate": 4.80675778346664e-06, + "loss": 0.6201, + "step": 2652 + }, + { + "epoch": 0.39091355599214145, + "grad_norm": 0.6015986800193787, + "learning_rate": 4.80660829385265e-06, + "loss": 0.6387, + "step": 2653 + }, + { + "epoch": 0.39106090373280944, + "grad_norm": 0.5975841879844666, + "learning_rate": 4.806458748765811e-06, + "loss": 0.602, + "step": 2654 + }, + { + "epoch": 0.39120825147347743, + "grad_norm": 0.5831129550933838, + "learning_rate": 4.806309148209722e-06, + "loss": 0.5833, + "step": 2655 + }, + { + "epoch": 0.39135559921414537, + "grad_norm": 0.5824583768844604, + "learning_rate": 4.806159492187979e-06, + "loss": 0.6046, + "step": 2656 + }, + { + "epoch": 0.39150294695481336, + "grad_norm": 0.6140817999839783, + "learning_rate": 4.806009780704181e-06, + "loss": 0.5937, + "step": 2657 + }, + { + "epoch": 0.39165029469548135, + "grad_norm": 0.5500677824020386, + "learning_rate": 4.805860013761931e-06, + "loss": 0.5687, + "step": 2658 + }, + { + "epoch": 0.3917976424361493, + "grad_norm": 0.5642713904380798, + "learning_rate": 4.805710191364829e-06, + "loss": 0.6069, + "step": 2659 + }, + { + "epoch": 0.3919449901768173, + "grad_norm": 0.6112343072891235, + "learning_rate": 4.805560313516478e-06, + "loss": 0.5985, + "step": 2660 + }, + { + "epoch": 0.39209233791748527, + "grad_norm": 0.595539927482605, + "learning_rate": 4.805410380220482e-06, + "loss": 0.6025, + "step": 2661 + }, + { + "epoch": 0.39223968565815326, + "grad_norm": 0.5451828837394714, + "learning_rate": 4.805260391480449e-06, + "loss": 0.5701, + "step": 2662 + }, + { + "epoch": 0.3923870333988212, + "grad_norm": 0.5906186699867249, + "learning_rate": 4.805110347299985e-06, + "loss": 0.5788, + "step": 2663 + }, + { + "epoch": 0.3925343811394892, + "grad_norm": 0.6056923866271973, + "learning_rate": 4.804960247682697e-06, + "loss": 0.6205, + "step": 2664 + }, + { + "epoch": 0.3926817288801572, + "grad_norm": 0.5713236927986145, + "learning_rate": 4.8048100926321965e-06, + "loss": 0.6084, + "step": 2665 + }, + { + "epoch": 0.39282907662082517, + "grad_norm": 0.583087146282196, + "learning_rate": 4.804659882152095e-06, + "loss": 0.5902, + "step": 2666 + }, + { + "epoch": 0.3929764243614931, + "grad_norm": 0.5654310584068298, + "learning_rate": 4.804509616246003e-06, + "loss": 0.5865, + "step": 2667 + }, + { + "epoch": 0.3931237721021611, + "grad_norm": 0.5609861016273499, + "learning_rate": 4.804359294917537e-06, + "loss": 0.6453, + "step": 2668 + }, + { + "epoch": 0.3932711198428291, + "grad_norm": 0.5919530391693115, + "learning_rate": 4.8042089181703096e-06, + "loss": 0.5915, + "step": 2669 + }, + { + "epoch": 0.3934184675834971, + "grad_norm": 0.6005065441131592, + "learning_rate": 4.804058486007939e-06, + "loss": 0.5803, + "step": 2670 + }, + { + "epoch": 0.393565815324165, + "grad_norm": 0.5732362866401672, + "learning_rate": 4.803907998434043e-06, + "loss": 0.5898, + "step": 2671 + }, + { + "epoch": 0.393713163064833, + "grad_norm": 0.5790830850601196, + "learning_rate": 4.803757455452239e-06, + "loss": 0.5987, + "step": 2672 + }, + { + "epoch": 0.393860510805501, + "grad_norm": 0.5742267966270447, + "learning_rate": 4.80360685706615e-06, + "loss": 0.6175, + "step": 2673 + }, + { + "epoch": 0.39400785854616893, + "grad_norm": 0.5779837369918823, + "learning_rate": 4.803456203279396e-06, + "loss": 0.595, + "step": 2674 + }, + { + "epoch": 0.3941552062868369, + "grad_norm": 0.5878490805625916, + "learning_rate": 4.8033054940956004e-06, + "loss": 0.6229, + "step": 2675 + }, + { + "epoch": 0.3943025540275049, + "grad_norm": 0.6023467779159546, + "learning_rate": 4.803154729518388e-06, + "loss": 0.577, + "step": 2676 + }, + { + "epoch": 0.3944499017681729, + "grad_norm": 0.5826079249382019, + "learning_rate": 4.803003909551385e-06, + "loss": 0.5892, + "step": 2677 + }, + { + "epoch": 0.39459724950884084, + "grad_norm": 0.5783757567405701, + "learning_rate": 4.802853034198218e-06, + "loss": 0.5488, + "step": 2678 + }, + { + "epoch": 0.39474459724950883, + "grad_norm": 0.5876146554946899, + "learning_rate": 4.802702103462515e-06, + "loss": 0.6006, + "step": 2679 + }, + { + "epoch": 0.3948919449901768, + "grad_norm": 0.6170414090156555, + "learning_rate": 4.802551117347908e-06, + "loss": 0.6172, + "step": 2680 + }, + { + "epoch": 0.3950392927308448, + "grad_norm": 0.5649641156196594, + "learning_rate": 4.802400075858025e-06, + "loss": 0.6012, + "step": 2681 + }, + { + "epoch": 0.39518664047151275, + "grad_norm": 0.6148719191551208, + "learning_rate": 4.802248978996501e-06, + "loss": 0.6107, + "step": 2682 + }, + { + "epoch": 0.39533398821218074, + "grad_norm": 0.5851680636405945, + "learning_rate": 4.8020978267669685e-06, + "loss": 0.5918, + "step": 2683 + }, + { + "epoch": 0.39548133595284873, + "grad_norm": 0.5937392115592957, + "learning_rate": 4.801946619173064e-06, + "loss": 0.6037, + "step": 2684 + }, + { + "epoch": 0.3956286836935167, + "grad_norm": 0.6091750264167786, + "learning_rate": 4.801795356218422e-06, + "loss": 0.5849, + "step": 2685 + }, + { + "epoch": 0.39577603143418466, + "grad_norm": 0.5948675274848938, + "learning_rate": 4.8016440379066805e-06, + "loss": 0.579, + "step": 2686 + }, + { + "epoch": 0.39592337917485265, + "grad_norm": 0.580098032951355, + "learning_rate": 4.801492664241481e-06, + "loss": 0.6039, + "step": 2687 + }, + { + "epoch": 0.39607072691552064, + "grad_norm": 0.5764427781105042, + "learning_rate": 4.801341235226461e-06, + "loss": 0.6054, + "step": 2688 + }, + { + "epoch": 0.3962180746561886, + "grad_norm": 0.5466452836990356, + "learning_rate": 4.801189750865265e-06, + "loss": 0.5998, + "step": 2689 + }, + { + "epoch": 0.39636542239685657, + "grad_norm": 0.6225869655609131, + "learning_rate": 4.8010382111615335e-06, + "loss": 0.6305, + "step": 2690 + }, + { + "epoch": 0.39651277013752456, + "grad_norm": 0.5742381811141968, + "learning_rate": 4.800886616118914e-06, + "loss": 0.6229, + "step": 2691 + }, + { + "epoch": 0.39666011787819255, + "grad_norm": 0.5912784337997437, + "learning_rate": 4.800734965741049e-06, + "loss": 0.6127, + "step": 2692 + }, + { + "epoch": 0.3968074656188605, + "grad_norm": 0.5603839755058289, + "learning_rate": 4.800583260031588e-06, + "loss": 0.5513, + "step": 2693 + }, + { + "epoch": 0.3969548133595285, + "grad_norm": 0.5926120281219482, + "learning_rate": 4.800431498994178e-06, + "loss": 0.5726, + "step": 2694 + }, + { + "epoch": 0.39710216110019647, + "grad_norm": 0.5782555341720581, + "learning_rate": 4.8002796826324696e-06, + "loss": 0.5914, + "step": 2695 + }, + { + "epoch": 0.39724950884086446, + "grad_norm": 0.5769448280334473, + "learning_rate": 4.800127810950114e-06, + "loss": 0.5945, + "step": 2696 + }, + { + "epoch": 0.3973968565815324, + "grad_norm": 0.5957410335540771, + "learning_rate": 4.799975883950764e-06, + "loss": 0.6103, + "step": 2697 + }, + { + "epoch": 0.3975442043222004, + "grad_norm": 0.6411625146865845, + "learning_rate": 4.799823901638071e-06, + "loss": 0.6009, + "step": 2698 + }, + { + "epoch": 0.3976915520628684, + "grad_norm": 0.5736463665962219, + "learning_rate": 4.799671864015693e-06, + "loss": 0.5869, + "step": 2699 + }, + { + "epoch": 0.39783889980353637, + "grad_norm": 0.5931887626647949, + "learning_rate": 4.799519771087285e-06, + "loss": 0.6113, + "step": 2700 + }, + { + "epoch": 0.3979862475442043, + "grad_norm": 0.6291579604148865, + "learning_rate": 4.799367622856506e-06, + "loss": 0.5699, + "step": 2701 + }, + { + "epoch": 0.3981335952848723, + "grad_norm": 0.5766311883926392, + "learning_rate": 4.799215419327013e-06, + "loss": 0.5571, + "step": 2702 + }, + { + "epoch": 0.3982809430255403, + "grad_norm": 0.6219407916069031, + "learning_rate": 4.799063160502468e-06, + "loss": 0.5848, + "step": 2703 + }, + { + "epoch": 0.3984282907662082, + "grad_norm": 0.5804656744003296, + "learning_rate": 4.798910846386532e-06, + "loss": 0.5781, + "step": 2704 + }, + { + "epoch": 0.3985756385068762, + "grad_norm": 0.6199685335159302, + "learning_rate": 4.7987584769828685e-06, + "loss": 0.5711, + "step": 2705 + }, + { + "epoch": 0.3987229862475442, + "grad_norm": 0.5782421231269836, + "learning_rate": 4.798606052295142e-06, + "loss": 0.6, + "step": 2706 + }, + { + "epoch": 0.3988703339882122, + "grad_norm": 0.5783275961875916, + "learning_rate": 4.798453572327018e-06, + "loss": 0.5638, + "step": 2707 + }, + { + "epoch": 0.39901768172888014, + "grad_norm": 0.5610848665237427, + "learning_rate": 4.798301037082164e-06, + "loss": 0.5642, + "step": 2708 + }, + { + "epoch": 0.3991650294695481, + "grad_norm": 0.6191841959953308, + "learning_rate": 4.798148446564247e-06, + "loss": 0.5693, + "step": 2709 + }, + { + "epoch": 0.3993123772102161, + "grad_norm": 0.5983478426933289, + "learning_rate": 4.797995800776939e-06, + "loss": 0.593, + "step": 2710 + }, + { + "epoch": 0.3994597249508841, + "grad_norm": 0.581866979598999, + "learning_rate": 4.797843099723909e-06, + "loss": 0.6017, + "step": 2711 + }, + { + "epoch": 0.39960707269155205, + "grad_norm": 0.5995098948478699, + "learning_rate": 4.797690343408831e-06, + "loss": 0.5892, + "step": 2712 + }, + { + "epoch": 0.39975442043222004, + "grad_norm": 0.6076908707618713, + "learning_rate": 4.797537531835377e-06, + "loss": 0.5834, + "step": 2713 + }, + { + "epoch": 0.39990176817288803, + "grad_norm": 0.5693674087524414, + "learning_rate": 4.797384665007223e-06, + "loss": 0.5855, + "step": 2714 + }, + { + "epoch": 0.400049115913556, + "grad_norm": 0.5845901370048523, + "learning_rate": 4.7972317429280455e-06, + "loss": 0.555, + "step": 2715 + }, + { + "epoch": 0.40019646365422396, + "grad_norm": 0.591788113117218, + "learning_rate": 4.797078765601523e-06, + "loss": 0.5988, + "step": 2716 + }, + { + "epoch": 0.40034381139489195, + "grad_norm": 0.6067425012588501, + "learning_rate": 4.7969257330313324e-06, + "loss": 0.5755, + "step": 2717 + }, + { + "epoch": 0.40049115913555994, + "grad_norm": 0.6289933323860168, + "learning_rate": 4.796772645221155e-06, + "loss": 0.5779, + "step": 2718 + }, + { + "epoch": 0.4006385068762279, + "grad_norm": 0.6306756138801575, + "learning_rate": 4.796619502174674e-06, + "loss": 0.5903, + "step": 2719 + }, + { + "epoch": 0.40078585461689586, + "grad_norm": 0.5643883943557739, + "learning_rate": 4.7964663038955705e-06, + "loss": 0.5922, + "step": 2720 + }, + { + "epoch": 0.40093320235756386, + "grad_norm": 0.5535202622413635, + "learning_rate": 4.796313050387529e-06, + "loss": 0.6042, + "step": 2721 + }, + { + "epoch": 0.40108055009823185, + "grad_norm": 0.5757703185081482, + "learning_rate": 4.796159741654237e-06, + "loss": 0.5842, + "step": 2722 + }, + { + "epoch": 0.4012278978388998, + "grad_norm": 0.6096137166023254, + "learning_rate": 4.796006377699379e-06, + "loss": 0.5888, + "step": 2723 + }, + { + "epoch": 0.4013752455795678, + "grad_norm": 0.5742996335029602, + "learning_rate": 4.795852958526644e-06, + "loss": 0.6104, + "step": 2724 + }, + { + "epoch": 0.40152259332023577, + "grad_norm": 0.5815472602844238, + "learning_rate": 4.795699484139724e-06, + "loss": 0.6057, + "step": 2725 + }, + { + "epoch": 0.40166994106090376, + "grad_norm": 0.6230833530426025, + "learning_rate": 4.795545954542307e-06, + "loss": 0.6202, + "step": 2726 + }, + { + "epoch": 0.4018172888015717, + "grad_norm": 0.5628763437271118, + "learning_rate": 4.795392369738086e-06, + "loss": 0.5638, + "step": 2727 + }, + { + "epoch": 0.4019646365422397, + "grad_norm": 0.5813564658164978, + "learning_rate": 4.795238729730757e-06, + "loss": 0.549, + "step": 2728 + }, + { + "epoch": 0.4021119842829077, + "grad_norm": 0.5831095576286316, + "learning_rate": 4.795085034524012e-06, + "loss": 0.5918, + "step": 2729 + }, + { + "epoch": 0.4022593320235756, + "grad_norm": 0.6297887563705444, + "learning_rate": 4.7949312841215475e-06, + "loss": 0.5495, + "step": 2730 + }, + { + "epoch": 0.4024066797642436, + "grad_norm": 0.6301655173301697, + "learning_rate": 4.794777478527063e-06, + "loss": 0.5599, + "step": 2731 + }, + { + "epoch": 0.4025540275049116, + "grad_norm": 0.5735733509063721, + "learning_rate": 4.794623617744256e-06, + "loss": 0.5823, + "step": 2732 + }, + { + "epoch": 0.4027013752455796, + "grad_norm": 0.5921182036399841, + "learning_rate": 4.794469701776827e-06, + "loss": 0.5715, + "step": 2733 + }, + { + "epoch": 0.4028487229862475, + "grad_norm": 0.5694584250450134, + "learning_rate": 4.794315730628478e-06, + "loss": 0.5899, + "step": 2734 + }, + { + "epoch": 0.4029960707269155, + "grad_norm": 0.625616192817688, + "learning_rate": 4.794161704302912e-06, + "loss": 0.5757, + "step": 2735 + }, + { + "epoch": 0.4031434184675835, + "grad_norm": 0.5967764258384705, + "learning_rate": 4.7940076228038335e-06, + "loss": 0.6228, + "step": 2736 + }, + { + "epoch": 0.4032907662082515, + "grad_norm": 0.5908174514770508, + "learning_rate": 4.793853486134948e-06, + "loss": 0.5934, + "step": 2737 + }, + { + "epoch": 0.40343811394891943, + "grad_norm": 0.5668950080871582, + "learning_rate": 4.79369929429996e-06, + "loss": 0.5969, + "step": 2738 + }, + { + "epoch": 0.4035854616895874, + "grad_norm": 0.5866678357124329, + "learning_rate": 4.793545047302582e-06, + "loss": 0.5945, + "step": 2739 + }, + { + "epoch": 0.4037328094302554, + "grad_norm": 0.5973380208015442, + "learning_rate": 4.79339074514652e-06, + "loss": 0.5704, + "step": 2740 + }, + { + "epoch": 0.4038801571709234, + "grad_norm": 0.6058279275894165, + "learning_rate": 4.793236387835487e-06, + "loss": 0.6347, + "step": 2741 + }, + { + "epoch": 0.40402750491159134, + "grad_norm": 0.5891084671020508, + "learning_rate": 4.793081975373194e-06, + "loss": 0.6123, + "step": 2742 + }, + { + "epoch": 0.40417485265225933, + "grad_norm": 0.5767449140548706, + "learning_rate": 4.792927507763356e-06, + "loss": 0.605, + "step": 2743 + }, + { + "epoch": 0.4043222003929273, + "grad_norm": 0.5740152597427368, + "learning_rate": 4.792772985009685e-06, + "loss": 0.6047, + "step": 2744 + }, + { + "epoch": 0.40446954813359526, + "grad_norm": 0.5925048589706421, + "learning_rate": 4.7926184071158996e-06, + "loss": 0.5904, + "step": 2745 + }, + { + "epoch": 0.40461689587426325, + "grad_norm": 0.6112015247344971, + "learning_rate": 4.7924637740857176e-06, + "loss": 0.5849, + "step": 2746 + }, + { + "epoch": 0.40476424361493124, + "grad_norm": 0.5908433198928833, + "learning_rate": 4.792309085922857e-06, + "loss": 0.5489, + "step": 2747 + }, + { + "epoch": 0.40491159135559923, + "grad_norm": 0.5716387033462524, + "learning_rate": 4.792154342631038e-06, + "loss": 0.5863, + "step": 2748 + }, + { + "epoch": 0.40505893909626717, + "grad_norm": 0.5723243951797485, + "learning_rate": 4.791999544213982e-06, + "loss": 0.585, + "step": 2749 + }, + { + "epoch": 0.40520628683693516, + "grad_norm": 0.5814381241798401, + "learning_rate": 4.791844690675411e-06, + "loss": 0.6017, + "step": 2750 + }, + { + "epoch": 0.40535363457760315, + "grad_norm": 0.5869202017784119, + "learning_rate": 4.791689782019051e-06, + "loss": 0.5949, + "step": 2751 + }, + { + "epoch": 0.40550098231827114, + "grad_norm": 0.5761842131614685, + "learning_rate": 4.7915348182486275e-06, + "loss": 0.5929, + "step": 2752 + }, + { + "epoch": 0.4056483300589391, + "grad_norm": 0.6370195150375366, + "learning_rate": 4.791379799367866e-06, + "loss": 0.5985, + "step": 2753 + }, + { + "epoch": 0.40579567779960707, + "grad_norm": 0.5786860585212708, + "learning_rate": 4.791224725380494e-06, + "loss": 0.594, + "step": 2754 + }, + { + "epoch": 0.40594302554027506, + "grad_norm": 0.6206732392311096, + "learning_rate": 4.791069596290243e-06, + "loss": 0.5666, + "step": 2755 + }, + { + "epoch": 0.40609037328094305, + "grad_norm": 0.5666832327842712, + "learning_rate": 4.790914412100842e-06, + "loss": 0.5703, + "step": 2756 + }, + { + "epoch": 0.406237721021611, + "grad_norm": 0.563734769821167, + "learning_rate": 4.790759172816024e-06, + "loss": 0.5626, + "step": 2757 + }, + { + "epoch": 0.406385068762279, + "grad_norm": 0.574942409992218, + "learning_rate": 4.790603878439522e-06, + "loss": 0.6097, + "step": 2758 + }, + { + "epoch": 0.40653241650294697, + "grad_norm": 0.6179258227348328, + "learning_rate": 4.790448528975073e-06, + "loss": 0.6118, + "step": 2759 + }, + { + "epoch": 0.4066797642436149, + "grad_norm": 0.5988537073135376, + "learning_rate": 4.79029312442641e-06, + "loss": 0.5697, + "step": 2760 + }, + { + "epoch": 0.4068271119842829, + "grad_norm": 0.6117128729820251, + "learning_rate": 4.790137664797271e-06, + "loss": 0.577, + "step": 2761 + }, + { + "epoch": 0.4069744597249509, + "grad_norm": 0.5823422074317932, + "learning_rate": 4.789982150091395e-06, + "loss": 0.6069, + "step": 2762 + }, + { + "epoch": 0.4071218074656189, + "grad_norm": 0.6025882363319397, + "learning_rate": 4.7898265803125235e-06, + "loss": 0.5996, + "step": 2763 + }, + { + "epoch": 0.4072691552062868, + "grad_norm": 0.5737284421920776, + "learning_rate": 4.789670955464397e-06, + "loss": 0.5885, + "step": 2764 + }, + { + "epoch": 0.4074165029469548, + "grad_norm": 0.5982081890106201, + "learning_rate": 4.789515275550757e-06, + "loss": 0.6175, + "step": 2765 + }, + { + "epoch": 0.4075638506876228, + "grad_norm": 0.5861347913742065, + "learning_rate": 4.7893595405753484e-06, + "loss": 0.6044, + "step": 2766 + }, + { + "epoch": 0.4077111984282908, + "grad_norm": 0.6111582517623901, + "learning_rate": 4.789203750541917e-06, + "loss": 0.5819, + "step": 2767 + }, + { + "epoch": 0.4078585461689587, + "grad_norm": 0.588870108127594, + "learning_rate": 4.789047905454209e-06, + "loss": 0.5981, + "step": 2768 + }, + { + "epoch": 0.4080058939096267, + "grad_norm": 0.6230973601341248, + "learning_rate": 4.788892005315973e-06, + "loss": 0.6163, + "step": 2769 + }, + { + "epoch": 0.4081532416502947, + "grad_norm": 0.5933460593223572, + "learning_rate": 4.788736050130957e-06, + "loss": 0.6027, + "step": 2770 + }, + { + "epoch": 0.4083005893909627, + "grad_norm": 0.5792389512062073, + "learning_rate": 4.788580039902913e-06, + "loss": 0.61, + "step": 2771 + }, + { + "epoch": 0.40844793713163063, + "grad_norm": 0.6084865927696228, + "learning_rate": 4.788423974635592e-06, + "loss": 0.598, + "step": 2772 + }, + { + "epoch": 0.4085952848722986, + "grad_norm": 0.5834788084030151, + "learning_rate": 4.7882678543327485e-06, + "loss": 0.6131, + "step": 2773 + }, + { + "epoch": 0.4087426326129666, + "grad_norm": 0.5600731372833252, + "learning_rate": 4.788111678998136e-06, + "loss": 0.5963, + "step": 2774 + }, + { + "epoch": 0.40888998035363455, + "grad_norm": 0.5749059319496155, + "learning_rate": 4.787955448635511e-06, + "loss": 0.6014, + "step": 2775 + }, + { + "epoch": 0.40903732809430254, + "grad_norm": 0.6421395540237427, + "learning_rate": 4.78779916324863e-06, + "loss": 0.5846, + "step": 2776 + }, + { + "epoch": 0.40918467583497053, + "grad_norm": 0.572503387928009, + "learning_rate": 4.787642822841252e-06, + "loss": 0.5768, + "step": 2777 + }, + { + "epoch": 0.4093320235756385, + "grad_norm": 0.5787804126739502, + "learning_rate": 4.787486427417138e-06, + "loss": 0.5928, + "step": 2778 + }, + { + "epoch": 0.40947937131630646, + "grad_norm": 0.5868889093399048, + "learning_rate": 4.787329976980048e-06, + "loss": 0.5914, + "step": 2779 + }, + { + "epoch": 0.40962671905697445, + "grad_norm": 0.6024705171585083, + "learning_rate": 4.787173471533745e-06, + "loss": 0.6081, + "step": 2780 + }, + { + "epoch": 0.40977406679764244, + "grad_norm": 0.5963186621665955, + "learning_rate": 4.787016911081992e-06, + "loss": 0.5583, + "step": 2781 + }, + { + "epoch": 0.40992141453831044, + "grad_norm": 0.5807563066482544, + "learning_rate": 4.7868602956285556e-06, + "loss": 0.6144, + "step": 2782 + }, + { + "epoch": 0.41006876227897837, + "grad_norm": 0.569587230682373, + "learning_rate": 4.786703625177201e-06, + "loss": 0.5783, + "step": 2783 + }, + { + "epoch": 0.41021611001964636, + "grad_norm": 0.5785647630691528, + "learning_rate": 4.786546899731697e-06, + "loss": 0.6278, + "step": 2784 + }, + { + "epoch": 0.41036345776031435, + "grad_norm": 0.6165842413902283, + "learning_rate": 4.786390119295814e-06, + "loss": 0.6228, + "step": 2785 + }, + { + "epoch": 0.41051080550098235, + "grad_norm": 0.5606870055198669, + "learning_rate": 4.7862332838733195e-06, + "loss": 0.574, + "step": 2786 + }, + { + "epoch": 0.4106581532416503, + "grad_norm": 0.5658661723136902, + "learning_rate": 4.7860763934679874e-06, + "loss": 0.6245, + "step": 2787 + }, + { + "epoch": 0.4108055009823183, + "grad_norm": 0.6214610934257507, + "learning_rate": 4.78591944808359e-06, + "loss": 0.5867, + "step": 2788 + }, + { + "epoch": 0.41095284872298626, + "grad_norm": 0.6038523316383362, + "learning_rate": 4.785762447723902e-06, + "loss": 0.5814, + "step": 2789 + }, + { + "epoch": 0.4111001964636542, + "grad_norm": 0.5983718633651733, + "learning_rate": 4.785605392392699e-06, + "loss": 0.5831, + "step": 2790 + }, + { + "epoch": 0.4112475442043222, + "grad_norm": 0.5981801152229309, + "learning_rate": 4.7854482820937585e-06, + "loss": 0.6098, + "step": 2791 + }, + { + "epoch": 0.4113948919449902, + "grad_norm": 0.5622316598892212, + "learning_rate": 4.785291116830859e-06, + "loss": 0.5937, + "step": 2792 + }, + { + "epoch": 0.4115422396856582, + "grad_norm": 0.5831667184829712, + "learning_rate": 4.7851338966077795e-06, + "loss": 0.5841, + "step": 2793 + }, + { + "epoch": 0.4116895874263261, + "grad_norm": 0.5886744856834412, + "learning_rate": 4.784976621428302e-06, + "loss": 0.6051, + "step": 2794 + }, + { + "epoch": 0.4118369351669941, + "grad_norm": 0.5751788020133972, + "learning_rate": 4.784819291296209e-06, + "loss": 0.5917, + "step": 2795 + }, + { + "epoch": 0.4119842829076621, + "grad_norm": 0.5622609853744507, + "learning_rate": 4.784661906215283e-06, + "loss": 0.5987, + "step": 2796 + }, + { + "epoch": 0.4121316306483301, + "grad_norm": 0.5521116852760315, + "learning_rate": 4.784504466189311e-06, + "loss": 0.601, + "step": 2797 + }, + { + "epoch": 0.412278978388998, + "grad_norm": 0.6472792625427246, + "learning_rate": 4.784346971222075e-06, + "loss": 0.6181, + "step": 2798 + }, + { + "epoch": 0.412426326129666, + "grad_norm": 0.5785955190658569, + "learning_rate": 4.784189421317369e-06, + "loss": 0.5754, + "step": 2799 + }, + { + "epoch": 0.412573673870334, + "grad_norm": 0.5594112873077393, + "learning_rate": 4.7840318164789764e-06, + "loss": 0.5651, + "step": 2800 + }, + { + "epoch": 0.41272102161100194, + "grad_norm": 0.587622344493866, + "learning_rate": 4.783874156710692e-06, + "loss": 0.5745, + "step": 2801 + }, + { + "epoch": 0.41286836935166993, + "grad_norm": 0.5547536015510559, + "learning_rate": 4.783716442016304e-06, + "loss": 0.5939, + "step": 2802 + }, + { + "epoch": 0.4130157170923379, + "grad_norm": 0.5826300382614136, + "learning_rate": 4.783558672399607e-06, + "loss": 0.6157, + "step": 2803 + }, + { + "epoch": 0.4131630648330059, + "grad_norm": 0.5866250395774841, + "learning_rate": 4.783400847864395e-06, + "loss": 0.612, + "step": 2804 + }, + { + "epoch": 0.41331041257367385, + "grad_norm": 0.5815802812576294, + "learning_rate": 4.783242968414464e-06, + "loss": 0.6, + "step": 2805 + }, + { + "epoch": 0.41345776031434184, + "grad_norm": 0.5641602277755737, + "learning_rate": 4.783085034053609e-06, + "loss": 0.5996, + "step": 2806 + }, + { + "epoch": 0.41360510805500983, + "grad_norm": 0.5949298739433289, + "learning_rate": 4.782927044785631e-06, + "loss": 0.6117, + "step": 2807 + }, + { + "epoch": 0.4137524557956778, + "grad_norm": 0.5852234363555908, + "learning_rate": 4.782769000614329e-06, + "loss": 0.5898, + "step": 2808 + }, + { + "epoch": 0.41389980353634576, + "grad_norm": 0.6110051274299622, + "learning_rate": 4.7826109015435015e-06, + "loss": 0.5959, + "step": 2809 + }, + { + "epoch": 0.41404715127701375, + "grad_norm": 0.5868287682533264, + "learning_rate": 4.782452747576954e-06, + "loss": 0.6076, + "step": 2810 + }, + { + "epoch": 0.41419449901768174, + "grad_norm": 0.6127781867980957, + "learning_rate": 4.7822945387184875e-06, + "loss": 0.5638, + "step": 2811 + }, + { + "epoch": 0.41434184675834973, + "grad_norm": 0.582879364490509, + "learning_rate": 4.782136274971908e-06, + "loss": 0.5849, + "step": 2812 + }, + { + "epoch": 0.41448919449901767, + "grad_norm": 0.5968958139419556, + "learning_rate": 4.781977956341022e-06, + "loss": 0.5913, + "step": 2813 + }, + { + "epoch": 0.41463654223968566, + "grad_norm": 0.5520854592323303, + "learning_rate": 4.7818195828296355e-06, + "loss": 0.554, + "step": 2814 + }, + { + "epoch": 0.41478388998035365, + "grad_norm": 0.5795567631721497, + "learning_rate": 4.781661154441559e-06, + "loss": 0.59, + "step": 2815 + }, + { + "epoch": 0.4149312377210216, + "grad_norm": 0.5853440761566162, + "learning_rate": 4.781502671180602e-06, + "loss": 0.579, + "step": 2816 + }, + { + "epoch": 0.4150785854616896, + "grad_norm": 0.5782330632209778, + "learning_rate": 4.781344133050575e-06, + "loss": 0.5416, + "step": 2817 + }, + { + "epoch": 0.41522593320235757, + "grad_norm": 0.5986406803131104, + "learning_rate": 4.781185540055292e-06, + "loss": 0.5905, + "step": 2818 + }, + { + "epoch": 0.41537328094302556, + "grad_norm": 0.5902511477470398, + "learning_rate": 4.781026892198567e-06, + "loss": 0.6108, + "step": 2819 + }, + { + "epoch": 0.4155206286836935, + "grad_norm": 0.6109578609466553, + "learning_rate": 4.780868189484215e-06, + "loss": 0.5275, + "step": 2820 + }, + { + "epoch": 0.4156679764243615, + "grad_norm": 0.57193523645401, + "learning_rate": 4.780709431916053e-06, + "loss": 0.5774, + "step": 2821 + }, + { + "epoch": 0.4158153241650295, + "grad_norm": 0.6215221285820007, + "learning_rate": 4.780550619497899e-06, + "loss": 0.6281, + "step": 2822 + }, + { + "epoch": 0.41596267190569747, + "grad_norm": 0.5920333862304688, + "learning_rate": 4.780391752233572e-06, + "loss": 0.5973, + "step": 2823 + }, + { + "epoch": 0.4161100196463654, + "grad_norm": 0.6160426139831543, + "learning_rate": 4.780232830126893e-06, + "loss": 0.5685, + "step": 2824 + }, + { + "epoch": 0.4162573673870334, + "grad_norm": 0.6034358739852905, + "learning_rate": 4.780073853181683e-06, + "loss": 0.5919, + "step": 2825 + }, + { + "epoch": 0.4164047151277014, + "grad_norm": 0.5934231281280518, + "learning_rate": 4.779914821401767e-06, + "loss": 0.6219, + "step": 2826 + }, + { + "epoch": 0.4165520628683694, + "grad_norm": 0.6062235832214355, + "learning_rate": 4.779755734790969e-06, + "loss": 0.5597, + "step": 2827 + }, + { + "epoch": 0.4166994106090373, + "grad_norm": 0.5749539732933044, + "learning_rate": 4.779596593353115e-06, + "loss": 0.6048, + "step": 2828 + }, + { + "epoch": 0.4168467583497053, + "grad_norm": 0.586951732635498, + "learning_rate": 4.779437397092031e-06, + "loss": 0.6081, + "step": 2829 + }, + { + "epoch": 0.4169941060903733, + "grad_norm": 0.6017529368400574, + "learning_rate": 4.779278146011549e-06, + "loss": 0.5763, + "step": 2830 + }, + { + "epoch": 0.41714145383104123, + "grad_norm": 0.5621742606163025, + "learning_rate": 4.779118840115494e-06, + "loss": 0.5899, + "step": 2831 + }, + { + "epoch": 0.4172888015717092, + "grad_norm": 0.580478310585022, + "learning_rate": 4.778959479407702e-06, + "loss": 0.5996, + "step": 2832 + }, + { + "epoch": 0.4174361493123772, + "grad_norm": 0.6417176127433777, + "learning_rate": 4.778800063892002e-06, + "loss": 0.6217, + "step": 2833 + }, + { + "epoch": 0.4175834970530452, + "grad_norm": 0.6075024008750916, + "learning_rate": 4.7786405935722305e-06, + "loss": 0.587, + "step": 2834 + }, + { + "epoch": 0.41773084479371314, + "grad_norm": 0.6413843035697937, + "learning_rate": 4.7784810684522195e-06, + "loss": 0.625, + "step": 2835 + }, + { + "epoch": 0.41787819253438113, + "grad_norm": 0.575473964214325, + "learning_rate": 4.77832148853581e-06, + "loss": 0.6055, + "step": 2836 + }, + { + "epoch": 0.4180255402750491, + "grad_norm": 0.5980100035667419, + "learning_rate": 4.778161853826835e-06, + "loss": 0.5748, + "step": 2837 + }, + { + "epoch": 0.4181728880157171, + "grad_norm": 0.6277945041656494, + "learning_rate": 4.778002164329137e-06, + "loss": 0.6056, + "step": 2838 + }, + { + "epoch": 0.41832023575638505, + "grad_norm": 0.6107192635536194, + "learning_rate": 4.777842420046556e-06, + "loss": 0.5776, + "step": 2839 + }, + { + "epoch": 0.41846758349705304, + "grad_norm": 0.5897086262702942, + "learning_rate": 4.777682620982933e-06, + "loss": 0.6134, + "step": 2840 + }, + { + "epoch": 0.41861493123772103, + "grad_norm": 0.6400678753852844, + "learning_rate": 4.77752276714211e-06, + "loss": 0.6051, + "step": 2841 + }, + { + "epoch": 0.418762278978389, + "grad_norm": 0.6523728966712952, + "learning_rate": 4.7773628585279335e-06, + "loss": 0.5432, + "step": 2842 + }, + { + "epoch": 0.41890962671905696, + "grad_norm": 0.5809109210968018, + "learning_rate": 4.777202895144249e-06, + "loss": 0.5528, + "step": 2843 + }, + { + "epoch": 0.41905697445972495, + "grad_norm": 0.6095626950263977, + "learning_rate": 4.7770428769949015e-06, + "loss": 0.6105, + "step": 2844 + }, + { + "epoch": 0.41920432220039294, + "grad_norm": 0.579447329044342, + "learning_rate": 4.776882804083743e-06, + "loss": 0.5731, + "step": 2845 + }, + { + "epoch": 0.4193516699410609, + "grad_norm": 0.5881508588790894, + "learning_rate": 4.776722676414619e-06, + "loss": 0.598, + "step": 2846 + }, + { + "epoch": 0.41949901768172887, + "grad_norm": 0.5770289301872253, + "learning_rate": 4.776562493991383e-06, + "loss": 0.5983, + "step": 2847 + }, + { + "epoch": 0.41964636542239686, + "grad_norm": 0.5802206993103027, + "learning_rate": 4.776402256817887e-06, + "loss": 0.5611, + "step": 2848 + }, + { + "epoch": 0.41979371316306485, + "grad_norm": 0.6051363348960876, + "learning_rate": 4.776241964897984e-06, + "loss": 0.5879, + "step": 2849 + }, + { + "epoch": 0.4199410609037328, + "grad_norm": 0.5953899621963501, + "learning_rate": 4.776081618235529e-06, + "loss": 0.5898, + "step": 2850 + }, + { + "epoch": 0.4200884086444008, + "grad_norm": 0.6011185646057129, + "learning_rate": 4.77592121683438e-06, + "loss": 0.5793, + "step": 2851 + }, + { + "epoch": 0.42023575638506877, + "grad_norm": 0.5943323969841003, + "learning_rate": 4.775760760698392e-06, + "loss": 0.5691, + "step": 2852 + }, + { + "epoch": 0.42038310412573676, + "grad_norm": 0.6031147837638855, + "learning_rate": 4.7756002498314255e-06, + "loss": 0.5837, + "step": 2853 + }, + { + "epoch": 0.4205304518664047, + "grad_norm": 0.629374086856842, + "learning_rate": 4.775439684237341e-06, + "loss": 0.6007, + "step": 2854 + }, + { + "epoch": 0.4206777996070727, + "grad_norm": 0.5867839455604553, + "learning_rate": 4.775279063919999e-06, + "loss": 0.6084, + "step": 2855 + }, + { + "epoch": 0.4208251473477407, + "grad_norm": 0.6237460374832153, + "learning_rate": 4.775118388883262e-06, + "loss": 0.5974, + "step": 2856 + }, + { + "epoch": 0.42097249508840867, + "grad_norm": 0.6322688460350037, + "learning_rate": 4.774957659130995e-06, + "loss": 0.5801, + "step": 2857 + }, + { + "epoch": 0.4211198428290766, + "grad_norm": 0.6226965188980103, + "learning_rate": 4.774796874667065e-06, + "loss": 0.6014, + "step": 2858 + }, + { + "epoch": 0.4212671905697446, + "grad_norm": 0.5664898753166199, + "learning_rate": 4.774636035495335e-06, + "loss": 0.574, + "step": 2859 + }, + { + "epoch": 0.4214145383104126, + "grad_norm": 0.5577507615089417, + "learning_rate": 4.774475141619676e-06, + "loss": 0.5634, + "step": 2860 + }, + { + "epoch": 0.4215618860510805, + "grad_norm": 0.6147626042366028, + "learning_rate": 4.774314193043956e-06, + "loss": 0.5784, + "step": 2861 + }, + { + "epoch": 0.4217092337917485, + "grad_norm": 0.8286246657371521, + "learning_rate": 4.774153189772047e-06, + "loss": 0.6101, + "step": 2862 + }, + { + "epoch": 0.4218565815324165, + "grad_norm": 0.6400855779647827, + "learning_rate": 4.773992131807821e-06, + "loss": 0.5954, + "step": 2863 + }, + { + "epoch": 0.4220039292730845, + "grad_norm": 0.5988584756851196, + "learning_rate": 4.773831019155149e-06, + "loss": 0.5778, + "step": 2864 + }, + { + "epoch": 0.42215127701375244, + "grad_norm": 0.6302075386047363, + "learning_rate": 4.773669851817909e-06, + "loss": 0.5729, + "step": 2865 + }, + { + "epoch": 0.4222986247544204, + "grad_norm": 0.5939579010009766, + "learning_rate": 4.773508629799975e-06, + "loss": 0.5708, + "step": 2866 + }, + { + "epoch": 0.4224459724950884, + "grad_norm": 0.5859335660934448, + "learning_rate": 4.773347353105225e-06, + "loss": 0.556, + "step": 2867 + }, + { + "epoch": 0.4225933202357564, + "grad_norm": 0.5986723303794861, + "learning_rate": 4.773186021737538e-06, + "loss": 0.5543, + "step": 2868 + }, + { + "epoch": 0.42274066797642434, + "grad_norm": 0.5884068608283997, + "learning_rate": 4.773024635700792e-06, + "loss": 0.5655, + "step": 2869 + }, + { + "epoch": 0.42288801571709234, + "grad_norm": 0.5778591632843018, + "learning_rate": 4.772863194998871e-06, + "loss": 0.5913, + "step": 2870 + }, + { + "epoch": 0.4230353634577603, + "grad_norm": 0.5554454326629639, + "learning_rate": 4.7727016996356554e-06, + "loss": 0.5594, + "step": 2871 + }, + { + "epoch": 0.4231827111984283, + "grad_norm": 0.5787601470947266, + "learning_rate": 4.77254014961503e-06, + "loss": 0.5833, + "step": 2872 + }, + { + "epoch": 0.42333005893909625, + "grad_norm": 0.5940841436386108, + "learning_rate": 4.77237854494088e-06, + "loss": 0.56, + "step": 2873 + }, + { + "epoch": 0.42347740667976425, + "grad_norm": 0.5598629117012024, + "learning_rate": 4.7722168856170914e-06, + "loss": 0.5959, + "step": 2874 + }, + { + "epoch": 0.42362475442043224, + "grad_norm": 0.6113359332084656, + "learning_rate": 4.772055171647553e-06, + "loss": 0.5547, + "step": 2875 + }, + { + "epoch": 0.4237721021611002, + "grad_norm": 0.6091389656066895, + "learning_rate": 4.771893403036153e-06, + "loss": 0.6292, + "step": 2876 + }, + { + "epoch": 0.42391944990176816, + "grad_norm": 0.5671719908714294, + "learning_rate": 4.7717315797867825e-06, + "loss": 0.5542, + "step": 2877 + }, + { + "epoch": 0.42406679764243616, + "grad_norm": 0.5534621477127075, + "learning_rate": 4.771569701903332e-06, + "loss": 0.5807, + "step": 2878 + }, + { + "epoch": 0.42421414538310415, + "grad_norm": 0.599511444568634, + "learning_rate": 4.771407769389697e-06, + "loss": 0.5703, + "step": 2879 + }, + { + "epoch": 0.4243614931237721, + "grad_norm": 0.58267742395401, + "learning_rate": 4.77124578224977e-06, + "loss": 0.5803, + "step": 2880 + }, + { + "epoch": 0.4245088408644401, + "grad_norm": 0.5763214230537415, + "learning_rate": 4.771083740487448e-06, + "loss": 0.5624, + "step": 2881 + }, + { + "epoch": 0.42465618860510806, + "grad_norm": 0.578852117061615, + "learning_rate": 4.770921644106626e-06, + "loss": 0.581, + "step": 2882 + }, + { + "epoch": 0.42480353634577606, + "grad_norm": 0.6393662095069885, + "learning_rate": 4.770759493111204e-06, + "loss": 0.5766, + "step": 2883 + }, + { + "epoch": 0.424950884086444, + "grad_norm": 0.693715512752533, + "learning_rate": 4.770597287505081e-06, + "loss": 0.5534, + "step": 2884 + }, + { + "epoch": 0.425098231827112, + "grad_norm": 0.6104834079742432, + "learning_rate": 4.770435027292159e-06, + "loss": 0.6153, + "step": 2885 + }, + { + "epoch": 0.42524557956778, + "grad_norm": 0.5863134264945984, + "learning_rate": 4.770272712476339e-06, + "loss": 0.6323, + "step": 2886 + }, + { + "epoch": 0.4253929273084479, + "grad_norm": 0.6314327120780945, + "learning_rate": 4.7701103430615245e-06, + "loss": 0.6226, + "step": 2887 + }, + { + "epoch": 0.4255402750491159, + "grad_norm": 0.5975476503372192, + "learning_rate": 4.769947919051622e-06, + "loss": 0.6162, + "step": 2888 + }, + { + "epoch": 0.4256876227897839, + "grad_norm": 0.6031359434127808, + "learning_rate": 4.769785440450536e-06, + "loss": 0.5901, + "step": 2889 + }, + { + "epoch": 0.4258349705304519, + "grad_norm": 0.5922894477844238, + "learning_rate": 4.769622907262175e-06, + "loss": 0.6031, + "step": 2890 + }, + { + "epoch": 0.4259823182711198, + "grad_norm": 0.58421391248703, + "learning_rate": 4.769460319490448e-06, + "loss": 0.5647, + "step": 2891 + }, + { + "epoch": 0.4261296660117878, + "grad_norm": 0.576166033744812, + "learning_rate": 4.769297677139264e-06, + "loss": 0.5506, + "step": 2892 + }, + { + "epoch": 0.4262770137524558, + "grad_norm": 0.6077555418014526, + "learning_rate": 4.769134980212535e-06, + "loss": 0.5954, + "step": 2893 + }, + { + "epoch": 0.4264243614931238, + "grad_norm": 0.557258129119873, + "learning_rate": 4.768972228714175e-06, + "loss": 0.5699, + "step": 2894 + }, + { + "epoch": 0.42657170923379173, + "grad_norm": 0.5705839991569519, + "learning_rate": 4.768809422648097e-06, + "loss": 0.5726, + "step": 2895 + }, + { + "epoch": 0.4267190569744597, + "grad_norm": 0.5811918377876282, + "learning_rate": 4.768646562018217e-06, + "loss": 0.6173, + "step": 2896 + }, + { + "epoch": 0.4268664047151277, + "grad_norm": 0.5761358141899109, + "learning_rate": 4.768483646828449e-06, + "loss": 0.5575, + "step": 2897 + }, + { + "epoch": 0.4270137524557957, + "grad_norm": 0.5590113401412964, + "learning_rate": 4.768320677082715e-06, + "loss": 0.5416, + "step": 2898 + }, + { + "epoch": 0.42716110019646364, + "grad_norm": 0.6097702980041504, + "learning_rate": 4.768157652784932e-06, + "loss": 0.6072, + "step": 2899 + }, + { + "epoch": 0.42730844793713163, + "grad_norm": 0.6108025908470154, + "learning_rate": 4.767994573939022e-06, + "loss": 0.5925, + "step": 2900 + }, + { + "epoch": 0.4274557956777996, + "grad_norm": 0.6230066418647766, + "learning_rate": 4.767831440548906e-06, + "loss": 0.5952, + "step": 2901 + }, + { + "epoch": 0.42760314341846756, + "grad_norm": 0.5761754512786865, + "learning_rate": 4.767668252618508e-06, + "loss": 0.6236, + "step": 2902 + }, + { + "epoch": 0.42775049115913555, + "grad_norm": 0.5674829483032227, + "learning_rate": 4.767505010151752e-06, + "loss": 0.5585, + "step": 2903 + }, + { + "epoch": 0.42789783889980354, + "grad_norm": 0.601886510848999, + "learning_rate": 4.767341713152563e-06, + "loss": 0.5928, + "step": 2904 + }, + { + "epoch": 0.42804518664047153, + "grad_norm": 0.5666950941085815, + "learning_rate": 4.76717836162487e-06, + "loss": 0.5728, + "step": 2905 + }, + { + "epoch": 0.42819253438113947, + "grad_norm": 0.5860369801521301, + "learning_rate": 4.767014955572601e-06, + "loss": 0.5741, + "step": 2906 + }, + { + "epoch": 0.42833988212180746, + "grad_norm": 0.5777863264083862, + "learning_rate": 4.766851494999685e-06, + "loss": 0.5765, + "step": 2907 + }, + { + "epoch": 0.42848722986247545, + "grad_norm": 0.6229566931724548, + "learning_rate": 4.766687979910053e-06, + "loss": 0.6167, + "step": 2908 + }, + { + "epoch": 0.42863457760314344, + "grad_norm": 0.556179404258728, + "learning_rate": 4.76652441030764e-06, + "loss": 0.5722, + "step": 2909 + }, + { + "epoch": 0.4287819253438114, + "grad_norm": 0.5837134122848511, + "learning_rate": 4.766360786196377e-06, + "loss": 0.606, + "step": 2910 + }, + { + "epoch": 0.42892927308447937, + "grad_norm": 0.5933248400688171, + "learning_rate": 4.766197107580201e-06, + "loss": 0.5634, + "step": 2911 + }, + { + "epoch": 0.42907662082514736, + "grad_norm": 0.5925429463386536, + "learning_rate": 4.766033374463046e-06, + "loss": 0.6246, + "step": 2912 + }, + { + "epoch": 0.42922396856581535, + "grad_norm": 0.5672367811203003, + "learning_rate": 4.765869586848852e-06, + "loss": 0.5976, + "step": 2913 + }, + { + "epoch": 0.4293713163064833, + "grad_norm": 0.5854462385177612, + "learning_rate": 4.7657057447415575e-06, + "loss": 0.5652, + "step": 2914 + }, + { + "epoch": 0.4295186640471513, + "grad_norm": 0.6371724605560303, + "learning_rate": 4.765541848145102e-06, + "loss": 0.5963, + "step": 2915 + }, + { + "epoch": 0.42966601178781927, + "grad_norm": 0.5735093951225281, + "learning_rate": 4.765377897063428e-06, + "loss": 0.5984, + "step": 2916 + }, + { + "epoch": 0.4298133595284872, + "grad_norm": 0.5892980694770813, + "learning_rate": 4.765213891500477e-06, + "loss": 0.5874, + "step": 2917 + }, + { + "epoch": 0.4299607072691552, + "grad_norm": 0.5573527216911316, + "learning_rate": 4.765049831460196e-06, + "loss": 0.5648, + "step": 2918 + }, + { + "epoch": 0.4301080550098232, + "grad_norm": 0.6054264307022095, + "learning_rate": 4.764885716946528e-06, + "loss": 0.5903, + "step": 2919 + }, + { + "epoch": 0.4302554027504912, + "grad_norm": 0.5834176540374756, + "learning_rate": 4.764721547963421e-06, + "loss": 0.5822, + "step": 2920 + }, + { + "epoch": 0.4304027504911591, + "grad_norm": 0.6105104684829712, + "learning_rate": 4.764557324514823e-06, + "loss": 0.61, + "step": 2921 + }, + { + "epoch": 0.4305500982318271, + "grad_norm": 0.5972132682800293, + "learning_rate": 4.764393046604684e-06, + "loss": 0.6144, + "step": 2922 + }, + { + "epoch": 0.4306974459724951, + "grad_norm": 0.5959524512290955, + "learning_rate": 4.764228714236953e-06, + "loss": 0.5588, + "step": 2923 + }, + { + "epoch": 0.4308447937131631, + "grad_norm": 0.729308545589447, + "learning_rate": 4.764064327415584e-06, + "loss": 0.5902, + "step": 2924 + }, + { + "epoch": 0.430992141453831, + "grad_norm": 0.563227117061615, + "learning_rate": 4.763899886144531e-06, + "loss": 0.5784, + "step": 2925 + }, + { + "epoch": 0.431139489194499, + "grad_norm": 0.6087407469749451, + "learning_rate": 4.763735390427746e-06, + "loss": 0.6079, + "step": 2926 + }, + { + "epoch": 0.431286836935167, + "grad_norm": 0.5913374423980713, + "learning_rate": 4.763570840269187e-06, + "loss": 0.576, + "step": 2927 + }, + { + "epoch": 0.431434184675835, + "grad_norm": 0.5982052087783813, + "learning_rate": 4.763406235672812e-06, + "loss": 0.6143, + "step": 2928 + }, + { + "epoch": 0.43158153241650293, + "grad_norm": 0.6036952137947083, + "learning_rate": 4.7632415766425774e-06, + "loss": 0.595, + "step": 2929 + }, + { + "epoch": 0.4317288801571709, + "grad_norm": 0.5677163600921631, + "learning_rate": 4.763076863182445e-06, + "loss": 0.5781, + "step": 2930 + }, + { + "epoch": 0.4318762278978389, + "grad_norm": 0.6429976224899292, + "learning_rate": 4.762912095296374e-06, + "loss": 0.6152, + "step": 2931 + }, + { + "epoch": 0.43202357563850685, + "grad_norm": 0.5916604995727539, + "learning_rate": 4.7627472729883304e-06, + "loss": 0.6002, + "step": 2932 + }, + { + "epoch": 0.43217092337917484, + "grad_norm": 0.607524573802948, + "learning_rate": 4.762582396262276e-06, + "loss": 0.5846, + "step": 2933 + }, + { + "epoch": 0.43231827111984283, + "grad_norm": 0.6022653579711914, + "learning_rate": 4.762417465122176e-06, + "loss": 0.5592, + "step": 2934 + }, + { + "epoch": 0.4324656188605108, + "grad_norm": 0.6323211789131165, + "learning_rate": 4.7622524795719965e-06, + "loss": 0.5827, + "step": 2935 + }, + { + "epoch": 0.43261296660117876, + "grad_norm": 0.5754425525665283, + "learning_rate": 4.762087439615706e-06, + "loss": 0.5483, + "step": 2936 + }, + { + "epoch": 0.43276031434184675, + "grad_norm": 0.561715304851532, + "learning_rate": 4.761922345257274e-06, + "loss": 0.5676, + "step": 2937 + }, + { + "epoch": 0.43290766208251474, + "grad_norm": 0.5690575838088989, + "learning_rate": 4.76175719650067e-06, + "loss": 0.5993, + "step": 2938 + }, + { + "epoch": 0.43305500982318273, + "grad_norm": 0.5613047480583191, + "learning_rate": 4.7615919933498665e-06, + "loss": 0.622, + "step": 2939 + }, + { + "epoch": 0.43320235756385067, + "grad_norm": 0.6090343594551086, + "learning_rate": 4.761426735808838e-06, + "loss": 0.6, + "step": 2940 + }, + { + "epoch": 0.43334970530451866, + "grad_norm": 0.5803486108779907, + "learning_rate": 4.761261423881555e-06, + "loss": 0.5971, + "step": 2941 + }, + { + "epoch": 0.43349705304518665, + "grad_norm": 0.5518953204154968, + "learning_rate": 4.761096057571996e-06, + "loss": 0.5622, + "step": 2942 + }, + { + "epoch": 0.43364440078585464, + "grad_norm": 0.572534441947937, + "learning_rate": 4.7609306368841366e-06, + "loss": 0.5687, + "step": 2943 + }, + { + "epoch": 0.4337917485265226, + "grad_norm": 0.5811883807182312, + "learning_rate": 4.760765161821956e-06, + "loss": 0.5796, + "step": 2944 + }, + { + "epoch": 0.43393909626719057, + "grad_norm": 0.575440526008606, + "learning_rate": 4.7605996323894346e-06, + "loss": 0.5906, + "step": 2945 + }, + { + "epoch": 0.43408644400785856, + "grad_norm": 0.6019396185874939, + "learning_rate": 4.760434048590552e-06, + "loss": 0.6121, + "step": 2946 + }, + { + "epoch": 0.4342337917485265, + "grad_norm": 0.5624772310256958, + "learning_rate": 4.760268410429291e-06, + "loss": 0.6004, + "step": 2947 + }, + { + "epoch": 0.4343811394891945, + "grad_norm": 0.6148430109024048, + "learning_rate": 4.760102717909634e-06, + "loss": 0.6051, + "step": 2948 + }, + { + "epoch": 0.4345284872298625, + "grad_norm": 0.6298471689224243, + "learning_rate": 4.759936971035567e-06, + "loss": 0.6194, + "step": 2949 + }, + { + "epoch": 0.4346758349705305, + "grad_norm": 0.5816379189491272, + "learning_rate": 4.759771169811076e-06, + "loss": 0.5532, + "step": 2950 + }, + { + "epoch": 0.4348231827111984, + "grad_norm": 0.5608161091804504, + "learning_rate": 4.759605314240148e-06, + "loss": 0.5889, + "step": 2951 + }, + { + "epoch": 0.4349705304518664, + "grad_norm": 0.5858519077301025, + "learning_rate": 4.7594394043267725e-06, + "loss": 0.5704, + "step": 2952 + }, + { + "epoch": 0.4351178781925344, + "grad_norm": 0.595597505569458, + "learning_rate": 4.759273440074938e-06, + "loss": 0.581, + "step": 2953 + }, + { + "epoch": 0.4352652259332024, + "grad_norm": 0.5774251222610474, + "learning_rate": 4.759107421488638e-06, + "loss": 0.6012, + "step": 2954 + }, + { + "epoch": 0.4354125736738703, + "grad_norm": 0.6014441251754761, + "learning_rate": 4.758941348571864e-06, + "loss": 0.5801, + "step": 2955 + }, + { + "epoch": 0.4355599214145383, + "grad_norm": 0.5937467813491821, + "learning_rate": 4.75877522132861e-06, + "loss": 0.5991, + "step": 2956 + }, + { + "epoch": 0.4357072691552063, + "grad_norm": 0.5821740031242371, + "learning_rate": 4.758609039762872e-06, + "loss": 0.6001, + "step": 2957 + }, + { + "epoch": 0.43585461689587424, + "grad_norm": 0.5880241394042969, + "learning_rate": 4.758442803878645e-06, + "loss": 0.6062, + "step": 2958 + }, + { + "epoch": 0.4360019646365422, + "grad_norm": 0.5793176293373108, + "learning_rate": 4.7582765136799265e-06, + "loss": 0.6073, + "step": 2959 + }, + { + "epoch": 0.4361493123772102, + "grad_norm": 0.6027413606643677, + "learning_rate": 4.75811016917072e-06, + "loss": 0.6124, + "step": 2960 + }, + { + "epoch": 0.4362966601178782, + "grad_norm": 0.6138728260993958, + "learning_rate": 4.757943770355021e-06, + "loss": 0.6119, + "step": 2961 + }, + { + "epoch": 0.43644400785854615, + "grad_norm": 0.5851042866706848, + "learning_rate": 4.757777317236833e-06, + "loss": 0.586, + "step": 2962 + }, + { + "epoch": 0.43659135559921414, + "grad_norm": 0.555496096611023, + "learning_rate": 4.757610809820161e-06, + "loss": 0.5738, + "step": 2963 + }, + { + "epoch": 0.43673870333988213, + "grad_norm": 0.5829578638076782, + "learning_rate": 4.757444248109008e-06, + "loss": 0.558, + "step": 2964 + }, + { + "epoch": 0.4368860510805501, + "grad_norm": 0.5958649516105652, + "learning_rate": 4.757277632107379e-06, + "loss": 0.5543, + "step": 2965 + }, + { + "epoch": 0.43703339882121806, + "grad_norm": 0.5730531215667725, + "learning_rate": 4.757110961819282e-06, + "loss": 0.5889, + "step": 2966 + }, + { + "epoch": 0.43718074656188605, + "grad_norm": 0.5700915455818176, + "learning_rate": 4.756944237248724e-06, + "loss": 0.5892, + "step": 2967 + }, + { + "epoch": 0.43732809430255404, + "grad_norm": 0.578557550907135, + "learning_rate": 4.756777458399716e-06, + "loss": 0.606, + "step": 2968 + }, + { + "epoch": 0.43747544204322203, + "grad_norm": 0.5677943825721741, + "learning_rate": 4.756610625276269e-06, + "loss": 0.5864, + "step": 2969 + }, + { + "epoch": 0.43762278978388996, + "grad_norm": 0.5717220306396484, + "learning_rate": 4.756443737882395e-06, + "loss": 0.6119, + "step": 2970 + }, + { + "epoch": 0.43777013752455796, + "grad_norm": 0.7367172837257385, + "learning_rate": 4.756276796222108e-06, + "loss": 0.5788, + "step": 2971 + }, + { + "epoch": 0.43791748526522595, + "grad_norm": 0.6061476469039917, + "learning_rate": 4.756109800299421e-06, + "loss": 0.5912, + "step": 2972 + }, + { + "epoch": 0.4380648330058939, + "grad_norm": 0.5928801894187927, + "learning_rate": 4.755942750118352e-06, + "loss": 0.5853, + "step": 2973 + }, + { + "epoch": 0.4382121807465619, + "grad_norm": 0.5831226110458374, + "learning_rate": 4.7557756456829185e-06, + "loss": 0.5884, + "step": 2974 + }, + { + "epoch": 0.43835952848722987, + "grad_norm": 0.6182790994644165, + "learning_rate": 4.755608486997138e-06, + "loss": 0.5556, + "step": 2975 + }, + { + "epoch": 0.43850687622789786, + "grad_norm": 0.5792841911315918, + "learning_rate": 4.755441274065032e-06, + "loss": 0.554, + "step": 2976 + }, + { + "epoch": 0.4386542239685658, + "grad_norm": 0.5931054949760437, + "learning_rate": 4.75527400689062e-06, + "loss": 0.6071, + "step": 2977 + }, + { + "epoch": 0.4388015717092338, + "grad_norm": 0.6927461624145508, + "learning_rate": 4.755106685477927e-06, + "loss": 0.605, + "step": 2978 + }, + { + "epoch": 0.4389489194499018, + "grad_norm": 0.5567231178283691, + "learning_rate": 4.754939309830976e-06, + "loss": 0.5491, + "step": 2979 + }, + { + "epoch": 0.43909626719056977, + "grad_norm": 0.5809940099716187, + "learning_rate": 4.754771879953792e-06, + "loss": 0.6147, + "step": 2980 + }, + { + "epoch": 0.4392436149312377, + "grad_norm": 0.606429934501648, + "learning_rate": 4.754604395850402e-06, + "loss": 0.5912, + "step": 2981 + }, + { + "epoch": 0.4393909626719057, + "grad_norm": 0.5998333692550659, + "learning_rate": 4.754436857524833e-06, + "loss": 0.5917, + "step": 2982 + }, + { + "epoch": 0.4395383104125737, + "grad_norm": 0.6020911335945129, + "learning_rate": 4.7542692649811156e-06, + "loss": 0.6222, + "step": 2983 + }, + { + "epoch": 0.4396856581532417, + "grad_norm": 0.5740510821342468, + "learning_rate": 4.754101618223279e-06, + "loss": 0.584, + "step": 2984 + }, + { + "epoch": 0.4398330058939096, + "grad_norm": 0.6199593544006348, + "learning_rate": 4.753933917255356e-06, + "loss": 0.543, + "step": 2985 + }, + { + "epoch": 0.4399803536345776, + "grad_norm": 0.5790248513221741, + "learning_rate": 4.75376616208138e-06, + "loss": 0.5728, + "step": 2986 + }, + { + "epoch": 0.4401277013752456, + "grad_norm": 0.613155722618103, + "learning_rate": 4.753598352705385e-06, + "loss": 0.5724, + "step": 2987 + }, + { + "epoch": 0.44027504911591353, + "grad_norm": 0.5851902365684509, + "learning_rate": 4.753430489131406e-06, + "loss": 0.6012, + "step": 2988 + }, + { + "epoch": 0.4404223968565815, + "grad_norm": 0.6005927324295044, + "learning_rate": 4.753262571363481e-06, + "loss": 0.6123, + "step": 2989 + }, + { + "epoch": 0.4405697445972495, + "grad_norm": 0.6009159088134766, + "learning_rate": 4.753094599405648e-06, + "loss": 0.6205, + "step": 2990 + }, + { + "epoch": 0.4407170923379175, + "grad_norm": 0.6161020994186401, + "learning_rate": 4.752926573261947e-06, + "loss": 0.5759, + "step": 2991 + }, + { + "epoch": 0.44086444007858544, + "grad_norm": 0.5673810839653015, + "learning_rate": 4.752758492936418e-06, + "loss": 0.6154, + "step": 2992 + }, + { + "epoch": 0.44101178781925343, + "grad_norm": 0.6052945852279663, + "learning_rate": 4.752590358433103e-06, + "loss": 0.631, + "step": 2993 + }, + { + "epoch": 0.4411591355599214, + "grad_norm": 0.583228349685669, + "learning_rate": 4.752422169756048e-06, + "loss": 0.6202, + "step": 2994 + }, + { + "epoch": 0.4413064833005894, + "grad_norm": 0.5914695858955383, + "learning_rate": 4.752253926909296e-06, + "loss": 0.5866, + "step": 2995 + }, + { + "epoch": 0.44145383104125735, + "grad_norm": 0.5652154088020325, + "learning_rate": 4.752085629896893e-06, + "loss": 0.5949, + "step": 2996 + }, + { + "epoch": 0.44160117878192534, + "grad_norm": 0.5864854454994202, + "learning_rate": 4.751917278722887e-06, + "loss": 0.582, + "step": 2997 + }, + { + "epoch": 0.44174852652259333, + "grad_norm": 0.5854319930076599, + "learning_rate": 4.751748873391326e-06, + "loss": 0.5723, + "step": 2998 + }, + { + "epoch": 0.4418958742632613, + "grad_norm": 0.5658491849899292, + "learning_rate": 4.751580413906262e-06, + "loss": 0.6022, + "step": 2999 + }, + { + "epoch": 0.44204322200392926, + "grad_norm": 0.5910298228263855, + "learning_rate": 4.751411900271745e-06, + "loss": 0.5645, + "step": 3000 + }, + { + "epoch": 0.44219056974459725, + "grad_norm": 0.6076290011405945, + "learning_rate": 4.751243332491827e-06, + "loss": 0.5646, + "step": 3001 + }, + { + "epoch": 0.44233791748526524, + "grad_norm": 0.5832303762435913, + "learning_rate": 4.751074710570563e-06, + "loss": 0.6043, + "step": 3002 + }, + { + "epoch": 0.4424852652259332, + "grad_norm": 0.5752106308937073, + "learning_rate": 4.750906034512008e-06, + "loss": 0.6061, + "step": 3003 + }, + { + "epoch": 0.44263261296660117, + "grad_norm": 0.586273729801178, + "learning_rate": 4.750737304320219e-06, + "loss": 0.5929, + "step": 3004 + }, + { + "epoch": 0.44277996070726916, + "grad_norm": 0.6124250888824463, + "learning_rate": 4.750568519999252e-06, + "loss": 0.5468, + "step": 3005 + }, + { + "epoch": 0.44292730844793715, + "grad_norm": 0.5712791085243225, + "learning_rate": 4.7503996815531696e-06, + "loss": 0.5728, + "step": 3006 + }, + { + "epoch": 0.4430746561886051, + "grad_norm": 0.5655198097229004, + "learning_rate": 4.7502307889860295e-06, + "loss": 0.5943, + "step": 3007 + }, + { + "epoch": 0.4432220039292731, + "grad_norm": 0.5897172689437866, + "learning_rate": 4.750061842301894e-06, + "loss": 0.5937, + "step": 3008 + }, + { + "epoch": 0.44336935166994107, + "grad_norm": 0.5962372422218323, + "learning_rate": 4.749892841504827e-06, + "loss": 0.6168, + "step": 3009 + }, + { + "epoch": 0.44351669941060906, + "grad_norm": 0.5839712619781494, + "learning_rate": 4.749723786598893e-06, + "loss": 0.592, + "step": 3010 + }, + { + "epoch": 0.443664047151277, + "grad_norm": 0.6286908984184265, + "learning_rate": 4.749554677588156e-06, + "loss": 0.599, + "step": 3011 + }, + { + "epoch": 0.443811394891945, + "grad_norm": 0.5915190577507019, + "learning_rate": 4.749385514476685e-06, + "loss": 0.6005, + "step": 3012 + }, + { + "epoch": 0.443958742632613, + "grad_norm": 0.5811138153076172, + "learning_rate": 4.749216297268547e-06, + "loss": 0.5742, + "step": 3013 + }, + { + "epoch": 0.44410609037328097, + "grad_norm": 0.6228284239768982, + "learning_rate": 4.749047025967812e-06, + "loss": 0.5995, + "step": 3014 + }, + { + "epoch": 0.4442534381139489, + "grad_norm": 0.6092685461044312, + "learning_rate": 4.74887770057855e-06, + "loss": 0.619, + "step": 3015 + }, + { + "epoch": 0.4444007858546169, + "grad_norm": 0.5902023315429688, + "learning_rate": 4.7487083211048355e-06, + "loss": 0.5731, + "step": 3016 + }, + { + "epoch": 0.4445481335952849, + "grad_norm": 0.5680004358291626, + "learning_rate": 4.74853888755074e-06, + "loss": 0.574, + "step": 3017 + }, + { + "epoch": 0.4446954813359528, + "grad_norm": 0.6250379085540771, + "learning_rate": 4.748369399920339e-06, + "loss": 0.5956, + "step": 3018 + }, + { + "epoch": 0.4448428290766208, + "grad_norm": 0.6019533276557922, + "learning_rate": 4.7481998582177085e-06, + "loss": 0.6016, + "step": 3019 + }, + { + "epoch": 0.4449901768172888, + "grad_norm": 0.5732173323631287, + "learning_rate": 4.748030262446925e-06, + "loss": 0.5734, + "step": 3020 + }, + { + "epoch": 0.4451375245579568, + "grad_norm": 0.5791738629341125, + "learning_rate": 4.74786061261207e-06, + "loss": 0.5699, + "step": 3021 + }, + { + "epoch": 0.44528487229862473, + "grad_norm": 0.641268789768219, + "learning_rate": 4.74769090871722e-06, + "loss": 0.6139, + "step": 3022 + }, + { + "epoch": 0.4454322200392927, + "grad_norm": 0.5864115953445435, + "learning_rate": 4.747521150766459e-06, + "loss": 0.6232, + "step": 3023 + }, + { + "epoch": 0.4455795677799607, + "grad_norm": 0.5858447551727295, + "learning_rate": 4.747351338763867e-06, + "loss": 0.5816, + "step": 3024 + }, + { + "epoch": 0.4457269155206287, + "grad_norm": 0.5610215067863464, + "learning_rate": 4.74718147271353e-06, + "loss": 0.5887, + "step": 3025 + }, + { + "epoch": 0.44587426326129664, + "grad_norm": 0.5807800889015198, + "learning_rate": 4.7470115526195335e-06, + "loss": 0.5984, + "step": 3026 + }, + { + "epoch": 0.44602161100196464, + "grad_norm": 0.5804879665374756, + "learning_rate": 4.746841578485962e-06, + "loss": 0.5879, + "step": 3027 + }, + { + "epoch": 0.4461689587426326, + "grad_norm": 0.5791323781013489, + "learning_rate": 4.746671550316905e-06, + "loss": 0.5696, + "step": 3028 + }, + { + "epoch": 0.44631630648330056, + "grad_norm": 0.5844980478286743, + "learning_rate": 4.74650146811645e-06, + "loss": 0.5966, + "step": 3029 + }, + { + "epoch": 0.44646365422396855, + "grad_norm": 0.596921980381012, + "learning_rate": 4.746331331888689e-06, + "loss": 0.5998, + "step": 3030 + }, + { + "epoch": 0.44661100196463654, + "grad_norm": 0.6066097021102905, + "learning_rate": 4.746161141637713e-06, + "loss": 0.5992, + "step": 3031 + }, + { + "epoch": 0.44675834970530454, + "grad_norm": 0.5887219905853271, + "learning_rate": 4.745990897367616e-06, + "loss": 0.6424, + "step": 3032 + }, + { + "epoch": 0.44690569744597247, + "grad_norm": 0.5478706359863281, + "learning_rate": 4.74582059908249e-06, + "loss": 0.6205, + "step": 3033 + }, + { + "epoch": 0.44705304518664046, + "grad_norm": 0.6334892511367798, + "learning_rate": 4.745650246786433e-06, + "loss": 0.5904, + "step": 3034 + }, + { + "epoch": 0.44720039292730845, + "grad_norm": 0.6148732304573059, + "learning_rate": 4.74547984048354e-06, + "loss": 0.5752, + "step": 3035 + }, + { + "epoch": 0.44734774066797645, + "grad_norm": 0.5771917104721069, + "learning_rate": 4.745309380177911e-06, + "loss": 0.5768, + "step": 3036 + }, + { + "epoch": 0.4474950884086444, + "grad_norm": 0.5726704597473145, + "learning_rate": 4.745138865873643e-06, + "loss": 0.5492, + "step": 3037 + }, + { + "epoch": 0.4476424361493124, + "grad_norm": 0.5982184410095215, + "learning_rate": 4.744968297574839e-06, + "loss": 0.5832, + "step": 3038 + }, + { + "epoch": 0.44778978388998036, + "grad_norm": 0.5854517221450806, + "learning_rate": 4.744797675285601e-06, + "loss": 0.5743, + "step": 3039 + }, + { + "epoch": 0.44793713163064836, + "grad_norm": 0.577803909778595, + "learning_rate": 4.744626999010031e-06, + "loss": 0.5702, + "step": 3040 + }, + { + "epoch": 0.4480844793713163, + "grad_norm": 0.5752996206283569, + "learning_rate": 4.744456268752235e-06, + "loss": 0.5582, + "step": 3041 + }, + { + "epoch": 0.4482318271119843, + "grad_norm": 0.5965520739555359, + "learning_rate": 4.744285484516318e-06, + "loss": 0.5959, + "step": 3042 + }, + { + "epoch": 0.4483791748526523, + "grad_norm": 0.5951970219612122, + "learning_rate": 4.744114646306388e-06, + "loss": 0.601, + "step": 3043 + }, + { + "epoch": 0.4485265225933202, + "grad_norm": 0.5563391447067261, + "learning_rate": 4.743943754126553e-06, + "loss": 0.5706, + "step": 3044 + }, + { + "epoch": 0.4486738703339882, + "grad_norm": 0.5938650369644165, + "learning_rate": 4.743772807980924e-06, + "loss": 0.5995, + "step": 3045 + }, + { + "epoch": 0.4488212180746562, + "grad_norm": 0.6168292760848999, + "learning_rate": 4.74360180787361e-06, + "loss": 0.6119, + "step": 3046 + }, + { + "epoch": 0.4489685658153242, + "grad_norm": 0.6159502267837524, + "learning_rate": 4.743430753808726e-06, + "loss": 0.576, + "step": 3047 + }, + { + "epoch": 0.4491159135559921, + "grad_norm": 0.5789459943771362, + "learning_rate": 4.743259645790384e-06, + "loss": 0.5954, + "step": 3048 + }, + { + "epoch": 0.4492632612966601, + "grad_norm": 0.6099579334259033, + "learning_rate": 4.7430884838227004e-06, + "loss": 0.5709, + "step": 3049 + }, + { + "epoch": 0.4494106090373281, + "grad_norm": 0.6141890287399292, + "learning_rate": 4.7429172679097905e-06, + "loss": 0.606, + "step": 3050 + }, + { + "epoch": 0.4495579567779961, + "grad_norm": 0.5944125056266785, + "learning_rate": 4.742745998055773e-06, + "loss": 0.6122, + "step": 3051 + }, + { + "epoch": 0.44970530451866403, + "grad_norm": 0.6025492548942566, + "learning_rate": 4.742574674264764e-06, + "loss": 0.6242, + "step": 3052 + }, + { + "epoch": 0.449852652259332, + "grad_norm": 0.5808653235435486, + "learning_rate": 4.742403296540887e-06, + "loss": 0.5927, + "step": 3053 + }, + { + "epoch": 0.45, + "grad_norm": 0.5794634222984314, + "learning_rate": 4.742231864888264e-06, + "loss": 0.6261, + "step": 3054 + }, + { + "epoch": 0.450147347740668, + "grad_norm": 0.6109106540679932, + "learning_rate": 4.742060379311015e-06, + "loss": 0.5756, + "step": 3055 + }, + { + "epoch": 0.45029469548133594, + "grad_norm": 0.5705601572990417, + "learning_rate": 4.7418888398132644e-06, + "loss": 0.59, + "step": 3056 + }, + { + "epoch": 0.45044204322200393, + "grad_norm": 0.5766222476959229, + "learning_rate": 4.7417172463991405e-06, + "loss": 0.5957, + "step": 3057 + }, + { + "epoch": 0.4505893909626719, + "grad_norm": 0.5624830722808838, + "learning_rate": 4.741545599072768e-06, + "loss": 0.5728, + "step": 3058 + }, + { + "epoch": 0.45073673870333986, + "grad_norm": 0.5789316892623901, + "learning_rate": 4.741373897838274e-06, + "loss": 0.5784, + "step": 3059 + }, + { + "epoch": 0.45088408644400785, + "grad_norm": 0.5930604934692383, + "learning_rate": 4.74120214269979e-06, + "loss": 0.5751, + "step": 3060 + }, + { + "epoch": 0.45103143418467584, + "grad_norm": 0.5855991244316101, + "learning_rate": 4.741030333661445e-06, + "loss": 0.5924, + "step": 3061 + }, + { + "epoch": 0.45117878192534383, + "grad_norm": 0.5934950709342957, + "learning_rate": 4.740858470727372e-06, + "loss": 0.6072, + "step": 3062 + }, + { + "epoch": 0.45132612966601177, + "grad_norm": 0.5745378136634827, + "learning_rate": 4.740686553901705e-06, + "loss": 0.5549, + "step": 3063 + }, + { + "epoch": 0.45147347740667976, + "grad_norm": 0.5935742855072021, + "learning_rate": 4.740514583188575e-06, + "loss": 0.5342, + "step": 3064 + }, + { + "epoch": 0.45162082514734775, + "grad_norm": 0.5660735964775085, + "learning_rate": 4.7403425585921215e-06, + "loss": 0.5933, + "step": 3065 + }, + { + "epoch": 0.45176817288801574, + "grad_norm": 0.5728287100791931, + "learning_rate": 4.74017048011648e-06, + "loss": 0.6032, + "step": 3066 + }, + { + "epoch": 0.4519155206286837, + "grad_norm": 0.5934741497039795, + "learning_rate": 4.739998347765789e-06, + "loss": 0.5764, + "step": 3067 + }, + { + "epoch": 0.45206286836935167, + "grad_norm": 0.6290299296379089, + "learning_rate": 4.7398261615441884e-06, + "loss": 0.5563, + "step": 3068 + }, + { + "epoch": 0.45221021611001966, + "grad_norm": 0.5539639592170715, + "learning_rate": 4.73965392145582e-06, + "loss": 0.5551, + "step": 3069 + }, + { + "epoch": 0.45235756385068765, + "grad_norm": 0.5698058009147644, + "learning_rate": 4.739481627504824e-06, + "loss": 0.5809, + "step": 3070 + }, + { + "epoch": 0.4525049115913556, + "grad_norm": 0.5731882452964783, + "learning_rate": 4.739309279695346e-06, + "loss": 0.606, + "step": 3071 + }, + { + "epoch": 0.4526522593320236, + "grad_norm": 0.5936786532402039, + "learning_rate": 4.73913687803153e-06, + "loss": 0.5941, + "step": 3072 + }, + { + "epoch": 0.45279960707269157, + "grad_norm": 0.5647307634353638, + "learning_rate": 4.7389644225175225e-06, + "loss": 0.578, + "step": 3073 + }, + { + "epoch": 0.4529469548133595, + "grad_norm": 0.5840884447097778, + "learning_rate": 4.73879191315747e-06, + "loss": 0.6004, + "step": 3074 + }, + { + "epoch": 0.4530943025540275, + "grad_norm": 0.6003271341323853, + "learning_rate": 4.738619349955523e-06, + "loss": 0.5953, + "step": 3075 + }, + { + "epoch": 0.4532416502946955, + "grad_norm": 0.5686614513397217, + "learning_rate": 4.73844673291583e-06, + "loss": 0.6093, + "step": 3076 + }, + { + "epoch": 0.4533889980353635, + "grad_norm": 0.5637298822402954, + "learning_rate": 4.738274062042542e-06, + "loss": 0.5795, + "step": 3077 + }, + { + "epoch": 0.4535363457760314, + "grad_norm": 0.6050560474395752, + "learning_rate": 4.738101337339814e-06, + "loss": 0.5939, + "step": 3078 + }, + { + "epoch": 0.4536836935166994, + "grad_norm": 0.626176118850708, + "learning_rate": 4.737928558811799e-06, + "loss": 0.5929, + "step": 3079 + }, + { + "epoch": 0.4538310412573674, + "grad_norm": 0.6001272797584534, + "learning_rate": 4.737755726462651e-06, + "loss": 0.5914, + "step": 3080 + }, + { + "epoch": 0.4539783889980354, + "grad_norm": 0.6240527629852295, + "learning_rate": 4.737582840296527e-06, + "loss": 0.5932, + "step": 3081 + }, + { + "epoch": 0.4541257367387033, + "grad_norm": 0.6079282164573669, + "learning_rate": 4.737409900317587e-06, + "loss": 0.5566, + "step": 3082 + }, + { + "epoch": 0.4542730844793713, + "grad_norm": 0.5821549296379089, + "learning_rate": 4.737236906529986e-06, + "loss": 0.5832, + "step": 3083 + }, + { + "epoch": 0.4544204322200393, + "grad_norm": 0.6235008239746094, + "learning_rate": 4.737063858937888e-06, + "loss": 0.6078, + "step": 3084 + }, + { + "epoch": 0.4545677799607073, + "grad_norm": 0.5830113887786865, + "learning_rate": 4.7368907575454525e-06, + "loss": 0.5846, + "step": 3085 + }, + { + "epoch": 0.45471512770137523, + "grad_norm": 0.5808101892471313, + "learning_rate": 4.736717602356845e-06, + "loss": 0.6091, + "step": 3086 + }, + { + "epoch": 0.4548624754420432, + "grad_norm": 0.6026193499565125, + "learning_rate": 4.7365443933762265e-06, + "loss": 0.6376, + "step": 3087 + }, + { + "epoch": 0.4550098231827112, + "grad_norm": 0.586785078048706, + "learning_rate": 4.736371130607765e-06, + "loss": 0.5746, + "step": 3088 + }, + { + "epoch": 0.45515717092337915, + "grad_norm": 0.5840166807174683, + "learning_rate": 4.736197814055628e-06, + "loss": 0.5883, + "step": 3089 + }, + { + "epoch": 0.45530451866404714, + "grad_norm": 0.5902153253555298, + "learning_rate": 4.736024443723981e-06, + "loss": 0.58, + "step": 3090 + }, + { + "epoch": 0.45545186640471513, + "grad_norm": 0.5678513646125793, + "learning_rate": 4.735851019616995e-06, + "loss": 0.5973, + "step": 3091 + }, + { + "epoch": 0.4555992141453831, + "grad_norm": 0.5903770327568054, + "learning_rate": 4.7356775417388406e-06, + "loss": 0.5585, + "step": 3092 + }, + { + "epoch": 0.45574656188605106, + "grad_norm": 0.6041983962059021, + "learning_rate": 4.73550401009369e-06, + "loss": 0.5749, + "step": 3093 + }, + { + "epoch": 0.45589390962671905, + "grad_norm": 0.5824362635612488, + "learning_rate": 4.735330424685717e-06, + "loss": 0.5816, + "step": 3094 + }, + { + "epoch": 0.45604125736738704, + "grad_norm": 0.6262214183807373, + "learning_rate": 4.735156785519095e-06, + "loss": 0.5657, + "step": 3095 + }, + { + "epoch": 0.45618860510805503, + "grad_norm": 0.617790937423706, + "learning_rate": 4.7349830925980015e-06, + "loss": 0.5487, + "step": 3096 + }, + { + "epoch": 0.45633595284872297, + "grad_norm": 0.58673095703125, + "learning_rate": 4.734809345926612e-06, + "loss": 0.6014, + "step": 3097 + }, + { + "epoch": 0.45648330058939096, + "grad_norm": 0.5714814066886902, + "learning_rate": 4.734635545509107e-06, + "loss": 0.5738, + "step": 3098 + }, + { + "epoch": 0.45663064833005895, + "grad_norm": 0.6074364185333252, + "learning_rate": 4.734461691349665e-06, + "loss": 0.5397, + "step": 3099 + }, + { + "epoch": 0.4567779960707269, + "grad_norm": 0.5992818474769592, + "learning_rate": 4.7342877834524655e-06, + "loss": 0.5843, + "step": 3100 + }, + { + "epoch": 0.4569253438113949, + "grad_norm": 0.6565219759941101, + "learning_rate": 4.734113821821694e-06, + "loss": 0.6149, + "step": 3101 + }, + { + "epoch": 0.45707269155206287, + "grad_norm": 0.5996466875076294, + "learning_rate": 4.733939806461534e-06, + "loss": 0.5664, + "step": 3102 + }, + { + "epoch": 0.45722003929273086, + "grad_norm": 0.6121309399604797, + "learning_rate": 4.733765737376168e-06, + "loss": 0.6123, + "step": 3103 + }, + { + "epoch": 0.4573673870333988, + "grad_norm": 0.6354365348815918, + "learning_rate": 4.733591614569785e-06, + "loss": 0.5919, + "step": 3104 + }, + { + "epoch": 0.4575147347740668, + "grad_norm": 0.6065689325332642, + "learning_rate": 4.73341743804657e-06, + "loss": 0.5795, + "step": 3105 + }, + { + "epoch": 0.4576620825147348, + "grad_norm": 0.589360237121582, + "learning_rate": 4.7332432078107134e-06, + "loss": 0.5967, + "step": 3106 + }, + { + "epoch": 0.45780943025540277, + "grad_norm": 0.5744441747665405, + "learning_rate": 4.733068923866406e-06, + "loss": 0.5859, + "step": 3107 + }, + { + "epoch": 0.4579567779960707, + "grad_norm": 0.5819219946861267, + "learning_rate": 4.732894586217838e-06, + "loss": 0.5923, + "step": 3108 + }, + { + "epoch": 0.4581041257367387, + "grad_norm": 0.6090234518051147, + "learning_rate": 4.732720194869201e-06, + "loss": 0.5917, + "step": 3109 + }, + { + "epoch": 0.4582514734774067, + "grad_norm": 0.6060874462127686, + "learning_rate": 4.732545749824692e-06, + "loss": 0.5539, + "step": 3110 + }, + { + "epoch": 0.4583988212180747, + "grad_norm": 0.6108007431030273, + "learning_rate": 4.732371251088505e-06, + "loss": 0.5698, + "step": 3111 + }, + { + "epoch": 0.4585461689587426, + "grad_norm": 0.5883253812789917, + "learning_rate": 4.732196698664836e-06, + "loss": 0.6213, + "step": 3112 + }, + { + "epoch": 0.4586935166994106, + "grad_norm": 0.5981879234313965, + "learning_rate": 4.732022092557884e-06, + "loss": 0.5849, + "step": 3113 + }, + { + "epoch": 0.4588408644400786, + "grad_norm": 0.6019772291183472, + "learning_rate": 4.731847432771846e-06, + "loss": 0.5722, + "step": 3114 + }, + { + "epoch": 0.45898821218074654, + "grad_norm": 0.6327353715896606, + "learning_rate": 4.731672719310926e-06, + "loss": 0.5975, + "step": 3115 + }, + { + "epoch": 0.4591355599214145, + "grad_norm": 0.6214463710784912, + "learning_rate": 4.7314979521793225e-06, + "loss": 0.6194, + "step": 3116 + }, + { + "epoch": 0.4592829076620825, + "grad_norm": 0.5778625011444092, + "learning_rate": 4.73132313138124e-06, + "loss": 0.5786, + "step": 3117 + }, + { + "epoch": 0.4594302554027505, + "grad_norm": 0.5818091034889221, + "learning_rate": 4.731148256920883e-06, + "loss": 0.5687, + "step": 3118 + }, + { + "epoch": 0.45957760314341844, + "grad_norm": 0.5937495231628418, + "learning_rate": 4.730973328802457e-06, + "loss": 0.5737, + "step": 3119 + }, + { + "epoch": 0.45972495088408644, + "grad_norm": 0.5898513793945312, + "learning_rate": 4.7307983470301685e-06, + "loss": 0.581, + "step": 3120 + }, + { + "epoch": 0.4598722986247544, + "grad_norm": 0.5967569351196289, + "learning_rate": 4.730623311608227e-06, + "loss": 0.5632, + "step": 3121 + }, + { + "epoch": 0.4600196463654224, + "grad_norm": 0.6321589946746826, + "learning_rate": 4.73044822254084e-06, + "loss": 0.5802, + "step": 3122 + }, + { + "epoch": 0.46016699410609035, + "grad_norm": 0.5938317775726318, + "learning_rate": 4.73027307983222e-06, + "loss": 0.5901, + "step": 3123 + }, + { + "epoch": 0.46031434184675835, + "grad_norm": 0.5981414914131165, + "learning_rate": 4.730097883486577e-06, + "loss": 0.5736, + "step": 3124 + }, + { + "epoch": 0.46046168958742634, + "grad_norm": 0.6127888560295105, + "learning_rate": 4.729922633508128e-06, + "loss": 0.5822, + "step": 3125 + }, + { + "epoch": 0.46060903732809433, + "grad_norm": 0.5629151463508606, + "learning_rate": 4.7297473299010845e-06, + "loss": 0.5905, + "step": 3126 + }, + { + "epoch": 0.46075638506876226, + "grad_norm": 0.5767273902893066, + "learning_rate": 4.729571972669663e-06, + "loss": 0.5825, + "step": 3127 + }, + { + "epoch": 0.46090373280943026, + "grad_norm": 0.5719284415245056, + "learning_rate": 4.729396561818083e-06, + "loss": 0.5951, + "step": 3128 + }, + { + "epoch": 0.46105108055009825, + "grad_norm": 0.5935813784599304, + "learning_rate": 4.72922109735056e-06, + "loss": 0.6094, + "step": 3129 + }, + { + "epoch": 0.4611984282907662, + "grad_norm": 2.8845064640045166, + "learning_rate": 4.729045579271315e-06, + "loss": 0.5953, + "step": 3130 + }, + { + "epoch": 0.4613457760314342, + "grad_norm": 0.6181765794754028, + "learning_rate": 4.72887000758457e-06, + "loss": 0.559, + "step": 3131 + }, + { + "epoch": 0.46149312377210217, + "grad_norm": 0.593487024307251, + "learning_rate": 4.728694382294547e-06, + "loss": 0.5673, + "step": 3132 + }, + { + "epoch": 0.46164047151277016, + "grad_norm": 0.5744642615318298, + "learning_rate": 4.728518703405469e-06, + "loss": 0.6189, + "step": 3133 + }, + { + "epoch": 0.4617878192534381, + "grad_norm": 0.5617813467979431, + "learning_rate": 4.728342970921561e-06, + "loss": 0.6117, + "step": 3134 + }, + { + "epoch": 0.4619351669941061, + "grad_norm": 0.5931204557418823, + "learning_rate": 4.72816718484705e-06, + "loss": 0.5266, + "step": 3135 + }, + { + "epoch": 0.4620825147347741, + "grad_norm": 0.6007756590843201, + "learning_rate": 4.727991345186164e-06, + "loss": 0.5786, + "step": 3136 + }, + { + "epoch": 0.46222986247544207, + "grad_norm": 0.5809265971183777, + "learning_rate": 4.72781545194313e-06, + "loss": 0.6016, + "step": 3137 + }, + { + "epoch": 0.46237721021611, + "grad_norm": 0.5799721479415894, + "learning_rate": 4.72763950512218e-06, + "loss": 0.5899, + "step": 3138 + }, + { + "epoch": 0.462524557956778, + "grad_norm": 0.6013871431350708, + "learning_rate": 4.727463504727544e-06, + "loss": 0.6063, + "step": 3139 + }, + { + "epoch": 0.462671905697446, + "grad_norm": 0.6128621697425842, + "learning_rate": 4.727287450763456e-06, + "loss": 0.6081, + "step": 3140 + }, + { + "epoch": 0.462819253438114, + "grad_norm": 0.6413164138793945, + "learning_rate": 4.72711134323415e-06, + "loss": 0.5786, + "step": 3141 + }, + { + "epoch": 0.4629666011787819, + "grad_norm": 0.5876575112342834, + "learning_rate": 4.7269351821438604e-06, + "loss": 0.6019, + "step": 3142 + }, + { + "epoch": 0.4631139489194499, + "grad_norm": 0.6289963126182556, + "learning_rate": 4.726758967496823e-06, + "loss": 0.5577, + "step": 3143 + }, + { + "epoch": 0.4632612966601179, + "grad_norm": 0.5977717041969299, + "learning_rate": 4.726582699297279e-06, + "loss": 0.5865, + "step": 3144 + }, + { + "epoch": 0.46340864440078583, + "grad_norm": 0.5989941358566284, + "learning_rate": 4.726406377549463e-06, + "loss": 0.5987, + "step": 3145 + }, + { + "epoch": 0.4635559921414538, + "grad_norm": 0.596680223941803, + "learning_rate": 4.72623000225762e-06, + "loss": 0.5767, + "step": 3146 + }, + { + "epoch": 0.4637033398821218, + "grad_norm": 0.6103672385215759, + "learning_rate": 4.7260535734259885e-06, + "loss": 0.6197, + "step": 3147 + }, + { + "epoch": 0.4638506876227898, + "grad_norm": 0.5939247012138367, + "learning_rate": 4.725877091058813e-06, + "loss": 0.5849, + "step": 3148 + }, + { + "epoch": 0.46399803536345774, + "grad_norm": 0.5703379511833191, + "learning_rate": 4.7257005551603365e-06, + "loss": 0.5776, + "step": 3149 + }, + { + "epoch": 0.46414538310412573, + "grad_norm": 0.5911127924919128, + "learning_rate": 4.7255239657348065e-06, + "loss": 0.555, + "step": 3150 + }, + { + "epoch": 0.4642927308447937, + "grad_norm": 0.5842856764793396, + "learning_rate": 4.725347322786469e-06, + "loss": 0.5722, + "step": 3151 + }, + { + "epoch": 0.4644400785854617, + "grad_norm": 0.6004073619842529, + "learning_rate": 4.725170626319572e-06, + "loss": 0.5703, + "step": 3152 + }, + { + "epoch": 0.46458742632612965, + "grad_norm": 0.6107582449913025, + "learning_rate": 4.724993876338365e-06, + "loss": 0.5935, + "step": 3153 + }, + { + "epoch": 0.46473477406679764, + "grad_norm": 0.5744330286979675, + "learning_rate": 4.724817072847099e-06, + "loss": 0.5656, + "step": 3154 + }, + { + "epoch": 0.46488212180746563, + "grad_norm": 0.5909969210624695, + "learning_rate": 4.724640215850026e-06, + "loss": 0.5912, + "step": 3155 + }, + { + "epoch": 0.4650294695481336, + "grad_norm": 0.5914566516876221, + "learning_rate": 4.724463305351399e-06, + "loss": 0.5746, + "step": 3156 + }, + { + "epoch": 0.46517681728880156, + "grad_norm": 0.5931653380393982, + "learning_rate": 4.724286341355473e-06, + "loss": 0.5872, + "step": 3157 + }, + { + "epoch": 0.46532416502946955, + "grad_norm": 0.630266547203064, + "learning_rate": 4.724109323866504e-06, + "loss": 0.548, + "step": 3158 + }, + { + "epoch": 0.46547151277013754, + "grad_norm": 0.6248124241828918, + "learning_rate": 4.723932252888748e-06, + "loss": 0.5849, + "step": 3159 + }, + { + "epoch": 0.4656188605108055, + "grad_norm": 0.5928918123245239, + "learning_rate": 4.723755128426465e-06, + "loss": 0.6052, + "step": 3160 + }, + { + "epoch": 0.46576620825147347, + "grad_norm": 0.5861024260520935, + "learning_rate": 4.723577950483914e-06, + "loss": 0.5588, + "step": 3161 + }, + { + "epoch": 0.46591355599214146, + "grad_norm": 0.6247372031211853, + "learning_rate": 4.723400719065356e-06, + "loss": 0.5855, + "step": 3162 + }, + { + "epoch": 0.46606090373280945, + "grad_norm": 0.5632029175758362, + "learning_rate": 4.723223434175055e-06, + "loss": 0.5791, + "step": 3163 + }, + { + "epoch": 0.4662082514734774, + "grad_norm": 0.6011292338371277, + "learning_rate": 4.723046095817272e-06, + "loss": 0.5998, + "step": 3164 + }, + { + "epoch": 0.4663555992141454, + "grad_norm": 0.6485328078269958, + "learning_rate": 4.722868703996273e-06, + "loss": 0.6088, + "step": 3165 + }, + { + "epoch": 0.46650294695481337, + "grad_norm": 0.6627921462059021, + "learning_rate": 4.722691258716324e-06, + "loss": 0.5677, + "step": 3166 + }, + { + "epoch": 0.46665029469548136, + "grad_norm": 0.566642165184021, + "learning_rate": 4.722513759981693e-06, + "loss": 0.591, + "step": 3167 + }, + { + "epoch": 0.4667976424361493, + "grad_norm": 0.5755511522293091, + "learning_rate": 4.722336207796649e-06, + "loss": 0.5882, + "step": 3168 + }, + { + "epoch": 0.4669449901768173, + "grad_norm": 0.6217115521430969, + "learning_rate": 4.722158602165461e-06, + "loss": 0.569, + "step": 3169 + }, + { + "epoch": 0.4670923379174853, + "grad_norm": 0.6387696266174316, + "learning_rate": 4.721980943092401e-06, + "loss": 0.585, + "step": 3170 + }, + { + "epoch": 0.4672396856581532, + "grad_norm": 0.58958500623703, + "learning_rate": 4.721803230581742e-06, + "loss": 0.5945, + "step": 3171 + }, + { + "epoch": 0.4673870333988212, + "grad_norm": 0.6206267476081848, + "learning_rate": 4.721625464637756e-06, + "loss": 0.6048, + "step": 3172 + }, + { + "epoch": 0.4675343811394892, + "grad_norm": 0.5807662606239319, + "learning_rate": 4.721447645264721e-06, + "loss": 0.5777, + "step": 3173 + }, + { + "epoch": 0.4676817288801572, + "grad_norm": 0.5576851963996887, + "learning_rate": 4.721269772466912e-06, + "loss": 0.6181, + "step": 3174 + }, + { + "epoch": 0.4678290766208251, + "grad_norm": 0.5615859031677246, + "learning_rate": 4.721091846248606e-06, + "loss": 0.5951, + "step": 3175 + }, + { + "epoch": 0.4679764243614931, + "grad_norm": 0.5749754905700684, + "learning_rate": 4.720913866614083e-06, + "loss": 0.5963, + "step": 3176 + }, + { + "epoch": 0.4681237721021611, + "grad_norm": 0.554206132888794, + "learning_rate": 4.720735833567623e-06, + "loss": 0.5911, + "step": 3177 + }, + { + "epoch": 0.4682711198428291, + "grad_norm": 0.5937208533287048, + "learning_rate": 4.720557747113509e-06, + "loss": 0.5845, + "step": 3178 + }, + { + "epoch": 0.46841846758349703, + "grad_norm": 0.5577055215835571, + "learning_rate": 4.720379607256022e-06, + "loss": 0.5641, + "step": 3179 + }, + { + "epoch": 0.468565815324165, + "grad_norm": 0.5952684879302979, + "learning_rate": 4.7202014139994464e-06, + "loss": 0.5979, + "step": 3180 + }, + { + "epoch": 0.468713163064833, + "grad_norm": 0.5893633961677551, + "learning_rate": 4.720023167348068e-06, + "loss": 0.5803, + "step": 3181 + }, + { + "epoch": 0.468860510805501, + "grad_norm": 0.5970651507377625, + "learning_rate": 4.719844867306175e-06, + "loss": 0.5854, + "step": 3182 + }, + { + "epoch": 0.46900785854616894, + "grad_norm": 0.5819857716560364, + "learning_rate": 4.719666513878053e-06, + "loss": 0.5638, + "step": 3183 + }, + { + "epoch": 0.46915520628683693, + "grad_norm": 0.5743525624275208, + "learning_rate": 4.719488107067992e-06, + "loss": 0.574, + "step": 3184 + }, + { + "epoch": 0.4693025540275049, + "grad_norm": 0.548964262008667, + "learning_rate": 4.719309646880284e-06, + "loss": 0.5699, + "step": 3185 + }, + { + "epoch": 0.46944990176817286, + "grad_norm": 0.5787989497184753, + "learning_rate": 4.71913113331922e-06, + "loss": 0.5887, + "step": 3186 + }, + { + "epoch": 0.46959724950884085, + "grad_norm": 0.549669086933136, + "learning_rate": 4.718952566389092e-06, + "loss": 0.6102, + "step": 3187 + }, + { + "epoch": 0.46974459724950884, + "grad_norm": 0.5756315588951111, + "learning_rate": 4.718773946094197e-06, + "loss": 0.6073, + "step": 3188 + }, + { + "epoch": 0.46989194499017684, + "grad_norm": 0.5518194437026978, + "learning_rate": 4.718595272438828e-06, + "loss": 0.5663, + "step": 3189 + }, + { + "epoch": 0.47003929273084477, + "grad_norm": 0.6163632273674011, + "learning_rate": 4.718416545427284e-06, + "loss": 0.5622, + "step": 3190 + }, + { + "epoch": 0.47018664047151276, + "grad_norm": 0.5689073204994202, + "learning_rate": 4.7182377650638626e-06, + "loss": 0.553, + "step": 3191 + }, + { + "epoch": 0.47033398821218075, + "grad_norm": 0.5631166100502014, + "learning_rate": 4.718058931352862e-06, + "loss": 0.5754, + "step": 3192 + }, + { + "epoch": 0.47048133595284874, + "grad_norm": 0.5995513200759888, + "learning_rate": 4.717880044298586e-06, + "loss": 0.5763, + "step": 3193 + }, + { + "epoch": 0.4706286836935167, + "grad_norm": 0.596464991569519, + "learning_rate": 4.717701103905335e-06, + "loss": 0.5846, + "step": 3194 + }, + { + "epoch": 0.47077603143418467, + "grad_norm": 0.6079348921775818, + "learning_rate": 4.717522110177413e-06, + "loss": 0.5662, + "step": 3195 + }, + { + "epoch": 0.47092337917485266, + "grad_norm": 0.5916428565979004, + "learning_rate": 4.717343063119123e-06, + "loss": 0.5615, + "step": 3196 + }, + { + "epoch": 0.47107072691552065, + "grad_norm": 0.5807434916496277, + "learning_rate": 4.717163962734773e-06, + "loss": 0.5997, + "step": 3197 + }, + { + "epoch": 0.4712180746561886, + "grad_norm": 0.5943374037742615, + "learning_rate": 4.71698480902867e-06, + "loss": 0.593, + "step": 3198 + }, + { + "epoch": 0.4713654223968566, + "grad_norm": 0.5804513096809387, + "learning_rate": 4.716805602005121e-06, + "loss": 0.5968, + "step": 3199 + }, + { + "epoch": 0.4715127701375246, + "grad_norm": 0.5915226936340332, + "learning_rate": 4.716626341668438e-06, + "loss": 0.5714, + "step": 3200 + }, + { + "epoch": 0.4716601178781925, + "grad_norm": 0.618309736251831, + "learning_rate": 4.716447028022931e-06, + "loss": 0.5663, + "step": 3201 + }, + { + "epoch": 0.4718074656188605, + "grad_norm": 0.597377359867096, + "learning_rate": 4.716267661072913e-06, + "loss": 0.5732, + "step": 3202 + }, + { + "epoch": 0.4719548133595285, + "grad_norm": 0.5742101669311523, + "learning_rate": 4.716088240822696e-06, + "loss": 0.5591, + "step": 3203 + }, + { + "epoch": 0.4721021611001965, + "grad_norm": 0.5622377991676331, + "learning_rate": 4.715908767276597e-06, + "loss": 0.587, + "step": 3204 + }, + { + "epoch": 0.4722495088408644, + "grad_norm": 0.5746468901634216, + "learning_rate": 4.715729240438932e-06, + "loss": 0.6097, + "step": 3205 + }, + { + "epoch": 0.4723968565815324, + "grad_norm": 0.5855664610862732, + "learning_rate": 4.715549660314017e-06, + "loss": 0.577, + "step": 3206 + }, + { + "epoch": 0.4725442043222004, + "grad_norm": 0.5684012770652771, + "learning_rate": 4.715370026906173e-06, + "loss": 0.5889, + "step": 3207 + }, + { + "epoch": 0.4726915520628684, + "grad_norm": 0.5666549205780029, + "learning_rate": 4.715190340219717e-06, + "loss": 0.6088, + "step": 3208 + }, + { + "epoch": 0.47283889980353633, + "grad_norm": 0.5756152272224426, + "learning_rate": 4.7150106002589745e-06, + "loss": 0.5717, + "step": 3209 + }, + { + "epoch": 0.4729862475442043, + "grad_norm": 0.5459314584732056, + "learning_rate": 4.7148308070282644e-06, + "loss": 0.5695, + "step": 3210 + }, + { + "epoch": 0.4731335952848723, + "grad_norm": 0.5936712622642517, + "learning_rate": 4.714650960531913e-06, + "loss": 0.5703, + "step": 3211 + }, + { + "epoch": 0.4732809430255403, + "grad_norm": 0.5730563998222351, + "learning_rate": 4.714471060774245e-06, + "loss": 0.601, + "step": 3212 + }, + { + "epoch": 0.47342829076620824, + "grad_norm": 0.5916689038276672, + "learning_rate": 4.714291107759585e-06, + "loss": 0.608, + "step": 3213 + }, + { + "epoch": 0.47357563850687623, + "grad_norm": 0.5737413763999939, + "learning_rate": 4.714111101492265e-06, + "loss": 0.5775, + "step": 3214 + }, + { + "epoch": 0.4737229862475442, + "grad_norm": 0.5644193291664124, + "learning_rate": 4.71393104197661e-06, + "loss": 0.5584, + "step": 3215 + }, + { + "epoch": 0.47387033398821216, + "grad_norm": 0.5935625433921814, + "learning_rate": 4.713750929216951e-06, + "loss": 0.5688, + "step": 3216 + }, + { + "epoch": 0.47401768172888015, + "grad_norm": 0.5897957682609558, + "learning_rate": 4.713570763217621e-06, + "loss": 0.5928, + "step": 3217 + }, + { + "epoch": 0.47416502946954814, + "grad_norm": 0.5644633173942566, + "learning_rate": 4.713390543982953e-06, + "loss": 0.5924, + "step": 3218 + }, + { + "epoch": 0.47431237721021613, + "grad_norm": 0.6270262598991394, + "learning_rate": 4.71321027151728e-06, + "loss": 0.5842, + "step": 3219 + }, + { + "epoch": 0.47445972495088407, + "grad_norm": 0.6067751049995422, + "learning_rate": 4.713029945824939e-06, + "loss": 0.5715, + "step": 3220 + }, + { + "epoch": 0.47460707269155206, + "grad_norm": 0.5862571597099304, + "learning_rate": 4.712849566910264e-06, + "loss": 0.5928, + "step": 3221 + }, + { + "epoch": 0.47475442043222005, + "grad_norm": 0.6019778847694397, + "learning_rate": 4.712669134777596e-06, + "loss": 0.5647, + "step": 3222 + }, + { + "epoch": 0.47490176817288804, + "grad_norm": 0.5641512870788574, + "learning_rate": 4.712488649431274e-06, + "loss": 0.5936, + "step": 3223 + }, + { + "epoch": 0.475049115913556, + "grad_norm": 0.5695322155952454, + "learning_rate": 4.712308110875636e-06, + "loss": 0.5896, + "step": 3224 + }, + { + "epoch": 0.47519646365422397, + "grad_norm": 0.5543537139892578, + "learning_rate": 4.712127519115028e-06, + "loss": 0.5686, + "step": 3225 + }, + { + "epoch": 0.47534381139489196, + "grad_norm": 0.5931995511054993, + "learning_rate": 4.7119468741537874e-06, + "loss": 0.5768, + "step": 3226 + }, + { + "epoch": 0.47549115913555995, + "grad_norm": 0.6274838447570801, + "learning_rate": 4.711766175996264e-06, + "loss": 0.5848, + "step": 3227 + }, + { + "epoch": 0.4756385068762279, + "grad_norm": 0.5694071054458618, + "learning_rate": 4.711585424646801e-06, + "loss": 0.5818, + "step": 3228 + }, + { + "epoch": 0.4757858546168959, + "grad_norm": 0.5944874882698059, + "learning_rate": 4.7114046201097455e-06, + "loss": 0.6108, + "step": 3229 + }, + { + "epoch": 0.47593320235756387, + "grad_norm": 0.5751830339431763, + "learning_rate": 4.711223762389447e-06, + "loss": 0.6122, + "step": 3230 + }, + { + "epoch": 0.4760805500982318, + "grad_norm": 0.6025527119636536, + "learning_rate": 4.711042851490253e-06, + "loss": 0.567, + "step": 3231 + }, + { + "epoch": 0.4762278978388998, + "grad_norm": 0.6038786768913269, + "learning_rate": 4.710861887416516e-06, + "loss": 0.5745, + "step": 3232 + }, + { + "epoch": 0.4763752455795678, + "grad_norm": 0.5653093457221985, + "learning_rate": 4.710680870172588e-06, + "loss": 0.5884, + "step": 3233 + }, + { + "epoch": 0.4765225933202358, + "grad_norm": 0.6245757937431335, + "learning_rate": 4.710499799762822e-06, + "loss": 0.568, + "step": 3234 + }, + { + "epoch": 0.4766699410609037, + "grad_norm": 0.5686880946159363, + "learning_rate": 4.710318676191572e-06, + "loss": 0.5877, + "step": 3235 + }, + { + "epoch": 0.4768172888015717, + "grad_norm": 0.5551854968070984, + "learning_rate": 4.710137499463195e-06, + "loss": 0.5931, + "step": 3236 + }, + { + "epoch": 0.4769646365422397, + "grad_norm": 0.573583722114563, + "learning_rate": 4.709956269582047e-06, + "loss": 0.5786, + "step": 3237 + }, + { + "epoch": 0.4771119842829077, + "grad_norm": 0.6239295601844788, + "learning_rate": 4.709774986552487e-06, + "loss": 0.6113, + "step": 3238 + }, + { + "epoch": 0.4772593320235756, + "grad_norm": 0.5870274305343628, + "learning_rate": 4.709593650378876e-06, + "loss": 0.549, + "step": 3239 + }, + { + "epoch": 0.4774066797642436, + "grad_norm": 0.5890795588493347, + "learning_rate": 4.7094122610655736e-06, + "loss": 0.591, + "step": 3240 + }, + { + "epoch": 0.4775540275049116, + "grad_norm": 0.5831470489501953, + "learning_rate": 4.709230818616944e-06, + "loss": 0.5679, + "step": 3241 + }, + { + "epoch": 0.47770137524557954, + "grad_norm": 0.5820234417915344, + "learning_rate": 4.709049323037347e-06, + "loss": 0.5712, + "step": 3242 + }, + { + "epoch": 0.47784872298624753, + "grad_norm": 0.5709386467933655, + "learning_rate": 4.708867774331151e-06, + "loss": 0.6119, + "step": 3243 + }, + { + "epoch": 0.4779960707269155, + "grad_norm": 0.5652152299880981, + "learning_rate": 4.708686172502721e-06, + "loss": 0.5492, + "step": 3244 + }, + { + "epoch": 0.4781434184675835, + "grad_norm": 0.5730943083763123, + "learning_rate": 4.7085045175564245e-06, + "loss": 0.5586, + "step": 3245 + }, + { + "epoch": 0.47829076620825145, + "grad_norm": 0.5986080169677734, + "learning_rate": 4.70832280949663e-06, + "loss": 0.5279, + "step": 3246 + }, + { + "epoch": 0.47843811394891944, + "grad_norm": 0.5804016590118408, + "learning_rate": 4.708141048327709e-06, + "loss": 0.5647, + "step": 3247 + }, + { + "epoch": 0.47858546168958743, + "grad_norm": 0.5867045521736145, + "learning_rate": 4.70795923405403e-06, + "loss": 0.6236, + "step": 3248 + }, + { + "epoch": 0.4787328094302554, + "grad_norm": 0.6081222891807556, + "learning_rate": 4.707777366679968e-06, + "loss": 0.5943, + "step": 3249 + }, + { + "epoch": 0.47888015717092336, + "grad_norm": 0.5954335331916809, + "learning_rate": 4.707595446209895e-06, + "loss": 0.5491, + "step": 3250 + }, + { + "epoch": 0.47902750491159135, + "grad_norm": 0.6205912232398987, + "learning_rate": 4.707413472648187e-06, + "loss": 0.5821, + "step": 3251 + }, + { + "epoch": 0.47917485265225934, + "grad_norm": 0.5536810755729675, + "learning_rate": 4.707231445999221e-06, + "loss": 0.5816, + "step": 3252 + }, + { + "epoch": 0.47932220039292733, + "grad_norm": 0.5640286803245544, + "learning_rate": 4.707049366267373e-06, + "loss": 0.5754, + "step": 3253 + }, + { + "epoch": 0.47946954813359527, + "grad_norm": 0.6008896231651306, + "learning_rate": 4.706867233457024e-06, + "loss": 0.6092, + "step": 3254 + }, + { + "epoch": 0.47961689587426326, + "grad_norm": 0.5919343829154968, + "learning_rate": 4.706685047572553e-06, + "loss": 0.6203, + "step": 3255 + }, + { + "epoch": 0.47976424361493125, + "grad_norm": 0.544495165348053, + "learning_rate": 4.706502808618341e-06, + "loss": 0.566, + "step": 3256 + }, + { + "epoch": 0.4799115913555992, + "grad_norm": 0.5871495008468628, + "learning_rate": 4.7063205165987714e-06, + "loss": 0.5682, + "step": 3257 + }, + { + "epoch": 0.4800589390962672, + "grad_norm": 0.6058031320571899, + "learning_rate": 4.706138171518228e-06, + "loss": 0.5741, + "step": 3258 + }, + { + "epoch": 0.48020628683693517, + "grad_norm": 0.597867488861084, + "learning_rate": 4.705955773381097e-06, + "loss": 0.6, + "step": 3259 + }, + { + "epoch": 0.48035363457760316, + "grad_norm": 0.5614367723464966, + "learning_rate": 4.7057733221917635e-06, + "loss": 0.5636, + "step": 3260 + }, + { + "epoch": 0.4805009823182711, + "grad_norm": 0.5861160159111023, + "learning_rate": 4.7055908179546175e-06, + "loss": 0.5912, + "step": 3261 + }, + { + "epoch": 0.4806483300589391, + "grad_norm": 0.6035003066062927, + "learning_rate": 4.705408260674046e-06, + "loss": 0.5643, + "step": 3262 + }, + { + "epoch": 0.4807956777996071, + "grad_norm": 0.6102508306503296, + "learning_rate": 4.70522565035444e-06, + "loss": 0.5731, + "step": 3263 + }, + { + "epoch": 0.48094302554027507, + "grad_norm": 0.6126112937927246, + "learning_rate": 4.7050429870001915e-06, + "loss": 0.5861, + "step": 3264 + }, + { + "epoch": 0.481090373280943, + "grad_norm": 0.5839444398880005, + "learning_rate": 4.7048602706156936e-06, + "loss": 0.5535, + "step": 3265 + }, + { + "epoch": 0.481237721021611, + "grad_norm": 0.5541897416114807, + "learning_rate": 4.70467750120534e-06, + "loss": 0.5777, + "step": 3266 + }, + { + "epoch": 0.481385068762279, + "grad_norm": 0.5767025947570801, + "learning_rate": 4.7044946787735266e-06, + "loss": 0.6144, + "step": 3267 + }, + { + "epoch": 0.481532416502947, + "grad_norm": 0.5698677897453308, + "learning_rate": 4.7043118033246505e-06, + "loss": 0.5835, + "step": 3268 + }, + { + "epoch": 0.4816797642436149, + "grad_norm": 0.5984632968902588, + "learning_rate": 4.7041288748631095e-06, + "loss": 0.6078, + "step": 3269 + }, + { + "epoch": 0.4818271119842829, + "grad_norm": 0.564947247505188, + "learning_rate": 4.703945893393302e-06, + "loss": 0.574, + "step": 3270 + }, + { + "epoch": 0.4819744597249509, + "grad_norm": 0.5842565298080444, + "learning_rate": 4.703762858919631e-06, + "loss": 0.5508, + "step": 3271 + }, + { + "epoch": 0.48212180746561883, + "grad_norm": 0.5914248824119568, + "learning_rate": 4.703579771446495e-06, + "loss": 0.5822, + "step": 3272 + }, + { + "epoch": 0.4822691552062868, + "grad_norm": 0.5813931822776794, + "learning_rate": 4.703396630978301e-06, + "loss": 0.5909, + "step": 3273 + }, + { + "epoch": 0.4824165029469548, + "grad_norm": 0.5903865098953247, + "learning_rate": 4.703213437519449e-06, + "loss": 0.6074, + "step": 3274 + }, + { + "epoch": 0.4825638506876228, + "grad_norm": 0.581352710723877, + "learning_rate": 4.70303019107435e-06, + "loss": 0.5817, + "step": 3275 + }, + { + "epoch": 0.48271119842829074, + "grad_norm": 0.5844845771789551, + "learning_rate": 4.702846891647406e-06, + "loss": 0.5999, + "step": 3276 + }, + { + "epoch": 0.48285854616895874, + "grad_norm": 0.5744144320487976, + "learning_rate": 4.702663539243029e-06, + "loss": 0.5889, + "step": 3277 + }, + { + "epoch": 0.4830058939096267, + "grad_norm": 0.5874344706535339, + "learning_rate": 4.7024801338656254e-06, + "loss": 0.5743, + "step": 3278 + }, + { + "epoch": 0.4831532416502947, + "grad_norm": 0.5687532424926758, + "learning_rate": 4.702296675519609e-06, + "loss": 0.5845, + "step": 3279 + }, + { + "epoch": 0.48330058939096265, + "grad_norm": 0.5920212268829346, + "learning_rate": 4.70211316420939e-06, + "loss": 0.6037, + "step": 3280 + }, + { + "epoch": 0.48344793713163065, + "grad_norm": 0.6088188290596008, + "learning_rate": 4.7019295999393826e-06, + "loss": 0.5922, + "step": 3281 + }, + { + "epoch": 0.48359528487229864, + "grad_norm": 0.5941351056098938, + "learning_rate": 4.701745982714e-06, + "loss": 0.6091, + "step": 3282 + }, + { + "epoch": 0.4837426326129666, + "grad_norm": 0.5658277869224548, + "learning_rate": 4.701562312537661e-06, + "loss": 0.603, + "step": 3283 + }, + { + "epoch": 0.48388998035363456, + "grad_norm": 0.6097317337989807, + "learning_rate": 4.701378589414779e-06, + "loss": 0.6015, + "step": 3284 + }, + { + "epoch": 0.48403732809430255, + "grad_norm": 0.5726844072341919, + "learning_rate": 4.701194813349775e-06, + "loss": 0.541, + "step": 3285 + }, + { + "epoch": 0.48418467583497055, + "grad_norm": 0.5808463096618652, + "learning_rate": 4.701010984347069e-06, + "loss": 0.5964, + "step": 3286 + }, + { + "epoch": 0.4843320235756385, + "grad_norm": 0.5848633646965027, + "learning_rate": 4.7008271024110806e-06, + "loss": 0.5801, + "step": 3287 + }, + { + "epoch": 0.4844793713163065, + "grad_norm": 0.5765916109085083, + "learning_rate": 4.700643167546233e-06, + "loss": 0.5891, + "step": 3288 + }, + { + "epoch": 0.48462671905697446, + "grad_norm": 0.5886749029159546, + "learning_rate": 4.700459179756949e-06, + "loss": 0.5717, + "step": 3289 + }, + { + "epoch": 0.48477406679764246, + "grad_norm": 0.6221309900283813, + "learning_rate": 4.700275139047654e-06, + "loss": 0.5919, + "step": 3290 + }, + { + "epoch": 0.4849214145383104, + "grad_norm": 0.5768537521362305, + "learning_rate": 4.700091045422773e-06, + "loss": 0.6036, + "step": 3291 + }, + { + "epoch": 0.4850687622789784, + "grad_norm": 0.5899191498756409, + "learning_rate": 4.699906898886736e-06, + "loss": 0.5907, + "step": 3292 + }, + { + "epoch": 0.4852161100196464, + "grad_norm": 0.5689420700073242, + "learning_rate": 4.6997226994439685e-06, + "loss": 0.6142, + "step": 3293 + }, + { + "epoch": 0.48536345776031437, + "grad_norm": 0.5870834589004517, + "learning_rate": 4.699538447098903e-06, + "loss": 0.6122, + "step": 3294 + }, + { + "epoch": 0.4855108055009823, + "grad_norm": 0.6023898124694824, + "learning_rate": 4.699354141855968e-06, + "loss": 0.5802, + "step": 3295 + }, + { + "epoch": 0.4856581532416503, + "grad_norm": 0.6505183577537537, + "learning_rate": 4.6991697837195986e-06, + "loss": 0.5429, + "step": 3296 + }, + { + "epoch": 0.4858055009823183, + "grad_norm": 0.6129525303840637, + "learning_rate": 4.698985372694227e-06, + "loss": 0.5784, + "step": 3297 + }, + { + "epoch": 0.4859528487229863, + "grad_norm": 0.6233833432197571, + "learning_rate": 4.698800908784288e-06, + "loss": 0.5817, + "step": 3298 + }, + { + "epoch": 0.4861001964636542, + "grad_norm": 0.6139799952507019, + "learning_rate": 4.698616391994219e-06, + "loss": 0.6072, + "step": 3299 + }, + { + "epoch": 0.4862475442043222, + "grad_norm": 0.594261109828949, + "learning_rate": 4.6984318223284576e-06, + "loss": 0.5808, + "step": 3300 + }, + { + "epoch": 0.4863948919449902, + "grad_norm": 0.610588550567627, + "learning_rate": 4.698247199791441e-06, + "loss": 0.602, + "step": 3301 + }, + { + "epoch": 0.48654223968565813, + "grad_norm": 0.5793323516845703, + "learning_rate": 4.698062524387611e-06, + "loss": 0.5714, + "step": 3302 + }, + { + "epoch": 0.4866895874263261, + "grad_norm": 0.5987470149993896, + "learning_rate": 4.697877796121408e-06, + "loss": 0.5929, + "step": 3303 + }, + { + "epoch": 0.4868369351669941, + "grad_norm": 0.5772079229354858, + "learning_rate": 4.697693014997275e-06, + "loss": 0.5794, + "step": 3304 + }, + { + "epoch": 0.4869842829076621, + "grad_norm": 0.5651068687438965, + "learning_rate": 4.697508181019656e-06, + "loss": 0.5994, + "step": 3305 + }, + { + "epoch": 0.48713163064833004, + "grad_norm": 0.5771653056144714, + "learning_rate": 4.697323294192995e-06, + "loss": 0.5974, + "step": 3306 + }, + { + "epoch": 0.48727897838899803, + "grad_norm": 0.5440567135810852, + "learning_rate": 4.6971383545217396e-06, + "loss": 0.553, + "step": 3307 + }, + { + "epoch": 0.487426326129666, + "grad_norm": 0.5904837250709534, + "learning_rate": 4.696953362010338e-06, + "loss": 0.5778, + "step": 3308 + }, + { + "epoch": 0.487573673870334, + "grad_norm": 0.597187876701355, + "learning_rate": 4.696768316663238e-06, + "loss": 0.591, + "step": 3309 + }, + { + "epoch": 0.48772102161100195, + "grad_norm": 0.5981906056404114, + "learning_rate": 4.69658321848489e-06, + "loss": 0.6194, + "step": 3310 + }, + { + "epoch": 0.48786836935166994, + "grad_norm": 0.5884247422218323, + "learning_rate": 4.696398067479746e-06, + "loss": 0.5518, + "step": 3311 + }, + { + "epoch": 0.48801571709233793, + "grad_norm": 0.5566445589065552, + "learning_rate": 4.696212863652259e-06, + "loss": 0.5802, + "step": 3312 + }, + { + "epoch": 0.48816306483300587, + "grad_norm": 0.566119372844696, + "learning_rate": 4.6960276070068825e-06, + "loss": 0.6204, + "step": 3313 + }, + { + "epoch": 0.48831041257367386, + "grad_norm": 0.5669280886650085, + "learning_rate": 4.695842297548072e-06, + "loss": 0.6281, + "step": 3314 + }, + { + "epoch": 0.48845776031434185, + "grad_norm": 0.6102697849273682, + "learning_rate": 4.695656935280285e-06, + "loss": 0.5977, + "step": 3315 + }, + { + "epoch": 0.48860510805500984, + "grad_norm": 0.5622342228889465, + "learning_rate": 4.695471520207977e-06, + "loss": 0.5615, + "step": 3316 + }, + { + "epoch": 0.4887524557956778, + "grad_norm": 0.5975572466850281, + "learning_rate": 4.69528605233561e-06, + "loss": 0.6191, + "step": 3317 + }, + { + "epoch": 0.48889980353634577, + "grad_norm": 0.583502471446991, + "learning_rate": 4.695100531667642e-06, + "loss": 0.5796, + "step": 3318 + }, + { + "epoch": 0.48904715127701376, + "grad_norm": 0.5887831449508667, + "learning_rate": 4.694914958208537e-06, + "loss": 0.5982, + "step": 3319 + }, + { + "epoch": 0.48919449901768175, + "grad_norm": 0.5939801931381226, + "learning_rate": 4.694729331962756e-06, + "loss": 0.5557, + "step": 3320 + }, + { + "epoch": 0.4893418467583497, + "grad_norm": 0.5544824600219727, + "learning_rate": 4.6945436529347645e-06, + "loss": 0.6091, + "step": 3321 + }, + { + "epoch": 0.4894891944990177, + "grad_norm": 0.5415289998054504, + "learning_rate": 4.694357921129027e-06, + "loss": 0.5745, + "step": 3322 + }, + { + "epoch": 0.48963654223968567, + "grad_norm": 0.5909473896026611, + "learning_rate": 4.69417213655001e-06, + "loss": 0.5998, + "step": 3323 + }, + { + "epoch": 0.48978388998035366, + "grad_norm": 0.5787984728813171, + "learning_rate": 4.693986299202183e-06, + "loss": 0.5875, + "step": 3324 + }, + { + "epoch": 0.4899312377210216, + "grad_norm": 0.566895604133606, + "learning_rate": 4.693800409090015e-06, + "loss": 0.5833, + "step": 3325 + }, + { + "epoch": 0.4900785854616896, + "grad_norm": 0.5977500081062317, + "learning_rate": 4.6936144662179755e-06, + "loss": 0.5896, + "step": 3326 + }, + { + "epoch": 0.4902259332023576, + "grad_norm": 0.6272594928741455, + "learning_rate": 4.693428470590538e-06, + "loss": 0.5795, + "step": 3327 + }, + { + "epoch": 0.4903732809430255, + "grad_norm": 0.600591242313385, + "learning_rate": 4.6932424222121734e-06, + "loss": 0.5983, + "step": 3328 + }, + { + "epoch": 0.4905206286836935, + "grad_norm": 0.6321317553520203, + "learning_rate": 4.693056321087357e-06, + "loss": 0.5792, + "step": 3329 + }, + { + "epoch": 0.4906679764243615, + "grad_norm": 0.6434943675994873, + "learning_rate": 4.692870167220565e-06, + "loss": 0.5924, + "step": 3330 + }, + { + "epoch": 0.4908153241650295, + "grad_norm": 0.5921874642372131, + "learning_rate": 4.692683960616274e-06, + "loss": 0.6101, + "step": 3331 + }, + { + "epoch": 0.4909626719056974, + "grad_norm": 0.5849887132644653, + "learning_rate": 4.692497701278963e-06, + "loss": 0.5899, + "step": 3332 + }, + { + "epoch": 0.4911100196463654, + "grad_norm": 0.6248484253883362, + "learning_rate": 4.69231138921311e-06, + "loss": 0.6066, + "step": 3333 + }, + { + "epoch": 0.4912573673870334, + "grad_norm": 0.5766161680221558, + "learning_rate": 4.692125024423197e-06, + "loss": 0.6148, + "step": 3334 + }, + { + "epoch": 0.4914047151277014, + "grad_norm": 0.5865245461463928, + "learning_rate": 4.691938606913704e-06, + "loss": 0.6104, + "step": 3335 + }, + { + "epoch": 0.49155206286836933, + "grad_norm": 0.5767566561698914, + "learning_rate": 4.691752136689116e-06, + "loss": 0.5791, + "step": 3336 + }, + { + "epoch": 0.4916994106090373, + "grad_norm": 0.6162489652633667, + "learning_rate": 4.6915656137539175e-06, + "loss": 0.5741, + "step": 3337 + }, + { + "epoch": 0.4918467583497053, + "grad_norm": 0.6057591438293457, + "learning_rate": 4.691379038112594e-06, + "loss": 0.6197, + "step": 3338 + }, + { + "epoch": 0.4919941060903733, + "grad_norm": 0.605839192867279, + "learning_rate": 4.691192409769632e-06, + "loss": 0.5904, + "step": 3339 + }, + { + "epoch": 0.49214145383104124, + "grad_norm": 0.6101559400558472, + "learning_rate": 4.69100572872952e-06, + "loss": 0.56, + "step": 3340 + }, + { + "epoch": 0.49228880157170923, + "grad_norm": 0.586463212966919, + "learning_rate": 4.690818994996749e-06, + "loss": 0.5705, + "step": 3341 + }, + { + "epoch": 0.4924361493123772, + "grad_norm": 0.5899432301521301, + "learning_rate": 4.6906322085758075e-06, + "loss": 0.6045, + "step": 3342 + }, + { + "epoch": 0.49258349705304516, + "grad_norm": 0.5915207266807556, + "learning_rate": 4.690445369471189e-06, + "loss": 0.5745, + "step": 3343 + }, + { + "epoch": 0.49273084479371315, + "grad_norm": 0.604145348072052, + "learning_rate": 4.690258477687387e-06, + "loss": 0.6033, + "step": 3344 + }, + { + "epoch": 0.49287819253438114, + "grad_norm": 0.5909110307693481, + "learning_rate": 4.690071533228897e-06, + "loss": 0.5708, + "step": 3345 + }, + { + "epoch": 0.49302554027504913, + "grad_norm": 0.624889612197876, + "learning_rate": 4.689884536100213e-06, + "loss": 0.6216, + "step": 3346 + }, + { + "epoch": 0.49317288801571707, + "grad_norm": 0.5840081572532654, + "learning_rate": 4.689697486305833e-06, + "loss": 0.5482, + "step": 3347 + }, + { + "epoch": 0.49332023575638506, + "grad_norm": 0.5882272720336914, + "learning_rate": 4.689510383850255e-06, + "loss": 0.5595, + "step": 3348 + }, + { + "epoch": 0.49346758349705305, + "grad_norm": 0.6255789995193481, + "learning_rate": 4.68932322873798e-06, + "loss": 0.6305, + "step": 3349 + }, + { + "epoch": 0.49361493123772104, + "grad_norm": 0.612233579158783, + "learning_rate": 4.689136020973508e-06, + "loss": 0.6118, + "step": 3350 + }, + { + "epoch": 0.493762278978389, + "grad_norm": 0.605026125907898, + "learning_rate": 4.6889487605613406e-06, + "loss": 0.5604, + "step": 3351 + }, + { + "epoch": 0.49390962671905697, + "grad_norm": 0.5785461664199829, + "learning_rate": 4.688761447505983e-06, + "loss": 0.5868, + "step": 3352 + }, + { + "epoch": 0.49405697445972496, + "grad_norm": 0.5440986156463623, + "learning_rate": 4.688574081811939e-06, + "loss": 0.573, + "step": 3353 + }, + { + "epoch": 0.49420432220039295, + "grad_norm": 0.5811483860015869, + "learning_rate": 4.6883866634837146e-06, + "loss": 0.5713, + "step": 3354 + }, + { + "epoch": 0.4943516699410609, + "grad_norm": 0.5810913443565369, + "learning_rate": 4.688199192525818e-06, + "loss": 0.5786, + "step": 3355 + }, + { + "epoch": 0.4944990176817289, + "grad_norm": 0.6039149165153503, + "learning_rate": 4.688011668942757e-06, + "loss": 0.5592, + "step": 3356 + }, + { + "epoch": 0.49464636542239687, + "grad_norm": 0.5722075700759888, + "learning_rate": 4.687824092739041e-06, + "loss": 0.5562, + "step": 3357 + }, + { + "epoch": 0.4947937131630648, + "grad_norm": 0.5676624774932861, + "learning_rate": 4.687636463919182e-06, + "loss": 0.5969, + "step": 3358 + }, + { + "epoch": 0.4949410609037328, + "grad_norm": 0.5828519463539124, + "learning_rate": 4.687448782487692e-06, + "loss": 0.5442, + "step": 3359 + }, + { + "epoch": 0.4950884086444008, + "grad_norm": 0.6027354598045349, + "learning_rate": 4.687261048449085e-06, + "loss": 0.6156, + "step": 3360 + }, + { + "epoch": 0.4952357563850688, + "grad_norm": 0.5742033123970032, + "learning_rate": 4.687073261807876e-06, + "loss": 0.5781, + "step": 3361 + }, + { + "epoch": 0.4953831041257367, + "grad_norm": 0.6058565974235535, + "learning_rate": 4.686885422568581e-06, + "loss": 0.5976, + "step": 3362 + }, + { + "epoch": 0.4955304518664047, + "grad_norm": 0.6044963598251343, + "learning_rate": 4.686697530735716e-06, + "loss": 0.5718, + "step": 3363 + }, + { + "epoch": 0.4956777996070727, + "grad_norm": 0.5807062387466431, + "learning_rate": 4.686509586313802e-06, + "loss": 0.5822, + "step": 3364 + }, + { + "epoch": 0.4958251473477407, + "grad_norm": 0.6113049387931824, + "learning_rate": 4.6863215893073575e-06, + "loss": 0.5798, + "step": 3365 + }, + { + "epoch": 0.4959724950884086, + "grad_norm": 0.5834681987762451, + "learning_rate": 4.686133539720905e-06, + "loss": 0.5542, + "step": 3366 + }, + { + "epoch": 0.4961198428290766, + "grad_norm": 0.5798554420471191, + "learning_rate": 4.685945437558966e-06, + "loss": 0.5255, + "step": 3367 + }, + { + "epoch": 0.4962671905697446, + "grad_norm": 0.6041094660758972, + "learning_rate": 4.685757282826065e-06, + "loss": 0.5932, + "step": 3368 + }, + { + "epoch": 0.4964145383104126, + "grad_norm": 0.5704637765884399, + "learning_rate": 4.685569075526724e-06, + "loss": 0.5705, + "step": 3369 + }, + { + "epoch": 0.49656188605108054, + "grad_norm": 0.5628877282142639, + "learning_rate": 4.6853808156654744e-06, + "loss": 0.6025, + "step": 3370 + }, + { + "epoch": 0.49670923379174853, + "grad_norm": 0.6237375736236572, + "learning_rate": 4.685192503246841e-06, + "loss": 0.5595, + "step": 3371 + }, + { + "epoch": 0.4968565815324165, + "grad_norm": 0.6185330748558044, + "learning_rate": 4.685004138275352e-06, + "loss": 0.5712, + "step": 3372 + }, + { + "epoch": 0.49700392927308445, + "grad_norm": 0.5722343325614929, + "learning_rate": 4.684815720755538e-06, + "loss": 0.5642, + "step": 3373 + }, + { + "epoch": 0.49715127701375245, + "grad_norm": 0.562379002571106, + "learning_rate": 4.6846272506919314e-06, + "loss": 0.5592, + "step": 3374 + }, + { + "epoch": 0.49729862475442044, + "grad_norm": 0.603293776512146, + "learning_rate": 4.684438728089063e-06, + "loss": 0.5962, + "step": 3375 + }, + { + "epoch": 0.49744597249508843, + "grad_norm": 0.5658560991287231, + "learning_rate": 4.684250152951469e-06, + "loss": 0.5915, + "step": 3376 + }, + { + "epoch": 0.49759332023575636, + "grad_norm": 0.5871817469596863, + "learning_rate": 4.684061525283683e-06, + "loss": 0.5562, + "step": 3377 + }, + { + "epoch": 0.49774066797642436, + "grad_norm": 0.5827960968017578, + "learning_rate": 4.683872845090242e-06, + "loss": 0.5747, + "step": 3378 + }, + { + "epoch": 0.49788801571709235, + "grad_norm": 0.5981277823448181, + "learning_rate": 4.6836841123756825e-06, + "loss": 0.5346, + "step": 3379 + }, + { + "epoch": 0.49803536345776034, + "grad_norm": 0.583259105682373, + "learning_rate": 4.683495327144545e-06, + "loss": 0.569, + "step": 3380 + }, + { + "epoch": 0.4981827111984283, + "grad_norm": 0.5626740455627441, + "learning_rate": 4.683306489401369e-06, + "loss": 0.5917, + "step": 3381 + }, + { + "epoch": 0.49833005893909627, + "grad_norm": 0.5990997552871704, + "learning_rate": 4.683117599150696e-06, + "loss": 0.5722, + "step": 3382 + }, + { + "epoch": 0.49847740667976426, + "grad_norm": 0.5726852416992188, + "learning_rate": 4.682928656397068e-06, + "loss": 0.5869, + "step": 3383 + }, + { + "epoch": 0.4986247544204322, + "grad_norm": 0.5942280292510986, + "learning_rate": 4.6827396611450305e-06, + "loss": 0.5301, + "step": 3384 + }, + { + "epoch": 0.4987721021611002, + "grad_norm": 0.5782231688499451, + "learning_rate": 4.682550613399127e-06, + "loss": 0.6019, + "step": 3385 + }, + { + "epoch": 0.4989194499017682, + "grad_norm": 0.6172735691070557, + "learning_rate": 4.682361513163906e-06, + "loss": 0.5854, + "step": 3386 + }, + { + "epoch": 0.49906679764243617, + "grad_norm": 0.5612070560455322, + "learning_rate": 4.6821723604439125e-06, + "loss": 0.5732, + "step": 3387 + }, + { + "epoch": 0.4992141453831041, + "grad_norm": 0.5800061225891113, + "learning_rate": 4.681983155243699e-06, + "loss": 0.5543, + "step": 3388 + }, + { + "epoch": 0.4993614931237721, + "grad_norm": 0.5580871105194092, + "learning_rate": 4.681793897567814e-06, + "loss": 0.6014, + "step": 3389 + }, + { + "epoch": 0.4995088408644401, + "grad_norm": 0.5895041227340698, + "learning_rate": 4.6816045874208085e-06, + "loss": 0.5492, + "step": 3390 + }, + { + "epoch": 0.4996561886051081, + "grad_norm": 0.5628325343132019, + "learning_rate": 4.681415224807235e-06, + "loss": 0.5909, + "step": 3391 + }, + { + "epoch": 0.499803536345776, + "grad_norm": 0.5757815837860107, + "learning_rate": 4.68122580973165e-06, + "loss": 0.569, + "step": 3392 + }, + { + "epoch": 0.499950884086444, + "grad_norm": 0.5574511885643005, + "learning_rate": 4.681036342198606e-06, + "loss": 0.578, + "step": 3393 + }, + { + "epoch": 0.500098231827112, + "grad_norm": 0.5723998546600342, + "learning_rate": 4.680846822212662e-06, + "loss": 0.5745, + "step": 3394 + }, + { + "epoch": 0.5002455795677799, + "grad_norm": 0.5883909463882446, + "learning_rate": 4.680657249778374e-06, + "loss": 0.5709, + "step": 3395 + }, + { + "epoch": 0.500392927308448, + "grad_norm": 0.5809294581413269, + "learning_rate": 4.680467624900303e-06, + "loss": 0.6082, + "step": 3396 + }, + { + "epoch": 0.5005402750491159, + "grad_norm": 0.5856168866157532, + "learning_rate": 4.680277947583007e-06, + "loss": 0.5581, + "step": 3397 + }, + { + "epoch": 0.5006876227897838, + "grad_norm": 0.6218059659004211, + "learning_rate": 4.6800882178310505e-06, + "loss": 0.5717, + "step": 3398 + }, + { + "epoch": 0.5008349705304519, + "grad_norm": 0.5727264881134033, + "learning_rate": 4.679898435648994e-06, + "loss": 0.6054, + "step": 3399 + }, + { + "epoch": 0.5009823182711198, + "grad_norm": 0.598569393157959, + "learning_rate": 4.6797086010414026e-06, + "loss": 0.5887, + "step": 3400 + }, + { + "epoch": 0.5011296660117878, + "grad_norm": 0.5854867100715637, + "learning_rate": 4.679518714012842e-06, + "loss": 0.5684, + "step": 3401 + }, + { + "epoch": 0.5012770137524558, + "grad_norm": 0.6035808324813843, + "learning_rate": 4.679328774567877e-06, + "loss": 0.5896, + "step": 3402 + }, + { + "epoch": 0.5014243614931237, + "grad_norm": 0.5823826789855957, + "learning_rate": 4.679138782711079e-06, + "loss": 0.5809, + "step": 3403 + }, + { + "epoch": 0.5015717092337918, + "grad_norm": 0.5664811134338379, + "learning_rate": 4.678948738447015e-06, + "loss": 0.6083, + "step": 3404 + }, + { + "epoch": 0.5017190569744597, + "grad_norm": 0.5907384157180786, + "learning_rate": 4.678758641780256e-06, + "loss": 0.5608, + "step": 3405 + }, + { + "epoch": 0.5018664047151277, + "grad_norm": 0.5948281288146973, + "learning_rate": 4.678568492715373e-06, + "loss": 0.5903, + "step": 3406 + }, + { + "epoch": 0.5020137524557957, + "grad_norm": 0.575217068195343, + "learning_rate": 4.67837829125694e-06, + "loss": 0.5728, + "step": 3407 + }, + { + "epoch": 0.5021611001964637, + "grad_norm": 0.56910240650177, + "learning_rate": 4.67818803740953e-06, + "loss": 0.5213, + "step": 3408 + }, + { + "epoch": 0.5023084479371316, + "grad_norm": 0.5786854028701782, + "learning_rate": 4.6779977311777205e-06, + "loss": 0.6066, + "step": 3409 + }, + { + "epoch": 0.5024557956777996, + "grad_norm": 0.569149374961853, + "learning_rate": 4.677807372566085e-06, + "loss": 0.5656, + "step": 3410 + }, + { + "epoch": 0.5026031434184676, + "grad_norm": 0.5482324361801147, + "learning_rate": 4.677616961579205e-06, + "loss": 0.5693, + "step": 3411 + }, + { + "epoch": 0.5027504911591356, + "grad_norm": 0.5497886538505554, + "learning_rate": 4.67742649822166e-06, + "loss": 0.5758, + "step": 3412 + }, + { + "epoch": 0.5028978388998036, + "grad_norm": 0.588737964630127, + "learning_rate": 4.677235982498028e-06, + "loss": 0.6057, + "step": 3413 + }, + { + "epoch": 0.5030451866404715, + "grad_norm": 0.5782597064971924, + "learning_rate": 4.6770454144128905e-06, + "loss": 0.6182, + "step": 3414 + }, + { + "epoch": 0.5031925343811395, + "grad_norm": 0.5674954056739807, + "learning_rate": 4.676854793970833e-06, + "loss": 0.5462, + "step": 3415 + }, + { + "epoch": 0.5033398821218075, + "grad_norm": 0.5914164781570435, + "learning_rate": 4.676664121176438e-06, + "loss": 0.5528, + "step": 3416 + }, + { + "epoch": 0.5034872298624754, + "grad_norm": 0.5933207273483276, + "learning_rate": 4.676473396034293e-06, + "loss": 0.5945, + "step": 3417 + }, + { + "epoch": 0.5036345776031435, + "grad_norm": 0.5793149471282959, + "learning_rate": 4.676282618548982e-06, + "loss": 0.5876, + "step": 3418 + }, + { + "epoch": 0.5037819253438114, + "grad_norm": 0.5871608257293701, + "learning_rate": 4.676091788725096e-06, + "loss": 0.6103, + "step": 3419 + }, + { + "epoch": 0.5039292730844793, + "grad_norm": 0.5982767343521118, + "learning_rate": 4.6759009065672225e-06, + "loss": 0.5916, + "step": 3420 + }, + { + "epoch": 0.5040766208251474, + "grad_norm": 0.6240972876548767, + "learning_rate": 4.675709972079953e-06, + "loss": 0.603, + "step": 3421 + }, + { + "epoch": 0.5042239685658153, + "grad_norm": 0.5884596705436707, + "learning_rate": 4.675518985267879e-06, + "loss": 0.568, + "step": 3422 + }, + { + "epoch": 0.5043713163064834, + "grad_norm": 0.6217235922813416, + "learning_rate": 4.675327946135594e-06, + "loss": 0.5636, + "step": 3423 + }, + { + "epoch": 0.5045186640471513, + "grad_norm": 0.5875409841537476, + "learning_rate": 4.675136854687693e-06, + "loss": 0.592, + "step": 3424 + }, + { + "epoch": 0.5046660117878192, + "grad_norm": 0.6112950444221497, + "learning_rate": 4.67494571092877e-06, + "loss": 0.5774, + "step": 3425 + }, + { + "epoch": 0.5048133595284873, + "grad_norm": 0.6038789749145508, + "learning_rate": 4.674754514863422e-06, + "loss": 0.5796, + "step": 3426 + }, + { + "epoch": 0.5049607072691552, + "grad_norm": 0.585237443447113, + "learning_rate": 4.674563266496249e-06, + "loss": 0.5695, + "step": 3427 + }, + { + "epoch": 0.5051080550098231, + "grad_norm": 0.5914356112480164, + "learning_rate": 4.674371965831849e-06, + "loss": 0.5621, + "step": 3428 + }, + { + "epoch": 0.5052554027504912, + "grad_norm": 0.5964142680168152, + "learning_rate": 4.674180612874824e-06, + "loss": 0.5759, + "step": 3429 + }, + { + "epoch": 0.5054027504911591, + "grad_norm": 0.5915545225143433, + "learning_rate": 4.673989207629773e-06, + "loss": 0.5269, + "step": 3430 + }, + { + "epoch": 0.5055500982318271, + "grad_norm": 0.5934330224990845, + "learning_rate": 4.673797750101303e-06, + "loss": 0.5842, + "step": 3431 + }, + { + "epoch": 0.5056974459724951, + "grad_norm": 0.5858384966850281, + "learning_rate": 4.673606240294016e-06, + "loss": 0.5856, + "step": 3432 + }, + { + "epoch": 0.505844793713163, + "grad_norm": 0.5786495804786682, + "learning_rate": 4.673414678212518e-06, + "loss": 0.604, + "step": 3433 + }, + { + "epoch": 0.5059921414538311, + "grad_norm": 0.5987536907196045, + "learning_rate": 4.673223063861417e-06, + "loss": 0.5934, + "step": 3434 + }, + { + "epoch": 0.506139489194499, + "grad_norm": 0.5835490822792053, + "learning_rate": 4.673031397245321e-06, + "loss": 0.5923, + "step": 3435 + }, + { + "epoch": 0.506286836935167, + "grad_norm": 0.5840112566947937, + "learning_rate": 4.6728396783688385e-06, + "loss": 0.6029, + "step": 3436 + }, + { + "epoch": 0.506434184675835, + "grad_norm": 0.6053329110145569, + "learning_rate": 4.672647907236581e-06, + "loss": 0.5869, + "step": 3437 + }, + { + "epoch": 0.5065815324165029, + "grad_norm": 0.5948776006698608, + "learning_rate": 4.672456083853159e-06, + "loss": 0.5642, + "step": 3438 + }, + { + "epoch": 0.5067288801571709, + "grad_norm": 0.5722783207893372, + "learning_rate": 4.67226420822319e-06, + "loss": 0.5539, + "step": 3439 + }, + { + "epoch": 0.5068762278978389, + "grad_norm": 0.6044876575469971, + "learning_rate": 4.672072280351284e-06, + "loss": 0.5689, + "step": 3440 + }, + { + "epoch": 0.5070235756385069, + "grad_norm": 0.5885648131370544, + "learning_rate": 4.6718803002420585e-06, + "loss": 0.5615, + "step": 3441 + }, + { + "epoch": 0.5071709233791748, + "grad_norm": 0.5865170359611511, + "learning_rate": 4.6716882679001305e-06, + "loss": 0.5975, + "step": 3442 + }, + { + "epoch": 0.5073182711198428, + "grad_norm": 0.5951319932937622, + "learning_rate": 4.671496183330118e-06, + "loss": 0.5844, + "step": 3443 + }, + { + "epoch": 0.5074656188605108, + "grad_norm": 0.5832789540290833, + "learning_rate": 4.671304046536641e-06, + "loss": 0.5998, + "step": 3444 + }, + { + "epoch": 0.5076129666011788, + "grad_norm": 0.586457371711731, + "learning_rate": 4.6711118575243195e-06, + "loss": 0.601, + "step": 3445 + }, + { + "epoch": 0.5077603143418468, + "grad_norm": 0.6203793883323669, + "learning_rate": 4.6709196162977774e-06, + "loss": 0.5683, + "step": 3446 + }, + { + "epoch": 0.5079076620825147, + "grad_norm": 0.6057616472244263, + "learning_rate": 4.670727322861636e-06, + "loss": 0.5983, + "step": 3447 + }, + { + "epoch": 0.5080550098231827, + "grad_norm": 0.5786202549934387, + "learning_rate": 4.67053497722052e-06, + "loss": 0.5247, + "step": 3448 + }, + { + "epoch": 0.5082023575638507, + "grad_norm": 0.5836642980575562, + "learning_rate": 4.670342579379057e-06, + "loss": 0.5559, + "step": 3449 + }, + { + "epoch": 0.5083497053045186, + "grad_norm": 0.5905393362045288, + "learning_rate": 4.670150129341872e-06, + "loss": 0.5783, + "step": 3450 + }, + { + "epoch": 0.5084970530451867, + "grad_norm": 0.5969349145889282, + "learning_rate": 4.669957627113595e-06, + "loss": 0.5841, + "step": 3451 + }, + { + "epoch": 0.5086444007858546, + "grad_norm": 0.5351272821426392, + "learning_rate": 4.669765072698854e-06, + "loss": 0.5722, + "step": 3452 + }, + { + "epoch": 0.5087917485265226, + "grad_norm": 0.5687476396560669, + "learning_rate": 4.669572466102282e-06, + "loss": 0.5983, + "step": 3453 + }, + { + "epoch": 0.5089390962671906, + "grad_norm": 0.6008659601211548, + "learning_rate": 4.669379807328509e-06, + "loss": 0.5844, + "step": 3454 + }, + { + "epoch": 0.5090864440078585, + "grad_norm": 0.564318835735321, + "learning_rate": 4.6691870963821685e-06, + "loss": 0.5738, + "step": 3455 + }, + { + "epoch": 0.5092337917485266, + "grad_norm": 0.573272705078125, + "learning_rate": 4.668994333267896e-06, + "loss": 0.5934, + "step": 3456 + }, + { + "epoch": 0.5093811394891945, + "grad_norm": 0.569323718547821, + "learning_rate": 4.668801517990328e-06, + "loss": 0.6149, + "step": 3457 + }, + { + "epoch": 0.5095284872298624, + "grad_norm": 0.5532691478729248, + "learning_rate": 4.6686086505541005e-06, + "loss": 0.5733, + "step": 3458 + }, + { + "epoch": 0.5096758349705305, + "grad_norm": 0.555980920791626, + "learning_rate": 4.668415730963851e-06, + "loss": 0.5615, + "step": 3459 + }, + { + "epoch": 0.5098231827111984, + "grad_norm": 0.586735725402832, + "learning_rate": 4.668222759224221e-06, + "loss": 0.5914, + "step": 3460 + }, + { + "epoch": 0.5099705304518664, + "grad_norm": 0.64228755235672, + "learning_rate": 4.668029735339851e-06, + "loss": 0.5753, + "step": 3461 + }, + { + "epoch": 0.5101178781925344, + "grad_norm": 0.6016390919685364, + "learning_rate": 4.667836659315382e-06, + "loss": 0.5761, + "step": 3462 + }, + { + "epoch": 0.5102652259332023, + "grad_norm": 0.6215578317642212, + "learning_rate": 4.667643531155459e-06, + "loss": 0.5942, + "step": 3463 + }, + { + "epoch": 0.5104125736738704, + "grad_norm": 0.6013742089271545, + "learning_rate": 4.667450350864725e-06, + "loss": 0.5724, + "step": 3464 + }, + { + "epoch": 0.5105599214145383, + "grad_norm": 0.5582330822944641, + "learning_rate": 4.667257118447826e-06, + "loss": 0.555, + "step": 3465 + }, + { + "epoch": 0.5107072691552063, + "grad_norm": 0.5751593708992004, + "learning_rate": 4.667063833909411e-06, + "loss": 0.5897, + "step": 3466 + }, + { + "epoch": 0.5108546168958743, + "grad_norm": 0.5641042590141296, + "learning_rate": 4.666870497254127e-06, + "loss": 0.599, + "step": 3467 + }, + { + "epoch": 0.5110019646365422, + "grad_norm": 0.5705146193504333, + "learning_rate": 4.666677108486624e-06, + "loss": 0.5953, + "step": 3468 + }, + { + "epoch": 0.5111493123772102, + "grad_norm": 0.5740541219711304, + "learning_rate": 4.666483667611553e-06, + "loss": 0.5996, + "step": 3469 + }, + { + "epoch": 0.5112966601178782, + "grad_norm": 0.5829674601554871, + "learning_rate": 4.666290174633565e-06, + "loss": 0.6248, + "step": 3470 + }, + { + "epoch": 0.5114440078585462, + "grad_norm": 0.6340366005897522, + "learning_rate": 4.666096629557315e-06, + "loss": 0.5633, + "step": 3471 + }, + { + "epoch": 0.5115913555992141, + "grad_norm": 0.6017214059829712, + "learning_rate": 4.665903032387456e-06, + "loss": 0.582, + "step": 3472 + }, + { + "epoch": 0.5117387033398821, + "grad_norm": 0.5918905735015869, + "learning_rate": 4.6657093831286464e-06, + "loss": 0.5997, + "step": 3473 + }, + { + "epoch": 0.5118860510805501, + "grad_norm": 0.5921267867088318, + "learning_rate": 4.665515681785541e-06, + "loss": 0.6048, + "step": 3474 + }, + { + "epoch": 0.5120333988212181, + "grad_norm": 0.5802397131919861, + "learning_rate": 4.665321928362799e-06, + "loss": 0.6065, + "step": 3475 + }, + { + "epoch": 0.5121807465618861, + "grad_norm": 0.5613111853599548, + "learning_rate": 4.665128122865081e-06, + "loss": 0.5841, + "step": 3476 + }, + { + "epoch": 0.512328094302554, + "grad_norm": 0.5723958015441895, + "learning_rate": 4.664934265297048e-06, + "loss": 0.5585, + "step": 3477 + }, + { + "epoch": 0.512475442043222, + "grad_norm": 0.5468228459358215, + "learning_rate": 4.664740355663359e-06, + "loss": 0.5695, + "step": 3478 + }, + { + "epoch": 0.51262278978389, + "grad_norm": 0.557781994342804, + "learning_rate": 4.6645463939686805e-06, + "loss": 0.55, + "step": 3479 + }, + { + "epoch": 0.5127701375245579, + "grad_norm": 0.5889943838119507, + "learning_rate": 4.664352380217677e-06, + "loss": 0.5868, + "step": 3480 + }, + { + "epoch": 0.512917485265226, + "grad_norm": 0.5747378468513489, + "learning_rate": 4.664158314415014e-06, + "loss": 0.6095, + "step": 3481 + }, + { + "epoch": 0.5130648330058939, + "grad_norm": 0.5946477055549622, + "learning_rate": 4.663964196565358e-06, + "loss": 0.6037, + "step": 3482 + }, + { + "epoch": 0.5132121807465619, + "grad_norm": 0.5764960050582886, + "learning_rate": 4.663770026673379e-06, + "loss": 0.5735, + "step": 3483 + }, + { + "epoch": 0.5133595284872299, + "grad_norm": 0.6120442152023315, + "learning_rate": 4.663575804743745e-06, + "loss": 0.5724, + "step": 3484 + }, + { + "epoch": 0.5135068762278978, + "grad_norm": 0.5579593777656555, + "learning_rate": 4.6633815307811285e-06, + "loss": 0.5755, + "step": 3485 + }, + { + "epoch": 0.5136542239685659, + "grad_norm": 0.5903056859970093, + "learning_rate": 4.663187204790199e-06, + "loss": 0.5925, + "step": 3486 + }, + { + "epoch": 0.5138015717092338, + "grad_norm": 0.6016568541526794, + "learning_rate": 4.662992826775633e-06, + "loss": 0.6294, + "step": 3487 + }, + { + "epoch": 0.5139489194499017, + "grad_norm": 0.6057350039482117, + "learning_rate": 4.662798396742104e-06, + "loss": 0.5753, + "step": 3488 + }, + { + "epoch": 0.5140962671905698, + "grad_norm": 0.5919654965400696, + "learning_rate": 4.662603914694288e-06, + "loss": 0.5937, + "step": 3489 + }, + { + "epoch": 0.5142436149312377, + "grad_norm": 0.5618305206298828, + "learning_rate": 4.662409380636862e-06, + "loss": 0.6037, + "step": 3490 + }, + { + "epoch": 0.5143909626719056, + "grad_norm": 0.5573481917381287, + "learning_rate": 4.662214794574505e-06, + "loss": 0.5685, + "step": 3491 + }, + { + "epoch": 0.5145383104125737, + "grad_norm": 0.5606126189231873, + "learning_rate": 4.662020156511896e-06, + "loss": 0.5652, + "step": 3492 + }, + { + "epoch": 0.5146856581532416, + "grad_norm": 0.5925414562225342, + "learning_rate": 4.661825466453716e-06, + "loss": 0.5768, + "step": 3493 + }, + { + "epoch": 0.5148330058939097, + "grad_norm": 0.5975925922393799, + "learning_rate": 4.6616307244046484e-06, + "loss": 0.6112, + "step": 3494 + }, + { + "epoch": 0.5149803536345776, + "grad_norm": 0.8098443150520325, + "learning_rate": 4.6614359303693755e-06, + "loss": 0.6027, + "step": 3495 + }, + { + "epoch": 0.5151277013752456, + "grad_norm": 0.5980836153030396, + "learning_rate": 4.661241084352582e-06, + "loss": 0.5928, + "step": 3496 + }, + { + "epoch": 0.5152750491159136, + "grad_norm": 0.5952981114387512, + "learning_rate": 4.661046186358954e-06, + "loss": 0.5813, + "step": 3497 + }, + { + "epoch": 0.5154223968565815, + "grad_norm": 0.6007152199745178, + "learning_rate": 4.660851236393179e-06, + "loss": 0.591, + "step": 3498 + }, + { + "epoch": 0.5155697445972495, + "grad_norm": 0.5670558214187622, + "learning_rate": 4.660656234459945e-06, + "loss": 0.5805, + "step": 3499 + }, + { + "epoch": 0.5157170923379175, + "grad_norm": 0.6002108454704285, + "learning_rate": 4.6604611805639426e-06, + "loss": 0.6005, + "step": 3500 + }, + { + "epoch": 0.5158644400785855, + "grad_norm": 0.616374135017395, + "learning_rate": 4.660266074709861e-06, + "loss": 0.5952, + "step": 3501 + }, + { + "epoch": 0.5160117878192534, + "grad_norm": 0.5577821135520935, + "learning_rate": 4.660070916902394e-06, + "loss": 0.5674, + "step": 3502 + }, + { + "epoch": 0.5161591355599214, + "grad_norm": 0.5708683133125305, + "learning_rate": 4.6598757071462344e-06, + "loss": 0.561, + "step": 3503 + }, + { + "epoch": 0.5163064833005894, + "grad_norm": 0.5876691341400146, + "learning_rate": 4.659680445446078e-06, + "loss": 0.5802, + "step": 3504 + }, + { + "epoch": 0.5164538310412574, + "grad_norm": 0.5879068970680237, + "learning_rate": 4.659485131806619e-06, + "loss": 0.5655, + "step": 3505 + }, + { + "epoch": 0.5166011787819254, + "grad_norm": 0.6123214960098267, + "learning_rate": 4.659289766232556e-06, + "loss": 0.5791, + "step": 3506 + }, + { + "epoch": 0.5167485265225933, + "grad_norm": 0.5619329214096069, + "learning_rate": 4.659094348728587e-06, + "loss": 0.582, + "step": 3507 + }, + { + "epoch": 0.5168958742632613, + "grad_norm": 0.555004358291626, + "learning_rate": 4.65889887929941e-06, + "loss": 0.5782, + "step": 3508 + }, + { + "epoch": 0.5170432220039293, + "grad_norm": 0.5706020593643188, + "learning_rate": 4.658703357949729e-06, + "loss": 0.5766, + "step": 3509 + }, + { + "epoch": 0.5171905697445972, + "grad_norm": 0.5639511346817017, + "learning_rate": 4.658507784684245e-06, + "loss": 0.5739, + "step": 3510 + }, + { + "epoch": 0.5173379174852653, + "grad_norm": 0.5992263555526733, + "learning_rate": 4.65831215950766e-06, + "loss": 0.6004, + "step": 3511 + }, + { + "epoch": 0.5174852652259332, + "grad_norm": 0.5680133104324341, + "learning_rate": 4.658116482424681e-06, + "loss": 0.5688, + "step": 3512 + }, + { + "epoch": 0.5176326129666011, + "grad_norm": 0.617757260799408, + "learning_rate": 4.6579207534400126e-06, + "loss": 0.5683, + "step": 3513 + }, + { + "epoch": 0.5177799607072692, + "grad_norm": 0.5856590867042542, + "learning_rate": 4.657724972558361e-06, + "loss": 0.5909, + "step": 3514 + }, + { + "epoch": 0.5179273084479371, + "grad_norm": 0.6076751351356506, + "learning_rate": 4.657529139784437e-06, + "loss": 0.6187, + "step": 3515 + }, + { + "epoch": 0.5180746561886052, + "grad_norm": 0.6254976987838745, + "learning_rate": 4.65733325512295e-06, + "loss": 0.5441, + "step": 3516 + }, + { + "epoch": 0.5182220039292731, + "grad_norm": 0.5542503595352173, + "learning_rate": 4.657137318578608e-06, + "loss": 0.5498, + "step": 3517 + }, + { + "epoch": 0.518369351669941, + "grad_norm": 0.575357973575592, + "learning_rate": 4.656941330156126e-06, + "loss": 0.5659, + "step": 3518 + }, + { + "epoch": 0.5185166994106091, + "grad_norm": 0.5734875798225403, + "learning_rate": 4.656745289860217e-06, + "loss": 0.6005, + "step": 3519 + }, + { + "epoch": 0.518664047151277, + "grad_norm": 0.5732496976852417, + "learning_rate": 4.656549197695596e-06, + "loss": 0.5796, + "step": 3520 + }, + { + "epoch": 0.5188113948919449, + "grad_norm": 0.6009122729301453, + "learning_rate": 4.656353053666977e-06, + "loss": 0.5897, + "step": 3521 + }, + { + "epoch": 0.518958742632613, + "grad_norm": 0.5792922973632812, + "learning_rate": 4.65615685777908e-06, + "loss": 0.596, + "step": 3522 + }, + { + "epoch": 0.5191060903732809, + "grad_norm": 0.5904471278190613, + "learning_rate": 4.65596061003662e-06, + "loss": 0.6034, + "step": 3523 + }, + { + "epoch": 0.519253438113949, + "grad_norm": 0.5528329014778137, + "learning_rate": 4.6557643104443195e-06, + "loss": 0.576, + "step": 3524 + }, + { + "epoch": 0.5194007858546169, + "grad_norm": 0.6024522185325623, + "learning_rate": 4.655567959006898e-06, + "loss": 0.5716, + "step": 3525 + }, + { + "epoch": 0.5195481335952848, + "grad_norm": 0.634926438331604, + "learning_rate": 4.655371555729078e-06, + "loss": 0.5847, + "step": 3526 + }, + { + "epoch": 0.5196954813359529, + "grad_norm": 0.5879912376403809, + "learning_rate": 4.655175100615584e-06, + "loss": 0.5678, + "step": 3527 + }, + { + "epoch": 0.5198428290766208, + "grad_norm": 0.5832952260971069, + "learning_rate": 4.654978593671139e-06, + "loss": 0.601, + "step": 3528 + }, + { + "epoch": 0.5199901768172888, + "grad_norm": 0.6074789762496948, + "learning_rate": 4.65478203490047e-06, + "loss": 0.6029, + "step": 3529 + }, + { + "epoch": 0.5201375245579568, + "grad_norm": 0.587435781955719, + "learning_rate": 4.654585424308303e-06, + "loss": 0.6127, + "step": 3530 + }, + { + "epoch": 0.5202848722986247, + "grad_norm": 0.5753000974655151, + "learning_rate": 4.654388761899368e-06, + "loss": 0.583, + "step": 3531 + }, + { + "epoch": 0.5204322200392927, + "grad_norm": 0.5996934175491333, + "learning_rate": 4.654192047678393e-06, + "loss": 0.5774, + "step": 3532 + }, + { + "epoch": 0.5205795677799607, + "grad_norm": 0.5773442387580872, + "learning_rate": 4.65399528165011e-06, + "loss": 0.5837, + "step": 3533 + }, + { + "epoch": 0.5207269155206287, + "grad_norm": 0.5739894509315491, + "learning_rate": 4.6537984638192505e-06, + "loss": 0.606, + "step": 3534 + }, + { + "epoch": 0.5208742632612967, + "grad_norm": 0.5736202597618103, + "learning_rate": 4.653601594190548e-06, + "loss": 0.5607, + "step": 3535 + }, + { + "epoch": 0.5210216110019646, + "grad_norm": 0.587018609046936, + "learning_rate": 4.653404672768738e-06, + "loss": 0.5771, + "step": 3536 + }, + { + "epoch": 0.5211689587426326, + "grad_norm": 0.5605288743972778, + "learning_rate": 4.653207699558555e-06, + "loss": 0.5838, + "step": 3537 + }, + { + "epoch": 0.5213163064833006, + "grad_norm": 0.5974154472351074, + "learning_rate": 4.653010674564737e-06, + "loss": 0.576, + "step": 3538 + }, + { + "epoch": 0.5214636542239686, + "grad_norm": 0.5889730453491211, + "learning_rate": 4.652813597792022e-06, + "loss": 0.5668, + "step": 3539 + }, + { + "epoch": 0.5216110019646365, + "grad_norm": 0.5780352354049683, + "learning_rate": 4.65261646924515e-06, + "loss": 0.5654, + "step": 3540 + }, + { + "epoch": 0.5217583497053045, + "grad_norm": 0.5846579670906067, + "learning_rate": 4.652419288928861e-06, + "loss": 0.5561, + "step": 3541 + }, + { + "epoch": 0.5219056974459725, + "grad_norm": 0.5544121265411377, + "learning_rate": 4.652222056847897e-06, + "loss": 0.5447, + "step": 3542 + }, + { + "epoch": 0.5220530451866404, + "grad_norm": 0.5779309272766113, + "learning_rate": 4.652024773007004e-06, + "loss": 0.5766, + "step": 3543 + }, + { + "epoch": 0.5222003929273085, + "grad_norm": 0.5859720706939697, + "learning_rate": 4.651827437410923e-06, + "loss": 0.5527, + "step": 3544 + }, + { + "epoch": 0.5223477406679764, + "grad_norm": 0.5642853379249573, + "learning_rate": 4.651630050064402e-06, + "loss": 0.5741, + "step": 3545 + }, + { + "epoch": 0.5224950884086444, + "grad_norm": 0.5687301158905029, + "learning_rate": 4.6514326109721875e-06, + "loss": 0.555, + "step": 3546 + }, + { + "epoch": 0.5226424361493124, + "grad_norm": 0.5706378221511841, + "learning_rate": 4.651235120139027e-06, + "loss": 0.5494, + "step": 3547 + }, + { + "epoch": 0.5227897838899803, + "grad_norm": 0.6279070377349854, + "learning_rate": 4.651037577569672e-06, + "loss": 0.6169, + "step": 3548 + }, + { + "epoch": 0.5229371316306484, + "grad_norm": 0.5667206645011902, + "learning_rate": 4.6508399832688714e-06, + "loss": 0.5871, + "step": 3549 + }, + { + "epoch": 0.5230844793713163, + "grad_norm": 0.6011213660240173, + "learning_rate": 4.650642337241379e-06, + "loss": 0.6192, + "step": 3550 + }, + { + "epoch": 0.5232318271119842, + "grad_norm": 0.550561785697937, + "learning_rate": 4.650444639491946e-06, + "loss": 0.5826, + "step": 3551 + }, + { + "epoch": 0.5233791748526523, + "grad_norm": 0.5953341126441956, + "learning_rate": 4.650246890025328e-06, + "loss": 0.5944, + "step": 3552 + }, + { + "epoch": 0.5235265225933202, + "grad_norm": 0.5582936406135559, + "learning_rate": 4.650049088846281e-06, + "loss": 0.5831, + "step": 3553 + }, + { + "epoch": 0.5236738703339883, + "grad_norm": 0.6130421161651611, + "learning_rate": 4.6498512359595625e-06, + "loss": 0.6343, + "step": 3554 + }, + { + "epoch": 0.5238212180746562, + "grad_norm": 0.5625283122062683, + "learning_rate": 4.64965333136993e-06, + "loss": 0.5906, + "step": 3555 + }, + { + "epoch": 0.5239685658153241, + "grad_norm": 0.5657013654708862, + "learning_rate": 4.6494553750821424e-06, + "loss": 0.5831, + "step": 3556 + }, + { + "epoch": 0.5241159135559922, + "grad_norm": 0.6204487085342407, + "learning_rate": 4.649257367100962e-06, + "loss": 0.584, + "step": 3557 + }, + { + "epoch": 0.5242632612966601, + "grad_norm": 0.5827760100364685, + "learning_rate": 4.649059307431149e-06, + "loss": 0.5788, + "step": 3558 + }, + { + "epoch": 0.5244106090373281, + "grad_norm": 0.5758901834487915, + "learning_rate": 4.648861196077468e-06, + "loss": 0.5436, + "step": 3559 + }, + { + "epoch": 0.5245579567779961, + "grad_norm": 0.5913146138191223, + "learning_rate": 4.648663033044684e-06, + "loss": 0.589, + "step": 3560 + }, + { + "epoch": 0.524705304518664, + "grad_norm": 0.6301586627960205, + "learning_rate": 4.64846481833756e-06, + "loss": 0.6008, + "step": 3561 + }, + { + "epoch": 0.524852652259332, + "grad_norm": 0.5821130871772766, + "learning_rate": 4.648266551960867e-06, + "loss": 0.5812, + "step": 3562 + }, + { + "epoch": 0.525, + "grad_norm": 0.5833989381790161, + "learning_rate": 4.6480682339193694e-06, + "loss": 0.5851, + "step": 3563 + }, + { + "epoch": 0.525147347740668, + "grad_norm": 0.5899155139923096, + "learning_rate": 4.647869864217839e-06, + "loss": 0.5954, + "step": 3564 + }, + { + "epoch": 0.525294695481336, + "grad_norm": 0.5899690985679626, + "learning_rate": 4.647671442861046e-06, + "loss": 0.5787, + "step": 3565 + }, + { + "epoch": 0.5254420432220039, + "grad_norm": 0.5562404990196228, + "learning_rate": 4.647472969853761e-06, + "loss": 0.5584, + "step": 3566 + }, + { + "epoch": 0.5255893909626719, + "grad_norm": 0.5717750787734985, + "learning_rate": 4.6472744452007586e-06, + "loss": 0.5669, + "step": 3567 + }, + { + "epoch": 0.5257367387033399, + "grad_norm": 0.6189903020858765, + "learning_rate": 4.647075868906814e-06, + "loss": 0.5893, + "step": 3568 + }, + { + "epoch": 0.5258840864440079, + "grad_norm": 0.6105235815048218, + "learning_rate": 4.646877240976702e-06, + "loss": 0.599, + "step": 3569 + }, + { + "epoch": 0.5260314341846758, + "grad_norm": 0.6195973753929138, + "learning_rate": 4.646678561415197e-06, + "loss": 0.6101, + "step": 3570 + }, + { + "epoch": 0.5261787819253438, + "grad_norm": 0.6342945098876953, + "learning_rate": 4.646479830227082e-06, + "loss": 0.5736, + "step": 3571 + }, + { + "epoch": 0.5263261296660118, + "grad_norm": 0.6026301383972168, + "learning_rate": 4.646281047417133e-06, + "loss": 0.5949, + "step": 3572 + }, + { + "epoch": 0.5264734774066797, + "grad_norm": 0.5947911739349365, + "learning_rate": 4.64608221299013e-06, + "loss": 0.582, + "step": 3573 + }, + { + "epoch": 0.5266208251473478, + "grad_norm": 0.5721253156661987, + "learning_rate": 4.6458833269508586e-06, + "loss": 0.6128, + "step": 3574 + }, + { + "epoch": 0.5267681728880157, + "grad_norm": 0.5964721441268921, + "learning_rate": 4.645684389304098e-06, + "loss": 0.6046, + "step": 3575 + }, + { + "epoch": 0.5269155206286837, + "grad_norm": 0.5839972496032715, + "learning_rate": 4.645485400054635e-06, + "loss": 0.5707, + "step": 3576 + }, + { + "epoch": 0.5270628683693517, + "grad_norm": 0.5629241466522217, + "learning_rate": 4.645286359207254e-06, + "loss": 0.5719, + "step": 3577 + }, + { + "epoch": 0.5272102161100196, + "grad_norm": 0.5903350710868835, + "learning_rate": 4.645087266766743e-06, + "loss": 0.5917, + "step": 3578 + }, + { + "epoch": 0.5273575638506877, + "grad_norm": 0.6019978523254395, + "learning_rate": 4.644888122737889e-06, + "loss": 0.5521, + "step": 3579 + }, + { + "epoch": 0.5275049115913556, + "grad_norm": 0.6113786697387695, + "learning_rate": 4.644688927125481e-06, + "loss": 0.6175, + "step": 3580 + }, + { + "epoch": 0.5276522593320235, + "grad_norm": 0.5737701654434204, + "learning_rate": 4.644489679934311e-06, + "loss": 0.5943, + "step": 3581 + }, + { + "epoch": 0.5277996070726916, + "grad_norm": 0.5906572937965393, + "learning_rate": 4.64429038116917e-06, + "loss": 0.5773, + "step": 3582 + }, + { + "epoch": 0.5279469548133595, + "grad_norm": 0.6015275120735168, + "learning_rate": 4.644091030834851e-06, + "loss": 0.5781, + "step": 3583 + }, + { + "epoch": 0.5280943025540275, + "grad_norm": 0.5682691335678101, + "learning_rate": 4.643891628936148e-06, + "loss": 0.5594, + "step": 3584 + }, + { + "epoch": 0.5282416502946955, + "grad_norm": 0.5684455037117004, + "learning_rate": 4.643692175477857e-06, + "loss": 0.5543, + "step": 3585 + }, + { + "epoch": 0.5283889980353634, + "grad_norm": 0.6046398282051086, + "learning_rate": 4.643492670464775e-06, + "loss": 0.5847, + "step": 3586 + }, + { + "epoch": 0.5285363457760315, + "grad_norm": 0.5656443238258362, + "learning_rate": 4.643293113901699e-06, + "loss": 0.5443, + "step": 3587 + }, + { + "epoch": 0.5286836935166994, + "grad_norm": 0.5808804631233215, + "learning_rate": 4.6430935057934294e-06, + "loss": 0.5575, + "step": 3588 + }, + { + "epoch": 0.5288310412573674, + "grad_norm": 0.562021791934967, + "learning_rate": 4.6428938461447655e-06, + "loss": 0.61, + "step": 3589 + }, + { + "epoch": 0.5289783889980354, + "grad_norm": 0.5946148037910461, + "learning_rate": 4.6426941349605105e-06, + "loss": 0.5803, + "step": 3590 + }, + { + "epoch": 0.5291257367387033, + "grad_norm": 0.566230297088623, + "learning_rate": 4.642494372245466e-06, + "loss": 0.5658, + "step": 3591 + }, + { + "epoch": 0.5292730844793713, + "grad_norm": 0.5914976596832275, + "learning_rate": 4.642294558004438e-06, + "loss": 0.5642, + "step": 3592 + }, + { + "epoch": 0.5294204322200393, + "grad_norm": 0.5877007246017456, + "learning_rate": 4.642094692242229e-06, + "loss": 0.5597, + "step": 3593 + }, + { + "epoch": 0.5295677799607073, + "grad_norm": 0.5898883938789368, + "learning_rate": 4.641894774963648e-06, + "loss": 0.5999, + "step": 3594 + }, + { + "epoch": 0.5297151277013753, + "grad_norm": 0.5917782783508301, + "learning_rate": 4.641694806173502e-06, + "loss": 0.5855, + "step": 3595 + }, + { + "epoch": 0.5298624754420432, + "grad_norm": 0.5681557655334473, + "learning_rate": 4.641494785876601e-06, + "loss": 0.5633, + "step": 3596 + }, + { + "epoch": 0.5300098231827112, + "grad_norm": 0.6277133226394653, + "learning_rate": 4.641294714077754e-06, + "loss": 0.5704, + "step": 3597 + }, + { + "epoch": 0.5301571709233792, + "grad_norm": 0.5619063377380371, + "learning_rate": 4.641094590781774e-06, + "loss": 0.5744, + "step": 3598 + }, + { + "epoch": 0.5303045186640472, + "grad_norm": 0.5844464898109436, + "learning_rate": 4.640894415993474e-06, + "loss": 0.5883, + "step": 3599 + }, + { + "epoch": 0.5304518664047151, + "grad_norm": 0.5707536339759827, + "learning_rate": 4.640694189717666e-06, + "loss": 0.5985, + "step": 3600 + }, + { + "epoch": 0.5305992141453831, + "grad_norm": 0.565514326095581, + "learning_rate": 4.640493911959168e-06, + "loss": 0.5581, + "step": 3601 + }, + { + "epoch": 0.5307465618860511, + "grad_norm": 0.5854447484016418, + "learning_rate": 4.6402935827227945e-06, + "loss": 0.5447, + "step": 3602 + }, + { + "epoch": 0.530893909626719, + "grad_norm": 0.6254565715789795, + "learning_rate": 4.640093202013365e-06, + "loss": 0.588, + "step": 3603 + }, + { + "epoch": 0.531041257367387, + "grad_norm": 0.5734483599662781, + "learning_rate": 4.639892769835697e-06, + "loss": 0.6071, + "step": 3604 + }, + { + "epoch": 0.531188605108055, + "grad_norm": 0.5901880264282227, + "learning_rate": 4.639692286194612e-06, + "loss": 0.6113, + "step": 3605 + }, + { + "epoch": 0.531335952848723, + "grad_norm": 0.5696500539779663, + "learning_rate": 4.639491751094931e-06, + "loss": 0.578, + "step": 3606 + }, + { + "epoch": 0.531483300589391, + "grad_norm": 0.6160576343536377, + "learning_rate": 4.639291164541476e-06, + "loss": 0.5965, + "step": 3607 + }, + { + "epoch": 0.5316306483300589, + "grad_norm": 0.5826092958450317, + "learning_rate": 4.639090526539073e-06, + "loss": 0.6077, + "step": 3608 + }, + { + "epoch": 0.531777996070727, + "grad_norm": 0.5941972136497498, + "learning_rate": 4.638889837092546e-06, + "loss": 0.588, + "step": 3609 + }, + { + "epoch": 0.5319253438113949, + "grad_norm": 0.6169887781143188, + "learning_rate": 4.638689096206721e-06, + "loss": 0.5827, + "step": 3610 + }, + { + "epoch": 0.5320726915520628, + "grad_norm": 0.5906368494033813, + "learning_rate": 4.638488303886427e-06, + "loss": 0.6069, + "step": 3611 + }, + { + "epoch": 0.5322200392927309, + "grad_norm": 0.5793556571006775, + "learning_rate": 4.6382874601364925e-06, + "loss": 0.5897, + "step": 3612 + }, + { + "epoch": 0.5323673870333988, + "grad_norm": 0.5942623019218445, + "learning_rate": 4.638086564961747e-06, + "loss": 0.591, + "step": 3613 + }, + { + "epoch": 0.5325147347740667, + "grad_norm": 0.6158429980278015, + "learning_rate": 4.637885618367023e-06, + "loss": 0.5621, + "step": 3614 + }, + { + "epoch": 0.5326620825147348, + "grad_norm": 0.6055457592010498, + "learning_rate": 4.637684620357152e-06, + "loss": 0.5706, + "step": 3615 + }, + { + "epoch": 0.5328094302554027, + "grad_norm": 0.613112211227417, + "learning_rate": 4.637483570936968e-06, + "loss": 0.5781, + "step": 3616 + }, + { + "epoch": 0.5329567779960708, + "grad_norm": 0.5867196321487427, + "learning_rate": 4.6372824701113075e-06, + "loss": 0.5512, + "step": 3617 + }, + { + "epoch": 0.5331041257367387, + "grad_norm": 0.5736981630325317, + "learning_rate": 4.637081317885006e-06, + "loss": 0.5964, + "step": 3618 + }, + { + "epoch": 0.5332514734774066, + "grad_norm": 0.5677937269210815, + "learning_rate": 4.636880114262901e-06, + "loss": 0.5994, + "step": 3619 + }, + { + "epoch": 0.5333988212180747, + "grad_norm": 0.5864701867103577, + "learning_rate": 4.636678859249831e-06, + "loss": 0.5833, + "step": 3620 + }, + { + "epoch": 0.5335461689587426, + "grad_norm": 0.5683903098106384, + "learning_rate": 4.636477552850638e-06, + "loss": 0.5478, + "step": 3621 + }, + { + "epoch": 0.5336935166994106, + "grad_norm": 0.5712646842002869, + "learning_rate": 4.636276195070161e-06, + "loss": 0.5801, + "step": 3622 + }, + { + "epoch": 0.5338408644400786, + "grad_norm": 0.5741866230964661, + "learning_rate": 4.636074785913243e-06, + "loss": 0.6084, + "step": 3623 + }, + { + "epoch": 0.5339882121807465, + "grad_norm": 0.5709436535835266, + "learning_rate": 4.635873325384729e-06, + "loss": 0.6054, + "step": 3624 + }, + { + "epoch": 0.5341355599214146, + "grad_norm": 0.5719617605209351, + "learning_rate": 4.635671813489463e-06, + "loss": 0.5761, + "step": 3625 + }, + { + "epoch": 0.5342829076620825, + "grad_norm": 0.6171509027481079, + "learning_rate": 4.635470250232292e-06, + "loss": 0.5985, + "step": 3626 + }, + { + "epoch": 0.5344302554027505, + "grad_norm": 0.5426247715950012, + "learning_rate": 4.635268635618062e-06, + "loss": 0.5516, + "step": 3627 + }, + { + "epoch": 0.5345776031434185, + "grad_norm": 0.5798994302749634, + "learning_rate": 4.635066969651624e-06, + "loss": 0.5715, + "step": 3628 + }, + { + "epoch": 0.5347249508840864, + "grad_norm": 0.6430162191390991, + "learning_rate": 4.634865252337826e-06, + "loss": 0.5942, + "step": 3629 + }, + { + "epoch": 0.5348722986247544, + "grad_norm": 0.6091779470443726, + "learning_rate": 4.634663483681521e-06, + "loss": 0.5711, + "step": 3630 + }, + { + "epoch": 0.5350196463654224, + "grad_norm": 0.5896491408348083, + "learning_rate": 4.634461663687559e-06, + "loss": 0.571, + "step": 3631 + }, + { + "epoch": 0.5351669941060904, + "grad_norm": 0.5953497290611267, + "learning_rate": 4.634259792360797e-06, + "loss": 0.592, + "step": 3632 + }, + { + "epoch": 0.5353143418467583, + "grad_norm": 0.5932406187057495, + "learning_rate": 4.6340578697060864e-06, + "loss": 0.5986, + "step": 3633 + }, + { + "epoch": 0.5354616895874263, + "grad_norm": 0.5762302875518799, + "learning_rate": 4.633855895728285e-06, + "loss": 0.5478, + "step": 3634 + }, + { + "epoch": 0.5356090373280943, + "grad_norm": 0.5723639130592346, + "learning_rate": 4.633653870432251e-06, + "loss": 0.6046, + "step": 3635 + }, + { + "epoch": 0.5357563850687623, + "grad_norm": 0.6040847897529602, + "learning_rate": 4.633451793822842e-06, + "loss": 0.5851, + "step": 3636 + }, + { + "epoch": 0.5359037328094303, + "grad_norm": 0.5792104005813599, + "learning_rate": 4.6332496659049175e-06, + "loss": 0.5591, + "step": 3637 + }, + { + "epoch": 0.5360510805500982, + "grad_norm": 0.5845980048179626, + "learning_rate": 4.63304748668334e-06, + "loss": 0.6073, + "step": 3638 + }, + { + "epoch": 0.5361984282907662, + "grad_norm": 0.5696987509727478, + "learning_rate": 4.63284525616297e-06, + "loss": 0.5373, + "step": 3639 + }, + { + "epoch": 0.5363457760314342, + "grad_norm": 0.605255126953125, + "learning_rate": 4.632642974348672e-06, + "loss": 0.5866, + "step": 3640 + }, + { + "epoch": 0.5364931237721021, + "grad_norm": 0.5960814952850342, + "learning_rate": 4.632440641245311e-06, + "loss": 0.5711, + "step": 3641 + }, + { + "epoch": 0.5366404715127702, + "grad_norm": 0.604557454586029, + "learning_rate": 4.632238256857753e-06, + "loss": 0.5712, + "step": 3642 + }, + { + "epoch": 0.5367878192534381, + "grad_norm": 0.5690956711769104, + "learning_rate": 4.632035821190865e-06, + "loss": 0.5963, + "step": 3643 + }, + { + "epoch": 0.536935166994106, + "grad_norm": 0.6034788489341736, + "learning_rate": 4.631833334249515e-06, + "loss": 0.5906, + "step": 3644 + }, + { + "epoch": 0.5370825147347741, + "grad_norm": 0.5848168730735779, + "learning_rate": 4.631630796038573e-06, + "loss": 0.5739, + "step": 3645 + }, + { + "epoch": 0.537229862475442, + "grad_norm": 0.5752002596855164, + "learning_rate": 4.631428206562911e-06, + "loss": 0.5996, + "step": 3646 + }, + { + "epoch": 0.5373772102161101, + "grad_norm": 0.5728636980056763, + "learning_rate": 4.6312255658274e-06, + "loss": 0.5982, + "step": 3647 + }, + { + "epoch": 0.537524557956778, + "grad_norm": 0.5646301507949829, + "learning_rate": 4.631022873836913e-06, + "loss": 0.5859, + "step": 3648 + }, + { + "epoch": 0.5376719056974459, + "grad_norm": 0.5810279846191406, + "learning_rate": 4.630820130596327e-06, + "loss": 0.555, + "step": 3649 + }, + { + "epoch": 0.537819253438114, + "grad_norm": 0.5598205924034119, + "learning_rate": 4.630617336110515e-06, + "loss": 0.5851, + "step": 3650 + }, + { + "epoch": 0.5379666011787819, + "grad_norm": 0.5795870423316956, + "learning_rate": 4.630414490384356e-06, + "loss": 0.601, + "step": 3651 + }, + { + "epoch": 0.5381139489194499, + "grad_norm": 0.5633527040481567, + "learning_rate": 4.630211593422727e-06, + "loss": 0.5877, + "step": 3652 + }, + { + "epoch": 0.5382612966601179, + "grad_norm": 0.5626966953277588, + "learning_rate": 4.630008645230509e-06, + "loss": 0.5685, + "step": 3653 + }, + { + "epoch": 0.5384086444007858, + "grad_norm": 0.5814893841743469, + "learning_rate": 4.629805645812582e-06, + "loss": 0.5437, + "step": 3654 + }, + { + "epoch": 0.5385559921414538, + "grad_norm": 0.5779227614402771, + "learning_rate": 4.629602595173828e-06, + "loss": 0.5609, + "step": 3655 + }, + { + "epoch": 0.5387033398821218, + "grad_norm": 0.5960619449615479, + "learning_rate": 4.62939949331913e-06, + "loss": 0.5542, + "step": 3656 + }, + { + "epoch": 0.5388506876227898, + "grad_norm": 0.5786157250404358, + "learning_rate": 4.629196340253373e-06, + "loss": 0.5775, + "step": 3657 + }, + { + "epoch": 0.5389980353634578, + "grad_norm": 0.6164985299110413, + "learning_rate": 4.628993135981444e-06, + "loss": 0.5958, + "step": 3658 + }, + { + "epoch": 0.5391453831041257, + "grad_norm": 0.5838773846626282, + "learning_rate": 4.628789880508228e-06, + "loss": 0.5732, + "step": 3659 + }, + { + "epoch": 0.5392927308447937, + "grad_norm": 0.5768316388130188, + "learning_rate": 4.628586573838613e-06, + "loss": 0.5543, + "step": 3660 + }, + { + "epoch": 0.5394400785854617, + "grad_norm": 0.5773771405220032, + "learning_rate": 4.62838321597749e-06, + "loss": 0.5727, + "step": 3661 + }, + { + "epoch": 0.5395874263261297, + "grad_norm": 0.5880194306373596, + "learning_rate": 4.628179806929748e-06, + "loss": 0.6125, + "step": 3662 + }, + { + "epoch": 0.5397347740667976, + "grad_norm": 0.5703318119049072, + "learning_rate": 4.627976346700281e-06, + "loss": 0.5601, + "step": 3663 + }, + { + "epoch": 0.5398821218074656, + "grad_norm": 0.5684415698051453, + "learning_rate": 4.62777283529398e-06, + "loss": 0.5672, + "step": 3664 + }, + { + "epoch": 0.5400294695481336, + "grad_norm": 0.601350724697113, + "learning_rate": 4.627569272715741e-06, + "loss": 0.5902, + "step": 3665 + }, + { + "epoch": 0.5401768172888016, + "grad_norm": 0.6155059933662415, + "learning_rate": 4.627365658970459e-06, + "loss": 0.5474, + "step": 3666 + }, + { + "epoch": 0.5403241650294696, + "grad_norm": 0.5866442918777466, + "learning_rate": 4.627161994063031e-06, + "loss": 0.5933, + "step": 3667 + }, + { + "epoch": 0.5404715127701375, + "grad_norm": 0.610327422618866, + "learning_rate": 4.626958277998354e-06, + "loss": 0.5461, + "step": 3668 + }, + { + "epoch": 0.5406188605108055, + "grad_norm": 0.5796355605125427, + "learning_rate": 4.626754510781328e-06, + "loss": 0.5659, + "step": 3669 + }, + { + "epoch": 0.5407662082514735, + "grad_norm": 0.5582363605499268, + "learning_rate": 4.626550692416854e-06, + "loss": 0.5749, + "step": 3670 + }, + { + "epoch": 0.5409135559921414, + "grad_norm": 0.6008680462837219, + "learning_rate": 4.626346822909832e-06, + "loss": 0.5784, + "step": 3671 + }, + { + "epoch": 0.5410609037328095, + "grad_norm": 0.6159101128578186, + "learning_rate": 4.6261429022651675e-06, + "loss": 0.5727, + "step": 3672 + }, + { + "epoch": 0.5412082514734774, + "grad_norm": 0.5963539481163025, + "learning_rate": 4.625938930487763e-06, + "loss": 0.5649, + "step": 3673 + }, + { + "epoch": 0.5413555992141453, + "grad_norm": 0.6002902388572693, + "learning_rate": 4.625734907582524e-06, + "loss": 0.5796, + "step": 3674 + }, + { + "epoch": 0.5415029469548134, + "grad_norm": 0.5899228453636169, + "learning_rate": 4.625530833554358e-06, + "loss": 0.5688, + "step": 3675 + }, + { + "epoch": 0.5416502946954813, + "grad_norm": 0.5806033611297607, + "learning_rate": 4.625326708408172e-06, + "loss": 0.549, + "step": 3676 + }, + { + "epoch": 0.5417976424361494, + "grad_norm": 0.5932361483573914, + "learning_rate": 4.625122532148876e-06, + "loss": 0.5711, + "step": 3677 + }, + { + "epoch": 0.5419449901768173, + "grad_norm": 0.5715816020965576, + "learning_rate": 4.624918304781379e-06, + "loss": 0.578, + "step": 3678 + }, + { + "epoch": 0.5420923379174852, + "grad_norm": 0.56363445520401, + "learning_rate": 4.624714026310595e-06, + "loss": 0.5719, + "step": 3679 + }, + { + "epoch": 0.5422396856581533, + "grad_norm": 0.570804238319397, + "learning_rate": 4.624509696741434e-06, + "loss": 0.5997, + "step": 3680 + }, + { + "epoch": 0.5423870333988212, + "grad_norm": 0.5857297778129578, + "learning_rate": 4.624305316078812e-06, + "loss": 0.5909, + "step": 3681 + }, + { + "epoch": 0.5425343811394892, + "grad_norm": 0.612224280834198, + "learning_rate": 4.624100884327642e-06, + "loss": 0.5825, + "step": 3682 + }, + { + "epoch": 0.5426817288801572, + "grad_norm": 0.5953033566474915, + "learning_rate": 4.623896401492844e-06, + "loss": 0.5556, + "step": 3683 + }, + { + "epoch": 0.5428290766208251, + "grad_norm": 0.5835374593734741, + "learning_rate": 4.623691867579333e-06, + "loss": 0.5871, + "step": 3684 + }, + { + "epoch": 0.5429764243614931, + "grad_norm": 0.6081621050834656, + "learning_rate": 4.623487282592028e-06, + "loss": 0.5771, + "step": 3685 + }, + { + "epoch": 0.5431237721021611, + "grad_norm": 0.5931099653244019, + "learning_rate": 4.62328264653585e-06, + "loss": 0.5825, + "step": 3686 + }, + { + "epoch": 0.543271119842829, + "grad_norm": 0.6275483965873718, + "learning_rate": 4.62307795941572e-06, + "loss": 0.5937, + "step": 3687 + }, + { + "epoch": 0.5434184675834971, + "grad_norm": 0.6032069325447083, + "learning_rate": 4.6228732212365615e-06, + "loss": 0.5756, + "step": 3688 + }, + { + "epoch": 0.543565815324165, + "grad_norm": 0.5859166383743286, + "learning_rate": 4.622668432003298e-06, + "loss": 0.5327, + "step": 3689 + }, + { + "epoch": 0.543713163064833, + "grad_norm": 0.5946712493896484, + "learning_rate": 4.622463591720854e-06, + "loss": 0.5961, + "step": 3690 + }, + { + "epoch": 0.543860510805501, + "grad_norm": 0.5855820178985596, + "learning_rate": 4.622258700394155e-06, + "loss": 0.5687, + "step": 3691 + }, + { + "epoch": 0.544007858546169, + "grad_norm": 0.622073769569397, + "learning_rate": 4.622053758028131e-06, + "loss": 0.5813, + "step": 3692 + }, + { + "epoch": 0.5441552062868369, + "grad_norm": 0.5975779294967651, + "learning_rate": 4.62184876462771e-06, + "loss": 0.5862, + "step": 3693 + }, + { + "epoch": 0.5443025540275049, + "grad_norm": 0.6154404282569885, + "learning_rate": 4.62164372019782e-06, + "loss": 0.5608, + "step": 3694 + }, + { + "epoch": 0.5444499017681729, + "grad_norm": 0.5686171054840088, + "learning_rate": 4.621438624743394e-06, + "loss": 0.5909, + "step": 3695 + }, + { + "epoch": 0.5445972495088409, + "grad_norm": 0.6283589005470276, + "learning_rate": 4.621233478269364e-06, + "loss": 0.5022, + "step": 3696 + }, + { + "epoch": 0.5447445972495089, + "grad_norm": 0.6124129891395569, + "learning_rate": 4.621028280780664e-06, + "loss": 0.585, + "step": 3697 + }, + { + "epoch": 0.5448919449901768, + "grad_norm": 0.5876212120056152, + "learning_rate": 4.620823032282228e-06, + "loss": 0.5889, + "step": 3698 + }, + { + "epoch": 0.5450392927308448, + "grad_norm": 0.5912489891052246, + "learning_rate": 4.620617732778994e-06, + "loss": 0.5671, + "step": 3699 + }, + { + "epoch": 0.5451866404715128, + "grad_norm": 0.5682145953178406, + "learning_rate": 4.620412382275897e-06, + "loss": 0.5574, + "step": 3700 + }, + { + "epoch": 0.5453339882121807, + "grad_norm": 0.5864341855049133, + "learning_rate": 4.6202069807778775e-06, + "loss": 0.598, + "step": 3701 + }, + { + "epoch": 0.5454813359528488, + "grad_norm": 0.5966439843177795, + "learning_rate": 4.6200015282898744e-06, + "loss": 0.6094, + "step": 3702 + }, + { + "epoch": 0.5456286836935167, + "grad_norm": 0.5829799175262451, + "learning_rate": 4.61979602481683e-06, + "loss": 0.5886, + "step": 3703 + }, + { + "epoch": 0.5457760314341846, + "grad_norm": 0.6229490637779236, + "learning_rate": 4.619590470363683e-06, + "loss": 0.6134, + "step": 3704 + }, + { + "epoch": 0.5459233791748527, + "grad_norm": 0.5775903463363647, + "learning_rate": 4.619384864935381e-06, + "loss": 0.5521, + "step": 3705 + }, + { + "epoch": 0.5460707269155206, + "grad_norm": 0.5701427459716797, + "learning_rate": 4.619179208536867e-06, + "loss": 0.5584, + "step": 3706 + }, + { + "epoch": 0.5462180746561887, + "grad_norm": 0.5710622668266296, + "learning_rate": 4.618973501173087e-06, + "loss": 0.5672, + "step": 3707 + }, + { + "epoch": 0.5463654223968566, + "grad_norm": 0.6024094223976135, + "learning_rate": 4.618767742848988e-06, + "loss": 0.5528, + "step": 3708 + }, + { + "epoch": 0.5465127701375245, + "grad_norm": 0.5780733227729797, + "learning_rate": 4.618561933569518e-06, + "loss": 0.6199, + "step": 3709 + }, + { + "epoch": 0.5466601178781926, + "grad_norm": 0.6086109280586243, + "learning_rate": 4.618356073339627e-06, + "loss": 0.5877, + "step": 3710 + }, + { + "epoch": 0.5468074656188605, + "grad_norm": 0.6160051226615906, + "learning_rate": 4.618150162164266e-06, + "loss": 0.5406, + "step": 3711 + }, + { + "epoch": 0.5469548133595284, + "grad_norm": 0.5978252291679382, + "learning_rate": 4.617944200048388e-06, + "loss": 0.5698, + "step": 3712 + }, + { + "epoch": 0.5471021611001965, + "grad_norm": 0.5996285676956177, + "learning_rate": 4.617738186996945e-06, + "loss": 0.5817, + "step": 3713 + }, + { + "epoch": 0.5472495088408644, + "grad_norm": 0.611845850944519, + "learning_rate": 4.617532123014892e-06, + "loss": 0.548, + "step": 3714 + }, + { + "epoch": 0.5473968565815324, + "grad_norm": 0.5783276557922363, + "learning_rate": 4.6173260081071845e-06, + "loss": 0.5915, + "step": 3715 + }, + { + "epoch": 0.5475442043222004, + "grad_norm": 0.5737800598144531, + "learning_rate": 4.61711984227878e-06, + "loss": 0.5748, + "step": 3716 + }, + { + "epoch": 0.5476915520628683, + "grad_norm": 0.6155348420143127, + "learning_rate": 4.616913625534635e-06, + "loss": 0.6008, + "step": 3717 + }, + { + "epoch": 0.5478388998035364, + "grad_norm": 0.5667041540145874, + "learning_rate": 4.616707357879712e-06, + "loss": 0.5694, + "step": 3718 + }, + { + "epoch": 0.5479862475442043, + "grad_norm": 0.6203026175498962, + "learning_rate": 4.616501039318968e-06, + "loss": 0.5596, + "step": 3719 + }, + { + "epoch": 0.5481335952848723, + "grad_norm": 0.5766637325286865, + "learning_rate": 4.616294669857368e-06, + "loss": 0.5867, + "step": 3720 + }, + { + "epoch": 0.5482809430255403, + "grad_norm": 0.5942962169647217, + "learning_rate": 4.616088249499873e-06, + "loss": 0.5425, + "step": 3721 + }, + { + "epoch": 0.5484282907662082, + "grad_norm": 0.5840093493461609, + "learning_rate": 4.615881778251448e-06, + "loss": 0.5981, + "step": 3722 + }, + { + "epoch": 0.5485756385068762, + "grad_norm": 0.5872620940208435, + "learning_rate": 4.61567525611706e-06, + "loss": 0.5763, + "step": 3723 + }, + { + "epoch": 0.5487229862475442, + "grad_norm": 0.5915656089782715, + "learning_rate": 4.615468683101674e-06, + "loss": 0.6129, + "step": 3724 + }, + { + "epoch": 0.5488703339882122, + "grad_norm": 0.6127053499221802, + "learning_rate": 4.615262059210257e-06, + "loss": 0.5785, + "step": 3725 + }, + { + "epoch": 0.5490176817288801, + "grad_norm": 0.5433999300003052, + "learning_rate": 4.615055384447781e-06, + "loss": 0.5558, + "step": 3726 + }, + { + "epoch": 0.5491650294695481, + "grad_norm": 0.5983808040618896, + "learning_rate": 4.614848658819214e-06, + "loss": 0.5805, + "step": 3727 + }, + { + "epoch": 0.5493123772102161, + "grad_norm": 0.6026332974433899, + "learning_rate": 4.614641882329529e-06, + "loss": 0.6175, + "step": 3728 + }, + { + "epoch": 0.5494597249508841, + "grad_norm": 0.5693671107292175, + "learning_rate": 4.614435054983699e-06, + "loss": 0.5696, + "step": 3729 + }, + { + "epoch": 0.5496070726915521, + "grad_norm": 0.5635527968406677, + "learning_rate": 4.614228176786698e-06, + "loss": 0.576, + "step": 3730 + }, + { + "epoch": 0.54975442043222, + "grad_norm": 0.6113621592521667, + "learning_rate": 4.6140212477435e-06, + "loss": 0.5911, + "step": 3731 + }, + { + "epoch": 0.549901768172888, + "grad_norm": 0.5602743029594421, + "learning_rate": 4.613814267859083e-06, + "loss": 0.5632, + "step": 3732 + }, + { + "epoch": 0.550049115913556, + "grad_norm": 0.6217567920684814, + "learning_rate": 4.613607237138425e-06, + "loss": 0.5747, + "step": 3733 + }, + { + "epoch": 0.5501964636542239, + "grad_norm": 0.5930169224739075, + "learning_rate": 4.613400155586503e-06, + "loss": 0.6169, + "step": 3734 + }, + { + "epoch": 0.550343811394892, + "grad_norm": 0.5969391465187073, + "learning_rate": 4.613193023208299e-06, + "loss": 0.5704, + "step": 3735 + }, + { + "epoch": 0.5504911591355599, + "grad_norm": 0.5915680527687073, + "learning_rate": 4.612985840008793e-06, + "loss": 0.5975, + "step": 3736 + }, + { + "epoch": 0.550638506876228, + "grad_norm": 0.5704878568649292, + "learning_rate": 4.61277860599297e-06, + "loss": 0.582, + "step": 3737 + }, + { + "epoch": 0.5507858546168959, + "grad_norm": 0.6525079607963562, + "learning_rate": 4.612571321165813e-06, + "loss": 0.5653, + "step": 3738 + }, + { + "epoch": 0.5509332023575638, + "grad_norm": 0.5727036595344543, + "learning_rate": 4.6123639855323045e-06, + "loss": 0.5725, + "step": 3739 + }, + { + "epoch": 0.5510805500982319, + "grad_norm": 0.6082006096839905, + "learning_rate": 4.6121565990974345e-06, + "loss": 0.6107, + "step": 3740 + }, + { + "epoch": 0.5512278978388998, + "grad_norm": 0.5658292770385742, + "learning_rate": 4.611949161866188e-06, + "loss": 0.5682, + "step": 3741 + }, + { + "epoch": 0.5513752455795677, + "grad_norm": 0.5852732062339783, + "learning_rate": 4.611741673843555e-06, + "loss": 0.5978, + "step": 3742 + }, + { + "epoch": 0.5515225933202358, + "grad_norm": 0.5749489068984985, + "learning_rate": 4.6115341350345256e-06, + "loss": 0.5553, + "step": 3743 + }, + { + "epoch": 0.5516699410609037, + "grad_norm": 0.601689338684082, + "learning_rate": 4.611326545444091e-06, + "loss": 0.6005, + "step": 3744 + }, + { + "epoch": 0.5518172888015717, + "grad_norm": 0.5882197618484497, + "learning_rate": 4.611118905077242e-06, + "loss": 0.5902, + "step": 3745 + }, + { + "epoch": 0.5519646365422397, + "grad_norm": 0.5880776643753052, + "learning_rate": 4.610911213938974e-06, + "loss": 0.5471, + "step": 3746 + }, + { + "epoch": 0.5521119842829076, + "grad_norm": 0.6170170307159424, + "learning_rate": 4.6107034720342824e-06, + "loss": 0.5873, + "step": 3747 + }, + { + "epoch": 0.5522593320235757, + "grad_norm": 0.598755419254303, + "learning_rate": 4.610495679368161e-06, + "loss": 0.5687, + "step": 3748 + }, + { + "epoch": 0.5524066797642436, + "grad_norm": 0.558355987071991, + "learning_rate": 4.6102878359456095e-06, + "loss": 0.5741, + "step": 3749 + }, + { + "epoch": 0.5525540275049116, + "grad_norm": 0.5914067029953003, + "learning_rate": 4.610079941771624e-06, + "loss": 0.5861, + "step": 3750 + }, + { + "epoch": 0.5527013752455796, + "grad_norm": 0.5685051679611206, + "learning_rate": 4.6098719968512065e-06, + "loss": 0.5528, + "step": 3751 + }, + { + "epoch": 0.5528487229862475, + "grad_norm": 0.6590768098831177, + "learning_rate": 4.609664001189357e-06, + "loss": 0.5866, + "step": 3752 + }, + { + "epoch": 0.5529960707269155, + "grad_norm": 0.5848571062088013, + "learning_rate": 4.609455954791078e-06, + "loss": 0.5615, + "step": 3753 + }, + { + "epoch": 0.5531434184675835, + "grad_norm": 0.5845603942871094, + "learning_rate": 4.609247857661372e-06, + "loss": 0.5917, + "step": 3754 + }, + { + "epoch": 0.5532907662082515, + "grad_norm": 0.5947001576423645, + "learning_rate": 4.609039709805244e-06, + "loss": 0.5675, + "step": 3755 + }, + { + "epoch": 0.5534381139489194, + "grad_norm": 0.5921224355697632, + "learning_rate": 4.608831511227701e-06, + "loss": 0.5908, + "step": 3756 + }, + { + "epoch": 0.5535854616895874, + "grad_norm": 0.595126748085022, + "learning_rate": 4.608623261933749e-06, + "loss": 0.5733, + "step": 3757 + }, + { + "epoch": 0.5537328094302554, + "grad_norm": 0.5751464366912842, + "learning_rate": 4.6084149619283966e-06, + "loss": 0.57, + "step": 3758 + }, + { + "epoch": 0.5538801571709234, + "grad_norm": 0.5960691571235657, + "learning_rate": 4.608206611216654e-06, + "loss": 0.5713, + "step": 3759 + }, + { + "epoch": 0.5540275049115914, + "grad_norm": 0.604410707950592, + "learning_rate": 4.60799820980353e-06, + "loss": 0.5545, + "step": 3760 + }, + { + "epoch": 0.5541748526522593, + "grad_norm": 0.5460623502731323, + "learning_rate": 4.607789757694038e-06, + "loss": 0.5981, + "step": 3761 + }, + { + "epoch": 0.5543222003929273, + "grad_norm": 0.5673512816429138, + "learning_rate": 4.607581254893191e-06, + "loss": 0.5936, + "step": 3762 + }, + { + "epoch": 0.5544695481335953, + "grad_norm": 0.5840508937835693, + "learning_rate": 4.607372701406004e-06, + "loss": 0.6128, + "step": 3763 + }, + { + "epoch": 0.5546168958742632, + "grad_norm": 0.6220099925994873, + "learning_rate": 4.607164097237491e-06, + "loss": 0.5555, + "step": 3764 + }, + { + "epoch": 0.5547642436149313, + "grad_norm": 0.6163392663002014, + "learning_rate": 4.60695544239267e-06, + "loss": 0.5632, + "step": 3765 + }, + { + "epoch": 0.5549115913555992, + "grad_norm": 0.5789337158203125, + "learning_rate": 4.606746736876559e-06, + "loss": 0.6101, + "step": 3766 + }, + { + "epoch": 0.5550589390962672, + "grad_norm": 0.658064067363739, + "learning_rate": 4.606537980694178e-06, + "loss": 0.6223, + "step": 3767 + }, + { + "epoch": 0.5552062868369352, + "grad_norm": 0.559090793132782, + "learning_rate": 4.606329173850546e-06, + "loss": 0.5564, + "step": 3768 + }, + { + "epoch": 0.5553536345776031, + "grad_norm": 0.5853825807571411, + "learning_rate": 4.606120316350684e-06, + "loss": 0.5956, + "step": 3769 + }, + { + "epoch": 0.5555009823182712, + "grad_norm": 0.5774248242378235, + "learning_rate": 4.605911408199618e-06, + "loss": 0.5919, + "step": 3770 + }, + { + "epoch": 0.5556483300589391, + "grad_norm": 0.5660666227340698, + "learning_rate": 4.605702449402369e-06, + "loss": 0.5922, + "step": 3771 + }, + { + "epoch": 0.555795677799607, + "grad_norm": 0.5958632230758667, + "learning_rate": 4.605493439963965e-06, + "loss": 0.5622, + "step": 3772 + }, + { + "epoch": 0.5559430255402751, + "grad_norm": 0.5955044627189636, + "learning_rate": 4.60528437988943e-06, + "loss": 0.5649, + "step": 3773 + }, + { + "epoch": 0.556090373280943, + "grad_norm": 0.5781503915786743, + "learning_rate": 4.6050752691837944e-06, + "loss": 0.5943, + "step": 3774 + }, + { + "epoch": 0.556237721021611, + "grad_norm": 0.5468112826347351, + "learning_rate": 4.604866107852085e-06, + "loss": 0.5982, + "step": 3775 + }, + { + "epoch": 0.556385068762279, + "grad_norm": 0.5942455530166626, + "learning_rate": 4.604656895899333e-06, + "loss": 0.6087, + "step": 3776 + }, + { + "epoch": 0.5565324165029469, + "grad_norm": 0.5740276575088501, + "learning_rate": 4.60444763333057e-06, + "loss": 0.5618, + "step": 3777 + }, + { + "epoch": 0.556679764243615, + "grad_norm": 0.5935077667236328, + "learning_rate": 4.604238320150829e-06, + "loss": 0.5504, + "step": 3778 + }, + { + "epoch": 0.5568271119842829, + "grad_norm": 0.5820607542991638, + "learning_rate": 4.604028956365142e-06, + "loss": 0.5371, + "step": 3779 + }, + { + "epoch": 0.5569744597249509, + "grad_norm": 0.5953153967857361, + "learning_rate": 4.603819541978547e-06, + "loss": 0.5961, + "step": 3780 + }, + { + "epoch": 0.5571218074656189, + "grad_norm": 0.5686435699462891, + "learning_rate": 4.603610076996078e-06, + "loss": 0.5897, + "step": 3781 + }, + { + "epoch": 0.5572691552062868, + "grad_norm": 0.5686802268028259, + "learning_rate": 4.603400561422773e-06, + "loss": 0.5501, + "step": 3782 + }, + { + "epoch": 0.5574165029469548, + "grad_norm": 0.5968160033226013, + "learning_rate": 4.603190995263672e-06, + "loss": 0.5844, + "step": 3783 + }, + { + "epoch": 0.5575638506876228, + "grad_norm": 0.6171923875808716, + "learning_rate": 4.6029813785238135e-06, + "loss": 0.5742, + "step": 3784 + }, + { + "epoch": 0.5577111984282908, + "grad_norm": 0.6069396138191223, + "learning_rate": 4.602771711208239e-06, + "loss": 0.5976, + "step": 3785 + }, + { + "epoch": 0.5578585461689587, + "grad_norm": 0.5707288384437561, + "learning_rate": 4.602561993321991e-06, + "loss": 0.5582, + "step": 3786 + }, + { + "epoch": 0.5580058939096267, + "grad_norm": 0.6104961633682251, + "learning_rate": 4.602352224870113e-06, + "loss": 0.5872, + "step": 3787 + }, + { + "epoch": 0.5581532416502947, + "grad_norm": 0.5588537454605103, + "learning_rate": 4.6021424058576504e-06, + "loss": 0.5535, + "step": 3788 + }, + { + "epoch": 0.5583005893909627, + "grad_norm": 0.5872684717178345, + "learning_rate": 4.601932536289649e-06, + "loss": 0.594, + "step": 3789 + }, + { + "epoch": 0.5584479371316307, + "grad_norm": 0.5941241383552551, + "learning_rate": 4.601722616171157e-06, + "loss": 0.5601, + "step": 3790 + }, + { + "epoch": 0.5585952848722986, + "grad_norm": 0.6534239053726196, + "learning_rate": 4.60151264550722e-06, + "loss": 0.5587, + "step": 3791 + }, + { + "epoch": 0.5587426326129666, + "grad_norm": 0.5797701478004456, + "learning_rate": 4.60130262430289e-06, + "loss": 0.5763, + "step": 3792 + }, + { + "epoch": 0.5588899803536346, + "grad_norm": 0.5697091221809387, + "learning_rate": 4.6010925525632184e-06, + "loss": 0.5943, + "step": 3793 + }, + { + "epoch": 0.5590373280943025, + "grad_norm": 0.6033008694648743, + "learning_rate": 4.600882430293256e-06, + "loss": 0.604, + "step": 3794 + }, + { + "epoch": 0.5591846758349706, + "grad_norm": 0.6206357479095459, + "learning_rate": 4.6006722574980565e-06, + "loss": 0.5828, + "step": 3795 + }, + { + "epoch": 0.5593320235756385, + "grad_norm": 0.6027121543884277, + "learning_rate": 4.600462034182674e-06, + "loss": 0.5763, + "step": 3796 + }, + { + "epoch": 0.5594793713163064, + "grad_norm": 0.5510358810424805, + "learning_rate": 4.600251760352165e-06, + "loss": 0.55, + "step": 3797 + }, + { + "epoch": 0.5596267190569745, + "grad_norm": 0.5987648963928223, + "learning_rate": 4.6000414360115865e-06, + "loss": 0.5625, + "step": 3798 + }, + { + "epoch": 0.5597740667976424, + "grad_norm": 0.5865952372550964, + "learning_rate": 4.599831061165996e-06, + "loss": 0.5778, + "step": 3799 + }, + { + "epoch": 0.5599214145383105, + "grad_norm": 0.5793768763542175, + "learning_rate": 4.5996206358204534e-06, + "loss": 0.5686, + "step": 3800 + }, + { + "epoch": 0.5600687622789784, + "grad_norm": 0.5893857479095459, + "learning_rate": 4.59941015998002e-06, + "loss": 0.5994, + "step": 3801 + }, + { + "epoch": 0.5602161100196463, + "grad_norm": 0.5984562039375305, + "learning_rate": 4.599199633649756e-06, + "loss": 0.5872, + "step": 3802 + }, + { + "epoch": 0.5603634577603144, + "grad_norm": 0.5699424743652344, + "learning_rate": 4.598989056834726e-06, + "loss": 0.5787, + "step": 3803 + }, + { + "epoch": 0.5605108055009823, + "grad_norm": 0.6069443821907043, + "learning_rate": 4.598778429539994e-06, + "loss": 0.5479, + "step": 3804 + }, + { + "epoch": 0.5606581532416502, + "grad_norm": 0.5900365710258484, + "learning_rate": 4.598567751770624e-06, + "loss": 0.581, + "step": 3805 + }, + { + "epoch": 0.5608055009823183, + "grad_norm": 0.5743827819824219, + "learning_rate": 4.598357023531685e-06, + "loss": 0.5769, + "step": 3806 + }, + { + "epoch": 0.5609528487229862, + "grad_norm": 0.5952962636947632, + "learning_rate": 4.598146244828243e-06, + "loss": 0.5977, + "step": 3807 + }, + { + "epoch": 0.5611001964636543, + "grad_norm": 0.5913264155387878, + "learning_rate": 4.597935415665368e-06, + "loss": 0.6061, + "step": 3808 + }, + { + "epoch": 0.5612475442043222, + "grad_norm": 0.6078745722770691, + "learning_rate": 4.59772453604813e-06, + "loss": 0.5959, + "step": 3809 + }, + { + "epoch": 0.5613948919449901, + "grad_norm": 0.5463706851005554, + "learning_rate": 4.597513605981602e-06, + "loss": 0.5958, + "step": 3810 + }, + { + "epoch": 0.5615422396856582, + "grad_norm": 0.5882440805435181, + "learning_rate": 4.597302625470854e-06, + "loss": 0.5615, + "step": 3811 + }, + { + "epoch": 0.5616895874263261, + "grad_norm": 0.602908194065094, + "learning_rate": 4.597091594520964e-06, + "loss": 0.5827, + "step": 3812 + }, + { + "epoch": 0.5618369351669941, + "grad_norm": 0.563706636428833, + "learning_rate": 4.596880513137002e-06, + "loss": 0.591, + "step": 3813 + }, + { + "epoch": 0.5619842829076621, + "grad_norm": 0.5688768029212952, + "learning_rate": 4.596669381324048e-06, + "loss": 0.5538, + "step": 3814 + }, + { + "epoch": 0.56213163064833, + "grad_norm": 0.6139882802963257, + "learning_rate": 4.59645819908718e-06, + "loss": 0.5961, + "step": 3815 + }, + { + "epoch": 0.562278978388998, + "grad_norm": 0.590126633644104, + "learning_rate": 4.596246966431475e-06, + "loss": 0.563, + "step": 3816 + }, + { + "epoch": 0.562426326129666, + "grad_norm": 0.6281622648239136, + "learning_rate": 4.596035683362012e-06, + "loss": 0.5128, + "step": 3817 + }, + { + "epoch": 0.562573673870334, + "grad_norm": 0.5866178870201111, + "learning_rate": 4.595824349883877e-06, + "loss": 0.5932, + "step": 3818 + }, + { + "epoch": 0.562721021611002, + "grad_norm": 0.591880202293396, + "learning_rate": 4.595612966002147e-06, + "loss": 0.5782, + "step": 3819 + }, + { + "epoch": 0.56286836935167, + "grad_norm": 0.5903836488723755, + "learning_rate": 4.595401531721908e-06, + "loss": 0.6021, + "step": 3820 + }, + { + "epoch": 0.5630157170923379, + "grad_norm": 0.5724878311157227, + "learning_rate": 4.595190047048246e-06, + "loss": 0.5634, + "step": 3821 + }, + { + "epoch": 0.5631630648330059, + "grad_norm": 0.5651399493217468, + "learning_rate": 4.594978511986246e-06, + "loss": 0.5828, + "step": 3822 + }, + { + "epoch": 0.5633104125736739, + "grad_norm": 0.5819764733314514, + "learning_rate": 4.594766926540994e-06, + "loss": 0.5696, + "step": 3823 + }, + { + "epoch": 0.5634577603143418, + "grad_norm": 0.5903043150901794, + "learning_rate": 4.594555290717582e-06, + "loss": 0.586, + "step": 3824 + }, + { + "epoch": 0.5636051080550099, + "grad_norm": 0.5484024882316589, + "learning_rate": 4.594343604521096e-06, + "loss": 0.5749, + "step": 3825 + }, + { + "epoch": 0.5637524557956778, + "grad_norm": 0.5815621614456177, + "learning_rate": 4.594131867956629e-06, + "loss": 0.5711, + "step": 3826 + }, + { + "epoch": 0.5638998035363457, + "grad_norm": 0.5999745726585388, + "learning_rate": 4.593920081029274e-06, + "loss": 0.5459, + "step": 3827 + }, + { + "epoch": 0.5640471512770138, + "grad_norm": 0.6053522825241089, + "learning_rate": 4.593708243744122e-06, + "loss": 0.5903, + "step": 3828 + }, + { + "epoch": 0.5641944990176817, + "grad_norm": 0.5828033685684204, + "learning_rate": 4.593496356106269e-06, + "loss": 0.6015, + "step": 3829 + }, + { + "epoch": 0.5643418467583498, + "grad_norm": 0.5726675391197205, + "learning_rate": 4.59328441812081e-06, + "loss": 0.5675, + "step": 3830 + }, + { + "epoch": 0.5644891944990177, + "grad_norm": 0.6215922832489014, + "learning_rate": 4.593072429792843e-06, + "loss": 0.5884, + "step": 3831 + }, + { + "epoch": 0.5646365422396856, + "grad_norm": 0.5701497197151184, + "learning_rate": 4.592860391127466e-06, + "loss": 0.5899, + "step": 3832 + }, + { + "epoch": 0.5647838899803537, + "grad_norm": 0.5700125694274902, + "learning_rate": 4.592648302129778e-06, + "loss": 0.5847, + "step": 3833 + }, + { + "epoch": 0.5649312377210216, + "grad_norm": 0.5924897789955139, + "learning_rate": 4.59243616280488e-06, + "loss": 0.5544, + "step": 3834 + }, + { + "epoch": 0.5650785854616895, + "grad_norm": 0.6096572279930115, + "learning_rate": 4.592223973157874e-06, + "loss": 0.6066, + "step": 3835 + }, + { + "epoch": 0.5652259332023576, + "grad_norm": 0.6138851046562195, + "learning_rate": 4.592011733193862e-06, + "loss": 0.5873, + "step": 3836 + }, + { + "epoch": 0.5653732809430255, + "grad_norm": 0.5817020535469055, + "learning_rate": 4.59179944291795e-06, + "loss": 0.5757, + "step": 3837 + }, + { + "epoch": 0.5655206286836936, + "grad_norm": 0.5927668213844299, + "learning_rate": 4.5915871023352415e-06, + "loss": 0.5981, + "step": 3838 + }, + { + "epoch": 0.5656679764243615, + "grad_norm": 0.5806077718734741, + "learning_rate": 4.591374711450844e-06, + "loss": 0.5801, + "step": 3839 + }, + { + "epoch": 0.5658153241650294, + "grad_norm": 0.5617873072624207, + "learning_rate": 4.591162270269866e-06, + "loss": 0.5852, + "step": 3840 + }, + { + "epoch": 0.5659626719056975, + "grad_norm": 0.5821503400802612, + "learning_rate": 4.5909497787974175e-06, + "loss": 0.6049, + "step": 3841 + }, + { + "epoch": 0.5661100196463654, + "grad_norm": 0.5995963215827942, + "learning_rate": 4.590737237038606e-06, + "loss": 0.5941, + "step": 3842 + }, + { + "epoch": 0.5662573673870334, + "grad_norm": 0.5788363218307495, + "learning_rate": 4.5905246449985455e-06, + "loss": 0.5967, + "step": 3843 + }, + { + "epoch": 0.5664047151277014, + "grad_norm": 0.6136123538017273, + "learning_rate": 4.590312002682347e-06, + "loss": 0.5461, + "step": 3844 + }, + { + "epoch": 0.5665520628683693, + "grad_norm": 0.5727776885032654, + "learning_rate": 4.590099310095125e-06, + "loss": 0.5651, + "step": 3845 + }, + { + "epoch": 0.5666994106090373, + "grad_norm": 0.5992319583892822, + "learning_rate": 4.589886567241996e-06, + "loss": 0.5757, + "step": 3846 + }, + { + "epoch": 0.5668467583497053, + "grad_norm": 0.5713659524917603, + "learning_rate": 4.589673774128075e-06, + "loss": 0.5867, + "step": 3847 + }, + { + "epoch": 0.5669941060903733, + "grad_norm": 0.5978106260299683, + "learning_rate": 4.589460930758479e-06, + "loss": 0.5619, + "step": 3848 + }, + { + "epoch": 0.5671414538310413, + "grad_norm": 0.5736516118049622, + "learning_rate": 4.589248037138329e-06, + "loss": 0.5686, + "step": 3849 + }, + { + "epoch": 0.5672888015717092, + "grad_norm": 0.5825492739677429, + "learning_rate": 4.589035093272743e-06, + "loss": 0.5821, + "step": 3850 + }, + { + "epoch": 0.5674361493123772, + "grad_norm": 0.5821627974510193, + "learning_rate": 4.5888220991668424e-06, + "loss": 0.5733, + "step": 3851 + }, + { + "epoch": 0.5675834970530452, + "grad_norm": 0.6026495099067688, + "learning_rate": 4.58860905482575e-06, + "loss": 0.5887, + "step": 3852 + }, + { + "epoch": 0.5677308447937132, + "grad_norm": 0.5832722783088684, + "learning_rate": 4.58839596025459e-06, + "loss": 0.5855, + "step": 3853 + }, + { + "epoch": 0.5678781925343811, + "grad_norm": 0.5990504026412964, + "learning_rate": 4.588182815458486e-06, + "loss": 0.595, + "step": 3854 + }, + { + "epoch": 0.5680255402750491, + "grad_norm": 0.5836416482925415, + "learning_rate": 4.587969620442566e-06, + "loss": 0.579, + "step": 3855 + }, + { + "epoch": 0.5681728880157171, + "grad_norm": 0.6017864346504211, + "learning_rate": 4.587756375211955e-06, + "loss": 0.5902, + "step": 3856 + }, + { + "epoch": 0.568320235756385, + "grad_norm": 0.5907999873161316, + "learning_rate": 4.587543079771782e-06, + "loss": 0.5527, + "step": 3857 + }, + { + "epoch": 0.5684675834970531, + "grad_norm": 0.5626004934310913, + "learning_rate": 4.5873297341271775e-06, + "loss": 0.5833, + "step": 3858 + }, + { + "epoch": 0.568614931237721, + "grad_norm": 0.5988888740539551, + "learning_rate": 4.587116338283272e-06, + "loss": 0.5895, + "step": 3859 + }, + { + "epoch": 0.568762278978389, + "grad_norm": 0.5978417992591858, + "learning_rate": 4.586902892245197e-06, + "loss": 0.6029, + "step": 3860 + }, + { + "epoch": 0.568909626719057, + "grad_norm": 0.5953949689865112, + "learning_rate": 4.586689396018087e-06, + "loss": 0.6124, + "step": 3861 + }, + { + "epoch": 0.5690569744597249, + "grad_norm": 0.5639386773109436, + "learning_rate": 4.586475849607075e-06, + "loss": 0.5592, + "step": 3862 + }, + { + "epoch": 0.569204322200393, + "grad_norm": 0.6077260375022888, + "learning_rate": 4.586262253017299e-06, + "loss": 0.6141, + "step": 3863 + }, + { + "epoch": 0.5693516699410609, + "grad_norm": 0.5709747076034546, + "learning_rate": 4.586048606253892e-06, + "loss": 0.5821, + "step": 3864 + }, + { + "epoch": 0.5694990176817288, + "grad_norm": 0.5977672934532166, + "learning_rate": 4.585834909321996e-06, + "loss": 0.5861, + "step": 3865 + }, + { + "epoch": 0.5696463654223969, + "grad_norm": 0.6009902954101562, + "learning_rate": 4.585621162226749e-06, + "loss": 0.5923, + "step": 3866 + }, + { + "epoch": 0.5697937131630648, + "grad_norm": 0.5906940698623657, + "learning_rate": 4.58540736497329e-06, + "loss": 0.5581, + "step": 3867 + }, + { + "epoch": 0.5699410609037328, + "grad_norm": 0.6194680333137512, + "learning_rate": 4.585193517566763e-06, + "loss": 0.591, + "step": 3868 + }, + { + "epoch": 0.5700884086444008, + "grad_norm": 0.6262835264205933, + "learning_rate": 4.584979620012309e-06, + "loss": 0.6191, + "step": 3869 + }, + { + "epoch": 0.5702357563850687, + "grad_norm": 0.6227874159812927, + "learning_rate": 4.584765672315074e-06, + "loss": 0.5897, + "step": 3870 + }, + { + "epoch": 0.5703831041257368, + "grad_norm": 0.6126515865325928, + "learning_rate": 4.584551674480202e-06, + "loss": 0.6016, + "step": 3871 + }, + { + "epoch": 0.5705304518664047, + "grad_norm": 0.5659498572349548, + "learning_rate": 4.5843376265128405e-06, + "loss": 0.5938, + "step": 3872 + }, + { + "epoch": 0.5706777996070727, + "grad_norm": 0.5596170425415039, + "learning_rate": 4.584123528418135e-06, + "loss": 0.5801, + "step": 3873 + }, + { + "epoch": 0.5708251473477407, + "grad_norm": 0.5821897387504578, + "learning_rate": 4.583909380201238e-06, + "loss": 0.5566, + "step": 3874 + }, + { + "epoch": 0.5709724950884086, + "grad_norm": 0.583516001701355, + "learning_rate": 4.583695181867297e-06, + "loss": 0.5913, + "step": 3875 + }, + { + "epoch": 0.5711198428290766, + "grad_norm": 0.5721845626831055, + "learning_rate": 4.5834809334214645e-06, + "loss": 0.6035, + "step": 3876 + }, + { + "epoch": 0.5712671905697446, + "grad_norm": 0.5896309018135071, + "learning_rate": 4.583266634868893e-06, + "loss": 0.5518, + "step": 3877 + }, + { + "epoch": 0.5714145383104126, + "grad_norm": 0.5849339365959167, + "learning_rate": 4.583052286214735e-06, + "loss": 0.5615, + "step": 3878 + }, + { + "epoch": 0.5715618860510806, + "grad_norm": 0.6071663498878479, + "learning_rate": 4.582837887464146e-06, + "loss": 0.5735, + "step": 3879 + }, + { + "epoch": 0.5717092337917485, + "grad_norm": 0.5614022612571716, + "learning_rate": 4.582623438622284e-06, + "loss": 0.5219, + "step": 3880 + }, + { + "epoch": 0.5718565815324165, + "grad_norm": 0.5817151069641113, + "learning_rate": 4.582408939694305e-06, + "loss": 0.5721, + "step": 3881 + }, + { + "epoch": 0.5720039292730845, + "grad_norm": 0.5955167412757874, + "learning_rate": 4.582194390685368e-06, + "loss": 0.6061, + "step": 3882 + }, + { + "epoch": 0.5721512770137525, + "grad_norm": 0.5951298475265503, + "learning_rate": 4.581979791600632e-06, + "loss": 0.5743, + "step": 3883 + }, + { + "epoch": 0.5722986247544204, + "grad_norm": 0.5896925330162048, + "learning_rate": 4.581765142445259e-06, + "loss": 0.6103, + "step": 3884 + }, + { + "epoch": 0.5724459724950884, + "grad_norm": 0.5758142471313477, + "learning_rate": 4.581550443224411e-06, + "loss": 0.5749, + "step": 3885 + }, + { + "epoch": 0.5725933202357564, + "grad_norm": 0.6007471084594727, + "learning_rate": 4.58133569394325e-06, + "loss": 0.5985, + "step": 3886 + }, + { + "epoch": 0.5727406679764243, + "grad_norm": 0.5911087989807129, + "learning_rate": 4.581120894606942e-06, + "loss": 0.5712, + "step": 3887 + }, + { + "epoch": 0.5728880157170924, + "grad_norm": 0.5662864446640015, + "learning_rate": 4.580906045220653e-06, + "loss": 0.5791, + "step": 3888 + }, + { + "epoch": 0.5730353634577603, + "grad_norm": 0.5983741879463196, + "learning_rate": 4.5806911457895495e-06, + "loss": 0.5907, + "step": 3889 + }, + { + "epoch": 0.5731827111984283, + "grad_norm": 0.6195666790008545, + "learning_rate": 4.5804761963188e-06, + "loss": 0.5644, + "step": 3890 + }, + { + "epoch": 0.5733300589390963, + "grad_norm": 0.6031520366668701, + "learning_rate": 4.580261196813574e-06, + "loss": 0.5518, + "step": 3891 + }, + { + "epoch": 0.5734774066797642, + "grad_norm": 0.6181292533874512, + "learning_rate": 4.5800461472790414e-06, + "loss": 0.5763, + "step": 3892 + }, + { + "epoch": 0.5736247544204323, + "grad_norm": 0.5760086178779602, + "learning_rate": 4.579831047720375e-06, + "loss": 0.5785, + "step": 3893 + }, + { + "epoch": 0.5737721021611002, + "grad_norm": 0.5929883122444153, + "learning_rate": 4.579615898142747e-06, + "loss": 0.6085, + "step": 3894 + }, + { + "epoch": 0.5739194499017681, + "grad_norm": 0.6036742925643921, + "learning_rate": 4.5794006985513316e-06, + "loss": 0.5848, + "step": 3895 + }, + { + "epoch": 0.5740667976424362, + "grad_norm": 0.528723955154419, + "learning_rate": 4.579185448951305e-06, + "loss": 0.5445, + "step": 3896 + }, + { + "epoch": 0.5742141453831041, + "grad_norm": 0.5965567827224731, + "learning_rate": 4.578970149347845e-06, + "loss": 0.5967, + "step": 3897 + }, + { + "epoch": 0.574361493123772, + "grad_norm": 0.5750865340232849, + "learning_rate": 4.578754799746127e-06, + "loss": 0.5842, + "step": 3898 + }, + { + "epoch": 0.5745088408644401, + "grad_norm": 0.6223268508911133, + "learning_rate": 4.578539400151331e-06, + "loss": 0.5974, + "step": 3899 + }, + { + "epoch": 0.574656188605108, + "grad_norm": 0.5480567216873169, + "learning_rate": 4.578323950568636e-06, + "loss": 0.5808, + "step": 3900 + }, + { + "epoch": 0.5748035363457761, + "grad_norm": 0.5856636762619019, + "learning_rate": 4.578108451003226e-06, + "loss": 0.5338, + "step": 3901 + }, + { + "epoch": 0.574950884086444, + "grad_norm": 0.5902941226959229, + "learning_rate": 4.577892901460283e-06, + "loss": 0.5923, + "step": 3902 + }, + { + "epoch": 0.575098231827112, + "grad_norm": 0.5936366319656372, + "learning_rate": 4.57767730194499e-06, + "loss": 0.596, + "step": 3903 + }, + { + "epoch": 0.57524557956778, + "grad_norm": 0.5783981680870056, + "learning_rate": 4.577461652462532e-06, + "loss": 0.5964, + "step": 3904 + }, + { + "epoch": 0.5753929273084479, + "grad_norm": 0.5798457860946655, + "learning_rate": 4.577245953018095e-06, + "loss": 0.5605, + "step": 3905 + }, + { + "epoch": 0.5755402750491159, + "grad_norm": 0.5964351296424866, + "learning_rate": 4.577030203616869e-06, + "loss": 0.5853, + "step": 3906 + }, + { + "epoch": 0.5756876227897839, + "grad_norm": 0.6279726624488831, + "learning_rate": 4.57681440426404e-06, + "loss": 0.5501, + "step": 3907 + }, + { + "epoch": 0.5758349705304519, + "grad_norm": 0.5747349858283997, + "learning_rate": 4.576598554964798e-06, + "loss": 0.5735, + "step": 3908 + }, + { + "epoch": 0.5759823182711199, + "grad_norm": 0.5864795446395874, + "learning_rate": 4.5763826557243345e-06, + "loss": 0.6125, + "step": 3909 + }, + { + "epoch": 0.5761296660117878, + "grad_norm": 0.5946550965309143, + "learning_rate": 4.576166706547843e-06, + "loss": 0.5586, + "step": 3910 + }, + { + "epoch": 0.5762770137524558, + "grad_norm": 0.5650618076324463, + "learning_rate": 4.5759507074405155e-06, + "loss": 0.548, + "step": 3911 + }, + { + "epoch": 0.5764243614931238, + "grad_norm": 0.5847660303115845, + "learning_rate": 4.575734658407547e-06, + "loss": 0.5877, + "step": 3912 + }, + { + "epoch": 0.5765717092337918, + "grad_norm": 0.6187111735343933, + "learning_rate": 4.575518559454134e-06, + "loss": 0.5466, + "step": 3913 + }, + { + "epoch": 0.5767190569744597, + "grad_norm": 0.6288239359855652, + "learning_rate": 4.575302410585473e-06, + "loss": 0.5806, + "step": 3914 + }, + { + "epoch": 0.5768664047151277, + "grad_norm": 0.5599458813667297, + "learning_rate": 4.575086211806762e-06, + "loss": 0.5447, + "step": 3915 + }, + { + "epoch": 0.5770137524557957, + "grad_norm": 0.5981935858726501, + "learning_rate": 4.574869963123201e-06, + "loss": 0.5296, + "step": 3916 + }, + { + "epoch": 0.5771611001964636, + "grad_norm": 0.6017600893974304, + "learning_rate": 4.574653664539991e-06, + "loss": 0.5802, + "step": 3917 + }, + { + "epoch": 0.5773084479371317, + "grad_norm": 0.587976336479187, + "learning_rate": 4.574437316062333e-06, + "loss": 0.5684, + "step": 3918 + }, + { + "epoch": 0.5774557956777996, + "grad_norm": 0.5678153038024902, + "learning_rate": 4.574220917695431e-06, + "loss": 0.5822, + "step": 3919 + }, + { + "epoch": 0.5776031434184676, + "grad_norm": 0.593421995639801, + "learning_rate": 4.574004469444489e-06, + "loss": 0.5862, + "step": 3920 + }, + { + "epoch": 0.5777504911591356, + "grad_norm": 0.578088641166687, + "learning_rate": 4.5737879713147115e-06, + "loss": 0.5821, + "step": 3921 + }, + { + "epoch": 0.5778978388998035, + "grad_norm": 0.6058619618415833, + "learning_rate": 4.573571423311306e-06, + "loss": 0.6177, + "step": 3922 + }, + { + "epoch": 0.5780451866404716, + "grad_norm": 0.5755084156990051, + "learning_rate": 4.5733548254394805e-06, + "loss": 0.5621, + "step": 3923 + }, + { + "epoch": 0.5781925343811395, + "grad_norm": 0.6023474931716919, + "learning_rate": 4.573138177704444e-06, + "loss": 0.5618, + "step": 3924 + }, + { + "epoch": 0.5783398821218074, + "grad_norm": 0.618008553981781, + "learning_rate": 4.572921480111407e-06, + "loss": 0.6007, + "step": 3925 + }, + { + "epoch": 0.5784872298624755, + "grad_norm": 0.5669577121734619, + "learning_rate": 4.57270473266558e-06, + "loss": 0.6072, + "step": 3926 + }, + { + "epoch": 0.5786345776031434, + "grad_norm": 0.5671183466911316, + "learning_rate": 4.572487935372176e-06, + "loss": 0.5701, + "step": 3927 + }, + { + "epoch": 0.5787819253438113, + "grad_norm": 0.5838252305984497, + "learning_rate": 4.572271088236411e-06, + "loss": 0.5604, + "step": 3928 + }, + { + "epoch": 0.5789292730844794, + "grad_norm": 0.5709531307220459, + "learning_rate": 4.572054191263496e-06, + "loss": 0.5525, + "step": 3929 + }, + { + "epoch": 0.5790766208251473, + "grad_norm": 0.5896843075752258, + "learning_rate": 4.571837244458651e-06, + "loss": 0.5444, + "step": 3930 + }, + { + "epoch": 0.5792239685658154, + "grad_norm": 0.5704616904258728, + "learning_rate": 4.571620247827092e-06, + "loss": 0.611, + "step": 3931 + }, + { + "epoch": 0.5793713163064833, + "grad_norm": 0.5779021978378296, + "learning_rate": 4.571403201374037e-06, + "loss": 0.5624, + "step": 3932 + }, + { + "epoch": 0.5795186640471512, + "grad_norm": 0.6167982816696167, + "learning_rate": 4.571186105104707e-06, + "loss": 0.6028, + "step": 3933 + }, + { + "epoch": 0.5796660117878193, + "grad_norm": 0.6100846529006958, + "learning_rate": 4.570968959024323e-06, + "loss": 0.5714, + "step": 3934 + }, + { + "epoch": 0.5798133595284872, + "grad_norm": 0.6203821301460266, + "learning_rate": 4.570751763138106e-06, + "loss": 0.5766, + "step": 3935 + }, + { + "epoch": 0.5799607072691552, + "grad_norm": 0.5631136298179626, + "learning_rate": 4.570534517451282e-06, + "loss": 0.5778, + "step": 3936 + }, + { + "epoch": 0.5801080550098232, + "grad_norm": 0.5592049360275269, + "learning_rate": 4.5703172219690715e-06, + "loss": 0.6051, + "step": 3937 + }, + { + "epoch": 0.5802554027504911, + "grad_norm": 0.5946186780929565, + "learning_rate": 4.5700998766967046e-06, + "loss": 0.5778, + "step": 3938 + }, + { + "epoch": 0.5804027504911591, + "grad_norm": 0.5970011949539185, + "learning_rate": 4.569882481639406e-06, + "loss": 0.5617, + "step": 3939 + }, + { + "epoch": 0.5805500982318271, + "grad_norm": 0.5903819799423218, + "learning_rate": 4.569665036802405e-06, + "loss": 0.5814, + "step": 3940 + }, + { + "epoch": 0.5806974459724951, + "grad_norm": 0.5918266773223877, + "learning_rate": 4.56944754219093e-06, + "loss": 0.5557, + "step": 3941 + }, + { + "epoch": 0.5808447937131631, + "grad_norm": 0.6284914016723633, + "learning_rate": 4.569229997810213e-06, + "loss": 0.5853, + "step": 3942 + }, + { + "epoch": 0.580992141453831, + "grad_norm": 0.5950921177864075, + "learning_rate": 4.569012403665485e-06, + "loss": 0.5736, + "step": 3943 + }, + { + "epoch": 0.581139489194499, + "grad_norm": 0.6068068146705627, + "learning_rate": 4.5687947597619785e-06, + "loss": 0.5931, + "step": 3944 + }, + { + "epoch": 0.581286836935167, + "grad_norm": 0.5854286551475525, + "learning_rate": 4.568577066104928e-06, + "loss": 0.5933, + "step": 3945 + }, + { + "epoch": 0.581434184675835, + "grad_norm": 0.5936000347137451, + "learning_rate": 4.56835932269957e-06, + "loss": 0.5657, + "step": 3946 + }, + { + "epoch": 0.5815815324165029, + "grad_norm": 0.5768623948097229, + "learning_rate": 4.56814152955114e-06, + "loss": 0.5825, + "step": 3947 + }, + { + "epoch": 0.581728880157171, + "grad_norm": 0.5834292769432068, + "learning_rate": 4.5679236866648765e-06, + "loss": 0.575, + "step": 3948 + }, + { + "epoch": 0.5818762278978389, + "grad_norm": 0.6103802919387817, + "learning_rate": 4.567705794046019e-06, + "loss": 0.5881, + "step": 3949 + }, + { + "epoch": 0.5820235756385069, + "grad_norm": 0.5841482281684875, + "learning_rate": 4.567487851699806e-06, + "loss": 0.5878, + "step": 3950 + }, + { + "epoch": 0.5821709233791749, + "grad_norm": 0.5882751941680908, + "learning_rate": 4.56726985963148e-06, + "loss": 0.5609, + "step": 3951 + }, + { + "epoch": 0.5823182711198428, + "grad_norm": 0.5884996652603149, + "learning_rate": 4.567051817846283e-06, + "loss": 0.5805, + "step": 3952 + }, + { + "epoch": 0.5824656188605108, + "grad_norm": 0.6108590364456177, + "learning_rate": 4.56683372634946e-06, + "loss": 0.5898, + "step": 3953 + }, + { + "epoch": 0.5826129666011788, + "grad_norm": 0.5772464871406555, + "learning_rate": 4.566615585146256e-06, + "loss": 0.5728, + "step": 3954 + }, + { + "epoch": 0.5827603143418467, + "grad_norm": 0.5738604068756104, + "learning_rate": 4.566397394241915e-06, + "loss": 0.5737, + "step": 3955 + }, + { + "epoch": 0.5829076620825148, + "grad_norm": 0.5841423273086548, + "learning_rate": 4.566179153641686e-06, + "loss": 0.5741, + "step": 3956 + }, + { + "epoch": 0.5830550098231827, + "grad_norm": 0.5914438366889954, + "learning_rate": 4.565960863350818e-06, + "loss": 0.5872, + "step": 3957 + }, + { + "epoch": 0.5832023575638506, + "grad_norm": 0.5499804019927979, + "learning_rate": 4.565742523374561e-06, + "loss": 0.598, + "step": 3958 + }, + { + "epoch": 0.5833497053045187, + "grad_norm": 0.5859748721122742, + "learning_rate": 4.5655241337181644e-06, + "loss": 0.5546, + "step": 3959 + }, + { + "epoch": 0.5834970530451866, + "grad_norm": 0.5863354206085205, + "learning_rate": 4.565305694386881e-06, + "loss": 0.5874, + "step": 3960 + }, + { + "epoch": 0.5836444007858547, + "grad_norm": 0.5869422554969788, + "learning_rate": 4.565087205385964e-06, + "loss": 0.5872, + "step": 3961 + }, + { + "epoch": 0.5837917485265226, + "grad_norm": 0.6072686910629272, + "learning_rate": 4.564868666720669e-06, + "loss": 0.566, + "step": 3962 + }, + { + "epoch": 0.5839390962671905, + "grad_norm": 0.6067289710044861, + "learning_rate": 4.564650078396251e-06, + "loss": 0.6375, + "step": 3963 + }, + { + "epoch": 0.5840864440078586, + "grad_norm": 0.7259442806243896, + "learning_rate": 4.564431440417967e-06, + "loss": 0.5593, + "step": 3964 + }, + { + "epoch": 0.5842337917485265, + "grad_norm": 0.6075835227966309, + "learning_rate": 4.564212752791075e-06, + "loss": 0.5655, + "step": 3965 + }, + { + "epoch": 0.5843811394891945, + "grad_norm": 0.577599287033081, + "learning_rate": 4.563994015520834e-06, + "loss": 0.5571, + "step": 3966 + }, + { + "epoch": 0.5845284872298625, + "grad_norm": 0.5778148174285889, + "learning_rate": 4.563775228612506e-06, + "loss": 0.5767, + "step": 3967 + }, + { + "epoch": 0.5846758349705304, + "grad_norm": 0.5768938064575195, + "learning_rate": 4.563556392071351e-06, + "loss": 0.6067, + "step": 3968 + }, + { + "epoch": 0.5848231827111984, + "grad_norm": 0.5647265315055847, + "learning_rate": 4.563337505902633e-06, + "loss": 0.5773, + "step": 3969 + }, + { + "epoch": 0.5849705304518664, + "grad_norm": 0.5995790958404541, + "learning_rate": 4.563118570111615e-06, + "loss": 0.5768, + "step": 3970 + }, + { + "epoch": 0.5851178781925344, + "grad_norm": 0.5729926824569702, + "learning_rate": 4.562899584703564e-06, + "loss": 0.5876, + "step": 3971 + }, + { + "epoch": 0.5852652259332024, + "grad_norm": 0.6182589530944824, + "learning_rate": 4.5626805496837445e-06, + "loss": 0.6097, + "step": 3972 + }, + { + "epoch": 0.5854125736738703, + "grad_norm": 0.5800966024398804, + "learning_rate": 4.5624614650574265e-06, + "loss": 0.5649, + "step": 3973 + }, + { + "epoch": 0.5855599214145383, + "grad_norm": 0.5685977339744568, + "learning_rate": 4.562242330829877e-06, + "loss": 0.5577, + "step": 3974 + }, + { + "epoch": 0.5857072691552063, + "grad_norm": 0.5979241728782654, + "learning_rate": 4.562023147006366e-06, + "loss": 0.622, + "step": 3975 + }, + { + "epoch": 0.5858546168958743, + "grad_norm": 0.5999838709831238, + "learning_rate": 4.561803913592167e-06, + "loss": 0.5663, + "step": 3976 + }, + { + "epoch": 0.5860019646365422, + "grad_norm": 0.593863308429718, + "learning_rate": 4.5615846305925495e-06, + "loss": 0.5589, + "step": 3977 + }, + { + "epoch": 0.5861493123772102, + "grad_norm": 0.5877453684806824, + "learning_rate": 4.5613652980127895e-06, + "loss": 0.5683, + "step": 3978 + }, + { + "epoch": 0.5862966601178782, + "grad_norm": 0.596571147441864, + "learning_rate": 4.5611459158581605e-06, + "loss": 0.5801, + "step": 3979 + }, + { + "epoch": 0.5864440078585462, + "grad_norm": 0.585124671459198, + "learning_rate": 4.5609264841339385e-06, + "loss": 0.5691, + "step": 3980 + }, + { + "epoch": 0.5865913555992142, + "grad_norm": 0.571174144744873, + "learning_rate": 4.560707002845402e-06, + "loss": 0.5678, + "step": 3981 + }, + { + "epoch": 0.5867387033398821, + "grad_norm": 0.5795149207115173, + "learning_rate": 4.560487471997828e-06, + "loss": 0.5661, + "step": 3982 + }, + { + "epoch": 0.5868860510805501, + "grad_norm": 0.5942166447639465, + "learning_rate": 4.560267891596497e-06, + "loss": 0.5564, + "step": 3983 + }, + { + "epoch": 0.5870333988212181, + "grad_norm": 0.5652472972869873, + "learning_rate": 4.56004826164669e-06, + "loss": 0.6024, + "step": 3984 + }, + { + "epoch": 0.587180746561886, + "grad_norm": 0.5905004739761353, + "learning_rate": 4.559828582153688e-06, + "loss": 0.5762, + "step": 3985 + }, + { + "epoch": 0.5873280943025541, + "grad_norm": 0.5834776163101196, + "learning_rate": 4.559608853122774e-06, + "loss": 0.6077, + "step": 3986 + }, + { + "epoch": 0.587475442043222, + "grad_norm": 0.5593502521514893, + "learning_rate": 4.559389074559235e-06, + "loss": 0.5798, + "step": 3987 + }, + { + "epoch": 0.5876227897838899, + "grad_norm": 0.5920974612236023, + "learning_rate": 4.559169246468353e-06, + "loss": 0.562, + "step": 3988 + }, + { + "epoch": 0.587770137524558, + "grad_norm": 0.5788190364837646, + "learning_rate": 4.558949368855418e-06, + "loss": 0.5947, + "step": 3989 + }, + { + "epoch": 0.5879174852652259, + "grad_norm": 0.5671250224113464, + "learning_rate": 4.558729441725715e-06, + "loss": 0.565, + "step": 3990 + }, + { + "epoch": 0.588064833005894, + "grad_norm": 0.5782662630081177, + "learning_rate": 4.558509465084534e-06, + "loss": 0.5628, + "step": 3991 + }, + { + "epoch": 0.5882121807465619, + "grad_norm": 0.5925549268722534, + "learning_rate": 4.5582894389371675e-06, + "loss": 0.5744, + "step": 3992 + }, + { + "epoch": 0.5883595284872298, + "grad_norm": 0.5913642048835754, + "learning_rate": 4.558069363288905e-06, + "loss": 0.6223, + "step": 3993 + }, + { + "epoch": 0.5885068762278979, + "grad_norm": 0.6267321109771729, + "learning_rate": 4.557849238145039e-06, + "loss": 0.5434, + "step": 3994 + }, + { + "epoch": 0.5886542239685658, + "grad_norm": 0.581540048122406, + "learning_rate": 4.5576290635108645e-06, + "loss": 0.56, + "step": 3995 + }, + { + "epoch": 0.5888015717092338, + "grad_norm": 0.5588958859443665, + "learning_rate": 4.557408839391676e-06, + "loss": 0.5667, + "step": 3996 + }, + { + "epoch": 0.5889489194499018, + "grad_norm": 0.5754793882369995, + "learning_rate": 4.5571885657927696e-06, + "loss": 0.5942, + "step": 3997 + }, + { + "epoch": 0.5890962671905697, + "grad_norm": 0.6254404187202454, + "learning_rate": 4.556968242719443e-06, + "loss": 0.6092, + "step": 3998 + }, + { + "epoch": 0.5892436149312377, + "grad_norm": 0.579869270324707, + "learning_rate": 4.556747870176995e-06, + "loss": 0.5636, + "step": 3999 + }, + { + "epoch": 0.5893909626719057, + "grad_norm": 0.6004469990730286, + "learning_rate": 4.556527448170725e-06, + "loss": 0.5843, + "step": 4000 + }, + { + "epoch": 0.5895383104125737, + "grad_norm": 0.5634355545043945, + "learning_rate": 4.556306976705936e-06, + "loss": 0.5779, + "step": 4001 + }, + { + "epoch": 0.5896856581532417, + "grad_norm": 0.5875229239463806, + "learning_rate": 4.556086455787928e-06, + "loss": 0.5985, + "step": 4002 + }, + { + "epoch": 0.5898330058939096, + "grad_norm": 0.6618452072143555, + "learning_rate": 4.555865885422004e-06, + "loss": 0.5946, + "step": 4003 + }, + { + "epoch": 0.5899803536345776, + "grad_norm": 0.5823994874954224, + "learning_rate": 4.555645265613471e-06, + "loss": 0.5877, + "step": 4004 + }, + { + "epoch": 0.5901277013752456, + "grad_norm": 0.601453423500061, + "learning_rate": 4.555424596367633e-06, + "loss": 0.5391, + "step": 4005 + }, + { + "epoch": 0.5902750491159136, + "grad_norm": 0.5973524451255798, + "learning_rate": 4.555203877689798e-06, + "loss": 0.5968, + "step": 4006 + }, + { + "epoch": 0.5904223968565815, + "grad_norm": 0.6014629006385803, + "learning_rate": 4.554983109585273e-06, + "loss": 0.5988, + "step": 4007 + }, + { + "epoch": 0.5905697445972495, + "grad_norm": 0.6202374696731567, + "learning_rate": 4.5547622920593685e-06, + "loss": 0.5803, + "step": 4008 + }, + { + "epoch": 0.5907170923379175, + "grad_norm": 0.607284665107727, + "learning_rate": 4.5545414251173945e-06, + "loss": 0.5667, + "step": 4009 + }, + { + "epoch": 0.5908644400785854, + "grad_norm": 0.5759404301643372, + "learning_rate": 4.554320508764662e-06, + "loss": 0.5729, + "step": 4010 + }, + { + "epoch": 0.5910117878192535, + "grad_norm": 0.5891736745834351, + "learning_rate": 4.5540995430064865e-06, + "loss": 0.5792, + "step": 4011 + }, + { + "epoch": 0.5911591355599214, + "grad_norm": 0.5956506133079529, + "learning_rate": 4.553878527848179e-06, + "loss": 0.5986, + "step": 4012 + }, + { + "epoch": 0.5913064833005894, + "grad_norm": 0.6293287873268127, + "learning_rate": 4.553657463295057e-06, + "loss": 0.5838, + "step": 4013 + }, + { + "epoch": 0.5914538310412574, + "grad_norm": 0.5626173615455627, + "learning_rate": 4.553436349352436e-06, + "loss": 0.5662, + "step": 4014 + }, + { + "epoch": 0.5916011787819253, + "grad_norm": 0.5826026797294617, + "learning_rate": 4.5532151860256335e-06, + "loss": 0.5936, + "step": 4015 + }, + { + "epoch": 0.5917485265225934, + "grad_norm": 0.5832375884056091, + "learning_rate": 4.5529939733199696e-06, + "loss": 0.574, + "step": 4016 + }, + { + "epoch": 0.5918958742632613, + "grad_norm": 0.6088613867759705, + "learning_rate": 4.552772711240763e-06, + "loss": 0.6002, + "step": 4017 + }, + { + "epoch": 0.5920432220039292, + "grad_norm": 0.5700391530990601, + "learning_rate": 4.552551399793336e-06, + "loss": 0.5868, + "step": 4018 + }, + { + "epoch": 0.5921905697445973, + "grad_norm": 0.6300545334815979, + "learning_rate": 4.552330038983009e-06, + "loss": 0.598, + "step": 4019 + }, + { + "epoch": 0.5923379174852652, + "grad_norm": 0.5754348635673523, + "learning_rate": 4.5521086288151095e-06, + "loss": 0.5625, + "step": 4020 + }, + { + "epoch": 0.5924852652259333, + "grad_norm": 0.5976048111915588, + "learning_rate": 4.551887169294959e-06, + "loss": 0.5513, + "step": 4021 + }, + { + "epoch": 0.5926326129666012, + "grad_norm": 0.5751816034317017, + "learning_rate": 4.551665660427884e-06, + "loss": 0.6027, + "step": 4022 + }, + { + "epoch": 0.5927799607072691, + "grad_norm": 0.6279527544975281, + "learning_rate": 4.551444102219212e-06, + "loss": 0.5729, + "step": 4023 + }, + { + "epoch": 0.5929273084479372, + "grad_norm": 0.5729790329933167, + "learning_rate": 4.551222494674272e-06, + "loss": 0.5231, + "step": 4024 + }, + { + "epoch": 0.5930746561886051, + "grad_norm": 0.5924657583236694, + "learning_rate": 4.5510008377983935e-06, + "loss": 0.5931, + "step": 4025 + }, + { + "epoch": 0.593222003929273, + "grad_norm": 0.6468052864074707, + "learning_rate": 4.550779131596906e-06, + "loss": 0.5579, + "step": 4026 + }, + { + "epoch": 0.5933693516699411, + "grad_norm": 0.5803825259208679, + "learning_rate": 4.550557376075143e-06, + "loss": 0.5555, + "step": 4027 + }, + { + "epoch": 0.593516699410609, + "grad_norm": 0.5999215245246887, + "learning_rate": 4.550335571238437e-06, + "loss": 0.5859, + "step": 4028 + }, + { + "epoch": 0.593664047151277, + "grad_norm": 0.6027416586875916, + "learning_rate": 4.550113717092121e-06, + "loss": 0.5782, + "step": 4029 + }, + { + "epoch": 0.593811394891945, + "grad_norm": 0.6023759245872498, + "learning_rate": 4.549891813641533e-06, + "loss": 0.5991, + "step": 4030 + }, + { + "epoch": 0.593958742632613, + "grad_norm": 0.6045124530792236, + "learning_rate": 4.549669860892007e-06, + "loss": 0.5556, + "step": 4031 + }, + { + "epoch": 0.594106090373281, + "grad_norm": 0.5890872478485107, + "learning_rate": 4.5494478588488835e-06, + "loss": 0.6155, + "step": 4032 + }, + { + "epoch": 0.5942534381139489, + "grad_norm": 0.5796610713005066, + "learning_rate": 4.5492258075174996e-06, + "loss": 0.5985, + "step": 4033 + }, + { + "epoch": 0.5944007858546169, + "grad_norm": 0.5790695548057556, + "learning_rate": 4.549003706903196e-06, + "loss": 0.5918, + "step": 4034 + }, + { + "epoch": 0.5945481335952849, + "grad_norm": 0.570199191570282, + "learning_rate": 4.548781557011315e-06, + "loss": 0.5548, + "step": 4035 + }, + { + "epoch": 0.5946954813359528, + "grad_norm": 0.6473894715309143, + "learning_rate": 4.548559357847198e-06, + "loss": 0.5711, + "step": 4036 + }, + { + "epoch": 0.5948428290766208, + "grad_norm": 0.5919030904769897, + "learning_rate": 4.548337109416189e-06, + "loss": 0.57, + "step": 4037 + }, + { + "epoch": 0.5949901768172888, + "grad_norm": 0.6124026775360107, + "learning_rate": 4.548114811723633e-06, + "loss": 0.5657, + "step": 4038 + }, + { + "epoch": 0.5951375245579568, + "grad_norm": 0.5600143671035767, + "learning_rate": 4.547892464774877e-06, + "loss": 0.5785, + "step": 4039 + }, + { + "epoch": 0.5952848722986247, + "grad_norm": 0.5923816561698914, + "learning_rate": 4.5476700685752674e-06, + "loss": 0.5763, + "step": 4040 + }, + { + "epoch": 0.5954322200392927, + "grad_norm": 0.5789453387260437, + "learning_rate": 4.547447623130154e-06, + "loss": 0.5805, + "step": 4041 + }, + { + "epoch": 0.5955795677799607, + "grad_norm": 0.5986095070838928, + "learning_rate": 4.5472251284448845e-06, + "loss": 0.551, + "step": 4042 + }, + { + "epoch": 0.5957269155206287, + "grad_norm": 0.5844764113426208, + "learning_rate": 4.547002584524811e-06, + "loss": 0.5703, + "step": 4043 + }, + { + "epoch": 0.5958742632612967, + "grad_norm": 0.6382380127906799, + "learning_rate": 4.546779991375287e-06, + "loss": 0.617, + "step": 4044 + }, + { + "epoch": 0.5960216110019646, + "grad_norm": 0.5908476114273071, + "learning_rate": 4.546557349001663e-06, + "loss": 0.5596, + "step": 4045 + }, + { + "epoch": 0.5961689587426326, + "grad_norm": 0.5851648449897766, + "learning_rate": 4.546334657409294e-06, + "loss": 0.5509, + "step": 4046 + }, + { + "epoch": 0.5963163064833006, + "grad_norm": 0.549728274345398, + "learning_rate": 4.546111916603536e-06, + "loss": 0.5804, + "step": 4047 + }, + { + "epoch": 0.5964636542239685, + "grad_norm": 0.5757259130477905, + "learning_rate": 4.545889126589747e-06, + "loss": 0.5793, + "step": 4048 + }, + { + "epoch": 0.5966110019646366, + "grad_norm": 0.5762913227081299, + "learning_rate": 4.545666287373285e-06, + "loss": 0.6012, + "step": 4049 + }, + { + "epoch": 0.5967583497053045, + "grad_norm": 0.5641319155693054, + "learning_rate": 4.545443398959507e-06, + "loss": 0.5935, + "step": 4050 + }, + { + "epoch": 0.5969056974459725, + "grad_norm": 0.5995563864707947, + "learning_rate": 4.545220461353775e-06, + "loss": 0.5747, + "step": 4051 + }, + { + "epoch": 0.5970530451866405, + "grad_norm": 0.5632227659225464, + "learning_rate": 4.54499747456145e-06, + "loss": 0.5504, + "step": 4052 + }, + { + "epoch": 0.5972003929273084, + "grad_norm": 0.595560610294342, + "learning_rate": 4.544774438587894e-06, + "loss": 0.5934, + "step": 4053 + }, + { + "epoch": 0.5973477406679765, + "grad_norm": 0.6014308333396912, + "learning_rate": 4.544551353438473e-06, + "loss": 0.5595, + "step": 4054 + }, + { + "epoch": 0.5974950884086444, + "grad_norm": 0.6001010537147522, + "learning_rate": 4.544328219118551e-06, + "loss": 0.5417, + "step": 4055 + }, + { + "epoch": 0.5976424361493123, + "grad_norm": 0.579484760761261, + "learning_rate": 4.544105035633494e-06, + "loss": 0.5229, + "step": 4056 + }, + { + "epoch": 0.5977897838899804, + "grad_norm": 0.5873503088951111, + "learning_rate": 4.54388180298867e-06, + "loss": 0.5978, + "step": 4057 + }, + { + "epoch": 0.5979371316306483, + "grad_norm": 0.5871350169181824, + "learning_rate": 4.5436585211894465e-06, + "loss": 0.5705, + "step": 4058 + }, + { + "epoch": 0.5980844793713163, + "grad_norm": 0.6238685250282288, + "learning_rate": 4.543435190241195e-06, + "loss": 0.5799, + "step": 4059 + }, + { + "epoch": 0.5982318271119843, + "grad_norm": 0.5585926175117493, + "learning_rate": 4.543211810149285e-06, + "loss": 0.5458, + "step": 4060 + }, + { + "epoch": 0.5983791748526522, + "grad_norm": 0.6245765686035156, + "learning_rate": 4.5429883809190885e-06, + "loss": 0.5927, + "step": 4061 + }, + { + "epoch": 0.5985265225933203, + "grad_norm": 0.5667378902435303, + "learning_rate": 4.5427649025559805e-06, + "loss": 0.5778, + "step": 4062 + }, + { + "epoch": 0.5986738703339882, + "grad_norm": 0.5654176473617554, + "learning_rate": 4.542541375065335e-06, + "loss": 0.5757, + "step": 4063 + }, + { + "epoch": 0.5988212180746562, + "grad_norm": 0.5939280986785889, + "learning_rate": 4.542317798452527e-06, + "loss": 0.5704, + "step": 4064 + }, + { + "epoch": 0.5989685658153242, + "grad_norm": 0.5948266386985779, + "learning_rate": 4.542094172722934e-06, + "loss": 0.5456, + "step": 4065 + }, + { + "epoch": 0.5991159135559921, + "grad_norm": 0.5727508068084717, + "learning_rate": 4.541870497881934e-06, + "loss": 0.5861, + "step": 4066 + }, + { + "epoch": 0.5992632612966601, + "grad_norm": 0.5778104066848755, + "learning_rate": 4.541646773934906e-06, + "loss": 0.5875, + "step": 4067 + }, + { + "epoch": 0.5994106090373281, + "grad_norm": 0.5903974771499634, + "learning_rate": 4.5414230008872304e-06, + "loss": 0.5548, + "step": 4068 + }, + { + "epoch": 0.5995579567779961, + "grad_norm": 0.6060413718223572, + "learning_rate": 4.5411991787442896e-06, + "loss": 0.5719, + "step": 4069 + }, + { + "epoch": 0.599705304518664, + "grad_norm": 0.6257663369178772, + "learning_rate": 4.540975307511466e-06, + "loss": 0.5558, + "step": 4070 + }, + { + "epoch": 0.599852652259332, + "grad_norm": 0.5533835887908936, + "learning_rate": 4.540751387194143e-06, + "loss": 0.5588, + "step": 4071 + }, + { + "epoch": 0.6, + "grad_norm": 0.6055837869644165, + "learning_rate": 4.540527417797707e-06, + "loss": 0.588, + "step": 4072 + }, + { + "epoch": 0.600147347740668, + "grad_norm": 0.601180911064148, + "learning_rate": 4.540303399327544e-06, + "loss": 0.5539, + "step": 4073 + }, + { + "epoch": 0.600294695481336, + "grad_norm": 0.5682924389839172, + "learning_rate": 4.54007933178904e-06, + "loss": 0.5796, + "step": 4074 + }, + { + "epoch": 0.6004420432220039, + "grad_norm": 0.6100093126296997, + "learning_rate": 4.539855215187585e-06, + "loss": 0.5453, + "step": 4075 + }, + { + "epoch": 0.6005893909626719, + "grad_norm": 0.6224353909492493, + "learning_rate": 4.53963104952857e-06, + "loss": 0.5646, + "step": 4076 + }, + { + "epoch": 0.6007367387033399, + "grad_norm": 0.6208993196487427, + "learning_rate": 4.5394068348173835e-06, + "loss": 0.5994, + "step": 4077 + }, + { + "epoch": 0.6008840864440078, + "grad_norm": 0.5800502300262451, + "learning_rate": 4.53918257105942e-06, + "loss": 0.5723, + "step": 4078 + }, + { + "epoch": 0.6010314341846759, + "grad_norm": 0.5734683871269226, + "learning_rate": 4.538958258260072e-06, + "loss": 0.5431, + "step": 4079 + }, + { + "epoch": 0.6011787819253438, + "grad_norm": 0.6343799233436584, + "learning_rate": 4.538733896424734e-06, + "loss": 0.5857, + "step": 4080 + }, + { + "epoch": 0.6013261296660117, + "grad_norm": 0.5692081451416016, + "learning_rate": 4.538509485558802e-06, + "loss": 0.569, + "step": 4081 + }, + { + "epoch": 0.6014734774066798, + "grad_norm": 0.6191997528076172, + "learning_rate": 4.538285025667673e-06, + "loss": 0.5532, + "step": 4082 + }, + { + "epoch": 0.6016208251473477, + "grad_norm": 0.5801874995231628, + "learning_rate": 4.538060516756746e-06, + "loss": 0.5689, + "step": 4083 + }, + { + "epoch": 0.6017681728880158, + "grad_norm": 0.6428490877151489, + "learning_rate": 4.537835958831419e-06, + "loss": 0.5969, + "step": 4084 + }, + { + "epoch": 0.6019155206286837, + "grad_norm": 0.5631487369537354, + "learning_rate": 4.537611351897092e-06, + "loss": 0.5598, + "step": 4085 + }, + { + "epoch": 0.6020628683693516, + "grad_norm": 0.577946126461029, + "learning_rate": 4.537386695959169e-06, + "loss": 0.5496, + "step": 4086 + }, + { + "epoch": 0.6022102161100197, + "grad_norm": 0.5785812139511108, + "learning_rate": 4.537161991023051e-06, + "loss": 0.5559, + "step": 4087 + }, + { + "epoch": 0.6023575638506876, + "grad_norm": 0.6167454123497009, + "learning_rate": 4.536937237094143e-06, + "loss": 0.5868, + "step": 4088 + }, + { + "epoch": 0.6025049115913556, + "grad_norm": 0.6103595495223999, + "learning_rate": 4.536712434177849e-06, + "loss": 0.5806, + "step": 4089 + }, + { + "epoch": 0.6026522593320236, + "grad_norm": 0.5693298578262329, + "learning_rate": 4.536487582279577e-06, + "loss": 0.549, + "step": 4090 + }, + { + "epoch": 0.6027996070726915, + "grad_norm": 0.5904669761657715, + "learning_rate": 4.536262681404733e-06, + "loss": 0.6011, + "step": 4091 + }, + { + "epoch": 0.6029469548133596, + "grad_norm": 0.6017038226127625, + "learning_rate": 4.5360377315587275e-06, + "loss": 0.574, + "step": 4092 + }, + { + "epoch": 0.6030943025540275, + "grad_norm": 0.5881876945495605, + "learning_rate": 4.535812732746969e-06, + "loss": 0.6125, + "step": 4093 + }, + { + "epoch": 0.6032416502946955, + "grad_norm": 0.560092568397522, + "learning_rate": 4.535587684974869e-06, + "loss": 0.6016, + "step": 4094 + }, + { + "epoch": 0.6033889980353635, + "grad_norm": 0.6281226277351379, + "learning_rate": 4.53536258824784e-06, + "loss": 0.5776, + "step": 4095 + }, + { + "epoch": 0.6035363457760314, + "grad_norm": 0.5840016603469849, + "learning_rate": 4.535137442571296e-06, + "loss": 0.5339, + "step": 4096 + }, + { + "epoch": 0.6036836935166994, + "grad_norm": 0.5867325067520142, + "learning_rate": 4.53491224795065e-06, + "loss": 0.5702, + "step": 4097 + }, + { + "epoch": 0.6038310412573674, + "grad_norm": 0.5859635472297668, + "learning_rate": 4.53468700439132e-06, + "loss": 0.5736, + "step": 4098 + }, + { + "epoch": 0.6039783889980354, + "grad_norm": 0.5703176856040955, + "learning_rate": 4.534461711898721e-06, + "loss": 0.5631, + "step": 4099 + }, + { + "epoch": 0.6041257367387033, + "grad_norm": 0.6124093532562256, + "learning_rate": 4.534236370478273e-06, + "loss": 0.5659, + "step": 4100 + }, + { + "epoch": 0.6042730844793713, + "grad_norm": 0.579160749912262, + "learning_rate": 4.534010980135394e-06, + "loss": 0.5665, + "step": 4101 + }, + { + "epoch": 0.6044204322200393, + "grad_norm": 0.6632832288742065, + "learning_rate": 4.5337855408755044e-06, + "loss": 0.5907, + "step": 4102 + }, + { + "epoch": 0.6045677799607073, + "grad_norm": 0.558673620223999, + "learning_rate": 4.533560052704027e-06, + "loss": 0.5739, + "step": 4103 + }, + { + "epoch": 0.6047151277013753, + "grad_norm": 0.5907946825027466, + "learning_rate": 4.533334515626384e-06, + "loss": 0.5857, + "step": 4104 + }, + { + "epoch": 0.6048624754420432, + "grad_norm": 0.5925883650779724, + "learning_rate": 4.533108929647999e-06, + "loss": 0.6078, + "step": 4105 + }, + { + "epoch": 0.6050098231827112, + "grad_norm": 0.6010406017303467, + "learning_rate": 4.532883294774298e-06, + "loss": 0.5651, + "step": 4106 + }, + { + "epoch": 0.6051571709233792, + "grad_norm": 0.5944365859031677, + "learning_rate": 4.532657611010708e-06, + "loss": 0.5845, + "step": 4107 + }, + { + "epoch": 0.6053045186640471, + "grad_norm": 0.5673205852508545, + "learning_rate": 4.532431878362655e-06, + "loss": 0.5738, + "step": 4108 + }, + { + "epoch": 0.6054518664047152, + "grad_norm": 0.5638338923454285, + "learning_rate": 4.53220609683557e-06, + "loss": 0.5789, + "step": 4109 + }, + { + "epoch": 0.6055992141453831, + "grad_norm": 0.6026279330253601, + "learning_rate": 4.53198026643488e-06, + "loss": 0.6005, + "step": 4110 + }, + { + "epoch": 0.605746561886051, + "grad_norm": 0.6020640134811401, + "learning_rate": 4.531754387166018e-06, + "loss": 0.5863, + "step": 4111 + }, + { + "epoch": 0.6058939096267191, + "grad_norm": 0.6016120910644531, + "learning_rate": 4.531528459034416e-06, + "loss": 0.5437, + "step": 4112 + }, + { + "epoch": 0.606041257367387, + "grad_norm": 0.5935713052749634, + "learning_rate": 4.531302482045507e-06, + "loss": 0.5662, + "step": 4113 + }, + { + "epoch": 0.6061886051080551, + "grad_norm": 0.5797013640403748, + "learning_rate": 4.531076456204727e-06, + "loss": 0.572, + "step": 4114 + }, + { + "epoch": 0.606335952848723, + "grad_norm": 0.5773938894271851, + "learning_rate": 4.530850381517511e-06, + "loss": 0.5793, + "step": 4115 + }, + { + "epoch": 0.6064833005893909, + "grad_norm": 0.5786169767379761, + "learning_rate": 4.530624257989295e-06, + "loss": 0.5739, + "step": 4116 + }, + { + "epoch": 0.606630648330059, + "grad_norm": 0.6076707243919373, + "learning_rate": 4.530398085625518e-06, + "loss": 0.563, + "step": 4117 + }, + { + "epoch": 0.6067779960707269, + "grad_norm": 0.5866709351539612, + "learning_rate": 4.53017186443162e-06, + "loss": 0.5866, + "step": 4118 + }, + { + "epoch": 0.6069253438113948, + "grad_norm": 0.5803313851356506, + "learning_rate": 4.529945594413041e-06, + "loss": 0.5809, + "step": 4119 + }, + { + "epoch": 0.6070726915520629, + "grad_norm": 0.5811918377876282, + "learning_rate": 4.5297192755752215e-06, + "loss": 0.5567, + "step": 4120 + }, + { + "epoch": 0.6072200392927308, + "grad_norm": 0.5529104471206665, + "learning_rate": 4.529492907923606e-06, + "loss": 0.6025, + "step": 4121 + }, + { + "epoch": 0.6073673870333989, + "grad_norm": 0.6001884341239929, + "learning_rate": 4.5292664914636375e-06, + "loss": 0.5859, + "step": 4122 + }, + { + "epoch": 0.6075147347740668, + "grad_norm": 0.6312587857246399, + "learning_rate": 4.529040026200762e-06, + "loss": 0.5942, + "step": 4123 + }, + { + "epoch": 0.6076620825147347, + "grad_norm": 0.590526282787323, + "learning_rate": 4.528813512140427e-06, + "loss": 0.5665, + "step": 4124 + }, + { + "epoch": 0.6078094302554028, + "grad_norm": 0.5627483129501343, + "learning_rate": 4.528586949288076e-06, + "loss": 0.573, + "step": 4125 + }, + { + "epoch": 0.6079567779960707, + "grad_norm": 0.5797075629234314, + "learning_rate": 4.528360337649162e-06, + "loss": 0.5707, + "step": 4126 + }, + { + "epoch": 0.6081041257367387, + "grad_norm": 0.5679693222045898, + "learning_rate": 4.5281336772291326e-06, + "loss": 0.5743, + "step": 4127 + }, + { + "epoch": 0.6082514734774067, + "grad_norm": 0.5706878900527954, + "learning_rate": 4.52790696803344e-06, + "loss": 0.5872, + "step": 4128 + }, + { + "epoch": 0.6083988212180746, + "grad_norm": 0.5827901363372803, + "learning_rate": 4.5276802100675355e-06, + "loss": 0.5654, + "step": 4129 + }, + { + "epoch": 0.6085461689587426, + "grad_norm": 0.6052162051200867, + "learning_rate": 4.527453403336873e-06, + "loss": 0.58, + "step": 4130 + }, + { + "epoch": 0.6086935166994106, + "grad_norm": 0.5773880481719971, + "learning_rate": 4.527226547846907e-06, + "loss": 0.5927, + "step": 4131 + }, + { + "epoch": 0.6088408644400786, + "grad_norm": 0.5826122164726257, + "learning_rate": 4.526999643603094e-06, + "loss": 0.6016, + "step": 4132 + }, + { + "epoch": 0.6089882121807466, + "grad_norm": 0.6075924038887024, + "learning_rate": 4.52677269061089e-06, + "loss": 0.5656, + "step": 4133 + }, + { + "epoch": 0.6091355599214145, + "grad_norm": 0.5953847765922546, + "learning_rate": 4.526545688875753e-06, + "loss": 0.5667, + "step": 4134 + }, + { + "epoch": 0.6092829076620825, + "grad_norm": 0.5850903391838074, + "learning_rate": 4.526318638403143e-06, + "loss": 0.5529, + "step": 4135 + }, + { + "epoch": 0.6094302554027505, + "grad_norm": 0.6427224278450012, + "learning_rate": 4.5260915391985205e-06, + "loss": 0.6027, + "step": 4136 + }, + { + "epoch": 0.6095776031434185, + "grad_norm": 0.6639814376831055, + "learning_rate": 4.525864391267346e-06, + "loss": 0.591, + "step": 4137 + }, + { + "epoch": 0.6097249508840864, + "grad_norm": 0.6414369344711304, + "learning_rate": 4.5256371946150835e-06, + "loss": 0.5771, + "step": 4138 + }, + { + "epoch": 0.6098722986247544, + "grad_norm": 0.5815874338150024, + "learning_rate": 4.525409949247197e-06, + "loss": 0.5804, + "step": 4139 + }, + { + "epoch": 0.6100196463654224, + "grad_norm": 0.595587968826294, + "learning_rate": 4.5251826551691505e-06, + "loss": 0.5716, + "step": 4140 + }, + { + "epoch": 0.6101669941060903, + "grad_norm": 0.6197895407676697, + "learning_rate": 4.524955312386412e-06, + "loss": 0.5791, + "step": 4141 + }, + { + "epoch": 0.6103143418467584, + "grad_norm": 0.6096674203872681, + "learning_rate": 4.524727920904447e-06, + "loss": 0.5564, + "step": 4142 + }, + { + "epoch": 0.6104616895874263, + "grad_norm": 0.573253870010376, + "learning_rate": 4.524500480728725e-06, + "loss": 0.59, + "step": 4143 + }, + { + "epoch": 0.6106090373280944, + "grad_norm": 0.5955726504325867, + "learning_rate": 4.5242729918647165e-06, + "loss": 0.5893, + "step": 4144 + }, + { + "epoch": 0.6107563850687623, + "grad_norm": 0.6141558885574341, + "learning_rate": 4.524045454317892e-06, + "loss": 0.6138, + "step": 4145 + }, + { + "epoch": 0.6109037328094302, + "grad_norm": 0.5760575532913208, + "learning_rate": 4.523817868093723e-06, + "loss": 0.5803, + "step": 4146 + }, + { + "epoch": 0.6110510805500983, + "grad_norm": 0.5793676376342773, + "learning_rate": 4.523590233197685e-06, + "loss": 0.6036, + "step": 4147 + }, + { + "epoch": 0.6111984282907662, + "grad_norm": 0.6000881195068359, + "learning_rate": 4.5233625496352495e-06, + "loss": 0.5884, + "step": 4148 + }, + { + "epoch": 0.6113457760314341, + "grad_norm": 0.5830743312835693, + "learning_rate": 4.523134817411895e-06, + "loss": 0.5952, + "step": 4149 + }, + { + "epoch": 0.6114931237721022, + "grad_norm": 0.5830819010734558, + "learning_rate": 4.5229070365330955e-06, + "loss": 0.6104, + "step": 4150 + }, + { + "epoch": 0.6116404715127701, + "grad_norm": 0.5855065584182739, + "learning_rate": 4.522679207004331e-06, + "loss": 0.6265, + "step": 4151 + }, + { + "epoch": 0.6117878192534381, + "grad_norm": 0.5836002230644226, + "learning_rate": 4.5224513288310816e-06, + "loss": 0.5778, + "step": 4152 + }, + { + "epoch": 0.6119351669941061, + "grad_norm": 0.5698572993278503, + "learning_rate": 4.522223402018825e-06, + "loss": 0.5802, + "step": 4153 + }, + { + "epoch": 0.612082514734774, + "grad_norm": 0.6303333640098572, + "learning_rate": 4.521995426573045e-06, + "loss": 0.5337, + "step": 4154 + }, + { + "epoch": 0.6122298624754421, + "grad_norm": 0.5785889029502869, + "learning_rate": 4.521767402499222e-06, + "loss": 0.5909, + "step": 4155 + }, + { + "epoch": 0.61237721021611, + "grad_norm": 0.5918206572532654, + "learning_rate": 4.521539329802842e-06, + "loss": 0.5775, + "step": 4156 + }, + { + "epoch": 0.612524557956778, + "grad_norm": 0.5645989179611206, + "learning_rate": 4.52131120848939e-06, + "loss": 0.5378, + "step": 4157 + }, + { + "epoch": 0.612671905697446, + "grad_norm": 0.5705592632293701, + "learning_rate": 4.52108303856435e-06, + "loss": 0.5818, + "step": 4158 + }, + { + "epoch": 0.6128192534381139, + "grad_norm": 0.6225827932357788, + "learning_rate": 4.520854820033212e-06, + "loss": 0.5546, + "step": 4159 + }, + { + "epoch": 0.6129666011787819, + "grad_norm": 0.5812023282051086, + "learning_rate": 4.5206265529014634e-06, + "loss": 0.5973, + "step": 4160 + }, + { + "epoch": 0.6131139489194499, + "grad_norm": 0.5800842642784119, + "learning_rate": 4.520398237174593e-06, + "loss": 0.5774, + "step": 4161 + }, + { + "epoch": 0.6132612966601179, + "grad_norm": 0.5724430084228516, + "learning_rate": 4.520169872858093e-06, + "loss": 0.5389, + "step": 4162 + }, + { + "epoch": 0.6134086444007859, + "grad_norm": 0.5781359076499939, + "learning_rate": 4.519941459957456e-06, + "loss": 0.5947, + "step": 4163 + }, + { + "epoch": 0.6135559921414538, + "grad_norm": 0.5762154459953308, + "learning_rate": 4.519712998478174e-06, + "loss": 0.5827, + "step": 4164 + }, + { + "epoch": 0.6137033398821218, + "grad_norm": 0.5713616609573364, + "learning_rate": 4.51948448842574e-06, + "loss": 0.5884, + "step": 4165 + }, + { + "epoch": 0.6138506876227898, + "grad_norm": 0.5550837516784668, + "learning_rate": 4.5192559298056535e-06, + "loss": 0.5672, + "step": 4166 + }, + { + "epoch": 0.6139980353634578, + "grad_norm": 0.565195620059967, + "learning_rate": 4.519027322623408e-06, + "loss": 0.5956, + "step": 4167 + }, + { + "epoch": 0.6141453831041257, + "grad_norm": 0.6066784262657166, + "learning_rate": 4.518798666884502e-06, + "loss": 0.6096, + "step": 4168 + }, + { + "epoch": 0.6142927308447937, + "grad_norm": 0.6756887435913086, + "learning_rate": 4.518569962594435e-06, + "loss": 0.5791, + "step": 4169 + }, + { + "epoch": 0.6144400785854617, + "grad_norm": 0.6368065476417542, + "learning_rate": 4.518341209758708e-06, + "loss": 0.6007, + "step": 4170 + }, + { + "epoch": 0.6145874263261296, + "grad_norm": 0.5724445581436157, + "learning_rate": 4.518112408382821e-06, + "loss": 0.5767, + "step": 4171 + }, + { + "epoch": 0.6147347740667977, + "grad_norm": 0.5858621597290039, + "learning_rate": 4.517883558472277e-06, + "loss": 0.5444, + "step": 4172 + }, + { + "epoch": 0.6148821218074656, + "grad_norm": 0.571965754032135, + "learning_rate": 4.5176546600325805e-06, + "loss": 0.5379, + "step": 4173 + }, + { + "epoch": 0.6150294695481336, + "grad_norm": 0.6095662713050842, + "learning_rate": 4.517425713069235e-06, + "loss": 0.573, + "step": 4174 + }, + { + "epoch": 0.6151768172888016, + "grad_norm": 0.6090371608734131, + "learning_rate": 4.517196717587747e-06, + "loss": 0.5908, + "step": 4175 + }, + { + "epoch": 0.6153241650294695, + "grad_norm": 0.5738314390182495, + "learning_rate": 4.516967673593625e-06, + "loss": 0.5674, + "step": 4176 + }, + { + "epoch": 0.6154715127701376, + "grad_norm": 0.5553464889526367, + "learning_rate": 4.516738581092377e-06, + "loss": 0.5313, + "step": 4177 + }, + { + "epoch": 0.6156188605108055, + "grad_norm": 0.5807005763053894, + "learning_rate": 4.5165094400895104e-06, + "loss": 0.5839, + "step": 4178 + }, + { + "epoch": 0.6157662082514734, + "grad_norm": 0.5545210838317871, + "learning_rate": 4.516280250590539e-06, + "loss": 0.5904, + "step": 4179 + }, + { + "epoch": 0.6159135559921415, + "grad_norm": 0.5638349652290344, + "learning_rate": 4.516051012600973e-06, + "loss": 0.6055, + "step": 4180 + }, + { + "epoch": 0.6160609037328094, + "grad_norm": 0.5977433919906616, + "learning_rate": 4.515821726126325e-06, + "loss": 0.5794, + "step": 4181 + }, + { + "epoch": 0.6162082514734774, + "grad_norm": 0.5950258374214172, + "learning_rate": 4.51559239117211e-06, + "loss": 0.5857, + "step": 4182 + }, + { + "epoch": 0.6163555992141454, + "grad_norm": 0.5840324759483337, + "learning_rate": 4.515363007743843e-06, + "loss": 0.5798, + "step": 4183 + }, + { + "epoch": 0.6165029469548133, + "grad_norm": 0.593753457069397, + "learning_rate": 4.515133575847043e-06, + "loss": 0.5672, + "step": 4184 + }, + { + "epoch": 0.6166502946954814, + "grad_norm": 0.5710434317588806, + "learning_rate": 4.514904095487223e-06, + "loss": 0.5597, + "step": 4185 + }, + { + "epoch": 0.6167976424361493, + "grad_norm": 0.5817574858665466, + "learning_rate": 4.514674566669907e-06, + "loss": 0.585, + "step": 4186 + }, + { + "epoch": 0.6169449901768173, + "grad_norm": 0.5658730864524841, + "learning_rate": 4.514444989400611e-06, + "loss": 0.5819, + "step": 4187 + }, + { + "epoch": 0.6170923379174853, + "grad_norm": 0.5888286232948303, + "learning_rate": 4.514215363684858e-06, + "loss": 0.5717, + "step": 4188 + }, + { + "epoch": 0.6172396856581532, + "grad_norm": 0.5767615437507629, + "learning_rate": 4.513985689528171e-06, + "loss": 0.5959, + "step": 4189 + }, + { + "epoch": 0.6173870333988212, + "grad_norm": 0.5744044184684753, + "learning_rate": 4.513755966936072e-06, + "loss": 0.5773, + "step": 4190 + }, + { + "epoch": 0.6175343811394892, + "grad_norm": 0.663268506526947, + "learning_rate": 4.513526195914087e-06, + "loss": 0.5574, + "step": 4191 + }, + { + "epoch": 0.6176817288801572, + "grad_norm": 0.6025025844573975, + "learning_rate": 4.513296376467741e-06, + "loss": 0.5649, + "step": 4192 + }, + { + "epoch": 0.6178290766208252, + "grad_norm": 0.575153112411499, + "learning_rate": 4.5130665086025615e-06, + "loss": 0.5502, + "step": 4193 + }, + { + "epoch": 0.6179764243614931, + "grad_norm": 0.56160968542099, + "learning_rate": 4.5128365923240765e-06, + "loss": 0.5771, + "step": 4194 + }, + { + "epoch": 0.6181237721021611, + "grad_norm": 0.5792008638381958, + "learning_rate": 4.512606627637817e-06, + "loss": 0.5573, + "step": 4195 + }, + { + "epoch": 0.6182711198428291, + "grad_norm": 0.573826789855957, + "learning_rate": 4.51237661454931e-06, + "loss": 0.5656, + "step": 4196 + }, + { + "epoch": 0.6184184675834971, + "grad_norm": 0.5604415535926819, + "learning_rate": 4.512146553064091e-06, + "loss": 0.562, + "step": 4197 + }, + { + "epoch": 0.618565815324165, + "grad_norm": 0.5838871598243713, + "learning_rate": 4.511916443187689e-06, + "loss": 0.599, + "step": 4198 + }, + { + "epoch": 0.618713163064833, + "grad_norm": 0.5750766396522522, + "learning_rate": 4.511686284925642e-06, + "loss": 0.5946, + "step": 4199 + }, + { + "epoch": 0.618860510805501, + "grad_norm": 0.6179443597793579, + "learning_rate": 4.5114560782834825e-06, + "loss": 0.5169, + "step": 4200 + }, + { + "epoch": 0.6190078585461689, + "grad_norm": 0.5583307147026062, + "learning_rate": 4.511225823266748e-06, + "loss": 0.5705, + "step": 4201 + }, + { + "epoch": 0.619155206286837, + "grad_norm": 0.5702133774757385, + "learning_rate": 4.510995519880976e-06, + "loss": 0.552, + "step": 4202 + }, + { + "epoch": 0.6193025540275049, + "grad_norm": 0.5841966867446899, + "learning_rate": 4.510765168131705e-06, + "loss": 0.5836, + "step": 4203 + }, + { + "epoch": 0.6194499017681729, + "grad_norm": 0.6081490516662598, + "learning_rate": 4.510534768024475e-06, + "loss": 0.5938, + "step": 4204 + }, + { + "epoch": 0.6195972495088409, + "grad_norm": 0.5890136361122131, + "learning_rate": 4.510304319564827e-06, + "loss": 0.5767, + "step": 4205 + }, + { + "epoch": 0.6197445972495088, + "grad_norm": 0.58270263671875, + "learning_rate": 4.5100738227583015e-06, + "loss": 0.5894, + "step": 4206 + }, + { + "epoch": 0.6198919449901769, + "grad_norm": 0.5511330366134644, + "learning_rate": 4.509843277610445e-06, + "loss": 0.5745, + "step": 4207 + }, + { + "epoch": 0.6200392927308448, + "grad_norm": 0.5973516702651978, + "learning_rate": 4.5096126841268e-06, + "loss": 0.5649, + "step": 4208 + }, + { + "epoch": 0.6201866404715127, + "grad_norm": 0.5800023674964905, + "learning_rate": 4.509382042312912e-06, + "loss": 0.5982, + "step": 4209 + }, + { + "epoch": 0.6203339882121808, + "grad_norm": 0.5700022578239441, + "learning_rate": 4.509151352174328e-06, + "loss": 0.595, + "step": 4210 + }, + { + "epoch": 0.6204813359528487, + "grad_norm": 0.6016022562980652, + "learning_rate": 4.508920613716598e-06, + "loss": 0.5958, + "step": 4211 + }, + { + "epoch": 0.6206286836935166, + "grad_norm": 0.5725538730621338, + "learning_rate": 4.5086898269452675e-06, + "loss": 0.5691, + "step": 4212 + }, + { + "epoch": 0.6207760314341847, + "grad_norm": 0.5649779438972473, + "learning_rate": 4.50845899186589e-06, + "loss": 0.6057, + "step": 4213 + }, + { + "epoch": 0.6209233791748526, + "grad_norm": 0.5900271534919739, + "learning_rate": 4.508228108484015e-06, + "loss": 0.608, + "step": 4214 + }, + { + "epoch": 0.6210707269155207, + "grad_norm": 0.5713661313056946, + "learning_rate": 4.507997176805197e-06, + "loss": 0.5861, + "step": 4215 + }, + { + "epoch": 0.6212180746561886, + "grad_norm": 0.6355894207954407, + "learning_rate": 4.507766196834987e-06, + "loss": 0.5643, + "step": 4216 + }, + { + "epoch": 0.6213654223968565, + "grad_norm": 0.5637162327766418, + "learning_rate": 4.507535168578944e-06, + "loss": 0.586, + "step": 4217 + }, + { + "epoch": 0.6215127701375246, + "grad_norm": 0.5790041089057922, + "learning_rate": 4.507304092042619e-06, + "loss": 0.568, + "step": 4218 + }, + { + "epoch": 0.6216601178781925, + "grad_norm": 0.6117111444473267, + "learning_rate": 4.507072967231574e-06, + "loss": 0.6085, + "step": 4219 + }, + { + "epoch": 0.6218074656188605, + "grad_norm": 0.6147605180740356, + "learning_rate": 4.506841794151365e-06, + "loss": 0.5674, + "step": 4220 + }, + { + "epoch": 0.6219548133595285, + "grad_norm": 0.5882819890975952, + "learning_rate": 4.506610572807553e-06, + "loss": 0.5607, + "step": 4221 + }, + { + "epoch": 0.6221021611001964, + "grad_norm": 0.5803180932998657, + "learning_rate": 4.506379303205697e-06, + "loss": 0.5921, + "step": 4222 + }, + { + "epoch": 0.6222495088408644, + "grad_norm": 0.5560798645019531, + "learning_rate": 4.506147985351359e-06, + "loss": 0.5839, + "step": 4223 + }, + { + "epoch": 0.6223968565815324, + "grad_norm": 0.573438286781311, + "learning_rate": 4.505916619250104e-06, + "loss": 0.5819, + "step": 4224 + }, + { + "epoch": 0.6225442043222004, + "grad_norm": 0.6034354567527771, + "learning_rate": 4.505685204907495e-06, + "loss": 0.6052, + "step": 4225 + }, + { + "epoch": 0.6226915520628684, + "grad_norm": 0.5999050736427307, + "learning_rate": 4.505453742329098e-06, + "loss": 0.5698, + "step": 4226 + }, + { + "epoch": 0.6228388998035363, + "grad_norm": 0.5850294232368469, + "learning_rate": 4.505222231520478e-06, + "loss": 0.6098, + "step": 4227 + }, + { + "epoch": 0.6229862475442043, + "grad_norm": 0.6037107706069946, + "learning_rate": 4.504990672487205e-06, + "loss": 0.5627, + "step": 4228 + }, + { + "epoch": 0.6231335952848723, + "grad_norm": 0.5656865239143372, + "learning_rate": 4.504759065234846e-06, + "loss": 0.5416, + "step": 4229 + }, + { + "epoch": 0.6232809430255403, + "grad_norm": 0.5865097641944885, + "learning_rate": 4.504527409768972e-06, + "loss": 0.583, + "step": 4230 + }, + { + "epoch": 0.6234282907662082, + "grad_norm": 0.5476706027984619, + "learning_rate": 4.504295706095153e-06, + "loss": 0.5851, + "step": 4231 + }, + { + "epoch": 0.6235756385068763, + "grad_norm": 0.5871080160140991, + "learning_rate": 4.504063954218963e-06, + "loss": 0.544, + "step": 4232 + }, + { + "epoch": 0.6237229862475442, + "grad_norm": 0.5566933155059814, + "learning_rate": 4.5038321541459755e-06, + "loss": 0.5374, + "step": 4233 + }, + { + "epoch": 0.6238703339882122, + "grad_norm": 0.5995549559593201, + "learning_rate": 4.503600305881765e-06, + "loss": 0.6138, + "step": 4234 + }, + { + "epoch": 0.6240176817288802, + "grad_norm": 0.5446143746376038, + "learning_rate": 4.5033684094319055e-06, + "loss": 0.5798, + "step": 4235 + }, + { + "epoch": 0.6241650294695481, + "grad_norm": 0.5913087129592896, + "learning_rate": 4.503136464801976e-06, + "loss": 0.6047, + "step": 4236 + }, + { + "epoch": 0.6243123772102162, + "grad_norm": 0.5657643675804138, + "learning_rate": 4.502904471997554e-06, + "loss": 0.5685, + "step": 4237 + }, + { + "epoch": 0.6244597249508841, + "grad_norm": 0.5884108543395996, + "learning_rate": 4.502672431024219e-06, + "loss": 0.5777, + "step": 4238 + }, + { + "epoch": 0.624607072691552, + "grad_norm": 0.5901655554771423, + "learning_rate": 4.502440341887551e-06, + "loss": 0.5749, + "step": 4239 + }, + { + "epoch": 0.6247544204322201, + "grad_norm": 0.5993623733520508, + "learning_rate": 4.502208204593132e-06, + "loss": 0.5948, + "step": 4240 + }, + { + "epoch": 0.624901768172888, + "grad_norm": 0.5997147560119629, + "learning_rate": 4.501976019146544e-06, + "loss": 0.5786, + "step": 4241 + }, + { + "epoch": 0.6250491159135559, + "grad_norm": 0.5838434100151062, + "learning_rate": 4.501743785553373e-06, + "loss": 0.6027, + "step": 4242 + }, + { + "epoch": 0.625196463654224, + "grad_norm": 0.5979438424110413, + "learning_rate": 4.501511503819203e-06, + "loss": 0.5735, + "step": 4243 + }, + { + "epoch": 0.6253438113948919, + "grad_norm": 0.6060383915901184, + "learning_rate": 4.501279173949619e-06, + "loss": 0.5788, + "step": 4244 + }, + { + "epoch": 0.62549115913556, + "grad_norm": 0.5640358328819275, + "learning_rate": 4.50104679595021e-06, + "loss": 0.5552, + "step": 4245 + }, + { + "epoch": 0.6256385068762279, + "grad_norm": 0.5680961608886719, + "learning_rate": 4.5008143698265645e-06, + "loss": 0.5723, + "step": 4246 + }, + { + "epoch": 0.6257858546168958, + "grad_norm": 0.5654650926589966, + "learning_rate": 4.500581895584272e-06, + "loss": 0.5989, + "step": 4247 + }, + { + "epoch": 0.6259332023575639, + "grad_norm": 0.5589799284934998, + "learning_rate": 4.500349373228922e-06, + "loss": 0.5739, + "step": 4248 + }, + { + "epoch": 0.6260805500982318, + "grad_norm": 0.6006742715835571, + "learning_rate": 4.500116802766109e-06, + "loss": 0.616, + "step": 4249 + }, + { + "epoch": 0.6262278978388998, + "grad_norm": 0.5954450964927673, + "learning_rate": 4.499884184201424e-06, + "loss": 0.5817, + "step": 4250 + }, + { + "epoch": 0.6263752455795678, + "grad_norm": 0.5595595836639404, + "learning_rate": 4.4996515175404634e-06, + "loss": 0.5553, + "step": 4251 + }, + { + "epoch": 0.6265225933202357, + "grad_norm": 0.5877567529678345, + "learning_rate": 4.4994188027888205e-06, + "loss": 0.5494, + "step": 4252 + }, + { + "epoch": 0.6266699410609037, + "grad_norm": 0.5772625207901001, + "learning_rate": 4.499186039952095e-06, + "loss": 0.5816, + "step": 4253 + }, + { + "epoch": 0.6268172888015717, + "grad_norm": 0.5861586332321167, + "learning_rate": 4.498953229035882e-06, + "loss": 0.5478, + "step": 4254 + }, + { + "epoch": 0.6269646365422397, + "grad_norm": 0.5747215747833252, + "learning_rate": 4.49872037004578e-06, + "loss": 0.5799, + "step": 4255 + }, + { + "epoch": 0.6271119842829077, + "grad_norm": 0.5996657013893127, + "learning_rate": 4.498487462987392e-06, + "loss": 0.5765, + "step": 4256 + }, + { + "epoch": 0.6272593320235756, + "grad_norm": 0.592200517654419, + "learning_rate": 4.498254507866318e-06, + "loss": 0.568, + "step": 4257 + }, + { + "epoch": 0.6274066797642436, + "grad_norm": 0.577187716960907, + "learning_rate": 4.49802150468816e-06, + "loss": 0.5798, + "step": 4258 + }, + { + "epoch": 0.6275540275049116, + "grad_norm": 0.5924435257911682, + "learning_rate": 4.497788453458522e-06, + "loss": 0.5971, + "step": 4259 + }, + { + "epoch": 0.6277013752455796, + "grad_norm": 0.613988995552063, + "learning_rate": 4.497555354183009e-06, + "loss": 0.5712, + "step": 4260 + }, + { + "epoch": 0.6278487229862475, + "grad_norm": 0.5451729893684387, + "learning_rate": 4.497322206867226e-06, + "loss": 0.6066, + "step": 4261 + }, + { + "epoch": 0.6279960707269155, + "grad_norm": 0.5672998428344727, + "learning_rate": 4.497089011516781e-06, + "loss": 0.5926, + "step": 4262 + }, + { + "epoch": 0.6281434184675835, + "grad_norm": 0.5744456052780151, + "learning_rate": 4.496855768137282e-06, + "loss": 0.5554, + "step": 4263 + }, + { + "epoch": 0.6282907662082515, + "grad_norm": 0.6215322613716125, + "learning_rate": 4.496622476734338e-06, + "loss": 0.587, + "step": 4264 + }, + { + "epoch": 0.6284381139489195, + "grad_norm": 0.5633972883224487, + "learning_rate": 4.49638913731356e-06, + "loss": 0.586, + "step": 4265 + }, + { + "epoch": 0.6285854616895874, + "grad_norm": 0.5735778212547302, + "learning_rate": 4.49615574988056e-06, + "loss": 0.5754, + "step": 4266 + }, + { + "epoch": 0.6287328094302554, + "grad_norm": 0.5807338356971741, + "learning_rate": 4.49592231444095e-06, + "loss": 0.5641, + "step": 4267 + }, + { + "epoch": 0.6288801571709234, + "grad_norm": 0.5674999952316284, + "learning_rate": 4.495688831000345e-06, + "loss": 0.5992, + "step": 4268 + }, + { + "epoch": 0.6290275049115913, + "grad_norm": 0.5719695687294006, + "learning_rate": 4.4954552995643596e-06, + "loss": 0.5685, + "step": 4269 + }, + { + "epoch": 0.6291748526522594, + "grad_norm": 0.5713011622428894, + "learning_rate": 4.49522172013861e-06, + "loss": 0.5265, + "step": 4270 + }, + { + "epoch": 0.6293222003929273, + "grad_norm": 0.5810303688049316, + "learning_rate": 4.494988092728714e-06, + "loss": 0.5583, + "step": 4271 + }, + { + "epoch": 0.6294695481335952, + "grad_norm": 0.5693356990814209, + "learning_rate": 4.49475441734029e-06, + "loss": 0.5595, + "step": 4272 + }, + { + "epoch": 0.6296168958742633, + "grad_norm": 0.5772781372070312, + "learning_rate": 4.494520693978958e-06, + "loss": 0.5597, + "step": 4273 + }, + { + "epoch": 0.6297642436149312, + "grad_norm": 0.598275899887085, + "learning_rate": 4.4942869226503384e-06, + "loss": 0.6116, + "step": 4274 + }, + { + "epoch": 0.6299115913555993, + "grad_norm": 0.6286026835441589, + "learning_rate": 4.494053103360054e-06, + "loss": 0.5535, + "step": 4275 + }, + { + "epoch": 0.6300589390962672, + "grad_norm": 0.5743150115013123, + "learning_rate": 4.493819236113728e-06, + "loss": 0.5758, + "step": 4276 + }, + { + "epoch": 0.6302062868369351, + "grad_norm": 0.5902056097984314, + "learning_rate": 4.493585320916983e-06, + "loss": 0.569, + "step": 4277 + }, + { + "epoch": 0.6303536345776032, + "grad_norm": 0.5582719445228577, + "learning_rate": 4.493351357775447e-06, + "loss": 0.5608, + "step": 4278 + }, + { + "epoch": 0.6305009823182711, + "grad_norm": 0.5788253545761108, + "learning_rate": 4.493117346694746e-06, + "loss": 0.5451, + "step": 4279 + }, + { + "epoch": 0.630648330058939, + "grad_norm": 0.5673696398735046, + "learning_rate": 4.492883287680509e-06, + "loss": 0.5316, + "step": 4280 + }, + { + "epoch": 0.6307956777996071, + "grad_norm": 0.5847151875495911, + "learning_rate": 4.492649180738361e-06, + "loss": 0.5561, + "step": 4281 + }, + { + "epoch": 0.630943025540275, + "grad_norm": 0.5939843654632568, + "learning_rate": 4.4924150258739366e-06, + "loss": 0.5718, + "step": 4282 + }, + { + "epoch": 0.631090373280943, + "grad_norm": 0.5803812742233276, + "learning_rate": 4.492180823092865e-06, + "loss": 0.5776, + "step": 4283 + }, + { + "epoch": 0.631237721021611, + "grad_norm": 0.5446693897247314, + "learning_rate": 4.491946572400779e-06, + "loss": 0.5416, + "step": 4284 + }, + { + "epoch": 0.631385068762279, + "grad_norm": 0.5719100832939148, + "learning_rate": 4.491712273803312e-06, + "loss": 0.5706, + "step": 4285 + }, + { + "epoch": 0.631532416502947, + "grad_norm": 0.606528639793396, + "learning_rate": 4.491477927306098e-06, + "loss": 0.548, + "step": 4286 + }, + { + "epoch": 0.6316797642436149, + "grad_norm": 0.5670689940452576, + "learning_rate": 4.4912435329147755e-06, + "loss": 0.5887, + "step": 4287 + }, + { + "epoch": 0.6318271119842829, + "grad_norm": 0.5621157884597778, + "learning_rate": 4.4910090906349795e-06, + "loss": 0.5521, + "step": 4288 + }, + { + "epoch": 0.6319744597249509, + "grad_norm": 0.555321216583252, + "learning_rate": 4.490774600472348e-06, + "loss": 0.5769, + "step": 4289 + }, + { + "epoch": 0.6321218074656189, + "grad_norm": 0.6113997101783752, + "learning_rate": 4.4905400624325224e-06, + "loss": 0.5936, + "step": 4290 + }, + { + "epoch": 0.6322691552062868, + "grad_norm": 0.585459291934967, + "learning_rate": 4.490305476521141e-06, + "loss": 0.5619, + "step": 4291 + }, + { + "epoch": 0.6324165029469548, + "grad_norm": 0.5932852029800415, + "learning_rate": 4.490070842743847e-06, + "loss": 0.5768, + "step": 4292 + }, + { + "epoch": 0.6325638506876228, + "grad_norm": 0.5663589835166931, + "learning_rate": 4.489836161106282e-06, + "loss": 0.5647, + "step": 4293 + }, + { + "epoch": 0.6327111984282907, + "grad_norm": 0.568812370300293, + "learning_rate": 4.489601431614091e-06, + "loss": 0.5755, + "step": 4294 + }, + { + "epoch": 0.6328585461689588, + "grad_norm": 0.5838480591773987, + "learning_rate": 4.489366654272918e-06, + "loss": 0.5578, + "step": 4295 + }, + { + "epoch": 0.6330058939096267, + "grad_norm": 0.5734966397285461, + "learning_rate": 4.489131829088411e-06, + "loss": 0.6241, + "step": 4296 + }, + { + "epoch": 0.6331532416502947, + "grad_norm": 0.5930954217910767, + "learning_rate": 4.488896956066217e-06, + "loss": 0.5722, + "step": 4297 + }, + { + "epoch": 0.6333005893909627, + "grad_norm": 0.5805931687355042, + "learning_rate": 4.488662035211982e-06, + "loss": 0.566, + "step": 4298 + }, + { + "epoch": 0.6334479371316306, + "grad_norm": 0.5642044544219971, + "learning_rate": 4.48842706653136e-06, + "loss": 0.605, + "step": 4299 + }, + { + "epoch": 0.6335952848722987, + "grad_norm": 0.6286798715591431, + "learning_rate": 4.488192050029998e-06, + "loss": 0.5935, + "step": 4300 + }, + { + "epoch": 0.6337426326129666, + "grad_norm": 0.6011977791786194, + "learning_rate": 4.487956985713551e-06, + "loss": 0.5533, + "step": 4301 + }, + { + "epoch": 0.6338899803536345, + "grad_norm": 0.5535069108009338, + "learning_rate": 4.48772187358767e-06, + "loss": 0.5821, + "step": 4302 + }, + { + "epoch": 0.6340373280943026, + "grad_norm": 0.5696483850479126, + "learning_rate": 4.48748671365801e-06, + "loss": 0.5315, + "step": 4303 + }, + { + "epoch": 0.6341846758349705, + "grad_norm": 0.5985753536224365, + "learning_rate": 4.487251505930228e-06, + "loss": 0.5722, + "step": 4304 + }, + { + "epoch": 0.6343320235756386, + "grad_norm": 0.5764299035072327, + "learning_rate": 4.487016250409978e-06, + "loss": 0.6057, + "step": 4305 + }, + { + "epoch": 0.6344793713163065, + "grad_norm": 0.6111028790473938, + "learning_rate": 4.48678094710292e-06, + "loss": 0.5998, + "step": 4306 + }, + { + "epoch": 0.6346267190569744, + "grad_norm": 0.6184017062187195, + "learning_rate": 4.486545596014712e-06, + "loss": 0.5589, + "step": 4307 + }, + { + "epoch": 0.6347740667976425, + "grad_norm": 0.594161868095398, + "learning_rate": 4.4863101971510135e-06, + "loss": 0.5828, + "step": 4308 + }, + { + "epoch": 0.6349214145383104, + "grad_norm": 0.5990014672279358, + "learning_rate": 4.486074750517487e-06, + "loss": 0.575, + "step": 4309 + }, + { + "epoch": 0.6350687622789783, + "grad_norm": 0.5766279697418213, + "learning_rate": 4.485839256119794e-06, + "loss": 0.5567, + "step": 4310 + }, + { + "epoch": 0.6352161100196464, + "grad_norm": 0.5656799674034119, + "learning_rate": 4.485603713963599e-06, + "loss": 0.5611, + "step": 4311 + }, + { + "epoch": 0.6353634577603143, + "grad_norm": 0.5635125041007996, + "learning_rate": 4.485368124054565e-06, + "loss": 0.5604, + "step": 4312 + }, + { + "epoch": 0.6355108055009823, + "grad_norm": 0.5837377309799194, + "learning_rate": 4.4851324863983595e-06, + "loss": 0.5745, + "step": 4313 + }, + { + "epoch": 0.6356581532416503, + "grad_norm": 0.579524040222168, + "learning_rate": 4.484896801000648e-06, + "loss": 0.5956, + "step": 4314 + }, + { + "epoch": 0.6358055009823183, + "grad_norm": 0.6329876780509949, + "learning_rate": 4.4846610678671e-06, + "loss": 0.5815, + "step": 4315 + }, + { + "epoch": 0.6359528487229863, + "grad_norm": 0.5674718618392944, + "learning_rate": 4.4844252870033845e-06, + "loss": 0.5567, + "step": 4316 + }, + { + "epoch": 0.6361001964636542, + "grad_norm": 0.5658427476882935, + "learning_rate": 4.484189458415171e-06, + "loss": 0.551, + "step": 4317 + }, + { + "epoch": 0.6362475442043222, + "grad_norm": 0.5475931763648987, + "learning_rate": 4.4839535821081315e-06, + "loss": 0.5913, + "step": 4318 + }, + { + "epoch": 0.6363948919449902, + "grad_norm": 0.5910772085189819, + "learning_rate": 4.483717658087939e-06, + "loss": 0.5766, + "step": 4319 + }, + { + "epoch": 0.6365422396856582, + "grad_norm": 0.5919469594955444, + "learning_rate": 4.483481686360268e-06, + "loss": 0.5858, + "step": 4320 + }, + { + "epoch": 0.6366895874263261, + "grad_norm": 0.5729435682296753, + "learning_rate": 4.483245666930793e-06, + "loss": 0.5744, + "step": 4321 + }, + { + "epoch": 0.6368369351669941, + "grad_norm": 0.5997937917709351, + "learning_rate": 4.483009599805188e-06, + "loss": 0.5613, + "step": 4322 + }, + { + "epoch": 0.6369842829076621, + "grad_norm": 0.5774239301681519, + "learning_rate": 4.482773484989133e-06, + "loss": 0.558, + "step": 4323 + }, + { + "epoch": 0.63713163064833, + "grad_norm": 0.5790361762046814, + "learning_rate": 4.482537322488306e-06, + "loss": 0.5916, + "step": 4324 + }, + { + "epoch": 0.637278978388998, + "grad_norm": 0.5668784976005554, + "learning_rate": 4.482301112308387e-06, + "loss": 0.5698, + "step": 4325 + }, + { + "epoch": 0.637426326129666, + "grad_norm": 0.6149380803108215, + "learning_rate": 4.482064854455055e-06, + "loss": 0.5814, + "step": 4326 + }, + { + "epoch": 0.637573673870334, + "grad_norm": 0.6090195178985596, + "learning_rate": 4.481828548933993e-06, + "loss": 0.6048, + "step": 4327 + }, + { + "epoch": 0.637721021611002, + "grad_norm": 0.6057997941970825, + "learning_rate": 4.481592195750885e-06, + "loss": 0.5944, + "step": 4328 + }, + { + "epoch": 0.6378683693516699, + "grad_norm": 0.6084753274917603, + "learning_rate": 4.481355794911413e-06, + "loss": 0.5464, + "step": 4329 + }, + { + "epoch": 0.638015717092338, + "grad_norm": 0.597519040107727, + "learning_rate": 4.481119346421264e-06, + "loss": 0.5977, + "step": 4330 + }, + { + "epoch": 0.6381630648330059, + "grad_norm": 0.5595676898956299, + "learning_rate": 4.4808828502861235e-06, + "loss": 0.5445, + "step": 4331 + }, + { + "epoch": 0.6383104125736738, + "grad_norm": 0.5651946067810059, + "learning_rate": 4.480646306511679e-06, + "loss": 0.5601, + "step": 4332 + }, + { + "epoch": 0.6384577603143419, + "grad_norm": 0.5592743158340454, + "learning_rate": 4.480409715103621e-06, + "loss": 0.5915, + "step": 4333 + }, + { + "epoch": 0.6386051080550098, + "grad_norm": 0.5871950387954712, + "learning_rate": 4.480173076067637e-06, + "loss": 0.6008, + "step": 4334 + }, + { + "epoch": 0.6387524557956779, + "grad_norm": 0.5894269943237305, + "learning_rate": 4.479936389409421e-06, + "loss": 0.592, + "step": 4335 + }, + { + "epoch": 0.6388998035363458, + "grad_norm": 0.6148300170898438, + "learning_rate": 4.479699655134662e-06, + "loss": 0.5522, + "step": 4336 + }, + { + "epoch": 0.6390471512770137, + "grad_norm": 0.551325798034668, + "learning_rate": 4.479462873249055e-06, + "loss": 0.5657, + "step": 4337 + }, + { + "epoch": 0.6391944990176818, + "grad_norm": 0.5721043944358826, + "learning_rate": 4.479226043758294e-06, + "loss": 0.5535, + "step": 4338 + }, + { + "epoch": 0.6393418467583497, + "grad_norm": 0.6123032569885254, + "learning_rate": 4.478989166668077e-06, + "loss": 0.5751, + "step": 4339 + }, + { + "epoch": 0.6394891944990176, + "grad_norm": 0.6109795570373535, + "learning_rate": 4.4787522419840965e-06, + "loss": 0.5728, + "step": 4340 + }, + { + "epoch": 0.6396365422396857, + "grad_norm": 0.5802956223487854, + "learning_rate": 4.478515269712054e-06, + "loss": 0.5507, + "step": 4341 + }, + { + "epoch": 0.6397838899803536, + "grad_norm": 0.5732927322387695, + "learning_rate": 4.4782782498576464e-06, + "loss": 0.5616, + "step": 4342 + }, + { + "epoch": 0.6399312377210216, + "grad_norm": 0.597430408000946, + "learning_rate": 4.478041182426575e-06, + "loss": 0.5602, + "step": 4343 + }, + { + "epoch": 0.6400785854616896, + "grad_norm": 0.5667052865028381, + "learning_rate": 4.477804067424541e-06, + "loss": 0.5696, + "step": 4344 + }, + { + "epoch": 0.6402259332023575, + "grad_norm": 0.5929152965545654, + "learning_rate": 4.477566904857247e-06, + "loss": 0.5713, + "step": 4345 + }, + { + "epoch": 0.6403732809430256, + "grad_norm": 0.5944862961769104, + "learning_rate": 4.4773296947303955e-06, + "loss": 0.5889, + "step": 4346 + }, + { + "epoch": 0.6405206286836935, + "grad_norm": 0.5558852553367615, + "learning_rate": 4.477092437049694e-06, + "loss": 0.5894, + "step": 4347 + }, + { + "epoch": 0.6406679764243615, + "grad_norm": 0.5766568779945374, + "learning_rate": 4.4768551318208455e-06, + "loss": 0.5401, + "step": 4348 + }, + { + "epoch": 0.6408153241650295, + "grad_norm": 0.6189402341842651, + "learning_rate": 4.476617779049558e-06, + "loss": 0.5613, + "step": 4349 + }, + { + "epoch": 0.6409626719056974, + "grad_norm": 0.5683311223983765, + "learning_rate": 4.476380378741541e-06, + "loss": 0.5607, + "step": 4350 + }, + { + "epoch": 0.6411100196463654, + "grad_norm": 0.5961191654205322, + "learning_rate": 4.476142930902502e-06, + "loss": 0.5337, + "step": 4351 + }, + { + "epoch": 0.6412573673870334, + "grad_norm": 0.6144964694976807, + "learning_rate": 4.475905435538152e-06, + "loss": 0.5792, + "step": 4352 + }, + { + "epoch": 0.6414047151277014, + "grad_norm": 0.5962293744087219, + "learning_rate": 4.475667892654204e-06, + "loss": 0.5999, + "step": 4353 + }, + { + "epoch": 0.6415520628683693, + "grad_norm": 0.5797423124313354, + "learning_rate": 4.47543030225637e-06, + "loss": 0.577, + "step": 4354 + }, + { + "epoch": 0.6416994106090373, + "grad_norm": 0.5647794604301453, + "learning_rate": 4.475192664350364e-06, + "loss": 0.5861, + "step": 4355 + }, + { + "epoch": 0.6418467583497053, + "grad_norm": 0.6051338315010071, + "learning_rate": 4.4749549789419e-06, + "loss": 0.5798, + "step": 4356 + }, + { + "epoch": 0.6419941060903733, + "grad_norm": 0.6003469824790955, + "learning_rate": 4.474717246036695e-06, + "loss": 0.5902, + "step": 4357 + }, + { + "epoch": 0.6421414538310413, + "grad_norm": 0.6049631834030151, + "learning_rate": 4.474479465640467e-06, + "loss": 0.5779, + "step": 4358 + }, + { + "epoch": 0.6422888015717092, + "grad_norm": 0.5660160779953003, + "learning_rate": 4.4742416377589335e-06, + "loss": 0.5664, + "step": 4359 + }, + { + "epoch": 0.6424361493123772, + "grad_norm": 0.6117616295814514, + "learning_rate": 4.474003762397815e-06, + "loss": 0.5663, + "step": 4360 + }, + { + "epoch": 0.6425834970530452, + "grad_norm": 0.5712698698043823, + "learning_rate": 4.473765839562831e-06, + "loss": 0.5787, + "step": 4361 + }, + { + "epoch": 0.6427308447937131, + "grad_norm": 0.58878493309021, + "learning_rate": 4.473527869259706e-06, + "loss": 0.549, + "step": 4362 + }, + { + "epoch": 0.6428781925343812, + "grad_norm": 0.5723150968551636, + "learning_rate": 4.47328985149416e-06, + "loss": 0.5844, + "step": 4363 + }, + { + "epoch": 0.6430255402750491, + "grad_norm": 0.6075319051742554, + "learning_rate": 4.473051786271919e-06, + "loss": 0.5503, + "step": 4364 + }, + { + "epoch": 0.643172888015717, + "grad_norm": 0.6041104793548584, + "learning_rate": 4.472813673598708e-06, + "loss": 0.5731, + "step": 4365 + }, + { + "epoch": 0.6433202357563851, + "grad_norm": 0.5686420202255249, + "learning_rate": 4.472575513480253e-06, + "loss": 0.5651, + "step": 4366 + }, + { + "epoch": 0.643467583497053, + "grad_norm": 0.6058536767959595, + "learning_rate": 4.472337305922282e-06, + "loss": 0.5241, + "step": 4367 + }, + { + "epoch": 0.6436149312377211, + "grad_norm": 0.594180703163147, + "learning_rate": 4.472099050930525e-06, + "loss": 0.5729, + "step": 4368 + }, + { + "epoch": 0.643762278978389, + "grad_norm": 0.5899102091789246, + "learning_rate": 4.47186074851071e-06, + "loss": 0.6033, + "step": 4369 + }, + { + "epoch": 0.6439096267190569, + "grad_norm": 0.6000953316688538, + "learning_rate": 4.471622398668569e-06, + "loss": 0.5741, + "step": 4370 + }, + { + "epoch": 0.644056974459725, + "grad_norm": 0.582639217376709, + "learning_rate": 4.4713840014098335e-06, + "loss": 0.5712, + "step": 4371 + }, + { + "epoch": 0.6442043222003929, + "grad_norm": 0.5604982376098633, + "learning_rate": 4.471145556740238e-06, + "loss": 0.5924, + "step": 4372 + }, + { + "epoch": 0.6443516699410609, + "grad_norm": 0.5949102640151978, + "learning_rate": 4.470907064665516e-06, + "loss": 0.5902, + "step": 4373 + }, + { + "epoch": 0.6444990176817289, + "grad_norm": 0.5747248530387878, + "learning_rate": 4.470668525191404e-06, + "loss": 0.5636, + "step": 4374 + }, + { + "epoch": 0.6446463654223968, + "grad_norm": 0.5671010613441467, + "learning_rate": 4.470429938323638e-06, + "loss": 0.54, + "step": 4375 + }, + { + "epoch": 0.6447937131630649, + "grad_norm": 0.5930233001708984, + "learning_rate": 4.470191304067956e-06, + "loss": 0.5803, + "step": 4376 + }, + { + "epoch": 0.6449410609037328, + "grad_norm": 0.5539402365684509, + "learning_rate": 4.469952622430099e-06, + "loss": 0.5578, + "step": 4377 + }, + { + "epoch": 0.6450884086444008, + "grad_norm": 0.587037205696106, + "learning_rate": 4.469713893415803e-06, + "loss": 0.5725, + "step": 4378 + }, + { + "epoch": 0.6452357563850688, + "grad_norm": 0.5763368010520935, + "learning_rate": 4.469475117030813e-06, + "loss": 0.5773, + "step": 4379 + }, + { + "epoch": 0.6453831041257367, + "grad_norm": 0.5764393210411072, + "learning_rate": 4.469236293280869e-06, + "loss": 0.5683, + "step": 4380 + }, + { + "epoch": 0.6455304518664047, + "grad_norm": 0.5665243268013, + "learning_rate": 4.4689974221717165e-06, + "loss": 0.5819, + "step": 4381 + }, + { + "epoch": 0.6456777996070727, + "grad_norm": 0.5537976026535034, + "learning_rate": 4.4687585037091e-06, + "loss": 0.6006, + "step": 4382 + }, + { + "epoch": 0.6458251473477407, + "grad_norm": 0.5688421726226807, + "learning_rate": 4.468519537898764e-06, + "loss": 0.5486, + "step": 4383 + }, + { + "epoch": 0.6459724950884086, + "grad_norm": 0.5693696737289429, + "learning_rate": 4.468280524746456e-06, + "loss": 0.5754, + "step": 4384 + }, + { + "epoch": 0.6461198428290766, + "grad_norm": 0.5553688406944275, + "learning_rate": 4.4680414642579265e-06, + "loss": 0.5791, + "step": 4385 + }, + { + "epoch": 0.6462671905697446, + "grad_norm": 0.5748080611228943, + "learning_rate": 4.467802356438922e-06, + "loss": 0.6047, + "step": 4386 + }, + { + "epoch": 0.6464145383104126, + "grad_norm": 0.5766944885253906, + "learning_rate": 4.467563201295193e-06, + "loss": 0.5714, + "step": 4387 + }, + { + "epoch": 0.6465618860510806, + "grad_norm": 0.5499194264411926, + "learning_rate": 4.467323998832492e-06, + "loss": 0.5476, + "step": 4388 + }, + { + "epoch": 0.6467092337917485, + "grad_norm": 0.5675786733627319, + "learning_rate": 4.467084749056573e-06, + "loss": 0.5708, + "step": 4389 + }, + { + "epoch": 0.6468565815324165, + "grad_norm": 0.582976222038269, + "learning_rate": 4.466845451973187e-06, + "loss": 0.5927, + "step": 4390 + }, + { + "epoch": 0.6470039292730845, + "grad_norm": 0.5769259929656982, + "learning_rate": 4.466606107588091e-06, + "loss": 0.59, + "step": 4391 + }, + { + "epoch": 0.6471512770137524, + "grad_norm": 0.5812665820121765, + "learning_rate": 4.46636671590704e-06, + "loss": 0.518, + "step": 4392 + }, + { + "epoch": 0.6472986247544205, + "grad_norm": 0.6288315653800964, + "learning_rate": 4.466127276935792e-06, + "loss": 0.5874, + "step": 4393 + }, + { + "epoch": 0.6474459724950884, + "grad_norm": 0.5815035700798035, + "learning_rate": 4.465887790680105e-06, + "loss": 0.5801, + "step": 4394 + }, + { + "epoch": 0.6475933202357563, + "grad_norm": 0.5758741497993469, + "learning_rate": 4.465648257145739e-06, + "loss": 0.5876, + "step": 4395 + }, + { + "epoch": 0.6477406679764244, + "grad_norm": 0.6079601049423218, + "learning_rate": 4.465408676338454e-06, + "loss": 0.5805, + "step": 4396 + }, + { + "epoch": 0.6478880157170923, + "grad_norm": 0.5833507180213928, + "learning_rate": 4.465169048264012e-06, + "loss": 0.5874, + "step": 4397 + }, + { + "epoch": 0.6480353634577604, + "grad_norm": 0.601245105266571, + "learning_rate": 4.464929372928177e-06, + "loss": 0.5752, + "step": 4398 + }, + { + "epoch": 0.6481827111984283, + "grad_norm": 0.5727366209030151, + "learning_rate": 4.464689650336712e-06, + "loss": 0.5643, + "step": 4399 + }, + { + "epoch": 0.6483300589390962, + "grad_norm": 0.5776528716087341, + "learning_rate": 4.464449880495382e-06, + "loss": 0.5508, + "step": 4400 + }, + { + "epoch": 0.6484774066797643, + "grad_norm": 0.582632839679718, + "learning_rate": 4.464210063409953e-06, + "loss": 0.5861, + "step": 4401 + }, + { + "epoch": 0.6486247544204322, + "grad_norm": 0.5555137991905212, + "learning_rate": 4.463970199086194e-06, + "loss": 0.5538, + "step": 4402 + }, + { + "epoch": 0.6487721021611002, + "grad_norm": 0.6355410218238831, + "learning_rate": 4.463730287529874e-06, + "loss": 0.5762, + "step": 4403 + }, + { + "epoch": 0.6489194499017682, + "grad_norm": 0.5573116540908813, + "learning_rate": 4.4634903287467605e-06, + "loss": 0.5653, + "step": 4404 + }, + { + "epoch": 0.6490667976424361, + "grad_norm": 0.5689885020256042, + "learning_rate": 4.463250322742625e-06, + "loss": 0.5218, + "step": 4405 + }, + { + "epoch": 0.6492141453831042, + "grad_norm": 0.5708058476448059, + "learning_rate": 4.46301026952324e-06, + "loss": 0.5835, + "step": 4406 + }, + { + "epoch": 0.6493614931237721, + "grad_norm": 0.5604806542396545, + "learning_rate": 4.46277016909438e-06, + "loss": 0.5744, + "step": 4407 + }, + { + "epoch": 0.64950884086444, + "grad_norm": 0.5795336365699768, + "learning_rate": 4.462530021461817e-06, + "loss": 0.5615, + "step": 4408 + }, + { + "epoch": 0.6496561886051081, + "grad_norm": 0.5539414286613464, + "learning_rate": 4.462289826631329e-06, + "loss": 0.5691, + "step": 4409 + }, + { + "epoch": 0.649803536345776, + "grad_norm": 0.6201887726783752, + "learning_rate": 4.46204958460869e-06, + "loss": 0.5649, + "step": 4410 + }, + { + "epoch": 0.649950884086444, + "grad_norm": 0.5792020559310913, + "learning_rate": 4.461809295399678e-06, + "loss": 0.5635, + "step": 4411 + }, + { + "epoch": 0.650098231827112, + "grad_norm": 0.6031951904296875, + "learning_rate": 4.461568959010073e-06, + "loss": 0.5342, + "step": 4412 + }, + { + "epoch": 0.65024557956778, + "grad_norm": 0.5669357776641846, + "learning_rate": 4.4613285754456545e-06, + "loss": 0.5526, + "step": 4413 + }, + { + "epoch": 0.6503929273084479, + "grad_norm": 0.5673928260803223, + "learning_rate": 4.4610881447122045e-06, + "loss": 0.5675, + "step": 4414 + }, + { + "epoch": 0.6505402750491159, + "grad_norm": 0.5764795541763306, + "learning_rate": 4.4608476668155035e-06, + "loss": 0.5814, + "step": 4415 + }, + { + "epoch": 0.6506876227897839, + "grad_norm": 0.55159592628479, + "learning_rate": 4.460607141761337e-06, + "loss": 0.5424, + "step": 4416 + }, + { + "epoch": 0.6508349705304519, + "grad_norm": 0.5713334083557129, + "learning_rate": 4.4603665695554874e-06, + "loss": 0.5273, + "step": 4417 + }, + { + "epoch": 0.6509823182711199, + "grad_norm": 0.5786926746368408, + "learning_rate": 4.46012595020374e-06, + "loss": 0.5674, + "step": 4418 + }, + { + "epoch": 0.6511296660117878, + "grad_norm": 0.5775808095932007, + "learning_rate": 4.459885283711886e-06, + "loss": 0.6036, + "step": 4419 + }, + { + "epoch": 0.6512770137524558, + "grad_norm": 0.5773966312408447, + "learning_rate": 4.459644570085708e-06, + "loss": 0.5904, + "step": 4420 + }, + { + "epoch": 0.6514243614931238, + "grad_norm": 0.5612008571624756, + "learning_rate": 4.459403809330997e-06, + "loss": 0.54, + "step": 4421 + }, + { + "epoch": 0.6515717092337917, + "grad_norm": 0.5777307748794556, + "learning_rate": 4.459163001453545e-06, + "loss": 0.6068, + "step": 4422 + }, + { + "epoch": 0.6517190569744598, + "grad_norm": 0.5856270790100098, + "learning_rate": 4.45892214645914e-06, + "loss": 0.5643, + "step": 4423 + }, + { + "epoch": 0.6518664047151277, + "grad_norm": 0.5603070855140686, + "learning_rate": 4.458681244353577e-06, + "loss": 0.5912, + "step": 4424 + }, + { + "epoch": 0.6520137524557956, + "grad_norm": 0.5738238096237183, + "learning_rate": 4.4584402951426485e-06, + "loss": 0.5599, + "step": 4425 + }, + { + "epoch": 0.6521611001964637, + "grad_norm": 0.5907599329948425, + "learning_rate": 4.4581992988321495e-06, + "loss": 0.5224, + "step": 4426 + }, + { + "epoch": 0.6523084479371316, + "grad_norm": 0.5831449627876282, + "learning_rate": 4.457958255427876e-06, + "loss": 0.5676, + "step": 4427 + }, + { + "epoch": 0.6524557956777997, + "grad_norm": 0.5613323450088501, + "learning_rate": 4.4577171649356255e-06, + "loss": 0.5741, + "step": 4428 + }, + { + "epoch": 0.6526031434184676, + "grad_norm": 0.5811100602149963, + "learning_rate": 4.457476027361195e-06, + "loss": 0.5917, + "step": 4429 + }, + { + "epoch": 0.6527504911591355, + "grad_norm": 0.5713329911231995, + "learning_rate": 4.457234842710383e-06, + "loss": 0.5395, + "step": 4430 + }, + { + "epoch": 0.6528978388998036, + "grad_norm": 0.5793220400810242, + "learning_rate": 4.4569936109889925e-06, + "loss": 0.5704, + "step": 4431 + }, + { + "epoch": 0.6530451866404715, + "grad_norm": 0.6026418209075928, + "learning_rate": 4.456752332202823e-06, + "loss": 0.5882, + "step": 4432 + }, + { + "epoch": 0.6531925343811394, + "grad_norm": 0.5547422170639038, + "learning_rate": 4.456511006357677e-06, + "loss": 0.5676, + "step": 4433 + }, + { + "epoch": 0.6533398821218075, + "grad_norm": 0.6477126479148865, + "learning_rate": 4.45626963345936e-06, + "loss": 0.5629, + "step": 4434 + }, + { + "epoch": 0.6534872298624754, + "grad_norm": 0.5929026007652283, + "learning_rate": 4.4560282135136754e-06, + "loss": 0.5553, + "step": 4435 + }, + { + "epoch": 0.6536345776031434, + "grad_norm": 0.5689057111740112, + "learning_rate": 4.455786746526429e-06, + "loss": 0.5597, + "step": 4436 + }, + { + "epoch": 0.6537819253438114, + "grad_norm": 0.6725549101829529, + "learning_rate": 4.4555452325034295e-06, + "loss": 0.5537, + "step": 4437 + }, + { + "epoch": 0.6539292730844793, + "grad_norm": 0.5988209247589111, + "learning_rate": 4.455303671450485e-06, + "loss": 0.5831, + "step": 4438 + }, + { + "epoch": 0.6540766208251474, + "grad_norm": 0.595184862613678, + "learning_rate": 4.4550620633734035e-06, + "loss": 0.5435, + "step": 4439 + }, + { + "epoch": 0.6542239685658153, + "grad_norm": 0.6073307394981384, + "learning_rate": 4.454820408277996e-06, + "loss": 0.5661, + "step": 4440 + }, + { + "epoch": 0.6543713163064833, + "grad_norm": 0.5431700348854065, + "learning_rate": 4.454578706170075e-06, + "loss": 0.5226, + "step": 4441 + }, + { + "epoch": 0.6545186640471513, + "grad_norm": 0.5949360728263855, + "learning_rate": 4.454336957055453e-06, + "loss": 0.5876, + "step": 4442 + }, + { + "epoch": 0.6546660117878192, + "grad_norm": 0.5782335996627808, + "learning_rate": 4.454095160939943e-06, + "loss": 0.596, + "step": 4443 + }, + { + "epoch": 0.6548133595284872, + "grad_norm": 0.5905597805976868, + "learning_rate": 4.453853317829362e-06, + "loss": 0.5622, + "step": 4444 + }, + { + "epoch": 0.6549607072691552, + "grad_norm": 0.5644593834877014, + "learning_rate": 4.453611427729524e-06, + "loss": 0.5806, + "step": 4445 + }, + { + "epoch": 0.6551080550098232, + "grad_norm": 0.5821254253387451, + "learning_rate": 4.4533694906462485e-06, + "loss": 0.5769, + "step": 4446 + }, + { + "epoch": 0.6552554027504912, + "grad_norm": 0.6213120222091675, + "learning_rate": 4.453127506585353e-06, + "loss": 0.5599, + "step": 4447 + }, + { + "epoch": 0.6554027504911591, + "grad_norm": 0.5567158460617065, + "learning_rate": 4.452885475552656e-06, + "loss": 0.5509, + "step": 4448 + }, + { + "epoch": 0.6555500982318271, + "grad_norm": 0.5623661875724792, + "learning_rate": 4.45264339755398e-06, + "loss": 0.5551, + "step": 4449 + }, + { + "epoch": 0.6556974459724951, + "grad_norm": 0.5903201699256897, + "learning_rate": 4.452401272595146e-06, + "loss": 0.6086, + "step": 4450 + }, + { + "epoch": 0.6558447937131631, + "grad_norm": 0.5638332962989807, + "learning_rate": 4.452159100681977e-06, + "loss": 0.544, + "step": 4451 + }, + { + "epoch": 0.655992141453831, + "grad_norm": 0.5725784301757812, + "learning_rate": 4.4519168818202974e-06, + "loss": 0.5743, + "step": 4452 + }, + { + "epoch": 0.656139489194499, + "grad_norm": 0.6157773733139038, + "learning_rate": 4.451674616015932e-06, + "loss": 0.5981, + "step": 4453 + }, + { + "epoch": 0.656286836935167, + "grad_norm": 0.5550537705421448, + "learning_rate": 4.451432303274708e-06, + "loss": 0.5697, + "step": 4454 + }, + { + "epoch": 0.6564341846758349, + "grad_norm": 0.6135299801826477, + "learning_rate": 4.451189943602452e-06, + "loss": 0.5952, + "step": 4455 + }, + { + "epoch": 0.656581532416503, + "grad_norm": 0.5774899125099182, + "learning_rate": 4.450947537004994e-06, + "loss": 0.5738, + "step": 4456 + }, + { + "epoch": 0.6567288801571709, + "grad_norm": 0.5687068104743958, + "learning_rate": 4.4507050834881615e-06, + "loss": 0.572, + "step": 4457 + }, + { + "epoch": 0.656876227897839, + "grad_norm": 0.5883556008338928, + "learning_rate": 4.450462583057787e-06, + "loss": 0.5647, + "step": 4458 + }, + { + "epoch": 0.6570235756385069, + "grad_norm": 0.5716426372528076, + "learning_rate": 4.450220035719702e-06, + "loss": 0.5671, + "step": 4459 + }, + { + "epoch": 0.6571709233791748, + "grad_norm": 0.6073064208030701, + "learning_rate": 4.44997744147974e-06, + "loss": 0.5797, + "step": 4460 + }, + { + "epoch": 0.6573182711198429, + "grad_norm": 0.5420489311218262, + "learning_rate": 4.449734800343736e-06, + "loss": 0.5875, + "step": 4461 + }, + { + "epoch": 0.6574656188605108, + "grad_norm": 0.5778927206993103, + "learning_rate": 4.449492112317523e-06, + "loss": 0.5691, + "step": 4462 + }, + { + "epoch": 0.6576129666011787, + "grad_norm": 0.5724616646766663, + "learning_rate": 4.4492493774069404e-06, + "loss": 0.5575, + "step": 4463 + }, + { + "epoch": 0.6577603143418468, + "grad_norm": 0.5909255743026733, + "learning_rate": 4.449006595617824e-06, + "loss": 0.5786, + "step": 4464 + }, + { + "epoch": 0.6579076620825147, + "grad_norm": 0.5646083950996399, + "learning_rate": 4.448763766956013e-06, + "loss": 0.5587, + "step": 4465 + }, + { + "epoch": 0.6580550098231827, + "grad_norm": 0.5989118814468384, + "learning_rate": 4.448520891427348e-06, + "loss": 0.5826, + "step": 4466 + }, + { + "epoch": 0.6582023575638507, + "grad_norm": 0.5689816474914551, + "learning_rate": 4.448277969037669e-06, + "loss": 0.5986, + "step": 4467 + }, + { + "epoch": 0.6583497053045186, + "grad_norm": 0.5541342496871948, + "learning_rate": 4.448034999792818e-06, + "loss": 0.5566, + "step": 4468 + }, + { + "epoch": 0.6584970530451867, + "grad_norm": 0.5642526149749756, + "learning_rate": 4.44779198369864e-06, + "loss": 0.5554, + "step": 4469 + }, + { + "epoch": 0.6586444007858546, + "grad_norm": 0.5622909665107727, + "learning_rate": 4.4475489207609776e-06, + "loss": 0.5466, + "step": 4470 + }, + { + "epoch": 0.6587917485265226, + "grad_norm": 0.6101922988891602, + "learning_rate": 4.447305810985677e-06, + "loss": 0.5966, + "step": 4471 + }, + { + "epoch": 0.6589390962671906, + "grad_norm": 0.5646790266036987, + "learning_rate": 4.447062654378585e-06, + "loss": 0.5389, + "step": 4472 + }, + { + "epoch": 0.6590864440078585, + "grad_norm": 0.5733986496925354, + "learning_rate": 4.446819450945551e-06, + "loss": 0.5947, + "step": 4473 + }, + { + "epoch": 0.6592337917485265, + "grad_norm": 0.5686771869659424, + "learning_rate": 4.446576200692421e-06, + "loss": 0.5374, + "step": 4474 + }, + { + "epoch": 0.6593811394891945, + "grad_norm": 0.5713015794754028, + "learning_rate": 4.4463329036250465e-06, + "loss": 0.5546, + "step": 4475 + }, + { + "epoch": 0.6595284872298625, + "grad_norm": 0.5856745839118958, + "learning_rate": 4.446089559749278e-06, + "loss": 0.5832, + "step": 4476 + }, + { + "epoch": 0.6596758349705305, + "grad_norm": 0.6039512157440186, + "learning_rate": 4.4458461690709694e-06, + "loss": 0.6027, + "step": 4477 + }, + { + "epoch": 0.6598231827111984, + "grad_norm": 0.6452032923698425, + "learning_rate": 4.4456027315959725e-06, + "loss": 0.6021, + "step": 4478 + }, + { + "epoch": 0.6599705304518664, + "grad_norm": 0.6258617043495178, + "learning_rate": 4.445359247330143e-06, + "loss": 0.5514, + "step": 4479 + }, + { + "epoch": 0.6601178781925344, + "grad_norm": 0.564216136932373, + "learning_rate": 4.445115716279335e-06, + "loss": 0.5543, + "step": 4480 + }, + { + "epoch": 0.6602652259332024, + "grad_norm": 0.6109811663627625, + "learning_rate": 4.444872138449408e-06, + "loss": 0.6008, + "step": 4481 + }, + { + "epoch": 0.6604125736738703, + "grad_norm": 0.6040644645690918, + "learning_rate": 4.444628513846217e-06, + "loss": 0.5679, + "step": 4482 + }, + { + "epoch": 0.6605599214145383, + "grad_norm": 0.5750970244407654, + "learning_rate": 4.444384842475622e-06, + "loss": 0.596, + "step": 4483 + }, + { + "epoch": 0.6607072691552063, + "grad_norm": 0.5906097888946533, + "learning_rate": 4.444141124343484e-06, + "loss": 0.5953, + "step": 4484 + }, + { + "epoch": 0.6608546168958742, + "grad_norm": 0.611907958984375, + "learning_rate": 4.443897359455664e-06, + "loss": 0.5672, + "step": 4485 + }, + { + "epoch": 0.6610019646365423, + "grad_norm": 0.5690088272094727, + "learning_rate": 4.443653547818024e-06, + "loss": 0.5825, + "step": 4486 + }, + { + "epoch": 0.6611493123772102, + "grad_norm": 0.6040909886360168, + "learning_rate": 4.443409689436427e-06, + "loss": 0.5756, + "step": 4487 + }, + { + "epoch": 0.6612966601178782, + "grad_norm": 0.6256014704704285, + "learning_rate": 4.44316578431674e-06, + "loss": 0.5235, + "step": 4488 + }, + { + "epoch": 0.6614440078585462, + "grad_norm": 0.6547619700431824, + "learning_rate": 4.442921832464827e-06, + "loss": 0.5645, + "step": 4489 + }, + { + "epoch": 0.6615913555992141, + "grad_norm": 0.5770155787467957, + "learning_rate": 4.442677833886555e-06, + "loss": 0.604, + "step": 4490 + }, + { + "epoch": 0.6617387033398822, + "grad_norm": 0.5934328436851501, + "learning_rate": 4.442433788587792e-06, + "loss": 0.596, + "step": 4491 + }, + { + "epoch": 0.6618860510805501, + "grad_norm": 0.5925421118736267, + "learning_rate": 4.442189696574408e-06, + "loss": 0.577, + "step": 4492 + }, + { + "epoch": 0.662033398821218, + "grad_norm": 0.5882105231285095, + "learning_rate": 4.441945557852272e-06, + "loss": 0.536, + "step": 4493 + }, + { + "epoch": 0.6621807465618861, + "grad_norm": 0.5945889949798584, + "learning_rate": 4.441701372427256e-06, + "loss": 0.5478, + "step": 4494 + }, + { + "epoch": 0.662328094302554, + "grad_norm": 0.5982955694198608, + "learning_rate": 4.441457140305234e-06, + "loss": 0.5588, + "step": 4495 + }, + { + "epoch": 0.662475442043222, + "grad_norm": 0.585934579372406, + "learning_rate": 4.441212861492078e-06, + "loss": 0.602, + "step": 4496 + }, + { + "epoch": 0.66262278978389, + "grad_norm": 0.5436350703239441, + "learning_rate": 4.440968535993661e-06, + "loss": 0.5509, + "step": 4497 + }, + { + "epoch": 0.6627701375245579, + "grad_norm": 0.55832439661026, + "learning_rate": 4.440724163815864e-06, + "loss": 0.5899, + "step": 4498 + }, + { + "epoch": 0.662917485265226, + "grad_norm": 0.5823162794113159, + "learning_rate": 4.44047974496456e-06, + "loss": 0.5575, + "step": 4499 + }, + { + "epoch": 0.6630648330058939, + "grad_norm": 0.5937305092811584, + "learning_rate": 4.440235279445628e-06, + "loss": 0.6133, + "step": 4500 + }, + { + "epoch": 0.6632121807465619, + "grad_norm": 0.5714017748832703, + "learning_rate": 4.439990767264948e-06, + "loss": 0.5946, + "step": 4501 + }, + { + "epoch": 0.6633595284872299, + "grad_norm": 0.6102770566940308, + "learning_rate": 4.4397462084283996e-06, + "loss": 0.5865, + "step": 4502 + }, + { + "epoch": 0.6635068762278978, + "grad_norm": 0.5512343645095825, + "learning_rate": 4.439501602941864e-06, + "loss": 0.5877, + "step": 4503 + }, + { + "epoch": 0.6636542239685658, + "grad_norm": 0.6031101942062378, + "learning_rate": 4.439256950811226e-06, + "loss": 0.5821, + "step": 4504 + }, + { + "epoch": 0.6638015717092338, + "grad_norm": 0.5655280947685242, + "learning_rate": 4.439012252042367e-06, + "loss": 0.5806, + "step": 4505 + }, + { + "epoch": 0.6639489194499018, + "grad_norm": 0.5580786466598511, + "learning_rate": 4.438767506641174e-06, + "loss": 0.5487, + "step": 4506 + }, + { + "epoch": 0.6640962671905698, + "grad_norm": 0.5692557692527771, + "learning_rate": 4.4385227146135305e-06, + "loss": 0.5825, + "step": 4507 + }, + { + "epoch": 0.6642436149312377, + "grad_norm": 0.5852710008621216, + "learning_rate": 4.438277875965325e-06, + "loss": 0.5951, + "step": 4508 + }, + { + "epoch": 0.6643909626719057, + "grad_norm": 0.5757617950439453, + "learning_rate": 4.438032990702445e-06, + "loss": 0.5933, + "step": 4509 + }, + { + "epoch": 0.6645383104125737, + "grad_norm": 0.5953415036201477, + "learning_rate": 4.437788058830782e-06, + "loss": 0.5908, + "step": 4510 + }, + { + "epoch": 0.6646856581532417, + "grad_norm": 0.5761964321136475, + "learning_rate": 4.4375430803562255e-06, + "loss": 0.5959, + "step": 4511 + }, + { + "epoch": 0.6648330058939096, + "grad_norm": 0.5644736289978027, + "learning_rate": 4.437298055284666e-06, + "loss": 0.5781, + "step": 4512 + }, + { + "epoch": 0.6649803536345776, + "grad_norm": 0.5834951996803284, + "learning_rate": 4.437052983621996e-06, + "loss": 0.5675, + "step": 4513 + }, + { + "epoch": 0.6651277013752456, + "grad_norm": 0.5633566379547119, + "learning_rate": 4.436807865374111e-06, + "loss": 0.5822, + "step": 4514 + }, + { + "epoch": 0.6652750491159135, + "grad_norm": 0.5846062898635864, + "learning_rate": 4.436562700546905e-06, + "loss": 0.5571, + "step": 4515 + }, + { + "epoch": 0.6654223968565816, + "grad_norm": 0.6220495700836182, + "learning_rate": 4.4363174891462745e-06, + "loss": 0.5705, + "step": 4516 + }, + { + "epoch": 0.6655697445972495, + "grad_norm": 0.5785233974456787, + "learning_rate": 4.436072231178117e-06, + "loss": 0.5956, + "step": 4517 + }, + { + "epoch": 0.6657170923379175, + "grad_norm": 0.6068530678749084, + "learning_rate": 4.4358269266483305e-06, + "loss": 0.579, + "step": 4518 + }, + { + "epoch": 0.6658644400785855, + "grad_norm": 0.6117302179336548, + "learning_rate": 4.435581575562814e-06, + "loss": 0.5857, + "step": 4519 + }, + { + "epoch": 0.6660117878192534, + "grad_norm": 0.5672574639320374, + "learning_rate": 4.435336177927468e-06, + "loss": 0.5564, + "step": 4520 + }, + { + "epoch": 0.6661591355599215, + "grad_norm": 0.604851484298706, + "learning_rate": 4.4350907337481955e-06, + "loss": 0.5528, + "step": 4521 + }, + { + "epoch": 0.6663064833005894, + "grad_norm": 0.578098475933075, + "learning_rate": 4.434845243030897e-06, + "loss": 0.571, + "step": 4522 + }, + { + "epoch": 0.6664538310412573, + "grad_norm": 0.5854591131210327, + "learning_rate": 4.434599705781479e-06, + "loss": 0.5768, + "step": 4523 + }, + { + "epoch": 0.6666011787819254, + "grad_norm": 0.5910122990608215, + "learning_rate": 4.434354122005846e-06, + "loss": 0.5609, + "step": 4524 + }, + { + "epoch": 0.6667485265225933, + "grad_norm": 0.5548622012138367, + "learning_rate": 4.4341084917099026e-06, + "loss": 0.5695, + "step": 4525 + }, + { + "epoch": 0.6668958742632612, + "grad_norm": 0.5677403807640076, + "learning_rate": 4.4338628148995575e-06, + "loss": 0.5598, + "step": 4526 + }, + { + "epoch": 0.6670432220039293, + "grad_norm": 0.5963663458824158, + "learning_rate": 4.433617091580719e-06, + "loss": 0.5529, + "step": 4527 + }, + { + "epoch": 0.6671905697445972, + "grad_norm": 0.5787600874900818, + "learning_rate": 4.433371321759296e-06, + "loss": 0.5669, + "step": 4528 + }, + { + "epoch": 0.6673379174852653, + "grad_norm": 0.6075705885887146, + "learning_rate": 4.4331255054412e-06, + "loss": 0.5911, + "step": 4529 + }, + { + "epoch": 0.6674852652259332, + "grad_norm": 0.6012109518051147, + "learning_rate": 4.4328796426323414e-06, + "loss": 0.5799, + "step": 4530 + }, + { + "epoch": 0.6676326129666011, + "grad_norm": 0.5788846611976624, + "learning_rate": 4.432633733338635e-06, + "loss": 0.58, + "step": 4531 + }, + { + "epoch": 0.6677799607072692, + "grad_norm": 0.5914950370788574, + "learning_rate": 4.432387777565993e-06, + "loss": 0.5399, + "step": 4532 + }, + { + "epoch": 0.6679273084479371, + "grad_norm": 0.562204897403717, + "learning_rate": 4.432141775320331e-06, + "loss": 0.5749, + "step": 4533 + }, + { + "epoch": 0.6680746561886051, + "grad_norm": 0.5996679067611694, + "learning_rate": 4.431895726607567e-06, + "loss": 0.5659, + "step": 4534 + }, + { + "epoch": 0.6682220039292731, + "grad_norm": 0.5792819857597351, + "learning_rate": 4.431649631433616e-06, + "loss": 0.598, + "step": 4535 + }, + { + "epoch": 0.668369351669941, + "grad_norm": 0.6039233207702637, + "learning_rate": 4.431403489804397e-06, + "loss": 0.5308, + "step": 4536 + }, + { + "epoch": 0.668516699410609, + "grad_norm": 0.578622579574585, + "learning_rate": 4.43115730172583e-06, + "loss": 0.5721, + "step": 4537 + }, + { + "epoch": 0.668664047151277, + "grad_norm": 0.5783876776695251, + "learning_rate": 4.430911067203836e-06, + "loss": 0.5558, + "step": 4538 + }, + { + "epoch": 0.668811394891945, + "grad_norm": 0.6026222109794617, + "learning_rate": 4.4306647862443365e-06, + "loss": 0.5829, + "step": 4539 + }, + { + "epoch": 0.668958742632613, + "grad_norm": 0.5945489406585693, + "learning_rate": 4.430418458853254e-06, + "loss": 0.5523, + "step": 4540 + }, + { + "epoch": 0.669106090373281, + "grad_norm": 0.5874488353729248, + "learning_rate": 4.430172085036514e-06, + "loss": 0.5783, + "step": 4541 + }, + { + "epoch": 0.6692534381139489, + "grad_norm": 0.5502488613128662, + "learning_rate": 4.42992566480004e-06, + "loss": 0.558, + "step": 4542 + }, + { + "epoch": 0.6694007858546169, + "grad_norm": 0.617928683757782, + "learning_rate": 4.429679198149759e-06, + "loss": 0.552, + "step": 4543 + }, + { + "epoch": 0.6695481335952849, + "grad_norm": 0.569605827331543, + "learning_rate": 4.429432685091598e-06, + "loss": 0.5662, + "step": 4544 + }, + { + "epoch": 0.6696954813359528, + "grad_norm": 0.6241242289543152, + "learning_rate": 4.429186125631486e-06, + "loss": 0.6172, + "step": 4545 + }, + { + "epoch": 0.6698428290766208, + "grad_norm": 0.5919626951217651, + "learning_rate": 4.428939519775354e-06, + "loss": 0.552, + "step": 4546 + }, + { + "epoch": 0.6699901768172888, + "grad_norm": 0.5867242217063904, + "learning_rate": 4.428692867529129e-06, + "loss": 0.5745, + "step": 4547 + }, + { + "epoch": 0.6701375245579568, + "grad_norm": 0.6016740202903748, + "learning_rate": 4.4284461688987464e-06, + "loss": 0.5814, + "step": 4548 + }, + { + "epoch": 0.6702848722986248, + "grad_norm": 0.6189945340156555, + "learning_rate": 4.428199423890137e-06, + "loss": 0.565, + "step": 4549 + }, + { + "epoch": 0.6704322200392927, + "grad_norm": 0.5858295559883118, + "learning_rate": 4.427952632509237e-06, + "loss": 0.5899, + "step": 4550 + }, + { + "epoch": 0.6705795677799608, + "grad_norm": 0.5764870643615723, + "learning_rate": 4.42770579476198e-06, + "loss": 0.5731, + "step": 4551 + }, + { + "epoch": 0.6707269155206287, + "grad_norm": 0.6118077039718628, + "learning_rate": 4.427458910654303e-06, + "loss": 0.5487, + "step": 4552 + }, + { + "epoch": 0.6708742632612966, + "grad_norm": 0.58146733045578, + "learning_rate": 4.427211980192142e-06, + "loss": 0.5581, + "step": 4553 + }, + { + "epoch": 0.6710216110019647, + "grad_norm": 0.5935811400413513, + "learning_rate": 4.426965003381438e-06, + "loss": 0.5994, + "step": 4554 + }, + { + "epoch": 0.6711689587426326, + "grad_norm": 0.58124178647995, + "learning_rate": 4.42671798022813e-06, + "loss": 0.5705, + "step": 4555 + }, + { + "epoch": 0.6713163064833005, + "grad_norm": 0.5695306658744812, + "learning_rate": 4.4264709107381575e-06, + "loss": 0.5614, + "step": 4556 + }, + { + "epoch": 0.6714636542239686, + "grad_norm": 0.5864295363426208, + "learning_rate": 4.426223794917463e-06, + "loss": 0.5909, + "step": 4557 + }, + { + "epoch": 0.6716110019646365, + "grad_norm": 0.6753764748573303, + "learning_rate": 4.42597663277199e-06, + "loss": 0.5501, + "step": 4558 + }, + { + "epoch": 0.6717583497053046, + "grad_norm": 0.5932819247245789, + "learning_rate": 4.425729424307682e-06, + "loss": 0.5608, + "step": 4559 + }, + { + "epoch": 0.6719056974459725, + "grad_norm": 0.5890439748764038, + "learning_rate": 4.425482169530485e-06, + "loss": 0.5617, + "step": 4560 + }, + { + "epoch": 0.6720530451866404, + "grad_norm": 0.5967613458633423, + "learning_rate": 4.425234868446344e-06, + "loss": 0.565, + "step": 4561 + }, + { + "epoch": 0.6722003929273085, + "grad_norm": 0.6014990210533142, + "learning_rate": 4.424987521061208e-06, + "loss": 0.5679, + "step": 4562 + }, + { + "epoch": 0.6723477406679764, + "grad_norm": 0.6606670618057251, + "learning_rate": 4.424740127381026e-06, + "loss": 0.5796, + "step": 4563 + }, + { + "epoch": 0.6724950884086444, + "grad_norm": 0.5695639848709106, + "learning_rate": 4.424492687411745e-06, + "loss": 0.5888, + "step": 4564 + }, + { + "epoch": 0.6726424361493124, + "grad_norm": 0.599985659122467, + "learning_rate": 4.424245201159318e-06, + "loss": 0.5616, + "step": 4565 + }, + { + "epoch": 0.6727897838899803, + "grad_norm": 0.5825404524803162, + "learning_rate": 4.423997668629697e-06, + "loss": 0.6161, + "step": 4566 + }, + { + "epoch": 0.6729371316306483, + "grad_norm": 0.5798940658569336, + "learning_rate": 4.423750089828834e-06, + "loss": 0.5419, + "step": 4567 + }, + { + "epoch": 0.6730844793713163, + "grad_norm": 0.5629483461380005, + "learning_rate": 4.423502464762684e-06, + "loss": 0.5778, + "step": 4568 + }, + { + "epoch": 0.6732318271119843, + "grad_norm": 0.5493709444999695, + "learning_rate": 4.423254793437202e-06, + "loss": 0.5588, + "step": 4569 + }, + { + "epoch": 0.6733791748526523, + "grad_norm": 0.5625666975975037, + "learning_rate": 4.423007075858344e-06, + "loss": 0.5309, + "step": 4570 + }, + { + "epoch": 0.6735265225933202, + "grad_norm": 0.5995171666145325, + "learning_rate": 4.422759312032068e-06, + "loss": 0.5712, + "step": 4571 + }, + { + "epoch": 0.6736738703339882, + "grad_norm": 0.5610697865486145, + "learning_rate": 4.4225115019643315e-06, + "loss": 0.5394, + "step": 4572 + }, + { + "epoch": 0.6738212180746562, + "grad_norm": 0.5599421858787537, + "learning_rate": 4.422263645661096e-06, + "loss": 0.551, + "step": 4573 + }, + { + "epoch": 0.6739685658153242, + "grad_norm": 0.6640001535415649, + "learning_rate": 4.4220157431283215e-06, + "loss": 0.5828, + "step": 4574 + }, + { + "epoch": 0.6741159135559921, + "grad_norm": 0.5789798498153687, + "learning_rate": 4.42176779437197e-06, + "loss": 0.5422, + "step": 4575 + }, + { + "epoch": 0.6742632612966601, + "grad_norm": 0.6004306674003601, + "learning_rate": 4.421519799398003e-06, + "loss": 0.5074, + "step": 4576 + }, + { + "epoch": 0.6744106090373281, + "grad_norm": 0.5790132284164429, + "learning_rate": 4.421271758212387e-06, + "loss": 0.5513, + "step": 4577 + }, + { + "epoch": 0.6745579567779961, + "grad_norm": 0.5847217440605164, + "learning_rate": 4.421023670821087e-06, + "loss": 0.6117, + "step": 4578 + }, + { + "epoch": 0.6747053045186641, + "grad_norm": 0.5925368070602417, + "learning_rate": 4.4207755372300675e-06, + "loss": 0.6028, + "step": 4579 + }, + { + "epoch": 0.674852652259332, + "grad_norm": 0.5800615549087524, + "learning_rate": 4.420527357445298e-06, + "loss": 0.575, + "step": 4580 + }, + { + "epoch": 0.675, + "grad_norm": 0.5918018817901611, + "learning_rate": 4.4202791314727465e-06, + "loss": 0.576, + "step": 4581 + }, + { + "epoch": 0.675147347740668, + "grad_norm": 0.5998877882957458, + "learning_rate": 4.420030859318381e-06, + "loss": 0.6105, + "step": 4582 + }, + { + "epoch": 0.6752946954813359, + "grad_norm": 0.5725364089012146, + "learning_rate": 4.419782540988176e-06, + "loss": 0.5943, + "step": 4583 + }, + { + "epoch": 0.675442043222004, + "grad_norm": 0.5887604355812073, + "learning_rate": 4.4195341764881e-06, + "loss": 0.5656, + "step": 4584 + }, + { + "epoch": 0.6755893909626719, + "grad_norm": 0.5794057250022888, + "learning_rate": 4.4192857658241284e-06, + "loss": 0.5822, + "step": 4585 + }, + { + "epoch": 0.6757367387033398, + "grad_norm": 0.5851329565048218, + "learning_rate": 4.419037309002233e-06, + "loss": 0.6139, + "step": 4586 + }, + { + "epoch": 0.6758840864440079, + "grad_norm": 0.5738150477409363, + "learning_rate": 4.418788806028391e-06, + "loss": 0.6019, + "step": 4587 + }, + { + "epoch": 0.6760314341846758, + "grad_norm": 0.5553366541862488, + "learning_rate": 4.418540256908579e-06, + "loss": 0.5586, + "step": 4588 + }, + { + "epoch": 0.6761787819253439, + "grad_norm": 0.5731503963470459, + "learning_rate": 4.418291661648773e-06, + "loss": 0.5629, + "step": 4589 + }, + { + "epoch": 0.6763261296660118, + "grad_norm": 0.5621981620788574, + "learning_rate": 4.4180430202549515e-06, + "loss": 0.5679, + "step": 4590 + }, + { + "epoch": 0.6764734774066797, + "grad_norm": 0.5971049070358276, + "learning_rate": 4.4177943327330965e-06, + "loss": 0.5368, + "step": 4591 + }, + { + "epoch": 0.6766208251473478, + "grad_norm": 0.5739446878433228, + "learning_rate": 4.417545599089186e-06, + "loss": 0.5649, + "step": 4592 + }, + { + "epoch": 0.6767681728880157, + "grad_norm": 0.5928981304168701, + "learning_rate": 4.417296819329204e-06, + "loss": 0.5854, + "step": 4593 + }, + { + "epoch": 0.6769155206286837, + "grad_norm": 0.5947955250740051, + "learning_rate": 4.417047993459133e-06, + "loss": 0.5487, + "step": 4594 + }, + { + "epoch": 0.6770628683693517, + "grad_norm": 0.5507635474205017, + "learning_rate": 4.416799121484956e-06, + "loss": 0.5846, + "step": 4595 + }, + { + "epoch": 0.6772102161100196, + "grad_norm": 0.5914395451545715, + "learning_rate": 4.416550203412659e-06, + "loss": 0.5728, + "step": 4596 + }, + { + "epoch": 0.6773575638506876, + "grad_norm": 0.5629754662513733, + "learning_rate": 4.416301239248229e-06, + "loss": 0.5816, + "step": 4597 + }, + { + "epoch": 0.6775049115913556, + "grad_norm": 0.6019145250320435, + "learning_rate": 4.416052228997654e-06, + "loss": 0.5566, + "step": 4598 + }, + { + "epoch": 0.6776522593320236, + "grad_norm": 0.55635005235672, + "learning_rate": 4.41580317266692e-06, + "loss": 0.5685, + "step": 4599 + }, + { + "epoch": 0.6777996070726916, + "grad_norm": 0.585409939289093, + "learning_rate": 4.415554070262019e-06, + "loss": 0.5523, + "step": 4600 + }, + { + "epoch": 0.6779469548133595, + "grad_norm": 0.5757809281349182, + "learning_rate": 4.415304921788941e-06, + "loss": 0.5712, + "step": 4601 + }, + { + "epoch": 0.6780943025540275, + "grad_norm": 0.5852257013320923, + "learning_rate": 4.415055727253677e-06, + "loss": 0.5749, + "step": 4602 + }, + { + "epoch": 0.6782416502946955, + "grad_norm": 0.5862535834312439, + "learning_rate": 4.414806486662223e-06, + "loss": 0.5532, + "step": 4603 + }, + { + "epoch": 0.6783889980353635, + "grad_norm": 0.5954148173332214, + "learning_rate": 4.41455720002057e-06, + "loss": 0.5857, + "step": 4604 + }, + { + "epoch": 0.6785363457760314, + "grad_norm": 0.588077962398529, + "learning_rate": 4.414307867334714e-06, + "loss": 0.5685, + "step": 4605 + }, + { + "epoch": 0.6786836935166994, + "grad_norm": 0.571867048740387, + "learning_rate": 4.414058488610652e-06, + "loss": 0.572, + "step": 4606 + }, + { + "epoch": 0.6788310412573674, + "grad_norm": 0.5725008249282837, + "learning_rate": 4.413809063854381e-06, + "loss": 0.5799, + "step": 4607 + }, + { + "epoch": 0.6789783889980353, + "grad_norm": 0.5865229964256287, + "learning_rate": 4.4135595930719e-06, + "loss": 0.5797, + "step": 4608 + }, + { + "epoch": 0.6791257367387034, + "grad_norm": 0.5628940463066101, + "learning_rate": 4.413310076269208e-06, + "loss": 0.5635, + "step": 4609 + }, + { + "epoch": 0.6792730844793713, + "grad_norm": 0.5575417280197144, + "learning_rate": 4.413060513452306e-06, + "loss": 0.5833, + "step": 4610 + }, + { + "epoch": 0.6794204322200393, + "grad_norm": 0.5673980116844177, + "learning_rate": 4.412810904627196e-06, + "loss": 0.5721, + "step": 4611 + }, + { + "epoch": 0.6795677799607073, + "grad_norm": 0.6186982989311218, + "learning_rate": 4.4125612497998806e-06, + "loss": 0.5811, + "step": 4612 + }, + { + "epoch": 0.6797151277013752, + "grad_norm": 0.596513569355011, + "learning_rate": 4.412311548976365e-06, + "loss": 0.5737, + "step": 4613 + }, + { + "epoch": 0.6798624754420433, + "grad_norm": 0.5643644332885742, + "learning_rate": 4.412061802162653e-06, + "loss": 0.5763, + "step": 4614 + }, + { + "epoch": 0.6800098231827112, + "grad_norm": 0.5710729956626892, + "learning_rate": 4.4118120093647515e-06, + "loss": 0.5991, + "step": 4615 + }, + { + "epoch": 0.6801571709233791, + "grad_norm": 0.5922512412071228, + "learning_rate": 4.411562170588668e-06, + "loss": 0.5844, + "step": 4616 + }, + { + "epoch": 0.6803045186640472, + "grad_norm": 0.5694136619567871, + "learning_rate": 4.411312285840411e-06, + "loss": 0.6006, + "step": 4617 + }, + { + "epoch": 0.6804518664047151, + "grad_norm": 0.5390284061431885, + "learning_rate": 4.411062355125989e-06, + "loss": 0.5539, + "step": 4618 + }, + { + "epoch": 0.6805992141453832, + "grad_norm": 0.5941581726074219, + "learning_rate": 4.410812378451414e-06, + "loss": 0.5684, + "step": 4619 + }, + { + "epoch": 0.6807465618860511, + "grad_norm": 0.5783143043518066, + "learning_rate": 4.410562355822699e-06, + "loss": 0.5257, + "step": 4620 + }, + { + "epoch": 0.680893909626719, + "grad_norm": 0.56678706407547, + "learning_rate": 4.410312287245854e-06, + "loss": 0.5552, + "step": 4621 + }, + { + "epoch": 0.6810412573673871, + "grad_norm": 0.6335793733596802, + "learning_rate": 4.410062172726896e-06, + "loss": 0.5944, + "step": 4622 + }, + { + "epoch": 0.681188605108055, + "grad_norm": 0.580886960029602, + "learning_rate": 4.409812012271837e-06, + "loss": 0.5719, + "step": 4623 + }, + { + "epoch": 0.681335952848723, + "grad_norm": 0.5759002566337585, + "learning_rate": 4.409561805886695e-06, + "loss": 0.6072, + "step": 4624 + }, + { + "epoch": 0.681483300589391, + "grad_norm": 0.6134092807769775, + "learning_rate": 4.409311553577488e-06, + "loss": 0.5483, + "step": 4625 + }, + { + "epoch": 0.6816306483300589, + "grad_norm": 0.5626240372657776, + "learning_rate": 4.409061255350233e-06, + "loss": 0.5678, + "step": 4626 + }, + { + "epoch": 0.6817779960707269, + "grad_norm": 0.5631334185600281, + "learning_rate": 4.4088109112109505e-06, + "loss": 0.5746, + "step": 4627 + }, + { + "epoch": 0.6819253438113949, + "grad_norm": 0.5896942615509033, + "learning_rate": 4.408560521165661e-06, + "loss": 0.6007, + "step": 4628 + }, + { + "epoch": 0.6820726915520628, + "grad_norm": 0.5581459999084473, + "learning_rate": 4.408310085220387e-06, + "loss": 0.5492, + "step": 4629 + }, + { + "epoch": 0.6822200392927309, + "grad_norm": 0.6029034852981567, + "learning_rate": 4.408059603381148e-06, + "loss": 0.6086, + "step": 4630 + }, + { + "epoch": 0.6823673870333988, + "grad_norm": 0.6249373555183411, + "learning_rate": 4.4078090756539725e-06, + "loss": 0.5984, + "step": 4631 + }, + { + "epoch": 0.6825147347740668, + "grad_norm": 0.5439174771308899, + "learning_rate": 4.407558502044883e-06, + "loss": 0.5432, + "step": 4632 + }, + { + "epoch": 0.6826620825147348, + "grad_norm": 0.56302809715271, + "learning_rate": 4.4073078825599054e-06, + "loss": 0.5468, + "step": 4633 + }, + { + "epoch": 0.6828094302554027, + "grad_norm": 0.5889176726341248, + "learning_rate": 4.40705721720507e-06, + "loss": 0.5614, + "step": 4634 + }, + { + "epoch": 0.6829567779960707, + "grad_norm": 0.5624769330024719, + "learning_rate": 4.406806505986401e-06, + "loss": 0.5787, + "step": 4635 + }, + { + "epoch": 0.6831041257367387, + "grad_norm": 0.5568370819091797, + "learning_rate": 4.4065557489099305e-06, + "loss": 0.5985, + "step": 4636 + }, + { + "epoch": 0.6832514734774067, + "grad_norm": 0.5947816967964172, + "learning_rate": 4.406304945981688e-06, + "loss": 0.5867, + "step": 4637 + }, + { + "epoch": 0.6833988212180746, + "grad_norm": 0.574122428894043, + "learning_rate": 4.406054097207707e-06, + "loss": 0.5415, + "step": 4638 + }, + { + "epoch": 0.6835461689587427, + "grad_norm": 0.580679178237915, + "learning_rate": 4.405803202594018e-06, + "loss": 0.5764, + "step": 4639 + }, + { + "epoch": 0.6836935166994106, + "grad_norm": 0.5712281465530396, + "learning_rate": 4.405552262146657e-06, + "loss": 0.5564, + "step": 4640 + }, + { + "epoch": 0.6838408644400786, + "grad_norm": 0.581551730632782, + "learning_rate": 4.405301275871656e-06, + "loss": 0.5654, + "step": 4641 + }, + { + "epoch": 0.6839882121807466, + "grad_norm": 0.583261251449585, + "learning_rate": 4.405050243775053e-06, + "loss": 0.5616, + "step": 4642 + }, + { + "epoch": 0.6841355599214145, + "grad_norm": 0.5860658884048462, + "learning_rate": 4.404799165862886e-06, + "loss": 0.5712, + "step": 4643 + }, + { + "epoch": 0.6842829076620826, + "grad_norm": 0.5528440475463867, + "learning_rate": 4.404548042141192e-06, + "loss": 0.5821, + "step": 4644 + }, + { + "epoch": 0.6844302554027505, + "grad_norm": 0.5991865396499634, + "learning_rate": 4.4042968726160115e-06, + "loss": 0.5628, + "step": 4645 + }, + { + "epoch": 0.6845776031434184, + "grad_norm": 0.5843717455863953, + "learning_rate": 4.404045657293385e-06, + "loss": 0.576, + "step": 4646 + }, + { + "epoch": 0.6847249508840865, + "grad_norm": 0.57865971326828, + "learning_rate": 4.403794396179352e-06, + "loss": 0.5964, + "step": 4647 + }, + { + "epoch": 0.6848722986247544, + "grad_norm": 0.5766008496284485, + "learning_rate": 4.403543089279957e-06, + "loss": 0.56, + "step": 4648 + }, + { + "epoch": 0.6850196463654225, + "grad_norm": 0.566778838634491, + "learning_rate": 4.403291736601245e-06, + "loss": 0.5632, + "step": 4649 + }, + { + "epoch": 0.6851669941060904, + "grad_norm": 0.5536060333251953, + "learning_rate": 4.403040338149258e-06, + "loss": 0.554, + "step": 4650 + }, + { + "epoch": 0.6853143418467583, + "grad_norm": 0.5620838403701782, + "learning_rate": 4.402788893930044e-06, + "loss": 0.5763, + "step": 4651 + }, + { + "epoch": 0.6854616895874264, + "grad_norm": 0.6161757707595825, + "learning_rate": 4.4025374039496485e-06, + "loss": 0.5694, + "step": 4652 + }, + { + "epoch": 0.6856090373280943, + "grad_norm": 0.6083515882492065, + "learning_rate": 4.402285868214122e-06, + "loss": 0.5553, + "step": 4653 + }, + { + "epoch": 0.6857563850687622, + "grad_norm": 0.5716081261634827, + "learning_rate": 4.402034286729512e-06, + "loss": 0.5282, + "step": 4654 + }, + { + "epoch": 0.6859037328094303, + "grad_norm": 0.5953242182731628, + "learning_rate": 4.401782659501869e-06, + "loss": 0.5603, + "step": 4655 + }, + { + "epoch": 0.6860510805500982, + "grad_norm": 0.6021009087562561, + "learning_rate": 4.401530986537246e-06, + "loss": 0.6057, + "step": 4656 + }, + { + "epoch": 0.6861984282907662, + "grad_norm": 0.6117413640022278, + "learning_rate": 4.401279267841695e-06, + "loss": 0.5928, + "step": 4657 + }, + { + "epoch": 0.6863457760314342, + "grad_norm": 0.5715383887290955, + "learning_rate": 4.401027503421268e-06, + "loss": 0.565, + "step": 4658 + }, + { + "epoch": 0.6864931237721021, + "grad_norm": 0.55782550573349, + "learning_rate": 4.400775693282022e-06, + "loss": 0.5498, + "step": 4659 + }, + { + "epoch": 0.6866404715127702, + "grad_norm": 0.5744534730911255, + "learning_rate": 4.400523837430013e-06, + "loss": 0.5648, + "step": 4660 + }, + { + "epoch": 0.6867878192534381, + "grad_norm": 0.6110382080078125, + "learning_rate": 4.400271935871295e-06, + "loss": 0.5804, + "step": 4661 + }, + { + "epoch": 0.6869351669941061, + "grad_norm": 0.5786530375480652, + "learning_rate": 4.40001998861193e-06, + "loss": 0.5586, + "step": 4662 + }, + { + "epoch": 0.6870825147347741, + "grad_norm": 0.5803412795066833, + "learning_rate": 4.399767995657974e-06, + "loss": 0.5547, + "step": 4663 + }, + { + "epoch": 0.687229862475442, + "grad_norm": 0.5623260140419006, + "learning_rate": 4.39951595701549e-06, + "loss": 0.6176, + "step": 4664 + }, + { + "epoch": 0.68737721021611, + "grad_norm": 0.5622225403785706, + "learning_rate": 4.399263872690538e-06, + "loss": 0.5624, + "step": 4665 + }, + { + "epoch": 0.687524557956778, + "grad_norm": 0.5531833171844482, + "learning_rate": 4.39901174268918e-06, + "loss": 0.586, + "step": 4666 + }, + { + "epoch": 0.687671905697446, + "grad_norm": 0.6180583238601685, + "learning_rate": 4.398759567017481e-06, + "loss": 0.5792, + "step": 4667 + }, + { + "epoch": 0.6878192534381139, + "grad_norm": 0.5609333515167236, + "learning_rate": 4.398507345681504e-06, + "loss": 0.5594, + "step": 4668 + }, + { + "epoch": 0.687966601178782, + "grad_norm": 0.5629006624221802, + "learning_rate": 4.398255078687317e-06, + "loss": 0.5565, + "step": 4669 + }, + { + "epoch": 0.6881139489194499, + "grad_norm": 0.5774564146995544, + "learning_rate": 4.398002766040985e-06, + "loss": 0.5912, + "step": 4670 + }, + { + "epoch": 0.6882612966601179, + "grad_norm": 0.5818188190460205, + "learning_rate": 4.397750407748577e-06, + "loss": 0.5935, + "step": 4671 + }, + { + "epoch": 0.6884086444007859, + "grad_norm": 0.5722780227661133, + "learning_rate": 4.397498003816161e-06, + "loss": 0.56, + "step": 4672 + }, + { + "epoch": 0.6885559921414538, + "grad_norm": 0.5828471183776855, + "learning_rate": 4.3972455542498085e-06, + "loss": 0.5767, + "step": 4673 + }, + { + "epoch": 0.6887033398821218, + "grad_norm": 0.5885579586029053, + "learning_rate": 4.396993059055591e-06, + "loss": 0.5604, + "step": 4674 + }, + { + "epoch": 0.6888506876227898, + "grad_norm": 0.600109338760376, + "learning_rate": 4.396740518239579e-06, + "loss": 0.5809, + "step": 4675 + }, + { + "epoch": 0.6889980353634577, + "grad_norm": 0.6254822611808777, + "learning_rate": 4.396487931807848e-06, + "loss": 0.5301, + "step": 4676 + }, + { + "epoch": 0.6891453831041258, + "grad_norm": 0.5736216306686401, + "learning_rate": 4.396235299766471e-06, + "loss": 0.575, + "step": 4677 + }, + { + "epoch": 0.6892927308447937, + "grad_norm": 0.6062886714935303, + "learning_rate": 4.395982622121525e-06, + "loss": 0.6129, + "step": 4678 + }, + { + "epoch": 0.6894400785854616, + "grad_norm": 0.5571427941322327, + "learning_rate": 4.395729898879086e-06, + "loss": 0.55, + "step": 4679 + }, + { + "epoch": 0.6895874263261297, + "grad_norm": 0.5491499304771423, + "learning_rate": 4.395477130045233e-06, + "loss": 0.5921, + "step": 4680 + }, + { + "epoch": 0.6897347740667976, + "grad_norm": 0.5553284287452698, + "learning_rate": 4.395224315626042e-06, + "loss": 0.563, + "step": 4681 + }, + { + "epoch": 0.6898821218074657, + "grad_norm": 0.5907211303710938, + "learning_rate": 4.394971455627597e-06, + "loss": 0.5823, + "step": 4682 + }, + { + "epoch": 0.6900294695481336, + "grad_norm": 0.5620044469833374, + "learning_rate": 4.394718550055976e-06, + "loss": 0.5598, + "step": 4683 + }, + { + "epoch": 0.6901768172888015, + "grad_norm": 0.559834897518158, + "learning_rate": 4.394465598917264e-06, + "loss": 0.5476, + "step": 4684 + }, + { + "epoch": 0.6903241650294696, + "grad_norm": 0.5840017795562744, + "learning_rate": 4.394212602217541e-06, + "loss": 0.574, + "step": 4685 + }, + { + "epoch": 0.6904715127701375, + "grad_norm": 0.5524498820304871, + "learning_rate": 4.3939595599628945e-06, + "loss": 0.5488, + "step": 4686 + }, + { + "epoch": 0.6906188605108055, + "grad_norm": 0.5700981020927429, + "learning_rate": 4.393706472159409e-06, + "loss": 0.5701, + "step": 4687 + }, + { + "epoch": 0.6907662082514735, + "grad_norm": 0.5818437337875366, + "learning_rate": 4.393453338813171e-06, + "loss": 0.5567, + "step": 4688 + }, + { + "epoch": 0.6909135559921414, + "grad_norm": 0.5561785101890564, + "learning_rate": 4.393200159930268e-06, + "loss": 0.5722, + "step": 4689 + }, + { + "epoch": 0.6910609037328095, + "grad_norm": 0.5902314782142639, + "learning_rate": 4.39294693551679e-06, + "loss": 0.5507, + "step": 4690 + }, + { + "epoch": 0.6912082514734774, + "grad_norm": 0.6222233176231384, + "learning_rate": 4.392693665578825e-06, + "loss": 0.562, + "step": 4691 + }, + { + "epoch": 0.6913555992141454, + "grad_norm": 0.6174062490463257, + "learning_rate": 4.392440350122465e-06, + "loss": 0.5421, + "step": 4692 + }, + { + "epoch": 0.6915029469548134, + "grad_norm": 0.601850152015686, + "learning_rate": 4.392186989153803e-06, + "loss": 0.5713, + "step": 4693 + }, + { + "epoch": 0.6916502946954813, + "grad_norm": 0.5642279982566833, + "learning_rate": 4.391933582678931e-06, + "loss": 0.5649, + "step": 4694 + }, + { + "epoch": 0.6917976424361493, + "grad_norm": 0.6010230779647827, + "learning_rate": 4.391680130703943e-06, + "loss": 0.5682, + "step": 4695 + }, + { + "epoch": 0.6919449901768173, + "grad_norm": 0.580098569393158, + "learning_rate": 4.391426633234937e-06, + "loss": 0.583, + "step": 4696 + }, + { + "epoch": 0.6920923379174853, + "grad_norm": 0.5662804245948792, + "learning_rate": 4.3911730902780055e-06, + "loss": 0.5672, + "step": 4697 + }, + { + "epoch": 0.6922396856581532, + "grad_norm": 0.5753039121627808, + "learning_rate": 4.390919501839249e-06, + "loss": 0.5941, + "step": 4698 + }, + { + "epoch": 0.6923870333988212, + "grad_norm": 0.5743362307548523, + "learning_rate": 4.390665867924766e-06, + "loss": 0.5713, + "step": 4699 + }, + { + "epoch": 0.6925343811394892, + "grad_norm": 0.5708368420600891, + "learning_rate": 4.3904121885406544e-06, + "loss": 0.5747, + "step": 4700 + }, + { + "epoch": 0.6926817288801572, + "grad_norm": 0.5885193943977356, + "learning_rate": 4.3901584636930175e-06, + "loss": 0.5646, + "step": 4701 + }, + { + "epoch": 0.6928290766208252, + "grad_norm": 0.5971992611885071, + "learning_rate": 4.3899046933879556e-06, + "loss": 0.5593, + "step": 4702 + }, + { + "epoch": 0.6929764243614931, + "grad_norm": 0.5819382071495056, + "learning_rate": 4.389650877631572e-06, + "loss": 0.547, + "step": 4703 + }, + { + "epoch": 0.6931237721021611, + "grad_norm": 0.610308825969696, + "learning_rate": 4.389397016429972e-06, + "loss": 0.558, + "step": 4704 + }, + { + "epoch": 0.6932711198428291, + "grad_norm": 0.6625985503196716, + "learning_rate": 4.389143109789259e-06, + "loss": 0.5841, + "step": 4705 + }, + { + "epoch": 0.693418467583497, + "grad_norm": 0.5817344188690186, + "learning_rate": 4.38888915771554e-06, + "loss": 0.589, + "step": 4706 + }, + { + "epoch": 0.6935658153241651, + "grad_norm": 0.590357780456543, + "learning_rate": 4.388635160214924e-06, + "loss": 0.6035, + "step": 4707 + }, + { + "epoch": 0.693713163064833, + "grad_norm": 0.5506422519683838, + "learning_rate": 4.3883811172935175e-06, + "loss": 0.5936, + "step": 4708 + }, + { + "epoch": 0.6938605108055009, + "grad_norm": 0.5645080208778381, + "learning_rate": 4.388127028957431e-06, + "loss": 0.567, + "step": 4709 + }, + { + "epoch": 0.694007858546169, + "grad_norm": 0.6508591771125793, + "learning_rate": 4.387872895212774e-06, + "loss": 0.5383, + "step": 4710 + }, + { + "epoch": 0.6941552062868369, + "grad_norm": 0.6518366932868958, + "learning_rate": 4.38761871606566e-06, + "loss": 0.5756, + "step": 4711 + }, + { + "epoch": 0.694302554027505, + "grad_norm": 0.5705069899559021, + "learning_rate": 4.3873644915222015e-06, + "loss": 0.5618, + "step": 4712 + }, + { + "epoch": 0.6944499017681729, + "grad_norm": 0.5668212175369263, + "learning_rate": 4.387110221588512e-06, + "loss": 0.5433, + "step": 4713 + }, + { + "epoch": 0.6945972495088408, + "grad_norm": 0.6383241415023804, + "learning_rate": 4.386855906270707e-06, + "loss": 0.571, + "step": 4714 + }, + { + "epoch": 0.6947445972495089, + "grad_norm": 0.5720488429069519, + "learning_rate": 4.386601545574903e-06, + "loss": 0.5424, + "step": 4715 + }, + { + "epoch": 0.6948919449901768, + "grad_norm": 0.6004454493522644, + "learning_rate": 4.386347139507216e-06, + "loss": 0.5528, + "step": 4716 + }, + { + "epoch": 0.6950392927308447, + "grad_norm": 0.5962433815002441, + "learning_rate": 4.3860926880737655e-06, + "loss": 0.5747, + "step": 4717 + }, + { + "epoch": 0.6951866404715128, + "grad_norm": 0.6334710121154785, + "learning_rate": 4.385838191280669e-06, + "loss": 0.5804, + "step": 4718 + }, + { + "epoch": 0.6953339882121807, + "grad_norm": 0.60092693567276, + "learning_rate": 4.38558364913405e-06, + "loss": 0.5553, + "step": 4719 + }, + { + "epoch": 0.6954813359528488, + "grad_norm": 0.6133226156234741, + "learning_rate": 4.385329061640028e-06, + "loss": 0.5822, + "step": 4720 + }, + { + "epoch": 0.6956286836935167, + "grad_norm": 0.6144272685050964, + "learning_rate": 4.385074428804727e-06, + "loss": 0.5747, + "step": 4721 + }, + { + "epoch": 0.6957760314341846, + "grad_norm": 0.5927139520645142, + "learning_rate": 4.384819750634269e-06, + "loss": 0.5677, + "step": 4722 + }, + { + "epoch": 0.6959233791748527, + "grad_norm": 0.6107029914855957, + "learning_rate": 4.384565027134781e-06, + "loss": 0.5397, + "step": 4723 + }, + { + "epoch": 0.6960707269155206, + "grad_norm": 0.5843642950057983, + "learning_rate": 4.384310258312388e-06, + "loss": 0.5847, + "step": 4724 + }, + { + "epoch": 0.6962180746561886, + "grad_norm": 0.5519974827766418, + "learning_rate": 4.384055444173216e-06, + "loss": 0.6047, + "step": 4725 + }, + { + "epoch": 0.6963654223968566, + "grad_norm": 0.5486780405044556, + "learning_rate": 4.383800584723396e-06, + "loss": 0.6066, + "step": 4726 + }, + { + "epoch": 0.6965127701375246, + "grad_norm": 0.587200403213501, + "learning_rate": 4.383545679969055e-06, + "loss": 0.5702, + "step": 4727 + }, + { + "epoch": 0.6966601178781925, + "grad_norm": 0.6242398023605347, + "learning_rate": 4.383290729916323e-06, + "loss": 0.5695, + "step": 4728 + }, + { + "epoch": 0.6968074656188605, + "grad_norm": 0.5928612351417542, + "learning_rate": 4.383035734571333e-06, + "loss": 0.5237, + "step": 4729 + }, + { + "epoch": 0.6969548133595285, + "grad_norm": 0.568626344203949, + "learning_rate": 4.382780693940216e-06, + "loss": 0.5679, + "step": 4730 + }, + { + "epoch": 0.6971021611001965, + "grad_norm": 0.6194708347320557, + "learning_rate": 4.382525608029107e-06, + "loss": 0.5228, + "step": 4731 + }, + { + "epoch": 0.6972495088408645, + "grad_norm": 0.5631119012832642, + "learning_rate": 4.382270476844139e-06, + "loss": 0.5535, + "step": 4732 + }, + { + "epoch": 0.6973968565815324, + "grad_norm": 0.5784069895744324, + "learning_rate": 4.38201530039145e-06, + "loss": 0.5704, + "step": 4733 + }, + { + "epoch": 0.6975442043222004, + "grad_norm": 0.586400032043457, + "learning_rate": 4.381760078677176e-06, + "loss": 0.5388, + "step": 4734 + }, + { + "epoch": 0.6976915520628684, + "grad_norm": 0.575776219367981, + "learning_rate": 4.381504811707455e-06, + "loss": 0.5394, + "step": 4735 + }, + { + "epoch": 0.6978388998035363, + "grad_norm": 0.593783974647522, + "learning_rate": 4.381249499488424e-06, + "loss": 0.5857, + "step": 4736 + }, + { + "epoch": 0.6979862475442044, + "grad_norm": 0.6025440096855164, + "learning_rate": 4.380994142026226e-06, + "loss": 0.578, + "step": 4737 + }, + { + "epoch": 0.6981335952848723, + "grad_norm": 0.5600822567939758, + "learning_rate": 4.380738739327001e-06, + "loss": 0.5982, + "step": 4738 + }, + { + "epoch": 0.6982809430255402, + "grad_norm": 0.6010894775390625, + "learning_rate": 4.380483291396891e-06, + "loss": 0.6181, + "step": 4739 + }, + { + "epoch": 0.6984282907662083, + "grad_norm": 0.5891285538673401, + "learning_rate": 4.380227798242041e-06, + "loss": 0.6074, + "step": 4740 + }, + { + "epoch": 0.6985756385068762, + "grad_norm": 0.5925010442733765, + "learning_rate": 4.379972259868593e-06, + "loss": 0.5654, + "step": 4741 + }, + { + "epoch": 0.6987229862475443, + "grad_norm": 0.6098121404647827, + "learning_rate": 4.379716676282694e-06, + "loss": 0.5682, + "step": 4742 + }, + { + "epoch": 0.6988703339882122, + "grad_norm": 0.6038902997970581, + "learning_rate": 4.379461047490492e-06, + "loss": 0.5672, + "step": 4743 + }, + { + "epoch": 0.6990176817288801, + "grad_norm": 0.5714973211288452, + "learning_rate": 4.379205373498132e-06, + "loss": 0.6003, + "step": 4744 + }, + { + "epoch": 0.6991650294695482, + "grad_norm": 0.5943643450737, + "learning_rate": 4.3789496543117645e-06, + "loss": 0.5727, + "step": 4745 + }, + { + "epoch": 0.6993123772102161, + "grad_norm": 0.6314318776130676, + "learning_rate": 4.3786938899375395e-06, + "loss": 0.6064, + "step": 4746 + }, + { + "epoch": 0.699459724950884, + "grad_norm": 0.5766987800598145, + "learning_rate": 4.378438080381607e-06, + "loss": 0.5542, + "step": 4747 + }, + { + "epoch": 0.6996070726915521, + "grad_norm": 0.5922217965126038, + "learning_rate": 4.378182225650121e-06, + "loss": 0.5212, + "step": 4748 + }, + { + "epoch": 0.69975442043222, + "grad_norm": 0.5695374608039856, + "learning_rate": 4.3779263257492325e-06, + "loss": 0.5764, + "step": 4749 + }, + { + "epoch": 0.699901768172888, + "grad_norm": 0.593249499797821, + "learning_rate": 4.3776703806850965e-06, + "loss": 0.5583, + "step": 4750 + }, + { + "epoch": 0.700049115913556, + "grad_norm": 0.5854286551475525, + "learning_rate": 4.377414390463868e-06, + "loss": 0.5759, + "step": 4751 + }, + { + "epoch": 0.7001964636542239, + "grad_norm": 0.5852760672569275, + "learning_rate": 4.377158355091704e-06, + "loss": 0.5869, + "step": 4752 + }, + { + "epoch": 0.700343811394892, + "grad_norm": 0.584938108921051, + "learning_rate": 4.376902274574763e-06, + "loss": 0.568, + "step": 4753 + }, + { + "epoch": 0.7004911591355599, + "grad_norm": 0.564212441444397, + "learning_rate": 4.3766461489192025e-06, + "loss": 0.5751, + "step": 4754 + }, + { + "epoch": 0.7006385068762279, + "grad_norm": 0.5983337759971619, + "learning_rate": 4.376389978131182e-06, + "loss": 0.591, + "step": 4755 + }, + { + "epoch": 0.7007858546168959, + "grad_norm": 0.5580597519874573, + "learning_rate": 4.376133762216862e-06, + "loss": 0.5342, + "step": 4756 + }, + { + "epoch": 0.7009332023575638, + "grad_norm": 0.5786446928977966, + "learning_rate": 4.3758775011824055e-06, + "loss": 0.5681, + "step": 4757 + }, + { + "epoch": 0.7010805500982318, + "grad_norm": 0.6172060966491699, + "learning_rate": 4.375621195033975e-06, + "loss": 0.5572, + "step": 4758 + }, + { + "epoch": 0.7012278978388998, + "grad_norm": 0.5823342204093933, + "learning_rate": 4.375364843777735e-06, + "loss": 0.59, + "step": 4759 + }, + { + "epoch": 0.7013752455795678, + "grad_norm": 0.5801482200622559, + "learning_rate": 4.375108447419849e-06, + "loss": 0.5332, + "step": 4760 + }, + { + "epoch": 0.7015225933202358, + "grad_norm": 0.5764120817184448, + "learning_rate": 4.3748520059664854e-06, + "loss": 0.6081, + "step": 4761 + }, + { + "epoch": 0.7016699410609037, + "grad_norm": 0.6010942459106445, + "learning_rate": 4.3745955194238095e-06, + "loss": 0.5497, + "step": 4762 + }, + { + "epoch": 0.7018172888015717, + "grad_norm": 0.613289475440979, + "learning_rate": 4.374338987797991e-06, + "loss": 0.563, + "step": 4763 + }, + { + "epoch": 0.7019646365422397, + "grad_norm": 0.6110879182815552, + "learning_rate": 4.3740824110952e-06, + "loss": 0.5912, + "step": 4764 + }, + { + "epoch": 0.7021119842829077, + "grad_norm": 0.5798676609992981, + "learning_rate": 4.373825789321604e-06, + "loss": 0.5708, + "step": 4765 + }, + { + "epoch": 0.7022593320235756, + "grad_norm": 0.5750716924667358, + "learning_rate": 4.3735691224833775e-06, + "loss": 0.5558, + "step": 4766 + }, + { + "epoch": 0.7024066797642436, + "grad_norm": 0.5712149143218994, + "learning_rate": 4.373312410586693e-06, + "loss": 0.6026, + "step": 4767 + }, + { + "epoch": 0.7025540275049116, + "grad_norm": 0.594089150428772, + "learning_rate": 4.373055653637723e-06, + "loss": 0.5612, + "step": 4768 + }, + { + "epoch": 0.7027013752455795, + "grad_norm": 0.5639338493347168, + "learning_rate": 4.372798851642643e-06, + "loss": 0.5946, + "step": 4769 + }, + { + "epoch": 0.7028487229862476, + "grad_norm": 0.6132320165634155, + "learning_rate": 4.37254200460763e-06, + "loss": 0.5943, + "step": 4770 + }, + { + "epoch": 0.7029960707269155, + "grad_norm": 0.5862087607383728, + "learning_rate": 4.3722851125388585e-06, + "loss": 0.5716, + "step": 4771 + }, + { + "epoch": 0.7031434184675835, + "grad_norm": 0.5818725228309631, + "learning_rate": 4.3720281754425096e-06, + "loss": 0.581, + "step": 4772 + }, + { + "epoch": 0.7032907662082515, + "grad_norm": 0.5931466817855835, + "learning_rate": 4.37177119332476e-06, + "loss": 0.5834, + "step": 4773 + }, + { + "epoch": 0.7034381139489194, + "grad_norm": 0.5877476334571838, + "learning_rate": 4.3715141661917915e-06, + "loss": 0.5887, + "step": 4774 + }, + { + "epoch": 0.7035854616895875, + "grad_norm": 0.6465587019920349, + "learning_rate": 4.371257094049786e-06, + "loss": 0.5639, + "step": 4775 + }, + { + "epoch": 0.7037328094302554, + "grad_norm": 0.5724374651908875, + "learning_rate": 4.3709999769049235e-06, + "loss": 0.5473, + "step": 4776 + }, + { + "epoch": 0.7038801571709233, + "grad_norm": 0.5901824831962585, + "learning_rate": 4.37074281476339e-06, + "loss": 0.586, + "step": 4777 + }, + { + "epoch": 0.7040275049115914, + "grad_norm": 0.6037982106208801, + "learning_rate": 4.37048560763137e-06, + "loss": 0.5782, + "step": 4778 + }, + { + "epoch": 0.7041748526522593, + "grad_norm": 0.6140215396881104, + "learning_rate": 4.3702283555150474e-06, + "loss": 0.5799, + "step": 4779 + }, + { + "epoch": 0.7043222003929273, + "grad_norm": 0.5803971886634827, + "learning_rate": 4.36997105842061e-06, + "loss": 0.5814, + "step": 4780 + }, + { + "epoch": 0.7044695481335953, + "grad_norm": 0.5893085598945618, + "learning_rate": 4.369713716354246e-06, + "loss": 0.6014, + "step": 4781 + }, + { + "epoch": 0.7046168958742632, + "grad_norm": 0.61434006690979, + "learning_rate": 4.369456329322143e-06, + "loss": 0.5498, + "step": 4782 + }, + { + "epoch": 0.7047642436149313, + "grad_norm": 0.5672570466995239, + "learning_rate": 4.369198897330493e-06, + "loss": 0.5622, + "step": 4783 + }, + { + "epoch": 0.7049115913555992, + "grad_norm": 0.5732830166816711, + "learning_rate": 4.368941420385487e-06, + "loss": 0.5748, + "step": 4784 + }, + { + "epoch": 0.7050589390962672, + "grad_norm": 0.5966798663139343, + "learning_rate": 4.368683898493315e-06, + "loss": 0.5696, + "step": 4785 + }, + { + "epoch": 0.7052062868369352, + "grad_norm": 0.5755050182342529, + "learning_rate": 4.3684263316601735e-06, + "loss": 0.5662, + "step": 4786 + }, + { + "epoch": 0.7053536345776031, + "grad_norm": 0.5883263349533081, + "learning_rate": 4.368168719892254e-06, + "loss": 0.5826, + "step": 4787 + }, + { + "epoch": 0.7055009823182711, + "grad_norm": 0.5746579170227051, + "learning_rate": 4.367911063195753e-06, + "loss": 0.5969, + "step": 4788 + }, + { + "epoch": 0.7056483300589391, + "grad_norm": 0.5910331606864929, + "learning_rate": 4.367653361576867e-06, + "loss": 0.5494, + "step": 4789 + }, + { + "epoch": 0.7057956777996071, + "grad_norm": 0.6382441520690918, + "learning_rate": 4.367395615041793e-06, + "loss": 0.5369, + "step": 4790 + }, + { + "epoch": 0.7059430255402751, + "grad_norm": 0.5712760090827942, + "learning_rate": 4.367137823596732e-06, + "loss": 0.5769, + "step": 4791 + }, + { + "epoch": 0.706090373280943, + "grad_norm": 0.6155206561088562, + "learning_rate": 4.366879987247881e-06, + "loss": 0.5679, + "step": 4792 + }, + { + "epoch": 0.706237721021611, + "grad_norm": 0.5895358920097351, + "learning_rate": 4.366622106001441e-06, + "loss": 0.5538, + "step": 4793 + }, + { + "epoch": 0.706385068762279, + "grad_norm": 0.592744767665863, + "learning_rate": 4.366364179863616e-06, + "loss": 0.5701, + "step": 4794 + }, + { + "epoch": 0.706532416502947, + "grad_norm": 0.5617288947105408, + "learning_rate": 4.366106208840608e-06, + "loss": 0.5386, + "step": 4795 + }, + { + "epoch": 0.7066797642436149, + "grad_norm": 0.5687915086746216, + "learning_rate": 4.365848192938621e-06, + "loss": 0.5917, + "step": 4796 + }, + { + "epoch": 0.7068271119842829, + "grad_norm": 0.6247605085372925, + "learning_rate": 4.36559013216386e-06, + "loss": 0.5615, + "step": 4797 + }, + { + "epoch": 0.7069744597249509, + "grad_norm": 0.5704158544540405, + "learning_rate": 4.365332026522531e-06, + "loss": 0.5857, + "step": 4798 + }, + { + "epoch": 0.7071218074656188, + "grad_norm": 0.5719661712646484, + "learning_rate": 4.365073876020842e-06, + "loss": 0.5716, + "step": 4799 + }, + { + "epoch": 0.7072691552062869, + "grad_norm": 0.5690080523490906, + "learning_rate": 4.364815680665e-06, + "loss": 0.5298, + "step": 4800 + }, + { + "epoch": 0.7074165029469548, + "grad_norm": 0.5670444965362549, + "learning_rate": 4.3645574404612165e-06, + "loss": 0.5893, + "step": 4801 + }, + { + "epoch": 0.7075638506876228, + "grad_norm": 0.5497636198997498, + "learning_rate": 4.364299155415701e-06, + "loss": 0.5232, + "step": 4802 + }, + { + "epoch": 0.7077111984282908, + "grad_norm": 0.5765066742897034, + "learning_rate": 4.364040825534665e-06, + "loss": 0.5615, + "step": 4803 + }, + { + "epoch": 0.7078585461689587, + "grad_norm": 0.5819323658943176, + "learning_rate": 4.363782450824322e-06, + "loss": 0.5713, + "step": 4804 + }, + { + "epoch": 0.7080058939096268, + "grad_norm": 0.5892776846885681, + "learning_rate": 4.363524031290884e-06, + "loss": 0.5828, + "step": 4805 + }, + { + "epoch": 0.7081532416502947, + "grad_norm": 0.542167603969574, + "learning_rate": 4.363265566940568e-06, + "loss": 0.5961, + "step": 4806 + }, + { + "epoch": 0.7083005893909626, + "grad_norm": 0.5570982694625854, + "learning_rate": 4.363007057779589e-06, + "loss": 0.5871, + "step": 4807 + }, + { + "epoch": 0.7084479371316307, + "grad_norm": 0.6166428923606873, + "learning_rate": 4.362748503814165e-06, + "loss": 0.5667, + "step": 4808 + }, + { + "epoch": 0.7085952848722986, + "grad_norm": 0.5996714234352112, + "learning_rate": 4.362489905050512e-06, + "loss": 0.555, + "step": 4809 + }, + { + "epoch": 0.7087426326129665, + "grad_norm": 0.5837660431861877, + "learning_rate": 4.36223126149485e-06, + "loss": 0.5499, + "step": 4810 + }, + { + "epoch": 0.7088899803536346, + "grad_norm": 0.5876431465148926, + "learning_rate": 4.3619725731534005e-06, + "loss": 0.5768, + "step": 4811 + }, + { + "epoch": 0.7090373280943025, + "grad_norm": 0.611047089099884, + "learning_rate": 4.361713840032383e-06, + "loss": 0.5711, + "step": 4812 + }, + { + "epoch": 0.7091846758349706, + "grad_norm": 0.5450125336647034, + "learning_rate": 4.361455062138021e-06, + "loss": 0.568, + "step": 4813 + }, + { + "epoch": 0.7093320235756385, + "grad_norm": 0.6296247839927673, + "learning_rate": 4.3611962394765385e-06, + "loss": 0.579, + "step": 4814 + }, + { + "epoch": 0.7094793713163065, + "grad_norm": 0.6090545654296875, + "learning_rate": 4.360937372054159e-06, + "loss": 0.5565, + "step": 4815 + }, + { + "epoch": 0.7096267190569745, + "grad_norm": 0.5752817392349243, + "learning_rate": 4.360678459877109e-06, + "loss": 0.5642, + "step": 4816 + }, + { + "epoch": 0.7097740667976424, + "grad_norm": 0.5816760659217834, + "learning_rate": 4.360419502951614e-06, + "loss": 0.5642, + "step": 4817 + }, + { + "epoch": 0.7099214145383104, + "grad_norm": 0.5455346703529358, + "learning_rate": 4.360160501283903e-06, + "loss": 0.5778, + "step": 4818 + }, + { + "epoch": 0.7100687622789784, + "grad_norm": 0.6215771436691284, + "learning_rate": 4.359901454880205e-06, + "loss": 0.5547, + "step": 4819 + }, + { + "epoch": 0.7102161100196464, + "grad_norm": 0.5645962357521057, + "learning_rate": 4.359642363746749e-06, + "loss": 0.5956, + "step": 4820 + }, + { + "epoch": 0.7103634577603143, + "grad_norm": 0.5988280177116394, + "learning_rate": 4.359383227889765e-06, + "loss": 0.5766, + "step": 4821 + }, + { + "epoch": 0.7105108055009823, + "grad_norm": 0.600161075592041, + "learning_rate": 4.3591240473154884e-06, + "loss": 0.5963, + "step": 4822 + }, + { + "epoch": 0.7106581532416503, + "grad_norm": 0.5836207270622253, + "learning_rate": 4.358864822030149e-06, + "loss": 0.5624, + "step": 4823 + }, + { + "epoch": 0.7108055009823183, + "grad_norm": 0.5825710892677307, + "learning_rate": 4.358605552039983e-06, + "loss": 0.5623, + "step": 4824 + }, + { + "epoch": 0.7109528487229863, + "grad_norm": 0.5573276281356812, + "learning_rate": 4.358346237351225e-06, + "loss": 0.5591, + "step": 4825 + }, + { + "epoch": 0.7111001964636542, + "grad_norm": 0.5728501081466675, + "learning_rate": 4.358086877970112e-06, + "loss": 0.5749, + "step": 4826 + }, + { + "epoch": 0.7112475442043222, + "grad_norm": 0.5610383152961731, + "learning_rate": 4.357827473902881e-06, + "loss": 0.5721, + "step": 4827 + }, + { + "epoch": 0.7113948919449902, + "grad_norm": 0.5823855996131897, + "learning_rate": 4.357568025155771e-06, + "loss": 0.5754, + "step": 4828 + }, + { + "epoch": 0.7115422396856581, + "grad_norm": 0.6085037589073181, + "learning_rate": 4.357308531735021e-06, + "loss": 0.595, + "step": 4829 + }, + { + "epoch": 0.7116895874263262, + "grad_norm": 0.5817249417304993, + "learning_rate": 4.3570489936468716e-06, + "loss": 0.5675, + "step": 4830 + }, + { + "epoch": 0.7118369351669941, + "grad_norm": 0.5785537958145142, + "learning_rate": 4.356789410897565e-06, + "loss": 0.5674, + "step": 4831 + }, + { + "epoch": 0.7119842829076621, + "grad_norm": 0.6031897664070129, + "learning_rate": 4.356529783493345e-06, + "loss": 0.6082, + "step": 4832 + }, + { + "epoch": 0.7121316306483301, + "grad_norm": 0.5759187340736389, + "learning_rate": 4.356270111440453e-06, + "loss": 0.585, + "step": 4833 + }, + { + "epoch": 0.712278978388998, + "grad_norm": 0.5722894668579102, + "learning_rate": 4.356010394745136e-06, + "loss": 0.5658, + "step": 4834 + }, + { + "epoch": 0.712426326129666, + "grad_norm": 0.592162549495697, + "learning_rate": 4.35575063341364e-06, + "loss": 0.564, + "step": 4835 + }, + { + "epoch": 0.712573673870334, + "grad_norm": 0.5925313830375671, + "learning_rate": 4.3554908274522105e-06, + "loss": 0.5525, + "step": 4836 + }, + { + "epoch": 0.7127210216110019, + "grad_norm": 0.5728654861450195, + "learning_rate": 4.355230976867097e-06, + "loss": 0.5707, + "step": 4837 + }, + { + "epoch": 0.71286836935167, + "grad_norm": 0.5576339960098267, + "learning_rate": 4.354971081664549e-06, + "loss": 0.5824, + "step": 4838 + }, + { + "epoch": 0.7130157170923379, + "grad_norm": 0.5846651196479797, + "learning_rate": 4.354711141850817e-06, + "loss": 0.5852, + "step": 4839 + }, + { + "epoch": 0.7131630648330058, + "grad_norm": 0.5705350637435913, + "learning_rate": 4.354451157432152e-06, + "loss": 0.57, + "step": 4840 + }, + { + "epoch": 0.7133104125736739, + "grad_norm": 0.5875198841094971, + "learning_rate": 4.354191128414806e-06, + "loss": 0.5746, + "step": 4841 + }, + { + "epoch": 0.7134577603143418, + "grad_norm": 0.5874246954917908, + "learning_rate": 4.353931054805034e-06, + "loss": 0.5624, + "step": 4842 + }, + { + "epoch": 0.7136051080550099, + "grad_norm": 0.5665212869644165, + "learning_rate": 4.353670936609089e-06, + "loss": 0.5622, + "step": 4843 + }, + { + "epoch": 0.7137524557956778, + "grad_norm": 0.5837354063987732, + "learning_rate": 4.353410773833227e-06, + "loss": 0.5502, + "step": 4844 + }, + { + "epoch": 0.7138998035363457, + "grad_norm": 0.5746813416481018, + "learning_rate": 4.3531505664837055e-06, + "loss": 0.5678, + "step": 4845 + }, + { + "epoch": 0.7140471512770138, + "grad_norm": 0.5884709358215332, + "learning_rate": 4.3528903145667824e-06, + "loss": 0.5589, + "step": 4846 + }, + { + "epoch": 0.7141944990176817, + "grad_norm": 0.5920301079750061, + "learning_rate": 4.3526300180887156e-06, + "loss": 0.5569, + "step": 4847 + }, + { + "epoch": 0.7143418467583497, + "grad_norm": 0.5898815393447876, + "learning_rate": 4.352369677055765e-06, + "loss": 0.5668, + "step": 4848 + }, + { + "epoch": 0.7144891944990177, + "grad_norm": 0.5660265684127808, + "learning_rate": 4.352109291474195e-06, + "loss": 0.5486, + "step": 4849 + }, + { + "epoch": 0.7146365422396856, + "grad_norm": 0.5627000331878662, + "learning_rate": 4.351848861350263e-06, + "loss": 0.5607, + "step": 4850 + }, + { + "epoch": 0.7147838899803536, + "grad_norm": 0.5766807198524475, + "learning_rate": 4.351588386690235e-06, + "loss": 0.5603, + "step": 4851 + }, + { + "epoch": 0.7149312377210216, + "grad_norm": 0.5848796963691711, + "learning_rate": 4.351327867500376e-06, + "loss": 0.5661, + "step": 4852 + }, + { + "epoch": 0.7150785854616896, + "grad_norm": 0.5813462734222412, + "learning_rate": 4.351067303786949e-06, + "loss": 0.5664, + "step": 4853 + }, + { + "epoch": 0.7152259332023576, + "grad_norm": 0.6484068036079407, + "learning_rate": 4.350806695556221e-06, + "loss": 0.5421, + "step": 4854 + }, + { + "epoch": 0.7153732809430255, + "grad_norm": 0.5739293098449707, + "learning_rate": 4.350546042814461e-06, + "loss": 0.6185, + "step": 4855 + }, + { + "epoch": 0.7155206286836935, + "grad_norm": 0.5793870687484741, + "learning_rate": 4.350285345567937e-06, + "loss": 0.5902, + "step": 4856 + }, + { + "epoch": 0.7156679764243615, + "grad_norm": 0.6283004879951477, + "learning_rate": 4.350024603822917e-06, + "loss": 0.5628, + "step": 4857 + }, + { + "epoch": 0.7158153241650295, + "grad_norm": 0.6110396981239319, + "learning_rate": 4.349763817585674e-06, + "loss": 0.5645, + "step": 4858 + }, + { + "epoch": 0.7159626719056974, + "grad_norm": 0.5742848515510559, + "learning_rate": 4.349502986862479e-06, + "loss": 0.5738, + "step": 4859 + }, + { + "epoch": 0.7161100196463654, + "grad_norm": 0.5619089603424072, + "learning_rate": 4.349242111659603e-06, + "loss": 0.5986, + "step": 4860 + }, + { + "epoch": 0.7162573673870334, + "grad_norm": 0.5509751439094543, + "learning_rate": 4.3489811919833225e-06, + "loss": 0.5684, + "step": 4861 + }, + { + "epoch": 0.7164047151277014, + "grad_norm": 0.5667873620986938, + "learning_rate": 4.348720227839912e-06, + "loss": 0.524, + "step": 4862 + }, + { + "epoch": 0.7165520628683694, + "grad_norm": 0.5636016130447388, + "learning_rate": 4.348459219235647e-06, + "loss": 0.5763, + "step": 4863 + }, + { + "epoch": 0.7166994106090373, + "grad_norm": 0.5845561623573303, + "learning_rate": 4.348198166176805e-06, + "loss": 0.6065, + "step": 4864 + }, + { + "epoch": 0.7168467583497053, + "grad_norm": 0.5961069464683533, + "learning_rate": 4.347937068669663e-06, + "loss": 0.5631, + "step": 4865 + }, + { + "epoch": 0.7169941060903733, + "grad_norm": 0.6192647814750671, + "learning_rate": 4.3476759267205015e-06, + "loss": 0.5931, + "step": 4866 + }, + { + "epoch": 0.7171414538310412, + "grad_norm": 0.5827889442443848, + "learning_rate": 4.347414740335601e-06, + "loss": 0.5614, + "step": 4867 + }, + { + "epoch": 0.7172888015717093, + "grad_norm": 0.6088237762451172, + "learning_rate": 4.347153509521242e-06, + "loss": 0.5615, + "step": 4868 + }, + { + "epoch": 0.7174361493123772, + "grad_norm": 0.582711935043335, + "learning_rate": 4.346892234283708e-06, + "loss": 0.5561, + "step": 4869 + }, + { + "epoch": 0.7175834970530451, + "grad_norm": 0.5726487636566162, + "learning_rate": 4.346630914629282e-06, + "loss": 0.5701, + "step": 4870 + }, + { + "epoch": 0.7177308447937132, + "grad_norm": 0.5690292119979858, + "learning_rate": 4.346369550564248e-06, + "loss": 0.5954, + "step": 4871 + }, + { + "epoch": 0.7178781925343811, + "grad_norm": 0.539833664894104, + "learning_rate": 4.346108142094892e-06, + "loss": 0.5401, + "step": 4872 + }, + { + "epoch": 0.7180255402750492, + "grad_norm": 0.6297300457954407, + "learning_rate": 4.345846689227502e-06, + "loss": 0.5697, + "step": 4873 + }, + { + "epoch": 0.7181728880157171, + "grad_norm": 0.5616930723190308, + "learning_rate": 4.345585191968365e-06, + "loss": 0.5462, + "step": 4874 + }, + { + "epoch": 0.718320235756385, + "grad_norm": 0.5582658648490906, + "learning_rate": 4.34532365032377e-06, + "loss": 0.5592, + "step": 4875 + }, + { + "epoch": 0.7184675834970531, + "grad_norm": 0.5585818290710449, + "learning_rate": 4.345062064300006e-06, + "loss": 0.5601, + "step": 4876 + }, + { + "epoch": 0.718614931237721, + "grad_norm": 0.5820412635803223, + "learning_rate": 4.3448004339033645e-06, + "loss": 0.5411, + "step": 4877 + }, + { + "epoch": 0.718762278978389, + "grad_norm": 0.574055552482605, + "learning_rate": 4.344538759140138e-06, + "loss": 0.5852, + "step": 4878 + }, + { + "epoch": 0.718909626719057, + "grad_norm": 0.5882728099822998, + "learning_rate": 4.34427704001662e-06, + "loss": 0.5973, + "step": 4879 + }, + { + "epoch": 0.7190569744597249, + "grad_norm": 0.5866184830665588, + "learning_rate": 4.3440152765391045e-06, + "loss": 0.5938, + "step": 4880 + }, + { + "epoch": 0.7192043222003929, + "grad_norm": 0.5656053423881531, + "learning_rate": 4.343753468713885e-06, + "loss": 0.6017, + "step": 4881 + }, + { + "epoch": 0.7193516699410609, + "grad_norm": 0.5849009156227112, + "learning_rate": 4.3434916165472604e-06, + "loss": 0.5752, + "step": 4882 + }, + { + "epoch": 0.7194990176817289, + "grad_norm": 0.5765342116355896, + "learning_rate": 4.343229720045527e-06, + "loss": 0.5844, + "step": 4883 + }, + { + "epoch": 0.7196463654223969, + "grad_norm": 0.5758318901062012, + "learning_rate": 4.342967779214983e-06, + "loss": 0.5865, + "step": 4884 + }, + { + "epoch": 0.7197937131630648, + "grad_norm": 0.6212090849876404, + "learning_rate": 4.342705794061929e-06, + "loss": 0.5761, + "step": 4885 + }, + { + "epoch": 0.7199410609037328, + "grad_norm": 0.6006289124488831, + "learning_rate": 4.342443764592664e-06, + "loss": 0.6056, + "step": 4886 + }, + { + "epoch": 0.7200884086444008, + "grad_norm": 0.5751615166664124, + "learning_rate": 4.342181690813492e-06, + "loss": 0.5875, + "step": 4887 + }, + { + "epoch": 0.7202357563850688, + "grad_norm": 0.5476275086402893, + "learning_rate": 4.341919572730713e-06, + "loss": 0.5758, + "step": 4888 + }, + { + "epoch": 0.7203831041257367, + "grad_norm": 0.5712285041809082, + "learning_rate": 4.341657410350632e-06, + "loss": 0.5299, + "step": 4889 + }, + { + "epoch": 0.7205304518664047, + "grad_norm": 0.588533341884613, + "learning_rate": 4.341395203679555e-06, + "loss": 0.5559, + "step": 4890 + }, + { + "epoch": 0.7206777996070727, + "grad_norm": 0.5852192640304565, + "learning_rate": 4.341132952723787e-06, + "loss": 0.5571, + "step": 4891 + }, + { + "epoch": 0.7208251473477406, + "grad_norm": 0.5915302038192749, + "learning_rate": 4.340870657489635e-06, + "loss": 0.5745, + "step": 4892 + }, + { + "epoch": 0.7209724950884087, + "grad_norm": 0.6032344102859497, + "learning_rate": 4.340608317983407e-06, + "loss": 0.572, + "step": 4893 + }, + { + "epoch": 0.7211198428290766, + "grad_norm": 0.5525994300842285, + "learning_rate": 4.3403459342114115e-06, + "loss": 0.5584, + "step": 4894 + }, + { + "epoch": 0.7212671905697446, + "grad_norm": 0.5936063528060913, + "learning_rate": 4.340083506179961e-06, + "loss": 0.5401, + "step": 4895 + }, + { + "epoch": 0.7214145383104126, + "grad_norm": 0.6146224141120911, + "learning_rate": 4.339821033895364e-06, + "loss": 0.5909, + "step": 4896 + }, + { + "epoch": 0.7215618860510805, + "grad_norm": 0.5596509575843811, + "learning_rate": 4.339558517363935e-06, + "loss": 0.5614, + "step": 4897 + }, + { + "epoch": 0.7217092337917486, + "grad_norm": 0.5660408139228821, + "learning_rate": 4.339295956591985e-06, + "loss": 0.5472, + "step": 4898 + }, + { + "epoch": 0.7218565815324165, + "grad_norm": 0.5924898386001587, + "learning_rate": 4.339033351585831e-06, + "loss": 0.5678, + "step": 4899 + }, + { + "epoch": 0.7220039292730844, + "grad_norm": 0.5815090537071228, + "learning_rate": 4.338770702351787e-06, + "loss": 0.5396, + "step": 4900 + }, + { + "epoch": 0.7221512770137525, + "grad_norm": 0.5690509676933289, + "learning_rate": 4.3385080088961705e-06, + "loss": 0.5677, + "step": 4901 + }, + { + "epoch": 0.7222986247544204, + "grad_norm": 0.6002525091171265, + "learning_rate": 4.338245271225299e-06, + "loss": 0.5526, + "step": 4902 + }, + { + "epoch": 0.7224459724950885, + "grad_norm": 0.5707238912582397, + "learning_rate": 4.33798248934549e-06, + "loss": 0.574, + "step": 4903 + }, + { + "epoch": 0.7225933202357564, + "grad_norm": 0.5802203416824341, + "learning_rate": 4.3377196632630646e-06, + "loss": 0.5538, + "step": 4904 + }, + { + "epoch": 0.7227406679764243, + "grad_norm": 0.5536789894104004, + "learning_rate": 4.337456792984343e-06, + "loss": 0.5643, + "step": 4905 + }, + { + "epoch": 0.7228880157170924, + "grad_norm": 0.5498889088630676, + "learning_rate": 4.337193878515647e-06, + "loss": 0.556, + "step": 4906 + }, + { + "epoch": 0.7230353634577603, + "grad_norm": 0.5929723381996155, + "learning_rate": 4.336930919863301e-06, + "loss": 0.5831, + "step": 4907 + }, + { + "epoch": 0.7231827111984283, + "grad_norm": 0.5861184597015381, + "learning_rate": 4.336667917033628e-06, + "loss": 0.5802, + "step": 4908 + }, + { + "epoch": 0.7233300589390963, + "grad_norm": 0.5749788284301758, + "learning_rate": 4.336404870032952e-06, + "loss": 0.5672, + "step": 4909 + }, + { + "epoch": 0.7234774066797642, + "grad_norm": 0.5653027892112732, + "learning_rate": 4.336141778867601e-06, + "loss": 0.5534, + "step": 4910 + }, + { + "epoch": 0.7236247544204322, + "grad_norm": 0.5716651678085327, + "learning_rate": 4.335878643543901e-06, + "loss": 0.5735, + "step": 4911 + }, + { + "epoch": 0.7237721021611002, + "grad_norm": 0.5960413217544556, + "learning_rate": 4.335615464068181e-06, + "loss": 0.5683, + "step": 4912 + }, + { + "epoch": 0.7239194499017682, + "grad_norm": 0.6076773405075073, + "learning_rate": 4.33535224044677e-06, + "loss": 0.5728, + "step": 4913 + }, + { + "epoch": 0.7240667976424362, + "grad_norm": 0.563842236995697, + "learning_rate": 4.335088972685998e-06, + "loss": 0.5619, + "step": 4914 + }, + { + "epoch": 0.7242141453831041, + "grad_norm": 0.582249641418457, + "learning_rate": 4.334825660792197e-06, + "loss": 0.5708, + "step": 4915 + }, + { + "epoch": 0.7243614931237721, + "grad_norm": 0.5637404322624207, + "learning_rate": 4.3345623047717e-06, + "loss": 0.5608, + "step": 4916 + }, + { + "epoch": 0.7245088408644401, + "grad_norm": 0.5635576844215393, + "learning_rate": 4.334298904630839e-06, + "loss": 0.573, + "step": 4917 + }, + { + "epoch": 0.724656188605108, + "grad_norm": 0.5553464889526367, + "learning_rate": 4.334035460375951e-06, + "loss": 0.568, + "step": 4918 + }, + { + "epoch": 0.724803536345776, + "grad_norm": 0.5697479248046875, + "learning_rate": 4.33377197201337e-06, + "loss": 0.5541, + "step": 4919 + }, + { + "epoch": 0.724950884086444, + "grad_norm": 0.5534694194793701, + "learning_rate": 4.333508439549433e-06, + "loss": 0.5604, + "step": 4920 + }, + { + "epoch": 0.725098231827112, + "grad_norm": 0.5802233219146729, + "learning_rate": 4.333244862990478e-06, + "loss": 0.5837, + "step": 4921 + }, + { + "epoch": 0.7252455795677799, + "grad_norm": 0.5772551894187927, + "learning_rate": 4.332981242342843e-06, + "loss": 0.5668, + "step": 4922 + }, + { + "epoch": 0.725392927308448, + "grad_norm": 0.567039430141449, + "learning_rate": 4.3327175776128704e-06, + "loss": 0.5501, + "step": 4923 + }, + { + "epoch": 0.7255402750491159, + "grad_norm": 0.5763731002807617, + "learning_rate": 4.332453868806898e-06, + "loss": 0.5588, + "step": 4924 + }, + { + "epoch": 0.7256876227897839, + "grad_norm": 0.58036869764328, + "learning_rate": 4.332190115931271e-06, + "loss": 0.5699, + "step": 4925 + }, + { + "epoch": 0.7258349705304519, + "grad_norm": 0.5891345739364624, + "learning_rate": 4.33192631899233e-06, + "loss": 0.5485, + "step": 4926 + }, + { + "epoch": 0.7259823182711198, + "grad_norm": 0.5538686513900757, + "learning_rate": 4.331662477996421e-06, + "loss": 0.588, + "step": 4927 + }, + { + "epoch": 0.7261296660117879, + "grad_norm": 0.560168981552124, + "learning_rate": 4.331398592949889e-06, + "loss": 0.5677, + "step": 4928 + }, + { + "epoch": 0.7262770137524558, + "grad_norm": 0.5445869565010071, + "learning_rate": 4.331134663859079e-06, + "loss": 0.551, + "step": 4929 + }, + { + "epoch": 0.7264243614931237, + "grad_norm": 0.5672941207885742, + "learning_rate": 4.330870690730339e-06, + "loss": 0.5738, + "step": 4930 + }, + { + "epoch": 0.7265717092337918, + "grad_norm": 0.598488986492157, + "learning_rate": 4.330606673570018e-06, + "loss": 0.5864, + "step": 4931 + }, + { + "epoch": 0.7267190569744597, + "grad_norm": 0.5942561030387878, + "learning_rate": 4.330342612384465e-06, + "loss": 0.5173, + "step": 4932 + }, + { + "epoch": 0.7268664047151278, + "grad_norm": 0.5534928441047668, + "learning_rate": 4.33007850718003e-06, + "loss": 0.5597, + "step": 4933 + }, + { + "epoch": 0.7270137524557957, + "grad_norm": 0.5808005928993225, + "learning_rate": 4.329814357963067e-06, + "loss": 0.5481, + "step": 4934 + }, + { + "epoch": 0.7271611001964636, + "grad_norm": 0.573520839214325, + "learning_rate": 4.329550164739925e-06, + "loss": 0.5539, + "step": 4935 + }, + { + "epoch": 0.7273084479371317, + "grad_norm": 0.6798419952392578, + "learning_rate": 4.329285927516961e-06, + "loss": 0.5689, + "step": 4936 + }, + { + "epoch": 0.7274557956777996, + "grad_norm": 0.5858158469200134, + "learning_rate": 4.329021646300528e-06, + "loss": 0.5749, + "step": 4937 + }, + { + "epoch": 0.7276031434184675, + "grad_norm": 0.5703850388526917, + "learning_rate": 4.3287573210969815e-06, + "loss": 0.5759, + "step": 4938 + }, + { + "epoch": 0.7277504911591356, + "grad_norm": 0.5887730121612549, + "learning_rate": 4.32849295191268e-06, + "loss": 0.5786, + "step": 4939 + }, + { + "epoch": 0.7278978388998035, + "grad_norm": 0.5337129235267639, + "learning_rate": 4.328228538753981e-06, + "loss": 0.5206, + "step": 4940 + }, + { + "epoch": 0.7280451866404715, + "grad_norm": 0.6131261587142944, + "learning_rate": 4.327964081627243e-06, + "loss": 0.5742, + "step": 4941 + }, + { + "epoch": 0.7281925343811395, + "grad_norm": 0.5790795087814331, + "learning_rate": 4.327699580538825e-06, + "loss": 0.5447, + "step": 4942 + }, + { + "epoch": 0.7283398821218074, + "grad_norm": 0.5825923085212708, + "learning_rate": 4.32743503549509e-06, + "loss": 0.5642, + "step": 4943 + }, + { + "epoch": 0.7284872298624755, + "grad_norm": 0.5787452459335327, + "learning_rate": 4.3271704465024e-06, + "loss": 0.562, + "step": 4944 + }, + { + "epoch": 0.7286345776031434, + "grad_norm": 0.5803149938583374, + "learning_rate": 4.326905813567117e-06, + "loss": 0.5643, + "step": 4945 + }, + { + "epoch": 0.7287819253438114, + "grad_norm": 0.6066681146621704, + "learning_rate": 4.3266411366956075e-06, + "loss": 0.5548, + "step": 4946 + }, + { + "epoch": 0.7289292730844794, + "grad_norm": 0.6044027209281921, + "learning_rate": 4.326376415894234e-06, + "loss": 0.5678, + "step": 4947 + }, + { + "epoch": 0.7290766208251473, + "grad_norm": 0.5782045125961304, + "learning_rate": 4.326111651169364e-06, + "loss": 0.5646, + "step": 4948 + }, + { + "epoch": 0.7292239685658153, + "grad_norm": 0.5885677337646484, + "learning_rate": 4.3258468425273666e-06, + "loss": 0.5858, + "step": 4949 + }, + { + "epoch": 0.7293713163064833, + "grad_norm": 0.5830740332603455, + "learning_rate": 4.325581989974608e-06, + "loss": 0.5552, + "step": 4950 + }, + { + "epoch": 0.7295186640471513, + "grad_norm": 0.5892771482467651, + "learning_rate": 4.325317093517459e-06, + "loss": 0.5737, + "step": 4951 + }, + { + "epoch": 0.7296660117878192, + "grad_norm": 0.5679969787597656, + "learning_rate": 4.3250521531622905e-06, + "loss": 0.5668, + "step": 4952 + }, + { + "epoch": 0.7298133595284872, + "grad_norm": 0.5624618530273438, + "learning_rate": 4.324787168915474e-06, + "loss": 0.5727, + "step": 4953 + }, + { + "epoch": 0.7299607072691552, + "grad_norm": 0.6232205033302307, + "learning_rate": 4.324522140783381e-06, + "loss": 0.5532, + "step": 4954 + }, + { + "epoch": 0.7301080550098232, + "grad_norm": 0.6019687056541443, + "learning_rate": 4.3242570687723864e-06, + "loss": 0.5984, + "step": 4955 + }, + { + "epoch": 0.7302554027504912, + "grad_norm": 0.586944043636322, + "learning_rate": 4.323991952888865e-06, + "loss": 0.551, + "step": 4956 + }, + { + "epoch": 0.7304027504911591, + "grad_norm": 0.5897248983383179, + "learning_rate": 4.323726793139194e-06, + "loss": 0.5754, + "step": 4957 + }, + { + "epoch": 0.7305500982318271, + "grad_norm": 0.59512859582901, + "learning_rate": 4.323461589529747e-06, + "loss": 0.5451, + "step": 4958 + }, + { + "epoch": 0.7306974459724951, + "grad_norm": 0.6264282464981079, + "learning_rate": 4.323196342066905e-06, + "loss": 0.5474, + "step": 4959 + }, + { + "epoch": 0.730844793713163, + "grad_norm": 0.5575962662696838, + "learning_rate": 4.322931050757046e-06, + "loss": 0.5747, + "step": 4960 + }, + { + "epoch": 0.7309921414538311, + "grad_norm": 0.5706198811531067, + "learning_rate": 4.32266571560655e-06, + "loss": 0.5709, + "step": 4961 + }, + { + "epoch": 0.731139489194499, + "grad_norm": 0.5915094017982483, + "learning_rate": 4.322400336621799e-06, + "loss": 0.5828, + "step": 4962 + }, + { + "epoch": 0.7312868369351669, + "grad_norm": 0.5637624859809875, + "learning_rate": 4.3221349138091735e-06, + "loss": 0.5643, + "step": 4963 + }, + { + "epoch": 0.731434184675835, + "grad_norm": 0.5906665325164795, + "learning_rate": 4.3218694471750596e-06, + "loss": 0.5428, + "step": 4964 + }, + { + "epoch": 0.7315815324165029, + "grad_norm": 0.5775624513626099, + "learning_rate": 4.321603936725839e-06, + "loss": 0.5586, + "step": 4965 + }, + { + "epoch": 0.731728880157171, + "grad_norm": 0.5972519516944885, + "learning_rate": 4.321338382467898e-06, + "loss": 0.5829, + "step": 4966 + }, + { + "epoch": 0.7318762278978389, + "grad_norm": 0.5571146607398987, + "learning_rate": 4.321072784407624e-06, + "loss": 0.549, + "step": 4967 + }, + { + "epoch": 0.7320235756385068, + "grad_norm": 0.5784038305282593, + "learning_rate": 4.320807142551402e-06, + "loss": 0.5679, + "step": 4968 + }, + { + "epoch": 0.7321709233791749, + "grad_norm": 0.5839369297027588, + "learning_rate": 4.320541456905624e-06, + "loss": 0.5472, + "step": 4969 + }, + { + "epoch": 0.7323182711198428, + "grad_norm": 0.5411736965179443, + "learning_rate": 4.320275727476677e-06, + "loss": 0.5619, + "step": 4970 + }, + { + "epoch": 0.7324656188605108, + "grad_norm": 0.6032383441925049, + "learning_rate": 4.320009954270953e-06, + "loss": 0.6007, + "step": 4971 + }, + { + "epoch": 0.7326129666011788, + "grad_norm": 0.5746312141418457, + "learning_rate": 4.319744137294843e-06, + "loss": 0.556, + "step": 4972 + }, + { + "epoch": 0.7327603143418467, + "grad_norm": 0.6138305068016052, + "learning_rate": 4.319478276554739e-06, + "loss": 0.5995, + "step": 4973 + }, + { + "epoch": 0.7329076620825148, + "grad_norm": 0.5768448710441589, + "learning_rate": 4.319212372057037e-06, + "loss": 0.5549, + "step": 4974 + }, + { + "epoch": 0.7330550098231827, + "grad_norm": 0.5810784101486206, + "learning_rate": 4.318946423808131e-06, + "loss": 0.5857, + "step": 4975 + }, + { + "epoch": 0.7332023575638507, + "grad_norm": 0.5791382789611816, + "learning_rate": 4.318680431814416e-06, + "loss": 0.5851, + "step": 4976 + }, + { + "epoch": 0.7333497053045187, + "grad_norm": 0.5832365155220032, + "learning_rate": 4.31841439608229e-06, + "loss": 0.5421, + "step": 4977 + }, + { + "epoch": 0.7334970530451866, + "grad_norm": 0.5799663066864014, + "learning_rate": 4.31814831661815e-06, + "loss": 0.6141, + "step": 4978 + }, + { + "epoch": 0.7336444007858546, + "grad_norm": 0.5885497331619263, + "learning_rate": 4.317882193428397e-06, + "loss": 0.5761, + "step": 4979 + }, + { + "epoch": 0.7337917485265226, + "grad_norm": 0.5940432548522949, + "learning_rate": 4.317616026519428e-06, + "loss": 0.5711, + "step": 4980 + }, + { + "epoch": 0.7339390962671906, + "grad_norm": 0.5757717490196228, + "learning_rate": 4.3173498158976475e-06, + "loss": 0.5885, + "step": 4981 + }, + { + "epoch": 0.7340864440078585, + "grad_norm": 0.5679107904434204, + "learning_rate": 4.317083561569456e-06, + "loss": 0.5415, + "step": 4982 + }, + { + "epoch": 0.7342337917485265, + "grad_norm": 0.571753978729248, + "learning_rate": 4.316817263541256e-06, + "loss": 0.5619, + "step": 4983 + }, + { + "epoch": 0.7343811394891945, + "grad_norm": 0.5729131102561951, + "learning_rate": 4.316550921819455e-06, + "loss": 0.5806, + "step": 4984 + }, + { + "epoch": 0.7345284872298625, + "grad_norm": 0.5823174715042114, + "learning_rate": 4.316284536410455e-06, + "loss": 0.5921, + "step": 4985 + }, + { + "epoch": 0.7346758349705305, + "grad_norm": 0.5764565467834473, + "learning_rate": 4.316018107320664e-06, + "loss": 0.5717, + "step": 4986 + }, + { + "epoch": 0.7348231827111984, + "grad_norm": 0.6280874609947205, + "learning_rate": 4.3157516345564896e-06, + "loss": 0.5974, + "step": 4987 + }, + { + "epoch": 0.7349705304518664, + "grad_norm": 0.5823214054107666, + "learning_rate": 4.315485118124339e-06, + "loss": 0.5111, + "step": 4988 + }, + { + "epoch": 0.7351178781925344, + "grad_norm": 0.5977814793586731, + "learning_rate": 4.315218558030624e-06, + "loss": 0.603, + "step": 4989 + }, + { + "epoch": 0.7352652259332023, + "grad_norm": 0.6338955760002136, + "learning_rate": 4.314951954281754e-06, + "loss": 0.5869, + "step": 4990 + }, + { + "epoch": 0.7354125736738704, + "grad_norm": 0.5899448394775391, + "learning_rate": 4.314685306884141e-06, + "loss": 0.5371, + "step": 4991 + }, + { + "epoch": 0.7355599214145383, + "grad_norm": 0.5539519190788269, + "learning_rate": 4.314418615844197e-06, + "loss": 0.5577, + "step": 4992 + }, + { + "epoch": 0.7357072691552062, + "grad_norm": 0.5505826473236084, + "learning_rate": 4.3141518811683355e-06, + "loss": 0.5831, + "step": 4993 + }, + { + "epoch": 0.7358546168958743, + "grad_norm": 0.5822529196739197, + "learning_rate": 4.313885102862974e-06, + "loss": 0.5763, + "step": 4994 + }, + { + "epoch": 0.7360019646365422, + "grad_norm": 0.5752310752868652, + "learning_rate": 4.3136182809345254e-06, + "loss": 0.5484, + "step": 4995 + }, + { + "epoch": 0.7361493123772103, + "grad_norm": 0.6199281811714172, + "learning_rate": 4.313351415389407e-06, + "loss": 0.6131, + "step": 4996 + }, + { + "epoch": 0.7362966601178782, + "grad_norm": 0.5808787941932678, + "learning_rate": 4.313084506234038e-06, + "loss": 0.5748, + "step": 4997 + }, + { + "epoch": 0.7364440078585461, + "grad_norm": 0.6077798008918762, + "learning_rate": 4.312817553474838e-06, + "loss": 0.5581, + "step": 4998 + }, + { + "epoch": 0.7365913555992142, + "grad_norm": 0.6510355472564697, + "learning_rate": 4.312550557118226e-06, + "loss": 0.5593, + "step": 4999 + }, + { + "epoch": 0.7367387033398821, + "grad_norm": 0.5734135508537292, + "learning_rate": 4.3122835171706225e-06, + "loss": 0.6036, + "step": 5000 + }, + { + "epoch": 0.73688605108055, + "grad_norm": 0.5771000385284424, + "learning_rate": 4.31201643363845e-06, + "loss": 0.5889, + "step": 5001 + }, + { + "epoch": 0.7370333988212181, + "grad_norm": 0.5837230682373047, + "learning_rate": 4.311749306528133e-06, + "loss": 0.5616, + "step": 5002 + }, + { + "epoch": 0.737180746561886, + "grad_norm": 0.5964196920394897, + "learning_rate": 4.3114821358460955e-06, + "loss": 0.5778, + "step": 5003 + }, + { + "epoch": 0.7373280943025541, + "grad_norm": 0.5447560548782349, + "learning_rate": 4.311214921598761e-06, + "loss": 0.5504, + "step": 5004 + }, + { + "epoch": 0.737475442043222, + "grad_norm": 0.5717914700508118, + "learning_rate": 4.310947663792559e-06, + "loss": 0.5467, + "step": 5005 + }, + { + "epoch": 0.73762278978389, + "grad_norm": 0.5731456279754639, + "learning_rate": 4.310680362433914e-06, + "loss": 0.5843, + "step": 5006 + }, + { + "epoch": 0.737770137524558, + "grad_norm": 0.5602874755859375, + "learning_rate": 4.3104130175292545e-06, + "loss": 0.5938, + "step": 5007 + }, + { + "epoch": 0.7379174852652259, + "grad_norm": 0.5806612968444824, + "learning_rate": 4.3101456290850115e-06, + "loss": 0.5787, + "step": 5008 + }, + { + "epoch": 0.7380648330058939, + "grad_norm": 0.5971798896789551, + "learning_rate": 4.309878197107615e-06, + "loss": 0.5763, + "step": 5009 + }, + { + "epoch": 0.7382121807465619, + "grad_norm": 0.5576286315917969, + "learning_rate": 4.309610721603498e-06, + "loss": 0.5551, + "step": 5010 + }, + { + "epoch": 0.7383595284872299, + "grad_norm": 0.579736590385437, + "learning_rate": 4.309343202579091e-06, + "loss": 0.5578, + "step": 5011 + }, + { + "epoch": 0.7385068762278978, + "grad_norm": 0.6100813150405884, + "learning_rate": 4.309075640040829e-06, + "loss": 0.5514, + "step": 5012 + }, + { + "epoch": 0.7386542239685658, + "grad_norm": 0.5937638878822327, + "learning_rate": 4.308808033995146e-06, + "loss": 0.5463, + "step": 5013 + }, + { + "epoch": 0.7388015717092338, + "grad_norm": 0.5728991031646729, + "learning_rate": 4.308540384448479e-06, + "loss": 0.5661, + "step": 5014 + }, + { + "epoch": 0.7389489194499018, + "grad_norm": 0.6001527309417725, + "learning_rate": 4.308272691407263e-06, + "loss": 0.5485, + "step": 5015 + }, + { + "epoch": 0.7390962671905698, + "grad_norm": 0.5707272887229919, + "learning_rate": 4.308004954877937e-06, + "loss": 0.561, + "step": 5016 + }, + { + "epoch": 0.7392436149312377, + "grad_norm": 0.5667203068733215, + "learning_rate": 4.30773717486694e-06, + "loss": 0.5749, + "step": 5017 + }, + { + "epoch": 0.7393909626719057, + "grad_norm": 0.5944525003433228, + "learning_rate": 4.307469351380712e-06, + "loss": 0.5611, + "step": 5018 + }, + { + "epoch": 0.7395383104125737, + "grad_norm": 0.5979703664779663, + "learning_rate": 4.307201484425694e-06, + "loss": 0.6134, + "step": 5019 + }, + { + "epoch": 0.7396856581532416, + "grad_norm": 0.6114655137062073, + "learning_rate": 4.3069335740083274e-06, + "loss": 0.5732, + "step": 5020 + }, + { + "epoch": 0.7398330058939097, + "grad_norm": 0.5483928322792053, + "learning_rate": 4.306665620135057e-06, + "loss": 0.5537, + "step": 5021 + }, + { + "epoch": 0.7399803536345776, + "grad_norm": 0.5549610257148743, + "learning_rate": 4.306397622812324e-06, + "loss": 0.5786, + "step": 5022 + }, + { + "epoch": 0.7401277013752455, + "grad_norm": 0.572201669216156, + "learning_rate": 4.306129582046577e-06, + "loss": 0.5743, + "step": 5023 + }, + { + "epoch": 0.7402750491159136, + "grad_norm": 0.6088916659355164, + "learning_rate": 4.305861497844259e-06, + "loss": 0.5479, + "step": 5024 + }, + { + "epoch": 0.7404223968565815, + "grad_norm": 0.6015365123748779, + "learning_rate": 4.30559337021182e-06, + "loss": 0.5828, + "step": 5025 + }, + { + "epoch": 0.7405697445972496, + "grad_norm": 0.6392014622688293, + "learning_rate": 4.305325199155708e-06, + "loss": 0.5536, + "step": 5026 + }, + { + "epoch": 0.7407170923379175, + "grad_norm": 0.5631026029586792, + "learning_rate": 4.305056984682371e-06, + "loss": 0.5771, + "step": 5027 + }, + { + "epoch": 0.7408644400785854, + "grad_norm": 0.580232560634613, + "learning_rate": 4.304788726798261e-06, + "loss": 0.5639, + "step": 5028 + }, + { + "epoch": 0.7410117878192535, + "grad_norm": 0.5785886645317078, + "learning_rate": 4.304520425509827e-06, + "loss": 0.596, + "step": 5029 + }, + { + "epoch": 0.7411591355599214, + "grad_norm": 0.5725448727607727, + "learning_rate": 4.304252080823524e-06, + "loss": 0.5323, + "step": 5030 + }, + { + "epoch": 0.7413064833005893, + "grad_norm": 0.5825521349906921, + "learning_rate": 4.303983692745804e-06, + "loss": 0.5594, + "step": 5031 + }, + { + "epoch": 0.7414538310412574, + "grad_norm": 0.5868968367576599, + "learning_rate": 4.3037152612831235e-06, + "loss": 0.558, + "step": 5032 + }, + { + "epoch": 0.7416011787819253, + "grad_norm": 0.6131497621536255, + "learning_rate": 4.303446786441936e-06, + "loss": 0.5676, + "step": 5033 + }, + { + "epoch": 0.7417485265225933, + "grad_norm": 0.5788185000419617, + "learning_rate": 4.3031782682287e-06, + "loss": 0.5613, + "step": 5034 + }, + { + "epoch": 0.7418958742632613, + "grad_norm": 0.5835681557655334, + "learning_rate": 4.302909706649872e-06, + "loss": 0.5858, + "step": 5035 + }, + { + "epoch": 0.7420432220039292, + "grad_norm": 0.5684791803359985, + "learning_rate": 4.302641101711911e-06, + "loss": 0.5194, + "step": 5036 + }, + { + "epoch": 0.7421905697445973, + "grad_norm": 0.5979777574539185, + "learning_rate": 4.302372453421277e-06, + "loss": 0.5533, + "step": 5037 + }, + { + "epoch": 0.7423379174852652, + "grad_norm": 0.6086001992225647, + "learning_rate": 4.302103761784431e-06, + "loss": 0.5423, + "step": 5038 + }, + { + "epoch": 0.7424852652259332, + "grad_norm": 0.5788574814796448, + "learning_rate": 4.301835026807835e-06, + "loss": 0.5681, + "step": 5039 + }, + { + "epoch": 0.7426326129666012, + "grad_norm": 0.5869604349136353, + "learning_rate": 4.301566248497951e-06, + "loss": 0.5809, + "step": 5040 + }, + { + "epoch": 0.7427799607072691, + "grad_norm": 0.5764033198356628, + "learning_rate": 4.301297426861244e-06, + "loss": 0.5595, + "step": 5041 + }, + { + "epoch": 0.7429273084479371, + "grad_norm": 0.5761706233024597, + "learning_rate": 4.3010285619041794e-06, + "loss": 0.5299, + "step": 5042 + }, + { + "epoch": 0.7430746561886051, + "grad_norm": 0.5713658332824707, + "learning_rate": 4.300759653633222e-06, + "loss": 0.5884, + "step": 5043 + }, + { + "epoch": 0.7432220039292731, + "grad_norm": 0.5948759913444519, + "learning_rate": 4.3004907020548396e-06, + "loss": 0.5795, + "step": 5044 + }, + { + "epoch": 0.7433693516699411, + "grad_norm": 0.5949463248252869, + "learning_rate": 4.3002217071755e-06, + "loss": 0.5744, + "step": 5045 + }, + { + "epoch": 0.743516699410609, + "grad_norm": 0.6125065684318542, + "learning_rate": 4.299952669001673e-06, + "loss": 0.5703, + "step": 5046 + }, + { + "epoch": 0.743664047151277, + "grad_norm": 0.5798038840293884, + "learning_rate": 4.299683587539829e-06, + "loss": 0.5503, + "step": 5047 + }, + { + "epoch": 0.743811394891945, + "grad_norm": 0.6870542764663696, + "learning_rate": 4.299414462796439e-06, + "loss": 0.6005, + "step": 5048 + }, + { + "epoch": 0.743958742632613, + "grad_norm": 0.5841313600540161, + "learning_rate": 4.299145294777975e-06, + "loss": 0.5599, + "step": 5049 + }, + { + "epoch": 0.7441060903732809, + "grad_norm": 0.5746855735778809, + "learning_rate": 4.298876083490909e-06, + "loss": 0.5549, + "step": 5050 + }, + { + "epoch": 0.744253438113949, + "grad_norm": 0.5904500484466553, + "learning_rate": 4.298606828941718e-06, + "loss": 0.5822, + "step": 5051 + }, + { + "epoch": 0.7444007858546169, + "grad_norm": 0.5640993118286133, + "learning_rate": 4.298337531136876e-06, + "loss": 0.5365, + "step": 5052 + }, + { + "epoch": 0.7445481335952848, + "grad_norm": 0.5807704925537109, + "learning_rate": 4.2980681900828606e-06, + "loss": 0.5366, + "step": 5053 + }, + { + "epoch": 0.7446954813359529, + "grad_norm": 0.5762634873390198, + "learning_rate": 4.297798805786149e-06, + "loss": 0.5614, + "step": 5054 + }, + { + "epoch": 0.7448428290766208, + "grad_norm": 0.570772111415863, + "learning_rate": 4.2975293782532175e-06, + "loss": 0.5725, + "step": 5055 + }, + { + "epoch": 0.7449901768172889, + "grad_norm": 0.5905441045761108, + "learning_rate": 4.297259907490549e-06, + "loss": 0.5579, + "step": 5056 + }, + { + "epoch": 0.7451375245579568, + "grad_norm": 0.5937953591346741, + "learning_rate": 4.296990393504622e-06, + "loss": 0.5786, + "step": 5057 + }, + { + "epoch": 0.7452848722986247, + "grad_norm": 0.6282385587692261, + "learning_rate": 4.2967208363019195e-06, + "loss": 0.5997, + "step": 5058 + }, + { + "epoch": 0.7454322200392928, + "grad_norm": 0.606924831867218, + "learning_rate": 4.2964512358889235e-06, + "loss": 0.5603, + "step": 5059 + }, + { + "epoch": 0.7455795677799607, + "grad_norm": 0.5906804800033569, + "learning_rate": 4.2961815922721176e-06, + "loss": 0.5696, + "step": 5060 + }, + { + "epoch": 0.7457269155206286, + "grad_norm": 0.6040922999382019, + "learning_rate": 4.295911905457987e-06, + "loss": 0.5814, + "step": 5061 + }, + { + "epoch": 0.7458742632612967, + "grad_norm": 0.5852967500686646, + "learning_rate": 4.295642175453016e-06, + "loss": 0.594, + "step": 5062 + }, + { + "epoch": 0.7460216110019646, + "grad_norm": 0.589722216129303, + "learning_rate": 4.295372402263694e-06, + "loss": 0.575, + "step": 5063 + }, + { + "epoch": 0.7461689587426326, + "grad_norm": 0.578370213508606, + "learning_rate": 4.295102585896509e-06, + "loss": 0.5672, + "step": 5064 + }, + { + "epoch": 0.7463163064833006, + "grad_norm": 0.5715278387069702, + "learning_rate": 4.294832726357948e-06, + "loss": 0.5601, + "step": 5065 + }, + { + "epoch": 0.7464636542239685, + "grad_norm": 0.5674099326133728, + "learning_rate": 4.294562823654501e-06, + "loss": 0.5562, + "step": 5066 + }, + { + "epoch": 0.7466110019646366, + "grad_norm": 0.5809049606323242, + "learning_rate": 4.29429287779266e-06, + "loss": 0.5642, + "step": 5067 + }, + { + "epoch": 0.7467583497053045, + "grad_norm": 0.6103378534317017, + "learning_rate": 4.294022888778917e-06, + "loss": 0.5773, + "step": 5068 + }, + { + "epoch": 0.7469056974459725, + "grad_norm": 0.6061887145042419, + "learning_rate": 4.293752856619766e-06, + "loss": 0.5519, + "step": 5069 + }, + { + "epoch": 0.7470530451866405, + "grad_norm": 0.6091132164001465, + "learning_rate": 4.293482781321699e-06, + "loss": 0.5488, + "step": 5070 + }, + { + "epoch": 0.7472003929273084, + "grad_norm": 0.5568970441818237, + "learning_rate": 4.293212662891212e-06, + "loss": 0.552, + "step": 5071 + }, + { + "epoch": 0.7473477406679764, + "grad_norm": 0.6124438643455505, + "learning_rate": 4.292942501334802e-06, + "loss": 0.6299, + "step": 5072 + }, + { + "epoch": 0.7474950884086444, + "grad_norm": 0.5640188455581665, + "learning_rate": 4.292672296658966e-06, + "loss": 0.5638, + "step": 5073 + }, + { + "epoch": 0.7476424361493124, + "grad_norm": 0.5807719230651855, + "learning_rate": 4.292402048870202e-06, + "loss": 0.5379, + "step": 5074 + }, + { + "epoch": 0.7477897838899804, + "grad_norm": 0.5849514603614807, + "learning_rate": 4.292131757975009e-06, + "loss": 0.5477, + "step": 5075 + }, + { + "epoch": 0.7479371316306483, + "grad_norm": 0.6478999257087708, + "learning_rate": 4.291861423979888e-06, + "loss": 0.5944, + "step": 5076 + }, + { + "epoch": 0.7480844793713163, + "grad_norm": 0.5882793664932251, + "learning_rate": 4.291591046891341e-06, + "loss": 0.5458, + "step": 5077 + }, + { + "epoch": 0.7482318271119843, + "grad_norm": 0.5792173147201538, + "learning_rate": 4.291320626715868e-06, + "loss": 0.5642, + "step": 5078 + }, + { + "epoch": 0.7483791748526523, + "grad_norm": 0.588499903678894, + "learning_rate": 4.291050163459975e-06, + "loss": 0.577, + "step": 5079 + }, + { + "epoch": 0.7485265225933202, + "grad_norm": 0.6063986420631409, + "learning_rate": 4.290779657130166e-06, + "loss": 0.5923, + "step": 5080 + }, + { + "epoch": 0.7486738703339882, + "grad_norm": 0.5530142188072205, + "learning_rate": 4.290509107732946e-06, + "loss": 0.5906, + "step": 5081 + }, + { + "epoch": 0.7488212180746562, + "grad_norm": 0.5800560712814331, + "learning_rate": 4.290238515274822e-06, + "loss": 0.602, + "step": 5082 + }, + { + "epoch": 0.7489685658153241, + "grad_norm": 0.5849379897117615, + "learning_rate": 4.289967879762301e-06, + "loss": 0.5868, + "step": 5083 + }, + { + "epoch": 0.7491159135559922, + "grad_norm": 0.6246476173400879, + "learning_rate": 4.289697201201892e-06, + "loss": 0.5956, + "step": 5084 + }, + { + "epoch": 0.7492632612966601, + "grad_norm": 0.5747687220573425, + "learning_rate": 4.289426479600104e-06, + "loss": 0.5765, + "step": 5085 + }, + { + "epoch": 0.7494106090373281, + "grad_norm": 0.5828307867050171, + "learning_rate": 4.28915571496345e-06, + "loss": 0.5522, + "step": 5086 + }, + { + "epoch": 0.7495579567779961, + "grad_norm": 0.5841087698936462, + "learning_rate": 4.288884907298439e-06, + "loss": 0.5936, + "step": 5087 + }, + { + "epoch": 0.749705304518664, + "grad_norm": 0.590087890625, + "learning_rate": 4.288614056611585e-06, + "loss": 0.5898, + "step": 5088 + }, + { + "epoch": 0.7498526522593321, + "grad_norm": 0.6047664880752563, + "learning_rate": 4.288343162909403e-06, + "loss": 0.556, + "step": 5089 + }, + { + "epoch": 0.75, + "grad_norm": 0.5679372549057007, + "learning_rate": 4.288072226198405e-06, + "loss": 0.5815, + "step": 5090 + }, + { + "epoch": 0.7501473477406679, + "grad_norm": 0.5584534406661987, + "learning_rate": 4.2878012464851105e-06, + "loss": 0.5681, + "step": 5091 + }, + { + "epoch": 0.750294695481336, + "grad_norm": 0.5706104636192322, + "learning_rate": 4.287530223776034e-06, + "loss": 0.5982, + "step": 5092 + }, + { + "epoch": 0.7504420432220039, + "grad_norm": 0.5835344195365906, + "learning_rate": 4.287259158077693e-06, + "loss": 0.5698, + "step": 5093 + }, + { + "epoch": 0.7505893909626719, + "grad_norm": 0.6651552319526672, + "learning_rate": 4.286988049396609e-06, + "loss": 0.5801, + "step": 5094 + }, + { + "epoch": 0.7507367387033399, + "grad_norm": 0.585995614528656, + "learning_rate": 4.286716897739299e-06, + "loss": 0.5589, + "step": 5095 + }, + { + "epoch": 0.7508840864440078, + "grad_norm": 0.5770377516746521, + "learning_rate": 4.2864457031122865e-06, + "loss": 0.591, + "step": 5096 + }, + { + "epoch": 0.7510314341846759, + "grad_norm": 0.5835183262825012, + "learning_rate": 4.286174465522093e-06, + "loss": 0.5694, + "step": 5097 + }, + { + "epoch": 0.7511787819253438, + "grad_norm": 0.5892894268035889, + "learning_rate": 4.285903184975241e-06, + "loss": 0.5727, + "step": 5098 + }, + { + "epoch": 0.7513261296660118, + "grad_norm": 0.556981086730957, + "learning_rate": 4.285631861478255e-06, + "loss": 0.5881, + "step": 5099 + }, + { + "epoch": 0.7514734774066798, + "grad_norm": 0.5992540121078491, + "learning_rate": 4.28536049503766e-06, + "loss": 0.5758, + "step": 5100 + }, + { + "epoch": 0.7516208251473477, + "grad_norm": 0.5818927884101868, + "learning_rate": 4.285089085659983e-06, + "loss": 0.5428, + "step": 5101 + }, + { + "epoch": 0.7517681728880157, + "grad_norm": 0.5763471722602844, + "learning_rate": 4.28481763335175e-06, + "loss": 0.5706, + "step": 5102 + }, + { + "epoch": 0.7519155206286837, + "grad_norm": 0.5965990424156189, + "learning_rate": 4.284546138119491e-06, + "loss": 0.5423, + "step": 5103 + }, + { + "epoch": 0.7520628683693517, + "grad_norm": 0.5841104388237, + "learning_rate": 4.284274599969734e-06, + "loss": 0.5517, + "step": 5104 + }, + { + "epoch": 0.7522102161100196, + "grad_norm": 0.5575698018074036, + "learning_rate": 4.28400301890901e-06, + "loss": 0.5535, + "step": 5105 + }, + { + "epoch": 0.7523575638506876, + "grad_norm": 0.5878612399101257, + "learning_rate": 4.283731394943849e-06, + "loss": 0.5547, + "step": 5106 + }, + { + "epoch": 0.7525049115913556, + "grad_norm": 0.6107683181762695, + "learning_rate": 4.283459728080785e-06, + "loss": 0.5772, + "step": 5107 + }, + { + "epoch": 0.7526522593320236, + "grad_norm": 0.5784721970558167, + "learning_rate": 4.283188018326352e-06, + "loss": 0.5942, + "step": 5108 + }, + { + "epoch": 0.7527996070726916, + "grad_norm": 0.6270819306373596, + "learning_rate": 4.282916265687083e-06, + "loss": 0.5877, + "step": 5109 + }, + { + "epoch": 0.7529469548133595, + "grad_norm": 0.6024899482727051, + "learning_rate": 4.282644470169514e-06, + "loss": 0.5969, + "step": 5110 + }, + { + "epoch": 0.7530943025540275, + "grad_norm": 0.5646007061004639, + "learning_rate": 4.282372631780182e-06, + "loss": 0.5579, + "step": 5111 + }, + { + "epoch": 0.7532416502946955, + "grad_norm": 0.5910894274711609, + "learning_rate": 4.282100750525623e-06, + "loss": 0.5816, + "step": 5112 + }, + { + "epoch": 0.7533889980353634, + "grad_norm": 0.569368839263916, + "learning_rate": 4.281828826412378e-06, + "loss": 0.5572, + "step": 5113 + }, + { + "epoch": 0.7535363457760315, + "grad_norm": 0.5527791976928711, + "learning_rate": 4.2815568594469845e-06, + "loss": 0.5574, + "step": 5114 + }, + { + "epoch": 0.7536836935166994, + "grad_norm": 0.6289870142936707, + "learning_rate": 4.281284849635985e-06, + "loss": 0.571, + "step": 5115 + }, + { + "epoch": 0.7538310412573674, + "grad_norm": 2.209343910217285, + "learning_rate": 4.2810127969859205e-06, + "loss": 0.5741, + "step": 5116 + }, + { + "epoch": 0.7539783889980354, + "grad_norm": 0.5739281177520752, + "learning_rate": 4.280740701503333e-06, + "loss": 0.5797, + "step": 5117 + }, + { + "epoch": 0.7541257367387033, + "grad_norm": 0.5930657982826233, + "learning_rate": 4.280468563194767e-06, + "loss": 0.5755, + "step": 5118 + }, + { + "epoch": 0.7542730844793714, + "grad_norm": 0.5845992565155029, + "learning_rate": 4.280196382066767e-06, + "loss": 0.5673, + "step": 5119 + }, + { + "epoch": 0.7544204322200393, + "grad_norm": 0.5929595828056335, + "learning_rate": 4.2799241581258785e-06, + "loss": 0.5936, + "step": 5120 + }, + { + "epoch": 0.7545677799607072, + "grad_norm": 0.558774471282959, + "learning_rate": 4.27965189137865e-06, + "loss": 0.5647, + "step": 5121 + }, + { + "epoch": 0.7547151277013753, + "grad_norm": 0.5876796841621399, + "learning_rate": 4.2793795818316275e-06, + "loss": 0.575, + "step": 5122 + }, + { + "epoch": 0.7548624754420432, + "grad_norm": 0.5594955086708069, + "learning_rate": 4.2791072294913605e-06, + "loss": 0.5665, + "step": 5123 + }, + { + "epoch": 0.7550098231827111, + "grad_norm": 0.6456551551818848, + "learning_rate": 4.278834834364399e-06, + "loss": 0.5816, + "step": 5124 + }, + { + "epoch": 0.7551571709233792, + "grad_norm": 0.552578866481781, + "learning_rate": 4.278562396457296e-06, + "loss": 0.5848, + "step": 5125 + }, + { + "epoch": 0.7553045186640471, + "grad_norm": 0.5686046481132507, + "learning_rate": 4.278289915776599e-06, + "loss": 0.5715, + "step": 5126 + }, + { + "epoch": 0.7554518664047152, + "grad_norm": 0.5919221043586731, + "learning_rate": 4.278017392328865e-06, + "loss": 0.5788, + "step": 5127 + }, + { + "epoch": 0.7555992141453831, + "grad_norm": 0.5828486084938049, + "learning_rate": 4.277744826120646e-06, + "loss": 0.5844, + "step": 5128 + }, + { + "epoch": 0.755746561886051, + "grad_norm": 0.6243718862533569, + "learning_rate": 4.277472217158497e-06, + "loss": 0.5821, + "step": 5129 + }, + { + "epoch": 0.7558939096267191, + "grad_norm": 0.5897116661071777, + "learning_rate": 4.277199565448976e-06, + "loss": 0.5407, + "step": 5130 + }, + { + "epoch": 0.756041257367387, + "grad_norm": 0.5729791522026062, + "learning_rate": 4.276926870998639e-06, + "loss": 0.5746, + "step": 5131 + }, + { + "epoch": 0.756188605108055, + "grad_norm": 0.572661280632019, + "learning_rate": 4.276654133814044e-06, + "loss": 0.5918, + "step": 5132 + }, + { + "epoch": 0.756335952848723, + "grad_norm": 0.5834421515464783, + "learning_rate": 4.27638135390175e-06, + "loss": 0.5687, + "step": 5133 + }, + { + "epoch": 0.756483300589391, + "grad_norm": 0.6165165305137634, + "learning_rate": 4.276108531268318e-06, + "loss": 0.5696, + "step": 5134 + }, + { + "epoch": 0.7566306483300589, + "grad_norm": 0.6231330037117004, + "learning_rate": 4.27583566592031e-06, + "loss": 0.557, + "step": 5135 + }, + { + "epoch": 0.7567779960707269, + "grad_norm": 0.6299552321434021, + "learning_rate": 4.275562757864286e-06, + "loss": 0.5775, + "step": 5136 + }, + { + "epoch": 0.7569253438113949, + "grad_norm": 0.5556866526603699, + "learning_rate": 4.27528980710681e-06, + "loss": 0.5359, + "step": 5137 + }, + { + "epoch": 0.7570726915520629, + "grad_norm": 0.5695484280586243, + "learning_rate": 4.275016813654447e-06, + "loss": 0.5568, + "step": 5138 + }, + { + "epoch": 0.7572200392927309, + "grad_norm": 0.5734732151031494, + "learning_rate": 4.2747437775137625e-06, + "loss": 0.5627, + "step": 5139 + }, + { + "epoch": 0.7573673870333988, + "grad_norm": 0.5781581401824951, + "learning_rate": 4.2744706986913234e-06, + "loss": 0.572, + "step": 5140 + }, + { + "epoch": 0.7575147347740668, + "grad_norm": 0.5688138008117676, + "learning_rate": 4.274197577193695e-06, + "loss": 0.5612, + "step": 5141 + }, + { + "epoch": 0.7576620825147348, + "grad_norm": 0.5698906183242798, + "learning_rate": 4.2739244130274475e-06, + "loss": 0.589, + "step": 5142 + }, + { + "epoch": 0.7578094302554027, + "grad_norm": 0.573183000087738, + "learning_rate": 4.273651206199149e-06, + "loss": 0.5803, + "step": 5143 + }, + { + "epoch": 0.7579567779960708, + "grad_norm": 0.5663153529167175, + "learning_rate": 4.273377956715372e-06, + "loss": 0.5492, + "step": 5144 + }, + { + "epoch": 0.7581041257367387, + "grad_norm": 0.5882687568664551, + "learning_rate": 4.2731046645826864e-06, + "loss": 0.5492, + "step": 5145 + }, + { + "epoch": 0.7582514734774067, + "grad_norm": 0.6147663593292236, + "learning_rate": 4.272831329807665e-06, + "loss": 0.5774, + "step": 5146 + }, + { + "epoch": 0.7583988212180747, + "grad_norm": 0.5778620839118958, + "learning_rate": 4.272557952396883e-06, + "loss": 0.5551, + "step": 5147 + }, + { + "epoch": 0.7585461689587426, + "grad_norm": 0.6194990277290344, + "learning_rate": 4.2722845323569125e-06, + "loss": 0.5972, + "step": 5148 + }, + { + "epoch": 0.7586935166994107, + "grad_norm": 0.590305745601654, + "learning_rate": 4.272011069694331e-06, + "loss": 0.6006, + "step": 5149 + }, + { + "epoch": 0.7588408644400786, + "grad_norm": 0.5784345269203186, + "learning_rate": 4.271737564415713e-06, + "loss": 0.5842, + "step": 5150 + }, + { + "epoch": 0.7589882121807465, + "grad_norm": 0.6305643320083618, + "learning_rate": 4.2714640165276384e-06, + "loss": 0.5887, + "step": 5151 + }, + { + "epoch": 0.7591355599214146, + "grad_norm": 0.5483279228210449, + "learning_rate": 4.271190426036685e-06, + "loss": 0.541, + "step": 5152 + }, + { + "epoch": 0.7592829076620825, + "grad_norm": 0.5718250274658203, + "learning_rate": 4.2709167929494324e-06, + "loss": 0.5764, + "step": 5153 + }, + { + "epoch": 0.7594302554027504, + "grad_norm": 0.5681878924369812, + "learning_rate": 4.2706431172724615e-06, + "loss": 0.5932, + "step": 5154 + }, + { + "epoch": 0.7595776031434185, + "grad_norm": 0.56844562292099, + "learning_rate": 4.270369399012355e-06, + "loss": 0.6107, + "step": 5155 + }, + { + "epoch": 0.7597249508840864, + "grad_norm": 0.5833823084831238, + "learning_rate": 4.270095638175695e-06, + "loss": 0.5753, + "step": 5156 + }, + { + "epoch": 0.7598722986247545, + "grad_norm": 0.6039177179336548, + "learning_rate": 4.269821834769063e-06, + "loss": 0.5791, + "step": 5157 + }, + { + "epoch": 0.7600196463654224, + "grad_norm": 0.5968135595321655, + "learning_rate": 4.269547988799047e-06, + "loss": 0.5606, + "step": 5158 + }, + { + "epoch": 0.7601669941060903, + "grad_norm": 0.5998954176902771, + "learning_rate": 4.269274100272233e-06, + "loss": 0.5611, + "step": 5159 + }, + { + "epoch": 0.7603143418467584, + "grad_norm": 0.5581230521202087, + "learning_rate": 4.269000169195206e-06, + "loss": 0.546, + "step": 5160 + }, + { + "epoch": 0.7604616895874263, + "grad_norm": 0.575477123260498, + "learning_rate": 4.268726195574554e-06, + "loss": 0.5347, + "step": 5161 + }, + { + "epoch": 0.7606090373280943, + "grad_norm": 0.5791460275650024, + "learning_rate": 4.268452179416867e-06, + "loss": 0.5689, + "step": 5162 + }, + { + "epoch": 0.7607563850687623, + "grad_norm": 0.5616105198860168, + "learning_rate": 4.268178120728735e-06, + "loss": 0.5637, + "step": 5163 + }, + { + "epoch": 0.7609037328094302, + "grad_norm": 0.57393479347229, + "learning_rate": 4.267904019516748e-06, + "loss": 0.5606, + "step": 5164 + }, + { + "epoch": 0.7610510805500982, + "grad_norm": 0.5582591891288757, + "learning_rate": 4.267629875787499e-06, + "loss": 0.5778, + "step": 5165 + }, + { + "epoch": 0.7611984282907662, + "grad_norm": 0.5560251474380493, + "learning_rate": 4.26735568954758e-06, + "loss": 0.5485, + "step": 5166 + }, + { + "epoch": 0.7613457760314342, + "grad_norm": 0.5710726976394653, + "learning_rate": 4.267081460803586e-06, + "loss": 0.5691, + "step": 5167 + }, + { + "epoch": 0.7614931237721022, + "grad_norm": 0.5677807331085205, + "learning_rate": 4.266807189562111e-06, + "loss": 0.5542, + "step": 5168 + }, + { + "epoch": 0.7616404715127701, + "grad_norm": 0.5890932679176331, + "learning_rate": 4.266532875829753e-06, + "loss": 0.555, + "step": 5169 + }, + { + "epoch": 0.7617878192534381, + "grad_norm": 0.5888049006462097, + "learning_rate": 4.266258519613107e-06, + "loss": 0.5655, + "step": 5170 + }, + { + "epoch": 0.7619351669941061, + "grad_norm": 0.5862941145896912, + "learning_rate": 4.265984120918773e-06, + "loss": 0.5796, + "step": 5171 + }, + { + "epoch": 0.7620825147347741, + "grad_norm": 0.5729769468307495, + "learning_rate": 4.2657096797533474e-06, + "loss": 0.5442, + "step": 5172 + }, + { + "epoch": 0.762229862475442, + "grad_norm": 0.5633658170700073, + "learning_rate": 4.265435196123433e-06, + "loss": 0.5875, + "step": 5173 + }, + { + "epoch": 0.76237721021611, + "grad_norm": 0.5853629112243652, + "learning_rate": 4.2651606700356305e-06, + "loss": 0.5943, + "step": 5174 + }, + { + "epoch": 0.762524557956778, + "grad_norm": 0.5884847044944763, + "learning_rate": 4.264886101496542e-06, + "loss": 0.5812, + "step": 5175 + }, + { + "epoch": 0.7626719056974459, + "grad_norm": 0.5808977484703064, + "learning_rate": 4.26461149051277e-06, + "loss": 0.5693, + "step": 5176 + }, + { + "epoch": 0.762819253438114, + "grad_norm": 0.5694929957389832, + "learning_rate": 4.2643368370909185e-06, + "loss": 0.5948, + "step": 5177 + }, + { + "epoch": 0.7629666011787819, + "grad_norm": 0.5715570449829102, + "learning_rate": 4.264062141237595e-06, + "loss": 0.5468, + "step": 5178 + }, + { + "epoch": 0.76311394891945, + "grad_norm": 0.5887993574142456, + "learning_rate": 4.263787402959404e-06, + "loss": 0.5737, + "step": 5179 + }, + { + "epoch": 0.7632612966601179, + "grad_norm": 0.5662089586257935, + "learning_rate": 4.263512622262953e-06, + "loss": 0.5791, + "step": 5180 + }, + { + "epoch": 0.7634086444007858, + "grad_norm": 0.5449264049530029, + "learning_rate": 4.263237799154851e-06, + "loss": 0.5464, + "step": 5181 + }, + { + "epoch": 0.7635559921414539, + "grad_norm": 0.6032928228378296, + "learning_rate": 4.262962933641707e-06, + "loss": 0.5674, + "step": 5182 + }, + { + "epoch": 0.7637033398821218, + "grad_norm": 0.5596848726272583, + "learning_rate": 4.262688025730131e-06, + "loss": 0.5623, + "step": 5183 + }, + { + "epoch": 0.7638506876227897, + "grad_norm": 0.585213303565979, + "learning_rate": 4.2624130754267345e-06, + "loss": 0.5621, + "step": 5184 + }, + { + "epoch": 0.7639980353634578, + "grad_norm": 0.5950469970703125, + "learning_rate": 4.262138082738131e-06, + "loss": 0.6004, + "step": 5185 + }, + { + "epoch": 0.7641453831041257, + "grad_norm": 0.5766116380691528, + "learning_rate": 4.261863047670932e-06, + "loss": 0.5672, + "step": 5186 + }, + { + "epoch": 0.7642927308447938, + "grad_norm": 0.579006552696228, + "learning_rate": 4.261587970231754e-06, + "loss": 0.5874, + "step": 5187 + }, + { + "epoch": 0.7644400785854617, + "grad_norm": 0.5914785861968994, + "learning_rate": 4.261312850427211e-06, + "loss": 0.5757, + "step": 5188 + }, + { + "epoch": 0.7645874263261296, + "grad_norm": 0.5885526537895203, + "learning_rate": 4.26103768826392e-06, + "loss": 0.5444, + "step": 5189 + }, + { + "epoch": 0.7647347740667977, + "grad_norm": 0.5888834595680237, + "learning_rate": 4.260762483748499e-06, + "loss": 0.5795, + "step": 5190 + }, + { + "epoch": 0.7648821218074656, + "grad_norm": 0.5978228449821472, + "learning_rate": 4.260487236887566e-06, + "loss": 0.5594, + "step": 5191 + }, + { + "epoch": 0.7650294695481336, + "grad_norm": 0.5721073746681213, + "learning_rate": 4.2602119476877405e-06, + "loss": 0.5547, + "step": 5192 + }, + { + "epoch": 0.7651768172888016, + "grad_norm": 0.5721637010574341, + "learning_rate": 4.2599366161556435e-06, + "loss": 0.5787, + "step": 5193 + }, + { + "epoch": 0.7653241650294695, + "grad_norm": 0.5860452651977539, + "learning_rate": 4.259661242297895e-06, + "loss": 0.5845, + "step": 5194 + }, + { + "epoch": 0.7654715127701375, + "grad_norm": 0.5690872669219971, + "learning_rate": 4.259385826121121e-06, + "loss": 0.5949, + "step": 5195 + }, + { + "epoch": 0.7656188605108055, + "grad_norm": 0.5810747146606445, + "learning_rate": 4.259110367631942e-06, + "loss": 0.5417, + "step": 5196 + }, + { + "epoch": 0.7657662082514735, + "grad_norm": 0.5690611600875854, + "learning_rate": 4.258834866836984e-06, + "loss": 0.5632, + "step": 5197 + }, + { + "epoch": 0.7659135559921415, + "grad_norm": 0.582888126373291, + "learning_rate": 4.258559323742873e-06, + "loss": 0.5758, + "step": 5198 + }, + { + "epoch": 0.7660609037328094, + "grad_norm": 0.5708674788475037, + "learning_rate": 4.258283738356234e-06, + "loss": 0.5783, + "step": 5199 + }, + { + "epoch": 0.7662082514734774, + "grad_norm": 0.5495302081108093, + "learning_rate": 4.258008110683696e-06, + "loss": 0.5364, + "step": 5200 + }, + { + "epoch": 0.7663555992141454, + "grad_norm": 0.6021237969398499, + "learning_rate": 4.257732440731888e-06, + "loss": 0.5685, + "step": 5201 + }, + { + "epoch": 0.7665029469548134, + "grad_norm": 0.5978443622589111, + "learning_rate": 4.257456728507438e-06, + "loss": 0.5856, + "step": 5202 + }, + { + "epoch": 0.7666502946954813, + "grad_norm": 0.5717548727989197, + "learning_rate": 4.257180974016979e-06, + "loss": 0.5636, + "step": 5203 + }, + { + "epoch": 0.7667976424361493, + "grad_norm": 0.5934511423110962, + "learning_rate": 4.256905177267142e-06, + "loss": 0.567, + "step": 5204 + }, + { + "epoch": 0.7669449901768173, + "grad_norm": 0.610914945602417, + "learning_rate": 4.256629338264558e-06, + "loss": 0.5444, + "step": 5205 + }, + { + "epoch": 0.7670923379174852, + "grad_norm": 0.5819429159164429, + "learning_rate": 4.2563534570158646e-06, + "loss": 0.5541, + "step": 5206 + }, + { + "epoch": 0.7672396856581533, + "grad_norm": 0.5931657552719116, + "learning_rate": 4.2560775335276926e-06, + "loss": 0.5812, + "step": 5207 + }, + { + "epoch": 0.7673870333988212, + "grad_norm": 0.6156556010246277, + "learning_rate": 4.2558015678066806e-06, + "loss": 0.5664, + "step": 5208 + }, + { + "epoch": 0.7675343811394892, + "grad_norm": 0.6028849482536316, + "learning_rate": 4.255525559859463e-06, + "loss": 0.5466, + "step": 5209 + }, + { + "epoch": 0.7676817288801572, + "grad_norm": 0.5593896508216858, + "learning_rate": 4.255249509692681e-06, + "loss": 0.5492, + "step": 5210 + }, + { + "epoch": 0.7678290766208251, + "grad_norm": 0.5733510255813599, + "learning_rate": 4.2549734173129706e-06, + "loss": 0.5748, + "step": 5211 + }, + { + "epoch": 0.7679764243614932, + "grad_norm": 0.6102385520935059, + "learning_rate": 4.254697282726972e-06, + "loss": 0.5451, + "step": 5212 + }, + { + "epoch": 0.7681237721021611, + "grad_norm": 0.58672696352005, + "learning_rate": 4.254421105941327e-06, + "loss": 0.5455, + "step": 5213 + }, + { + "epoch": 0.768271119842829, + "grad_norm": 0.5905429720878601, + "learning_rate": 4.254144886962678e-06, + "loss": 0.5299, + "step": 5214 + }, + { + "epoch": 0.7684184675834971, + "grad_norm": 0.5806798934936523, + "learning_rate": 4.253868625797667e-06, + "loss": 0.5531, + "step": 5215 + }, + { + "epoch": 0.768565815324165, + "grad_norm": 0.5800710320472717, + "learning_rate": 4.253592322452938e-06, + "loss": 0.5523, + "step": 5216 + }, + { + "epoch": 0.7687131630648331, + "grad_norm": 0.596091628074646, + "learning_rate": 4.253315976935136e-06, + "loss": 0.5802, + "step": 5217 + }, + { + "epoch": 0.768860510805501, + "grad_norm": 0.5828782320022583, + "learning_rate": 4.253039589250908e-06, + "loss": 0.5644, + "step": 5218 + }, + { + "epoch": 0.7690078585461689, + "grad_norm": 0.5877309441566467, + "learning_rate": 4.2527631594068994e-06, + "loss": 0.5615, + "step": 5219 + }, + { + "epoch": 0.769155206286837, + "grad_norm": 0.5898804068565369, + "learning_rate": 4.252486687409759e-06, + "loss": 0.5158, + "step": 5220 + }, + { + "epoch": 0.7693025540275049, + "grad_norm": 0.5838584303855896, + "learning_rate": 4.252210173266135e-06, + "loss": 0.5939, + "step": 5221 + }, + { + "epoch": 0.7694499017681729, + "grad_norm": 0.5555184483528137, + "learning_rate": 4.251933616982679e-06, + "loss": 0.5373, + "step": 5222 + }, + { + "epoch": 0.7695972495088409, + "grad_norm": 0.5968228578567505, + "learning_rate": 4.251657018566041e-06, + "loss": 0.5865, + "step": 5223 + }, + { + "epoch": 0.7697445972495088, + "grad_norm": 0.5634402632713318, + "learning_rate": 4.2513803780228735e-06, + "loss": 0.5662, + "step": 5224 + }, + { + "epoch": 0.7698919449901768, + "grad_norm": 0.6147152781486511, + "learning_rate": 4.251103695359829e-06, + "loss": 0.5643, + "step": 5225 + }, + { + "epoch": 0.7700392927308448, + "grad_norm": 0.6147010326385498, + "learning_rate": 4.250826970583563e-06, + "loss": 0.5449, + "step": 5226 + }, + { + "epoch": 0.7701866404715128, + "grad_norm": 0.576544463634491, + "learning_rate": 4.250550203700728e-06, + "loss": 0.5638, + "step": 5227 + }, + { + "epoch": 0.7703339882121808, + "grad_norm": 0.5774623155593872, + "learning_rate": 4.250273394717982e-06, + "loss": 0.5706, + "step": 5228 + }, + { + "epoch": 0.7704813359528487, + "grad_norm": 0.5702281594276428, + "learning_rate": 4.249996543641982e-06, + "loss": 0.5117, + "step": 5229 + }, + { + "epoch": 0.7706286836935167, + "grad_norm": 0.5963106751441956, + "learning_rate": 4.249719650479386e-06, + "loss": 0.5582, + "step": 5230 + }, + { + "epoch": 0.7707760314341847, + "grad_norm": 0.5722272396087646, + "learning_rate": 4.249442715236854e-06, + "loss": 0.588, + "step": 5231 + }, + { + "epoch": 0.7709233791748527, + "grad_norm": 0.732505202293396, + "learning_rate": 4.249165737921044e-06, + "loss": 0.5616, + "step": 5232 + }, + { + "epoch": 0.7710707269155206, + "grad_norm": 0.5787243843078613, + "learning_rate": 4.248888718538618e-06, + "loss": 0.5762, + "step": 5233 + }, + { + "epoch": 0.7712180746561886, + "grad_norm": 0.5853893160820007, + "learning_rate": 4.2486116570962396e-06, + "loss": 0.5898, + "step": 5234 + }, + { + "epoch": 0.7713654223968566, + "grad_norm": 0.5772911906242371, + "learning_rate": 4.248334553600571e-06, + "loss": 0.5525, + "step": 5235 + }, + { + "epoch": 0.7715127701375245, + "grad_norm": 0.5533375144004822, + "learning_rate": 4.2480574080582756e-06, + "loss": 0.5609, + "step": 5236 + }, + { + "epoch": 0.7716601178781926, + "grad_norm": 0.5914943218231201, + "learning_rate": 4.24778022047602e-06, + "loss": 0.566, + "step": 5237 + }, + { + "epoch": 0.7718074656188605, + "grad_norm": 0.5632745623588562, + "learning_rate": 4.247502990860471e-06, + "loss": 0.5481, + "step": 5238 + }, + { + "epoch": 0.7719548133595285, + "grad_norm": 0.5727052092552185, + "learning_rate": 4.247225719218293e-06, + "loss": 0.5184, + "step": 5239 + }, + { + "epoch": 0.7721021611001965, + "grad_norm": 0.5872175693511963, + "learning_rate": 4.246948405556156e-06, + "loss": 0.5221, + "step": 5240 + }, + { + "epoch": 0.7722495088408644, + "grad_norm": 0.5503101944923401, + "learning_rate": 4.2466710498807294e-06, + "loss": 0.5721, + "step": 5241 + }, + { + "epoch": 0.7723968565815325, + "grad_norm": 0.5947669148445129, + "learning_rate": 4.246393652198683e-06, + "loss": 0.5738, + "step": 5242 + }, + { + "epoch": 0.7725442043222004, + "grad_norm": 0.6190189719200134, + "learning_rate": 4.24611621251669e-06, + "loss": 0.547, + "step": 5243 + }, + { + "epoch": 0.7726915520628683, + "grad_norm": 0.5849734544754028, + "learning_rate": 4.24583873084142e-06, + "loss": 0.5612, + "step": 5244 + }, + { + "epoch": 0.7728388998035364, + "grad_norm": 0.6079813838005066, + "learning_rate": 4.245561207179547e-06, + "loss": 0.5637, + "step": 5245 + }, + { + "epoch": 0.7729862475442043, + "grad_norm": 0.5803741216659546, + "learning_rate": 4.245283641537747e-06, + "loss": 0.575, + "step": 5246 + }, + { + "epoch": 0.7731335952848722, + "grad_norm": 0.5982162952423096, + "learning_rate": 4.245006033922693e-06, + "loss": 0.5921, + "step": 5247 + }, + { + "epoch": 0.7732809430255403, + "grad_norm": 0.6322979927062988, + "learning_rate": 4.244728384341061e-06, + "loss": 0.5743, + "step": 5248 + }, + { + "epoch": 0.7734282907662082, + "grad_norm": 0.5756106376647949, + "learning_rate": 4.244450692799532e-06, + "loss": 0.5806, + "step": 5249 + }, + { + "epoch": 0.7735756385068763, + "grad_norm": 0.5571581721305847, + "learning_rate": 4.244172959304781e-06, + "loss": 0.5691, + "step": 5250 + }, + { + "epoch": 0.7737229862475442, + "grad_norm": 0.6487487554550171, + "learning_rate": 4.243895183863488e-06, + "loss": 0.5731, + "step": 5251 + }, + { + "epoch": 0.7738703339882121, + "grad_norm": 0.5783430933952332, + "learning_rate": 4.2436173664823345e-06, + "loss": 0.5503, + "step": 5252 + }, + { + "epoch": 0.7740176817288802, + "grad_norm": 0.5845322012901306, + "learning_rate": 4.243339507168001e-06, + "loss": 0.5536, + "step": 5253 + }, + { + "epoch": 0.7741650294695481, + "grad_norm": 0.6070813536643982, + "learning_rate": 4.24306160592717e-06, + "loss": 0.582, + "step": 5254 + }, + { + "epoch": 0.7743123772102161, + "grad_norm": 0.5717104077339172, + "learning_rate": 4.2427836627665255e-06, + "loss": 0.5916, + "step": 5255 + }, + { + "epoch": 0.7744597249508841, + "grad_norm": 0.5930984616279602, + "learning_rate": 4.24250567769275e-06, + "loss": 0.5705, + "step": 5256 + }, + { + "epoch": 0.774607072691552, + "grad_norm": 0.6232183575630188, + "learning_rate": 4.242227650712532e-06, + "loss": 0.555, + "step": 5257 + }, + { + "epoch": 0.7747544204322201, + "grad_norm": 0.5741205215454102, + "learning_rate": 4.241949581832554e-06, + "loss": 0.5783, + "step": 5258 + }, + { + "epoch": 0.774901768172888, + "grad_norm": 0.5963667035102844, + "learning_rate": 4.2416714710595065e-06, + "loss": 0.5717, + "step": 5259 + }, + { + "epoch": 0.775049115913556, + "grad_norm": 0.5600559115409851, + "learning_rate": 4.241393318400078e-06, + "loss": 0.5919, + "step": 5260 + }, + { + "epoch": 0.775196463654224, + "grad_norm": 0.5732530355453491, + "learning_rate": 4.241115123860956e-06, + "loss": 0.5571, + "step": 5261 + }, + { + "epoch": 0.775343811394892, + "grad_norm": 0.5635586977005005, + "learning_rate": 4.2408368874488315e-06, + "loss": 0.5306, + "step": 5262 + }, + { + "epoch": 0.7754911591355599, + "grad_norm": 0.5559437274932861, + "learning_rate": 4.240558609170397e-06, + "loss": 0.5619, + "step": 5263 + }, + { + "epoch": 0.7756385068762279, + "grad_norm": 0.5735060572624207, + "learning_rate": 4.240280289032345e-06, + "loss": 0.554, + "step": 5264 + }, + { + "epoch": 0.7757858546168959, + "grad_norm": 0.5800585746765137, + "learning_rate": 4.240001927041367e-06, + "loss": 0.5897, + "step": 5265 + }, + { + "epoch": 0.7759332023575638, + "grad_norm": 0.6065037846565247, + "learning_rate": 4.239723523204159e-06, + "loss": 0.589, + "step": 5266 + }, + { + "epoch": 0.7760805500982318, + "grad_norm": 0.6189467906951904, + "learning_rate": 4.2394450775274165e-06, + "loss": 0.5795, + "step": 5267 + }, + { + "epoch": 0.7762278978388998, + "grad_norm": 0.5545510053634644, + "learning_rate": 4.239166590017836e-06, + "loss": 0.5316, + "step": 5268 + }, + { + "epoch": 0.7763752455795678, + "grad_norm": 0.6148989796638489, + "learning_rate": 4.2388880606821144e-06, + "loss": 0.5716, + "step": 5269 + }, + { + "epoch": 0.7765225933202358, + "grad_norm": 0.5826708078384399, + "learning_rate": 4.238609489526951e-06, + "loss": 0.5615, + "step": 5270 + }, + { + "epoch": 0.7766699410609037, + "grad_norm": 0.5715269446372986, + "learning_rate": 4.238330876559044e-06, + "loss": 0.5658, + "step": 5271 + }, + { + "epoch": 0.7768172888015717, + "grad_norm": 0.6113008260726929, + "learning_rate": 4.238052221785095e-06, + "loss": 0.5711, + "step": 5272 + }, + { + "epoch": 0.7769646365422397, + "grad_norm": 0.6077571511268616, + "learning_rate": 4.2377735252118056e-06, + "loss": 0.5939, + "step": 5273 + }, + { + "epoch": 0.7771119842829076, + "grad_norm": 0.5571595430374146, + "learning_rate": 4.2374947868458785e-06, + "loss": 0.5498, + "step": 5274 + }, + { + "epoch": 0.7772593320235757, + "grad_norm": 0.5743693113327026, + "learning_rate": 4.237216006694015e-06, + "loss": 0.5363, + "step": 5275 + }, + { + "epoch": 0.7774066797642436, + "grad_norm": 0.5930222868919373, + "learning_rate": 4.236937184762923e-06, + "loss": 0.5373, + "step": 5276 + }, + { + "epoch": 0.7775540275049115, + "grad_norm": 0.572381854057312, + "learning_rate": 4.236658321059306e-06, + "loss": 0.568, + "step": 5277 + }, + { + "epoch": 0.7777013752455796, + "grad_norm": 0.5696147680282593, + "learning_rate": 4.2363794155898705e-06, + "loss": 0.5472, + "step": 5278 + }, + { + "epoch": 0.7778487229862475, + "grad_norm": 0.5849428176879883, + "learning_rate": 4.236100468361325e-06, + "loss": 0.5348, + "step": 5279 + }, + { + "epoch": 0.7779960707269156, + "grad_norm": 0.5930163264274597, + "learning_rate": 4.235821479380376e-06, + "loss": 0.5593, + "step": 5280 + }, + { + "epoch": 0.7781434184675835, + "grad_norm": 0.5876350998878479, + "learning_rate": 4.235542448653737e-06, + "loss": 0.588, + "step": 5281 + }, + { + "epoch": 0.7782907662082514, + "grad_norm": 0.5700976848602295, + "learning_rate": 4.235263376188114e-06, + "loss": 0.552, + "step": 5282 + }, + { + "epoch": 0.7784381139489195, + "grad_norm": 0.5829318165779114, + "learning_rate": 4.234984261990222e-06, + "loss": 0.552, + "step": 5283 + }, + { + "epoch": 0.7785854616895874, + "grad_norm": 0.5664739608764648, + "learning_rate": 4.2347051060667724e-06, + "loss": 0.5709, + "step": 5284 + }, + { + "epoch": 0.7787328094302554, + "grad_norm": 0.5769188404083252, + "learning_rate": 4.234425908424477e-06, + "loss": 0.5756, + "step": 5285 + }, + { + "epoch": 0.7788801571709234, + "grad_norm": 0.6189342737197876, + "learning_rate": 4.2341466690700535e-06, + "loss": 0.5828, + "step": 5286 + }, + { + "epoch": 0.7790275049115913, + "grad_norm": 0.5721401572227478, + "learning_rate": 4.233867388010215e-06, + "loss": 0.5537, + "step": 5287 + }, + { + "epoch": 0.7791748526522594, + "grad_norm": 0.5834298729896545, + "learning_rate": 4.2335880652516796e-06, + "loss": 0.5402, + "step": 5288 + }, + { + "epoch": 0.7793222003929273, + "grad_norm": 0.553033173084259, + "learning_rate": 4.233308700801164e-06, + "loss": 0.5337, + "step": 5289 + }, + { + "epoch": 0.7794695481335953, + "grad_norm": 0.5967013239860535, + "learning_rate": 4.233029294665388e-06, + "loss": 0.5601, + "step": 5290 + }, + { + "epoch": 0.7796168958742633, + "grad_norm": 0.5599421262741089, + "learning_rate": 4.23274984685107e-06, + "loss": 0.559, + "step": 5291 + }, + { + "epoch": 0.7797642436149312, + "grad_norm": 0.5700132846832275, + "learning_rate": 4.23247035736493e-06, + "loss": 0.5676, + "step": 5292 + }, + { + "epoch": 0.7799115913555992, + "grad_norm": 0.5716684460639954, + "learning_rate": 4.2321908262136915e-06, + "loss": 0.5402, + "step": 5293 + }, + { + "epoch": 0.7800589390962672, + "grad_norm": 0.5678990483283997, + "learning_rate": 4.2319112534040755e-06, + "loss": 0.5338, + "step": 5294 + }, + { + "epoch": 0.7802062868369352, + "grad_norm": 0.6128599047660828, + "learning_rate": 4.231631638942806e-06, + "loss": 0.5859, + "step": 5295 + }, + { + "epoch": 0.7803536345776031, + "grad_norm": 0.567688524723053, + "learning_rate": 4.231351982836608e-06, + "loss": 0.5796, + "step": 5296 + }, + { + "epoch": 0.7805009823182711, + "grad_norm": 0.5614991784095764, + "learning_rate": 4.231072285092207e-06, + "loss": 0.5812, + "step": 5297 + }, + { + "epoch": 0.7806483300589391, + "grad_norm": 0.5749772787094116, + "learning_rate": 4.2307925457163304e-06, + "loss": 0.5586, + "step": 5298 + }, + { + "epoch": 0.7807956777996071, + "grad_norm": 0.5888197422027588, + "learning_rate": 4.2305127647157036e-06, + "loss": 0.5715, + "step": 5299 + }, + { + "epoch": 0.7809430255402751, + "grad_norm": 0.5787168741226196, + "learning_rate": 4.2302329420970564e-06, + "loss": 0.5379, + "step": 5300 + }, + { + "epoch": 0.781090373280943, + "grad_norm": 0.607259213924408, + "learning_rate": 4.22995307786712e-06, + "loss": 0.5711, + "step": 5301 + }, + { + "epoch": 0.781237721021611, + "grad_norm": 0.5712140798568726, + "learning_rate": 4.229673172032621e-06, + "loss": 0.57, + "step": 5302 + }, + { + "epoch": 0.781385068762279, + "grad_norm": 0.6029157042503357, + "learning_rate": 4.229393224600295e-06, + "loss": 0.6005, + "step": 5303 + }, + { + "epoch": 0.7815324165029469, + "grad_norm": 0.5612097382545471, + "learning_rate": 4.229113235576872e-06, + "loss": 0.5387, + "step": 5304 + }, + { + "epoch": 0.781679764243615, + "grad_norm": 0.597843587398529, + "learning_rate": 4.2288332049690885e-06, + "loss": 0.5388, + "step": 5305 + }, + { + "epoch": 0.7818271119842829, + "grad_norm": 0.5855188965797424, + "learning_rate": 4.228553132783677e-06, + "loss": 0.5391, + "step": 5306 + }, + { + "epoch": 0.7819744597249508, + "grad_norm": 0.5924221873283386, + "learning_rate": 4.228273019027372e-06, + "loss": 0.5874, + "step": 5307 + }, + { + "epoch": 0.7821218074656189, + "grad_norm": 0.5934986472129822, + "learning_rate": 4.227992863706913e-06, + "loss": 0.577, + "step": 5308 + }, + { + "epoch": 0.7822691552062868, + "grad_norm": 0.598666250705719, + "learning_rate": 4.2277126668290345e-06, + "loss": 0.5345, + "step": 5309 + }, + { + "epoch": 0.7824165029469549, + "grad_norm": 0.5814545750617981, + "learning_rate": 4.227432428400478e-06, + "loss": 0.563, + "step": 5310 + }, + { + "epoch": 0.7825638506876228, + "grad_norm": 0.5939489006996155, + "learning_rate": 4.227152148427981e-06, + "loss": 0.5449, + "step": 5311 + }, + { + "epoch": 0.7827111984282907, + "grad_norm": 0.5568521618843079, + "learning_rate": 4.226871826918285e-06, + "loss": 0.5715, + "step": 5312 + }, + { + "epoch": 0.7828585461689588, + "grad_norm": 0.5502527356147766, + "learning_rate": 4.226591463878132e-06, + "loss": 0.5882, + "step": 5313 + }, + { + "epoch": 0.7830058939096267, + "grad_norm": 0.6041135787963867, + "learning_rate": 4.2263110593142646e-06, + "loss": 0.5409, + "step": 5314 + }, + { + "epoch": 0.7831532416502947, + "grad_norm": 0.579203188419342, + "learning_rate": 4.226030613233425e-06, + "loss": 0.579, + "step": 5315 + }, + { + "epoch": 0.7833005893909627, + "grad_norm": 0.575016438961029, + "learning_rate": 4.225750125642359e-06, + "loss": 0.5626, + "step": 5316 + }, + { + "epoch": 0.7834479371316306, + "grad_norm": 0.5868917107582092, + "learning_rate": 4.2254695965478125e-06, + "loss": 0.6029, + "step": 5317 + }, + { + "epoch": 0.7835952848722986, + "grad_norm": 0.5565376877784729, + "learning_rate": 4.22518902595653e-06, + "loss": 0.5755, + "step": 5318 + }, + { + "epoch": 0.7837426326129666, + "grad_norm": 0.5894243717193604, + "learning_rate": 4.224908413875262e-06, + "loss": 0.5466, + "step": 5319 + }, + { + "epoch": 0.7838899803536346, + "grad_norm": 0.6049026250839233, + "learning_rate": 4.2246277603107544e-06, + "loss": 0.5766, + "step": 5320 + }, + { + "epoch": 0.7840373280943026, + "grad_norm": 0.5770626068115234, + "learning_rate": 4.224347065269759e-06, + "loss": 0.5542, + "step": 5321 + }, + { + "epoch": 0.7841846758349705, + "grad_norm": 0.5822217464447021, + "learning_rate": 4.224066328759025e-06, + "loss": 0.5395, + "step": 5322 + }, + { + "epoch": 0.7843320235756385, + "grad_norm": 0.5548313856124878, + "learning_rate": 4.223785550785304e-06, + "loss": 0.5792, + "step": 5323 + }, + { + "epoch": 0.7844793713163065, + "grad_norm": 0.5857640504837036, + "learning_rate": 4.2235047313553504e-06, + "loss": 0.5781, + "step": 5324 + }, + { + "epoch": 0.7846267190569745, + "grad_norm": 0.5734198689460754, + "learning_rate": 4.223223870475916e-06, + "loss": 0.5798, + "step": 5325 + }, + { + "epoch": 0.7847740667976424, + "grad_norm": 0.5780408382415771, + "learning_rate": 4.222942968153755e-06, + "loss": 0.5503, + "step": 5326 + }, + { + "epoch": 0.7849214145383104, + "grad_norm": 0.5982962250709534, + "learning_rate": 4.222662024395624e-06, + "loss": 0.5515, + "step": 5327 + }, + { + "epoch": 0.7850687622789784, + "grad_norm": 0.5665527582168579, + "learning_rate": 4.222381039208279e-06, + "loss": 0.6017, + "step": 5328 + }, + { + "epoch": 0.7852161100196464, + "grad_norm": 0.5736485719680786, + "learning_rate": 4.222100012598479e-06, + "loss": 0.544, + "step": 5329 + }, + { + "epoch": 0.7853634577603144, + "grad_norm": 0.571587085723877, + "learning_rate": 4.221818944572981e-06, + "loss": 0.5757, + "step": 5330 + }, + { + "epoch": 0.7855108055009823, + "grad_norm": 0.5641197562217712, + "learning_rate": 4.2215378351385435e-06, + "loss": 0.563, + "step": 5331 + }, + { + "epoch": 0.7856581532416503, + "grad_norm": 0.5736072659492493, + "learning_rate": 4.2212566843019305e-06, + "loss": 0.5437, + "step": 5332 + }, + { + "epoch": 0.7858055009823183, + "grad_norm": 0.5753357410430908, + "learning_rate": 4.220975492069901e-06, + "loss": 0.5556, + "step": 5333 + }, + { + "epoch": 0.7859528487229862, + "grad_norm": 0.5830897688865662, + "learning_rate": 4.220694258449217e-06, + "loss": 0.5696, + "step": 5334 + }, + { + "epoch": 0.7861001964636543, + "grad_norm": 0.5884495377540588, + "learning_rate": 4.220412983446644e-06, + "loss": 0.5873, + "step": 5335 + }, + { + "epoch": 0.7862475442043222, + "grad_norm": 0.5994848608970642, + "learning_rate": 4.220131667068946e-06, + "loss": 0.5715, + "step": 5336 + }, + { + "epoch": 0.7863948919449901, + "grad_norm": 0.5854806303977966, + "learning_rate": 4.219850309322888e-06, + "loss": 0.5629, + "step": 5337 + }, + { + "epoch": 0.7865422396856582, + "grad_norm": 0.5660422444343567, + "learning_rate": 4.219568910215236e-06, + "loss": 0.5567, + "step": 5338 + }, + { + "epoch": 0.7866895874263261, + "grad_norm": 0.550727367401123, + "learning_rate": 4.219287469752759e-06, + "loss": 0.5581, + "step": 5339 + }, + { + "epoch": 0.7868369351669942, + "grad_norm": 0.5961816310882568, + "learning_rate": 4.219005987942224e-06, + "loss": 0.5644, + "step": 5340 + }, + { + "epoch": 0.7869842829076621, + "grad_norm": 0.5932006239891052, + "learning_rate": 4.218724464790403e-06, + "loss": 0.5562, + "step": 5341 + }, + { + "epoch": 0.78713163064833, + "grad_norm": 0.5684391260147095, + "learning_rate": 4.218442900304063e-06, + "loss": 0.5502, + "step": 5342 + }, + { + "epoch": 0.7872789783889981, + "grad_norm": 0.5716351270675659, + "learning_rate": 4.218161294489977e-06, + "loss": 0.5476, + "step": 5343 + }, + { + "epoch": 0.787426326129666, + "grad_norm": 0.5911459922790527, + "learning_rate": 4.217879647354919e-06, + "loss": 0.5396, + "step": 5344 + }, + { + "epoch": 0.787573673870334, + "grad_norm": 0.5571990609169006, + "learning_rate": 4.21759795890566e-06, + "loss": 0.5876, + "step": 5345 + }, + { + "epoch": 0.787721021611002, + "grad_norm": 0.5747010111808777, + "learning_rate": 4.217316229148978e-06, + "loss": 0.5458, + "step": 5346 + }, + { + "epoch": 0.7878683693516699, + "grad_norm": 0.5918692946434021, + "learning_rate": 4.217034458091644e-06, + "loss": 0.5388, + "step": 5347 + }, + { + "epoch": 0.7880157170923379, + "grad_norm": 0.6022171974182129, + "learning_rate": 4.216752645740437e-06, + "loss": 0.5826, + "step": 5348 + }, + { + "epoch": 0.7881630648330059, + "grad_norm": 0.5530702471733093, + "learning_rate": 4.2164707921021345e-06, + "loss": 0.5351, + "step": 5349 + }, + { + "epoch": 0.7883104125736738, + "grad_norm": 0.5827894806861877, + "learning_rate": 4.216188897183514e-06, + "loss": 0.5428, + "step": 5350 + }, + { + "epoch": 0.7884577603143419, + "grad_norm": 0.6207494735717773, + "learning_rate": 4.215906960991356e-06, + "loss": 0.5686, + "step": 5351 + }, + { + "epoch": 0.7886051080550098, + "grad_norm": 0.5821472406387329, + "learning_rate": 4.21562498353244e-06, + "loss": 0.5528, + "step": 5352 + }, + { + "epoch": 0.7887524557956778, + "grad_norm": 0.6058489680290222, + "learning_rate": 4.215342964813548e-06, + "loss": 0.5947, + "step": 5353 + }, + { + "epoch": 0.7888998035363458, + "grad_norm": 0.5728542804718018, + "learning_rate": 4.215060904841462e-06, + "loss": 0.5588, + "step": 5354 + }, + { + "epoch": 0.7890471512770137, + "grad_norm": 0.6246836185455322, + "learning_rate": 4.214778803622967e-06, + "loss": 0.5861, + "step": 5355 + }, + { + "epoch": 0.7891944990176817, + "grad_norm": 0.5999270677566528, + "learning_rate": 4.214496661164844e-06, + "loss": 0.5957, + "step": 5356 + }, + { + "epoch": 0.7893418467583497, + "grad_norm": 0.5607271790504456, + "learning_rate": 4.214214477473881e-06, + "loss": 0.5484, + "step": 5357 + }, + { + "epoch": 0.7894891944990177, + "grad_norm": 0.5870435237884521, + "learning_rate": 4.213932252556864e-06, + "loss": 0.5619, + "step": 5358 + }, + { + "epoch": 0.7896365422396857, + "grad_norm": 0.5961217284202576, + "learning_rate": 4.21364998642058e-06, + "loss": 0.5381, + "step": 5359 + }, + { + "epoch": 0.7897838899803536, + "grad_norm": 0.5900673270225525, + "learning_rate": 4.213367679071819e-06, + "loss": 0.5691, + "step": 5360 + }, + { + "epoch": 0.7899312377210216, + "grad_norm": 0.556176483631134, + "learning_rate": 4.213085330517367e-06, + "loss": 0.5474, + "step": 5361 + }, + { + "epoch": 0.7900785854616896, + "grad_norm": 0.6135327219963074, + "learning_rate": 4.2128029407640184e-06, + "loss": 0.5559, + "step": 5362 + }, + { + "epoch": 0.7902259332023576, + "grad_norm": 0.6078706383705139, + "learning_rate": 4.2125205098185604e-06, + "loss": 0.5617, + "step": 5363 + }, + { + "epoch": 0.7903732809430255, + "grad_norm": 0.5856319069862366, + "learning_rate": 4.212238037687789e-06, + "loss": 0.5463, + "step": 5364 + }, + { + "epoch": 0.7905206286836935, + "grad_norm": 0.5859596729278564, + "learning_rate": 4.211955524378495e-06, + "loss": 0.5179, + "step": 5365 + }, + { + "epoch": 0.7906679764243615, + "grad_norm": 0.5763766169548035, + "learning_rate": 4.211672969897473e-06, + "loss": 0.5701, + "step": 5366 + }, + { + "epoch": 0.7908153241650294, + "grad_norm": 0.7911742925643921, + "learning_rate": 4.21139037425152e-06, + "loss": 0.5727, + "step": 5367 + }, + { + "epoch": 0.7909626719056975, + "grad_norm": 0.5944916009902954, + "learning_rate": 4.2111077374474306e-06, + "loss": 0.5879, + "step": 5368 + }, + { + "epoch": 0.7911100196463654, + "grad_norm": 0.5821462273597717, + "learning_rate": 4.210825059492003e-06, + "loss": 0.5378, + "step": 5369 + }, + { + "epoch": 0.7912573673870335, + "grad_norm": 0.6192207336425781, + "learning_rate": 4.210542340392034e-06, + "loss": 0.5574, + "step": 5370 + }, + { + "epoch": 0.7914047151277014, + "grad_norm": 0.567951500415802, + "learning_rate": 4.210259580154326e-06, + "loss": 0.5829, + "step": 5371 + }, + { + "epoch": 0.7915520628683693, + "grad_norm": 0.5643907189369202, + "learning_rate": 4.209976778785676e-06, + "loss": 0.5681, + "step": 5372 + }, + { + "epoch": 0.7916994106090374, + "grad_norm": 0.6120558381080627, + "learning_rate": 4.209693936292887e-06, + "loss": 0.5985, + "step": 5373 + }, + { + "epoch": 0.7918467583497053, + "grad_norm": 0.609737753868103, + "learning_rate": 4.20941105268276e-06, + "loss": 0.6075, + "step": 5374 + }, + { + "epoch": 0.7919941060903732, + "grad_norm": 0.5799480080604553, + "learning_rate": 4.2091281279621e-06, + "loss": 0.564, + "step": 5375 + }, + { + "epoch": 0.7921414538310413, + "grad_norm": 0.6021174192428589, + "learning_rate": 4.208845162137709e-06, + "loss": 0.5875, + "step": 5376 + }, + { + "epoch": 0.7922888015717092, + "grad_norm": 0.579852819442749, + "learning_rate": 4.208562155216394e-06, + "loss": 0.5625, + "step": 5377 + }, + { + "epoch": 0.7924361493123772, + "grad_norm": 0.5888698101043701, + "learning_rate": 4.20827910720496e-06, + "loss": 0.5761, + "step": 5378 + }, + { + "epoch": 0.7925834970530452, + "grad_norm": 0.5738195180892944, + "learning_rate": 4.207996018110215e-06, + "loss": 0.5495, + "step": 5379 + }, + { + "epoch": 0.7927308447937131, + "grad_norm": 0.601041853427887, + "learning_rate": 4.207712887938968e-06, + "loss": 0.5408, + "step": 5380 + }, + { + "epoch": 0.7928781925343812, + "grad_norm": 0.5719479322433472, + "learning_rate": 4.207429716698026e-06, + "loss": 0.5443, + "step": 5381 + }, + { + "epoch": 0.7930255402750491, + "grad_norm": 0.5911372900009155, + "learning_rate": 4.2071465043942e-06, + "loss": 0.5804, + "step": 5382 + }, + { + "epoch": 0.7931728880157171, + "grad_norm": 0.6553266644477844, + "learning_rate": 4.206863251034301e-06, + "loss": 0.6023, + "step": 5383 + }, + { + "epoch": 0.7933202357563851, + "grad_norm": 0.5658379793167114, + "learning_rate": 4.206579956625142e-06, + "loss": 0.5586, + "step": 5384 + }, + { + "epoch": 0.793467583497053, + "grad_norm": 0.5833011865615845, + "learning_rate": 4.206296621173535e-06, + "loss": 0.5469, + "step": 5385 + }, + { + "epoch": 0.793614931237721, + "grad_norm": 0.5614168047904968, + "learning_rate": 4.206013244686295e-06, + "loss": 0.5804, + "step": 5386 + }, + { + "epoch": 0.793762278978389, + "grad_norm": 0.5990191698074341, + "learning_rate": 4.205729827170237e-06, + "loss": 0.5602, + "step": 5387 + }, + { + "epoch": 0.793909626719057, + "grad_norm": 0.7063112258911133, + "learning_rate": 4.2054463686321755e-06, + "loss": 0.5622, + "step": 5388 + }, + { + "epoch": 0.7940569744597249, + "grad_norm": 0.564127504825592, + "learning_rate": 4.20516286907893e-06, + "loss": 0.6026, + "step": 5389 + }, + { + "epoch": 0.7942043222003929, + "grad_norm": 0.5893886685371399, + "learning_rate": 4.204879328517316e-06, + "loss": 0.5701, + "step": 5390 + }, + { + "epoch": 0.7943516699410609, + "grad_norm": 0.5991474986076355, + "learning_rate": 4.204595746954155e-06, + "loss": 0.5737, + "step": 5391 + }, + { + "epoch": 0.7944990176817289, + "grad_norm": 0.5541865229606628, + "learning_rate": 4.204312124396265e-06, + "loss": 0.5638, + "step": 5392 + }, + { + "epoch": 0.7946463654223969, + "grad_norm": 0.5752266645431519, + "learning_rate": 4.204028460850469e-06, + "loss": 0.5784, + "step": 5393 + }, + { + "epoch": 0.7947937131630648, + "grad_norm": 0.5937653183937073, + "learning_rate": 4.203744756323586e-06, + "loss": 0.5593, + "step": 5394 + }, + { + "epoch": 0.7949410609037328, + "grad_norm": 0.5884901881217957, + "learning_rate": 4.203461010822441e-06, + "loss": 0.6027, + "step": 5395 + }, + { + "epoch": 0.7950884086444008, + "grad_norm": 0.5863538980484009, + "learning_rate": 4.203177224353858e-06, + "loss": 0.5831, + "step": 5396 + }, + { + "epoch": 0.7952357563850687, + "grad_norm": 0.5540417432785034, + "learning_rate": 4.202893396924662e-06, + "loss": 0.5107, + "step": 5397 + }, + { + "epoch": 0.7953831041257368, + "grad_norm": 0.5834562182426453, + "learning_rate": 4.2026095285416785e-06, + "loss": 0.538, + "step": 5398 + }, + { + "epoch": 0.7955304518664047, + "grad_norm": 0.5989788770675659, + "learning_rate": 4.202325619211735e-06, + "loss": 0.5818, + "step": 5399 + }, + { + "epoch": 0.7956777996070727, + "grad_norm": 0.5875646471977234, + "learning_rate": 4.202041668941657e-06, + "loss": 0.5842, + "step": 5400 + }, + { + "epoch": 0.7958251473477407, + "grad_norm": 0.5808630585670471, + "learning_rate": 4.201757677738276e-06, + "loss": 0.5636, + "step": 5401 + }, + { + "epoch": 0.7959724950884086, + "grad_norm": 0.6018409729003906, + "learning_rate": 4.201473645608422e-06, + "loss": 0.5212, + "step": 5402 + }, + { + "epoch": 0.7961198428290767, + "grad_norm": 0.5545368790626526, + "learning_rate": 4.2011895725589236e-06, + "loss": 0.5376, + "step": 5403 + }, + { + "epoch": 0.7962671905697446, + "grad_norm": 0.5750635266304016, + "learning_rate": 4.200905458596614e-06, + "loss": 0.5548, + "step": 5404 + }, + { + "epoch": 0.7964145383104125, + "grad_norm": 0.5837636590003967, + "learning_rate": 4.200621303728326e-06, + "loss": 0.5805, + "step": 5405 + }, + { + "epoch": 0.7965618860510806, + "grad_norm": 0.5964565277099609, + "learning_rate": 4.200337107960893e-06, + "loss": 0.5608, + "step": 5406 + }, + { + "epoch": 0.7967092337917485, + "grad_norm": 0.5919738411903381, + "learning_rate": 4.200052871301151e-06, + "loss": 0.5539, + "step": 5407 + }, + { + "epoch": 0.7968565815324165, + "grad_norm": 0.5859822034835815, + "learning_rate": 4.199768593755935e-06, + "loss": 0.5722, + "step": 5408 + }, + { + "epoch": 0.7970039292730845, + "grad_norm": 0.591156542301178, + "learning_rate": 4.199484275332081e-06, + "loss": 0.6143, + "step": 5409 + }, + { + "epoch": 0.7971512770137524, + "grad_norm": 0.578068196773529, + "learning_rate": 4.199199916036427e-06, + "loss": 0.582, + "step": 5410 + }, + { + "epoch": 0.7972986247544205, + "grad_norm": 0.5849480628967285, + "learning_rate": 4.198915515875812e-06, + "loss": 0.5492, + "step": 5411 + }, + { + "epoch": 0.7974459724950884, + "grad_norm": 0.5802563428878784, + "learning_rate": 4.198631074857076e-06, + "loss": 0.5963, + "step": 5412 + }, + { + "epoch": 0.7975933202357564, + "grad_norm": 0.5760462284088135, + "learning_rate": 4.198346592987059e-06, + "loss": 0.5663, + "step": 5413 + }, + { + "epoch": 0.7977406679764244, + "grad_norm": 0.6171784996986389, + "learning_rate": 4.1980620702726035e-06, + "loss": 0.5975, + "step": 5414 + }, + { + "epoch": 0.7978880157170923, + "grad_norm": 0.5851408839225769, + "learning_rate": 4.197777506720551e-06, + "loss": 0.5719, + "step": 5415 + }, + { + "epoch": 0.7980353634577603, + "grad_norm": 0.5728028416633606, + "learning_rate": 4.197492902337747e-06, + "loss": 0.5593, + "step": 5416 + }, + { + "epoch": 0.7981827111984283, + "grad_norm": 0.6771201491355896, + "learning_rate": 4.1972082571310335e-06, + "loss": 0.5688, + "step": 5417 + }, + { + "epoch": 0.7983300589390963, + "grad_norm": 0.5946121215820312, + "learning_rate": 4.196923571107258e-06, + "loss": 0.5662, + "step": 5418 + }, + { + "epoch": 0.7984774066797642, + "grad_norm": 0.5655754804611206, + "learning_rate": 4.196638844273266e-06, + "loss": 0.566, + "step": 5419 + }, + { + "epoch": 0.7986247544204322, + "grad_norm": 0.5868388414382935, + "learning_rate": 4.196354076635907e-06, + "loss": 0.5907, + "step": 5420 + }, + { + "epoch": 0.7987721021611002, + "grad_norm": 0.5620986819267273, + "learning_rate": 4.196069268202027e-06, + "loss": 0.5647, + "step": 5421 + }, + { + "epoch": 0.7989194499017682, + "grad_norm": 0.5545685887336731, + "learning_rate": 4.195784418978477e-06, + "loss": 0.5664, + "step": 5422 + }, + { + "epoch": 0.7990667976424362, + "grad_norm": 0.5685362815856934, + "learning_rate": 4.1954995289721066e-06, + "loss": 0.56, + "step": 5423 + }, + { + "epoch": 0.7992141453831041, + "grad_norm": 0.6042630672454834, + "learning_rate": 4.195214598189768e-06, + "loss": 0.591, + "step": 5424 + }, + { + "epoch": 0.7993614931237721, + "grad_norm": 0.5780912041664124, + "learning_rate": 4.194929626638313e-06, + "loss": 0.5663, + "step": 5425 + }, + { + "epoch": 0.7995088408644401, + "grad_norm": 0.5704417824745178, + "learning_rate": 4.1946446143245965e-06, + "loss": 0.5962, + "step": 5426 + }, + { + "epoch": 0.799656188605108, + "grad_norm": 0.5909837484359741, + "learning_rate": 4.194359561255471e-06, + "loss": 0.5571, + "step": 5427 + }, + { + "epoch": 0.7998035363457761, + "grad_norm": 0.5892950296401978, + "learning_rate": 4.194074467437793e-06, + "loss": 0.567, + "step": 5428 + }, + { + "epoch": 0.799950884086444, + "grad_norm": 0.5921050906181335, + "learning_rate": 4.193789332878418e-06, + "loss": 0.5958, + "step": 5429 + }, + { + "epoch": 0.800098231827112, + "grad_norm": 0.5516866445541382, + "learning_rate": 4.193504157584205e-06, + "loss": 0.5683, + "step": 5430 + }, + { + "epoch": 0.80024557956778, + "grad_norm": 0.5873015522956848, + "learning_rate": 4.193218941562011e-06, + "loss": 0.5597, + "step": 5431 + }, + { + "epoch": 0.8003929273084479, + "grad_norm": 0.5786586403846741, + "learning_rate": 4.192933684818695e-06, + "loss": 0.5288, + "step": 5432 + }, + { + "epoch": 0.800540275049116, + "grad_norm": 0.5644842982292175, + "learning_rate": 4.192648387361119e-06, + "loss": 0.5742, + "step": 5433 + }, + { + "epoch": 0.8006876227897839, + "grad_norm": 0.5645532608032227, + "learning_rate": 4.192363049196143e-06, + "loss": 0.5411, + "step": 5434 + }, + { + "epoch": 0.8008349705304518, + "grad_norm": 0.6065436601638794, + "learning_rate": 4.192077670330629e-06, + "loss": 0.5685, + "step": 5435 + }, + { + "epoch": 0.8009823182711199, + "grad_norm": 0.6090109348297119, + "learning_rate": 4.19179225077144e-06, + "loss": 0.5485, + "step": 5436 + }, + { + "epoch": 0.8011296660117878, + "grad_norm": 0.5916923880577087, + "learning_rate": 4.191506790525442e-06, + "loss": 0.5773, + "step": 5437 + }, + { + "epoch": 0.8012770137524557, + "grad_norm": 0.565869927406311, + "learning_rate": 4.191221289599499e-06, + "loss": 0.5671, + "step": 5438 + }, + { + "epoch": 0.8014243614931238, + "grad_norm": 0.590549647808075, + "learning_rate": 4.1909357480004765e-06, + "loss": 0.5882, + "step": 5439 + }, + { + "epoch": 0.8015717092337917, + "grad_norm": 0.5875887274742126, + "learning_rate": 4.190650165735243e-06, + "loss": 0.5401, + "step": 5440 + }, + { + "epoch": 0.8017190569744598, + "grad_norm": 0.5827804207801819, + "learning_rate": 4.190364542810666e-06, + "loss": 0.58, + "step": 5441 + }, + { + "epoch": 0.8018664047151277, + "grad_norm": 0.5722939372062683, + "learning_rate": 4.190078879233614e-06, + "loss": 0.5811, + "step": 5442 + }, + { + "epoch": 0.8020137524557956, + "grad_norm": 0.5849546790122986, + "learning_rate": 4.189793175010958e-06, + "loss": 0.5444, + "step": 5443 + }, + { + "epoch": 0.8021611001964637, + "grad_norm": 0.6134750843048096, + "learning_rate": 4.189507430149569e-06, + "loss": 0.6092, + "step": 5444 + }, + { + "epoch": 0.8023084479371316, + "grad_norm": 0.5969536304473877, + "learning_rate": 4.189221644656318e-06, + "loss": 0.5569, + "step": 5445 + }, + { + "epoch": 0.8024557956777996, + "grad_norm": 0.5834222435951233, + "learning_rate": 4.188935818538079e-06, + "loss": 0.5588, + "step": 5446 + }, + { + "epoch": 0.8026031434184676, + "grad_norm": 0.6322603821754456, + "learning_rate": 4.1886499518017265e-06, + "loss": 0.5708, + "step": 5447 + }, + { + "epoch": 0.8027504911591355, + "grad_norm": 0.6041045188903809, + "learning_rate": 4.188364044454134e-06, + "loss": 0.5761, + "step": 5448 + }, + { + "epoch": 0.8028978388998035, + "grad_norm": 0.5461978912353516, + "learning_rate": 4.188078096502178e-06, + "loss": 0.532, + "step": 5449 + }, + { + "epoch": 0.8030451866404715, + "grad_norm": 0.5908074378967285, + "learning_rate": 4.187792107952735e-06, + "loss": 0.5466, + "step": 5450 + }, + { + "epoch": 0.8031925343811395, + "grad_norm": 0.5661520957946777, + "learning_rate": 4.1875060788126844e-06, + "loss": 0.5555, + "step": 5451 + }, + { + "epoch": 0.8033398821218075, + "grad_norm": 0.5823700428009033, + "learning_rate": 4.187220009088903e-06, + "loss": 0.5585, + "step": 5452 + }, + { + "epoch": 0.8034872298624754, + "grad_norm": 0.6049845218658447, + "learning_rate": 4.186933898788274e-06, + "loss": 0.5717, + "step": 5453 + }, + { + "epoch": 0.8036345776031434, + "grad_norm": 0.5478331446647644, + "learning_rate": 4.186647747917673e-06, + "loss": 0.5325, + "step": 5454 + }, + { + "epoch": 0.8037819253438114, + "grad_norm": 0.576515257358551, + "learning_rate": 4.186361556483987e-06, + "loss": 0.5413, + "step": 5455 + }, + { + "epoch": 0.8039292730844794, + "grad_norm": 0.5588356256484985, + "learning_rate": 4.186075324494096e-06, + "loss": 0.5632, + "step": 5456 + }, + { + "epoch": 0.8040766208251473, + "grad_norm": 0.5657811164855957, + "learning_rate": 4.185789051954884e-06, + "loss": 0.5782, + "step": 5457 + }, + { + "epoch": 0.8042239685658154, + "grad_norm": 0.5893856287002563, + "learning_rate": 4.185502738873236e-06, + "loss": 0.5733, + "step": 5458 + }, + { + "epoch": 0.8043713163064833, + "grad_norm": 0.5703029632568359, + "learning_rate": 4.1852163852560375e-06, + "loss": 0.5815, + "step": 5459 + }, + { + "epoch": 0.8045186640471512, + "grad_norm": 0.6088166832923889, + "learning_rate": 4.184929991110176e-06, + "loss": 0.6002, + "step": 5460 + }, + { + "epoch": 0.8046660117878193, + "grad_norm": 0.5938341617584229, + "learning_rate": 4.184643556442537e-06, + "loss": 0.5856, + "step": 5461 + }, + { + "epoch": 0.8048133595284872, + "grad_norm": 0.6007800698280334, + "learning_rate": 4.184357081260012e-06, + "loss": 0.5706, + "step": 5462 + }, + { + "epoch": 0.8049607072691553, + "grad_norm": 0.588433027267456, + "learning_rate": 4.184070565569489e-06, + "loss": 0.5586, + "step": 5463 + }, + { + "epoch": 0.8051080550098232, + "grad_norm": 0.5710928440093994, + "learning_rate": 4.183784009377858e-06, + "loss": 0.5699, + "step": 5464 + }, + { + "epoch": 0.8052554027504911, + "grad_norm": 0.5795049667358398, + "learning_rate": 4.1834974126920126e-06, + "loss": 0.5693, + "step": 5465 + }, + { + "epoch": 0.8054027504911592, + "grad_norm": 0.6027731895446777, + "learning_rate": 4.183210775518843e-06, + "loss": 0.5454, + "step": 5466 + }, + { + "epoch": 0.8055500982318271, + "grad_norm": 0.5519710183143616, + "learning_rate": 4.182924097865244e-06, + "loss": 0.583, + "step": 5467 + }, + { + "epoch": 0.805697445972495, + "grad_norm": 0.5693422555923462, + "learning_rate": 4.18263737973811e-06, + "loss": 0.539, + "step": 5468 + }, + { + "epoch": 0.8058447937131631, + "grad_norm": 0.5741474628448486, + "learning_rate": 4.182350621144336e-06, + "loss": 0.5779, + "step": 5469 + }, + { + "epoch": 0.805992141453831, + "grad_norm": 0.5819971561431885, + "learning_rate": 4.1820638220908185e-06, + "loss": 0.5616, + "step": 5470 + }, + { + "epoch": 0.8061394891944991, + "grad_norm": 0.6070727705955505, + "learning_rate": 4.181776982584455e-06, + "loss": 0.5845, + "step": 5471 + }, + { + "epoch": 0.806286836935167, + "grad_norm": 0.601151168346405, + "learning_rate": 4.1814901026321445e-06, + "loss": 0.5572, + "step": 5472 + }, + { + "epoch": 0.8064341846758349, + "grad_norm": 0.5960268378257751, + "learning_rate": 4.181203182240786e-06, + "loss": 0.5799, + "step": 5473 + }, + { + "epoch": 0.806581532416503, + "grad_norm": 0.5724292397499084, + "learning_rate": 4.1809162214172784e-06, + "loss": 0.5776, + "step": 5474 + }, + { + "epoch": 0.8067288801571709, + "grad_norm": 0.5871511697769165, + "learning_rate": 4.180629220168525e-06, + "loss": 0.5513, + "step": 5475 + }, + { + "epoch": 0.8068762278978389, + "grad_norm": 0.5948945879936218, + "learning_rate": 4.1803421785014265e-06, + "loss": 0.5695, + "step": 5476 + }, + { + "epoch": 0.8070235756385069, + "grad_norm": 0.5921706557273865, + "learning_rate": 4.180055096422886e-06, + "loss": 0.5541, + "step": 5477 + }, + { + "epoch": 0.8071709233791748, + "grad_norm": 0.5690926909446716, + "learning_rate": 4.17976797393981e-06, + "loss": 0.5962, + "step": 5478 + }, + { + "epoch": 0.8073182711198428, + "grad_norm": 0.6078546643257141, + "learning_rate": 4.179480811059101e-06, + "loss": 0.5517, + "step": 5479 + }, + { + "epoch": 0.8074656188605108, + "grad_norm": 0.5956836342811584, + "learning_rate": 4.179193607787667e-06, + "loss": 0.5327, + "step": 5480 + }, + { + "epoch": 0.8076129666011788, + "grad_norm": 0.6139363646507263, + "learning_rate": 4.178906364132414e-06, + "loss": 0.5551, + "step": 5481 + }, + { + "epoch": 0.8077603143418468, + "grad_norm": 0.5724121332168579, + "learning_rate": 4.1786190801002504e-06, + "loss": 0.5255, + "step": 5482 + }, + { + "epoch": 0.8079076620825147, + "grad_norm": 0.5942135453224182, + "learning_rate": 4.178331755698084e-06, + "loss": 0.5545, + "step": 5483 + }, + { + "epoch": 0.8080550098231827, + "grad_norm": 0.5637643933296204, + "learning_rate": 4.178044390932828e-06, + "loss": 0.543, + "step": 5484 + }, + { + "epoch": 0.8082023575638507, + "grad_norm": 0.5708051323890686, + "learning_rate": 4.17775698581139e-06, + "loss": 0.5474, + "step": 5485 + }, + { + "epoch": 0.8083497053045187, + "grad_norm": 0.5890586376190186, + "learning_rate": 4.1774695403406846e-06, + "loss": 0.5762, + "step": 5486 + }, + { + "epoch": 0.8084970530451866, + "grad_norm": 0.6016728281974792, + "learning_rate": 4.177182054527623e-06, + "loss": 0.5743, + "step": 5487 + }, + { + "epoch": 0.8086444007858546, + "grad_norm": 0.5814455151557922, + "learning_rate": 4.1768945283791195e-06, + "loss": 0.5673, + "step": 5488 + }, + { + "epoch": 0.8087917485265226, + "grad_norm": 0.5725035071372986, + "learning_rate": 4.1766069619020885e-06, + "loss": 0.56, + "step": 5489 + }, + { + "epoch": 0.8089390962671905, + "grad_norm": 0.5856314897537231, + "learning_rate": 4.1763193551034474e-06, + "loss": 0.5661, + "step": 5490 + }, + { + "epoch": 0.8090864440078586, + "grad_norm": 0.5551742911338806, + "learning_rate": 4.176031707990112e-06, + "loss": 0.5312, + "step": 5491 + }, + { + "epoch": 0.8092337917485265, + "grad_norm": 0.5873529314994812, + "learning_rate": 4.175744020569e-06, + "loss": 0.5424, + "step": 5492 + }, + { + "epoch": 0.8093811394891945, + "grad_norm": 0.57545006275177, + "learning_rate": 4.17545629284703e-06, + "loss": 0.5886, + "step": 5493 + }, + { + "epoch": 0.8095284872298625, + "grad_norm": 0.5739279389381409, + "learning_rate": 4.175168524831122e-06, + "loss": 0.5212, + "step": 5494 + }, + { + "epoch": 0.8096758349705304, + "grad_norm": 0.5994240045547485, + "learning_rate": 4.1748807165281974e-06, + "loss": 0.5627, + "step": 5495 + }, + { + "epoch": 0.8098231827111985, + "grad_norm": 0.5974462032318115, + "learning_rate": 4.174592867945176e-06, + "loss": 0.57, + "step": 5496 + }, + { + "epoch": 0.8099705304518664, + "grad_norm": 0.6016819477081299, + "learning_rate": 4.174304979088982e-06, + "loss": 0.5756, + "step": 5497 + }, + { + "epoch": 0.8101178781925343, + "grad_norm": 0.5683718323707581, + "learning_rate": 4.17401704996654e-06, + "loss": 0.5662, + "step": 5498 + }, + { + "epoch": 0.8102652259332024, + "grad_norm": 0.5947040915489197, + "learning_rate": 4.17372908058477e-06, + "loss": 0.5545, + "step": 5499 + }, + { + "epoch": 0.8104125736738703, + "grad_norm": 0.5954248309135437, + "learning_rate": 4.173441070950604e-06, + "loss": 0.5336, + "step": 5500 + }, + { + "epoch": 0.8105599214145384, + "grad_norm": 0.5968733429908752, + "learning_rate": 4.1731530210709634e-06, + "loss": 0.5667, + "step": 5501 + }, + { + "epoch": 0.8107072691552063, + "grad_norm": 0.5735760927200317, + "learning_rate": 4.172864930952778e-06, + "loss": 0.5705, + "step": 5502 + }, + { + "epoch": 0.8108546168958742, + "grad_norm": 0.5856454372406006, + "learning_rate": 4.172576800602975e-06, + "loss": 0.5365, + "step": 5503 + }, + { + "epoch": 0.8110019646365423, + "grad_norm": 0.5868255496025085, + "learning_rate": 4.172288630028485e-06, + "loss": 0.5895, + "step": 5504 + }, + { + "epoch": 0.8111493123772102, + "grad_norm": 0.5879335403442383, + "learning_rate": 4.1720004192362375e-06, + "loss": 0.5721, + "step": 5505 + }, + { + "epoch": 0.8112966601178782, + "grad_norm": 0.5638869404792786, + "learning_rate": 4.171712168233163e-06, + "loss": 0.5437, + "step": 5506 + }, + { + "epoch": 0.8114440078585462, + "grad_norm": 0.582191526889801, + "learning_rate": 4.171423877026196e-06, + "loss": 0.5684, + "step": 5507 + }, + { + "epoch": 0.8115913555992141, + "grad_norm": 0.5887058973312378, + "learning_rate": 4.171135545622269e-06, + "loss": 0.5143, + "step": 5508 + }, + { + "epoch": 0.8117387033398821, + "grad_norm": 0.5916251540184021, + "learning_rate": 4.170847174028316e-06, + "loss": 0.5905, + "step": 5509 + }, + { + "epoch": 0.8118860510805501, + "grad_norm": 0.5757601857185364, + "learning_rate": 4.170558762251271e-06, + "loss": 0.587, + "step": 5510 + }, + { + "epoch": 0.812033398821218, + "grad_norm": 0.5623841881752014, + "learning_rate": 4.170270310298072e-06, + "loss": 0.5721, + "step": 5511 + }, + { + "epoch": 0.8121807465618861, + "grad_norm": 0.57603520154953, + "learning_rate": 4.169981818175657e-06, + "loss": 0.5892, + "step": 5512 + }, + { + "epoch": 0.812328094302554, + "grad_norm": 0.5927779674530029, + "learning_rate": 4.16969328589096e-06, + "loss": 0.5547, + "step": 5513 + }, + { + "epoch": 0.812475442043222, + "grad_norm": 0.5464863181114197, + "learning_rate": 4.169404713450924e-06, + "loss": 0.576, + "step": 5514 + }, + { + "epoch": 0.81262278978389, + "grad_norm": 0.5987304449081421, + "learning_rate": 4.169116100862488e-06, + "loss": 0.5777, + "step": 5515 + }, + { + "epoch": 0.812770137524558, + "grad_norm": 0.5840292572975159, + "learning_rate": 4.168827448132592e-06, + "loss": 0.5745, + "step": 5516 + }, + { + "epoch": 0.8129174852652259, + "grad_norm": 0.619178295135498, + "learning_rate": 4.1685387552681775e-06, + "loss": 0.5729, + "step": 5517 + }, + { + "epoch": 0.8130648330058939, + "grad_norm": 0.5835867524147034, + "learning_rate": 4.16825002227619e-06, + "loss": 0.5913, + "step": 5518 + }, + { + "epoch": 0.8132121807465619, + "grad_norm": 0.5728238224983215, + "learning_rate": 4.1679612491635715e-06, + "loss": 0.5882, + "step": 5519 + }, + { + "epoch": 0.8133595284872298, + "grad_norm": 0.5813177824020386, + "learning_rate": 4.167672435937268e-06, + "loss": 0.5744, + "step": 5520 + }, + { + "epoch": 0.8135068762278979, + "grad_norm": 0.5792195200920105, + "learning_rate": 4.167383582604223e-06, + "loss": 0.5434, + "step": 5521 + }, + { + "epoch": 0.8136542239685658, + "grad_norm": 0.6232622861862183, + "learning_rate": 4.167094689171386e-06, + "loss": 0.5461, + "step": 5522 + }, + { + "epoch": 0.8138015717092338, + "grad_norm": 0.5929641127586365, + "learning_rate": 4.166805755645704e-06, + "loss": 0.5842, + "step": 5523 + }, + { + "epoch": 0.8139489194499018, + "grad_norm": 0.5802962779998779, + "learning_rate": 4.166516782034124e-06, + "loss": 0.5865, + "step": 5524 + }, + { + "epoch": 0.8140962671905697, + "grad_norm": 0.6067826151847839, + "learning_rate": 4.166227768343598e-06, + "loss": 0.5513, + "step": 5525 + }, + { + "epoch": 0.8142436149312378, + "grad_norm": 0.6215111613273621, + "learning_rate": 4.165938714581076e-06, + "loss": 0.5654, + "step": 5526 + }, + { + "epoch": 0.8143909626719057, + "grad_norm": 0.5968583822250366, + "learning_rate": 4.165649620753508e-06, + "loss": 0.5737, + "step": 5527 + }, + { + "epoch": 0.8145383104125736, + "grad_norm": 0.5974391102790833, + "learning_rate": 4.165360486867849e-06, + "loss": 0.5576, + "step": 5528 + }, + { + "epoch": 0.8146856581532417, + "grad_norm": 0.6548751592636108, + "learning_rate": 4.165071312931051e-06, + "loss": 0.5466, + "step": 5529 + }, + { + "epoch": 0.8148330058939096, + "grad_norm": 0.5940407514572144, + "learning_rate": 4.164782098950069e-06, + "loss": 0.5927, + "step": 5530 + }, + { + "epoch": 0.8149803536345775, + "grad_norm": 0.5674737691879272, + "learning_rate": 4.164492844931859e-06, + "loss": 0.5349, + "step": 5531 + }, + { + "epoch": 0.8151277013752456, + "grad_norm": 0.5731592774391174, + "learning_rate": 4.164203550883376e-06, + "loss": 0.5246, + "step": 5532 + }, + { + "epoch": 0.8152750491159135, + "grad_norm": 0.5753234624862671, + "learning_rate": 4.163914216811578e-06, + "loss": 0.568, + "step": 5533 + }, + { + "epoch": 0.8154223968565816, + "grad_norm": 0.5728522539138794, + "learning_rate": 4.163624842723423e-06, + "loss": 0.5727, + "step": 5534 + }, + { + "epoch": 0.8155697445972495, + "grad_norm": 0.6005954146385193, + "learning_rate": 4.1633354286258724e-06, + "loss": 0.555, + "step": 5535 + }, + { + "epoch": 0.8157170923379174, + "grad_norm": 0.5652506351470947, + "learning_rate": 4.163045974525884e-06, + "loss": 0.5664, + "step": 5536 + }, + { + "epoch": 0.8158644400785855, + "grad_norm": 0.620663583278656, + "learning_rate": 4.16275648043042e-06, + "loss": 0.5695, + "step": 5537 + }, + { + "epoch": 0.8160117878192534, + "grad_norm": 0.5780614018440247, + "learning_rate": 4.162466946346442e-06, + "loss": 0.5979, + "step": 5538 + }, + { + "epoch": 0.8161591355599214, + "grad_norm": 0.618254542350769, + "learning_rate": 4.1621773722809145e-06, + "loss": 0.5663, + "step": 5539 + }, + { + "epoch": 0.8163064833005894, + "grad_norm": 0.5676729083061218, + "learning_rate": 4.1618877582408e-06, + "loss": 0.5841, + "step": 5540 + }, + { + "epoch": 0.8164538310412573, + "grad_norm": 0.5727306604385376, + "learning_rate": 4.161598104233065e-06, + "loss": 0.5677, + "step": 5541 + }, + { + "epoch": 0.8166011787819254, + "grad_norm": 0.5467238426208496, + "learning_rate": 4.161308410264675e-06, + "loss": 0.5319, + "step": 5542 + }, + { + "epoch": 0.8167485265225933, + "grad_norm": 0.5782485008239746, + "learning_rate": 4.161018676342596e-06, + "loss": 0.5817, + "step": 5543 + }, + { + "epoch": 0.8168958742632613, + "grad_norm": 0.5511963367462158, + "learning_rate": 4.160728902473799e-06, + "loss": 0.571, + "step": 5544 + }, + { + "epoch": 0.8170432220039293, + "grad_norm": 0.5557025671005249, + "learning_rate": 4.160439088665248e-06, + "loss": 0.5829, + "step": 5545 + }, + { + "epoch": 0.8171905697445973, + "grad_norm": 0.5598852634429932, + "learning_rate": 4.160149234923918e-06, + "loss": 0.5544, + "step": 5546 + }, + { + "epoch": 0.8173379174852652, + "grad_norm": 0.6545085906982422, + "learning_rate": 4.159859341256777e-06, + "loss": 0.5471, + "step": 5547 + }, + { + "epoch": 0.8174852652259332, + "grad_norm": 0.5754643082618713, + "learning_rate": 4.159569407670796e-06, + "loss": 0.5988, + "step": 5548 + }, + { + "epoch": 0.8176326129666012, + "grad_norm": 0.5919362306594849, + "learning_rate": 4.159279434172951e-06, + "loss": 0.6042, + "step": 5549 + }, + { + "epoch": 0.8177799607072691, + "grad_norm": 0.5833360552787781, + "learning_rate": 4.1589894207702135e-06, + "loss": 0.5439, + "step": 5550 + }, + { + "epoch": 0.8179273084479372, + "grad_norm": 0.5767645239830017, + "learning_rate": 4.158699367469557e-06, + "loss": 0.5861, + "step": 5551 + }, + { + "epoch": 0.8180746561886051, + "grad_norm": 0.5736212730407715, + "learning_rate": 4.15840927427796e-06, + "loss": 0.516, + "step": 5552 + }, + { + "epoch": 0.8182220039292731, + "grad_norm": 0.574692964553833, + "learning_rate": 4.158119141202398e-06, + "loss": 0.5479, + "step": 5553 + }, + { + "epoch": 0.8183693516699411, + "grad_norm": 0.5718523263931274, + "learning_rate": 4.157828968249848e-06, + "loss": 0.569, + "step": 5554 + }, + { + "epoch": 0.818516699410609, + "grad_norm": 0.5551040172576904, + "learning_rate": 4.157538755427288e-06, + "loss": 0.5514, + "step": 5555 + }, + { + "epoch": 0.818664047151277, + "grad_norm": 0.5931767821311951, + "learning_rate": 4.1572485027417e-06, + "loss": 0.591, + "step": 5556 + }, + { + "epoch": 0.818811394891945, + "grad_norm": 0.586395263671875, + "learning_rate": 4.1569582102000616e-06, + "loss": 0.5644, + "step": 5557 + }, + { + "epoch": 0.8189587426326129, + "grad_norm": 0.6171603798866272, + "learning_rate": 4.156667877809356e-06, + "loss": 0.5665, + "step": 5558 + }, + { + "epoch": 0.819106090373281, + "grad_norm": 0.5741291642189026, + "learning_rate": 4.156377505576565e-06, + "loss": 0.5462, + "step": 5559 + }, + { + "epoch": 0.8192534381139489, + "grad_norm": 0.5852720737457275, + "learning_rate": 4.156087093508672e-06, + "loss": 0.5433, + "step": 5560 + }, + { + "epoch": 0.8194007858546168, + "grad_norm": 0.5828628540039062, + "learning_rate": 4.155796641612661e-06, + "loss": 0.565, + "step": 5561 + }, + { + "epoch": 0.8195481335952849, + "grad_norm": 0.5974177122116089, + "learning_rate": 4.155506149895516e-06, + "loss": 0.5643, + "step": 5562 + }, + { + "epoch": 0.8196954813359528, + "grad_norm": 0.664922833442688, + "learning_rate": 4.155215618364226e-06, + "loss": 0.5564, + "step": 5563 + }, + { + "epoch": 0.8198428290766209, + "grad_norm": 0.6156178116798401, + "learning_rate": 4.154925047025777e-06, + "loss": 0.5861, + "step": 5564 + }, + { + "epoch": 0.8199901768172888, + "grad_norm": 0.6071075201034546, + "learning_rate": 4.154634435887156e-06, + "loss": 0.5696, + "step": 5565 + }, + { + "epoch": 0.8201375245579567, + "grad_norm": 0.5894790291786194, + "learning_rate": 4.154343784955353e-06, + "loss": 0.5717, + "step": 5566 + }, + { + "epoch": 0.8202848722986248, + "grad_norm": 0.5784132480621338, + "learning_rate": 4.154053094237358e-06, + "loss": 0.5723, + "step": 5567 + }, + { + "epoch": 0.8204322200392927, + "grad_norm": 0.5963858962059021, + "learning_rate": 4.153762363740163e-06, + "loss": 0.5639, + "step": 5568 + }, + { + "epoch": 0.8205795677799607, + "grad_norm": 0.6016804575920105, + "learning_rate": 4.153471593470757e-06, + "loss": 0.5931, + "step": 5569 + }, + { + "epoch": 0.8207269155206287, + "grad_norm": 0.5876834988594055, + "learning_rate": 4.153180783436136e-06, + "loss": 0.5584, + "step": 5570 + }, + { + "epoch": 0.8208742632612966, + "grad_norm": 0.5850545167922974, + "learning_rate": 4.152889933643291e-06, + "loss": 0.5614, + "step": 5571 + }, + { + "epoch": 0.8210216110019647, + "grad_norm": 0.5881807804107666, + "learning_rate": 4.15259904409922e-06, + "loss": 0.5545, + "step": 5572 + }, + { + "epoch": 0.8211689587426326, + "grad_norm": 0.5593586564064026, + "learning_rate": 4.152308114810917e-06, + "loss": 0.5323, + "step": 5573 + }, + { + "epoch": 0.8213163064833006, + "grad_norm": 0.5657834410667419, + "learning_rate": 4.152017145785378e-06, + "loss": 0.6061, + "step": 5574 + }, + { + "epoch": 0.8214636542239686, + "grad_norm": 0.5701698660850525, + "learning_rate": 4.151726137029602e-06, + "loss": 0.587, + "step": 5575 + }, + { + "epoch": 0.8216110019646365, + "grad_norm": 0.5535200834274292, + "learning_rate": 4.1514350885505875e-06, + "loss": 0.554, + "step": 5576 + }, + { + "epoch": 0.8217583497053045, + "grad_norm": 0.566336452960968, + "learning_rate": 4.151144000355332e-06, + "loss": 0.5792, + "step": 5577 + }, + { + "epoch": 0.8219056974459725, + "grad_norm": 0.5584920048713684, + "learning_rate": 4.150852872450839e-06, + "loss": 0.5835, + "step": 5578 + }, + { + "epoch": 0.8220530451866405, + "grad_norm": 0.5957313179969788, + "learning_rate": 4.150561704844108e-06, + "loss": 0.566, + "step": 5579 + }, + { + "epoch": 0.8222003929273084, + "grad_norm": 0.5980052351951599, + "learning_rate": 4.150270497542143e-06, + "loss": 0.5642, + "step": 5580 + }, + { + "epoch": 0.8223477406679764, + "grad_norm": 0.5493764281272888, + "learning_rate": 4.149979250551946e-06, + "loss": 0.5754, + "step": 5581 + }, + { + "epoch": 0.8224950884086444, + "grad_norm": 0.5584763288497925, + "learning_rate": 4.149687963880523e-06, + "loss": 0.5484, + "step": 5582 + }, + { + "epoch": 0.8226424361493124, + "grad_norm": 0.5657752156257629, + "learning_rate": 4.149396637534877e-06, + "loss": 0.5602, + "step": 5583 + }, + { + "epoch": 0.8227897838899804, + "grad_norm": 0.5722087025642395, + "learning_rate": 4.149105271522016e-06, + "loss": 0.5656, + "step": 5584 + }, + { + "epoch": 0.8229371316306483, + "grad_norm": 0.6238241791725159, + "learning_rate": 4.148813865848946e-06, + "loss": 0.5716, + "step": 5585 + }, + { + "epoch": 0.8230844793713163, + "grad_norm": 0.5954170823097229, + "learning_rate": 4.148522420522677e-06, + "loss": 0.5701, + "step": 5586 + }, + { + "epoch": 0.8232318271119843, + "grad_norm": 0.5697188973426819, + "learning_rate": 4.148230935550216e-06, + "loss": 0.5556, + "step": 5587 + }, + { + "epoch": 0.8233791748526522, + "grad_norm": 0.5819408893585205, + "learning_rate": 4.147939410938575e-06, + "loss": 0.5765, + "step": 5588 + }, + { + "epoch": 0.8235265225933203, + "grad_norm": 0.6170991659164429, + "learning_rate": 4.1476478466947636e-06, + "loss": 0.5666, + "step": 5589 + }, + { + "epoch": 0.8236738703339882, + "grad_norm": 0.5887056589126587, + "learning_rate": 4.147356242825794e-06, + "loss": 0.5804, + "step": 5590 + }, + { + "epoch": 0.8238212180746561, + "grad_norm": 0.6010666489601135, + "learning_rate": 4.14706459933868e-06, + "loss": 0.5714, + "step": 5591 + }, + { + "epoch": 0.8239685658153242, + "grad_norm": 0.5711400508880615, + "learning_rate": 4.146772916240435e-06, + "loss": 0.5853, + "step": 5592 + }, + { + "epoch": 0.8241159135559921, + "grad_norm": 0.5630747079849243, + "learning_rate": 4.146481193538073e-06, + "loss": 0.5697, + "step": 5593 + }, + { + "epoch": 0.8242632612966602, + "grad_norm": 0.6075701117515564, + "learning_rate": 4.1461894312386106e-06, + "loss": 0.6036, + "step": 5594 + }, + { + "epoch": 0.8244106090373281, + "grad_norm": 0.5890079140663147, + "learning_rate": 4.145897629349065e-06, + "loss": 0.5762, + "step": 5595 + }, + { + "epoch": 0.824557956777996, + "grad_norm": 0.576225996017456, + "learning_rate": 4.1456057878764535e-06, + "loss": 0.5263, + "step": 5596 + }, + { + "epoch": 0.8247053045186641, + "grad_norm": 0.5526752471923828, + "learning_rate": 4.1453139068277945e-06, + "loss": 0.5047, + "step": 5597 + }, + { + "epoch": 0.824852652259332, + "grad_norm": 0.5703461170196533, + "learning_rate": 4.145021986210107e-06, + "loss": 0.5658, + "step": 5598 + }, + { + "epoch": 0.825, + "grad_norm": 0.5983473062515259, + "learning_rate": 4.1447300260304125e-06, + "loss": 0.5533, + "step": 5599 + }, + { + "epoch": 0.825147347740668, + "grad_norm": 0.5739492774009705, + "learning_rate": 4.144438026295733e-06, + "loss": 0.5403, + "step": 5600 + }, + { + "epoch": 0.8252946954813359, + "grad_norm": 0.5880619883537292, + "learning_rate": 4.14414598701309e-06, + "loss": 0.5813, + "step": 5601 + }, + { + "epoch": 0.8254420432220039, + "grad_norm": 0.565998911857605, + "learning_rate": 4.143853908189507e-06, + "loss": 0.5758, + "step": 5602 + }, + { + "epoch": 0.8255893909626719, + "grad_norm": 0.5978142023086548, + "learning_rate": 4.143561789832009e-06, + "loss": 0.5914, + "step": 5603 + }, + { + "epoch": 0.8257367387033399, + "grad_norm": 0.5961796045303345, + "learning_rate": 4.143269631947619e-06, + "loss": 0.5886, + "step": 5604 + }, + { + "epoch": 0.8258840864440079, + "grad_norm": 0.583991289138794, + "learning_rate": 4.1429774345433675e-06, + "loss": 0.5506, + "step": 5605 + }, + { + "epoch": 0.8260314341846758, + "grad_norm": 0.596197783946991, + "learning_rate": 4.142685197626278e-06, + "loss": 0.5355, + "step": 5606 + }, + { + "epoch": 0.8261787819253438, + "grad_norm": 0.5941674709320068, + "learning_rate": 4.142392921203381e-06, + "loss": 0.5516, + "step": 5607 + }, + { + "epoch": 0.8263261296660118, + "grad_norm": 0.5762512683868408, + "learning_rate": 4.142100605281703e-06, + "loss": 0.5448, + "step": 5608 + }, + { + "epoch": 0.8264734774066798, + "grad_norm": 0.6013820767402649, + "learning_rate": 4.141808249868276e-06, + "loss": 0.5549, + "step": 5609 + }, + { + "epoch": 0.8266208251473477, + "grad_norm": 0.5875470638275146, + "learning_rate": 4.141515854970132e-06, + "loss": 0.5542, + "step": 5610 + }, + { + "epoch": 0.8267681728880157, + "grad_norm": 0.5878833532333374, + "learning_rate": 4.1412234205943004e-06, + "loss": 0.5319, + "step": 5611 + }, + { + "epoch": 0.8269155206286837, + "grad_norm": 0.5845406651496887, + "learning_rate": 4.140930946747816e-06, + "loss": 0.5514, + "step": 5612 + }, + { + "epoch": 0.8270628683693517, + "grad_norm": 0.6016947031021118, + "learning_rate": 4.140638433437712e-06, + "loss": 0.5446, + "step": 5613 + }, + { + "epoch": 0.8272102161100197, + "grad_norm": 0.5685457587242126, + "learning_rate": 4.140345880671022e-06, + "loss": 0.5689, + "step": 5614 + }, + { + "epoch": 0.8273575638506876, + "grad_norm": 0.5694936513900757, + "learning_rate": 4.140053288454784e-06, + "loss": 0.531, + "step": 5615 + }, + { + "epoch": 0.8275049115913556, + "grad_norm": 0.5692597031593323, + "learning_rate": 4.139760656796033e-06, + "loss": 0.5678, + "step": 5616 + }, + { + "epoch": 0.8276522593320236, + "grad_norm": 0.5666220784187317, + "learning_rate": 4.139467985701808e-06, + "loss": 0.5783, + "step": 5617 + }, + { + "epoch": 0.8277996070726915, + "grad_norm": 0.5738397240638733, + "learning_rate": 4.139175275179146e-06, + "loss": 0.5792, + "step": 5618 + }, + { + "epoch": 0.8279469548133596, + "grad_norm": 0.6054690480232239, + "learning_rate": 4.138882525235088e-06, + "loss": 0.5688, + "step": 5619 + }, + { + "epoch": 0.8280943025540275, + "grad_norm": 0.5759205222129822, + "learning_rate": 4.138589735876674e-06, + "loss": 0.5845, + "step": 5620 + }, + { + "epoch": 0.8282416502946954, + "grad_norm": 0.5898460745811462, + "learning_rate": 4.138296907110946e-06, + "loss": 0.5498, + "step": 5621 + }, + { + "epoch": 0.8283889980353635, + "grad_norm": 0.6200886368751526, + "learning_rate": 4.138004038944945e-06, + "loss": 0.5416, + "step": 5622 + }, + { + "epoch": 0.8285363457760314, + "grad_norm": 0.5924447178840637, + "learning_rate": 4.137711131385715e-06, + "loss": 0.5734, + "step": 5623 + }, + { + "epoch": 0.8286836935166995, + "grad_norm": 0.5870310664176941, + "learning_rate": 4.137418184440301e-06, + "loss": 0.5561, + "step": 5624 + }, + { + "epoch": 0.8288310412573674, + "grad_norm": 0.5724959969520569, + "learning_rate": 4.137125198115747e-06, + "loss": 0.564, + "step": 5625 + }, + { + "epoch": 0.8289783889980353, + "grad_norm": 0.5837857723236084, + "learning_rate": 4.136832172419101e-06, + "loss": 0.574, + "step": 5626 + }, + { + "epoch": 0.8291257367387034, + "grad_norm": 0.5546053051948547, + "learning_rate": 4.136539107357408e-06, + "loss": 0.5778, + "step": 5627 + }, + { + "epoch": 0.8292730844793713, + "grad_norm": 0.5808196067810059, + "learning_rate": 4.136246002937717e-06, + "loss": 0.572, + "step": 5628 + }, + { + "epoch": 0.8294204322200392, + "grad_norm": 0.5674851536750793, + "learning_rate": 4.135952859167077e-06, + "loss": 0.5556, + "step": 5629 + }, + { + "epoch": 0.8295677799607073, + "grad_norm": 0.5690741539001465, + "learning_rate": 4.135659676052539e-06, + "loss": 0.5704, + "step": 5630 + }, + { + "epoch": 0.8297151277013752, + "grad_norm": 0.5757570266723633, + "learning_rate": 4.1353664536011516e-06, + "loss": 0.5406, + "step": 5631 + }, + { + "epoch": 0.8298624754420432, + "grad_norm": 0.594703733921051, + "learning_rate": 4.13507319181997e-06, + "loss": 0.5755, + "step": 5632 + }, + { + "epoch": 0.8300098231827112, + "grad_norm": 0.6041760444641113, + "learning_rate": 4.134779890716043e-06, + "loss": 0.5736, + "step": 5633 + }, + { + "epoch": 0.8301571709233792, + "grad_norm": 0.6014150977134705, + "learning_rate": 4.134486550296427e-06, + "loss": 0.5858, + "step": 5634 + }, + { + "epoch": 0.8303045186640472, + "grad_norm": 0.5802200436592102, + "learning_rate": 4.134193170568176e-06, + "loss": 0.5901, + "step": 5635 + }, + { + "epoch": 0.8304518664047151, + "grad_norm": 0.6014387011528015, + "learning_rate": 4.133899751538346e-06, + "loss": 0.5793, + "step": 5636 + }, + { + "epoch": 0.8305992141453831, + "grad_norm": 0.5809230208396912, + "learning_rate": 4.133606293213993e-06, + "loss": 0.585, + "step": 5637 + }, + { + "epoch": 0.8307465618860511, + "grad_norm": 0.6004248857498169, + "learning_rate": 4.133312795602176e-06, + "loss": 0.5323, + "step": 5638 + }, + { + "epoch": 0.830893909626719, + "grad_norm": 0.5916800498962402, + "learning_rate": 4.133019258709951e-06, + "loss": 0.5431, + "step": 5639 + }, + { + "epoch": 0.831041257367387, + "grad_norm": 0.5878300070762634, + "learning_rate": 4.13272568254438e-06, + "loss": 0.5476, + "step": 5640 + }, + { + "epoch": 0.831188605108055, + "grad_norm": 0.6053356528282166, + "learning_rate": 4.132432067112521e-06, + "loss": 0.5467, + "step": 5641 + }, + { + "epoch": 0.831335952848723, + "grad_norm": 0.5934663414955139, + "learning_rate": 4.132138412421437e-06, + "loss": 0.5626, + "step": 5642 + }, + { + "epoch": 0.831483300589391, + "grad_norm": 0.5654465556144714, + "learning_rate": 4.131844718478189e-06, + "loss": 0.5632, + "step": 5643 + }, + { + "epoch": 0.831630648330059, + "grad_norm": 0.5547035932540894, + "learning_rate": 4.131550985289842e-06, + "loss": 0.5576, + "step": 5644 + }, + { + "epoch": 0.8317779960707269, + "grad_norm": 0.6088604927062988, + "learning_rate": 4.131257212863458e-06, + "loss": 0.5124, + "step": 5645 + }, + { + "epoch": 0.8319253438113949, + "grad_norm": 0.5517475008964539, + "learning_rate": 4.130963401206104e-06, + "loss": 0.562, + "step": 5646 + }, + { + "epoch": 0.8320726915520629, + "grad_norm": 0.5804700255393982, + "learning_rate": 4.130669550324844e-06, + "loss": 0.5634, + "step": 5647 + }, + { + "epoch": 0.8322200392927308, + "grad_norm": 0.6005029678344727, + "learning_rate": 4.130375660226748e-06, + "loss": 0.5604, + "step": 5648 + }, + { + "epoch": 0.8323673870333989, + "grad_norm": 0.5722749829292297, + "learning_rate": 4.13008173091888e-06, + "loss": 0.5466, + "step": 5649 + }, + { + "epoch": 0.8325147347740668, + "grad_norm": 0.6361074447631836, + "learning_rate": 4.129787762408312e-06, + "loss": 0.5323, + "step": 5650 + }, + { + "epoch": 0.8326620825147347, + "grad_norm": 0.6012475490570068, + "learning_rate": 4.129493754702113e-06, + "loss": 0.589, + "step": 5651 + }, + { + "epoch": 0.8328094302554028, + "grad_norm": 0.6255943775177002, + "learning_rate": 4.129199707807353e-06, + "loss": 0.5423, + "step": 5652 + }, + { + "epoch": 0.8329567779960707, + "grad_norm": 0.5745341777801514, + "learning_rate": 4.128905621731104e-06, + "loss": 0.5462, + "step": 5653 + }, + { + "epoch": 0.8331041257367388, + "grad_norm": 0.6449954509735107, + "learning_rate": 4.128611496480439e-06, + "loss": 0.5422, + "step": 5654 + }, + { + "epoch": 0.8332514734774067, + "grad_norm": 0.5615523457527161, + "learning_rate": 4.128317332062431e-06, + "loss": 0.5578, + "step": 5655 + }, + { + "epoch": 0.8333988212180746, + "grad_norm": 0.5903351306915283, + "learning_rate": 4.128023128484156e-06, + "loss": 0.6043, + "step": 5656 + }, + { + "epoch": 0.8335461689587427, + "grad_norm": 0.6236037611961365, + "learning_rate": 4.127728885752688e-06, + "loss": 0.5745, + "step": 5657 + }, + { + "epoch": 0.8336935166994106, + "grad_norm": 0.5845517516136169, + "learning_rate": 4.1274346038751035e-06, + "loss": 0.5795, + "step": 5658 + }, + { + "epoch": 0.8338408644400785, + "grad_norm": 0.5587565302848816, + "learning_rate": 4.12714028285848e-06, + "loss": 0.5818, + "step": 5659 + }, + { + "epoch": 0.8339882121807466, + "grad_norm": 0.598199188709259, + "learning_rate": 4.126845922709895e-06, + "loss": 0.6051, + "step": 5660 + }, + { + "epoch": 0.8341355599214145, + "grad_norm": 0.5804771184921265, + "learning_rate": 4.12655152343643e-06, + "loss": 0.6084, + "step": 5661 + }, + { + "epoch": 0.8342829076620825, + "grad_norm": 0.5871742963790894, + "learning_rate": 4.126257085045164e-06, + "loss": 0.5485, + "step": 5662 + }, + { + "epoch": 0.8344302554027505, + "grad_norm": 0.5986305475234985, + "learning_rate": 4.125962607543177e-06, + "loss": 0.5997, + "step": 5663 + }, + { + "epoch": 0.8345776031434184, + "grad_norm": 0.5681033730506897, + "learning_rate": 4.125668090937552e-06, + "loss": 0.527, + "step": 5664 + }, + { + "epoch": 0.8347249508840865, + "grad_norm": 0.5767742395401001, + "learning_rate": 4.1253735352353716e-06, + "loss": 0.5576, + "step": 5665 + }, + { + "epoch": 0.8348722986247544, + "grad_norm": 0.562311053276062, + "learning_rate": 4.125078940443721e-06, + "loss": 0.5672, + "step": 5666 + }, + { + "epoch": 0.8350196463654224, + "grad_norm": 0.6140784621238708, + "learning_rate": 4.124784306569683e-06, + "loss": 0.5498, + "step": 5667 + }, + { + "epoch": 0.8351669941060904, + "grad_norm": 0.6009570956230164, + "learning_rate": 4.124489633620345e-06, + "loss": 0.5614, + "step": 5668 + }, + { + "epoch": 0.8353143418467583, + "grad_norm": 0.6009500622749329, + "learning_rate": 4.1241949216027935e-06, + "loss": 0.5619, + "step": 5669 + }, + { + "epoch": 0.8354616895874263, + "grad_norm": 0.6016167998313904, + "learning_rate": 4.123900170524115e-06, + "loss": 0.5807, + "step": 5670 + }, + { + "epoch": 0.8356090373280943, + "grad_norm": 0.626898467540741, + "learning_rate": 4.1236053803914e-06, + "loss": 0.5641, + "step": 5671 + }, + { + "epoch": 0.8357563850687623, + "grad_norm": 0.5813217163085938, + "learning_rate": 4.123310551211737e-06, + "loss": 0.5892, + "step": 5672 + }, + { + "epoch": 0.8359037328094302, + "grad_norm": 0.5675024390220642, + "learning_rate": 4.123015682992216e-06, + "loss": 0.5528, + "step": 5673 + }, + { + "epoch": 0.8360510805500982, + "grad_norm": 0.6106071472167969, + "learning_rate": 4.12272077573993e-06, + "loss": 0.5558, + "step": 5674 + }, + { + "epoch": 0.8361984282907662, + "grad_norm": 0.5772626996040344, + "learning_rate": 4.122425829461969e-06, + "loss": 0.5715, + "step": 5675 + }, + { + "epoch": 0.8363457760314342, + "grad_norm": 0.5711527466773987, + "learning_rate": 4.122130844165429e-06, + "loss": 0.5757, + "step": 5676 + }, + { + "epoch": 0.8364931237721022, + "grad_norm": 0.5617825388908386, + "learning_rate": 4.121835819857403e-06, + "loss": 0.5803, + "step": 5677 + }, + { + "epoch": 0.8366404715127701, + "grad_norm": 0.5496557354927063, + "learning_rate": 4.121540756544986e-06, + "loss": 0.5315, + "step": 5678 + }, + { + "epoch": 0.8367878192534381, + "grad_norm": 0.5931736826896667, + "learning_rate": 4.121245654235274e-06, + "loss": 0.5711, + "step": 5679 + }, + { + "epoch": 0.8369351669941061, + "grad_norm": 0.5851522088050842, + "learning_rate": 4.120950512935364e-06, + "loss": 0.5827, + "step": 5680 + }, + { + "epoch": 0.837082514734774, + "grad_norm": 0.568281352519989, + "learning_rate": 4.1206553326523555e-06, + "loss": 0.5342, + "step": 5681 + }, + { + "epoch": 0.8372298624754421, + "grad_norm": 0.5442686676979065, + "learning_rate": 4.120360113393346e-06, + "loss": 0.5799, + "step": 5682 + }, + { + "epoch": 0.83737721021611, + "grad_norm": 0.5758512616157532, + "learning_rate": 4.120064855165436e-06, + "loss": 0.5165, + "step": 5683 + }, + { + "epoch": 0.837524557956778, + "grad_norm": 0.572907030582428, + "learning_rate": 4.119769557975726e-06, + "loss": 0.585, + "step": 5684 + }, + { + "epoch": 0.837671905697446, + "grad_norm": 0.5880909562110901, + "learning_rate": 4.119474221831318e-06, + "loss": 0.5817, + "step": 5685 + }, + { + "epoch": 0.8378192534381139, + "grad_norm": 0.5490662455558777, + "learning_rate": 4.119178846739315e-06, + "loss": 0.5111, + "step": 5686 + }, + { + "epoch": 0.837966601178782, + "grad_norm": 0.5619904398918152, + "learning_rate": 4.118883432706819e-06, + "loss": 0.5577, + "step": 5687 + }, + { + "epoch": 0.8381139489194499, + "grad_norm": 0.5881204009056091, + "learning_rate": 4.1185879797409365e-06, + "loss": 0.5604, + "step": 5688 + }, + { + "epoch": 0.8382612966601178, + "grad_norm": 0.5595253109931946, + "learning_rate": 4.1182924878487715e-06, + "loss": 0.5632, + "step": 5689 + }, + { + "epoch": 0.8384086444007859, + "grad_norm": 0.5563174486160278, + "learning_rate": 4.117996957037431e-06, + "loss": 0.5735, + "step": 5690 + }, + { + "epoch": 0.8385559921414538, + "grad_norm": 0.5702851414680481, + "learning_rate": 4.1177013873140235e-06, + "loss": 0.565, + "step": 5691 + }, + { + "epoch": 0.8387033398821218, + "grad_norm": 0.5760751962661743, + "learning_rate": 4.117405778685656e-06, + "loss": 0.5822, + "step": 5692 + }, + { + "epoch": 0.8388506876227898, + "grad_norm": 0.5900784730911255, + "learning_rate": 4.117110131159438e-06, + "loss": 0.574, + "step": 5693 + }, + { + "epoch": 0.8389980353634577, + "grad_norm": 0.5504359006881714, + "learning_rate": 4.116814444742481e-06, + "loss": 0.5436, + "step": 5694 + }, + { + "epoch": 0.8391453831041258, + "grad_norm": 0.5717391967773438, + "learning_rate": 4.116518719441893e-06, + "loss": 0.5562, + "step": 5695 + }, + { + "epoch": 0.8392927308447937, + "grad_norm": 0.5760939717292786, + "learning_rate": 4.116222955264789e-06, + "loss": 0.5412, + "step": 5696 + }, + { + "epoch": 0.8394400785854617, + "grad_norm": 0.5749611258506775, + "learning_rate": 4.115927152218281e-06, + "loss": 0.5713, + "step": 5697 + }, + { + "epoch": 0.8395874263261297, + "grad_norm": 0.5889787673950195, + "learning_rate": 4.115631310309483e-06, + "loss": 0.5803, + "step": 5698 + }, + { + "epoch": 0.8397347740667976, + "grad_norm": 0.5927038192749023, + "learning_rate": 4.11533542954551e-06, + "loss": 0.5747, + "step": 5699 + }, + { + "epoch": 0.8398821218074656, + "grad_norm": 0.581001877784729, + "learning_rate": 4.115039509933477e-06, + "loss": 0.5674, + "step": 5700 + }, + { + "epoch": 0.8400294695481336, + "grad_norm": 0.5905871391296387, + "learning_rate": 4.114743551480501e-06, + "loss": 0.5334, + "step": 5701 + }, + { + "epoch": 0.8401768172888016, + "grad_norm": 0.5613478422164917, + "learning_rate": 4.1144475541937015e-06, + "loss": 0.516, + "step": 5702 + }, + { + "epoch": 0.8403241650294695, + "grad_norm": 0.5800443291664124, + "learning_rate": 4.114151518080194e-06, + "loss": 0.5578, + "step": 5703 + }, + { + "epoch": 0.8404715127701375, + "grad_norm": 0.5851225256919861, + "learning_rate": 4.1138554431471e-06, + "loss": 0.5875, + "step": 5704 + }, + { + "epoch": 0.8406188605108055, + "grad_norm": 0.6035647988319397, + "learning_rate": 4.113559329401539e-06, + "loss": 0.5542, + "step": 5705 + }, + { + "epoch": 0.8407662082514735, + "grad_norm": 0.5674980282783508, + "learning_rate": 4.113263176850634e-06, + "loss": 0.546, + "step": 5706 + }, + { + "epoch": 0.8409135559921415, + "grad_norm": 0.5514079332351685, + "learning_rate": 4.112966985501506e-06, + "loss": 0.5664, + "step": 5707 + }, + { + "epoch": 0.8410609037328094, + "grad_norm": 0.5505837798118591, + "learning_rate": 4.112670755361278e-06, + "loss": 0.5497, + "step": 5708 + }, + { + "epoch": 0.8412082514734774, + "grad_norm": 0.6005622744560242, + "learning_rate": 4.1123744864370756e-06, + "loss": 0.5886, + "step": 5709 + }, + { + "epoch": 0.8413555992141454, + "grad_norm": 0.6450445652008057, + "learning_rate": 4.112078178736022e-06, + "loss": 0.5637, + "step": 5710 + }, + { + "epoch": 0.8415029469548133, + "grad_norm": 0.5886719822883606, + "learning_rate": 4.1117818322652445e-06, + "loss": 0.5322, + "step": 5711 + }, + { + "epoch": 0.8416502946954814, + "grad_norm": 0.620814323425293, + "learning_rate": 4.11148544703187e-06, + "loss": 0.5369, + "step": 5712 + }, + { + "epoch": 0.8417976424361493, + "grad_norm": 0.5827550292015076, + "learning_rate": 4.111189023043027e-06, + "loss": 0.5547, + "step": 5713 + }, + { + "epoch": 0.8419449901768173, + "grad_norm": 0.6018425226211548, + "learning_rate": 4.110892560305843e-06, + "loss": 0.5369, + "step": 5714 + }, + { + "epoch": 0.8420923379174853, + "grad_norm": 0.5684431791305542, + "learning_rate": 4.110596058827449e-06, + "loss": 0.5803, + "step": 5715 + }, + { + "epoch": 0.8422396856581532, + "grad_norm": 0.5876915454864502, + "learning_rate": 4.110299518614974e-06, + "loss": 0.5733, + "step": 5716 + }, + { + "epoch": 0.8423870333988213, + "grad_norm": 0.59451824426651, + "learning_rate": 4.110002939675552e-06, + "loss": 0.5598, + "step": 5717 + }, + { + "epoch": 0.8425343811394892, + "grad_norm": 0.6002209782600403, + "learning_rate": 4.109706322016314e-06, + "loss": 0.5592, + "step": 5718 + }, + { + "epoch": 0.8426817288801571, + "grad_norm": 0.5869345664978027, + "learning_rate": 4.1094096656443945e-06, + "loss": 0.5584, + "step": 5719 + }, + { + "epoch": 0.8428290766208252, + "grad_norm": 0.6243475079536438, + "learning_rate": 4.109112970566927e-06, + "loss": 0.5695, + "step": 5720 + }, + { + "epoch": 0.8429764243614931, + "grad_norm": 0.5978853106498718, + "learning_rate": 4.108816236791048e-06, + "loss": 0.5477, + "step": 5721 + }, + { + "epoch": 0.843123772102161, + "grad_norm": 0.5768588185310364, + "learning_rate": 4.108519464323892e-06, + "loss": 0.5585, + "step": 5722 + }, + { + "epoch": 0.8432711198428291, + "grad_norm": 0.5600433349609375, + "learning_rate": 4.108222653172598e-06, + "loss": 0.5325, + "step": 5723 + }, + { + "epoch": 0.843418467583497, + "grad_norm": 0.5787793397903442, + "learning_rate": 4.1079258033443026e-06, + "loss": 0.5913, + "step": 5724 + }, + { + "epoch": 0.8435658153241651, + "grad_norm": 0.6040998101234436, + "learning_rate": 4.107628914846147e-06, + "loss": 0.5635, + "step": 5725 + }, + { + "epoch": 0.843713163064833, + "grad_norm": 0.578668475151062, + "learning_rate": 4.107331987685269e-06, + "loss": 0.5366, + "step": 5726 + }, + { + "epoch": 0.843860510805501, + "grad_norm": 0.568734347820282, + "learning_rate": 4.107035021868811e-06, + "loss": 0.601, + "step": 5727 + }, + { + "epoch": 0.844007858546169, + "grad_norm": 0.5670081377029419, + "learning_rate": 4.106738017403914e-06, + "loss": 0.5706, + "step": 5728 + }, + { + "epoch": 0.8441552062868369, + "grad_norm": 0.5695661306381226, + "learning_rate": 4.106440974297722e-06, + "loss": 0.5915, + "step": 5729 + }, + { + "epoch": 0.8443025540275049, + "grad_norm": 0.6104286313056946, + "learning_rate": 4.106143892557378e-06, + "loss": 0.5449, + "step": 5730 + }, + { + "epoch": 0.8444499017681729, + "grad_norm": 0.5609763264656067, + "learning_rate": 4.105846772190026e-06, + "loss": 0.5674, + "step": 5731 + }, + { + "epoch": 0.8445972495088409, + "grad_norm": 0.5715862512588501, + "learning_rate": 4.105549613202813e-06, + "loss": 0.5529, + "step": 5732 + }, + { + "epoch": 0.8447445972495088, + "grad_norm": 0.5843965411186218, + "learning_rate": 4.105252415602885e-06, + "loss": 0.5985, + "step": 5733 + }, + { + "epoch": 0.8448919449901768, + "grad_norm": 0.556862473487854, + "learning_rate": 4.104955179397389e-06, + "loss": 0.5629, + "step": 5734 + }, + { + "epoch": 0.8450392927308448, + "grad_norm": 0.5772679448127747, + "learning_rate": 4.104657904593473e-06, + "loss": 0.5825, + "step": 5735 + }, + { + "epoch": 0.8451866404715128, + "grad_norm": 0.5977596640586853, + "learning_rate": 4.104360591198288e-06, + "loss": 0.5661, + "step": 5736 + }, + { + "epoch": 0.8453339882121808, + "grad_norm": 0.5502188205718994, + "learning_rate": 4.104063239218983e-06, + "loss": 0.5169, + "step": 5737 + }, + { + "epoch": 0.8454813359528487, + "grad_norm": 0.5710992813110352, + "learning_rate": 4.1037658486627095e-06, + "loss": 0.5543, + "step": 5738 + }, + { + "epoch": 0.8456286836935167, + "grad_norm": 0.5950000882148743, + "learning_rate": 4.10346841953662e-06, + "loss": 0.5552, + "step": 5739 + }, + { + "epoch": 0.8457760314341847, + "grad_norm": 0.5620822310447693, + "learning_rate": 4.103170951847866e-06, + "loss": 0.5561, + "step": 5740 + }, + { + "epoch": 0.8459233791748526, + "grad_norm": 0.557816743850708, + "learning_rate": 4.1028734456036036e-06, + "loss": 0.5395, + "step": 5741 + }, + { + "epoch": 0.8460707269155207, + "grad_norm": 0.5418674349784851, + "learning_rate": 4.1025759008109855e-06, + "loss": 0.5217, + "step": 5742 + }, + { + "epoch": 0.8462180746561886, + "grad_norm": 0.5938374996185303, + "learning_rate": 4.10227831747717e-06, + "loss": 0.5797, + "step": 5743 + }, + { + "epoch": 0.8463654223968566, + "grad_norm": 0.5888987183570862, + "learning_rate": 4.101980695609311e-06, + "loss": 0.5416, + "step": 5744 + }, + { + "epoch": 0.8465127701375246, + "grad_norm": 0.5468157529830933, + "learning_rate": 4.101683035214568e-06, + "loss": 0.547, + "step": 5745 + }, + { + "epoch": 0.8466601178781925, + "grad_norm": 0.5774982571601868, + "learning_rate": 4.1013853363001e-06, + "loss": 0.5746, + "step": 5746 + }, + { + "epoch": 0.8468074656188606, + "grad_norm": 0.5771351456642151, + "learning_rate": 4.101087598873065e-06, + "loss": 0.586, + "step": 5747 + }, + { + "epoch": 0.8469548133595285, + "grad_norm": 0.5688709020614624, + "learning_rate": 4.100789822940625e-06, + "loss": 0.5575, + "step": 5748 + }, + { + "epoch": 0.8471021611001964, + "grad_norm": 0.5738121867179871, + "learning_rate": 4.10049200850994e-06, + "loss": 0.5847, + "step": 5749 + }, + { + "epoch": 0.8472495088408645, + "grad_norm": 0.5775041580200195, + "learning_rate": 4.100194155588173e-06, + "loss": 0.5751, + "step": 5750 + }, + { + "epoch": 0.8473968565815324, + "grad_norm": 0.5982654094696045, + "learning_rate": 4.099896264182488e-06, + "loss": 0.5722, + "step": 5751 + }, + { + "epoch": 0.8475442043222003, + "grad_norm": 0.5751017332077026, + "learning_rate": 4.099598334300047e-06, + "loss": 0.5378, + "step": 5752 + }, + { + "epoch": 0.8476915520628684, + "grad_norm": 0.5804681777954102, + "learning_rate": 4.0993003659480165e-06, + "loss": 0.5727, + "step": 5753 + }, + { + "epoch": 0.8478388998035363, + "grad_norm": 0.5872969031333923, + "learning_rate": 4.0990023591335625e-06, + "loss": 0.5694, + "step": 5754 + }, + { + "epoch": 0.8479862475442044, + "grad_norm": 0.5721479654312134, + "learning_rate": 4.098704313863852e-06, + "loss": 0.5997, + "step": 5755 + }, + { + "epoch": 0.8481335952848723, + "grad_norm": 0.607875645160675, + "learning_rate": 4.0984062301460526e-06, + "loss": 0.571, + "step": 5756 + }, + { + "epoch": 0.8482809430255402, + "grad_norm": 0.5826417803764343, + "learning_rate": 4.098108107987332e-06, + "loss": 0.5735, + "step": 5757 + }, + { + "epoch": 0.8484282907662083, + "grad_norm": 0.567381739616394, + "learning_rate": 4.097809947394862e-06, + "loss": 0.5915, + "step": 5758 + }, + { + "epoch": 0.8485756385068762, + "grad_norm": 0.5594774484634399, + "learning_rate": 4.097511748375812e-06, + "loss": 0.5729, + "step": 5759 + }, + { + "epoch": 0.8487229862475442, + "grad_norm": 0.5558671951293945, + "learning_rate": 4.097213510937353e-06, + "loss": 0.5697, + "step": 5760 + }, + { + "epoch": 0.8488703339882122, + "grad_norm": 0.6152036786079407, + "learning_rate": 4.096915235086659e-06, + "loss": 0.5695, + "step": 5761 + }, + { + "epoch": 0.8490176817288801, + "grad_norm": 0.5810125470161438, + "learning_rate": 4.096616920830903e-06, + "loss": 0.5719, + "step": 5762 + }, + { + "epoch": 0.8491650294695481, + "grad_norm": 0.6416929364204407, + "learning_rate": 4.096318568177258e-06, + "loss": 0.5538, + "step": 5763 + }, + { + "epoch": 0.8493123772102161, + "grad_norm": 0.5701199173927307, + "learning_rate": 4.0960201771329e-06, + "loss": 0.5689, + "step": 5764 + }, + { + "epoch": 0.8494597249508841, + "grad_norm": 0.6027393341064453, + "learning_rate": 4.0957217477050065e-06, + "loss": 0.5588, + "step": 5765 + }, + { + "epoch": 0.8496070726915521, + "grad_norm": 0.5962352156639099, + "learning_rate": 4.0954232799007535e-06, + "loss": 0.5757, + "step": 5766 + }, + { + "epoch": 0.84975442043222, + "grad_norm": 0.5883955955505371, + "learning_rate": 4.0951247737273175e-06, + "loss": 0.569, + "step": 5767 + }, + { + "epoch": 0.849901768172888, + "grad_norm": 0.6070334315299988, + "learning_rate": 4.0948262291918796e-06, + "loss": 0.5526, + "step": 5768 + }, + { + "epoch": 0.850049115913556, + "grad_norm": 0.5846845507621765, + "learning_rate": 4.094527646301618e-06, + "loss": 0.5991, + "step": 5769 + }, + { + "epoch": 0.850196463654224, + "grad_norm": 0.6046852469444275, + "learning_rate": 4.094229025063717e-06, + "loss": 0.5545, + "step": 5770 + }, + { + "epoch": 0.8503438113948919, + "grad_norm": 0.5782310366630554, + "learning_rate": 4.093930365485352e-06, + "loss": 0.5745, + "step": 5771 + }, + { + "epoch": 0.85049115913556, + "grad_norm": 0.5584620237350464, + "learning_rate": 4.093631667573712e-06, + "loss": 0.597, + "step": 5772 + }, + { + "epoch": 0.8506385068762279, + "grad_norm": 0.5429295301437378, + "learning_rate": 4.093332931335977e-06, + "loss": 0.5618, + "step": 5773 + }, + { + "epoch": 0.8507858546168958, + "grad_norm": 0.5802679657936096, + "learning_rate": 4.093034156779333e-06, + "loss": 0.5741, + "step": 5774 + }, + { + "epoch": 0.8509332023575639, + "grad_norm": 0.5862051248550415, + "learning_rate": 4.092735343910964e-06, + "loss": 0.5501, + "step": 5775 + }, + { + "epoch": 0.8510805500982318, + "grad_norm": 0.5637308359146118, + "learning_rate": 4.092436492738057e-06, + "loss": 0.5695, + "step": 5776 + }, + { + "epoch": 0.8512278978388998, + "grad_norm": 0.5639547109603882, + "learning_rate": 4.092137603267799e-06, + "loss": 0.5681, + "step": 5777 + }, + { + "epoch": 0.8513752455795678, + "grad_norm": 0.5402106642723083, + "learning_rate": 4.091838675507379e-06, + "loss": 0.527, + "step": 5778 + }, + { + "epoch": 0.8515225933202357, + "grad_norm": 0.614039421081543, + "learning_rate": 4.091539709463986e-06, + "loss": 0.5548, + "step": 5779 + }, + { + "epoch": 0.8516699410609038, + "grad_norm": 0.6077301502227783, + "learning_rate": 4.0912407051448075e-06, + "loss": 0.5563, + "step": 5780 + }, + { + "epoch": 0.8518172888015717, + "grad_norm": 0.559733510017395, + "learning_rate": 4.0909416625570375e-06, + "loss": 0.5769, + "step": 5781 + }, + { + "epoch": 0.8519646365422396, + "grad_norm": 0.6097682118415833, + "learning_rate": 4.090642581707867e-06, + "loss": 0.5687, + "step": 5782 + }, + { + "epoch": 0.8521119842829077, + "grad_norm": 0.5676030516624451, + "learning_rate": 4.090343462604488e-06, + "loss": 0.5462, + "step": 5783 + }, + { + "epoch": 0.8522593320235756, + "grad_norm": 0.6157848834991455, + "learning_rate": 4.090044305254095e-06, + "loss": 0.5707, + "step": 5784 + }, + { + "epoch": 0.8524066797642437, + "grad_norm": 0.5557563304901123, + "learning_rate": 4.089745109663882e-06, + "loss": 0.5227, + "step": 5785 + }, + { + "epoch": 0.8525540275049116, + "grad_norm": 0.575340747833252, + "learning_rate": 4.089445875841045e-06, + "loss": 0.5741, + "step": 5786 + }, + { + "epoch": 0.8527013752455795, + "grad_norm": 0.5812382698059082, + "learning_rate": 4.08914660379278e-06, + "loss": 0.5486, + "step": 5787 + }, + { + "epoch": 0.8528487229862476, + "grad_norm": 0.5829058289527893, + "learning_rate": 4.0888472935262845e-06, + "loss": 0.554, + "step": 5788 + }, + { + "epoch": 0.8529960707269155, + "grad_norm": 0.6166400909423828, + "learning_rate": 4.0885479450487566e-06, + "loss": 0.5557, + "step": 5789 + }, + { + "epoch": 0.8531434184675835, + "grad_norm": 0.5816385746002197, + "learning_rate": 4.0882485583673966e-06, + "loss": 0.5688, + "step": 5790 + }, + { + "epoch": 0.8532907662082515, + "grad_norm": 0.5765778422355652, + "learning_rate": 4.087949133489403e-06, + "loss": 0.5611, + "step": 5791 + }, + { + "epoch": 0.8534381139489194, + "grad_norm": 0.5816887021064758, + "learning_rate": 4.087649670421978e-06, + "loss": 0.5922, + "step": 5792 + }, + { + "epoch": 0.8535854616895874, + "grad_norm": 0.5969111919403076, + "learning_rate": 4.087350169172322e-06, + "loss": 0.5653, + "step": 5793 + }, + { + "epoch": 0.8537328094302554, + "grad_norm": 0.5723377466201782, + "learning_rate": 4.08705062974764e-06, + "loss": 0.5782, + "step": 5794 + }, + { + "epoch": 0.8538801571709234, + "grad_norm": 0.5803005695343018, + "learning_rate": 4.086751052155134e-06, + "loss": 0.548, + "step": 5795 + }, + { + "epoch": 0.8540275049115914, + "grad_norm": 0.5862765312194824, + "learning_rate": 4.0864514364020105e-06, + "loss": 0.5596, + "step": 5796 + }, + { + "epoch": 0.8541748526522593, + "grad_norm": 0.5518089532852173, + "learning_rate": 4.0861517824954725e-06, + "loss": 0.5552, + "step": 5797 + }, + { + "epoch": 0.8543222003929273, + "grad_norm": 0.562146782875061, + "learning_rate": 4.085852090442729e-06, + "loss": 0.5461, + "step": 5798 + }, + { + "epoch": 0.8544695481335953, + "grad_norm": 0.5977123379707336, + "learning_rate": 4.085552360250987e-06, + "loss": 0.5215, + "step": 5799 + }, + { + "epoch": 0.8546168958742633, + "grad_norm": 0.5833629965782166, + "learning_rate": 4.085252591927453e-06, + "loss": 0.563, + "step": 5800 + }, + { + "epoch": 0.8547642436149312, + "grad_norm": 0.6061369776725769, + "learning_rate": 4.084952785479339e-06, + "loss": 0.5739, + "step": 5801 + }, + { + "epoch": 0.8549115913555992, + "grad_norm": 0.5895542502403259, + "learning_rate": 4.084652940913854e-06, + "loss": 0.5802, + "step": 5802 + }, + { + "epoch": 0.8550589390962672, + "grad_norm": 0.5613864064216614, + "learning_rate": 4.084353058238208e-06, + "loss": 0.5397, + "step": 5803 + }, + { + "epoch": 0.8552062868369351, + "grad_norm": 0.6262488961219788, + "learning_rate": 4.084053137459615e-06, + "loss": 0.5884, + "step": 5804 + }, + { + "epoch": 0.8553536345776032, + "grad_norm": 0.5755891799926758, + "learning_rate": 4.083753178585286e-06, + "loss": 0.5316, + "step": 5805 + }, + { + "epoch": 0.8555009823182711, + "grad_norm": 0.5833985805511475, + "learning_rate": 4.0834531816224365e-06, + "loss": 0.5559, + "step": 5806 + }, + { + "epoch": 0.8556483300589391, + "grad_norm": 0.5817159414291382, + "learning_rate": 4.083153146578281e-06, + "loss": 0.5809, + "step": 5807 + }, + { + "epoch": 0.8557956777996071, + "grad_norm": 0.5609080195426941, + "learning_rate": 4.082853073460034e-06, + "loss": 0.5517, + "step": 5808 + }, + { + "epoch": 0.855943025540275, + "grad_norm": 0.5369923710823059, + "learning_rate": 4.082552962274913e-06, + "loss": 0.591, + "step": 5809 + }, + { + "epoch": 0.8560903732809431, + "grad_norm": 0.5822405219078064, + "learning_rate": 4.082252813030136e-06, + "loss": 0.5711, + "step": 5810 + }, + { + "epoch": 0.856237721021611, + "grad_norm": 0.5877491235733032, + "learning_rate": 4.081952625732921e-06, + "loss": 0.588, + "step": 5811 + }, + { + "epoch": 0.8563850687622789, + "grad_norm": 0.6010174751281738, + "learning_rate": 4.081652400390488e-06, + "loss": 0.5668, + "step": 5812 + }, + { + "epoch": 0.856532416502947, + "grad_norm": 0.6367900967597961, + "learning_rate": 4.081352137010055e-06, + "loss": 0.5416, + "step": 5813 + }, + { + "epoch": 0.8566797642436149, + "grad_norm": 0.5748488306999207, + "learning_rate": 4.081051835598846e-06, + "loss": 0.5877, + "step": 5814 + }, + { + "epoch": 0.856827111984283, + "grad_norm": 0.5637363791465759, + "learning_rate": 4.0807514961640815e-06, + "loss": 0.5532, + "step": 5815 + }, + { + "epoch": 0.8569744597249509, + "grad_norm": 0.5918633937835693, + "learning_rate": 4.080451118712985e-06, + "loss": 0.5573, + "step": 5816 + }, + { + "epoch": 0.8571218074656188, + "grad_norm": 0.5759455561637878, + "learning_rate": 4.080150703252781e-06, + "loss": 0.5113, + "step": 5817 + }, + { + "epoch": 0.8572691552062869, + "grad_norm": 0.5709832906723022, + "learning_rate": 4.0798502497906925e-06, + "loss": 0.5576, + "step": 5818 + }, + { + "epoch": 0.8574165029469548, + "grad_norm": 0.6126560568809509, + "learning_rate": 4.079549758333947e-06, + "loss": 0.5276, + "step": 5819 + }, + { + "epoch": 0.8575638506876228, + "grad_norm": 0.5739246606826782, + "learning_rate": 4.079249228889771e-06, + "loss": 0.5842, + "step": 5820 + }, + { + "epoch": 0.8577111984282908, + "grad_norm": 0.582832396030426, + "learning_rate": 4.0789486614653915e-06, + "loss": 0.5554, + "step": 5821 + }, + { + "epoch": 0.8578585461689587, + "grad_norm": 0.585044264793396, + "learning_rate": 4.078648056068037e-06, + "loss": 0.5597, + "step": 5822 + }, + { + "epoch": 0.8580058939096267, + "grad_norm": 0.5528361201286316, + "learning_rate": 4.078347412704937e-06, + "loss": 0.5476, + "step": 5823 + }, + { + "epoch": 0.8581532416502947, + "grad_norm": 0.5724666118621826, + "learning_rate": 4.078046731383323e-06, + "loss": 0.5521, + "step": 5824 + }, + { + "epoch": 0.8583005893909627, + "grad_norm": 0.5722667574882507, + "learning_rate": 4.077746012110425e-06, + "loss": 0.5237, + "step": 5825 + }, + { + "epoch": 0.8584479371316307, + "grad_norm": 0.5572859048843384, + "learning_rate": 4.077445254893475e-06, + "loss": 0.5668, + "step": 5826 + }, + { + "epoch": 0.8585952848722986, + "grad_norm": 0.5671130418777466, + "learning_rate": 4.077144459739706e-06, + "loss": 0.5464, + "step": 5827 + }, + { + "epoch": 0.8587426326129666, + "grad_norm": 0.5363937020301819, + "learning_rate": 4.076843626656354e-06, + "loss": 0.5517, + "step": 5828 + }, + { + "epoch": 0.8588899803536346, + "grad_norm": 0.5859374403953552, + "learning_rate": 4.076542755650651e-06, + "loss": 0.5303, + "step": 5829 + }, + { + "epoch": 0.8590373280943026, + "grad_norm": 0.558892011642456, + "learning_rate": 4.0762418467298345e-06, + "loss": 0.5559, + "step": 5830 + }, + { + "epoch": 0.8591846758349705, + "grad_norm": 0.5808870792388916, + "learning_rate": 4.075940899901141e-06, + "loss": 0.567, + "step": 5831 + }, + { + "epoch": 0.8593320235756385, + "grad_norm": 0.5878781676292419, + "learning_rate": 4.075639915171808e-06, + "loss": 0.5594, + "step": 5832 + }, + { + "epoch": 0.8594793713163065, + "grad_norm": 0.5518795847892761, + "learning_rate": 4.0753388925490745e-06, + "loss": 0.5608, + "step": 5833 + }, + { + "epoch": 0.8596267190569744, + "grad_norm": 0.5685462355613708, + "learning_rate": 4.075037832040179e-06, + "loss": 0.545, + "step": 5834 + }, + { + "epoch": 0.8597740667976425, + "grad_norm": 0.5873472690582275, + "learning_rate": 4.074736733652362e-06, + "loss": 0.5201, + "step": 5835 + }, + { + "epoch": 0.8599214145383104, + "grad_norm": 0.595531702041626, + "learning_rate": 4.074435597392866e-06, + "loss": 0.57, + "step": 5836 + }, + { + "epoch": 0.8600687622789784, + "grad_norm": 0.616465151309967, + "learning_rate": 4.074134423268932e-06, + "loss": 0.5705, + "step": 5837 + }, + { + "epoch": 0.8602161100196464, + "grad_norm": 0.570146381855011, + "learning_rate": 4.073833211287803e-06, + "loss": 0.5973, + "step": 5838 + }, + { + "epoch": 0.8603634577603143, + "grad_norm": 0.5818800926208496, + "learning_rate": 4.073531961456724e-06, + "loss": 0.5433, + "step": 5839 + }, + { + "epoch": 0.8605108055009824, + "grad_norm": 0.5941011309623718, + "learning_rate": 4.073230673782939e-06, + "loss": 0.545, + "step": 5840 + }, + { + "epoch": 0.8606581532416503, + "grad_norm": 0.5620572566986084, + "learning_rate": 4.072929348273695e-06, + "loss": 0.5779, + "step": 5841 + }, + { + "epoch": 0.8608055009823182, + "grad_norm": 0.5876070857048035, + "learning_rate": 4.072627984936237e-06, + "loss": 0.5806, + "step": 5842 + }, + { + "epoch": 0.8609528487229863, + "grad_norm": 0.5874423980712891, + "learning_rate": 4.072326583777814e-06, + "loss": 0.5357, + "step": 5843 + }, + { + "epoch": 0.8611001964636542, + "grad_norm": 0.5968165993690491, + "learning_rate": 4.072025144805674e-06, + "loss": 0.5479, + "step": 5844 + }, + { + "epoch": 0.8612475442043221, + "grad_norm": 0.5536109209060669, + "learning_rate": 4.071723668027066e-06, + "loss": 0.55, + "step": 5845 + }, + { + "epoch": 0.8613948919449902, + "grad_norm": 0.5641182065010071, + "learning_rate": 4.0714221534492424e-06, + "loss": 0.5577, + "step": 5846 + }, + { + "epoch": 0.8615422396856581, + "grad_norm": 0.5794035196304321, + "learning_rate": 4.071120601079452e-06, + "loss": 0.5657, + "step": 5847 + }, + { + "epoch": 0.8616895874263262, + "grad_norm": 0.5593725442886353, + "learning_rate": 4.070819010924948e-06, + "loss": 0.5503, + "step": 5848 + }, + { + "epoch": 0.8618369351669941, + "grad_norm": 0.5907076597213745, + "learning_rate": 4.070517382992984e-06, + "loss": 0.5565, + "step": 5849 + }, + { + "epoch": 0.861984282907662, + "grad_norm": 0.5835578441619873, + "learning_rate": 4.070215717290813e-06, + "loss": 0.5689, + "step": 5850 + }, + { + "epoch": 0.8621316306483301, + "grad_norm": 0.5858766436576843, + "learning_rate": 4.069914013825691e-06, + "loss": 0.5803, + "step": 5851 + }, + { + "epoch": 0.862278978388998, + "grad_norm": 0.6292656660079956, + "learning_rate": 4.0696122726048734e-06, + "loss": 0.5607, + "step": 5852 + }, + { + "epoch": 0.862426326129666, + "grad_norm": 0.6095239520072937, + "learning_rate": 4.069310493635616e-06, + "loss": 0.5703, + "step": 5853 + }, + { + "epoch": 0.862573673870334, + "grad_norm": 0.6027410626411438, + "learning_rate": 4.0690086769251786e-06, + "loss": 0.5472, + "step": 5854 + }, + { + "epoch": 0.862721021611002, + "grad_norm": 0.5970966219902039, + "learning_rate": 4.068706822480818e-06, + "loss": 0.5523, + "step": 5855 + }, + { + "epoch": 0.86286836935167, + "grad_norm": 0.6073654294013977, + "learning_rate": 4.068404930309793e-06, + "loss": 0.5219, + "step": 5856 + }, + { + "epoch": 0.8630157170923379, + "grad_norm": 0.5688180923461914, + "learning_rate": 4.068103000419366e-06, + "loss": 0.5756, + "step": 5857 + }, + { + "epoch": 0.8631630648330059, + "grad_norm": 0.6012741327285767, + "learning_rate": 4.067801032816797e-06, + "loss": 0.551, + "step": 5858 + }, + { + "epoch": 0.8633104125736739, + "grad_norm": 0.617352306842804, + "learning_rate": 4.0674990275093484e-06, + "loss": 0.5759, + "step": 5859 + }, + { + "epoch": 0.8634577603143418, + "grad_norm": 0.5690360069274902, + "learning_rate": 4.067196984504284e-06, + "loss": 0.5584, + "step": 5860 + }, + { + "epoch": 0.8636051080550098, + "grad_norm": 0.6167478561401367, + "learning_rate": 4.066894903808867e-06, + "loss": 0.5762, + "step": 5861 + }, + { + "epoch": 0.8637524557956778, + "grad_norm": 0.6351656913757324, + "learning_rate": 4.066592785430362e-06, + "loss": 0.5939, + "step": 5862 + }, + { + "epoch": 0.8638998035363458, + "grad_norm": 0.573788046836853, + "learning_rate": 4.066290629376035e-06, + "loss": 0.5726, + "step": 5863 + }, + { + "epoch": 0.8640471512770137, + "grad_norm": 0.5900269150733948, + "learning_rate": 4.065988435653154e-06, + "loss": 0.5812, + "step": 5864 + }, + { + "epoch": 0.8641944990176817, + "grad_norm": 0.571159839630127, + "learning_rate": 4.065686204268986e-06, + "loss": 0.5745, + "step": 5865 + }, + { + "epoch": 0.8643418467583497, + "grad_norm": 0.5900415778160095, + "learning_rate": 4.065383935230798e-06, + "loss": 0.5678, + "step": 5866 + }, + { + "epoch": 0.8644891944990177, + "grad_norm": 0.5849247574806213, + "learning_rate": 4.065081628545861e-06, + "loss": 0.5566, + "step": 5867 + }, + { + "epoch": 0.8646365422396857, + "grad_norm": 0.5902476906776428, + "learning_rate": 4.064779284221445e-06, + "loss": 0.5282, + "step": 5868 + }, + { + "epoch": 0.8647838899803536, + "grad_norm": 0.5915306806564331, + "learning_rate": 4.0644769022648215e-06, + "loss": 0.5539, + "step": 5869 + }, + { + "epoch": 0.8649312377210217, + "grad_norm": 0.5665923953056335, + "learning_rate": 4.06417448268326e-06, + "loss": 0.573, + "step": 5870 + }, + { + "epoch": 0.8650785854616896, + "grad_norm": 0.6018277406692505, + "learning_rate": 4.0638720254840385e-06, + "loss": 0.5798, + "step": 5871 + }, + { + "epoch": 0.8652259332023575, + "grad_norm": 0.5698878765106201, + "learning_rate": 4.063569530674427e-06, + "loss": 0.5429, + "step": 5872 + }, + { + "epoch": 0.8653732809430256, + "grad_norm": 0.5848173499107361, + "learning_rate": 4.063266998261702e-06, + "loss": 0.5723, + "step": 5873 + }, + { + "epoch": 0.8655206286836935, + "grad_norm": 0.5721728205680847, + "learning_rate": 4.062964428253139e-06, + "loss": 0.5713, + "step": 5874 + }, + { + "epoch": 0.8656679764243614, + "grad_norm": 0.6076770424842834, + "learning_rate": 4.062661820656014e-06, + "loss": 0.5941, + "step": 5875 + }, + { + "epoch": 0.8658153241650295, + "grad_norm": 0.5654727220535278, + "learning_rate": 4.0623591754776055e-06, + "loss": 0.5945, + "step": 5876 + }, + { + "epoch": 0.8659626719056974, + "grad_norm": 0.5489534139633179, + "learning_rate": 4.062056492725193e-06, + "loss": 0.5583, + "step": 5877 + }, + { + "epoch": 0.8661100196463655, + "grad_norm": 0.5849024057388306, + "learning_rate": 4.061753772406053e-06, + "loss": 0.5726, + "step": 5878 + }, + { + "epoch": 0.8662573673870334, + "grad_norm": 0.5995901226997375, + "learning_rate": 4.061451014527467e-06, + "loss": 0.5553, + "step": 5879 + }, + { + "epoch": 0.8664047151277013, + "grad_norm": 0.6064639687538147, + "learning_rate": 4.061148219096718e-06, + "loss": 0.5506, + "step": 5880 + }, + { + "epoch": 0.8665520628683694, + "grad_norm": 0.5674570202827454, + "learning_rate": 4.060845386121085e-06, + "loss": 0.5535, + "step": 5881 + }, + { + "epoch": 0.8666994106090373, + "grad_norm": 0.5811032056808472, + "learning_rate": 4.0605425156078525e-06, + "loss": 0.5415, + "step": 5882 + }, + { + "epoch": 0.8668467583497053, + "grad_norm": 0.5755921006202698, + "learning_rate": 4.060239607564306e-06, + "loss": 0.5507, + "step": 5883 + }, + { + "epoch": 0.8669941060903733, + "grad_norm": 0.5672495365142822, + "learning_rate": 4.0599366619977275e-06, + "loss": 0.5769, + "step": 5884 + }, + { + "epoch": 0.8671414538310412, + "grad_norm": 0.5888561606407166, + "learning_rate": 4.059633678915404e-06, + "loss": 0.5848, + "step": 5885 + }, + { + "epoch": 0.8672888015717093, + "grad_norm": 0.6529759764671326, + "learning_rate": 4.059330658324622e-06, + "loss": 0.5597, + "step": 5886 + }, + { + "epoch": 0.8674361493123772, + "grad_norm": 0.6075409054756165, + "learning_rate": 4.0590276002326695e-06, + "loss": 0.5766, + "step": 5887 + }, + { + "epoch": 0.8675834970530452, + "grad_norm": 0.5889926552772522, + "learning_rate": 4.058724504646834e-06, + "loss": 0.5586, + "step": 5888 + }, + { + "epoch": 0.8677308447937132, + "grad_norm": 0.5830223560333252, + "learning_rate": 4.058421371574406e-06, + "loss": 0.5767, + "step": 5889 + }, + { + "epoch": 0.8678781925343811, + "grad_norm": 0.590015709400177, + "learning_rate": 4.058118201022675e-06, + "loss": 0.5635, + "step": 5890 + }, + { + "epoch": 0.8680255402750491, + "grad_norm": 0.5843978524208069, + "learning_rate": 4.057814992998931e-06, + "loss": 0.5389, + "step": 5891 + }, + { + "epoch": 0.8681728880157171, + "grad_norm": 0.5546496510505676, + "learning_rate": 4.057511747510467e-06, + "loss": 0.578, + "step": 5892 + }, + { + "epoch": 0.8683202357563851, + "grad_norm": 0.5782389044761658, + "learning_rate": 4.057208464564576e-06, + "loss": 0.5639, + "step": 5893 + }, + { + "epoch": 0.868467583497053, + "grad_norm": 0.596951425075531, + "learning_rate": 4.056905144168553e-06, + "loss": 0.5715, + "step": 5894 + }, + { + "epoch": 0.868614931237721, + "grad_norm": 0.6034998893737793, + "learning_rate": 4.056601786329691e-06, + "loss": 0.5646, + "step": 5895 + }, + { + "epoch": 0.868762278978389, + "grad_norm": 0.5640336871147156, + "learning_rate": 4.056298391055285e-06, + "loss": 0.5608, + "step": 5896 + }, + { + "epoch": 0.868909626719057, + "grad_norm": 0.588342010974884, + "learning_rate": 4.0559949583526346e-06, + "loss": 0.5747, + "step": 5897 + }, + { + "epoch": 0.869056974459725, + "grad_norm": 0.5756846070289612, + "learning_rate": 4.055691488229035e-06, + "loss": 0.5743, + "step": 5898 + }, + { + "epoch": 0.8692043222003929, + "grad_norm": 0.5612202882766724, + "learning_rate": 4.055387980691783e-06, + "loss": 0.5709, + "step": 5899 + }, + { + "epoch": 0.869351669941061, + "grad_norm": 0.619737446308136, + "learning_rate": 4.055084435748181e-06, + "loss": 0.5679, + "step": 5900 + }, + { + "epoch": 0.8694990176817289, + "grad_norm": 0.6015217304229736, + "learning_rate": 4.054780853405528e-06, + "loss": 0.5665, + "step": 5901 + }, + { + "epoch": 0.8696463654223968, + "grad_norm": 0.5690114498138428, + "learning_rate": 4.0544772336711235e-06, + "loss": 0.533, + "step": 5902 + }, + { + "epoch": 0.8697937131630649, + "grad_norm": 0.5780991315841675, + "learning_rate": 4.054173576552271e-06, + "loss": 0.5396, + "step": 5903 + }, + { + "epoch": 0.8699410609037328, + "grad_norm": 0.5992357134819031, + "learning_rate": 4.0538698820562736e-06, + "loss": 0.5788, + "step": 5904 + }, + { + "epoch": 0.8700884086444007, + "grad_norm": 0.5819776058197021, + "learning_rate": 4.053566150190433e-06, + "loss": 0.5679, + "step": 5905 + }, + { + "epoch": 0.8702357563850688, + "grad_norm": 0.5504255890846252, + "learning_rate": 4.053262380962057e-06, + "loss": 0.6026, + "step": 5906 + }, + { + "epoch": 0.8703831041257367, + "grad_norm": 0.5989416241645813, + "learning_rate": 4.052958574378448e-06, + "loss": 0.5613, + "step": 5907 + }, + { + "epoch": 0.8705304518664048, + "grad_norm": 0.5834305286407471, + "learning_rate": 4.052654730446914e-06, + "loss": 0.5747, + "step": 5908 + }, + { + "epoch": 0.8706777996070727, + "grad_norm": 0.5981047749519348, + "learning_rate": 4.052350849174762e-06, + "loss": 0.5728, + "step": 5909 + }, + { + "epoch": 0.8708251473477406, + "grad_norm": 0.5813505053520203, + "learning_rate": 4.0520469305693e-06, + "loss": 0.5228, + "step": 5910 + }, + { + "epoch": 0.8709724950884087, + "grad_norm": 0.584093451499939, + "learning_rate": 4.051742974637837e-06, + "loss": 0.5318, + "step": 5911 + }, + { + "epoch": 0.8711198428290766, + "grad_norm": 0.5942853689193726, + "learning_rate": 4.051438981387683e-06, + "loss": 0.5479, + "step": 5912 + }, + { + "epoch": 0.8712671905697446, + "grad_norm": 0.5489634275436401, + "learning_rate": 4.05113495082615e-06, + "loss": 0.5674, + "step": 5913 + }, + { + "epoch": 0.8714145383104126, + "grad_norm": 0.5817708373069763, + "learning_rate": 4.050830882960549e-06, + "loss": 0.5331, + "step": 5914 + }, + { + "epoch": 0.8715618860510805, + "grad_norm": 0.5659977793693542, + "learning_rate": 4.050526777798193e-06, + "loss": 0.586, + "step": 5915 + }, + { + "epoch": 0.8717092337917485, + "grad_norm": 0.6183599829673767, + "learning_rate": 4.050222635346395e-06, + "loss": 0.5831, + "step": 5916 + }, + { + "epoch": 0.8718565815324165, + "grad_norm": 0.571246862411499, + "learning_rate": 4.049918455612469e-06, + "loss": 0.5579, + "step": 5917 + }, + { + "epoch": 0.8720039292730845, + "grad_norm": 0.578670084476471, + "learning_rate": 4.0496142386037316e-06, + "loss": 0.5411, + "step": 5918 + }, + { + "epoch": 0.8721512770137525, + "grad_norm": 0.5608612895011902, + "learning_rate": 4.049309984327498e-06, + "loss": 0.5667, + "step": 5919 + }, + { + "epoch": 0.8722986247544204, + "grad_norm": 0.5846002101898193, + "learning_rate": 4.049005692791087e-06, + "loss": 0.5713, + "step": 5920 + }, + { + "epoch": 0.8724459724950884, + "grad_norm": 0.5806613564491272, + "learning_rate": 4.0487013640018145e-06, + "loss": 0.56, + "step": 5921 + }, + { + "epoch": 0.8725933202357564, + "grad_norm": 0.5927800536155701, + "learning_rate": 4.048396997967001e-06, + "loss": 0.5325, + "step": 5922 + }, + { + "epoch": 0.8727406679764244, + "grad_norm": 0.6234303712844849, + "learning_rate": 4.048092594693967e-06, + "loss": 0.5424, + "step": 5923 + }, + { + "epoch": 0.8728880157170923, + "grad_norm": 0.5833067297935486, + "learning_rate": 4.047788154190031e-06, + "loss": 0.5257, + "step": 5924 + }, + { + "epoch": 0.8730353634577603, + "grad_norm": 0.6006855964660645, + "learning_rate": 4.047483676462516e-06, + "loss": 0.5424, + "step": 5925 + }, + { + "epoch": 0.8731827111984283, + "grad_norm": 0.5804669260978699, + "learning_rate": 4.047179161518744e-06, + "loss": 0.5546, + "step": 5926 + }, + { + "epoch": 0.8733300589390963, + "grad_norm": 0.5721926093101501, + "learning_rate": 4.046874609366039e-06, + "loss": 0.5398, + "step": 5927 + }, + { + "epoch": 0.8734774066797643, + "grad_norm": 0.5730080008506775, + "learning_rate": 4.046570020011726e-06, + "loss": 0.5541, + "step": 5928 + }, + { + "epoch": 0.8736247544204322, + "grad_norm": 0.5899401903152466, + "learning_rate": 4.046265393463128e-06, + "loss": 0.5747, + "step": 5929 + }, + { + "epoch": 0.8737721021611002, + "grad_norm": 0.6027246117591858, + "learning_rate": 4.045960729727573e-06, + "loss": 0.5477, + "step": 5930 + }, + { + "epoch": 0.8739194499017682, + "grad_norm": 0.5697555541992188, + "learning_rate": 4.0456560288123895e-06, + "loss": 0.5448, + "step": 5931 + }, + { + "epoch": 0.8740667976424361, + "grad_norm": 0.6130381226539612, + "learning_rate": 4.045351290724901e-06, + "loss": 0.5767, + "step": 5932 + }, + { + "epoch": 0.8742141453831042, + "grad_norm": 0.5569694638252258, + "learning_rate": 4.04504651547244e-06, + "loss": 0.5672, + "step": 5933 + }, + { + "epoch": 0.8743614931237721, + "grad_norm": 0.6078464984893799, + "learning_rate": 4.044741703062335e-06, + "loss": 0.591, + "step": 5934 + }, + { + "epoch": 0.87450884086444, + "grad_norm": 0.5791886448860168, + "learning_rate": 4.0444368535019165e-06, + "loss": 0.5637, + "step": 5935 + }, + { + "epoch": 0.8746561886051081, + "grad_norm": 0.605648398399353, + "learning_rate": 4.044131966798515e-06, + "loss": 0.5573, + "step": 5936 + }, + { + "epoch": 0.874803536345776, + "grad_norm": 0.5871698260307312, + "learning_rate": 4.043827042959465e-06, + "loss": 0.5502, + "step": 5937 + }, + { + "epoch": 0.8749508840864441, + "grad_norm": 0.5848788619041443, + "learning_rate": 4.043522081992099e-06, + "loss": 0.5674, + "step": 5938 + }, + { + "epoch": 0.875098231827112, + "grad_norm": 0.6014954447746277, + "learning_rate": 4.04321708390375e-06, + "loss": 0.5854, + "step": 5939 + }, + { + "epoch": 0.8752455795677799, + "grad_norm": 0.6278068423271179, + "learning_rate": 4.042912048701754e-06, + "loss": 0.5765, + "step": 5940 + }, + { + "epoch": 0.875392927308448, + "grad_norm": 0.6218485832214355, + "learning_rate": 4.042606976393448e-06, + "loss": 0.5382, + "step": 5941 + }, + { + "epoch": 0.8755402750491159, + "grad_norm": 0.616398274898529, + "learning_rate": 4.042301866986166e-06, + "loss": 0.5929, + "step": 5942 + }, + { + "epoch": 0.8756876227897838, + "grad_norm": 0.5971825122833252, + "learning_rate": 4.0419967204872485e-06, + "loss": 0.5874, + "step": 5943 + }, + { + "epoch": 0.8758349705304519, + "grad_norm": 0.5894083380699158, + "learning_rate": 4.041691536904034e-06, + "loss": 0.6331, + "step": 5944 + }, + { + "epoch": 0.8759823182711198, + "grad_norm": 0.605607807636261, + "learning_rate": 4.041386316243859e-06, + "loss": 0.5569, + "step": 5945 + }, + { + "epoch": 0.8761296660117878, + "grad_norm": 0.6084697842597961, + "learning_rate": 4.0410810585140675e-06, + "loss": 0.5783, + "step": 5946 + }, + { + "epoch": 0.8762770137524558, + "grad_norm": 0.6061842441558838, + "learning_rate": 4.040775763721999e-06, + "loss": 0.602, + "step": 5947 + }, + { + "epoch": 0.8764243614931237, + "grad_norm": 0.590846061706543, + "learning_rate": 4.040470431874996e-06, + "loss": 0.5673, + "step": 5948 + }, + { + "epoch": 0.8765717092337918, + "grad_norm": 0.5716122388839722, + "learning_rate": 4.040165062980401e-06, + "loss": 0.5734, + "step": 5949 + }, + { + "epoch": 0.8767190569744597, + "grad_norm": 0.5508177280426025, + "learning_rate": 4.03985965704556e-06, + "loss": 0.5563, + "step": 5950 + }, + { + "epoch": 0.8768664047151277, + "grad_norm": 0.5922397971153259, + "learning_rate": 4.039554214077816e-06, + "loss": 0.5843, + "step": 5951 + }, + { + "epoch": 0.8770137524557957, + "grad_norm": 0.579257607460022, + "learning_rate": 4.039248734084515e-06, + "loss": 0.5767, + "step": 5952 + }, + { + "epoch": 0.8771611001964637, + "grad_norm": 0.571636438369751, + "learning_rate": 4.038943217073004e-06, + "loss": 0.5633, + "step": 5953 + }, + { + "epoch": 0.8773084479371316, + "grad_norm": 0.639074444770813, + "learning_rate": 4.03863766305063e-06, + "loss": 0.5568, + "step": 5954 + }, + { + "epoch": 0.8774557956777996, + "grad_norm": 0.5916700959205627, + "learning_rate": 4.038332072024743e-06, + "loss": 0.5375, + "step": 5955 + }, + { + "epoch": 0.8776031434184676, + "grad_norm": 0.6134429574012756, + "learning_rate": 4.038026444002691e-06, + "loss": 0.5665, + "step": 5956 + }, + { + "epoch": 0.8777504911591356, + "grad_norm": 0.5871884822845459, + "learning_rate": 4.037720778991823e-06, + "loss": 0.5982, + "step": 5957 + }, + { + "epoch": 0.8778978388998036, + "grad_norm": 0.5778781175613403, + "learning_rate": 4.037415076999493e-06, + "loss": 0.5861, + "step": 5958 + }, + { + "epoch": 0.8780451866404715, + "grad_norm": 0.5923734307289124, + "learning_rate": 4.037109338033051e-06, + "loss": 0.5742, + "step": 5959 + }, + { + "epoch": 0.8781925343811395, + "grad_norm": 0.5941122770309448, + "learning_rate": 4.03680356209985e-06, + "loss": 0.5771, + "step": 5960 + }, + { + "epoch": 0.8783398821218075, + "grad_norm": 0.597716748714447, + "learning_rate": 4.036497749207244e-06, + "loss": 0.5744, + "step": 5961 + }, + { + "epoch": 0.8784872298624754, + "grad_norm": 0.6312869191169739, + "learning_rate": 4.0361918993625885e-06, + "loss": 0.5624, + "step": 5962 + }, + { + "epoch": 0.8786345776031435, + "grad_norm": 0.5750976800918579, + "learning_rate": 4.035886012573238e-06, + "loss": 0.5574, + "step": 5963 + }, + { + "epoch": 0.8787819253438114, + "grad_norm": 0.5871099829673767, + "learning_rate": 4.035580088846549e-06, + "loss": 0.5591, + "step": 5964 + }, + { + "epoch": 0.8789292730844793, + "grad_norm": 0.5748763084411621, + "learning_rate": 4.03527412818988e-06, + "loss": 0.5826, + "step": 5965 + }, + { + "epoch": 0.8790766208251474, + "grad_norm": 0.5739413499832153, + "learning_rate": 4.034968130610587e-06, + "loss": 0.5633, + "step": 5966 + }, + { + "epoch": 0.8792239685658153, + "grad_norm": 0.5696526765823364, + "learning_rate": 4.034662096116031e-06, + "loss": 0.5862, + "step": 5967 + }, + { + "epoch": 0.8793713163064834, + "grad_norm": 0.6095146536827087, + "learning_rate": 4.0343560247135715e-06, + "loss": 0.5205, + "step": 5968 + }, + { + "epoch": 0.8795186640471513, + "grad_norm": 0.5643733143806458, + "learning_rate": 4.034049916410569e-06, + "loss": 0.5711, + "step": 5969 + }, + { + "epoch": 0.8796660117878192, + "grad_norm": 0.5869238376617432, + "learning_rate": 4.033743771214385e-06, + "loss": 0.5573, + "step": 5970 + }, + { + "epoch": 0.8798133595284873, + "grad_norm": 0.5956771373748779, + "learning_rate": 4.033437589132384e-06, + "loss": 0.5637, + "step": 5971 + }, + { + "epoch": 0.8799607072691552, + "grad_norm": 0.5892709493637085, + "learning_rate": 4.033131370171928e-06, + "loss": 0.6022, + "step": 5972 + }, + { + "epoch": 0.8801080550098231, + "grad_norm": 0.5857936143875122, + "learning_rate": 4.032825114340381e-06, + "loss": 0.5449, + "step": 5973 + }, + { + "epoch": 0.8802554027504912, + "grad_norm": 0.588574230670929, + "learning_rate": 4.032518821645109e-06, + "loss": 0.5608, + "step": 5974 + }, + { + "epoch": 0.8804027504911591, + "grad_norm": 0.5837409496307373, + "learning_rate": 4.032212492093479e-06, + "loss": 0.5572, + "step": 5975 + }, + { + "epoch": 0.8805500982318271, + "grad_norm": 0.5494445562362671, + "learning_rate": 4.031906125692856e-06, + "loss": 0.5405, + "step": 5976 + }, + { + "epoch": 0.8806974459724951, + "grad_norm": 0.5586562156677246, + "learning_rate": 4.03159972245061e-06, + "loss": 0.5751, + "step": 5977 + }, + { + "epoch": 0.880844793713163, + "grad_norm": 0.5844871401786804, + "learning_rate": 4.031293282374109e-06, + "loss": 0.5391, + "step": 5978 + }, + { + "epoch": 0.8809921414538311, + "grad_norm": 0.5630969405174255, + "learning_rate": 4.030986805470723e-06, + "loss": 0.5692, + "step": 5979 + }, + { + "epoch": 0.881139489194499, + "grad_norm": 0.5635632872581482, + "learning_rate": 4.030680291747822e-06, + "loss": 0.5682, + "step": 5980 + }, + { + "epoch": 0.881286836935167, + "grad_norm": 0.5480016469955444, + "learning_rate": 4.030373741212777e-06, + "loss": 0.5213, + "step": 5981 + }, + { + "epoch": 0.881434184675835, + "grad_norm": 0.5957310795783997, + "learning_rate": 4.030067153872963e-06, + "loss": 0.5884, + "step": 5982 + }, + { + "epoch": 0.8815815324165029, + "grad_norm": 0.5584052801132202, + "learning_rate": 4.02976052973575e-06, + "loss": 0.5771, + "step": 5983 + }, + { + "epoch": 0.8817288801571709, + "grad_norm": 0.5556562542915344, + "learning_rate": 4.029453868808515e-06, + "loss": 0.5586, + "step": 5984 + }, + { + "epoch": 0.8818762278978389, + "grad_norm": 0.562979519367218, + "learning_rate": 4.0291471710986304e-06, + "loss": 0.6042, + "step": 5985 + }, + { + "epoch": 0.8820235756385069, + "grad_norm": 0.6002570390701294, + "learning_rate": 4.028840436613475e-06, + "loss": 0.5721, + "step": 5986 + }, + { + "epoch": 0.8821709233791748, + "grad_norm": 0.5826911926269531, + "learning_rate": 4.028533665360424e-06, + "loss": 0.5755, + "step": 5987 + }, + { + "epoch": 0.8823182711198428, + "grad_norm": 0.5788297653198242, + "learning_rate": 4.028226857346854e-06, + "loss": 0.5649, + "step": 5988 + }, + { + "epoch": 0.8824656188605108, + "grad_norm": 0.5682358741760254, + "learning_rate": 4.027920012580147e-06, + "loss": 0.5906, + "step": 5989 + }, + { + "epoch": 0.8826129666011788, + "grad_norm": 0.5723706483840942, + "learning_rate": 4.0276131310676784e-06, + "loss": 0.5756, + "step": 5990 + }, + { + "epoch": 0.8827603143418468, + "grad_norm": 0.5726301074028015, + "learning_rate": 4.027306212816832e-06, + "loss": 0.5684, + "step": 5991 + }, + { + "epoch": 0.8829076620825147, + "grad_norm": 0.5850448608398438, + "learning_rate": 4.0269992578349855e-06, + "loss": 0.5529, + "step": 5992 + }, + { + "epoch": 0.8830550098231827, + "grad_norm": 0.5543481111526489, + "learning_rate": 4.0266922661295245e-06, + "loss": 0.5364, + "step": 5993 + }, + { + "epoch": 0.8832023575638507, + "grad_norm": 0.5813553333282471, + "learning_rate": 4.0263852377078305e-06, + "loss": 0.5533, + "step": 5994 + }, + { + "epoch": 0.8833497053045186, + "grad_norm": 0.5865628719329834, + "learning_rate": 4.026078172577287e-06, + "loss": 0.5385, + "step": 5995 + }, + { + "epoch": 0.8834970530451867, + "grad_norm": 0.5491152405738831, + "learning_rate": 4.0257710707452805e-06, + "loss": 0.5633, + "step": 5996 + }, + { + "epoch": 0.8836444007858546, + "grad_norm": 0.5711942315101624, + "learning_rate": 4.025463932219194e-06, + "loss": 0.5838, + "step": 5997 + }, + { + "epoch": 0.8837917485265226, + "grad_norm": 0.5831656455993652, + "learning_rate": 4.025156757006416e-06, + "loss": 0.5849, + "step": 5998 + }, + { + "epoch": 0.8839390962671906, + "grad_norm": 0.5740633606910706, + "learning_rate": 4.024849545114333e-06, + "loss": 0.5643, + "step": 5999 + }, + { + "epoch": 0.8840864440078585, + "grad_norm": 0.5893856287002563, + "learning_rate": 4.024542296550334e-06, + "loss": 0.58, + "step": 6000 + }, + { + "epoch": 0.8842337917485266, + "grad_norm": 0.5599595308303833, + "learning_rate": 4.024235011321808e-06, + "loss": 0.5374, + "step": 6001 + }, + { + "epoch": 0.8843811394891945, + "grad_norm": 0.6006614565849304, + "learning_rate": 4.023927689436144e-06, + "loss": 0.5328, + "step": 6002 + }, + { + "epoch": 0.8845284872298624, + "grad_norm": 0.6064037680625916, + "learning_rate": 4.023620330900734e-06, + "loss": 0.5592, + "step": 6003 + }, + { + "epoch": 0.8846758349705305, + "grad_norm": 0.6297599673271179, + "learning_rate": 4.02331293572297e-06, + "loss": 0.5905, + "step": 6004 + }, + { + "epoch": 0.8848231827111984, + "grad_norm": 0.5915156602859497, + "learning_rate": 4.023005503910244e-06, + "loss": 0.5502, + "step": 6005 + }, + { + "epoch": 0.8849705304518664, + "grad_norm": 0.5594606399536133, + "learning_rate": 4.022698035469951e-06, + "loss": 0.5377, + "step": 6006 + }, + { + "epoch": 0.8851178781925344, + "grad_norm": 0.5729222893714905, + "learning_rate": 4.022390530409484e-06, + "loss": 0.5649, + "step": 6007 + }, + { + "epoch": 0.8852652259332023, + "grad_norm": 0.5843609571456909, + "learning_rate": 4.022082988736238e-06, + "loss": 0.5499, + "step": 6008 + }, + { + "epoch": 0.8854125736738704, + "grad_norm": 0.578248143196106, + "learning_rate": 4.021775410457611e-06, + "loss": 0.5604, + "step": 6009 + }, + { + "epoch": 0.8855599214145383, + "grad_norm": 0.5983719825744629, + "learning_rate": 4.021467795580999e-06, + "loss": 0.5661, + "step": 6010 + }, + { + "epoch": 0.8857072691552063, + "grad_norm": 0.5892780423164368, + "learning_rate": 4.0211601441138e-06, + "loss": 0.5594, + "step": 6011 + }, + { + "epoch": 0.8858546168958743, + "grad_norm": 0.5763846039772034, + "learning_rate": 4.0208524560634125e-06, + "loss": 0.5818, + "step": 6012 + }, + { + "epoch": 0.8860019646365422, + "grad_norm": 0.609272301197052, + "learning_rate": 4.020544731437237e-06, + "loss": 0.5724, + "step": 6013 + }, + { + "epoch": 0.8861493123772102, + "grad_norm": 0.5687239170074463, + "learning_rate": 4.020236970242674e-06, + "loss": 0.5533, + "step": 6014 + }, + { + "epoch": 0.8862966601178782, + "grad_norm": 0.5906710624694824, + "learning_rate": 4.019929172487125e-06, + "loss": 0.5294, + "step": 6015 + }, + { + "epoch": 0.8864440078585462, + "grad_norm": 0.576718807220459, + "learning_rate": 4.019621338177993e-06, + "loss": 0.5835, + "step": 6016 + }, + { + "epoch": 0.8865913555992141, + "grad_norm": 0.5662139058113098, + "learning_rate": 4.0193134673226785e-06, + "loss": 0.5282, + "step": 6017 + }, + { + "epoch": 0.8867387033398821, + "grad_norm": 0.6011795401573181, + "learning_rate": 4.019005559928589e-06, + "loss": 0.5315, + "step": 6018 + }, + { + "epoch": 0.8868860510805501, + "grad_norm": 0.5903263688087463, + "learning_rate": 4.018697616003129e-06, + "loss": 0.5827, + "step": 6019 + }, + { + "epoch": 0.8870333988212181, + "grad_norm": 0.5952478051185608, + "learning_rate": 4.018389635553702e-06, + "loss": 0.5509, + "step": 6020 + }, + { + "epoch": 0.8871807465618861, + "grad_norm": 0.5739912986755371, + "learning_rate": 4.018081618587717e-06, + "loss": 0.534, + "step": 6021 + }, + { + "epoch": 0.887328094302554, + "grad_norm": 0.5570973753929138, + "learning_rate": 4.017773565112582e-06, + "loss": 0.5692, + "step": 6022 + }, + { + "epoch": 0.887475442043222, + "grad_norm": 0.5569357872009277, + "learning_rate": 4.017465475135704e-06, + "loss": 0.5695, + "step": 6023 + }, + { + "epoch": 0.88762278978389, + "grad_norm": 0.5756839513778687, + "learning_rate": 4.017157348664493e-06, + "loss": 0.5884, + "step": 6024 + }, + { + "epoch": 0.8877701375245579, + "grad_norm": 0.5717715620994568, + "learning_rate": 4.01684918570636e-06, + "loss": 0.5453, + "step": 6025 + }, + { + "epoch": 0.887917485265226, + "grad_norm": 0.5977330803871155, + "learning_rate": 4.016540986268714e-06, + "loss": 0.5401, + "step": 6026 + }, + { + "epoch": 0.8880648330058939, + "grad_norm": 0.562987744808197, + "learning_rate": 4.01623275035897e-06, + "loss": 0.5171, + "step": 6027 + }, + { + "epoch": 0.8882121807465619, + "grad_norm": 0.5704028606414795, + "learning_rate": 4.015924477984539e-06, + "loss": 0.5805, + "step": 6028 + }, + { + "epoch": 0.8883595284872299, + "grad_norm": 0.5734341144561768, + "learning_rate": 4.015616169152835e-06, + "loss": 0.595, + "step": 6029 + }, + { + "epoch": 0.8885068762278978, + "grad_norm": 0.5748011469841003, + "learning_rate": 4.015307823871274e-06, + "loss": 0.5667, + "step": 6030 + }, + { + "epoch": 0.8886542239685659, + "grad_norm": 0.5327093601226807, + "learning_rate": 4.014999442147269e-06, + "loss": 0.5338, + "step": 6031 + }, + { + "epoch": 0.8888015717092338, + "grad_norm": 0.5290953516960144, + "learning_rate": 4.01469102398824e-06, + "loss": 0.5456, + "step": 6032 + }, + { + "epoch": 0.8889489194499017, + "grad_norm": 0.5721867084503174, + "learning_rate": 4.014382569401601e-06, + "loss": 0.55, + "step": 6033 + }, + { + "epoch": 0.8890962671905698, + "grad_norm": 0.5871017575263977, + "learning_rate": 4.014074078394772e-06, + "loss": 0.5682, + "step": 6034 + }, + { + "epoch": 0.8892436149312377, + "grad_norm": 0.5887936949729919, + "learning_rate": 4.013765550975171e-06, + "loss": 0.5211, + "step": 6035 + }, + { + "epoch": 0.8893909626719056, + "grad_norm": 0.60042405128479, + "learning_rate": 4.0134569871502185e-06, + "loss": 0.5725, + "step": 6036 + }, + { + "epoch": 0.8895383104125737, + "grad_norm": 0.6040025949478149, + "learning_rate": 4.013148386927336e-06, + "loss": 0.5596, + "step": 6037 + }, + { + "epoch": 0.8896856581532416, + "grad_norm": 0.6228603720664978, + "learning_rate": 4.012839750313944e-06, + "loss": 0.5931, + "step": 6038 + }, + { + "epoch": 0.8898330058939097, + "grad_norm": 0.5624776482582092, + "learning_rate": 4.0125310773174655e-06, + "loss": 0.56, + "step": 6039 + }, + { + "epoch": 0.8899803536345776, + "grad_norm": 0.5811115503311157, + "learning_rate": 4.012222367945324e-06, + "loss": 0.558, + "step": 6040 + }, + { + "epoch": 0.8901277013752456, + "grad_norm": 0.5589559674263, + "learning_rate": 4.011913622204944e-06, + "loss": 0.5608, + "step": 6041 + }, + { + "epoch": 0.8902750491159136, + "grad_norm": 0.568010151386261, + "learning_rate": 4.01160484010375e-06, + "loss": 0.5681, + "step": 6042 + }, + { + "epoch": 0.8904223968565815, + "grad_norm": 0.5772536993026733, + "learning_rate": 4.011296021649169e-06, + "loss": 0.5563, + "step": 6043 + }, + { + "epoch": 0.8905697445972495, + "grad_norm": 0.5533536076545715, + "learning_rate": 4.0109871668486275e-06, + "loss": 0.5543, + "step": 6044 + }, + { + "epoch": 0.8907170923379175, + "grad_norm": 0.5950711965560913, + "learning_rate": 4.010678275709554e-06, + "loss": 0.5532, + "step": 6045 + }, + { + "epoch": 0.8908644400785855, + "grad_norm": 0.5799126625061035, + "learning_rate": 4.0103693482393765e-06, + "loss": 0.5735, + "step": 6046 + }, + { + "epoch": 0.8910117878192534, + "grad_norm": 0.6179525256156921, + "learning_rate": 4.0100603844455235e-06, + "loss": 0.5446, + "step": 6047 + }, + { + "epoch": 0.8911591355599214, + "grad_norm": 0.604509711265564, + "learning_rate": 4.0097513843354275e-06, + "loss": 0.5832, + "step": 6048 + }, + { + "epoch": 0.8913064833005894, + "grad_norm": 0.5933088660240173, + "learning_rate": 4.009442347916518e-06, + "loss": 0.54, + "step": 6049 + }, + { + "epoch": 0.8914538310412574, + "grad_norm": 0.598089337348938, + "learning_rate": 4.009133275196229e-06, + "loss": 0.5684, + "step": 6050 + }, + { + "epoch": 0.8916011787819254, + "grad_norm": 0.5761120915412903, + "learning_rate": 4.008824166181991e-06, + "loss": 0.5677, + "step": 6051 + }, + { + "epoch": 0.8917485265225933, + "grad_norm": 0.5675162672996521, + "learning_rate": 4.008515020881241e-06, + "loss": 0.5487, + "step": 6052 + }, + { + "epoch": 0.8918958742632613, + "grad_norm": 0.5838099122047424, + "learning_rate": 4.008205839301412e-06, + "loss": 0.536, + "step": 6053 + }, + { + "epoch": 0.8920432220039293, + "grad_norm": 0.6019006371498108, + "learning_rate": 4.007896621449939e-06, + "loss": 0.5748, + "step": 6054 + }, + { + "epoch": 0.8921905697445972, + "grad_norm": 0.5765785574913025, + "learning_rate": 4.00758736733426e-06, + "loss": 0.5308, + "step": 6055 + }, + { + "epoch": 0.8923379174852653, + "grad_norm": 0.589081883430481, + "learning_rate": 4.007278076961812e-06, + "loss": 0.5849, + "step": 6056 + }, + { + "epoch": 0.8924852652259332, + "grad_norm": 0.5840272903442383, + "learning_rate": 4.006968750340034e-06, + "loss": 0.59, + "step": 6057 + }, + { + "epoch": 0.8926326129666011, + "grad_norm": 0.569466769695282, + "learning_rate": 4.006659387476364e-06, + "loss": 0.5671, + "step": 6058 + }, + { + "epoch": 0.8927799607072692, + "grad_norm": 0.6010791063308716, + "learning_rate": 4.0063499883782414e-06, + "loss": 0.5731, + "step": 6059 + }, + { + "epoch": 0.8929273084479371, + "grad_norm": 0.5555022954940796, + "learning_rate": 4.0060405530531084e-06, + "loss": 0.5438, + "step": 6060 + }, + { + "epoch": 0.8930746561886052, + "grad_norm": 0.5792735815048218, + "learning_rate": 4.005731081508407e-06, + "loss": 0.577, + "step": 6061 + }, + { + "epoch": 0.8932220039292731, + "grad_norm": 0.6114909052848816, + "learning_rate": 4.005421573751579e-06, + "loss": 0.5453, + "step": 6062 + }, + { + "epoch": 0.893369351669941, + "grad_norm": 0.5701487064361572, + "learning_rate": 4.0051120297900695e-06, + "loss": 0.5465, + "step": 6063 + }, + { + "epoch": 0.8935166994106091, + "grad_norm": 0.5970178246498108, + "learning_rate": 4.00480244963132e-06, + "loss": 0.5483, + "step": 6064 + }, + { + "epoch": 0.893664047151277, + "grad_norm": 0.5419290065765381, + "learning_rate": 4.004492833282778e-06, + "loss": 0.5418, + "step": 6065 + }, + { + "epoch": 0.8938113948919449, + "grad_norm": 0.5619843602180481, + "learning_rate": 4.004183180751888e-06, + "loss": 0.5775, + "step": 6066 + }, + { + "epoch": 0.893958742632613, + "grad_norm": 0.6054506897926331, + "learning_rate": 4.0038734920461e-06, + "loss": 0.5484, + "step": 6067 + }, + { + "epoch": 0.8941060903732809, + "grad_norm": 0.5640261173248291, + "learning_rate": 4.003563767172859e-06, + "loss": 0.5743, + "step": 6068 + }, + { + "epoch": 0.894253438113949, + "grad_norm": 0.6179876327514648, + "learning_rate": 4.003254006139614e-06, + "loss": 0.5147, + "step": 6069 + }, + { + "epoch": 0.8944007858546169, + "grad_norm": 0.5633024573326111, + "learning_rate": 4.002944208953815e-06, + "loss": 0.546, + "step": 6070 + }, + { + "epoch": 0.8945481335952848, + "grad_norm": 0.578592836856842, + "learning_rate": 4.002634375622913e-06, + "loss": 0.5443, + "step": 6071 + }, + { + "epoch": 0.8946954813359529, + "grad_norm": 0.5740900039672852, + "learning_rate": 4.00232450615436e-06, + "loss": 0.5839, + "step": 6072 + }, + { + "epoch": 0.8948428290766208, + "grad_norm": 0.5680674314498901, + "learning_rate": 4.0020146005556074e-06, + "loss": 0.5699, + "step": 6073 + }, + { + "epoch": 0.8949901768172888, + "grad_norm": 0.5728430151939392, + "learning_rate": 4.001704658834107e-06, + "loss": 0.5788, + "step": 6074 + }, + { + "epoch": 0.8951375245579568, + "grad_norm": 0.5795100331306458, + "learning_rate": 4.001394680997314e-06, + "loss": 0.5784, + "step": 6075 + }, + { + "epoch": 0.8952848722986247, + "grad_norm": 0.5753192901611328, + "learning_rate": 4.001084667052684e-06, + "loss": 0.5378, + "step": 6076 + }, + { + "epoch": 0.8954322200392927, + "grad_norm": 0.5806611180305481, + "learning_rate": 4.00077461700767e-06, + "loss": 0.5702, + "step": 6077 + }, + { + "epoch": 0.8955795677799607, + "grad_norm": 0.5881778597831726, + "learning_rate": 4.000464530869732e-06, + "loss": 0.5712, + "step": 6078 + }, + { + "epoch": 0.8957269155206287, + "grad_norm": 0.602916419506073, + "learning_rate": 4.000154408646325e-06, + "loss": 0.5562, + "step": 6079 + }, + { + "epoch": 0.8958742632612967, + "grad_norm": 0.5804906487464905, + "learning_rate": 3.999844250344909e-06, + "loss": 0.5311, + "step": 6080 + }, + { + "epoch": 0.8960216110019646, + "grad_norm": 0.5764369368553162, + "learning_rate": 3.999534055972941e-06, + "loss": 0.5659, + "step": 6081 + }, + { + "epoch": 0.8961689587426326, + "grad_norm": 0.5567813515663147, + "learning_rate": 3.999223825537884e-06, + "loss": 0.5643, + "step": 6082 + }, + { + "epoch": 0.8963163064833006, + "grad_norm": 0.5691680908203125, + "learning_rate": 3.9989135590471955e-06, + "loss": 0.5571, + "step": 6083 + }, + { + "epoch": 0.8964636542239686, + "grad_norm": 0.5899155735969543, + "learning_rate": 3.99860325650834e-06, + "loss": 0.5821, + "step": 6084 + }, + { + "epoch": 0.8966110019646365, + "grad_norm": 0.5719903111457825, + "learning_rate": 3.998292917928778e-06, + "loss": 0.5394, + "step": 6085 + }, + { + "epoch": 0.8967583497053045, + "grad_norm": 0.5963704586029053, + "learning_rate": 3.997982543315975e-06, + "loss": 0.5609, + "step": 6086 + }, + { + "epoch": 0.8969056974459725, + "grad_norm": 0.6290658116340637, + "learning_rate": 3.997672132677393e-06, + "loss": 0.5537, + "step": 6087 + }, + { + "epoch": 0.8970530451866404, + "grad_norm": 0.5513363480567932, + "learning_rate": 3.997361686020501e-06, + "loss": 0.5271, + "step": 6088 + }, + { + "epoch": 0.8972003929273085, + "grad_norm": 0.5843978524208069, + "learning_rate": 3.997051203352762e-06, + "loss": 0.5531, + "step": 6089 + }, + { + "epoch": 0.8973477406679764, + "grad_norm": 0.6203262209892273, + "learning_rate": 3.996740684681643e-06, + "loss": 0.5434, + "step": 6090 + }, + { + "epoch": 0.8974950884086444, + "grad_norm": 0.581207275390625, + "learning_rate": 3.996430130014612e-06, + "loss": 0.562, + "step": 6091 + }, + { + "epoch": 0.8976424361493124, + "grad_norm": 0.5634926557540894, + "learning_rate": 3.99611953935914e-06, + "loss": 0.5619, + "step": 6092 + }, + { + "epoch": 0.8977897838899803, + "grad_norm": 0.5630001425743103, + "learning_rate": 3.995808912722694e-06, + "loss": 0.5569, + "step": 6093 + }, + { + "epoch": 0.8979371316306484, + "grad_norm": 0.5796622037887573, + "learning_rate": 3.995498250112745e-06, + "loss": 0.5436, + "step": 6094 + }, + { + "epoch": 0.8980844793713163, + "grad_norm": 0.568167507648468, + "learning_rate": 3.995187551536764e-06, + "loss": 0.5352, + "step": 6095 + }, + { + "epoch": 0.8982318271119842, + "grad_norm": 0.5770615339279175, + "learning_rate": 3.994876817002225e-06, + "loss": 0.5634, + "step": 6096 + }, + { + "epoch": 0.8983791748526523, + "grad_norm": 0.5642428994178772, + "learning_rate": 3.994566046516598e-06, + "loss": 0.5817, + "step": 6097 + }, + { + "epoch": 0.8985265225933202, + "grad_norm": 0.5812709927558899, + "learning_rate": 3.99425524008736e-06, + "loss": 0.5274, + "step": 6098 + }, + { + "epoch": 0.8986738703339883, + "grad_norm": 0.5930283665657043, + "learning_rate": 3.993944397721984e-06, + "loss": 0.5636, + "step": 6099 + }, + { + "epoch": 0.8988212180746562, + "grad_norm": 0.6527401208877563, + "learning_rate": 3.993633519427945e-06, + "loss": 0.5408, + "step": 6100 + }, + { + "epoch": 0.8989685658153241, + "grad_norm": 0.5803483724594116, + "learning_rate": 3.993322605212721e-06, + "loss": 0.5353, + "step": 6101 + }, + { + "epoch": 0.8991159135559922, + "grad_norm": 0.5996838212013245, + "learning_rate": 3.9930116550837895e-06, + "loss": 0.6064, + "step": 6102 + }, + { + "epoch": 0.8992632612966601, + "grad_norm": 0.5773062109947205, + "learning_rate": 3.992700669048628e-06, + "loss": 0.5566, + "step": 6103 + }, + { + "epoch": 0.8994106090373281, + "grad_norm": 0.5799014568328857, + "learning_rate": 3.992389647114715e-06, + "loss": 0.5359, + "step": 6104 + }, + { + "epoch": 0.8995579567779961, + "grad_norm": 0.5873996019363403, + "learning_rate": 3.992078589289531e-06, + "loss": 0.552, + "step": 6105 + }, + { + "epoch": 0.899705304518664, + "grad_norm": 0.5626732707023621, + "learning_rate": 3.991767495580556e-06, + "loss": 0.5732, + "step": 6106 + }, + { + "epoch": 0.899852652259332, + "grad_norm": 0.5664138197898865, + "learning_rate": 3.991456365995273e-06, + "loss": 0.562, + "step": 6107 + }, + { + "epoch": 0.9, + "grad_norm": 0.5757091641426086, + "learning_rate": 3.9911452005411645e-06, + "loss": 0.5337, + "step": 6108 + }, + { + "epoch": 0.900147347740668, + "grad_norm": 0.5772122144699097, + "learning_rate": 3.9908339992257124e-06, + "loss": 0.542, + "step": 6109 + }, + { + "epoch": 0.900294695481336, + "grad_norm": 0.5806431770324707, + "learning_rate": 3.990522762056403e-06, + "loss": 0.5708, + "step": 6110 + }, + { + "epoch": 0.9004420432220039, + "grad_norm": 0.5651638507843018, + "learning_rate": 3.9902114890407186e-06, + "loss": 0.5991, + "step": 6111 + }, + { + "epoch": 0.9005893909626719, + "grad_norm": 0.5728091597557068, + "learning_rate": 3.989900180186148e-06, + "loss": 0.5452, + "step": 6112 + }, + { + "epoch": 0.9007367387033399, + "grad_norm": 0.5674816966056824, + "learning_rate": 3.9895888355001765e-06, + "loss": 0.5738, + "step": 6113 + }, + { + "epoch": 0.9008840864440079, + "grad_norm": 0.5575186014175415, + "learning_rate": 3.989277454990292e-06, + "loss": 0.5847, + "step": 6114 + }, + { + "epoch": 0.9010314341846758, + "grad_norm": 0.5769307017326355, + "learning_rate": 3.988966038663984e-06, + "loss": 0.5306, + "step": 6115 + }, + { + "epoch": 0.9011787819253438, + "grad_norm": 0.5651907324790955, + "learning_rate": 3.988654586528741e-06, + "loss": 0.5384, + "step": 6116 + }, + { + "epoch": 0.9013261296660118, + "grad_norm": 0.589223325252533, + "learning_rate": 3.988343098592053e-06, + "loss": 0.5531, + "step": 6117 + }, + { + "epoch": 0.9014734774066797, + "grad_norm": 0.554950475692749, + "learning_rate": 3.9880315748614115e-06, + "loss": 0.5498, + "step": 6118 + }, + { + "epoch": 0.9016208251473478, + "grad_norm": 0.5858728289604187, + "learning_rate": 3.987720015344308e-06, + "loss": 0.555, + "step": 6119 + }, + { + "epoch": 0.9017681728880157, + "grad_norm": 0.5910630822181702, + "learning_rate": 3.9874084200482374e-06, + "loss": 0.5427, + "step": 6120 + }, + { + "epoch": 0.9019155206286837, + "grad_norm": 0.5918857455253601, + "learning_rate": 3.9870967889806915e-06, + "loss": 0.5771, + "step": 6121 + }, + { + "epoch": 0.9020628683693517, + "grad_norm": 0.5767453908920288, + "learning_rate": 3.986785122149165e-06, + "loss": 0.5314, + "step": 6122 + }, + { + "epoch": 0.9022102161100196, + "grad_norm": 0.6455809473991394, + "learning_rate": 3.9864734195611535e-06, + "loss": 0.5796, + "step": 6123 + }, + { + "epoch": 0.9023575638506877, + "grad_norm": 0.6056203246116638, + "learning_rate": 3.986161681224153e-06, + "loss": 0.5345, + "step": 6124 + }, + { + "epoch": 0.9025049115913556, + "grad_norm": 0.5573270916938782, + "learning_rate": 3.9858499071456625e-06, + "loss": 0.5687, + "step": 6125 + }, + { + "epoch": 0.9026522593320235, + "grad_norm": 0.5530769228935242, + "learning_rate": 3.985538097333178e-06, + "loss": 0.5544, + "step": 6126 + }, + { + "epoch": 0.9027996070726916, + "grad_norm": 0.5799046754837036, + "learning_rate": 3.9852262517941984e-06, + "loss": 0.572, + "step": 6127 + }, + { + "epoch": 0.9029469548133595, + "grad_norm": 0.5837818384170532, + "learning_rate": 3.984914370536224e-06, + "loss": 0.5529, + "step": 6128 + }, + { + "epoch": 0.9030943025540275, + "grad_norm": 0.5514419078826904, + "learning_rate": 3.984602453566757e-06, + "loss": 0.5455, + "step": 6129 + }, + { + "epoch": 0.9032416502946955, + "grad_norm": 0.574781060218811, + "learning_rate": 3.984290500893296e-06, + "loss": 0.5864, + "step": 6130 + }, + { + "epoch": 0.9033889980353634, + "grad_norm": 0.5827940702438354, + "learning_rate": 3.983978512523345e-06, + "loss": 0.5495, + "step": 6131 + }, + { + "epoch": 0.9035363457760315, + "grad_norm": 0.556594967842102, + "learning_rate": 3.983666488464406e-06, + "loss": 0.5662, + "step": 6132 + }, + { + "epoch": 0.9036836935166994, + "grad_norm": 0.6038408279418945, + "learning_rate": 3.983354428723985e-06, + "loss": 0.5747, + "step": 6133 + }, + { + "epoch": 0.9038310412573674, + "grad_norm": 0.5734047293663025, + "learning_rate": 3.983042333309585e-06, + "loss": 0.5553, + "step": 6134 + }, + { + "epoch": 0.9039783889980354, + "grad_norm": 0.5864964723587036, + "learning_rate": 3.9827302022287125e-06, + "loss": 0.5474, + "step": 6135 + }, + { + "epoch": 0.9041257367387033, + "grad_norm": 0.603393018245697, + "learning_rate": 3.982418035488875e-06, + "loss": 0.572, + "step": 6136 + }, + { + "epoch": 0.9042730844793713, + "grad_norm": 0.5723116397857666, + "learning_rate": 3.982105833097578e-06, + "loss": 0.5882, + "step": 6137 + }, + { + "epoch": 0.9044204322200393, + "grad_norm": 0.5972945690155029, + "learning_rate": 3.981793595062332e-06, + "loss": 0.5844, + "step": 6138 + }, + { + "epoch": 0.9045677799607073, + "grad_norm": 0.5602189302444458, + "learning_rate": 3.981481321390644e-06, + "loss": 0.5481, + "step": 6139 + }, + { + "epoch": 0.9047151277013753, + "grad_norm": 0.5986003875732422, + "learning_rate": 3.981169012090025e-06, + "loss": 0.5712, + "step": 6140 + }, + { + "epoch": 0.9048624754420432, + "grad_norm": 0.565601110458374, + "learning_rate": 3.980856667167987e-06, + "loss": 0.5615, + "step": 6141 + }, + { + "epoch": 0.9050098231827112, + "grad_norm": 0.5630431771278381, + "learning_rate": 3.98054428663204e-06, + "loss": 0.5296, + "step": 6142 + }, + { + "epoch": 0.9051571709233792, + "grad_norm": 0.5821294784545898, + "learning_rate": 3.980231870489697e-06, + "loss": 0.5585, + "step": 6143 + }, + { + "epoch": 0.9053045186640472, + "grad_norm": 0.5913892388343811, + "learning_rate": 3.979919418748473e-06, + "loss": 0.5742, + "step": 6144 + }, + { + "epoch": 0.9054518664047151, + "grad_norm": 0.5859915614128113, + "learning_rate": 3.9796069314158794e-06, + "loss": 0.5581, + "step": 6145 + }, + { + "epoch": 0.9055992141453831, + "grad_norm": 0.5542556643486023, + "learning_rate": 3.979294408499434e-06, + "loss": 0.5437, + "step": 6146 + }, + { + "epoch": 0.9057465618860511, + "grad_norm": 0.5897487998008728, + "learning_rate": 3.978981850006652e-06, + "loss": 0.5727, + "step": 6147 + }, + { + "epoch": 0.905893909626719, + "grad_norm": 0.5765531659126282, + "learning_rate": 3.97866925594505e-06, + "loss": 0.5724, + "step": 6148 + }, + { + "epoch": 0.906041257367387, + "grad_norm": 0.5730356574058533, + "learning_rate": 3.978356626322146e-06, + "loss": 0.5343, + "step": 6149 + }, + { + "epoch": 0.906188605108055, + "grad_norm": 0.5506396293640137, + "learning_rate": 3.978043961145458e-06, + "loss": 0.5363, + "step": 6150 + }, + { + "epoch": 0.906335952848723, + "grad_norm": 0.6177586317062378, + "learning_rate": 3.977731260422506e-06, + "loss": 0.5189, + "step": 6151 + }, + { + "epoch": 0.906483300589391, + "grad_norm": 0.7657085657119751, + "learning_rate": 3.977418524160811e-06, + "loss": 0.5389, + "step": 6152 + }, + { + "epoch": 0.9066306483300589, + "grad_norm": 0.5973541140556335, + "learning_rate": 3.977105752367894e-06, + "loss": 0.5608, + "step": 6153 + }, + { + "epoch": 0.906777996070727, + "grad_norm": 0.5942824482917786, + "learning_rate": 3.976792945051275e-06, + "loss": 0.545, + "step": 6154 + }, + { + "epoch": 0.9069253438113949, + "grad_norm": 0.5745764374732971, + "learning_rate": 3.9764801022184794e-06, + "loss": 0.5647, + "step": 6155 + }, + { + "epoch": 0.9070726915520628, + "grad_norm": 0.5592877268791199, + "learning_rate": 3.9761672238770284e-06, + "loss": 0.5392, + "step": 6156 + }, + { + "epoch": 0.9072200392927309, + "grad_norm": 0.5895373225212097, + "learning_rate": 3.97585431003445e-06, + "loss": 0.5595, + "step": 6157 + }, + { + "epoch": 0.9073673870333988, + "grad_norm": 0.600983738899231, + "learning_rate": 3.9755413606982664e-06, + "loss": 0.5433, + "step": 6158 + }, + { + "epoch": 0.9075147347740667, + "grad_norm": 0.5914933085441589, + "learning_rate": 3.9752283758760055e-06, + "loss": 0.5532, + "step": 6159 + }, + { + "epoch": 0.9076620825147348, + "grad_norm": 0.5965682864189148, + "learning_rate": 3.974915355575194e-06, + "loss": 0.5166, + "step": 6160 + }, + { + "epoch": 0.9078094302554027, + "grad_norm": 0.5855010747909546, + "learning_rate": 3.974602299803361e-06, + "loss": 0.5452, + "step": 6161 + }, + { + "epoch": 0.9079567779960708, + "grad_norm": 0.5612408518791199, + "learning_rate": 3.974289208568033e-06, + "loss": 0.5475, + "step": 6162 + }, + { + "epoch": 0.9081041257367387, + "grad_norm": 0.5594128370285034, + "learning_rate": 3.973976081876741e-06, + "loss": 0.5681, + "step": 6163 + }, + { + "epoch": 0.9082514734774066, + "grad_norm": 0.5716376304626465, + "learning_rate": 3.973662919737016e-06, + "loss": 0.5795, + "step": 6164 + }, + { + "epoch": 0.9083988212180747, + "grad_norm": 0.6206640601158142, + "learning_rate": 3.973349722156389e-06, + "loss": 0.5485, + "step": 6165 + }, + { + "epoch": 0.9085461689587426, + "grad_norm": 0.6000376343727112, + "learning_rate": 3.973036489142393e-06, + "loss": 0.5641, + "step": 6166 + }, + { + "epoch": 0.9086935166994106, + "grad_norm": 0.5903461575508118, + "learning_rate": 3.972723220702559e-06, + "loss": 0.5391, + "step": 6167 + }, + { + "epoch": 0.9088408644400786, + "grad_norm": 0.576965868473053, + "learning_rate": 3.972409916844423e-06, + "loss": 0.5676, + "step": 6168 + }, + { + "epoch": 0.9089882121807465, + "grad_norm": 0.5691534280776978, + "learning_rate": 3.9720965775755185e-06, + "loss": 0.5461, + "step": 6169 + }, + { + "epoch": 0.9091355599214146, + "grad_norm": 0.5768135786056519, + "learning_rate": 3.971783202903382e-06, + "loss": 0.5333, + "step": 6170 + }, + { + "epoch": 0.9092829076620825, + "grad_norm": 0.5650196075439453, + "learning_rate": 3.971469792835549e-06, + "loss": 0.5781, + "step": 6171 + }, + { + "epoch": 0.9094302554027505, + "grad_norm": 0.594393253326416, + "learning_rate": 3.971156347379559e-06, + "loss": 0.5625, + "step": 6172 + }, + { + "epoch": 0.9095776031434185, + "grad_norm": 0.5587553977966309, + "learning_rate": 3.9708428665429486e-06, + "loss": 0.5514, + "step": 6173 + }, + { + "epoch": 0.9097249508840864, + "grad_norm": 0.5698346495628357, + "learning_rate": 3.970529350333257e-06, + "loss": 0.5585, + "step": 6174 + }, + { + "epoch": 0.9098722986247544, + "grad_norm": 0.5761030912399292, + "learning_rate": 3.970215798758024e-06, + "loss": 0.5648, + "step": 6175 + }, + { + "epoch": 0.9100196463654224, + "grad_norm": 0.5761317014694214, + "learning_rate": 3.969902211824791e-06, + "loss": 0.5459, + "step": 6176 + }, + { + "epoch": 0.9101669941060904, + "grad_norm": 0.6115208864212036, + "learning_rate": 3.969588589541099e-06, + "loss": 0.569, + "step": 6177 + }, + { + "epoch": 0.9103143418467583, + "grad_norm": 0.5699384808540344, + "learning_rate": 3.969274931914491e-06, + "loss": 0.5635, + "step": 6178 + }, + { + "epoch": 0.9104616895874263, + "grad_norm": 0.5816739201545715, + "learning_rate": 3.9689612389525095e-06, + "loss": 0.542, + "step": 6179 + }, + { + "epoch": 0.9106090373280943, + "grad_norm": 0.5818178057670593, + "learning_rate": 3.968647510662699e-06, + "loss": 0.5353, + "step": 6180 + }, + { + "epoch": 0.9107563850687623, + "grad_norm": 0.5822853446006775, + "learning_rate": 3.968333747052605e-06, + "loss": 0.5641, + "step": 6181 + }, + { + "epoch": 0.9109037328094303, + "grad_norm": 0.5751838088035583, + "learning_rate": 3.968019948129774e-06, + "loss": 0.5424, + "step": 6182 + }, + { + "epoch": 0.9110510805500982, + "grad_norm": 0.5755302309989929, + "learning_rate": 3.967706113901751e-06, + "loss": 0.5701, + "step": 6183 + }, + { + "epoch": 0.9111984282907662, + "grad_norm": 0.5634126663208008, + "learning_rate": 3.967392244376084e-06, + "loss": 0.5629, + "step": 6184 + }, + { + "epoch": 0.9113457760314342, + "grad_norm": 0.5774502754211426, + "learning_rate": 3.967078339560322e-06, + "loss": 0.555, + "step": 6185 + }, + { + "epoch": 0.9114931237721021, + "grad_norm": 0.5780031681060791, + "learning_rate": 3.966764399462015e-06, + "loss": 0.5877, + "step": 6186 + }, + { + "epoch": 0.9116404715127702, + "grad_norm": 0.6019590497016907, + "learning_rate": 3.96645042408871e-06, + "loss": 0.5441, + "step": 6187 + }, + { + "epoch": 0.9117878192534381, + "grad_norm": 0.5600149631500244, + "learning_rate": 3.966136413447962e-06, + "loss": 0.554, + "step": 6188 + }, + { + "epoch": 0.911935166994106, + "grad_norm": 0.6157128810882568, + "learning_rate": 3.96582236754732e-06, + "loss": 0.5566, + "step": 6189 + }, + { + "epoch": 0.9120825147347741, + "grad_norm": 0.609338104724884, + "learning_rate": 3.965508286394338e-06, + "loss": 0.5678, + "step": 6190 + }, + { + "epoch": 0.912229862475442, + "grad_norm": 0.5601155161857605, + "learning_rate": 3.965194169996569e-06, + "loss": 0.5522, + "step": 6191 + }, + { + "epoch": 0.9123772102161101, + "grad_norm": 0.582933783531189, + "learning_rate": 3.964880018361568e-06, + "loss": 0.5465, + "step": 6192 + }, + { + "epoch": 0.912524557956778, + "grad_norm": 0.5906549692153931, + "learning_rate": 3.9645658314968885e-06, + "loss": 0.5331, + "step": 6193 + }, + { + "epoch": 0.9126719056974459, + "grad_norm": 0.6052451729774475, + "learning_rate": 3.964251609410089e-06, + "loss": 0.5488, + "step": 6194 + }, + { + "epoch": 0.912819253438114, + "grad_norm": 0.5753977298736572, + "learning_rate": 3.963937352108724e-06, + "loss": 0.5858, + "step": 6195 + }, + { + "epoch": 0.9129666011787819, + "grad_norm": 0.601349949836731, + "learning_rate": 3.963623059600352e-06, + "loss": 0.5407, + "step": 6196 + }, + { + "epoch": 0.9131139489194499, + "grad_norm": 0.5576774477958679, + "learning_rate": 3.963308731892533e-06, + "loss": 0.5705, + "step": 6197 + }, + { + "epoch": 0.9132612966601179, + "grad_norm": 0.573086678981781, + "learning_rate": 3.962994368992825e-06, + "loss": 0.5656, + "step": 6198 + }, + { + "epoch": 0.9134086444007858, + "grad_norm": 0.5556859374046326, + "learning_rate": 3.962679970908789e-06, + "loss": 0.5714, + "step": 6199 + }, + { + "epoch": 0.9135559921414538, + "grad_norm": 0.5748143196105957, + "learning_rate": 3.962365537647984e-06, + "loss": 0.5609, + "step": 6200 + }, + { + "epoch": 0.9137033398821218, + "grad_norm": 0.6258203387260437, + "learning_rate": 3.9620510692179755e-06, + "loss": 0.5625, + "step": 6201 + }, + { + "epoch": 0.9138506876227898, + "grad_norm": 0.581674337387085, + "learning_rate": 3.961736565626323e-06, + "loss": 0.5249, + "step": 6202 + }, + { + "epoch": 0.9139980353634578, + "grad_norm": 0.5672242641448975, + "learning_rate": 3.961422026880593e-06, + "loss": 0.5278, + "step": 6203 + }, + { + "epoch": 0.9141453831041257, + "grad_norm": 0.5957567691802979, + "learning_rate": 3.961107452988349e-06, + "loss": 0.5518, + "step": 6204 + }, + { + "epoch": 0.9142927308447937, + "grad_norm": 0.5521392822265625, + "learning_rate": 3.9607928439571555e-06, + "loss": 0.5649, + "step": 6205 + }, + { + "epoch": 0.9144400785854617, + "grad_norm": 0.5746392011642456, + "learning_rate": 3.9604781997945786e-06, + "loss": 0.5711, + "step": 6206 + }, + { + "epoch": 0.9145874263261297, + "grad_norm": 0.6289478540420532, + "learning_rate": 3.960163520508186e-06, + "loss": 0.5421, + "step": 6207 + }, + { + "epoch": 0.9147347740667976, + "grad_norm": 0.5919883251190186, + "learning_rate": 3.959848806105546e-06, + "loss": 0.5672, + "step": 6208 + }, + { + "epoch": 0.9148821218074656, + "grad_norm": 0.5952133536338806, + "learning_rate": 3.959534056594227e-06, + "loss": 0.5159, + "step": 6209 + }, + { + "epoch": 0.9150294695481336, + "grad_norm": 0.5707306861877441, + "learning_rate": 3.959219271981799e-06, + "loss": 0.5878, + "step": 6210 + }, + { + "epoch": 0.9151768172888016, + "grad_norm": 0.5629863739013672, + "learning_rate": 3.958904452275831e-06, + "loss": 0.5612, + "step": 6211 + }, + { + "epoch": 0.9153241650294696, + "grad_norm": 0.5722527503967285, + "learning_rate": 3.958589597483896e-06, + "loss": 0.5513, + "step": 6212 + }, + { + "epoch": 0.9154715127701375, + "grad_norm": 0.5875434279441833, + "learning_rate": 3.958274707613565e-06, + "loss": 0.5828, + "step": 6213 + }, + { + "epoch": 0.9156188605108055, + "grad_norm": 0.586750328540802, + "learning_rate": 3.957959782672411e-06, + "loss": 0.5441, + "step": 6214 + }, + { + "epoch": 0.9157662082514735, + "grad_norm": 0.5818741321563721, + "learning_rate": 3.9576448226680085e-06, + "loss": 0.5735, + "step": 6215 + }, + { + "epoch": 0.9159135559921414, + "grad_norm": 0.5632708668708801, + "learning_rate": 3.957329827607932e-06, + "loss": 0.535, + "step": 6216 + }, + { + "epoch": 0.9160609037328095, + "grad_norm": 0.5605157613754272, + "learning_rate": 3.957014797499755e-06, + "loss": 0.5294, + "step": 6217 + }, + { + "epoch": 0.9162082514734774, + "grad_norm": 0.6145162582397461, + "learning_rate": 3.956699732351057e-06, + "loss": 0.5648, + "step": 6218 + }, + { + "epoch": 0.9163555992141453, + "grad_norm": 0.5959775447845459, + "learning_rate": 3.9563846321694135e-06, + "loss": 0.5854, + "step": 6219 + }, + { + "epoch": 0.9165029469548134, + "grad_norm": 0.5935190916061401, + "learning_rate": 3.956069496962403e-06, + "loss": 0.5793, + "step": 6220 + }, + { + "epoch": 0.9166502946954813, + "grad_norm": 0.6199712753295898, + "learning_rate": 3.955754326737603e-06, + "loss": 0.5578, + "step": 6221 + }, + { + "epoch": 0.9167976424361494, + "grad_norm": 0.5658186674118042, + "learning_rate": 3.955439121502595e-06, + "loss": 0.5919, + "step": 6222 + }, + { + "epoch": 0.9169449901768173, + "grad_norm": 0.5878244638442993, + "learning_rate": 3.955123881264958e-06, + "loss": 0.5166, + "step": 6223 + }, + { + "epoch": 0.9170923379174852, + "grad_norm": 0.572766125202179, + "learning_rate": 3.954808606032274e-06, + "loss": 0.5735, + "step": 6224 + }, + { + "epoch": 0.9172396856581533, + "grad_norm": 0.5892237424850464, + "learning_rate": 3.954493295812127e-06, + "loss": 0.5944, + "step": 6225 + }, + { + "epoch": 0.9173870333988212, + "grad_norm": 0.5919833183288574, + "learning_rate": 3.954177950612097e-06, + "loss": 0.5506, + "step": 6226 + }, + { + "epoch": 0.9175343811394892, + "grad_norm": 0.5787703990936279, + "learning_rate": 3.95386257043977e-06, + "loss": 0.5558, + "step": 6227 + }, + { + "epoch": 0.9176817288801572, + "grad_norm": 0.574927031993866, + "learning_rate": 3.95354715530273e-06, + "loss": 0.5683, + "step": 6228 + }, + { + "epoch": 0.9178290766208251, + "grad_norm": 0.5940486192703247, + "learning_rate": 3.953231705208562e-06, + "loss": 0.589, + "step": 6229 + }, + { + "epoch": 0.9179764243614931, + "grad_norm": 0.5964310765266418, + "learning_rate": 3.9529162201648545e-06, + "loss": 0.5497, + "step": 6230 + }, + { + "epoch": 0.9181237721021611, + "grad_norm": 0.5870473384857178, + "learning_rate": 3.952600700179192e-06, + "loss": 0.5529, + "step": 6231 + }, + { + "epoch": 0.918271119842829, + "grad_norm": 0.6320831775665283, + "learning_rate": 3.952285145259165e-06, + "loss": 0.568, + "step": 6232 + }, + { + "epoch": 0.9184184675834971, + "grad_norm": 0.5893584489822388, + "learning_rate": 3.9519695554123604e-06, + "loss": 0.5705, + "step": 6233 + }, + { + "epoch": 0.918565815324165, + "grad_norm": 0.5763649344444275, + "learning_rate": 3.951653930646369e-06, + "loss": 0.5629, + "step": 6234 + }, + { + "epoch": 0.918713163064833, + "grad_norm": 0.5837989449501038, + "learning_rate": 3.951338270968783e-06, + "loss": 0.5806, + "step": 6235 + }, + { + "epoch": 0.918860510805501, + "grad_norm": 0.5809240937232971, + "learning_rate": 3.951022576387191e-06, + "loss": 0.5362, + "step": 6236 + }, + { + "epoch": 0.919007858546169, + "grad_norm": 0.5663577318191528, + "learning_rate": 3.950706846909187e-06, + "loss": 0.5329, + "step": 6237 + }, + { + "epoch": 0.9191552062868369, + "grad_norm": 0.6176763772964478, + "learning_rate": 3.950391082542363e-06, + "loss": 0.5722, + "step": 6238 + }, + { + "epoch": 0.9193025540275049, + "grad_norm": 0.6163426637649536, + "learning_rate": 3.950075283294314e-06, + "loss": 0.5712, + "step": 6239 + }, + { + "epoch": 0.9194499017681729, + "grad_norm": 0.586837112903595, + "learning_rate": 3.949759449172635e-06, + "loss": 0.5323, + "step": 6240 + }, + { + "epoch": 0.9195972495088409, + "grad_norm": 0.6062776446342468, + "learning_rate": 3.949443580184922e-06, + "loss": 0.5544, + "step": 6241 + }, + { + "epoch": 0.9197445972495089, + "grad_norm": 0.6055116653442383, + "learning_rate": 3.949127676338769e-06, + "loss": 0.5653, + "step": 6242 + }, + { + "epoch": 0.9198919449901768, + "grad_norm": 0.5664275288581848, + "learning_rate": 3.9488117376417764e-06, + "loss": 0.5283, + "step": 6243 + }, + { + "epoch": 0.9200392927308448, + "grad_norm": 0.592738687992096, + "learning_rate": 3.948495764101541e-06, + "loss": 0.5919, + "step": 6244 + }, + { + "epoch": 0.9201866404715128, + "grad_norm": 0.5846566557884216, + "learning_rate": 3.948179755725661e-06, + "loss": 0.5277, + "step": 6245 + }, + { + "epoch": 0.9203339882121807, + "grad_norm": 0.5642157793045044, + "learning_rate": 3.947863712521738e-06, + "loss": 0.5158, + "step": 6246 + }, + { + "epoch": 0.9204813359528488, + "grad_norm": 0.5890281796455383, + "learning_rate": 3.947547634497371e-06, + "loss": 0.562, + "step": 6247 + }, + { + "epoch": 0.9206286836935167, + "grad_norm": 0.5785731077194214, + "learning_rate": 3.947231521660163e-06, + "loss": 0.5649, + "step": 6248 + }, + { + "epoch": 0.9207760314341846, + "grad_norm": 0.5787564516067505, + "learning_rate": 3.946915374017714e-06, + "loss": 0.53, + "step": 6249 + }, + { + "epoch": 0.9209233791748527, + "grad_norm": 0.5973361134529114, + "learning_rate": 3.946599191577631e-06, + "loss": 0.5869, + "step": 6250 + }, + { + "epoch": 0.9210707269155206, + "grad_norm": 0.5598211884498596, + "learning_rate": 3.946282974347515e-06, + "loss": 0.5278, + "step": 6251 + }, + { + "epoch": 0.9212180746561887, + "grad_norm": 0.6410945653915405, + "learning_rate": 3.945966722334973e-06, + "loss": 0.5488, + "step": 6252 + }, + { + "epoch": 0.9213654223968566, + "grad_norm": 0.5953996181488037, + "learning_rate": 3.9456504355476086e-06, + "loss": 0.5334, + "step": 6253 + }, + { + "epoch": 0.9215127701375245, + "grad_norm": 0.5970427393913269, + "learning_rate": 3.945334113993029e-06, + "loss": 0.5372, + "step": 6254 + }, + { + "epoch": 0.9216601178781926, + "grad_norm": 0.5981902480125427, + "learning_rate": 3.945017757678842e-06, + "loss": 0.5696, + "step": 6255 + }, + { + "epoch": 0.9218074656188605, + "grad_norm": 0.5559574365615845, + "learning_rate": 3.944701366612655e-06, + "loss": 0.5804, + "step": 6256 + }, + { + "epoch": 0.9219548133595284, + "grad_norm": 0.6052730083465576, + "learning_rate": 3.94438494080208e-06, + "loss": 0.5952, + "step": 6257 + }, + { + "epoch": 0.9221021611001965, + "grad_norm": 0.5890336632728577, + "learning_rate": 3.944068480254722e-06, + "loss": 0.572, + "step": 6258 + }, + { + "epoch": 0.9222495088408644, + "grad_norm": 0.5799636840820312, + "learning_rate": 3.943751984978196e-06, + "loss": 0.529, + "step": 6259 + }, + { + "epoch": 0.9223968565815324, + "grad_norm": 0.6121273040771484, + "learning_rate": 3.9434354549801114e-06, + "loss": 0.594, + "step": 6260 + }, + { + "epoch": 0.9225442043222004, + "grad_norm": 0.6159238219261169, + "learning_rate": 3.943118890268081e-06, + "loss": 0.5564, + "step": 6261 + }, + { + "epoch": 0.9226915520628683, + "grad_norm": 0.5892455577850342, + "learning_rate": 3.942802290849718e-06, + "loss": 0.5523, + "step": 6262 + }, + { + "epoch": 0.9228388998035364, + "grad_norm": 0.6142945289611816, + "learning_rate": 3.942485656732637e-06, + "loss": 0.5592, + "step": 6263 + }, + { + "epoch": 0.9229862475442043, + "grad_norm": 0.6190429329872131, + "learning_rate": 3.942168987924452e-06, + "loss": 0.5907, + "step": 6264 + }, + { + "epoch": 0.9231335952848723, + "grad_norm": 0.5927194356918335, + "learning_rate": 3.94185228443278e-06, + "loss": 0.5825, + "step": 6265 + }, + { + "epoch": 0.9232809430255403, + "grad_norm": 0.598016083240509, + "learning_rate": 3.941535546265237e-06, + "loss": 0.5454, + "step": 6266 + }, + { + "epoch": 0.9234282907662082, + "grad_norm": 0.5929679274559021, + "learning_rate": 3.94121877342944e-06, + "loss": 0.5444, + "step": 6267 + }, + { + "epoch": 0.9235756385068762, + "grad_norm": 0.6054603457450867, + "learning_rate": 3.940901965933007e-06, + "loss": 0.5961, + "step": 6268 + }, + { + "epoch": 0.9237229862475442, + "grad_norm": 0.5816689729690552, + "learning_rate": 3.9405851237835575e-06, + "loss": 0.586, + "step": 6269 + }, + { + "epoch": 0.9238703339882122, + "grad_norm": 0.5524342656135559, + "learning_rate": 3.9402682469887115e-06, + "loss": 0.542, + "step": 6270 + }, + { + "epoch": 0.9240176817288801, + "grad_norm": 0.5762366056442261, + "learning_rate": 3.93995133555609e-06, + "loss": 0.5689, + "step": 6271 + }, + { + "epoch": 0.9241650294695481, + "grad_norm": 0.5657413601875305, + "learning_rate": 3.939634389493314e-06, + "loss": 0.5405, + "step": 6272 + }, + { + "epoch": 0.9243123772102161, + "grad_norm": 0.5900119543075562, + "learning_rate": 3.939317408808007e-06, + "loss": 0.5634, + "step": 6273 + }, + { + "epoch": 0.9244597249508841, + "grad_norm": 0.5839059352874756, + "learning_rate": 3.93900039350779e-06, + "loss": 0.5844, + "step": 6274 + }, + { + "epoch": 0.9246070726915521, + "grad_norm": 0.560601532459259, + "learning_rate": 3.938683343600289e-06, + "loss": 0.5604, + "step": 6275 + }, + { + "epoch": 0.92475442043222, + "grad_norm": 0.5647944211959839, + "learning_rate": 3.9383662590931295e-06, + "loss": 0.5413, + "step": 6276 + }, + { + "epoch": 0.924901768172888, + "grad_norm": 0.8166641592979431, + "learning_rate": 3.938049139993935e-06, + "loss": 0.5796, + "step": 6277 + }, + { + "epoch": 0.925049115913556, + "grad_norm": 0.5774555206298828, + "learning_rate": 3.937731986310334e-06, + "loss": 0.5754, + "step": 6278 + }, + { + "epoch": 0.9251964636542239, + "grad_norm": 0.6139751076698303, + "learning_rate": 3.937414798049951e-06, + "loss": 0.5612, + "step": 6279 + }, + { + "epoch": 0.925343811394892, + "grad_norm": 0.5965851545333862, + "learning_rate": 3.937097575220418e-06, + "loss": 0.536, + "step": 6280 + }, + { + "epoch": 0.9254911591355599, + "grad_norm": 0.5783348083496094, + "learning_rate": 3.936780317829362e-06, + "loss": 0.5359, + "step": 6281 + }, + { + "epoch": 0.925638506876228, + "grad_norm": 0.5713353157043457, + "learning_rate": 3.936463025884414e-06, + "loss": 0.5603, + "step": 6282 + }, + { + "epoch": 0.9257858546168959, + "grad_norm": 0.5539727807044983, + "learning_rate": 3.936145699393202e-06, + "loss": 0.55, + "step": 6283 + }, + { + "epoch": 0.9259332023575638, + "grad_norm": 0.6071409583091736, + "learning_rate": 3.935828338363361e-06, + "loss": 0.5685, + "step": 6284 + }, + { + "epoch": 0.9260805500982319, + "grad_norm": 0.582511842250824, + "learning_rate": 3.935510942802522e-06, + "loss": 0.5789, + "step": 6285 + }, + { + "epoch": 0.9262278978388998, + "grad_norm": 0.5919045805931091, + "learning_rate": 3.935193512718318e-06, + "loss": 0.5308, + "step": 6286 + }, + { + "epoch": 0.9263752455795677, + "grad_norm": 0.5761578679084778, + "learning_rate": 3.934876048118384e-06, + "loss": 0.5747, + "step": 6287 + }, + { + "epoch": 0.9265225933202358, + "grad_norm": 0.5896131992340088, + "learning_rate": 3.934558549010352e-06, + "loss": 0.5517, + "step": 6288 + }, + { + "epoch": 0.9266699410609037, + "grad_norm": 0.5812504291534424, + "learning_rate": 3.934241015401861e-06, + "loss": 0.5665, + "step": 6289 + }, + { + "epoch": 0.9268172888015717, + "grad_norm": 0.5783523321151733, + "learning_rate": 3.933923447300546e-06, + "loss": 0.5492, + "step": 6290 + }, + { + "epoch": 0.9269646365422397, + "grad_norm": 0.5940061211585999, + "learning_rate": 3.933605844714045e-06, + "loss": 0.532, + "step": 6291 + }, + { + "epoch": 0.9271119842829076, + "grad_norm": 0.589440643787384, + "learning_rate": 3.9332882076499955e-06, + "loss": 0.5449, + "step": 6292 + }, + { + "epoch": 0.9272593320235757, + "grad_norm": 0.5868598818778992, + "learning_rate": 3.932970536116036e-06, + "loss": 0.5672, + "step": 6293 + }, + { + "epoch": 0.9274066797642436, + "grad_norm": 0.5824891924858093, + "learning_rate": 3.932652830119808e-06, + "loss": 0.5607, + "step": 6294 + }, + { + "epoch": 0.9275540275049116, + "grad_norm": 0.5566008687019348, + "learning_rate": 3.932335089668952e-06, + "loss": 0.522, + "step": 6295 + }, + { + "epoch": 0.9277013752455796, + "grad_norm": 0.6256442070007324, + "learning_rate": 3.932017314771108e-06, + "loss": 0.5861, + "step": 6296 + }, + { + "epoch": 0.9278487229862475, + "grad_norm": 0.6183721423149109, + "learning_rate": 3.93169950543392e-06, + "loss": 0.579, + "step": 6297 + }, + { + "epoch": 0.9279960707269155, + "grad_norm": 0.6111847758293152, + "learning_rate": 3.931381661665031e-06, + "loss": 0.5424, + "step": 6298 + }, + { + "epoch": 0.9281434184675835, + "grad_norm": 0.6119224429130554, + "learning_rate": 3.931063783472082e-06, + "loss": 0.5869, + "step": 6299 + }, + { + "epoch": 0.9282907662082515, + "grad_norm": 0.5677787661552429, + "learning_rate": 3.930745870862723e-06, + "loss": 0.5308, + "step": 6300 + }, + { + "epoch": 0.9284381139489194, + "grad_norm": 0.6039600968360901, + "learning_rate": 3.930427923844594e-06, + "loss": 0.5779, + "step": 6301 + }, + { + "epoch": 0.9285854616895874, + "grad_norm": 0.5756985545158386, + "learning_rate": 3.930109942425347e-06, + "loss": 0.5682, + "step": 6302 + }, + { + "epoch": 0.9287328094302554, + "grad_norm": 0.5706172585487366, + "learning_rate": 3.929791926612625e-06, + "loss": 0.5454, + "step": 6303 + }, + { + "epoch": 0.9288801571709234, + "grad_norm": 0.57500821352005, + "learning_rate": 3.929473876414078e-06, + "loss": 0.5818, + "step": 6304 + }, + { + "epoch": 0.9290275049115914, + "grad_norm": 0.5549022555351257, + "learning_rate": 3.9291557918373554e-06, + "loss": 0.5416, + "step": 6305 + }, + { + "epoch": 0.9291748526522593, + "grad_norm": 0.5666303634643555, + "learning_rate": 3.928837672890105e-06, + "loss": 0.5552, + "step": 6306 + }, + { + "epoch": 0.9293222003929273, + "grad_norm": 0.5777755975723267, + "learning_rate": 3.92851951957998e-06, + "loss": 0.562, + "step": 6307 + }, + { + "epoch": 0.9294695481335953, + "grad_norm": 0.5964195132255554, + "learning_rate": 3.92820133191463e-06, + "loss": 0.578, + "step": 6308 + }, + { + "epoch": 0.9296168958742632, + "grad_norm": 0.5881003141403198, + "learning_rate": 3.927883109901709e-06, + "loss": 0.5939, + "step": 6309 + }, + { + "epoch": 0.9297642436149313, + "grad_norm": 0.6055705547332764, + "learning_rate": 3.927564853548868e-06, + "loss": 0.5689, + "step": 6310 + }, + { + "epoch": 0.9299115913555992, + "grad_norm": 0.6037144660949707, + "learning_rate": 3.927246562863761e-06, + "loss": 0.5782, + "step": 6311 + }, + { + "epoch": 0.9300589390962672, + "grad_norm": 0.6009835600852966, + "learning_rate": 3.926928237854045e-06, + "loss": 0.5555, + "step": 6312 + }, + { + "epoch": 0.9302062868369352, + "grad_norm": 0.5760956406593323, + "learning_rate": 3.926609878527374e-06, + "loss": 0.5637, + "step": 6313 + }, + { + "epoch": 0.9303536345776031, + "grad_norm": 0.6071508526802063, + "learning_rate": 3.926291484891404e-06, + "loss": 0.5525, + "step": 6314 + }, + { + "epoch": 0.9305009823182712, + "grad_norm": 0.6282926201820374, + "learning_rate": 3.925973056953792e-06, + "loss": 0.5524, + "step": 6315 + }, + { + "epoch": 0.9306483300589391, + "grad_norm": 0.5925191640853882, + "learning_rate": 3.925654594722198e-06, + "loss": 0.5845, + "step": 6316 + }, + { + "epoch": 0.930795677799607, + "grad_norm": 0.5882472395896912, + "learning_rate": 3.925336098204279e-06, + "loss": 0.5489, + "step": 6317 + }, + { + "epoch": 0.9309430255402751, + "grad_norm": 0.5992404818534851, + "learning_rate": 3.925017567407696e-06, + "loss": 0.5592, + "step": 6318 + }, + { + "epoch": 0.931090373280943, + "grad_norm": 0.6136581301689148, + "learning_rate": 3.9246990023401086e-06, + "loss": 0.5635, + "step": 6319 + }, + { + "epoch": 0.931237721021611, + "grad_norm": 0.6020328402519226, + "learning_rate": 3.924380403009178e-06, + "loss": 0.5876, + "step": 6320 + }, + { + "epoch": 0.931385068762279, + "grad_norm": 0.5853222012519836, + "learning_rate": 3.924061769422568e-06, + "loss": 0.5538, + "step": 6321 + }, + { + "epoch": 0.9315324165029469, + "grad_norm": 0.5952227711677551, + "learning_rate": 3.923743101587939e-06, + "loss": 0.5534, + "step": 6322 + }, + { + "epoch": 0.931679764243615, + "grad_norm": 0.5870448350906372, + "learning_rate": 3.923424399512957e-06, + "loss": 0.5618, + "step": 6323 + }, + { + "epoch": 0.9318271119842829, + "grad_norm": 0.5758079886436462, + "learning_rate": 3.923105663205286e-06, + "loss": 0.5774, + "step": 6324 + }, + { + "epoch": 0.9319744597249509, + "grad_norm": 0.5987907648086548, + "learning_rate": 3.92278689267259e-06, + "loss": 0.5786, + "step": 6325 + }, + { + "epoch": 0.9321218074656189, + "grad_norm": 0.5951076745986938, + "learning_rate": 3.9224680879225376e-06, + "loss": 0.5915, + "step": 6326 + }, + { + "epoch": 0.9322691552062868, + "grad_norm": 0.604290783405304, + "learning_rate": 3.9221492489627944e-06, + "loss": 0.5604, + "step": 6327 + }, + { + "epoch": 0.9324165029469548, + "grad_norm": 0.6079489588737488, + "learning_rate": 3.921830375801029e-06, + "loss": 0.5598, + "step": 6328 + }, + { + "epoch": 0.9325638506876228, + "grad_norm": 0.5912467837333679, + "learning_rate": 3.92151146844491e-06, + "loss": 0.5971, + "step": 6329 + }, + { + "epoch": 0.9327111984282908, + "grad_norm": 0.55604487657547, + "learning_rate": 3.921192526902107e-06, + "loss": 0.5366, + "step": 6330 + }, + { + "epoch": 0.9328585461689587, + "grad_norm": 0.593495786190033, + "learning_rate": 3.9208735511802904e-06, + "loss": 0.5526, + "step": 6331 + }, + { + "epoch": 0.9330058939096267, + "grad_norm": 0.5956639051437378, + "learning_rate": 3.920554541287131e-06, + "loss": 0.5845, + "step": 6332 + }, + { + "epoch": 0.9331532416502947, + "grad_norm": 0.618801474571228, + "learning_rate": 3.920235497230301e-06, + "loss": 0.576, + "step": 6333 + }, + { + "epoch": 0.9333005893909627, + "grad_norm": 0.5735820531845093, + "learning_rate": 3.919916419017474e-06, + "loss": 0.5499, + "step": 6334 + }, + { + "epoch": 0.9334479371316307, + "grad_norm": 0.5984732508659363, + "learning_rate": 3.9195973066563225e-06, + "loss": 0.5484, + "step": 6335 + }, + { + "epoch": 0.9335952848722986, + "grad_norm": 0.5961229205131531, + "learning_rate": 3.919278160154522e-06, + "loss": 0.544, + "step": 6336 + }, + { + "epoch": 0.9337426326129666, + "grad_norm": 0.5846070647239685, + "learning_rate": 3.9189589795197475e-06, + "loss": 0.5745, + "step": 6337 + }, + { + "epoch": 0.9338899803536346, + "grad_norm": 0.5647512078285217, + "learning_rate": 3.918639764759675e-06, + "loss": 0.5316, + "step": 6338 + }, + { + "epoch": 0.9340373280943025, + "grad_norm": 0.5825871825218201, + "learning_rate": 3.918320515881982e-06, + "loss": 0.5469, + "step": 6339 + }, + { + "epoch": 0.9341846758349706, + "grad_norm": 0.5685995817184448, + "learning_rate": 3.918001232894345e-06, + "loss": 0.5324, + "step": 6340 + }, + { + "epoch": 0.9343320235756385, + "grad_norm": 0.596994161605835, + "learning_rate": 3.917681915804444e-06, + "loss": 0.582, + "step": 6341 + }, + { + "epoch": 0.9344793713163064, + "grad_norm": 0.5821899175643921, + "learning_rate": 3.917362564619956e-06, + "loss": 0.5701, + "step": 6342 + }, + { + "epoch": 0.9346267190569745, + "grad_norm": 0.5824646949768066, + "learning_rate": 3.917043179348566e-06, + "loss": 0.5584, + "step": 6343 + }, + { + "epoch": 0.9347740667976424, + "grad_norm": 0.552142322063446, + "learning_rate": 3.91672375999795e-06, + "loss": 0.5761, + "step": 6344 + }, + { + "epoch": 0.9349214145383105, + "grad_norm": 0.5723969340324402, + "learning_rate": 3.916404306575793e-06, + "loss": 0.5588, + "step": 6345 + }, + { + "epoch": 0.9350687622789784, + "grad_norm": 0.5893797278404236, + "learning_rate": 3.916084819089777e-06, + "loss": 0.5687, + "step": 6346 + }, + { + "epoch": 0.9352161100196463, + "grad_norm": 0.60999995470047, + "learning_rate": 3.915765297547584e-06, + "loss": 0.5925, + "step": 6347 + }, + { + "epoch": 0.9353634577603144, + "grad_norm": 0.5694921612739563, + "learning_rate": 3.915445741956901e-06, + "loss": 0.5783, + "step": 6348 + }, + { + "epoch": 0.9355108055009823, + "grad_norm": 0.6067562103271484, + "learning_rate": 3.915126152325412e-06, + "loss": 0.5627, + "step": 6349 + }, + { + "epoch": 0.9356581532416502, + "grad_norm": 0.6012370586395264, + "learning_rate": 3.914806528660802e-06, + "loss": 0.5212, + "step": 6350 + }, + { + "epoch": 0.9358055009823183, + "grad_norm": 0.5782627463340759, + "learning_rate": 3.914486870970759e-06, + "loss": 0.5583, + "step": 6351 + }, + { + "epoch": 0.9359528487229862, + "grad_norm": 0.5588791370391846, + "learning_rate": 3.91416717926297e-06, + "loss": 0.5501, + "step": 6352 + }, + { + "epoch": 0.9361001964636543, + "grad_norm": 0.6148021221160889, + "learning_rate": 3.913847453545123e-06, + "loss": 0.5589, + "step": 6353 + }, + { + "epoch": 0.9362475442043222, + "grad_norm": 0.6093019843101501, + "learning_rate": 3.913527693824909e-06, + "loss": 0.568, + "step": 6354 + }, + { + "epoch": 0.9363948919449901, + "grad_norm": 0.5474956631660461, + "learning_rate": 3.9132079001100155e-06, + "loss": 0.5474, + "step": 6355 + }, + { + "epoch": 0.9365422396856582, + "grad_norm": 0.5879419445991516, + "learning_rate": 3.912888072408136e-06, + "loss": 0.5749, + "step": 6356 + }, + { + "epoch": 0.9366895874263261, + "grad_norm": 0.575123131275177, + "learning_rate": 3.91256821072696e-06, + "loss": 0.5544, + "step": 6357 + }, + { + "epoch": 0.9368369351669941, + "grad_norm": 0.5727521777153015, + "learning_rate": 3.912248315074183e-06, + "loss": 0.5788, + "step": 6358 + }, + { + "epoch": 0.9369842829076621, + "grad_norm": 0.5747479200363159, + "learning_rate": 3.911928385457495e-06, + "loss": 0.5714, + "step": 6359 + }, + { + "epoch": 0.93713163064833, + "grad_norm": 0.6278137564659119, + "learning_rate": 3.911608421884591e-06, + "loss": 0.5947, + "step": 6360 + }, + { + "epoch": 0.937278978388998, + "grad_norm": 0.5570438504219055, + "learning_rate": 3.9112884243631676e-06, + "loss": 0.5273, + "step": 6361 + }, + { + "epoch": 0.937426326129666, + "grad_norm": 0.5628070831298828, + "learning_rate": 3.910968392900919e-06, + "loss": 0.5567, + "step": 6362 + }, + { + "epoch": 0.937573673870334, + "grad_norm": 0.5740931034088135, + "learning_rate": 3.910648327505543e-06, + "loss": 0.5741, + "step": 6363 + }, + { + "epoch": 0.937721021611002, + "grad_norm": 0.601681113243103, + "learning_rate": 3.910328228184735e-06, + "loss": 0.5642, + "step": 6364 + }, + { + "epoch": 0.93786836935167, + "grad_norm": 0.6180621981620789, + "learning_rate": 3.910008094946196e-06, + "loss": 0.5397, + "step": 6365 + }, + { + "epoch": 0.9380157170923379, + "grad_norm": 0.5837226510047913, + "learning_rate": 3.909687927797623e-06, + "loss": 0.5342, + "step": 6366 + }, + { + "epoch": 0.9381630648330059, + "grad_norm": 0.5924923419952393, + "learning_rate": 3.909367726746717e-06, + "loss": 0.5629, + "step": 6367 + }, + { + "epoch": 0.9383104125736739, + "grad_norm": 0.6068455576896667, + "learning_rate": 3.909047491801178e-06, + "loss": 0.5784, + "step": 6368 + }, + { + "epoch": 0.9384577603143418, + "grad_norm": 0.5702017545700073, + "learning_rate": 3.908727222968708e-06, + "loss": 0.5717, + "step": 6369 + }, + { + "epoch": 0.9386051080550099, + "grad_norm": 0.5866938829421997, + "learning_rate": 3.908406920257009e-06, + "loss": 0.5171, + "step": 6370 + }, + { + "epoch": 0.9387524557956778, + "grad_norm": 0.5715490579605103, + "learning_rate": 3.9080865836737826e-06, + "loss": 0.5976, + "step": 6371 + }, + { + "epoch": 0.9388998035363457, + "grad_norm": 0.5543949604034424, + "learning_rate": 3.907766213226736e-06, + "loss": 0.5601, + "step": 6372 + }, + { + "epoch": 0.9390471512770138, + "grad_norm": 0.588870644569397, + "learning_rate": 3.907445808923571e-06, + "loss": 0.5396, + "step": 6373 + }, + { + "epoch": 0.9391944990176817, + "grad_norm": 0.5710598230361938, + "learning_rate": 3.907125370771995e-06, + "loss": 0.5191, + "step": 6374 + }, + { + "epoch": 0.9393418467583498, + "grad_norm": 0.5911480784416199, + "learning_rate": 3.906804898779714e-06, + "loss": 0.6058, + "step": 6375 + }, + { + "epoch": 0.9394891944990177, + "grad_norm": 0.5755545496940613, + "learning_rate": 3.906484392954436e-06, + "loss": 0.548, + "step": 6376 + }, + { + "epoch": 0.9396365422396856, + "grad_norm": 0.581590473651886, + "learning_rate": 3.906163853303866e-06, + "loss": 0.5572, + "step": 6377 + }, + { + "epoch": 0.9397838899803537, + "grad_norm": 0.6444994807243347, + "learning_rate": 3.905843279835716e-06, + "loss": 0.5692, + "step": 6378 + }, + { + "epoch": 0.9399312377210216, + "grad_norm": 0.5926851034164429, + "learning_rate": 3.905522672557694e-06, + "loss": 0.5463, + "step": 6379 + }, + { + "epoch": 0.9400785854616895, + "grad_norm": 0.6195091605186462, + "learning_rate": 3.905202031477512e-06, + "loss": 0.5607, + "step": 6380 + }, + { + "epoch": 0.9402259332023576, + "grad_norm": 0.5790183544158936, + "learning_rate": 3.90488135660288e-06, + "loss": 0.5763, + "step": 6381 + }, + { + "epoch": 0.9403732809430255, + "grad_norm": 0.5764585733413696, + "learning_rate": 3.904560647941509e-06, + "loss": 0.5558, + "step": 6382 + }, + { + "epoch": 0.9405206286836936, + "grad_norm": 0.5713776350021362, + "learning_rate": 3.904239905501114e-06, + "loss": 0.5603, + "step": 6383 + }, + { + "epoch": 0.9406679764243615, + "grad_norm": 0.5627696514129639, + "learning_rate": 3.9039191292894086e-06, + "loss": 0.5344, + "step": 6384 + }, + { + "epoch": 0.9408153241650294, + "grad_norm": 0.6115598678588867, + "learning_rate": 3.903598319314106e-06, + "loss": 0.5717, + "step": 6385 + }, + { + "epoch": 0.9409626719056975, + "grad_norm": 0.5985063314437866, + "learning_rate": 3.903277475582921e-06, + "loss": 0.586, + "step": 6386 + }, + { + "epoch": 0.9411100196463654, + "grad_norm": 0.5914734601974487, + "learning_rate": 3.902956598103573e-06, + "loss": 0.5556, + "step": 6387 + }, + { + "epoch": 0.9412573673870334, + "grad_norm": 0.5796979069709778, + "learning_rate": 3.902635686883775e-06, + "loss": 0.5647, + "step": 6388 + }, + { + "epoch": 0.9414047151277014, + "grad_norm": 0.5670685768127441, + "learning_rate": 3.902314741931248e-06, + "loss": 0.5462, + "step": 6389 + }, + { + "epoch": 0.9415520628683693, + "grad_norm": 0.5794163942337036, + "learning_rate": 3.901993763253709e-06, + "loss": 0.5528, + "step": 6390 + }, + { + "epoch": 0.9416994106090373, + "grad_norm": 0.5655044317245483, + "learning_rate": 3.901672750858877e-06, + "loss": 0.5518, + "step": 6391 + }, + { + "epoch": 0.9418467583497053, + "grad_norm": 0.5856906771659851, + "learning_rate": 3.901351704754472e-06, + "loss": 0.5725, + "step": 6392 + }, + { + "epoch": 0.9419941060903733, + "grad_norm": 0.5819120407104492, + "learning_rate": 3.9010306249482175e-06, + "loss": 0.5318, + "step": 6393 + }, + { + "epoch": 0.9421414538310413, + "grad_norm": 0.5845201015472412, + "learning_rate": 3.900709511447832e-06, + "loss": 0.5496, + "step": 6394 + }, + { + "epoch": 0.9422888015717092, + "grad_norm": 0.6095598936080933, + "learning_rate": 3.900388364261041e-06, + "loss": 0.5716, + "step": 6395 + }, + { + "epoch": 0.9424361493123772, + "grad_norm": 0.56944340467453, + "learning_rate": 3.900067183395566e-06, + "loss": 0.5882, + "step": 6396 + }, + { + "epoch": 0.9425834970530452, + "grad_norm": 0.5926942229270935, + "learning_rate": 3.899745968859132e-06, + "loss": 0.575, + "step": 6397 + }, + { + "epoch": 0.9427308447937132, + "grad_norm": 0.6036485433578491, + "learning_rate": 3.8994247206594645e-06, + "loss": 0.5732, + "step": 6398 + }, + { + "epoch": 0.9428781925343811, + "grad_norm": 0.6094908714294434, + "learning_rate": 3.899103438804288e-06, + "loss": 0.5273, + "step": 6399 + }, + { + "epoch": 0.9430255402750491, + "grad_norm": 0.5860429406166077, + "learning_rate": 3.8987821233013314e-06, + "loss": 0.5168, + "step": 6400 + }, + { + "epoch": 0.9431728880157171, + "grad_norm": 0.5933125019073486, + "learning_rate": 3.898460774158319e-06, + "loss": 0.5589, + "step": 6401 + }, + { + "epoch": 0.943320235756385, + "grad_norm": 0.5621139407157898, + "learning_rate": 3.898139391382982e-06, + "loss": 0.5585, + "step": 6402 + }, + { + "epoch": 0.9434675834970531, + "grad_norm": 0.6188940405845642, + "learning_rate": 3.897817974983048e-06, + "loss": 0.5444, + "step": 6403 + }, + { + "epoch": 0.943614931237721, + "grad_norm": 0.5664979219436646, + "learning_rate": 3.897496524966248e-06, + "loss": 0.5506, + "step": 6404 + }, + { + "epoch": 0.943762278978389, + "grad_norm": 0.5874075889587402, + "learning_rate": 3.897175041340312e-06, + "loss": 0.5764, + "step": 6405 + }, + { + "epoch": 0.943909626719057, + "grad_norm": 0.6398625373840332, + "learning_rate": 3.89685352411297e-06, + "loss": 0.594, + "step": 6406 + }, + { + "epoch": 0.9440569744597249, + "grad_norm": 0.56669682264328, + "learning_rate": 3.896531973291957e-06, + "loss": 0.5873, + "step": 6407 + }, + { + "epoch": 0.944204322200393, + "grad_norm": 0.5745807886123657, + "learning_rate": 3.896210388885005e-06, + "loss": 0.5534, + "step": 6408 + }, + { + "epoch": 0.9443516699410609, + "grad_norm": 0.565658688545227, + "learning_rate": 3.895888770899848e-06, + "loss": 0.5666, + "step": 6409 + }, + { + "epoch": 0.9444990176817288, + "grad_norm": 0.588519811630249, + "learning_rate": 3.895567119344221e-06, + "loss": 0.5756, + "step": 6410 + }, + { + "epoch": 0.9446463654223969, + "grad_norm": 0.6469225883483887, + "learning_rate": 3.8952454342258595e-06, + "loss": 0.5797, + "step": 6411 + }, + { + "epoch": 0.9447937131630648, + "grad_norm": 0.5521818399429321, + "learning_rate": 3.894923715552499e-06, + "loss": 0.5488, + "step": 6412 + }, + { + "epoch": 0.9449410609037328, + "grad_norm": 0.5672182440757751, + "learning_rate": 3.894601963331878e-06, + "loss": 0.5395, + "step": 6413 + }, + { + "epoch": 0.9450884086444008, + "grad_norm": 0.5565996170043945, + "learning_rate": 3.894280177571733e-06, + "loss": 0.5206, + "step": 6414 + }, + { + "epoch": 0.9452357563850687, + "grad_norm": 0.5885131359100342, + "learning_rate": 3.893958358279805e-06, + "loss": 0.5551, + "step": 6415 + }, + { + "epoch": 0.9453831041257368, + "grad_norm": 0.6174693703651428, + "learning_rate": 3.893636505463831e-06, + "loss": 0.5665, + "step": 6416 + }, + { + "epoch": 0.9455304518664047, + "grad_norm": 0.5764871835708618, + "learning_rate": 3.893314619131554e-06, + "loss": 0.5783, + "step": 6417 + }, + { + "epoch": 0.9456777996070727, + "grad_norm": 0.5828034281730652, + "learning_rate": 3.892992699290713e-06, + "loss": 0.5888, + "step": 6418 + }, + { + "epoch": 0.9458251473477407, + "grad_norm": 0.5376036167144775, + "learning_rate": 3.8926707459490505e-06, + "loss": 0.5687, + "step": 6419 + }, + { + "epoch": 0.9459724950884086, + "grad_norm": 0.6013522148132324, + "learning_rate": 3.892348759114311e-06, + "loss": 0.5876, + "step": 6420 + }, + { + "epoch": 0.9461198428290766, + "grad_norm": 0.549885630607605, + "learning_rate": 3.892026738794237e-06, + "loss": 0.5432, + "step": 6421 + }, + { + "epoch": 0.9462671905697446, + "grad_norm": 0.5852346420288086, + "learning_rate": 3.8917046849965715e-06, + "loss": 0.5484, + "step": 6422 + }, + { + "epoch": 0.9464145383104126, + "grad_norm": 0.5904257893562317, + "learning_rate": 3.8913825977290624e-06, + "loss": 0.5382, + "step": 6423 + }, + { + "epoch": 0.9465618860510806, + "grad_norm": 0.5809610486030579, + "learning_rate": 3.891060476999454e-06, + "loss": 0.5597, + "step": 6424 + }, + { + "epoch": 0.9467092337917485, + "grad_norm": 0.5724186301231384, + "learning_rate": 3.890738322815493e-06, + "loss": 0.5641, + "step": 6425 + }, + { + "epoch": 0.9468565815324165, + "grad_norm": 0.5857220888137817, + "learning_rate": 3.890416135184928e-06, + "loss": 0.5619, + "step": 6426 + }, + { + "epoch": 0.9470039292730845, + "grad_norm": 0.6030302047729492, + "learning_rate": 3.8900939141155065e-06, + "loss": 0.5627, + "step": 6427 + }, + { + "epoch": 0.9471512770137525, + "grad_norm": 0.5612055659294128, + "learning_rate": 3.889771659614979e-06, + "loss": 0.5433, + "step": 6428 + }, + { + "epoch": 0.9472986247544204, + "grad_norm": 0.5700392127037048, + "learning_rate": 3.889449371691095e-06, + "loss": 0.5439, + "step": 6429 + }, + { + "epoch": 0.9474459724950884, + "grad_norm": 0.5764816999435425, + "learning_rate": 3.8891270503516045e-06, + "loss": 0.5584, + "step": 6430 + }, + { + "epoch": 0.9475933202357564, + "grad_norm": 0.5636060833930969, + "learning_rate": 3.888804695604261e-06, + "loss": 0.5377, + "step": 6431 + }, + { + "epoch": 0.9477406679764243, + "grad_norm": 0.6064815521240234, + "learning_rate": 3.8884823074568155e-06, + "loss": 0.5844, + "step": 6432 + }, + { + "epoch": 0.9478880157170924, + "grad_norm": 0.6006370186805725, + "learning_rate": 3.888159885917022e-06, + "loss": 0.5283, + "step": 6433 + }, + { + "epoch": 0.9480353634577603, + "grad_norm": 0.5748791098594666, + "learning_rate": 3.887837430992634e-06, + "loss": 0.5554, + "step": 6434 + }, + { + "epoch": 0.9481827111984283, + "grad_norm": 0.5797114372253418, + "learning_rate": 3.887514942691407e-06, + "loss": 0.5562, + "step": 6435 + }, + { + "epoch": 0.9483300589390963, + "grad_norm": 0.5936449766159058, + "learning_rate": 3.887192421021096e-06, + "loss": 0.5454, + "step": 6436 + }, + { + "epoch": 0.9484774066797642, + "grad_norm": 0.620985209941864, + "learning_rate": 3.8868698659894584e-06, + "loss": 0.5466, + "step": 6437 + }, + { + "epoch": 0.9486247544204323, + "grad_norm": 0.6043334603309631, + "learning_rate": 3.8865472776042515e-06, + "loss": 0.5402, + "step": 6438 + }, + { + "epoch": 0.9487721021611002, + "grad_norm": 0.5835230350494385, + "learning_rate": 3.886224655873232e-06, + "loss": 0.538, + "step": 6439 + }, + { + "epoch": 0.9489194499017681, + "grad_norm": 0.5871859192848206, + "learning_rate": 3.8859020008041595e-06, + "loss": 0.5573, + "step": 6440 + }, + { + "epoch": 0.9490667976424362, + "grad_norm": 0.5839502811431885, + "learning_rate": 3.8855793124047945e-06, + "loss": 0.5771, + "step": 6441 + }, + { + "epoch": 0.9492141453831041, + "grad_norm": 0.5796563625335693, + "learning_rate": 3.885256590682897e-06, + "loss": 0.5635, + "step": 6442 + }, + { + "epoch": 0.949361493123772, + "grad_norm": 0.5707823038101196, + "learning_rate": 3.8849338356462276e-06, + "loss": 0.5763, + "step": 6443 + }, + { + "epoch": 0.9495088408644401, + "grad_norm": 0.5706336498260498, + "learning_rate": 3.884611047302549e-06, + "loss": 0.5471, + "step": 6444 + }, + { + "epoch": 0.949656188605108, + "grad_norm": 0.5739217400550842, + "learning_rate": 3.8842882256596246e-06, + "loss": 0.5374, + "step": 6445 + }, + { + "epoch": 0.9498035363457761, + "grad_norm": 0.6129982471466064, + "learning_rate": 3.883965370725218e-06, + "loss": 0.5607, + "step": 6446 + }, + { + "epoch": 0.949950884086444, + "grad_norm": 0.5720517635345459, + "learning_rate": 3.8836424825070925e-06, + "loss": 0.5323, + "step": 6447 + }, + { + "epoch": 0.950098231827112, + "grad_norm": 0.59163498878479, + "learning_rate": 3.8833195610130145e-06, + "loss": 0.5769, + "step": 6448 + }, + { + "epoch": 0.95024557956778, + "grad_norm": 0.5997556447982788, + "learning_rate": 3.882996606250751e-06, + "loss": 0.5578, + "step": 6449 + }, + { + "epoch": 0.9503929273084479, + "grad_norm": 0.5774507522583008, + "learning_rate": 3.882673618228067e-06, + "loss": 0.5378, + "step": 6450 + }, + { + "epoch": 0.9505402750491159, + "grad_norm": 0.5685418844223022, + "learning_rate": 3.882350596952729e-06, + "loss": 0.5477, + "step": 6451 + }, + { + "epoch": 0.9506876227897839, + "grad_norm": 0.5903952717781067, + "learning_rate": 3.882027542432511e-06, + "loss": 0.5417, + "step": 6452 + }, + { + "epoch": 0.9508349705304519, + "grad_norm": 0.5932748317718506, + "learning_rate": 3.881704454675177e-06, + "loss": 0.525, + "step": 6453 + }, + { + "epoch": 0.9509823182711199, + "grad_norm": 0.5592724680900574, + "learning_rate": 3.881381333688499e-06, + "loss": 0.5753, + "step": 6454 + }, + { + "epoch": 0.9511296660117878, + "grad_norm": 0.566970944404602, + "learning_rate": 3.881058179480247e-06, + "loss": 0.5718, + "step": 6455 + }, + { + "epoch": 0.9512770137524558, + "grad_norm": 0.5677683353424072, + "learning_rate": 3.880734992058195e-06, + "loss": 0.5631, + "step": 6456 + }, + { + "epoch": 0.9514243614931238, + "grad_norm": 0.5459764003753662, + "learning_rate": 3.880411771430114e-06, + "loss": 0.542, + "step": 6457 + }, + { + "epoch": 0.9515717092337918, + "grad_norm": 0.5767671465873718, + "learning_rate": 3.880088517603777e-06, + "loss": 0.548, + "step": 6458 + }, + { + "epoch": 0.9517190569744597, + "grad_norm": 0.5901451706886292, + "learning_rate": 3.879765230586958e-06, + "loss": 0.5642, + "step": 6459 + }, + { + "epoch": 0.9518664047151277, + "grad_norm": 0.5409843921661377, + "learning_rate": 3.879441910387432e-06, + "loss": 0.5703, + "step": 6460 + }, + { + "epoch": 0.9520137524557957, + "grad_norm": 0.5440302491188049, + "learning_rate": 3.879118557012977e-06, + "loss": 0.5463, + "step": 6461 + }, + { + "epoch": 0.9521611001964636, + "grad_norm": 0.5715996623039246, + "learning_rate": 3.8787951704713655e-06, + "loss": 0.551, + "step": 6462 + }, + { + "epoch": 0.9523084479371317, + "grad_norm": 0.5949322581291199, + "learning_rate": 3.878471750770377e-06, + "loss": 0.5435, + "step": 6463 + }, + { + "epoch": 0.9524557956777996, + "grad_norm": 0.5663079619407654, + "learning_rate": 3.87814829791779e-06, + "loss": 0.5456, + "step": 6464 + }, + { + "epoch": 0.9526031434184676, + "grad_norm": 0.5583405494689941, + "learning_rate": 3.877824811921383e-06, + "loss": 0.5648, + "step": 6465 + }, + { + "epoch": 0.9527504911591356, + "grad_norm": 0.5822124481201172, + "learning_rate": 3.8775012927889356e-06, + "loss": 0.5237, + "step": 6466 + }, + { + "epoch": 0.9528978388998035, + "grad_norm": 0.569800615310669, + "learning_rate": 3.877177740528228e-06, + "loss": 0.523, + "step": 6467 + }, + { + "epoch": 0.9530451866404716, + "grad_norm": 0.5559419989585876, + "learning_rate": 3.8768541551470415e-06, + "loss": 0.5333, + "step": 6468 + }, + { + "epoch": 0.9531925343811395, + "grad_norm": 0.613082230091095, + "learning_rate": 3.876530536653159e-06, + "loss": 0.5845, + "step": 6469 + }, + { + "epoch": 0.9533398821218074, + "grad_norm": 0.5711299180984497, + "learning_rate": 3.8762068850543614e-06, + "loss": 0.5744, + "step": 6470 + }, + { + "epoch": 0.9534872298624755, + "grad_norm": 0.5834798216819763, + "learning_rate": 3.875883200358434e-06, + "loss": 0.5747, + "step": 6471 + }, + { + "epoch": 0.9536345776031434, + "grad_norm": 0.6351979374885559, + "learning_rate": 3.875559482573163e-06, + "loss": 0.5756, + "step": 6472 + }, + { + "epoch": 0.9537819253438113, + "grad_norm": 0.5899248123168945, + "learning_rate": 3.87523573170633e-06, + "loss": 0.5955, + "step": 6473 + }, + { + "epoch": 0.9539292730844794, + "grad_norm": 0.5686385035514832, + "learning_rate": 3.874911947765722e-06, + "loss": 0.5398, + "step": 6474 + }, + { + "epoch": 0.9540766208251473, + "grad_norm": 0.5667167901992798, + "learning_rate": 3.874588130759127e-06, + "loss": 0.5734, + "step": 6475 + }, + { + "epoch": 0.9542239685658154, + "grad_norm": 0.6033629179000854, + "learning_rate": 3.874264280694333e-06, + "loss": 0.5988, + "step": 6476 + }, + { + "epoch": 0.9543713163064833, + "grad_norm": 0.5694980621337891, + "learning_rate": 3.873940397579127e-06, + "loss": 0.5466, + "step": 6477 + }, + { + "epoch": 0.9545186640471512, + "grad_norm": 0.5806958675384521, + "learning_rate": 3.873616481421299e-06, + "loss": 0.5236, + "step": 6478 + }, + { + "epoch": 0.9546660117878193, + "grad_norm": 0.5684722065925598, + "learning_rate": 3.873292532228638e-06, + "loss": 0.5425, + "step": 6479 + }, + { + "epoch": 0.9548133595284872, + "grad_norm": 0.5751994848251343, + "learning_rate": 3.8729685500089375e-06, + "loss": 0.5378, + "step": 6480 + }, + { + "epoch": 0.9549607072691552, + "grad_norm": 0.5958223938941956, + "learning_rate": 3.872644534769986e-06, + "loss": 0.5394, + "step": 6481 + }, + { + "epoch": 0.9551080550098232, + "grad_norm": 0.5727978944778442, + "learning_rate": 3.872320486519577e-06, + "loss": 0.5478, + "step": 6482 + }, + { + "epoch": 0.9552554027504911, + "grad_norm": 0.5600660443305969, + "learning_rate": 3.871996405265505e-06, + "loss": 0.5775, + "step": 6483 + }, + { + "epoch": 0.9554027504911591, + "grad_norm": 0.6525623798370361, + "learning_rate": 3.871672291015563e-06, + "loss": 0.5576, + "step": 6484 + }, + { + "epoch": 0.9555500982318271, + "grad_norm": 0.615059494972229, + "learning_rate": 3.871348143777544e-06, + "loss": 0.5612, + "step": 6485 + }, + { + "epoch": 0.9556974459724951, + "grad_norm": 0.5866638422012329, + "learning_rate": 3.871023963559247e-06, + "loss": 0.5489, + "step": 6486 + }, + { + "epoch": 0.9558447937131631, + "grad_norm": 0.5773454904556274, + "learning_rate": 3.870699750368467e-06, + "loss": 0.5738, + "step": 6487 + }, + { + "epoch": 0.955992141453831, + "grad_norm": 0.5920143127441406, + "learning_rate": 3.8703755042129995e-06, + "loss": 0.5691, + "step": 6488 + }, + { + "epoch": 0.956139489194499, + "grad_norm": 0.5719210505485535, + "learning_rate": 3.870051225100645e-06, + "loss": 0.5438, + "step": 6489 + }, + { + "epoch": 0.956286836935167, + "grad_norm": 0.6137596368789673, + "learning_rate": 3.869726913039202e-06, + "loss": 0.5599, + "step": 6490 + }, + { + "epoch": 0.956434184675835, + "grad_norm": 0.5843523144721985, + "learning_rate": 3.869402568036467e-06, + "loss": 0.5696, + "step": 6491 + }, + { + "epoch": 0.9565815324165029, + "grad_norm": 0.5673428177833557, + "learning_rate": 3.869078190100244e-06, + "loss": 0.5079, + "step": 6492 + }, + { + "epoch": 0.956728880157171, + "grad_norm": 0.579939603805542, + "learning_rate": 3.868753779238332e-06, + "loss": 0.5354, + "step": 6493 + }, + { + "epoch": 0.9568762278978389, + "grad_norm": 0.5797792077064514, + "learning_rate": 3.868429335458533e-06, + "loss": 0.5464, + "step": 6494 + }, + { + "epoch": 0.9570235756385069, + "grad_norm": 0.5644405484199524, + "learning_rate": 3.868104858768652e-06, + "loss": 0.5301, + "step": 6495 + }, + { + "epoch": 0.9571709233791749, + "grad_norm": 0.6154546141624451, + "learning_rate": 3.867780349176489e-06, + "loss": 0.5675, + "step": 6496 + }, + { + "epoch": 0.9573182711198428, + "grad_norm": 0.5616672039031982, + "learning_rate": 3.867455806689851e-06, + "loss": 0.5434, + "step": 6497 + }, + { + "epoch": 0.9574656188605108, + "grad_norm": 0.5751081109046936, + "learning_rate": 3.867131231316542e-06, + "loss": 0.5536, + "step": 6498 + }, + { + "epoch": 0.9576129666011788, + "grad_norm": 0.5855151414871216, + "learning_rate": 3.8668066230643685e-06, + "loss": 0.5819, + "step": 6499 + }, + { + "epoch": 0.9577603143418467, + "grad_norm": 0.5691272616386414, + "learning_rate": 3.866481981941137e-06, + "loss": 0.5677, + "step": 6500 + }, + { + "epoch": 0.9579076620825148, + "grad_norm": 0.5604066848754883, + "learning_rate": 3.866157307954654e-06, + "loss": 0.5486, + "step": 6501 + }, + { + "epoch": 0.9580550098231827, + "grad_norm": 0.6179413795471191, + "learning_rate": 3.8658326011127285e-06, + "loss": 0.5914, + "step": 6502 + }, + { + "epoch": 0.9582023575638506, + "grad_norm": 0.5713818073272705, + "learning_rate": 3.86550786142317e-06, + "loss": 0.5591, + "step": 6503 + }, + { + "epoch": 0.9583497053045187, + "grad_norm": 0.5625079870223999, + "learning_rate": 3.865183088893788e-06, + "loss": 0.5705, + "step": 6504 + }, + { + "epoch": 0.9584970530451866, + "grad_norm": 0.5767236948013306, + "learning_rate": 3.864858283532394e-06, + "loss": 0.5619, + "step": 6505 + }, + { + "epoch": 0.9586444007858547, + "grad_norm": 0.5943488478660583, + "learning_rate": 3.8645334453467966e-06, + "loss": 0.5642, + "step": 6506 + }, + { + "epoch": 0.9587917485265226, + "grad_norm": 0.5654432773590088, + "learning_rate": 3.86420857434481e-06, + "loss": 0.5178, + "step": 6507 + }, + { + "epoch": 0.9589390962671905, + "grad_norm": 0.6000134944915771, + "learning_rate": 3.863883670534247e-06, + "loss": 0.5467, + "step": 6508 + }, + { + "epoch": 0.9590864440078586, + "grad_norm": 0.5755266547203064, + "learning_rate": 3.863558733922923e-06, + "loss": 0.587, + "step": 6509 + }, + { + "epoch": 0.9592337917485265, + "grad_norm": 0.5869132876396179, + "learning_rate": 3.86323376451865e-06, + "loss": 0.5846, + "step": 6510 + }, + { + "epoch": 0.9593811394891945, + "grad_norm": 0.6052197217941284, + "learning_rate": 3.8629087623292435e-06, + "loss": 0.5615, + "step": 6511 + }, + { + "epoch": 0.9595284872298625, + "grad_norm": 0.5824786424636841, + "learning_rate": 3.8625837273625225e-06, + "loss": 0.5824, + "step": 6512 + }, + { + "epoch": 0.9596758349705304, + "grad_norm": 0.6282157897949219, + "learning_rate": 3.8622586596263e-06, + "loss": 0.5447, + "step": 6513 + }, + { + "epoch": 0.9598231827111984, + "grad_norm": 0.6075464487075806, + "learning_rate": 3.861933559128397e-06, + "loss": 0.5901, + "step": 6514 + }, + { + "epoch": 0.9599705304518664, + "grad_norm": 0.5939127802848816, + "learning_rate": 3.861608425876631e-06, + "loss": 0.5495, + "step": 6515 + }, + { + "epoch": 0.9601178781925344, + "grad_norm": 0.549312949180603, + "learning_rate": 3.86128325987882e-06, + "loss": 0.5712, + "step": 6516 + }, + { + "epoch": 0.9602652259332024, + "grad_norm": 0.5974910259246826, + "learning_rate": 3.8609580611427845e-06, + "loss": 0.5764, + "step": 6517 + }, + { + "epoch": 0.9604125736738703, + "grad_norm": 0.5500815510749817, + "learning_rate": 3.860632829676347e-06, + "loss": 0.5158, + "step": 6518 + }, + { + "epoch": 0.9605599214145383, + "grad_norm": 0.5601874589920044, + "learning_rate": 3.860307565487327e-06, + "loss": 0.5756, + "step": 6519 + }, + { + "epoch": 0.9607072691552063, + "grad_norm": 0.5878623723983765, + "learning_rate": 3.859982268583549e-06, + "loss": 0.5626, + "step": 6520 + }, + { + "epoch": 0.9608546168958743, + "grad_norm": 0.5719782114028931, + "learning_rate": 3.859656938972836e-06, + "loss": 0.5605, + "step": 6521 + }, + { + "epoch": 0.9610019646365422, + "grad_norm": 0.566650927066803, + "learning_rate": 3.85933157666301e-06, + "loss": 0.5782, + "step": 6522 + }, + { + "epoch": 0.9611493123772102, + "grad_norm": 0.5749436020851135, + "learning_rate": 3.859006181661897e-06, + "loss": 0.5632, + "step": 6523 + }, + { + "epoch": 0.9612966601178782, + "grad_norm": 0.5429242253303528, + "learning_rate": 3.858680753977324e-06, + "loss": 0.5217, + "step": 6524 + }, + { + "epoch": 0.9614440078585462, + "grad_norm": 0.5919736623764038, + "learning_rate": 3.858355293617116e-06, + "loss": 0.5964, + "step": 6525 + }, + { + "epoch": 0.9615913555992142, + "grad_norm": 0.586329996585846, + "learning_rate": 3.858029800589099e-06, + "loss": 0.5282, + "step": 6526 + }, + { + "epoch": 0.9617387033398821, + "grad_norm": 0.5824005603790283, + "learning_rate": 3.857704274901104e-06, + "loss": 0.532, + "step": 6527 + }, + { + "epoch": 0.9618860510805501, + "grad_norm": 0.5696355104446411, + "learning_rate": 3.857378716560957e-06, + "loss": 0.569, + "step": 6528 + }, + { + "epoch": 0.9620333988212181, + "grad_norm": 0.5625349879264832, + "learning_rate": 3.857053125576489e-06, + "loss": 0.591, + "step": 6529 + }, + { + "epoch": 0.962180746561886, + "grad_norm": 0.5725974440574646, + "learning_rate": 3.856727501955528e-06, + "loss": 0.5491, + "step": 6530 + }, + { + "epoch": 0.9623280943025541, + "grad_norm": 0.5590876936912537, + "learning_rate": 3.8564018457059095e-06, + "loss": 0.5384, + "step": 6531 + }, + { + "epoch": 0.962475442043222, + "grad_norm": 0.6323602199554443, + "learning_rate": 3.856076156835461e-06, + "loss": 0.5518, + "step": 6532 + }, + { + "epoch": 0.9626227897838899, + "grad_norm": 0.6084800362586975, + "learning_rate": 3.855750435352018e-06, + "loss": 0.5692, + "step": 6533 + }, + { + "epoch": 0.962770137524558, + "grad_norm": 0.5635969042778015, + "learning_rate": 3.8554246812634125e-06, + "loss": 0.5559, + "step": 6534 + }, + { + "epoch": 0.9629174852652259, + "grad_norm": 0.5833244323730469, + "learning_rate": 3.855098894577479e-06, + "loss": 0.5643, + "step": 6535 + }, + { + "epoch": 0.963064833005894, + "grad_norm": 0.5769339203834534, + "learning_rate": 3.8547730753020526e-06, + "loss": 0.5797, + "step": 6536 + }, + { + "epoch": 0.9632121807465619, + "grad_norm": 0.579251766204834, + "learning_rate": 3.854447223444969e-06, + "loss": 0.5792, + "step": 6537 + }, + { + "epoch": 0.9633595284872298, + "grad_norm": 0.6006804704666138, + "learning_rate": 3.8541213390140655e-06, + "loss": 0.5868, + "step": 6538 + }, + { + "epoch": 0.9635068762278979, + "grad_norm": 0.5914876461029053, + "learning_rate": 3.8537954220171795e-06, + "loss": 0.5697, + "step": 6539 + }, + { + "epoch": 0.9636542239685658, + "grad_norm": 0.5783504247665405, + "learning_rate": 3.853469472462148e-06, + "loss": 0.5582, + "step": 6540 + }, + { + "epoch": 0.9638015717092338, + "grad_norm": 0.5652262568473816, + "learning_rate": 3.85314349035681e-06, + "loss": 0.5724, + "step": 6541 + }, + { + "epoch": 0.9639489194499018, + "grad_norm": 0.589743435382843, + "learning_rate": 3.852817475709006e-06, + "loss": 0.5471, + "step": 6542 + }, + { + "epoch": 0.9640962671905697, + "grad_norm": 0.5705616474151611, + "learning_rate": 3.8524914285265764e-06, + "loss": 0.5501, + "step": 6543 + }, + { + "epoch": 0.9642436149312377, + "grad_norm": 0.5820775628089905, + "learning_rate": 3.8521653488173625e-06, + "loss": 0.5939, + "step": 6544 + }, + { + "epoch": 0.9643909626719057, + "grad_norm": 0.5787723660469055, + "learning_rate": 3.851839236589205e-06, + "loss": 0.5526, + "step": 6545 + }, + { + "epoch": 0.9645383104125737, + "grad_norm": 0.5766347050666809, + "learning_rate": 3.851513091849949e-06, + "loss": 0.5825, + "step": 6546 + }, + { + "epoch": 0.9646856581532417, + "grad_norm": 0.589040219783783, + "learning_rate": 3.851186914607436e-06, + "loss": 0.5487, + "step": 6547 + }, + { + "epoch": 0.9648330058939096, + "grad_norm": 0.5988291501998901, + "learning_rate": 3.850860704869514e-06, + "loss": 0.5722, + "step": 6548 + }, + { + "epoch": 0.9649803536345776, + "grad_norm": 0.5789790749549866, + "learning_rate": 3.850534462644023e-06, + "loss": 0.5631, + "step": 6549 + }, + { + "epoch": 0.9651277013752456, + "grad_norm": 0.5828260779380798, + "learning_rate": 3.8502081879388125e-06, + "loss": 0.5012, + "step": 6550 + }, + { + "epoch": 0.9652750491159136, + "grad_norm": 0.6151747703552246, + "learning_rate": 3.849881880761729e-06, + "loss": 0.552, + "step": 6551 + }, + { + "epoch": 0.9654223968565815, + "grad_norm": 0.6211181879043579, + "learning_rate": 3.849555541120618e-06, + "loss": 0.5806, + "step": 6552 + }, + { + "epoch": 0.9655697445972495, + "grad_norm": 0.611431896686554, + "learning_rate": 3.8492291690233304e-06, + "loss": 0.5877, + "step": 6553 + }, + { + "epoch": 0.9657170923379175, + "grad_norm": 0.5862647891044617, + "learning_rate": 3.848902764477713e-06, + "loss": 0.5752, + "step": 6554 + }, + { + "epoch": 0.9658644400785854, + "grad_norm": 0.5670418739318848, + "learning_rate": 3.848576327491618e-06, + "loss": 0.5545, + "step": 6555 + }, + { + "epoch": 0.9660117878192535, + "grad_norm": 0.5932023525238037, + "learning_rate": 3.848249858072894e-06, + "loss": 0.5389, + "step": 6556 + }, + { + "epoch": 0.9661591355599214, + "grad_norm": 0.5721176266670227, + "learning_rate": 3.847923356229394e-06, + "loss": 0.5664, + "step": 6557 + }, + { + "epoch": 0.9663064833005894, + "grad_norm": 0.5885310173034668, + "learning_rate": 3.847596821968969e-06, + "loss": 0.5667, + "step": 6558 + }, + { + "epoch": 0.9664538310412574, + "grad_norm": 0.5765991806983948, + "learning_rate": 3.8472702552994725e-06, + "loss": 0.5879, + "step": 6559 + }, + { + "epoch": 0.9666011787819253, + "grad_norm": 0.5737959146499634, + "learning_rate": 3.8469436562287585e-06, + "loss": 0.5817, + "step": 6560 + }, + { + "epoch": 0.9667485265225934, + "grad_norm": 0.5756208896636963, + "learning_rate": 3.8466170247646815e-06, + "loss": 0.5445, + "step": 6561 + }, + { + "epoch": 0.9668958742632613, + "grad_norm": 0.6364098191261292, + "learning_rate": 3.846290360915096e-06, + "loss": 0.5408, + "step": 6562 + }, + { + "epoch": 0.9670432220039292, + "grad_norm": 0.5902275443077087, + "learning_rate": 3.84596366468786e-06, + "loss": 0.5668, + "step": 6563 + }, + { + "epoch": 0.9671905697445973, + "grad_norm": 0.6189146637916565, + "learning_rate": 3.8456369360908276e-06, + "loss": 0.5623, + "step": 6564 + }, + { + "epoch": 0.9673379174852652, + "grad_norm": 0.6155731081962585, + "learning_rate": 3.845310175131859e-06, + "loss": 0.5772, + "step": 6565 + }, + { + "epoch": 0.9674852652259333, + "grad_norm": 0.5668459534645081, + "learning_rate": 3.844983381818811e-06, + "loss": 0.5865, + "step": 6566 + }, + { + "epoch": 0.9676326129666012, + "grad_norm": 0.564410924911499, + "learning_rate": 3.844656556159544e-06, + "loss": 0.5487, + "step": 6567 + }, + { + "epoch": 0.9677799607072691, + "grad_norm": 0.6036076545715332, + "learning_rate": 3.844329698161917e-06, + "loss": 0.5688, + "step": 6568 + }, + { + "epoch": 0.9679273084479372, + "grad_norm": 0.5987454056739807, + "learning_rate": 3.844002807833792e-06, + "loss": 0.5503, + "step": 6569 + }, + { + "epoch": 0.9680746561886051, + "grad_norm": 0.5818021297454834, + "learning_rate": 3.843675885183029e-06, + "loss": 0.5921, + "step": 6570 + }, + { + "epoch": 0.968222003929273, + "grad_norm": 0.5732938051223755, + "learning_rate": 3.843348930217492e-06, + "loss": 0.5248, + "step": 6571 + }, + { + "epoch": 0.9683693516699411, + "grad_norm": 0.5728085041046143, + "learning_rate": 3.843021942945044e-06, + "loss": 0.5543, + "step": 6572 + }, + { + "epoch": 0.968516699410609, + "grad_norm": 0.6052839159965515, + "learning_rate": 3.842694923373547e-06, + "loss": 0.5072, + "step": 6573 + }, + { + "epoch": 0.968664047151277, + "grad_norm": 0.6022443175315857, + "learning_rate": 3.842367871510867e-06, + "loss": 0.5379, + "step": 6574 + }, + { + "epoch": 0.968811394891945, + "grad_norm": 0.5926876068115234, + "learning_rate": 3.842040787364869e-06, + "loss": 0.5672, + "step": 6575 + }, + { + "epoch": 0.968958742632613, + "grad_norm": 0.5743926167488098, + "learning_rate": 3.84171367094342e-06, + "loss": 0.5731, + "step": 6576 + }, + { + "epoch": 0.969106090373281, + "grad_norm": 0.6203618049621582, + "learning_rate": 3.841386522254385e-06, + "loss": 0.5825, + "step": 6577 + }, + { + "epoch": 0.9692534381139489, + "grad_norm": 0.6021530032157898, + "learning_rate": 3.841059341305635e-06, + "loss": 0.5661, + "step": 6578 + }, + { + "epoch": 0.9694007858546169, + "grad_norm": 0.5897454023361206, + "learning_rate": 3.840732128105037e-06, + "loss": 0.5623, + "step": 6579 + }, + { + "epoch": 0.9695481335952849, + "grad_norm": 0.6083402037620544, + "learning_rate": 3.84040488266046e-06, + "loss": 0.5437, + "step": 6580 + }, + { + "epoch": 0.9696954813359528, + "grad_norm": 0.5743944048881531, + "learning_rate": 3.8400776049797735e-06, + "loss": 0.5288, + "step": 6581 + }, + { + "epoch": 0.9698428290766208, + "grad_norm": 0.5895102024078369, + "learning_rate": 3.83975029507085e-06, + "loss": 0.5706, + "step": 6582 + }, + { + "epoch": 0.9699901768172888, + "grad_norm": 0.5600093603134155, + "learning_rate": 3.8394229529415596e-06, + "loss": 0.5614, + "step": 6583 + }, + { + "epoch": 0.9701375245579568, + "grad_norm": 0.5581523180007935, + "learning_rate": 3.839095578599776e-06, + "loss": 0.548, + "step": 6584 + }, + { + "epoch": 0.9702848722986247, + "grad_norm": 0.5694721341133118, + "learning_rate": 3.838768172053371e-06, + "loss": 0.5158, + "step": 6585 + }, + { + "epoch": 0.9704322200392927, + "grad_norm": 0.6014356017112732, + "learning_rate": 3.8384407333102205e-06, + "loss": 0.576, + "step": 6586 + }, + { + "epoch": 0.9705795677799607, + "grad_norm": 0.5707666873931885, + "learning_rate": 3.838113262378197e-06, + "loss": 0.528, + "step": 6587 + }, + { + "epoch": 0.9707269155206287, + "grad_norm": 0.5824368000030518, + "learning_rate": 3.837785759265178e-06, + "loss": 0.5496, + "step": 6588 + }, + { + "epoch": 0.9708742632612967, + "grad_norm": 0.6002948880195618, + "learning_rate": 3.837458223979038e-06, + "loss": 0.5422, + "step": 6589 + }, + { + "epoch": 0.9710216110019646, + "grad_norm": 0.6063507795333862, + "learning_rate": 3.837130656527655e-06, + "loss": 0.5534, + "step": 6590 + }, + { + "epoch": 0.9711689587426326, + "grad_norm": 0.6136573553085327, + "learning_rate": 3.836803056918908e-06, + "loss": 0.5444, + "step": 6591 + }, + { + "epoch": 0.9713163064833006, + "grad_norm": 0.5677516460418701, + "learning_rate": 3.836475425160674e-06, + "loss": 0.5239, + "step": 6592 + }, + { + "epoch": 0.9714636542239685, + "grad_norm": 0.565879762172699, + "learning_rate": 3.836147761260832e-06, + "loss": 0.5764, + "step": 6593 + }, + { + "epoch": 0.9716110019646366, + "grad_norm": 0.5840448141098022, + "learning_rate": 3.835820065227264e-06, + "loss": 0.5676, + "step": 6594 + }, + { + "epoch": 0.9717583497053045, + "grad_norm": 0.5916604399681091, + "learning_rate": 3.8354923370678495e-06, + "loss": 0.5503, + "step": 6595 + }, + { + "epoch": 0.9719056974459725, + "grad_norm": 0.6123899221420288, + "learning_rate": 3.835164576790472e-06, + "loss": 0.5677, + "step": 6596 + }, + { + "epoch": 0.9720530451866405, + "grad_norm": 0.5788568258285522, + "learning_rate": 3.834836784403011e-06, + "loss": 0.5952, + "step": 6597 + }, + { + "epoch": 0.9722003929273084, + "grad_norm": 0.5820828080177307, + "learning_rate": 3.8345089599133525e-06, + "loss": 0.564, + "step": 6598 + }, + { + "epoch": 0.9723477406679765, + "grad_norm": 0.5785847902297974, + "learning_rate": 3.834181103329379e-06, + "loss": 0.5241, + "step": 6599 + }, + { + "epoch": 0.9724950884086444, + "grad_norm": 0.5698215961456299, + "learning_rate": 3.833853214658976e-06, + "loss": 0.5665, + "step": 6600 + }, + { + "epoch": 0.9726424361493123, + "grad_norm": 0.5634599328041077, + "learning_rate": 3.8335252939100285e-06, + "loss": 0.5455, + "step": 6601 + }, + { + "epoch": 0.9727897838899804, + "grad_norm": 0.5746351480484009, + "learning_rate": 3.833197341090424e-06, + "loss": 0.5581, + "step": 6602 + }, + { + "epoch": 0.9729371316306483, + "grad_norm": 0.5725167393684387, + "learning_rate": 3.832869356208047e-06, + "loss": 0.5438, + "step": 6603 + }, + { + "epoch": 0.9730844793713163, + "grad_norm": 0.6432546973228455, + "learning_rate": 3.832541339270787e-06, + "loss": 0.56, + "step": 6604 + }, + { + "epoch": 0.9732318271119843, + "grad_norm": 0.588798463344574, + "learning_rate": 3.832213290286535e-06, + "loss": 0.5509, + "step": 6605 + }, + { + "epoch": 0.9733791748526522, + "grad_norm": 0.573080837726593, + "learning_rate": 3.831885209263177e-06, + "loss": 0.5028, + "step": 6606 + }, + { + "epoch": 0.9735265225933203, + "grad_norm": 0.5492053031921387, + "learning_rate": 3.831557096208605e-06, + "loss": 0.5393, + "step": 6607 + }, + { + "epoch": 0.9736738703339882, + "grad_norm": 0.5600624084472656, + "learning_rate": 3.8312289511307086e-06, + "loss": 0.6055, + "step": 6608 + }, + { + "epoch": 0.9738212180746562, + "grad_norm": 0.6545931100845337, + "learning_rate": 3.83090077403738e-06, + "loss": 0.5659, + "step": 6609 + }, + { + "epoch": 0.9739685658153242, + "grad_norm": 0.587066650390625, + "learning_rate": 3.830572564936513e-06, + "loss": 0.5978, + "step": 6610 + }, + { + "epoch": 0.9741159135559921, + "grad_norm": 0.5838323831558228, + "learning_rate": 3.830244323835999e-06, + "loss": 0.5874, + "step": 6611 + }, + { + "epoch": 0.9742632612966601, + "grad_norm": 0.5983948111534119, + "learning_rate": 3.829916050743733e-06, + "loss": 0.5793, + "step": 6612 + }, + { + "epoch": 0.9744106090373281, + "grad_norm": 0.6306968331336975, + "learning_rate": 3.82958774566761e-06, + "loss": 0.5423, + "step": 6613 + }, + { + "epoch": 0.9745579567779961, + "grad_norm": 0.5906886458396912, + "learning_rate": 3.829259408615525e-06, + "loss": 0.576, + "step": 6614 + }, + { + "epoch": 0.974705304518664, + "grad_norm": 0.5654301047325134, + "learning_rate": 3.828931039595376e-06, + "loss": 0.5213, + "step": 6615 + }, + { + "epoch": 0.974852652259332, + "grad_norm": 0.5854045748710632, + "learning_rate": 3.828602638615057e-06, + "loss": 0.5429, + "step": 6616 + }, + { + "epoch": 0.975, + "grad_norm": 0.5837438106536865, + "learning_rate": 3.8282742056824694e-06, + "loss": 0.5552, + "step": 6617 + }, + { + "epoch": 0.975147347740668, + "grad_norm": 0.5942155122756958, + "learning_rate": 3.82794574080551e-06, + "loss": 0.5158, + "step": 6618 + }, + { + "epoch": 0.975294695481336, + "grad_norm": 0.564123272895813, + "learning_rate": 3.8276172439920776e-06, + "loss": 0.5752, + "step": 6619 + }, + { + "epoch": 0.9754420432220039, + "grad_norm": 0.5688325762748718, + "learning_rate": 3.827288715250073e-06, + "loss": 0.5318, + "step": 6620 + }, + { + "epoch": 0.9755893909626719, + "grad_norm": 0.5491056442260742, + "learning_rate": 3.826960154587398e-06, + "loss": 0.5282, + "step": 6621 + }, + { + "epoch": 0.9757367387033399, + "grad_norm": 0.5983983874320984, + "learning_rate": 3.826631562011953e-06, + "loss": 0.581, + "step": 6622 + }, + { + "epoch": 0.9758840864440078, + "grad_norm": 0.5869712829589844, + "learning_rate": 3.826302937531642e-06, + "loss": 0.5495, + "step": 6623 + }, + { + "epoch": 0.9760314341846759, + "grad_norm": 0.5891305804252625, + "learning_rate": 3.825974281154367e-06, + "loss": 0.5562, + "step": 6624 + }, + { + "epoch": 0.9761787819253438, + "grad_norm": 0.6096721291542053, + "learning_rate": 3.825645592888032e-06, + "loss": 0.5557, + "step": 6625 + }, + { + "epoch": 0.9763261296660117, + "grad_norm": 0.5734555125236511, + "learning_rate": 3.825316872740543e-06, + "loss": 0.5758, + "step": 6626 + }, + { + "epoch": 0.9764734774066798, + "grad_norm": 0.5786436796188354, + "learning_rate": 3.824988120719805e-06, + "loss": 0.5382, + "step": 6627 + }, + { + "epoch": 0.9766208251473477, + "grad_norm": 0.5795121192932129, + "learning_rate": 3.824659336833723e-06, + "loss": 0.5733, + "step": 6628 + }, + { + "epoch": 0.9767681728880158, + "grad_norm": 0.616045355796814, + "learning_rate": 3.824330521090206e-06, + "loss": 0.5326, + "step": 6629 + }, + { + "epoch": 0.9769155206286837, + "grad_norm": 0.5821729898452759, + "learning_rate": 3.824001673497161e-06, + "loss": 0.565, + "step": 6630 + }, + { + "epoch": 0.9770628683693516, + "grad_norm": 0.5763100981712341, + "learning_rate": 3.823672794062496e-06, + "loss": 0.6028, + "step": 6631 + }, + { + "epoch": 0.9772102161100197, + "grad_norm": 0.584391176700592, + "learning_rate": 3.823343882794122e-06, + "loss": 0.5607, + "step": 6632 + }, + { + "epoch": 0.9773575638506876, + "grad_norm": 0.5925949215888977, + "learning_rate": 3.823014939699948e-06, + "loss": 0.5336, + "step": 6633 + }, + { + "epoch": 0.9775049115913556, + "grad_norm": 0.6348164677619934, + "learning_rate": 3.822685964787886e-06, + "loss": 0.5265, + "step": 6634 + }, + { + "epoch": 0.9776522593320236, + "grad_norm": 0.5797012448310852, + "learning_rate": 3.822356958065846e-06, + "loss": 0.5442, + "step": 6635 + }, + { + "epoch": 0.9777996070726915, + "grad_norm": 0.5840872526168823, + "learning_rate": 3.822027919541742e-06, + "loss": 0.5641, + "step": 6636 + }, + { + "epoch": 0.9779469548133596, + "grad_norm": 0.5699143409729004, + "learning_rate": 3.821698849223486e-06, + "loss": 0.5727, + "step": 6637 + }, + { + "epoch": 0.9780943025540275, + "grad_norm": 0.6087173819541931, + "learning_rate": 3.821369747118993e-06, + "loss": 0.5603, + "step": 6638 + }, + { + "epoch": 0.9782416502946955, + "grad_norm": 0.5848338603973389, + "learning_rate": 3.8210406132361775e-06, + "loss": 0.5513, + "step": 6639 + }, + { + "epoch": 0.9783889980353635, + "grad_norm": 0.5913975238800049, + "learning_rate": 3.8207114475829546e-06, + "loss": 0.5581, + "step": 6640 + }, + { + "epoch": 0.9785363457760314, + "grad_norm": 0.6035642027854919, + "learning_rate": 3.82038225016724e-06, + "loss": 0.5734, + "step": 6641 + }, + { + "epoch": 0.9786836935166994, + "grad_norm": 0.5940127968788147, + "learning_rate": 3.820053020996953e-06, + "loss": 0.5556, + "step": 6642 + }, + { + "epoch": 0.9788310412573674, + "grad_norm": 0.582810640335083, + "learning_rate": 3.81972376008001e-06, + "loss": 0.5414, + "step": 6643 + }, + { + "epoch": 0.9789783889980354, + "grad_norm": 0.5873481035232544, + "learning_rate": 3.8193944674243285e-06, + "loss": 0.5557, + "step": 6644 + }, + { + "epoch": 0.9791257367387033, + "grad_norm": 0.551673948764801, + "learning_rate": 3.819065143037829e-06, + "loss": 0.5494, + "step": 6645 + }, + { + "epoch": 0.9792730844793713, + "grad_norm": 0.5949100255966187, + "learning_rate": 3.818735786928432e-06, + "loss": 0.5579, + "step": 6646 + }, + { + "epoch": 0.9794204322200393, + "grad_norm": 0.5619867444038391, + "learning_rate": 3.818406399104059e-06, + "loss": 0.5534, + "step": 6647 + }, + { + "epoch": 0.9795677799607073, + "grad_norm": 0.5977136492729187, + "learning_rate": 3.818076979572628e-06, + "loss": 0.5632, + "step": 6648 + }, + { + "epoch": 0.9797151277013753, + "grad_norm": 0.6074683666229248, + "learning_rate": 3.817747528342065e-06, + "loss": 0.576, + "step": 6649 + }, + { + "epoch": 0.9798624754420432, + "grad_norm": 0.5725196003913879, + "learning_rate": 3.817418045420293e-06, + "loss": 0.5643, + "step": 6650 + }, + { + "epoch": 0.9800098231827112, + "grad_norm": 0.5718877911567688, + "learning_rate": 3.817088530815233e-06, + "loss": 0.5945, + "step": 6651 + }, + { + "epoch": 0.9801571709233792, + "grad_norm": 0.6071932315826416, + "learning_rate": 3.816758984534813e-06, + "loss": 0.5557, + "step": 6652 + }, + { + "epoch": 0.9803045186640471, + "grad_norm": 0.580797553062439, + "learning_rate": 3.816429406586956e-06, + "loss": 0.5436, + "step": 6653 + }, + { + "epoch": 0.9804518664047152, + "grad_norm": 0.6577277779579163, + "learning_rate": 3.81609979697959e-06, + "loss": 0.5428, + "step": 6654 + }, + { + "epoch": 0.9805992141453831, + "grad_norm": 0.618476927280426, + "learning_rate": 3.8157701557206415e-06, + "loss": 0.5343, + "step": 6655 + }, + { + "epoch": 0.980746561886051, + "grad_norm": 0.5909995436668396, + "learning_rate": 3.8154404828180365e-06, + "loss": 0.5458, + "step": 6656 + }, + { + "epoch": 0.9808939096267191, + "grad_norm": 0.576741635799408, + "learning_rate": 3.8151107782797056e-06, + "loss": 0.5841, + "step": 6657 + }, + { + "epoch": 0.981041257367387, + "grad_norm": 0.5898845195770264, + "learning_rate": 3.814781042113578e-06, + "loss": 0.551, + "step": 6658 + }, + { + "epoch": 0.9811886051080551, + "grad_norm": 0.5773476362228394, + "learning_rate": 3.814451274327582e-06, + "loss": 0.5524, + "step": 6659 + }, + { + "epoch": 0.981335952848723, + "grad_norm": 0.5872828364372253, + "learning_rate": 3.8141214749296502e-06, + "loss": 0.5807, + "step": 6660 + }, + { + "epoch": 0.9814833005893909, + "grad_norm": 0.6145313382148743, + "learning_rate": 3.813791643927712e-06, + "loss": 0.5796, + "step": 6661 + }, + { + "epoch": 0.981630648330059, + "grad_norm": 0.5716175436973572, + "learning_rate": 3.8134617813297027e-06, + "loss": 0.5747, + "step": 6662 + }, + { + "epoch": 0.9817779960707269, + "grad_norm": 0.6214751601219177, + "learning_rate": 3.813131887143553e-06, + "loss": 0.5625, + "step": 6663 + }, + { + "epoch": 0.9819253438113948, + "grad_norm": 0.5545549988746643, + "learning_rate": 3.8128019613771964e-06, + "loss": 0.578, + "step": 6664 + }, + { + "epoch": 0.9820726915520629, + "grad_norm": 0.5908538103103638, + "learning_rate": 3.8124720040385692e-06, + "loss": 0.5485, + "step": 6665 + }, + { + "epoch": 0.9822200392927308, + "grad_norm": 0.5865662097930908, + "learning_rate": 3.8121420151356057e-06, + "loss": 0.5667, + "step": 6666 + }, + { + "epoch": 0.9823673870333989, + "grad_norm": 0.6226565837860107, + "learning_rate": 3.811811994676242e-06, + "loss": 0.5592, + "step": 6667 + }, + { + "epoch": 0.9825147347740668, + "grad_norm": 0.578981339931488, + "learning_rate": 3.811481942668416e-06, + "loss": 0.5692, + "step": 6668 + }, + { + "epoch": 0.9826620825147347, + "grad_norm": 0.6078598499298096, + "learning_rate": 3.8111518591200635e-06, + "loss": 0.5404, + "step": 6669 + }, + { + "epoch": 0.9828094302554028, + "grad_norm": 0.5538727641105652, + "learning_rate": 3.8108217440391237e-06, + "loss": 0.5843, + "step": 6670 + }, + { + "epoch": 0.9829567779960707, + "grad_norm": 0.5997453331947327, + "learning_rate": 3.810491597433536e-06, + "loss": 0.5459, + "step": 6671 + }, + { + "epoch": 0.9831041257367387, + "grad_norm": 0.5993818640708923, + "learning_rate": 3.81016141931124e-06, + "loss": 0.5678, + "step": 6672 + }, + { + "epoch": 0.9832514734774067, + "grad_norm": 0.5882107615470886, + "learning_rate": 3.8098312096801763e-06, + "loss": 0.5246, + "step": 6673 + }, + { + "epoch": 0.9833988212180746, + "grad_norm": 0.5680830478668213, + "learning_rate": 3.8095009685482864e-06, + "loss": 0.544, + "step": 6674 + }, + { + "epoch": 0.9835461689587426, + "grad_norm": 0.5962804555892944, + "learning_rate": 3.8091706959235132e-06, + "loss": 0.5698, + "step": 6675 + }, + { + "epoch": 0.9836935166994106, + "grad_norm": 0.5946127772331238, + "learning_rate": 3.8088403918137974e-06, + "loss": 0.5587, + "step": 6676 + }, + { + "epoch": 0.9838408644400786, + "grad_norm": 0.6182558536529541, + "learning_rate": 3.808510056227085e-06, + "loss": 0.5785, + "step": 6677 + }, + { + "epoch": 0.9839882121807466, + "grad_norm": 0.5657941102981567, + "learning_rate": 3.808179689171319e-06, + "loss": 0.5369, + "step": 6678 + }, + { + "epoch": 0.9841355599214145, + "grad_norm": 0.5600358247756958, + "learning_rate": 3.8078492906544452e-06, + "loss": 0.5432, + "step": 6679 + }, + { + "epoch": 0.9842829076620825, + "grad_norm": 0.5971575379371643, + "learning_rate": 3.807518860684409e-06, + "loss": 0.564, + "step": 6680 + }, + { + "epoch": 0.9844302554027505, + "grad_norm": 0.5597718358039856, + "learning_rate": 3.8071883992691584e-06, + "loss": 0.557, + "step": 6681 + }, + { + "epoch": 0.9845776031434185, + "grad_norm": 0.5484075546264648, + "learning_rate": 3.8068579064166393e-06, + "loss": 0.548, + "step": 6682 + }, + { + "epoch": 0.9847249508840864, + "grad_norm": 0.584077000617981, + "learning_rate": 3.806527382134801e-06, + "loss": 0.5436, + "step": 6683 + }, + { + "epoch": 0.9848722986247544, + "grad_norm": 0.5524597764015198, + "learning_rate": 3.8061968264315906e-06, + "loss": 0.5658, + "step": 6684 + }, + { + "epoch": 0.9850196463654224, + "grad_norm": 0.552975594997406, + "learning_rate": 3.80586623931496e-06, + "loss": 0.5569, + "step": 6685 + }, + { + "epoch": 0.9851669941060903, + "grad_norm": 0.6115763783454895, + "learning_rate": 3.8055356207928595e-06, + "loss": 0.5548, + "step": 6686 + }, + { + "epoch": 0.9853143418467584, + "grad_norm": 0.5642319321632385, + "learning_rate": 3.8052049708732393e-06, + "loss": 0.55, + "step": 6687 + }, + { + "epoch": 0.9854616895874263, + "grad_norm": 0.5679916143417358, + "learning_rate": 3.804874289564051e-06, + "loss": 0.5776, + "step": 6688 + }, + { + "epoch": 0.9856090373280944, + "grad_norm": 0.5906273722648621, + "learning_rate": 3.8045435768732497e-06, + "loss": 0.581, + "step": 6689 + }, + { + "epoch": 0.9857563850687623, + "grad_norm": 0.5921862721443176, + "learning_rate": 3.804212832808786e-06, + "loss": 0.5566, + "step": 6690 + }, + { + "epoch": 0.9859037328094302, + "grad_norm": 0.6286768317222595, + "learning_rate": 3.803882057378615e-06, + "loss": 0.6045, + "step": 6691 + }, + { + "epoch": 0.9860510805500983, + "grad_norm": 0.5910508632659912, + "learning_rate": 3.803551250590693e-06, + "loss": 0.5439, + "step": 6692 + }, + { + "epoch": 0.9861984282907662, + "grad_norm": 0.5671054124832153, + "learning_rate": 3.803220412452975e-06, + "loss": 0.5794, + "step": 6693 + }, + { + "epoch": 0.9863457760314341, + "grad_norm": 0.592008650302887, + "learning_rate": 3.8028895429734158e-06, + "loss": 0.5644, + "step": 6694 + }, + { + "epoch": 0.9864931237721022, + "grad_norm": 0.5616171360015869, + "learning_rate": 3.8025586421599756e-06, + "loss": 0.5757, + "step": 6695 + }, + { + "epoch": 0.9866404715127701, + "grad_norm": 0.5894167423248291, + "learning_rate": 3.8022277100206106e-06, + "loss": 0.5591, + "step": 6696 + }, + { + "epoch": 0.9867878192534381, + "grad_norm": 0.5947283506393433, + "learning_rate": 3.8018967465632793e-06, + "loss": 0.5549, + "step": 6697 + }, + { + "epoch": 0.9869351669941061, + "grad_norm": 0.5952001214027405, + "learning_rate": 3.8015657517959426e-06, + "loss": 0.5498, + "step": 6698 + }, + { + "epoch": 0.987082514734774, + "grad_norm": 0.5751347541809082, + "learning_rate": 3.80123472572656e-06, + "loss": 0.5836, + "step": 6699 + }, + { + "epoch": 0.9872298624754421, + "grad_norm": 0.5691291093826294, + "learning_rate": 3.800903668363093e-06, + "loss": 0.539, + "step": 6700 + }, + { + "epoch": 0.98737721021611, + "grad_norm": 0.5879769325256348, + "learning_rate": 3.800572579713502e-06, + "loss": 0.5882, + "step": 6701 + }, + { + "epoch": 0.987524557956778, + "grad_norm": 0.6021491289138794, + "learning_rate": 3.8002414597857507e-06, + "loss": 0.5516, + "step": 6702 + }, + { + "epoch": 0.987671905697446, + "grad_norm": 0.5831882953643799, + "learning_rate": 3.799910308587802e-06, + "loss": 0.5703, + "step": 6703 + }, + { + "epoch": 0.9878192534381139, + "grad_norm": 0.601181149482727, + "learning_rate": 3.7995791261276206e-06, + "loss": 0.5862, + "step": 6704 + }, + { + "epoch": 0.9879666011787819, + "grad_norm": 0.5578700304031372, + "learning_rate": 3.7992479124131705e-06, + "loss": 0.5455, + "step": 6705 + }, + { + "epoch": 0.9881139489194499, + "grad_norm": 0.5325280427932739, + "learning_rate": 3.7989166674524174e-06, + "loss": 0.5394, + "step": 6706 + }, + { + "epoch": 0.9882612966601179, + "grad_norm": 0.584345817565918, + "learning_rate": 3.7985853912533276e-06, + "loss": 0.551, + "step": 6707 + }, + { + "epoch": 0.9884086444007859, + "grad_norm": 0.5861040353775024, + "learning_rate": 3.7982540838238692e-06, + "loss": 0.5769, + "step": 6708 + }, + { + "epoch": 0.9885559921414538, + "grad_norm": 0.5532112121582031, + "learning_rate": 3.797922745172008e-06, + "loss": 0.5565, + "step": 6709 + }, + { + "epoch": 0.9887033398821218, + "grad_norm": 0.6336901187896729, + "learning_rate": 3.7975913753057137e-06, + "loss": 0.5303, + "step": 6710 + }, + { + "epoch": 0.9888506876227898, + "grad_norm": 0.5725635290145874, + "learning_rate": 3.797259974232955e-06, + "loss": 0.5654, + "step": 6711 + }, + { + "epoch": 0.9889980353634578, + "grad_norm": 0.6068928837776184, + "learning_rate": 3.796928541961703e-06, + "loss": 0.551, + "step": 6712 + }, + { + "epoch": 0.9891453831041257, + "grad_norm": 0.5792505145072937, + "learning_rate": 3.7965970784999286e-06, + "loss": 0.5313, + "step": 6713 + }, + { + "epoch": 0.9892927308447937, + "grad_norm": 0.6153581738471985, + "learning_rate": 3.7962655838556017e-06, + "loss": 0.5605, + "step": 6714 + }, + { + "epoch": 0.9894400785854617, + "grad_norm": 0.5630253553390503, + "learning_rate": 3.7959340580366956e-06, + "loss": 0.5223, + "step": 6715 + }, + { + "epoch": 0.9895874263261296, + "grad_norm": 0.5938056111335754, + "learning_rate": 3.795602501051183e-06, + "loss": 0.5677, + "step": 6716 + }, + { + "epoch": 0.9897347740667977, + "grad_norm": 0.6084017753601074, + "learning_rate": 3.7952709129070383e-06, + "loss": 0.5227, + "step": 6717 + }, + { + "epoch": 0.9898821218074656, + "grad_norm": 0.5739919543266296, + "learning_rate": 3.7949392936122354e-06, + "loss": 0.5489, + "step": 6718 + }, + { + "epoch": 0.9900294695481336, + "grad_norm": 0.6069080233573914, + "learning_rate": 3.7946076431747496e-06, + "loss": 0.5526, + "step": 6719 + }, + { + "epoch": 0.9901768172888016, + "grad_norm": 0.5714866518974304, + "learning_rate": 3.794275961602558e-06, + "loss": 0.5501, + "step": 6720 + }, + { + "epoch": 0.9903241650294695, + "grad_norm": 0.637834906578064, + "learning_rate": 3.793944248903635e-06, + "loss": 0.5586, + "step": 6721 + }, + { + "epoch": 0.9904715127701376, + "grad_norm": 0.5944707989692688, + "learning_rate": 3.7936125050859606e-06, + "loss": 0.5398, + "step": 6722 + }, + { + "epoch": 0.9906188605108055, + "grad_norm": 0.6091006398200989, + "learning_rate": 3.7932807301575125e-06, + "loss": 0.5468, + "step": 6723 + }, + { + "epoch": 0.9907662082514734, + "grad_norm": 0.5963391065597534, + "learning_rate": 3.7929489241262695e-06, + "loss": 0.5149, + "step": 6724 + }, + { + "epoch": 0.9909135559921415, + "grad_norm": 0.573854386806488, + "learning_rate": 3.792617087000211e-06, + "loss": 0.5828, + "step": 6725 + }, + { + "epoch": 0.9910609037328094, + "grad_norm": 0.5896579027175903, + "learning_rate": 3.7922852187873184e-06, + "loss": 0.5702, + "step": 6726 + }, + { + "epoch": 0.9912082514734774, + "grad_norm": 0.5914243459701538, + "learning_rate": 3.7919533194955715e-06, + "loss": 0.561, + "step": 6727 + }, + { + "epoch": 0.9913555992141454, + "grad_norm": 0.5906693339347839, + "learning_rate": 3.7916213891329535e-06, + "loss": 0.5507, + "step": 6728 + }, + { + "epoch": 0.9915029469548133, + "grad_norm": 0.5821155309677124, + "learning_rate": 3.7912894277074473e-06, + "loss": 0.5388, + "step": 6729 + }, + { + "epoch": 0.9916502946954814, + "grad_norm": 0.6050426959991455, + "learning_rate": 3.7909574352270362e-06, + "loss": 0.5538, + "step": 6730 + }, + { + "epoch": 0.9917976424361493, + "grad_norm": 0.6039214730262756, + "learning_rate": 3.790625411699703e-06, + "loss": 0.5651, + "step": 6731 + }, + { + "epoch": 0.9919449901768173, + "grad_norm": 0.5484625101089478, + "learning_rate": 3.7902933571334355e-06, + "loss": 0.561, + "step": 6732 + }, + { + "epoch": 0.9920923379174853, + "grad_norm": 0.5735034346580505, + "learning_rate": 3.789961271536217e-06, + "loss": 0.5527, + "step": 6733 + }, + { + "epoch": 0.9922396856581532, + "grad_norm": 0.5642784237861633, + "learning_rate": 3.7896291549160354e-06, + "loss": 0.5475, + "step": 6734 + }, + { + "epoch": 0.9923870333988212, + "grad_norm": 0.5778886675834656, + "learning_rate": 3.789297007280877e-06, + "loss": 0.571, + "step": 6735 + }, + { + "epoch": 0.9925343811394892, + "grad_norm": 0.5853998064994812, + "learning_rate": 3.788964828638731e-06, + "loss": 0.558, + "step": 6736 + }, + { + "epoch": 0.9926817288801572, + "grad_norm": 0.5724614858627319, + "learning_rate": 3.7886326189975854e-06, + "loss": 0.5578, + "step": 6737 + }, + { + "epoch": 0.9928290766208252, + "grad_norm": 0.56874018907547, + "learning_rate": 3.788300378365429e-06, + "loss": 0.5867, + "step": 6738 + }, + { + "epoch": 0.9929764243614931, + "grad_norm": 0.5819116234779358, + "learning_rate": 3.7879681067502536e-06, + "loss": 0.5502, + "step": 6739 + }, + { + "epoch": 0.9931237721021611, + "grad_norm": 0.5852745175361633, + "learning_rate": 3.787635804160048e-06, + "loss": 0.5574, + "step": 6740 + }, + { + "epoch": 0.9932711198428291, + "grad_norm": 0.5996602773666382, + "learning_rate": 3.7873034706028065e-06, + "loss": 0.5478, + "step": 6741 + }, + { + "epoch": 0.9934184675834971, + "grad_norm": 0.5735631585121155, + "learning_rate": 3.78697110608652e-06, + "loss": 0.535, + "step": 6742 + }, + { + "epoch": 0.993565815324165, + "grad_norm": 0.6179010272026062, + "learning_rate": 3.7866387106191814e-06, + "loss": 0.5832, + "step": 6743 + }, + { + "epoch": 0.993713163064833, + "grad_norm": 0.565230667591095, + "learning_rate": 3.786306284208786e-06, + "loss": 0.5733, + "step": 6744 + }, + { + "epoch": 0.993860510805501, + "grad_norm": 0.587731122970581, + "learning_rate": 3.7859738268633277e-06, + "loss": 0.5283, + "step": 6745 + }, + { + "epoch": 0.9940078585461689, + "grad_norm": 0.5759673714637756, + "learning_rate": 3.785641338590802e-06, + "loss": 0.5561, + "step": 6746 + }, + { + "epoch": 0.994155206286837, + "grad_norm": 0.5511008501052856, + "learning_rate": 3.785308819399205e-06, + "loss": 0.5615, + "step": 6747 + }, + { + "epoch": 0.9943025540275049, + "grad_norm": 0.566585898399353, + "learning_rate": 3.7849762692965343e-06, + "loss": 0.5497, + "step": 6748 + }, + { + "epoch": 0.9944499017681729, + "grad_norm": 0.5625060200691223, + "learning_rate": 3.7846436882907854e-06, + "loss": 0.5632, + "step": 6749 + }, + { + "epoch": 0.9945972495088409, + "grad_norm": 0.5712078809738159, + "learning_rate": 3.78431107638996e-06, + "loss": 0.5848, + "step": 6750 + }, + { + "epoch": 0.9947445972495088, + "grad_norm": 0.5797983407974243, + "learning_rate": 3.7839784336020546e-06, + "loss": 0.5764, + "step": 6751 + }, + { + "epoch": 0.9948919449901769, + "grad_norm": 0.5935981869697571, + "learning_rate": 3.7836457599350707e-06, + "loss": 0.5456, + "step": 6752 + }, + { + "epoch": 0.9950392927308448, + "grad_norm": 0.5746538043022156, + "learning_rate": 3.783313055397008e-06, + "loss": 0.5579, + "step": 6753 + }, + { + "epoch": 0.9951866404715127, + "grad_norm": 0.5818923115730286, + "learning_rate": 3.782980319995869e-06, + "loss": 0.563, + "step": 6754 + }, + { + "epoch": 0.9953339882121808, + "grad_norm": 0.5951783657073975, + "learning_rate": 3.782647553739654e-06, + "loss": 0.5689, + "step": 6755 + }, + { + "epoch": 0.9954813359528487, + "grad_norm": 0.5639636516571045, + "learning_rate": 3.7823147566363673e-06, + "loss": 0.5622, + "step": 6756 + }, + { + "epoch": 0.9956286836935166, + "grad_norm": 0.5691885352134705, + "learning_rate": 3.7819819286940123e-06, + "loss": 0.541, + "step": 6757 + }, + { + "epoch": 0.9957760314341847, + "grad_norm": 0.6019887924194336, + "learning_rate": 3.781649069920593e-06, + "loss": 0.5581, + "step": 6758 + }, + { + "epoch": 0.9959233791748526, + "grad_norm": 0.5815661549568176, + "learning_rate": 3.781316180324115e-06, + "loss": 0.5496, + "step": 6759 + }, + { + "epoch": 0.9960707269155207, + "grad_norm": 0.5647879838943481, + "learning_rate": 3.7809832599125834e-06, + "loss": 0.5847, + "step": 6760 + }, + { + "epoch": 0.9962180746561886, + "grad_norm": 0.5521979928016663, + "learning_rate": 3.7806503086940057e-06, + "loss": 0.509, + "step": 6761 + }, + { + "epoch": 0.9963654223968565, + "grad_norm": 0.5819664001464844, + "learning_rate": 3.780317326676388e-06, + "loss": 0.5711, + "step": 6762 + }, + { + "epoch": 0.9965127701375246, + "grad_norm": 0.5798903107643127, + "learning_rate": 3.77998431386774e-06, + "loss": 0.593, + "step": 6763 + }, + { + "epoch": 0.9966601178781925, + "grad_norm": 0.5823028087615967, + "learning_rate": 3.779651270276069e-06, + "loss": 0.5698, + "step": 6764 + }, + { + "epoch": 0.9968074656188605, + "grad_norm": 0.5556360483169556, + "learning_rate": 3.7793181959093856e-06, + "loss": 0.5336, + "step": 6765 + }, + { + "epoch": 0.9969548133595285, + "grad_norm": 0.5631321668624878, + "learning_rate": 3.7789850907756993e-06, + "loss": 0.5748, + "step": 6766 + }, + { + "epoch": 0.9971021611001964, + "grad_norm": 0.5938863754272461, + "learning_rate": 3.7786519548830216e-06, + "loss": 0.5683, + "step": 6767 + }, + { + "epoch": 0.9972495088408644, + "grad_norm": 0.5786289572715759, + "learning_rate": 3.7783187882393633e-06, + "loss": 0.5527, + "step": 6768 + }, + { + "epoch": 0.9973968565815324, + "grad_norm": 0.5952579975128174, + "learning_rate": 3.777985590852739e-06, + "loss": 0.5651, + "step": 6769 + }, + { + "epoch": 0.9975442043222004, + "grad_norm": 0.5789318680763245, + "learning_rate": 3.77765236273116e-06, + "loss": 0.5469, + "step": 6770 + }, + { + "epoch": 0.9976915520628684, + "grad_norm": 0.5986153483390808, + "learning_rate": 3.7773191038826407e-06, + "loss": 0.5874, + "step": 6771 + }, + { + "epoch": 0.9978388998035363, + "grad_norm": 0.6043206453323364, + "learning_rate": 3.7769858143151966e-06, + "loss": 0.5938, + "step": 6772 + }, + { + "epoch": 0.9979862475442043, + "grad_norm": 0.5426123142242432, + "learning_rate": 3.776652494036842e-06, + "loss": 0.5164, + "step": 6773 + }, + { + "epoch": 0.9981335952848723, + "grad_norm": 0.5831897854804993, + "learning_rate": 3.7763191430555944e-06, + "loss": 0.5974, + "step": 6774 + }, + { + "epoch": 0.9982809430255403, + "grad_norm": 0.6260004043579102, + "learning_rate": 3.7759857613794698e-06, + "loss": 0.569, + "step": 6775 + }, + { + "epoch": 0.9984282907662082, + "grad_norm": 0.59408038854599, + "learning_rate": 3.7756523490164853e-06, + "loss": 0.5528, + "step": 6776 + }, + { + "epoch": 0.9985756385068763, + "grad_norm": 0.5583187341690063, + "learning_rate": 3.775318905974661e-06, + "loss": 0.5514, + "step": 6777 + }, + { + "epoch": 0.9987229862475442, + "grad_norm": 0.5795233249664307, + "learning_rate": 3.7749854322620148e-06, + "loss": 0.5707, + "step": 6778 + }, + { + "epoch": 0.9988703339882122, + "grad_norm": 0.5581522583961487, + "learning_rate": 3.774651927886568e-06, + "loss": 0.5566, + "step": 6779 + }, + { + "epoch": 0.9990176817288802, + "grad_norm": 0.6161776185035706, + "learning_rate": 3.774318392856339e-06, + "loss": 0.5143, + "step": 6780 + }, + { + "epoch": 0.9991650294695481, + "grad_norm": 0.5630253553390503, + "learning_rate": 3.773984827179351e-06, + "loss": 0.5634, + "step": 6781 + }, + { + "epoch": 0.9993123772102162, + "grad_norm": 0.5881425142288208, + "learning_rate": 3.7736512308636254e-06, + "loss": 0.5583, + "step": 6782 + }, + { + "epoch": 0.9994597249508841, + "grad_norm": 0.5651354789733887, + "learning_rate": 3.773317603917185e-06, + "loss": 0.5409, + "step": 6783 + }, + { + "epoch": 0.999607072691552, + "grad_norm": 0.5653038620948792, + "learning_rate": 3.7729839463480527e-06, + "loss": 0.5408, + "step": 6784 + }, + { + "epoch": 0.9997544204322201, + "grad_norm": 0.5741205811500549, + "learning_rate": 3.772650258164254e-06, + "loss": 0.5286, + "step": 6785 + }, + { + "epoch": 0.999901768172888, + "grad_norm": 0.5757745504379272, + "learning_rate": 3.7723165393738137e-06, + "loss": 0.56, + "step": 6786 + }, + { + "epoch": 1.000147347740668, + "grad_norm": 1.0997387170791626, + "learning_rate": 3.7719827899847573e-06, + "loss": 1.1885, + "step": 6787 + }, + { + "epoch": 1.0002946954813359, + "grad_norm": 0.5765748023986816, + "learning_rate": 3.7716490100051116e-06, + "loss": 0.5262, + "step": 6788 + }, + { + "epoch": 1.000442043222004, + "grad_norm": 0.5902374982833862, + "learning_rate": 3.7713151994429033e-06, + "loss": 0.565, + "step": 6789 + }, + { + "epoch": 1.000589390962672, + "grad_norm": 0.5425161123275757, + "learning_rate": 3.7709813583061604e-06, + "loss": 0.565, + "step": 6790 + }, + { + "epoch": 1.00073673870334, + "grad_norm": 0.5813264846801758, + "learning_rate": 3.770647486602913e-06, + "loss": 0.5484, + "step": 6791 + }, + { + "epoch": 1.0008840864440078, + "grad_norm": 0.6004243493080139, + "learning_rate": 3.7703135843411885e-06, + "loss": 0.5423, + "step": 6792 + }, + { + "epoch": 1.0010314341846758, + "grad_norm": 0.5699193477630615, + "learning_rate": 3.769979651529019e-06, + "loss": 0.5556, + "step": 6793 + }, + { + "epoch": 1.0011787819253437, + "grad_norm": 0.5816091895103455, + "learning_rate": 3.7696456881744337e-06, + "loss": 0.5414, + "step": 6794 + }, + { + "epoch": 1.0013261296660119, + "grad_norm": 0.5719310641288757, + "learning_rate": 3.7693116942854657e-06, + "loss": 0.5303, + "step": 6795 + }, + { + "epoch": 1.0014734774066798, + "grad_norm": 0.5761677026748657, + "learning_rate": 3.7689776698701464e-06, + "loss": 0.5539, + "step": 6796 + }, + { + "epoch": 1.0016208251473477, + "grad_norm": 0.5952512621879578, + "learning_rate": 3.768643614936509e-06, + "loss": 0.5315, + "step": 6797 + }, + { + "epoch": 1.0017681728880157, + "grad_norm": 0.6029044985771179, + "learning_rate": 3.7683095294925888e-06, + "loss": 0.558, + "step": 6798 + }, + { + "epoch": 1.0019155206286836, + "grad_norm": 0.5636295676231384, + "learning_rate": 3.7679754135464187e-06, + "loss": 0.5293, + "step": 6799 + }, + { + "epoch": 1.0020628683693518, + "grad_norm": 0.5912231802940369, + "learning_rate": 3.7676412671060346e-06, + "loss": 0.5172, + "step": 6800 + }, + { + "epoch": 1.0022102161100197, + "grad_norm": 0.581246554851532, + "learning_rate": 3.7673070901794727e-06, + "loss": 0.5651, + "step": 6801 + }, + { + "epoch": 1.0023575638506876, + "grad_norm": 0.6231141090393066, + "learning_rate": 3.7669728827747694e-06, + "loss": 0.5465, + "step": 6802 + }, + { + "epoch": 1.0025049115913556, + "grad_norm": 0.5678769946098328, + "learning_rate": 3.7666386448999624e-06, + "loss": 0.5278, + "step": 6803 + }, + { + "epoch": 1.0026522593320235, + "grad_norm": 0.6406692266464233, + "learning_rate": 3.7663043765630915e-06, + "loss": 0.5563, + "step": 6804 + }, + { + "epoch": 1.0027996070726914, + "grad_norm": 0.5821007490158081, + "learning_rate": 3.765970077772193e-06, + "loss": 0.5532, + "step": 6805 + }, + { + "epoch": 1.0029469548133596, + "grad_norm": 0.5955839157104492, + "learning_rate": 3.765635748535308e-06, + "loss": 0.5612, + "step": 6806 + }, + { + "epoch": 1.0030943025540275, + "grad_norm": 0.5832028985023499, + "learning_rate": 3.7653013888604768e-06, + "loss": 0.5302, + "step": 6807 + }, + { + "epoch": 1.0032416502946955, + "grad_norm": 0.6079351305961609, + "learning_rate": 3.764966998755741e-06, + "loss": 0.5513, + "step": 6808 + }, + { + "epoch": 1.0033889980353634, + "grad_norm": 0.5969521403312683, + "learning_rate": 3.7646325782291416e-06, + "loss": 0.5344, + "step": 6809 + }, + { + "epoch": 1.0035363457760313, + "grad_norm": 0.5876597166061401, + "learning_rate": 3.7642981272887224e-06, + "loss": 0.5572, + "step": 6810 + }, + { + "epoch": 1.0036836935166995, + "grad_norm": 0.5986264944076538, + "learning_rate": 3.763963645942526e-06, + "loss": 0.5112, + "step": 6811 + }, + { + "epoch": 1.0038310412573674, + "grad_norm": 0.5589534640312195, + "learning_rate": 3.7636291341985965e-06, + "loss": 0.4848, + "step": 6812 + }, + { + "epoch": 1.0039783889980354, + "grad_norm": 0.6179212927818298, + "learning_rate": 3.7632945920649788e-06, + "loss": 0.5304, + "step": 6813 + }, + { + "epoch": 1.0041257367387033, + "grad_norm": 0.6066630482673645, + "learning_rate": 3.7629600195497194e-06, + "loss": 0.5415, + "step": 6814 + }, + { + "epoch": 1.0042730844793712, + "grad_norm": 0.5710508227348328, + "learning_rate": 3.7626254166608632e-06, + "loss": 0.5503, + "step": 6815 + }, + { + "epoch": 1.0044204322200392, + "grad_norm": 0.5879745483398438, + "learning_rate": 3.7622907834064594e-06, + "loss": 0.533, + "step": 6816 + }, + { + "epoch": 1.0045677799607073, + "grad_norm": 0.5440017580986023, + "learning_rate": 3.7619561197945532e-06, + "loss": 0.52, + "step": 6817 + }, + { + "epoch": 1.0047151277013753, + "grad_norm": 0.5862153172492981, + "learning_rate": 3.761621425833194e-06, + "loss": 0.5468, + "step": 6818 + }, + { + "epoch": 1.0048624754420432, + "grad_norm": 0.5623803734779358, + "learning_rate": 3.7612867015304318e-06, + "loss": 0.5404, + "step": 6819 + }, + { + "epoch": 1.0050098231827111, + "grad_norm": 0.5801146030426025, + "learning_rate": 3.7609519468943156e-06, + "loss": 0.531, + "step": 6820 + }, + { + "epoch": 1.005157170923379, + "grad_norm": 0.5720169544219971, + "learning_rate": 3.760617161932897e-06, + "loss": 0.5617, + "step": 6821 + }, + { + "epoch": 1.0053045186640472, + "grad_norm": 0.5624615550041199, + "learning_rate": 3.760282346654226e-06, + "loss": 0.5579, + "step": 6822 + }, + { + "epoch": 1.0054518664047152, + "grad_norm": 0.5917442440986633, + "learning_rate": 3.7599475010663566e-06, + "loss": 0.5217, + "step": 6823 + }, + { + "epoch": 1.0055992141453831, + "grad_norm": 0.5808259844779968, + "learning_rate": 3.7596126251773402e-06, + "loss": 0.5373, + "step": 6824 + }, + { + "epoch": 1.005746561886051, + "grad_norm": 0.5867030024528503, + "learning_rate": 3.759277718995231e-06, + "loss": 0.5346, + "step": 6825 + }, + { + "epoch": 1.005893909626719, + "grad_norm": 0.5875909328460693, + "learning_rate": 3.758942782528084e-06, + "loss": 0.5484, + "step": 6826 + }, + { + "epoch": 1.0060412573673871, + "grad_norm": 0.5928582549095154, + "learning_rate": 3.7586078157839527e-06, + "loss": 0.5595, + "step": 6827 + }, + { + "epoch": 1.006188605108055, + "grad_norm": 0.6211316585540771, + "learning_rate": 3.758272818770895e-06, + "loss": 0.5029, + "step": 6828 + }, + { + "epoch": 1.006335952848723, + "grad_norm": 0.5940349698066711, + "learning_rate": 3.757937791496965e-06, + "loss": 0.5309, + "step": 6829 + }, + { + "epoch": 1.006483300589391, + "grad_norm": 0.5780348181724548, + "learning_rate": 3.7576027339702215e-06, + "loss": 0.5487, + "step": 6830 + }, + { + "epoch": 1.0066306483300589, + "grad_norm": 0.6201276779174805, + "learning_rate": 3.7572676461987217e-06, + "loss": 0.544, + "step": 6831 + }, + { + "epoch": 1.0067779960707268, + "grad_norm": 0.5714828968048096, + "learning_rate": 3.7569325281905246e-06, + "loss": 0.5435, + "step": 6832 + }, + { + "epoch": 1.006925343811395, + "grad_norm": 0.6021549701690674, + "learning_rate": 3.75659737995369e-06, + "loss": 0.5295, + "step": 6833 + }, + { + "epoch": 1.007072691552063, + "grad_norm": 0.6425979733467102, + "learning_rate": 3.756262201496278e-06, + "loss": 0.527, + "step": 6834 + }, + { + "epoch": 1.0072200392927309, + "grad_norm": 0.6134456992149353, + "learning_rate": 3.7559269928263492e-06, + "loss": 0.5597, + "step": 6835 + }, + { + "epoch": 1.0073673870333988, + "grad_norm": 0.5799898505210876, + "learning_rate": 3.7555917539519652e-06, + "loss": 0.543, + "step": 6836 + }, + { + "epoch": 1.0075147347740667, + "grad_norm": 0.5823966860771179, + "learning_rate": 3.755256484881189e-06, + "loss": 0.5457, + "step": 6837 + }, + { + "epoch": 1.0076620825147349, + "grad_norm": 0.5819360613822937, + "learning_rate": 3.7549211856220825e-06, + "loss": 0.5312, + "step": 6838 + }, + { + "epoch": 1.0078094302554028, + "grad_norm": 0.5884981155395508, + "learning_rate": 3.7545858561827096e-06, + "loss": 0.5384, + "step": 6839 + }, + { + "epoch": 1.0079567779960708, + "grad_norm": 0.5802105069160461, + "learning_rate": 3.7542504965711363e-06, + "loss": 0.5033, + "step": 6840 + }, + { + "epoch": 1.0081041257367387, + "grad_norm": 0.5688816905021667, + "learning_rate": 3.753915106795426e-06, + "loss": 0.5073, + "step": 6841 + }, + { + "epoch": 1.0082514734774066, + "grad_norm": 0.5924059748649597, + "learning_rate": 3.7535796868636464e-06, + "loss": 0.546, + "step": 6842 + }, + { + "epoch": 1.0083988212180746, + "grad_norm": 0.5985544323921204, + "learning_rate": 3.753244236783863e-06, + "loss": 0.5144, + "step": 6843 + }, + { + "epoch": 1.0085461689587427, + "grad_norm": 0.6114506125450134, + "learning_rate": 3.752908756564143e-06, + "loss": 0.5238, + "step": 6844 + }, + { + "epoch": 1.0086935166994107, + "grad_norm": 0.6048887372016907, + "learning_rate": 3.7525732462125553e-06, + "loss": 0.5378, + "step": 6845 + }, + { + "epoch": 1.0088408644400786, + "grad_norm": 0.5709229707717896, + "learning_rate": 3.7522377057371682e-06, + "loss": 0.5813, + "step": 6846 + }, + { + "epoch": 1.0089882121807465, + "grad_norm": 0.5657650828361511, + "learning_rate": 3.751902135146053e-06, + "loss": 0.5622, + "step": 6847 + }, + { + "epoch": 1.0091355599214145, + "grad_norm": 0.6166815757751465, + "learning_rate": 3.751566534447277e-06, + "loss": 0.5654, + "step": 6848 + }, + { + "epoch": 1.0092829076620826, + "grad_norm": 0.5931811928749084, + "learning_rate": 3.751230903648914e-06, + "loss": 0.5345, + "step": 6849 + }, + { + "epoch": 1.0094302554027506, + "grad_norm": 0.6043866276741028, + "learning_rate": 3.750895242759034e-06, + "loss": 0.575, + "step": 6850 + }, + { + "epoch": 1.0095776031434185, + "grad_norm": 0.5627220273017883, + "learning_rate": 3.7505595517857095e-06, + "loss": 0.5209, + "step": 6851 + }, + { + "epoch": 1.0097249508840864, + "grad_norm": 0.580883264541626, + "learning_rate": 3.750223830737015e-06, + "loss": 0.52, + "step": 6852 + }, + { + "epoch": 1.0098722986247544, + "grad_norm": 0.5736096501350403, + "learning_rate": 3.7498880796210246e-06, + "loss": 0.5244, + "step": 6853 + }, + { + "epoch": 1.0100196463654223, + "grad_norm": 0.6349263191223145, + "learning_rate": 3.749552298445811e-06, + "loss": 0.5294, + "step": 6854 + }, + { + "epoch": 1.0101669941060905, + "grad_norm": 0.5786256194114685, + "learning_rate": 3.7492164872194512e-06, + "loss": 0.5261, + "step": 6855 + }, + { + "epoch": 1.0103143418467584, + "grad_norm": 0.5968038439750671, + "learning_rate": 3.7488806459500205e-06, + "loss": 0.5596, + "step": 6856 + }, + { + "epoch": 1.0104616895874263, + "grad_norm": 0.5804051160812378, + "learning_rate": 3.748544774645596e-06, + "loss": 0.5161, + "step": 6857 + }, + { + "epoch": 1.0106090373280943, + "grad_norm": 0.5641347169876099, + "learning_rate": 3.748208873314256e-06, + "loss": 0.547, + "step": 6858 + }, + { + "epoch": 1.0107563850687622, + "grad_norm": 0.5796567797660828, + "learning_rate": 3.747872941964077e-06, + "loss": 0.5442, + "step": 6859 + }, + { + "epoch": 1.0109037328094304, + "grad_norm": 0.6022166609764099, + "learning_rate": 3.7475369806031396e-06, + "loss": 0.5428, + "step": 6860 + }, + { + "epoch": 1.0110510805500983, + "grad_norm": 0.5698715448379517, + "learning_rate": 3.7472009892395227e-06, + "loss": 0.5164, + "step": 6861 + }, + { + "epoch": 1.0111984282907662, + "grad_norm": 0.6206821799278259, + "learning_rate": 3.7468649678813074e-06, + "loss": 0.5392, + "step": 6862 + }, + { + "epoch": 1.0113457760314342, + "grad_norm": 0.5580244064331055, + "learning_rate": 3.746528916536574e-06, + "loss": 0.5514, + "step": 6863 + }, + { + "epoch": 1.011493123772102, + "grad_norm": 0.5955545902252197, + "learning_rate": 3.7461928352134055e-06, + "loss": 0.5344, + "step": 6864 + }, + { + "epoch": 1.01164047151277, + "grad_norm": 0.597061038017273, + "learning_rate": 3.7458567239198827e-06, + "loss": 0.5713, + "step": 6865 + }, + { + "epoch": 1.0117878192534382, + "grad_norm": 0.60896897315979, + "learning_rate": 3.745520582664091e-06, + "loss": 0.5361, + "step": 6866 + }, + { + "epoch": 1.0119351669941061, + "grad_norm": 0.6072891354560852, + "learning_rate": 3.745184411454113e-06, + "loss": 0.5352, + "step": 6867 + }, + { + "epoch": 1.012082514734774, + "grad_norm": 0.6335226893424988, + "learning_rate": 3.7448482102980344e-06, + "loss": 0.5649, + "step": 6868 + }, + { + "epoch": 1.012229862475442, + "grad_norm": 0.5828091502189636, + "learning_rate": 3.744511979203939e-06, + "loss": 0.5489, + "step": 6869 + }, + { + "epoch": 1.01237721021611, + "grad_norm": 0.5883364081382751, + "learning_rate": 3.7441757181799156e-06, + "loss": 0.5443, + "step": 6870 + }, + { + "epoch": 1.012524557956778, + "grad_norm": 0.5840660333633423, + "learning_rate": 3.7438394272340496e-06, + "loss": 0.5276, + "step": 6871 + }, + { + "epoch": 1.012671905697446, + "grad_norm": 0.5720158815383911, + "learning_rate": 3.7435031063744282e-06, + "loss": 0.5097, + "step": 6872 + }, + { + "epoch": 1.012819253438114, + "grad_norm": 0.5688081979751587, + "learning_rate": 3.7431667556091406e-06, + "loss": 0.5393, + "step": 6873 + }, + { + "epoch": 1.012966601178782, + "grad_norm": 0.5781294107437134, + "learning_rate": 3.7428303749462754e-06, + "loss": 0.5637, + "step": 6874 + }, + { + "epoch": 1.0131139489194498, + "grad_norm": 0.5572324395179749, + "learning_rate": 3.7424939643939227e-06, + "loss": 0.5669, + "step": 6875 + }, + { + "epoch": 1.0132612966601178, + "grad_norm": 0.5726388096809387, + "learning_rate": 3.7421575239601725e-06, + "loss": 0.5331, + "step": 6876 + }, + { + "epoch": 1.013408644400786, + "grad_norm": 0.560218870639801, + "learning_rate": 3.7418210536531174e-06, + "loss": 0.5401, + "step": 6877 + }, + { + "epoch": 1.0135559921414539, + "grad_norm": 0.5914381146430969, + "learning_rate": 3.7414845534808473e-06, + "loss": 0.5508, + "step": 6878 + }, + { + "epoch": 1.0137033398821218, + "grad_norm": 0.5506847500801086, + "learning_rate": 3.741148023451457e-06, + "loss": 0.5554, + "step": 6879 + }, + { + "epoch": 1.0138506876227897, + "grad_norm": 0.6016296744346619, + "learning_rate": 3.7408114635730386e-06, + "loss": 0.5213, + "step": 6880 + }, + { + "epoch": 1.0139980353634577, + "grad_norm": 0.5686876177787781, + "learning_rate": 3.740474873853686e-06, + "loss": 0.5332, + "step": 6881 + }, + { + "epoch": 1.0141453831041258, + "grad_norm": 0.6341216564178467, + "learning_rate": 3.740138254301495e-06, + "loss": 0.5637, + "step": 6882 + }, + { + "epoch": 1.0142927308447938, + "grad_norm": 0.5809710025787354, + "learning_rate": 3.7398016049245607e-06, + "loss": 0.54, + "step": 6883 + }, + { + "epoch": 1.0144400785854617, + "grad_norm": 0.6334750652313232, + "learning_rate": 3.739464925730979e-06, + "loss": 0.5478, + "step": 6884 + }, + { + "epoch": 1.0145874263261296, + "grad_norm": 0.6075417399406433, + "learning_rate": 3.7391282167288475e-06, + "loss": 0.5512, + "step": 6885 + }, + { + "epoch": 1.0147347740667976, + "grad_norm": 0.5743424892425537, + "learning_rate": 3.738791477926263e-06, + "loss": 0.5341, + "step": 6886 + }, + { + "epoch": 1.0148821218074655, + "grad_norm": 0.5874279141426086, + "learning_rate": 3.738454709331324e-06, + "loss": 0.5536, + "step": 6887 + }, + { + "epoch": 1.0150294695481337, + "grad_norm": 0.5890275239944458, + "learning_rate": 3.7381179109521313e-06, + "loss": 0.5442, + "step": 6888 + }, + { + "epoch": 1.0151768172888016, + "grad_norm": 0.5994965434074402, + "learning_rate": 3.737781082796783e-06, + "loss": 0.5366, + "step": 6889 + }, + { + "epoch": 1.0153241650294695, + "grad_norm": 0.6054962277412415, + "learning_rate": 3.7374442248733807e-06, + "loss": 0.5399, + "step": 6890 + }, + { + "epoch": 1.0154715127701375, + "grad_norm": 0.5984225869178772, + "learning_rate": 3.7371073371900247e-06, + "loss": 0.5772, + "step": 6891 + }, + { + "epoch": 1.0156188605108054, + "grad_norm": 0.6520337462425232, + "learning_rate": 3.7367704197548176e-06, + "loss": 0.5368, + "step": 6892 + }, + { + "epoch": 1.0157662082514736, + "grad_norm": 0.5933197736740112, + "learning_rate": 3.7364334725758623e-06, + "loss": 0.516, + "step": 6893 + }, + { + "epoch": 1.0159135559921415, + "grad_norm": 0.5611729621887207, + "learning_rate": 3.7360964956612623e-06, + "loss": 0.5519, + "step": 6894 + }, + { + "epoch": 1.0160609037328094, + "grad_norm": 0.6424007415771484, + "learning_rate": 3.7357594890191203e-06, + "loss": 0.5387, + "step": 6895 + }, + { + "epoch": 1.0162082514734774, + "grad_norm": 0.5805339217185974, + "learning_rate": 3.7354224526575428e-06, + "loss": 0.5385, + "step": 6896 + }, + { + "epoch": 1.0163555992141453, + "grad_norm": 0.5881481170654297, + "learning_rate": 3.735085386584634e-06, + "loss": 0.5147, + "step": 6897 + }, + { + "epoch": 1.0165029469548132, + "grad_norm": 0.6232162714004517, + "learning_rate": 3.734748290808502e-06, + "loss": 0.5565, + "step": 6898 + }, + { + "epoch": 1.0166502946954814, + "grad_norm": 0.6170052289962769, + "learning_rate": 3.734411165337252e-06, + "loss": 0.5185, + "step": 6899 + }, + { + "epoch": 1.0167976424361493, + "grad_norm": 0.5903793573379517, + "learning_rate": 3.734074010178993e-06, + "loss": 0.5587, + "step": 6900 + }, + { + "epoch": 1.0169449901768173, + "grad_norm": 0.5885986089706421, + "learning_rate": 3.733736825341832e-06, + "loss": 0.5441, + "step": 6901 + }, + { + "epoch": 1.0170923379174852, + "grad_norm": 0.6235013604164124, + "learning_rate": 3.73339961083388e-06, + "loss": 0.5058, + "step": 6902 + }, + { + "epoch": 1.0172396856581531, + "grad_norm": 0.6283426880836487, + "learning_rate": 3.7330623666632454e-06, + "loss": 0.5484, + "step": 6903 + }, + { + "epoch": 1.0173870333988213, + "grad_norm": 0.5870009064674377, + "learning_rate": 3.7327250928380394e-06, + "loss": 0.5445, + "step": 6904 + }, + { + "epoch": 1.0175343811394892, + "grad_norm": 0.5829852223396301, + "learning_rate": 3.7323877893663724e-06, + "loss": 0.523, + "step": 6905 + }, + { + "epoch": 1.0176817288801572, + "grad_norm": 0.5709249973297119, + "learning_rate": 3.7320504562563576e-06, + "loss": 0.5523, + "step": 6906 + }, + { + "epoch": 1.0178290766208251, + "grad_norm": 0.6071498394012451, + "learning_rate": 3.731713093516107e-06, + "loss": 0.5708, + "step": 6907 + }, + { + "epoch": 1.017976424361493, + "grad_norm": 0.6153287887573242, + "learning_rate": 3.731375701153734e-06, + "loss": 0.5616, + "step": 6908 + }, + { + "epoch": 1.0181237721021612, + "grad_norm": 0.5964510440826416, + "learning_rate": 3.7310382791773534e-06, + "loss": 0.5492, + "step": 6909 + }, + { + "epoch": 1.0182711198428291, + "grad_norm": 0.5495530366897583, + "learning_rate": 3.7307008275950803e-06, + "loss": 0.5462, + "step": 6910 + }, + { + "epoch": 1.018418467583497, + "grad_norm": 0.5959994792938232, + "learning_rate": 3.7303633464150285e-06, + "loss": 0.5366, + "step": 6911 + }, + { + "epoch": 1.018565815324165, + "grad_norm": 0.5857171416282654, + "learning_rate": 3.7300258356453155e-06, + "loss": 0.5304, + "step": 6912 + }, + { + "epoch": 1.018713163064833, + "grad_norm": 0.5905296206474304, + "learning_rate": 3.729688295294058e-06, + "loss": 0.5452, + "step": 6913 + }, + { + "epoch": 1.0188605108055009, + "grad_norm": 0.5453545451164246, + "learning_rate": 3.729350725369374e-06, + "loss": 0.5236, + "step": 6914 + }, + { + "epoch": 1.019007858546169, + "grad_norm": 0.6208633780479431, + "learning_rate": 3.7290131258793814e-06, + "loss": 0.5135, + "step": 6915 + }, + { + "epoch": 1.019155206286837, + "grad_norm": 0.5810394287109375, + "learning_rate": 3.7286754968321997e-06, + "loss": 0.5324, + "step": 6916 + }, + { + "epoch": 1.019302554027505, + "grad_norm": 0.5644306540489197, + "learning_rate": 3.728337838235949e-06, + "loss": 0.514, + "step": 6917 + }, + { + "epoch": 1.0194499017681729, + "grad_norm": 0.602594256401062, + "learning_rate": 3.728000150098749e-06, + "loss": 0.5571, + "step": 6918 + }, + { + "epoch": 1.0195972495088408, + "grad_norm": 0.5818173289299011, + "learning_rate": 3.7276624324287215e-06, + "loss": 0.5348, + "step": 6919 + }, + { + "epoch": 1.019744597249509, + "grad_norm": 0.585392951965332, + "learning_rate": 3.7273246852339886e-06, + "loss": 0.5805, + "step": 6920 + }, + { + "epoch": 1.0198919449901769, + "grad_norm": 0.606370747089386, + "learning_rate": 3.726986908522672e-06, + "loss": 0.5228, + "step": 6921 + }, + { + "epoch": 1.0200392927308448, + "grad_norm": 0.5900963544845581, + "learning_rate": 3.726649102302896e-06, + "loss": 0.5435, + "step": 6922 + }, + { + "epoch": 1.0201866404715128, + "grad_norm": 0.5843120217323303, + "learning_rate": 3.7263112665827843e-06, + "loss": 0.5403, + "step": 6923 + }, + { + "epoch": 1.0203339882121807, + "grad_norm": 0.5747662782669067, + "learning_rate": 3.7259734013704617e-06, + "loss": 0.5451, + "step": 6924 + }, + { + "epoch": 1.0204813359528486, + "grad_norm": 0.5992105603218079, + "learning_rate": 3.7256355066740542e-06, + "loss": 0.5517, + "step": 6925 + }, + { + "epoch": 1.0206286836935168, + "grad_norm": 0.6073231101036072, + "learning_rate": 3.7252975825016875e-06, + "loss": 0.5281, + "step": 6926 + }, + { + "epoch": 1.0207760314341847, + "grad_norm": 0.5629758834838867, + "learning_rate": 3.724959628861489e-06, + "loss": 0.5437, + "step": 6927 + }, + { + "epoch": 1.0209233791748527, + "grad_norm": 0.541096031665802, + "learning_rate": 3.7246216457615846e-06, + "loss": 0.5372, + "step": 6928 + }, + { + "epoch": 1.0210707269155206, + "grad_norm": 0.5997256636619568, + "learning_rate": 3.724283633210105e-06, + "loss": 0.543, + "step": 6929 + }, + { + "epoch": 1.0212180746561885, + "grad_norm": 0.5690730214118958, + "learning_rate": 3.7239455912151785e-06, + "loss": 0.5585, + "step": 6930 + }, + { + "epoch": 1.0213654223968567, + "grad_norm": 0.585489809513092, + "learning_rate": 3.723607519784934e-06, + "loss": 0.5692, + "step": 6931 + }, + { + "epoch": 1.0215127701375246, + "grad_norm": 0.5858542323112488, + "learning_rate": 3.723269418927503e-06, + "loss": 0.5402, + "step": 6932 + }, + { + "epoch": 1.0216601178781926, + "grad_norm": 0.5649324655532837, + "learning_rate": 3.722931288651016e-06, + "loss": 0.5597, + "step": 6933 + }, + { + "epoch": 1.0218074656188605, + "grad_norm": 0.5856573581695557, + "learning_rate": 3.7225931289636052e-06, + "loss": 0.5553, + "step": 6934 + }, + { + "epoch": 1.0219548133595284, + "grad_norm": 0.6013530492782593, + "learning_rate": 3.7222549398734032e-06, + "loss": 0.5261, + "step": 6935 + }, + { + "epoch": 1.0221021611001964, + "grad_norm": 0.612244725227356, + "learning_rate": 3.721916721388543e-06, + "loss": 0.5193, + "step": 6936 + }, + { + "epoch": 1.0222495088408645, + "grad_norm": 0.5885947942733765, + "learning_rate": 3.7215784735171587e-06, + "loss": 0.5539, + "step": 6937 + }, + { + "epoch": 1.0223968565815325, + "grad_norm": 0.6160715818405151, + "learning_rate": 3.7212401962673856e-06, + "loss": 0.5464, + "step": 6938 + }, + { + "epoch": 1.0225442043222004, + "grad_norm": 0.604069709777832, + "learning_rate": 3.7209018896473583e-06, + "loss": 0.5583, + "step": 6939 + }, + { + "epoch": 1.0226915520628683, + "grad_norm": 0.5682252645492554, + "learning_rate": 3.720563553665213e-06, + "loss": 0.5303, + "step": 6940 + }, + { + "epoch": 1.0228388998035363, + "grad_norm": 0.607027530670166, + "learning_rate": 3.720225188329086e-06, + "loss": 0.5476, + "step": 6941 + }, + { + "epoch": 1.0229862475442044, + "grad_norm": 0.5855837464332581, + "learning_rate": 3.7198867936471167e-06, + "loss": 0.5444, + "step": 6942 + }, + { + "epoch": 1.0231335952848724, + "grad_norm": 0.5581984519958496, + "learning_rate": 3.7195483696274415e-06, + "loss": 0.5483, + "step": 6943 + }, + { + "epoch": 1.0232809430255403, + "grad_norm": 0.585788369178772, + "learning_rate": 3.7192099162782002e-06, + "loss": 0.5389, + "step": 6944 + }, + { + "epoch": 1.0234282907662082, + "grad_norm": 0.5821696519851685, + "learning_rate": 3.718871433607533e-06, + "loss": 0.5253, + "step": 6945 + }, + { + "epoch": 1.0235756385068762, + "grad_norm": 0.6263024806976318, + "learning_rate": 3.718532921623579e-06, + "loss": 0.556, + "step": 6946 + }, + { + "epoch": 1.023722986247544, + "grad_norm": 0.5607151985168457, + "learning_rate": 3.7181943803344787e-06, + "loss": 0.5431, + "step": 6947 + }, + { + "epoch": 1.0238703339882123, + "grad_norm": 0.5990955233573914, + "learning_rate": 3.717855809748376e-06, + "loss": 0.5427, + "step": 6948 + }, + { + "epoch": 1.0240176817288802, + "grad_norm": 0.5926447510719299, + "learning_rate": 3.7175172098734126e-06, + "loss": 0.5015, + "step": 6949 + }, + { + "epoch": 1.0241650294695481, + "grad_norm": 0.5632432103157043, + "learning_rate": 3.717178580717731e-06, + "loss": 0.5166, + "step": 6950 + }, + { + "epoch": 1.024312377210216, + "grad_norm": 0.5988313555717468, + "learning_rate": 3.7168399222894747e-06, + "loss": 0.5589, + "step": 6951 + }, + { + "epoch": 1.024459724950884, + "grad_norm": 0.5895028710365295, + "learning_rate": 3.7165012345967897e-06, + "loss": 0.5571, + "step": 6952 + }, + { + "epoch": 1.0246070726915522, + "grad_norm": 0.5915322303771973, + "learning_rate": 3.7161625176478205e-06, + "loss": 0.5911, + "step": 6953 + }, + { + "epoch": 1.02475442043222, + "grad_norm": 0.5838443636894226, + "learning_rate": 3.7158237714507134e-06, + "loss": 0.549, + "step": 6954 + }, + { + "epoch": 1.024901768172888, + "grad_norm": 0.59836345911026, + "learning_rate": 3.715484996013614e-06, + "loss": 0.55, + "step": 6955 + }, + { + "epoch": 1.025049115913556, + "grad_norm": 0.5869991779327393, + "learning_rate": 3.715146191344672e-06, + "loss": 0.5353, + "step": 6956 + }, + { + "epoch": 1.025196463654224, + "grad_norm": 0.5707433223724365, + "learning_rate": 3.7148073574520326e-06, + "loss": 0.5476, + "step": 6957 + }, + { + "epoch": 1.025343811394892, + "grad_norm": 0.5558188557624817, + "learning_rate": 3.714468494343847e-06, + "loss": 0.5325, + "step": 6958 + }, + { + "epoch": 1.02549115913556, + "grad_norm": 0.6265944242477417, + "learning_rate": 3.714129602028263e-06, + "loss": 0.5158, + "step": 6959 + }, + { + "epoch": 1.025638506876228, + "grad_norm": 0.5973754525184631, + "learning_rate": 3.7137906805134316e-06, + "loss": 0.5667, + "step": 6960 + }, + { + "epoch": 1.0257858546168959, + "grad_norm": 0.5647190809249878, + "learning_rate": 3.713451729807504e-06, + "loss": 0.5475, + "step": 6961 + }, + { + "epoch": 1.0259332023575638, + "grad_norm": 0.6267011165618896, + "learning_rate": 3.7131127499186314e-06, + "loss": 0.5416, + "step": 6962 + }, + { + "epoch": 1.0260805500982317, + "grad_norm": 0.5598411560058594, + "learning_rate": 3.712773740854966e-06, + "loss": 0.5345, + "step": 6963 + }, + { + "epoch": 1.0262278978389, + "grad_norm": 0.5945513248443604, + "learning_rate": 3.712434702624661e-06, + "loss": 0.5229, + "step": 6964 + }, + { + "epoch": 1.0263752455795678, + "grad_norm": 0.5945391654968262, + "learning_rate": 3.7120956352358707e-06, + "loss": 0.5448, + "step": 6965 + }, + { + "epoch": 1.0265225933202358, + "grad_norm": 0.6277302503585815, + "learning_rate": 3.711756538696748e-06, + "loss": 0.5477, + "step": 6966 + }, + { + "epoch": 1.0266699410609037, + "grad_norm": 0.5808442234992981, + "learning_rate": 3.711417413015449e-06, + "loss": 0.5413, + "step": 6967 + }, + { + "epoch": 1.0268172888015716, + "grad_norm": 0.6209503412246704, + "learning_rate": 3.7110782582001298e-06, + "loss": 0.5512, + "step": 6968 + }, + { + "epoch": 1.0269646365422398, + "grad_norm": 0.6075316071510315, + "learning_rate": 3.710739074258946e-06, + "loss": 0.5588, + "step": 6969 + }, + { + "epoch": 1.0271119842829077, + "grad_norm": 0.5749371647834778, + "learning_rate": 3.710399861200056e-06, + "loss": 0.5543, + "step": 6970 + }, + { + "epoch": 1.0272593320235757, + "grad_norm": 0.5937145352363586, + "learning_rate": 3.710060619031617e-06, + "loss": 0.5514, + "step": 6971 + }, + { + "epoch": 1.0274066797642436, + "grad_norm": 0.573373556137085, + "learning_rate": 3.7097213477617866e-06, + "loss": 0.5597, + "step": 6972 + }, + { + "epoch": 1.0275540275049115, + "grad_norm": 0.5933250188827515, + "learning_rate": 3.709382047398726e-06, + "loss": 0.5237, + "step": 6973 + }, + { + "epoch": 1.0277013752455795, + "grad_norm": 0.599058210849762, + "learning_rate": 3.7090427179505944e-06, + "loss": 0.5377, + "step": 6974 + }, + { + "epoch": 1.0278487229862476, + "grad_norm": 0.6087549924850464, + "learning_rate": 3.708703359425553e-06, + "loss": 0.4976, + "step": 6975 + }, + { + "epoch": 1.0279960707269156, + "grad_norm": 0.576706051826477, + "learning_rate": 3.708363971831762e-06, + "loss": 0.5431, + "step": 6976 + }, + { + "epoch": 1.0281434184675835, + "grad_norm": 0.5721247792243958, + "learning_rate": 3.708024555177384e-06, + "loss": 0.5651, + "step": 6977 + }, + { + "epoch": 1.0282907662082514, + "grad_norm": 0.5993499159812927, + "learning_rate": 3.7076851094705825e-06, + "loss": 0.5751, + "step": 6978 + }, + { + "epoch": 1.0284381139489194, + "grad_norm": 0.5969237685203552, + "learning_rate": 3.7073456347195204e-06, + "loss": 0.5455, + "step": 6979 + }, + { + "epoch": 1.0285854616895875, + "grad_norm": 0.6094169616699219, + "learning_rate": 3.707006130932362e-06, + "loss": 0.5599, + "step": 6980 + }, + { + "epoch": 1.0287328094302555, + "grad_norm": 0.6425067186355591, + "learning_rate": 3.7066665981172723e-06, + "loss": 0.5298, + "step": 6981 + }, + { + "epoch": 1.0288801571709234, + "grad_norm": 0.5599893927574158, + "learning_rate": 3.706327036282417e-06, + "loss": 0.5631, + "step": 6982 + }, + { + "epoch": 1.0290275049115913, + "grad_norm": 0.5938907265663147, + "learning_rate": 3.705987445435962e-06, + "loss": 0.5246, + "step": 6983 + }, + { + "epoch": 1.0291748526522593, + "grad_norm": 0.5540385246276855, + "learning_rate": 3.705647825586075e-06, + "loss": 0.5754, + "step": 6984 + }, + { + "epoch": 1.0293222003929272, + "grad_norm": 0.5984258651733398, + "learning_rate": 3.7053081767409238e-06, + "loss": 0.5451, + "step": 6985 + }, + { + "epoch": 1.0294695481335954, + "grad_norm": 0.5839360356330872, + "learning_rate": 3.7049684989086753e-06, + "loss": 0.5359, + "step": 6986 + }, + { + "epoch": 1.0296168958742633, + "grad_norm": 0.6191169619560242, + "learning_rate": 3.7046287920974998e-06, + "loss": 0.5334, + "step": 6987 + }, + { + "epoch": 1.0297642436149312, + "grad_norm": 0.5937367677688599, + "learning_rate": 3.7042890563155676e-06, + "loss": 0.5017, + "step": 6988 + }, + { + "epoch": 1.0299115913555992, + "grad_norm": 0.5985887050628662, + "learning_rate": 3.703949291571047e-06, + "loss": 0.5742, + "step": 6989 + }, + { + "epoch": 1.0300589390962671, + "grad_norm": 0.5586316585540771, + "learning_rate": 3.703609497872112e-06, + "loss": 0.5458, + "step": 6990 + }, + { + "epoch": 1.0302062868369353, + "grad_norm": 0.6345371007919312, + "learning_rate": 3.7032696752269324e-06, + "loss": 0.5588, + "step": 6991 + }, + { + "epoch": 1.0303536345776032, + "grad_norm": 0.558776319026947, + "learning_rate": 3.7029298236436815e-06, + "loss": 0.5065, + "step": 6992 + }, + { + "epoch": 1.0305009823182711, + "grad_norm": 0.5808897018432617, + "learning_rate": 3.702589943130533e-06, + "loss": 0.5248, + "step": 6993 + }, + { + "epoch": 1.030648330058939, + "grad_norm": 0.602005124092102, + "learning_rate": 3.70225003369566e-06, + "loss": 0.5379, + "step": 6994 + }, + { + "epoch": 1.030795677799607, + "grad_norm": 0.6000505685806274, + "learning_rate": 3.7019100953472377e-06, + "loss": 0.5524, + "step": 6995 + }, + { + "epoch": 1.030943025540275, + "grad_norm": 0.5556173920631409, + "learning_rate": 3.701570128093441e-06, + "loss": 0.5142, + "step": 6996 + }, + { + "epoch": 1.031090373280943, + "grad_norm": 0.5770368576049805, + "learning_rate": 3.701230131942446e-06, + "loss": 0.562, + "step": 6997 + }, + { + "epoch": 1.031237721021611, + "grad_norm": 0.6113936901092529, + "learning_rate": 3.7008901069024305e-06, + "loss": 0.5252, + "step": 6998 + }, + { + "epoch": 1.031385068762279, + "grad_norm": 0.609965980052948, + "learning_rate": 3.700550052981571e-06, + "loss": 0.5607, + "step": 6999 + }, + { + "epoch": 1.031532416502947, + "grad_norm": 0.5998886227607727, + "learning_rate": 3.7002099701880457e-06, + "loss": 0.5344, + "step": 7000 + }, + { + "epoch": 1.0316797642436148, + "grad_norm": 0.6054246425628662, + "learning_rate": 3.6998698585300335e-06, + "loss": 0.5517, + "step": 7001 + }, + { + "epoch": 1.031827111984283, + "grad_norm": 0.5810316801071167, + "learning_rate": 3.699529718015714e-06, + "loss": 0.5303, + "step": 7002 + }, + { + "epoch": 1.031974459724951, + "grad_norm": 0.5528673529624939, + "learning_rate": 3.699189548653268e-06, + "loss": 0.5187, + "step": 7003 + }, + { + "epoch": 1.0321218074656189, + "grad_norm": 0.590428352355957, + "learning_rate": 3.698849350450875e-06, + "loss": 0.5448, + "step": 7004 + }, + { + "epoch": 1.0322691552062868, + "grad_norm": 0.5887482166290283, + "learning_rate": 3.698509123416718e-06, + "loss": 0.5539, + "step": 7005 + }, + { + "epoch": 1.0324165029469548, + "grad_norm": 0.6430219411849976, + "learning_rate": 3.6981688675589785e-06, + "loss": 0.513, + "step": 7006 + }, + { + "epoch": 1.0325638506876227, + "grad_norm": 0.5202969908714294, + "learning_rate": 3.6978285828858395e-06, + "loss": 0.5409, + "step": 7007 + }, + { + "epoch": 1.0327111984282908, + "grad_norm": 0.5573074221611023, + "learning_rate": 3.6974882694054847e-06, + "loss": 0.5296, + "step": 7008 + }, + { + "epoch": 1.0328585461689588, + "grad_norm": 0.5887788534164429, + "learning_rate": 3.6971479271261e-06, + "loss": 0.5194, + "step": 7009 + }, + { + "epoch": 1.0330058939096267, + "grad_norm": 0.5747781991958618, + "learning_rate": 3.696807556055868e-06, + "loss": 0.5431, + "step": 7010 + }, + { + "epoch": 1.0331532416502947, + "grad_norm": 0.5698339343070984, + "learning_rate": 3.6964671562029757e-06, + "loss": 0.5439, + "step": 7011 + }, + { + "epoch": 1.0333005893909626, + "grad_norm": 0.6545946598052979, + "learning_rate": 3.69612672757561e-06, + "loss": 0.5418, + "step": 7012 + }, + { + "epoch": 1.0334479371316307, + "grad_norm": 0.5601657629013062, + "learning_rate": 3.695786270181957e-06, + "loss": 0.5049, + "step": 7013 + }, + { + "epoch": 1.0335952848722987, + "grad_norm": 0.6031295657157898, + "learning_rate": 3.695445784030205e-06, + "loss": 0.5474, + "step": 7014 + }, + { + "epoch": 1.0337426326129666, + "grad_norm": 0.5855572819709778, + "learning_rate": 3.6951052691285427e-06, + "loss": 0.5543, + "step": 7015 + }, + { + "epoch": 1.0338899803536346, + "grad_norm": 0.5777137279510498, + "learning_rate": 3.694764725485159e-06, + "loss": 0.5397, + "step": 7016 + }, + { + "epoch": 1.0340373280943025, + "grad_norm": 0.5898032784461975, + "learning_rate": 3.694424153108244e-06, + "loss": 0.5372, + "step": 7017 + }, + { + "epoch": 1.0341846758349704, + "grad_norm": 0.5904741287231445, + "learning_rate": 3.6940835520059885e-06, + "loss": 0.558, + "step": 7018 + }, + { + "epoch": 1.0343320235756386, + "grad_norm": 0.5963598489761353, + "learning_rate": 3.693742922186584e-06, + "loss": 0.5557, + "step": 7019 + }, + { + "epoch": 1.0344793713163065, + "grad_norm": 0.5920001268386841, + "learning_rate": 3.6934022636582216e-06, + "loss": 0.5634, + "step": 7020 + }, + { + "epoch": 1.0346267190569745, + "grad_norm": 0.5896898508071899, + "learning_rate": 3.693061576429095e-06, + "loss": 0.5544, + "step": 7021 + }, + { + "epoch": 1.0347740667976424, + "grad_norm": 0.6190035939216614, + "learning_rate": 3.6927208605073965e-06, + "loss": 0.5234, + "step": 7022 + }, + { + "epoch": 1.0349214145383103, + "grad_norm": 0.615669846534729, + "learning_rate": 3.6923801159013214e-06, + "loss": 0.5261, + "step": 7023 + }, + { + "epoch": 1.0350687622789785, + "grad_norm": 0.6169673800468445, + "learning_rate": 3.6920393426190635e-06, + "loss": 0.4887, + "step": 7024 + }, + { + "epoch": 1.0352161100196464, + "grad_norm": 0.6080037355422974, + "learning_rate": 3.6916985406688184e-06, + "loss": 0.5457, + "step": 7025 + }, + { + "epoch": 1.0353634577603144, + "grad_norm": 0.5731981992721558, + "learning_rate": 3.691357710058782e-06, + "loss": 0.5473, + "step": 7026 + }, + { + "epoch": 1.0355108055009823, + "grad_norm": 0.6155089735984802, + "learning_rate": 3.6910168507971523e-06, + "loss": 0.5376, + "step": 7027 + }, + { + "epoch": 1.0356581532416502, + "grad_norm": 0.592800498008728, + "learning_rate": 3.690675962892125e-06, + "loss": 0.5453, + "step": 7028 + }, + { + "epoch": 1.0358055009823182, + "grad_norm": 0.57892245054245, + "learning_rate": 3.6903350463519e-06, + "loss": 0.5523, + "step": 7029 + }, + { + "epoch": 1.0359528487229863, + "grad_norm": 0.6164074540138245, + "learning_rate": 3.689994101184675e-06, + "loss": 0.5504, + "step": 7030 + }, + { + "epoch": 1.0361001964636543, + "grad_norm": 0.5844961404800415, + "learning_rate": 3.68965312739865e-06, + "loss": 0.5229, + "step": 7031 + }, + { + "epoch": 1.0362475442043222, + "grad_norm": 0.5908360481262207, + "learning_rate": 3.689312125002026e-06, + "loss": 0.543, + "step": 7032 + }, + { + "epoch": 1.0363948919449901, + "grad_norm": 0.6298962831497192, + "learning_rate": 3.688971094003003e-06, + "loss": 0.5221, + "step": 7033 + }, + { + "epoch": 1.036542239685658, + "grad_norm": 0.5588817000389099, + "learning_rate": 3.688630034409782e-06, + "loss": 0.541, + "step": 7034 + }, + { + "epoch": 1.0366895874263262, + "grad_norm": 0.5955601930618286, + "learning_rate": 3.6882889462305673e-06, + "loss": 0.5248, + "step": 7035 + }, + { + "epoch": 1.0368369351669942, + "grad_norm": 0.5664930939674377, + "learning_rate": 3.68794782947356e-06, + "loss": 0.578, + "step": 7036 + }, + { + "epoch": 1.036984282907662, + "grad_norm": 0.5782855153083801, + "learning_rate": 3.687606684146964e-06, + "loss": 0.5491, + "step": 7037 + }, + { + "epoch": 1.03713163064833, + "grad_norm": 0.5970259308815002, + "learning_rate": 3.6872655102589857e-06, + "loss": 0.5312, + "step": 7038 + }, + { + "epoch": 1.037278978388998, + "grad_norm": 0.5399637818336487, + "learning_rate": 3.686924307817828e-06, + "loss": 0.5399, + "step": 7039 + }, + { + "epoch": 1.037426326129666, + "grad_norm": 0.576542854309082, + "learning_rate": 3.6865830768316974e-06, + "loss": 0.5591, + "step": 7040 + }, + { + "epoch": 1.037573673870334, + "grad_norm": 0.5865419507026672, + "learning_rate": 3.6862418173088006e-06, + "loss": 0.5306, + "step": 7041 + }, + { + "epoch": 1.037721021611002, + "grad_norm": 0.6091778874397278, + "learning_rate": 3.685900529257344e-06, + "loss": 0.4908, + "step": 7042 + }, + { + "epoch": 1.03786836935167, + "grad_norm": 0.6200711727142334, + "learning_rate": 3.685559212685536e-06, + "loss": 0.5326, + "step": 7043 + }, + { + "epoch": 1.0380157170923379, + "grad_norm": 0.6200711131095886, + "learning_rate": 3.6852178676015853e-06, + "loss": 0.5443, + "step": 7044 + }, + { + "epoch": 1.0381630648330058, + "grad_norm": 0.5572575926780701, + "learning_rate": 3.6848764940137005e-06, + "loss": 0.514, + "step": 7045 + }, + { + "epoch": 1.038310412573674, + "grad_norm": 0.5861216187477112, + "learning_rate": 3.684535091930092e-06, + "loss": 0.5549, + "step": 7046 + }, + { + "epoch": 1.038457760314342, + "grad_norm": 0.6035165190696716, + "learning_rate": 3.68419366135897e-06, + "loss": 0.5366, + "step": 7047 + }, + { + "epoch": 1.0386051080550098, + "grad_norm": 0.5891938805580139, + "learning_rate": 3.683852202308546e-06, + "loss": 0.5422, + "step": 7048 + }, + { + "epoch": 1.0387524557956778, + "grad_norm": 0.5795395374298096, + "learning_rate": 3.6835107147870318e-06, + "loss": 0.4874, + "step": 7049 + }, + { + "epoch": 1.0388998035363457, + "grad_norm": 0.5737882852554321, + "learning_rate": 3.6831691988026395e-06, + "loss": 0.5049, + "step": 7050 + }, + { + "epoch": 1.0390471512770139, + "grad_norm": 0.5976554751396179, + "learning_rate": 3.6828276543635837e-06, + "loss": 0.5747, + "step": 7051 + }, + { + "epoch": 1.0391944990176818, + "grad_norm": 0.596429169178009, + "learning_rate": 3.6824860814780774e-06, + "loss": 0.5487, + "step": 7052 + }, + { + "epoch": 1.0393418467583497, + "grad_norm": 0.6047536134719849, + "learning_rate": 3.6821444801543348e-06, + "loss": 0.552, + "step": 7053 + }, + { + "epoch": 1.0394891944990177, + "grad_norm": 0.5946558713912964, + "learning_rate": 3.6818028504005725e-06, + "loss": 0.5273, + "step": 7054 + }, + { + "epoch": 1.0396365422396856, + "grad_norm": 0.5743873715400696, + "learning_rate": 3.681461192225006e-06, + "loss": 0.5604, + "step": 7055 + }, + { + "epoch": 1.0397838899803535, + "grad_norm": 0.5717832446098328, + "learning_rate": 3.6811195056358517e-06, + "loss": 0.489, + "step": 7056 + }, + { + "epoch": 1.0399312377210217, + "grad_norm": 0.5782458186149597, + "learning_rate": 3.6807777906413277e-06, + "loss": 0.5132, + "step": 7057 + }, + { + "epoch": 1.0400785854616896, + "grad_norm": 0.5957342982292175, + "learning_rate": 3.6804360472496515e-06, + "loss": 0.5359, + "step": 7058 + }, + { + "epoch": 1.0402259332023576, + "grad_norm": 0.5674042701721191, + "learning_rate": 3.6800942754690418e-06, + "loss": 0.5109, + "step": 7059 + }, + { + "epoch": 1.0403732809430255, + "grad_norm": 0.6165805459022522, + "learning_rate": 3.679752475307718e-06, + "loss": 0.5016, + "step": 7060 + }, + { + "epoch": 1.0405206286836934, + "grad_norm": 0.5826864838600159, + "learning_rate": 3.679410646773901e-06, + "loss": 0.5138, + "step": 7061 + }, + { + "epoch": 1.0406679764243616, + "grad_norm": 0.5948231220245361, + "learning_rate": 3.679068789875811e-06, + "loss": 0.5239, + "step": 7062 + }, + { + "epoch": 1.0408153241650295, + "grad_norm": 0.5797086358070374, + "learning_rate": 3.6787269046216695e-06, + "loss": 0.5368, + "step": 7063 + }, + { + "epoch": 1.0409626719056975, + "grad_norm": 0.5643095970153809, + "learning_rate": 3.678384991019699e-06, + "loss": 0.5393, + "step": 7064 + }, + { + "epoch": 1.0411100196463654, + "grad_norm": 0.6281516551971436, + "learning_rate": 3.6780430490781214e-06, + "loss": 0.5511, + "step": 7065 + }, + { + "epoch": 1.0412573673870333, + "grad_norm": 0.5654546022415161, + "learning_rate": 3.677701078805162e-06, + "loss": 0.5309, + "step": 7066 + }, + { + "epoch": 1.0414047151277013, + "grad_norm": 0.6092115640640259, + "learning_rate": 3.6773590802090434e-06, + "loss": 0.5537, + "step": 7067 + }, + { + "epoch": 1.0415520628683694, + "grad_norm": 0.595291793346405, + "learning_rate": 3.6770170532979917e-06, + "loss": 0.5616, + "step": 7068 + }, + { + "epoch": 1.0416994106090374, + "grad_norm": 0.5847666263580322, + "learning_rate": 3.6766749980802314e-06, + "loss": 0.5451, + "step": 7069 + }, + { + "epoch": 1.0418467583497053, + "grad_norm": 0.573360025882721, + "learning_rate": 3.6763329145639893e-06, + "loss": 0.5364, + "step": 7070 + }, + { + "epoch": 1.0419941060903732, + "grad_norm": 0.5641289949417114, + "learning_rate": 3.675990802757492e-06, + "loss": 0.5498, + "step": 7071 + }, + { + "epoch": 1.0421414538310412, + "grad_norm": 0.5709989666938782, + "learning_rate": 3.675648662668968e-06, + "loss": 0.5145, + "step": 7072 + }, + { + "epoch": 1.0422888015717093, + "grad_norm": 0.5956394672393799, + "learning_rate": 3.6753064943066452e-06, + "loss": 0.5437, + "step": 7073 + }, + { + "epoch": 1.0424361493123773, + "grad_norm": 0.6350311636924744, + "learning_rate": 3.6749642976787524e-06, + "loss": 0.5638, + "step": 7074 + }, + { + "epoch": 1.0425834970530452, + "grad_norm": 0.5690354704856873, + "learning_rate": 3.6746220727935195e-06, + "loss": 0.5241, + "step": 7075 + }, + { + "epoch": 1.0427308447937131, + "grad_norm": 0.5862532258033752, + "learning_rate": 3.6742798196591766e-06, + "loss": 0.5603, + "step": 7076 + }, + { + "epoch": 1.042878192534381, + "grad_norm": 0.5739933252334595, + "learning_rate": 3.6739375382839548e-06, + "loss": 0.5583, + "step": 7077 + }, + { + "epoch": 1.043025540275049, + "grad_norm": 0.6058277487754822, + "learning_rate": 3.6735952286760855e-06, + "loss": 0.5229, + "step": 7078 + }, + { + "epoch": 1.0431728880157172, + "grad_norm": 0.6250144243240356, + "learning_rate": 3.673252890843802e-06, + "loss": 0.5345, + "step": 7079 + }, + { + "epoch": 1.043320235756385, + "grad_norm": 0.599766731262207, + "learning_rate": 3.672910524795336e-06, + "loss": 0.5114, + "step": 7080 + }, + { + "epoch": 1.043467583497053, + "grad_norm": 0.5812764167785645, + "learning_rate": 3.6725681305389226e-06, + "loss": 0.5629, + "step": 7081 + }, + { + "epoch": 1.043614931237721, + "grad_norm": 0.6004272699356079, + "learning_rate": 3.6722257080827956e-06, + "loss": 0.5357, + "step": 7082 + }, + { + "epoch": 1.043762278978389, + "grad_norm": 0.5994766354560852, + "learning_rate": 3.67188325743519e-06, + "loss": 0.5336, + "step": 7083 + }, + { + "epoch": 1.043909626719057, + "grad_norm": 0.6055973768234253, + "learning_rate": 3.6715407786043413e-06, + "loss": 0.5474, + "step": 7084 + }, + { + "epoch": 1.044056974459725, + "grad_norm": 0.5702179670333862, + "learning_rate": 3.6711982715984867e-06, + "loss": 0.5428, + "step": 7085 + }, + { + "epoch": 1.044204322200393, + "grad_norm": 0.6007552742958069, + "learning_rate": 3.670855736425863e-06, + "loss": 0.5414, + "step": 7086 + }, + { + "epoch": 1.0443516699410609, + "grad_norm": 0.5657010078430176, + "learning_rate": 3.670513173094708e-06, + "loss": 0.5484, + "step": 7087 + }, + { + "epoch": 1.0444990176817288, + "grad_norm": 0.5574595332145691, + "learning_rate": 3.67017058161326e-06, + "loss": 0.4752, + "step": 7088 + }, + { + "epoch": 1.0446463654223967, + "grad_norm": 0.6335998177528381, + "learning_rate": 3.6698279619897585e-06, + "loss": 0.5426, + "step": 7089 + }, + { + "epoch": 1.044793713163065, + "grad_norm": 0.6100543141365051, + "learning_rate": 3.669485314232443e-06, + "loss": 0.5164, + "step": 7090 + }, + { + "epoch": 1.0449410609037328, + "grad_norm": 0.5867056846618652, + "learning_rate": 3.6691426383495544e-06, + "loss": 0.5858, + "step": 7091 + }, + { + "epoch": 1.0450884086444008, + "grad_norm": 0.6074063777923584, + "learning_rate": 3.6687999343493336e-06, + "loss": 0.5332, + "step": 7092 + }, + { + "epoch": 1.0452357563850687, + "grad_norm": 0.599074125289917, + "learning_rate": 3.668457202240023e-06, + "loss": 0.5366, + "step": 7093 + }, + { + "epoch": 1.0453831041257367, + "grad_norm": 0.6331992149353027, + "learning_rate": 3.6681144420298644e-06, + "loss": 0.5376, + "step": 7094 + }, + { + "epoch": 1.0455304518664048, + "grad_norm": 0.5524094104766846, + "learning_rate": 3.6677716537271014e-06, + "loss": 0.5573, + "step": 7095 + }, + { + "epoch": 1.0456777996070727, + "grad_norm": 0.6323485970497131, + "learning_rate": 3.6674288373399776e-06, + "loss": 0.5334, + "step": 7096 + }, + { + "epoch": 1.0458251473477407, + "grad_norm": 0.7073274254798889, + "learning_rate": 3.6670859928767376e-06, + "loss": 0.5574, + "step": 7097 + }, + { + "epoch": 1.0459724950884086, + "grad_norm": 0.5537186861038208, + "learning_rate": 3.6667431203456277e-06, + "loss": 0.5514, + "step": 7098 + }, + { + "epoch": 1.0461198428290766, + "grad_norm": 0.598686933517456, + "learning_rate": 3.6664002197548925e-06, + "loss": 0.554, + "step": 7099 + }, + { + "epoch": 1.0462671905697447, + "grad_norm": 0.5858633518218994, + "learning_rate": 3.66605729111278e-06, + "loss": 0.5413, + "step": 7100 + }, + { + "epoch": 1.0464145383104126, + "grad_norm": 0.5541053414344788, + "learning_rate": 3.665714334427536e-06, + "loss": 0.5476, + "step": 7101 + }, + { + "epoch": 1.0465618860510806, + "grad_norm": 0.6071951389312744, + "learning_rate": 3.6653713497074083e-06, + "loss": 0.5299, + "step": 7102 + }, + { + "epoch": 1.0467092337917485, + "grad_norm": 0.6235869526863098, + "learning_rate": 3.6650283369606467e-06, + "loss": 0.5659, + "step": 7103 + }, + { + "epoch": 1.0468565815324165, + "grad_norm": 0.590246856212616, + "learning_rate": 3.6646852961954997e-06, + "loss": 0.5499, + "step": 7104 + }, + { + "epoch": 1.0470039292730844, + "grad_norm": 0.5745143294334412, + "learning_rate": 3.6643422274202183e-06, + "loss": 0.5172, + "step": 7105 + }, + { + "epoch": 1.0471512770137525, + "grad_norm": 0.5896230936050415, + "learning_rate": 3.663999130643052e-06, + "loss": 0.5551, + "step": 7106 + }, + { + "epoch": 1.0472986247544205, + "grad_norm": 0.6128101944923401, + "learning_rate": 3.6636560058722527e-06, + "loss": 0.5059, + "step": 7107 + }, + { + "epoch": 1.0474459724950884, + "grad_norm": 0.5541479587554932, + "learning_rate": 3.6633128531160715e-06, + "loss": 0.5297, + "step": 7108 + }, + { + "epoch": 1.0475933202357564, + "grad_norm": 0.6647347807884216, + "learning_rate": 3.6629696723827623e-06, + "loss": 0.5372, + "step": 7109 + }, + { + "epoch": 1.0477406679764243, + "grad_norm": 0.5623935461044312, + "learning_rate": 3.6626264636805785e-06, + "loss": 0.523, + "step": 7110 + }, + { + "epoch": 1.0478880157170924, + "grad_norm": 0.5701240301132202, + "learning_rate": 3.662283227017773e-06, + "loss": 0.5533, + "step": 7111 + }, + { + "epoch": 1.0480353634577604, + "grad_norm": 0.5859612822532654, + "learning_rate": 3.6619399624026015e-06, + "loss": 0.5708, + "step": 7112 + }, + { + "epoch": 1.0481827111984283, + "grad_norm": 0.5676068663597107, + "learning_rate": 3.6615966698433186e-06, + "loss": 0.5039, + "step": 7113 + }, + { + "epoch": 1.0483300589390963, + "grad_norm": 0.5981242656707764, + "learning_rate": 3.6612533493481806e-06, + "loss": 0.5481, + "step": 7114 + }, + { + "epoch": 1.0484774066797642, + "grad_norm": 0.5995326042175293, + "learning_rate": 3.660910000925444e-06, + "loss": 0.5151, + "step": 7115 + }, + { + "epoch": 1.0486247544204321, + "grad_norm": 0.6228243112564087, + "learning_rate": 3.6605666245833666e-06, + "loss": 0.5564, + "step": 7116 + }, + { + "epoch": 1.0487721021611003, + "grad_norm": 0.5702508091926575, + "learning_rate": 3.660223220330206e-06, + "loss": 0.5378, + "step": 7117 + }, + { + "epoch": 1.0489194499017682, + "grad_norm": 0.5517382621765137, + "learning_rate": 3.6598797881742214e-06, + "loss": 0.5291, + "step": 7118 + }, + { + "epoch": 1.0490667976424362, + "grad_norm": 0.5988302230834961, + "learning_rate": 3.659536328123672e-06, + "loss": 0.5048, + "step": 7119 + }, + { + "epoch": 1.049214145383104, + "grad_norm": 0.6128009557723999, + "learning_rate": 3.6591928401868177e-06, + "loss": 0.5475, + "step": 7120 + }, + { + "epoch": 1.049361493123772, + "grad_norm": 0.558975100517273, + "learning_rate": 3.6588493243719193e-06, + "loss": 0.5395, + "step": 7121 + }, + { + "epoch": 1.0495088408644402, + "grad_norm": 0.5727588534355164, + "learning_rate": 3.658505780687238e-06, + "loss": 0.5729, + "step": 7122 + }, + { + "epoch": 1.0496561886051081, + "grad_norm": 0.5740322470664978, + "learning_rate": 3.658162209141036e-06, + "loss": 0.5589, + "step": 7123 + }, + { + "epoch": 1.049803536345776, + "grad_norm": 0.6039756536483765, + "learning_rate": 3.657818609741577e-06, + "loss": 0.5348, + "step": 7124 + }, + { + "epoch": 1.049950884086444, + "grad_norm": 0.5848475098609924, + "learning_rate": 3.657474982497122e-06, + "loss": 0.5241, + "step": 7125 + }, + { + "epoch": 1.050098231827112, + "grad_norm": 0.557646632194519, + "learning_rate": 3.657131327415937e-06, + "loss": 0.5275, + "step": 7126 + }, + { + "epoch": 1.0502455795677799, + "grad_norm": 0.6418898701667786, + "learning_rate": 3.6567876445062868e-06, + "loss": 0.5384, + "step": 7127 + }, + { + "epoch": 1.050392927308448, + "grad_norm": 0.5736205577850342, + "learning_rate": 3.6564439337764364e-06, + "loss": 0.5804, + "step": 7128 + }, + { + "epoch": 1.050540275049116, + "grad_norm": 0.5659908652305603, + "learning_rate": 3.656100195234651e-06, + "loss": 0.5017, + "step": 7129 + }, + { + "epoch": 1.050687622789784, + "grad_norm": 0.5751831531524658, + "learning_rate": 3.655756428889199e-06, + "loss": 0.541, + "step": 7130 + }, + { + "epoch": 1.0508349705304518, + "grad_norm": 0.6197642683982849, + "learning_rate": 3.6554126347483465e-06, + "loss": 0.5342, + "step": 7131 + }, + { + "epoch": 1.0509823182711198, + "grad_norm": 0.5780088901519775, + "learning_rate": 3.655068812820362e-06, + "loss": 0.5521, + "step": 7132 + }, + { + "epoch": 1.051129666011788, + "grad_norm": 0.5688672065734863, + "learning_rate": 3.6547249631135146e-06, + "loss": 0.5097, + "step": 7133 + }, + { + "epoch": 1.0512770137524559, + "grad_norm": 0.5759710669517517, + "learning_rate": 3.654381085636073e-06, + "loss": 0.5235, + "step": 7134 + }, + { + "epoch": 1.0514243614931238, + "grad_norm": 0.6058147549629211, + "learning_rate": 3.6540371803963084e-06, + "loss": 0.5156, + "step": 7135 + }, + { + "epoch": 1.0515717092337917, + "grad_norm": 0.5818078517913818, + "learning_rate": 3.65369324740249e-06, + "loss": 0.5485, + "step": 7136 + }, + { + "epoch": 1.0517190569744597, + "grad_norm": 0.5649715065956116, + "learning_rate": 3.653349286662891e-06, + "loss": 0.5426, + "step": 7137 + }, + { + "epoch": 1.0518664047151276, + "grad_norm": 0.5723581910133362, + "learning_rate": 3.6530052981857822e-06, + "loss": 0.5241, + "step": 7138 + }, + { + "epoch": 1.0520137524557958, + "grad_norm": 0.5770206451416016, + "learning_rate": 3.6526612819794367e-06, + "loss": 0.5292, + "step": 7139 + }, + { + "epoch": 1.0521611001964637, + "grad_norm": 0.5662242770195007, + "learning_rate": 3.6523172380521283e-06, + "loss": 0.4888, + "step": 7140 + }, + { + "epoch": 1.0523084479371316, + "grad_norm": 0.6010959148406982, + "learning_rate": 3.6519731664121304e-06, + "loss": 0.5363, + "step": 7141 + }, + { + "epoch": 1.0524557956777996, + "grad_norm": 0.5544166564941406, + "learning_rate": 3.6516290670677186e-06, + "loss": 0.5307, + "step": 7142 + }, + { + "epoch": 1.0526031434184675, + "grad_norm": 0.5922421813011169, + "learning_rate": 3.651284940027168e-06, + "loss": 0.5507, + "step": 7143 + }, + { + "epoch": 1.0527504911591357, + "grad_norm": 0.6260432004928589, + "learning_rate": 3.6509407852987537e-06, + "loss": 0.5352, + "step": 7144 + }, + { + "epoch": 1.0528978388998036, + "grad_norm": 0.6104339957237244, + "learning_rate": 3.6505966028907534e-06, + "loss": 0.5622, + "step": 7145 + }, + { + "epoch": 1.0530451866404715, + "grad_norm": 0.569291889667511, + "learning_rate": 3.6502523928114454e-06, + "loss": 0.5166, + "step": 7146 + }, + { + "epoch": 1.0531925343811395, + "grad_norm": 0.5862615704536438, + "learning_rate": 3.649908155069106e-06, + "loss": 0.576, + "step": 7147 + }, + { + "epoch": 1.0533398821218074, + "grad_norm": 0.5955272912979126, + "learning_rate": 3.6495638896720156e-06, + "loss": 0.5427, + "step": 7148 + }, + { + "epoch": 1.0534872298624753, + "grad_norm": 0.5883499979972839, + "learning_rate": 3.6492195966284524e-06, + "loss": 0.558, + "step": 7149 + }, + { + "epoch": 1.0536345776031435, + "grad_norm": 0.6166231632232666, + "learning_rate": 3.6488752759466967e-06, + "loss": 0.5631, + "step": 7150 + }, + { + "epoch": 1.0537819253438114, + "grad_norm": 0.5818992257118225, + "learning_rate": 3.64853092763503e-06, + "loss": 0.5468, + "step": 7151 + }, + { + "epoch": 1.0539292730844794, + "grad_norm": 0.5918216705322266, + "learning_rate": 3.648186551701733e-06, + "loss": 0.5316, + "step": 7152 + }, + { + "epoch": 1.0540766208251473, + "grad_norm": 0.5958614945411682, + "learning_rate": 3.6478421481550875e-06, + "loss": 0.5569, + "step": 7153 + }, + { + "epoch": 1.0542239685658152, + "grad_norm": 0.5985686779022217, + "learning_rate": 3.647497717003377e-06, + "loss": 0.5636, + "step": 7154 + }, + { + "epoch": 1.0543713163064834, + "grad_norm": 0.5936171412467957, + "learning_rate": 3.6471532582548846e-06, + "loss": 0.5199, + "step": 7155 + }, + { + "epoch": 1.0545186640471513, + "grad_norm": 0.5840606093406677, + "learning_rate": 3.6468087719178946e-06, + "loss": 0.5548, + "step": 7156 + }, + { + "epoch": 1.0546660117878193, + "grad_norm": 0.5826893448829651, + "learning_rate": 3.6464642580006915e-06, + "loss": 0.5554, + "step": 7157 + }, + { + "epoch": 1.0548133595284872, + "grad_norm": 0.6044207811355591, + "learning_rate": 3.64611971651156e-06, + "loss": 0.5469, + "step": 7158 + }, + { + "epoch": 1.0549607072691551, + "grad_norm": 0.6127269268035889, + "learning_rate": 3.645775147458788e-06, + "loss": 0.5374, + "step": 7159 + }, + { + "epoch": 1.055108055009823, + "grad_norm": 0.5905352234840393, + "learning_rate": 3.6454305508506603e-06, + "loss": 0.5259, + "step": 7160 + }, + { + "epoch": 1.0552554027504912, + "grad_norm": 0.5666106939315796, + "learning_rate": 3.6450859266954653e-06, + "loss": 0.5573, + "step": 7161 + }, + { + "epoch": 1.0554027504911592, + "grad_norm": 0.6103636026382446, + "learning_rate": 3.644741275001491e-06, + "loss": 0.5274, + "step": 7162 + }, + { + "epoch": 1.055550098231827, + "grad_norm": 0.5741715431213379, + "learning_rate": 3.6443965957770255e-06, + "loss": 0.524, + "step": 7163 + }, + { + "epoch": 1.055697445972495, + "grad_norm": 0.5905442237854004, + "learning_rate": 3.6440518890303588e-06, + "loss": 0.5298, + "step": 7164 + }, + { + "epoch": 1.055844793713163, + "grad_norm": 0.5810044407844543, + "learning_rate": 3.6437071547697813e-06, + "loss": 0.5431, + "step": 7165 + }, + { + "epoch": 1.0559921414538311, + "grad_norm": 0.5982298254966736, + "learning_rate": 3.6433623930035834e-06, + "loss": 0.5469, + "step": 7166 + }, + { + "epoch": 1.056139489194499, + "grad_norm": 0.6420773863792419, + "learning_rate": 3.6430176037400553e-06, + "loss": 0.5219, + "step": 7167 + }, + { + "epoch": 1.056286836935167, + "grad_norm": 0.60770583152771, + "learning_rate": 3.64267278698749e-06, + "loss": 0.5441, + "step": 7168 + }, + { + "epoch": 1.056434184675835, + "grad_norm": 0.5896263718605042, + "learning_rate": 3.642327942754181e-06, + "loss": 0.5638, + "step": 7169 + }, + { + "epoch": 1.0565815324165029, + "grad_norm": 0.5639711022377014, + "learning_rate": 3.64198307104842e-06, + "loss": 0.5173, + "step": 7170 + }, + { + "epoch": 1.0567288801571708, + "grad_norm": 0.5849398374557495, + "learning_rate": 3.641638171878502e-06, + "loss": 0.5507, + "step": 7171 + }, + { + "epoch": 1.056876227897839, + "grad_norm": 0.5667764544487, + "learning_rate": 3.641293245252721e-06, + "loss": 0.5598, + "step": 7172 + }, + { + "epoch": 1.057023575638507, + "grad_norm": 0.5612334609031677, + "learning_rate": 3.6409482911793733e-06, + "loss": 0.4972, + "step": 7173 + }, + { + "epoch": 1.0571709233791748, + "grad_norm": 0.5911298990249634, + "learning_rate": 3.640603309666754e-06, + "loss": 0.5378, + "step": 7174 + }, + { + "epoch": 1.0573182711198428, + "grad_norm": 0.6207029223442078, + "learning_rate": 3.64025830072316e-06, + "loss": 0.5205, + "step": 7175 + }, + { + "epoch": 1.0574656188605107, + "grad_norm": 0.5742807984352112, + "learning_rate": 3.639913264356889e-06, + "loss": 0.5215, + "step": 7176 + }, + { + "epoch": 1.0576129666011789, + "grad_norm": 0.6011903285980225, + "learning_rate": 3.6395682005762383e-06, + "loss": 0.5509, + "step": 7177 + }, + { + "epoch": 1.0577603143418468, + "grad_norm": 0.5974141955375671, + "learning_rate": 3.6392231093895074e-06, + "loss": 0.5313, + "step": 7178 + }, + { + "epoch": 1.0579076620825147, + "grad_norm": 0.5917812585830688, + "learning_rate": 3.6388779908049947e-06, + "loss": 0.5643, + "step": 7179 + }, + { + "epoch": 1.0580550098231827, + "grad_norm": 0.6059077978134155, + "learning_rate": 3.6385328448310004e-06, + "loss": 0.5529, + "step": 7180 + }, + { + "epoch": 1.0582023575638506, + "grad_norm": 0.575763463973999, + "learning_rate": 3.638187671475826e-06, + "loss": 0.5553, + "step": 7181 + }, + { + "epoch": 1.0583497053045186, + "grad_norm": 0.5732123255729675, + "learning_rate": 3.63784247074777e-06, + "loss": 0.5252, + "step": 7182 + }, + { + "epoch": 1.0584970530451867, + "grad_norm": 0.5847505331039429, + "learning_rate": 3.6374972426551375e-06, + "loss": 0.5024, + "step": 7183 + }, + { + "epoch": 1.0586444007858546, + "grad_norm": 0.5496455430984497, + "learning_rate": 3.63715198720623e-06, + "loss": 0.5417, + "step": 7184 + }, + { + "epoch": 1.0587917485265226, + "grad_norm": 0.5820969343185425, + "learning_rate": 3.63680670440935e-06, + "loss": 0.5373, + "step": 7185 + }, + { + "epoch": 1.0589390962671905, + "grad_norm": 0.5821393728256226, + "learning_rate": 3.636461394272803e-06, + "loss": 0.5571, + "step": 7186 + }, + { + "epoch": 1.0590864440078585, + "grad_norm": 0.5826473832130432, + "learning_rate": 3.6361160568048914e-06, + "loss": 0.5502, + "step": 7187 + }, + { + "epoch": 1.0592337917485266, + "grad_norm": 0.5842054486274719, + "learning_rate": 3.6357706920139222e-06, + "loss": 0.5536, + "step": 7188 + }, + { + "epoch": 1.0593811394891945, + "grad_norm": 0.6449840664863586, + "learning_rate": 3.6354252999082e-06, + "loss": 0.5296, + "step": 7189 + }, + { + "epoch": 1.0595284872298625, + "grad_norm": 0.575804591178894, + "learning_rate": 3.6350798804960322e-06, + "loss": 0.5484, + "step": 7190 + }, + { + "epoch": 1.0596758349705304, + "grad_norm": 0.6343610286712646, + "learning_rate": 3.6347344337857264e-06, + "loss": 0.5427, + "step": 7191 + }, + { + "epoch": 1.0598231827111984, + "grad_norm": 0.6166189908981323, + "learning_rate": 3.6343889597855893e-06, + "loss": 0.5227, + "step": 7192 + }, + { + "epoch": 1.0599705304518665, + "grad_norm": 0.5619294047355652, + "learning_rate": 3.6340434585039293e-06, + "loss": 0.537, + "step": 7193 + }, + { + "epoch": 1.0601178781925344, + "grad_norm": 0.5885251760482788, + "learning_rate": 3.633697929949056e-06, + "loss": 0.5462, + "step": 7194 + }, + { + "epoch": 1.0602652259332024, + "grad_norm": 0.566780149936676, + "learning_rate": 3.6333523741292793e-06, + "loss": 0.5401, + "step": 7195 + }, + { + "epoch": 1.0604125736738703, + "grad_norm": 0.594727635383606, + "learning_rate": 3.63300679105291e-06, + "loss": 0.5324, + "step": 7196 + }, + { + "epoch": 1.0605599214145383, + "grad_norm": 0.6087473034858704, + "learning_rate": 3.632661180728258e-06, + "loss": 0.5461, + "step": 7197 + }, + { + "epoch": 1.0607072691552062, + "grad_norm": 0.58721923828125, + "learning_rate": 3.6323155431636363e-06, + "loss": 0.5369, + "step": 7198 + }, + { + "epoch": 1.0608546168958743, + "grad_norm": 0.6056281924247742, + "learning_rate": 3.6319698783673573e-06, + "loss": 0.547, + "step": 7199 + }, + { + "epoch": 1.0610019646365423, + "grad_norm": 0.6030473113059998, + "learning_rate": 3.6316241863477324e-06, + "loss": 0.5562, + "step": 7200 + }, + { + "epoch": 1.0611493123772102, + "grad_norm": 0.5967280864715576, + "learning_rate": 3.6312784671130774e-06, + "loss": 0.5159, + "step": 7201 + }, + { + "epoch": 1.0612966601178782, + "grad_norm": 0.5752320289611816, + "learning_rate": 3.630932720671706e-06, + "loss": 0.5537, + "step": 7202 + }, + { + "epoch": 1.061444007858546, + "grad_norm": 0.6068273782730103, + "learning_rate": 3.6305869470319334e-06, + "loss": 0.5349, + "step": 7203 + }, + { + "epoch": 1.0615913555992142, + "grad_norm": 0.564403772354126, + "learning_rate": 3.630241146202074e-06, + "loss": 0.5685, + "step": 7204 + }, + { + "epoch": 1.0617387033398822, + "grad_norm": 0.6037245392799377, + "learning_rate": 3.629895318190446e-06, + "loss": 0.5501, + "step": 7205 + }, + { + "epoch": 1.0618860510805501, + "grad_norm": 0.6158340573310852, + "learning_rate": 3.6295494630053652e-06, + "loss": 0.5513, + "step": 7206 + }, + { + "epoch": 1.062033398821218, + "grad_norm": 0.5873990058898926, + "learning_rate": 3.6292035806551497e-06, + "loss": 0.5096, + "step": 7207 + }, + { + "epoch": 1.062180746561886, + "grad_norm": 0.5686133503913879, + "learning_rate": 3.6288576711481177e-06, + "loss": 0.5383, + "step": 7208 + }, + { + "epoch": 1.062328094302554, + "grad_norm": 0.6479019522666931, + "learning_rate": 3.628511734492588e-06, + "loss": 0.5453, + "step": 7209 + }, + { + "epoch": 1.062475442043222, + "grad_norm": 0.5907291173934937, + "learning_rate": 3.628165770696881e-06, + "loss": 0.555, + "step": 7210 + }, + { + "epoch": 1.06262278978389, + "grad_norm": 0.5927151441574097, + "learning_rate": 3.6278197797693152e-06, + "loss": 0.5585, + "step": 7211 + }, + { + "epoch": 1.062770137524558, + "grad_norm": 0.5835152268409729, + "learning_rate": 3.6274737617182136e-06, + "loss": 0.546, + "step": 7212 + }, + { + "epoch": 1.062917485265226, + "grad_norm": 0.6051298379898071, + "learning_rate": 3.627127716551897e-06, + "loss": 0.554, + "step": 7213 + }, + { + "epoch": 1.0630648330058938, + "grad_norm": 0.6334056258201599, + "learning_rate": 3.626781644278687e-06, + "loss": 0.5628, + "step": 7214 + }, + { + "epoch": 1.063212180746562, + "grad_norm": 0.5632016658782959, + "learning_rate": 3.626435544906908e-06, + "loss": 0.528, + "step": 7215 + }, + { + "epoch": 1.06335952848723, + "grad_norm": 0.5882475972175598, + "learning_rate": 3.626089418444881e-06, + "loss": 0.5271, + "step": 7216 + }, + { + "epoch": 1.0635068762278979, + "grad_norm": 0.589799165725708, + "learning_rate": 3.6257432649009323e-06, + "loss": 0.5127, + "step": 7217 + }, + { + "epoch": 1.0636542239685658, + "grad_norm": 0.5965408086776733, + "learning_rate": 3.625397084283385e-06, + "loss": 0.5583, + "step": 7218 + }, + { + "epoch": 1.0638015717092337, + "grad_norm": 0.5858712196350098, + "learning_rate": 3.625050876600567e-06, + "loss": 0.5245, + "step": 7219 + }, + { + "epoch": 1.0639489194499017, + "grad_norm": 0.6015287041664124, + "learning_rate": 3.6247046418608027e-06, + "loss": 0.5241, + "step": 7220 + }, + { + "epoch": 1.0640962671905698, + "grad_norm": 0.5811641812324524, + "learning_rate": 3.624358380072419e-06, + "loss": 0.5548, + "step": 7221 + }, + { + "epoch": 1.0642436149312378, + "grad_norm": 0.5887271761894226, + "learning_rate": 3.624012091243744e-06, + "loss": 0.5633, + "step": 7222 + }, + { + "epoch": 1.0643909626719057, + "grad_norm": 0.6333345770835876, + "learning_rate": 3.6236657753831056e-06, + "loss": 0.5211, + "step": 7223 + }, + { + "epoch": 1.0645383104125736, + "grad_norm": 0.5983574390411377, + "learning_rate": 3.623319432498832e-06, + "loss": 0.5269, + "step": 7224 + }, + { + "epoch": 1.0646856581532416, + "grad_norm": 0.570339560508728, + "learning_rate": 3.622973062599253e-06, + "loss": 0.5794, + "step": 7225 + }, + { + "epoch": 1.0648330058939097, + "grad_norm": 0.5820622444152832, + "learning_rate": 3.6226266656926988e-06, + "loss": 0.5494, + "step": 7226 + }, + { + "epoch": 1.0649803536345777, + "grad_norm": 0.6165522336959839, + "learning_rate": 3.622280241787499e-06, + "loss": 0.5526, + "step": 7227 + }, + { + "epoch": 1.0651277013752456, + "grad_norm": 0.6095291972160339, + "learning_rate": 3.6219337908919866e-06, + "loss": 0.5553, + "step": 7228 + }, + { + "epoch": 1.0652750491159135, + "grad_norm": 0.5860608816146851, + "learning_rate": 3.6215873130144925e-06, + "loss": 0.5171, + "step": 7229 + }, + { + "epoch": 1.0654223968565815, + "grad_norm": 0.6033528447151184, + "learning_rate": 3.62124080816335e-06, + "loss": 0.5461, + "step": 7230 + }, + { + "epoch": 1.0655697445972496, + "grad_norm": 0.5694832801818848, + "learning_rate": 3.6208942763468912e-06, + "loss": 0.5328, + "step": 7231 + }, + { + "epoch": 1.0657170923379176, + "grad_norm": 0.6000003814697266, + "learning_rate": 3.620547717573451e-06, + "loss": 0.568, + "step": 7232 + }, + { + "epoch": 1.0658644400785855, + "grad_norm": 0.6034849882125854, + "learning_rate": 3.6202011318513643e-06, + "loss": 0.5311, + "step": 7233 + }, + { + "epoch": 1.0660117878192534, + "grad_norm": 0.5940713882446289, + "learning_rate": 3.619854519188965e-06, + "loss": 0.5348, + "step": 7234 + }, + { + "epoch": 1.0661591355599214, + "grad_norm": 0.5747829675674438, + "learning_rate": 3.6195078795945906e-06, + "loss": 0.5406, + "step": 7235 + }, + { + "epoch": 1.0663064833005893, + "grad_norm": 0.5764442682266235, + "learning_rate": 3.619161213076576e-06, + "loss": 0.5032, + "step": 7236 + }, + { + "epoch": 1.0664538310412575, + "grad_norm": 0.5517794489860535, + "learning_rate": 3.618814519643259e-06, + "loss": 0.5261, + "step": 7237 + }, + { + "epoch": 1.0666011787819254, + "grad_norm": 0.5791620016098022, + "learning_rate": 3.618467799302978e-06, + "loss": 0.5665, + "step": 7238 + }, + { + "epoch": 1.0667485265225933, + "grad_norm": 0.5855233073234558, + "learning_rate": 3.6181210520640715e-06, + "loss": 0.5386, + "step": 7239 + }, + { + "epoch": 1.0668958742632613, + "grad_norm": 0.5968170166015625, + "learning_rate": 3.6177742779348775e-06, + "loss": 0.5608, + "step": 7240 + }, + { + "epoch": 1.0670432220039292, + "grad_norm": 0.5949876308441162, + "learning_rate": 3.617427476923736e-06, + "loss": 0.5397, + "step": 7241 + }, + { + "epoch": 1.0671905697445974, + "grad_norm": 0.5976306796073914, + "learning_rate": 3.6170806490389886e-06, + "loss": 0.5242, + "step": 7242 + }, + { + "epoch": 1.0673379174852653, + "grad_norm": 0.5611566305160522, + "learning_rate": 3.616733794288975e-06, + "loss": 0.5111, + "step": 7243 + }, + { + "epoch": 1.0674852652259332, + "grad_norm": 0.5876027941703796, + "learning_rate": 3.616386912682038e-06, + "loss": 0.5499, + "step": 7244 + }, + { + "epoch": 1.0676326129666012, + "grad_norm": 0.5936868190765381, + "learning_rate": 3.6160400042265187e-06, + "loss": 0.5451, + "step": 7245 + }, + { + "epoch": 1.067779960707269, + "grad_norm": 0.6007698178291321, + "learning_rate": 3.615693068930761e-06, + "loss": 0.5108, + "step": 7246 + }, + { + "epoch": 1.067927308447937, + "grad_norm": 0.5892627239227295, + "learning_rate": 3.615346106803109e-06, + "loss": 0.535, + "step": 7247 + }, + { + "epoch": 1.0680746561886052, + "grad_norm": 0.5684611201286316, + "learning_rate": 3.614999117851905e-06, + "loss": 0.5341, + "step": 7248 + }, + { + "epoch": 1.0682220039292731, + "grad_norm": 0.5920925736427307, + "learning_rate": 3.6146521020854965e-06, + "loss": 0.5256, + "step": 7249 + }, + { + "epoch": 1.068369351669941, + "grad_norm": 0.6045699119567871, + "learning_rate": 3.6143050595122263e-06, + "loss": 0.5225, + "step": 7250 + }, + { + "epoch": 1.068516699410609, + "grad_norm": 0.5751377940177917, + "learning_rate": 3.613957990140443e-06, + "loss": 0.5484, + "step": 7251 + }, + { + "epoch": 1.068664047151277, + "grad_norm": 0.5912401676177979, + "learning_rate": 3.6136108939784925e-06, + "loss": 0.5523, + "step": 7252 + }, + { + "epoch": 1.068811394891945, + "grad_norm": 0.5932708382606506, + "learning_rate": 3.613263771034722e-06, + "loss": 0.5768, + "step": 7253 + }, + { + "epoch": 1.068958742632613, + "grad_norm": 0.5958127975463867, + "learning_rate": 3.6129166213174804e-06, + "loss": 0.5384, + "step": 7254 + }, + { + "epoch": 1.069106090373281, + "grad_norm": 0.6054184436798096, + "learning_rate": 3.6125694448351152e-06, + "loss": 0.5507, + "step": 7255 + }, + { + "epoch": 1.069253438113949, + "grad_norm": 0.6057135462760925, + "learning_rate": 3.6122222415959775e-06, + "loss": 0.5223, + "step": 7256 + }, + { + "epoch": 1.0694007858546168, + "grad_norm": 0.5878720283508301, + "learning_rate": 3.6118750116084168e-06, + "loss": 0.5345, + "step": 7257 + }, + { + "epoch": 1.0695481335952848, + "grad_norm": 0.5934422612190247, + "learning_rate": 3.6115277548807827e-06, + "loss": 0.5602, + "step": 7258 + }, + { + "epoch": 1.069695481335953, + "grad_norm": 0.6106769442558289, + "learning_rate": 3.6111804714214275e-06, + "loss": 0.5319, + "step": 7259 + }, + { + "epoch": 1.0698428290766209, + "grad_norm": 0.6024656891822815, + "learning_rate": 3.6108331612387038e-06, + "loss": 0.5456, + "step": 7260 + }, + { + "epoch": 1.0699901768172888, + "grad_norm": 0.6156215071678162, + "learning_rate": 3.6104858243409628e-06, + "loss": 0.5361, + "step": 7261 + }, + { + "epoch": 1.0701375245579567, + "grad_norm": 0.5883569717407227, + "learning_rate": 3.6101384607365592e-06, + "loss": 0.567, + "step": 7262 + }, + { + "epoch": 1.0702848722986247, + "grad_norm": 0.6367737054824829, + "learning_rate": 3.609791070433846e-06, + "loss": 0.5065, + "step": 7263 + }, + { + "epoch": 1.0704322200392928, + "grad_norm": 0.5971492528915405, + "learning_rate": 3.609443653441178e-06, + "loss": 0.5172, + "step": 7264 + }, + { + "epoch": 1.0705795677799608, + "grad_norm": 0.5547652244567871, + "learning_rate": 3.6090962097669113e-06, + "loss": 0.5436, + "step": 7265 + }, + { + "epoch": 1.0707269155206287, + "grad_norm": 0.5839380025863647, + "learning_rate": 3.6087487394194e-06, + "loss": 0.5413, + "step": 7266 + }, + { + "epoch": 1.0708742632612966, + "grad_norm": 0.5970078706741333, + "learning_rate": 3.608401242407002e-06, + "loss": 0.5452, + "step": 7267 + }, + { + "epoch": 1.0710216110019646, + "grad_norm": 0.5915998816490173, + "learning_rate": 3.608053718738074e-06, + "loss": 0.5211, + "step": 7268 + }, + { + "epoch": 1.0711689587426325, + "grad_norm": 0.5810936689376831, + "learning_rate": 3.6077061684209734e-06, + "loss": 0.5257, + "step": 7269 + }, + { + "epoch": 1.0713163064833007, + "grad_norm": 0.5661906599998474, + "learning_rate": 3.607358591464059e-06, + "loss": 0.537, + "step": 7270 + }, + { + "epoch": 1.0714636542239686, + "grad_norm": 0.6320169568061829, + "learning_rate": 3.6070109878756903e-06, + "loss": 0.5411, + "step": 7271 + }, + { + "epoch": 1.0716110019646365, + "grad_norm": 0.6127253770828247, + "learning_rate": 3.6066633576642264e-06, + "loss": 0.5584, + "step": 7272 + }, + { + "epoch": 1.0717583497053045, + "grad_norm": 0.5901724696159363, + "learning_rate": 3.606315700838028e-06, + "loss": 0.5473, + "step": 7273 + }, + { + "epoch": 1.0719056974459724, + "grad_norm": 0.5838456749916077, + "learning_rate": 3.6059680174054546e-06, + "loss": 0.5443, + "step": 7274 + }, + { + "epoch": 1.0720530451866406, + "grad_norm": 0.5919814109802246, + "learning_rate": 3.6056203073748704e-06, + "loss": 0.5741, + "step": 7275 + }, + { + "epoch": 1.0722003929273085, + "grad_norm": 0.6006463766098022, + "learning_rate": 3.605272570754636e-06, + "loss": 0.5406, + "step": 7276 + }, + { + "epoch": 1.0723477406679764, + "grad_norm": 0.6036713719367981, + "learning_rate": 3.604924807553115e-06, + "loss": 0.533, + "step": 7277 + }, + { + "epoch": 1.0724950884086444, + "grad_norm": 0.6131343245506287, + "learning_rate": 3.6045770177786703e-06, + "loss": 0.5517, + "step": 7278 + }, + { + "epoch": 1.0726424361493123, + "grad_norm": 0.5849007964134216, + "learning_rate": 3.604229201439666e-06, + "loss": 0.5437, + "step": 7279 + }, + { + "epoch": 1.0727897838899803, + "grad_norm": 0.5803000330924988, + "learning_rate": 3.6038813585444684e-06, + "loss": 0.5525, + "step": 7280 + }, + { + "epoch": 1.0729371316306484, + "grad_norm": 0.5646486878395081, + "learning_rate": 3.6035334891014417e-06, + "loss": 0.5497, + "step": 7281 + }, + { + "epoch": 1.0730844793713163, + "grad_norm": 0.5827721953392029, + "learning_rate": 3.603185593118952e-06, + "loss": 0.5581, + "step": 7282 + }, + { + "epoch": 1.0732318271119843, + "grad_norm": 0.5854324102401733, + "learning_rate": 3.6028376706053657e-06, + "loss": 0.535, + "step": 7283 + }, + { + "epoch": 1.0733791748526522, + "grad_norm": 0.7234612703323364, + "learning_rate": 3.6024897215690512e-06, + "loss": 0.523, + "step": 7284 + }, + { + "epoch": 1.0735265225933202, + "grad_norm": 0.6153497695922852, + "learning_rate": 3.6021417460183753e-06, + "loss": 0.5092, + "step": 7285 + }, + { + "epoch": 1.0736738703339883, + "grad_norm": 0.5958165526390076, + "learning_rate": 3.601793743961708e-06, + "loss": 0.557, + "step": 7286 + }, + { + "epoch": 1.0738212180746562, + "grad_norm": 0.5646045804023743, + "learning_rate": 3.6014457154074176e-06, + "loss": 0.4919, + "step": 7287 + }, + { + "epoch": 1.0739685658153242, + "grad_norm": 0.6102526783943176, + "learning_rate": 3.601097660363875e-06, + "loss": 0.5046, + "step": 7288 + }, + { + "epoch": 1.0741159135559921, + "grad_norm": 0.6166004538536072, + "learning_rate": 3.6007495788394496e-06, + "loss": 0.5528, + "step": 7289 + }, + { + "epoch": 1.07426326129666, + "grad_norm": 0.5982775688171387, + "learning_rate": 3.6004014708425127e-06, + "loss": 0.5444, + "step": 7290 + }, + { + "epoch": 1.074410609037328, + "grad_norm": 0.6015328764915466, + "learning_rate": 3.6000533363814373e-06, + "loss": 0.5106, + "step": 7291 + }, + { + "epoch": 1.0745579567779961, + "grad_norm": 0.5734000205993652, + "learning_rate": 3.5997051754645945e-06, + "loss": 0.5471, + "step": 7292 + }, + { + "epoch": 1.074705304518664, + "grad_norm": 0.5815190672874451, + "learning_rate": 3.599356988100358e-06, + "loss": 0.539, + "step": 7293 + }, + { + "epoch": 1.074852652259332, + "grad_norm": 0.6541617512702942, + "learning_rate": 3.599008774297102e-06, + "loss": 0.5437, + "step": 7294 + }, + { + "epoch": 1.075, + "grad_norm": 0.599636435508728, + "learning_rate": 3.5986605340632002e-06, + "loss": 0.5387, + "step": 7295 + }, + { + "epoch": 1.075147347740668, + "grad_norm": 0.5805221796035767, + "learning_rate": 3.598312267407028e-06, + "loss": 0.5269, + "step": 7296 + }, + { + "epoch": 1.075294695481336, + "grad_norm": 0.60236656665802, + "learning_rate": 3.597963974336961e-06, + "loss": 0.5567, + "step": 7297 + }, + { + "epoch": 1.075442043222004, + "grad_norm": 0.5745360255241394, + "learning_rate": 3.5976156548613757e-06, + "loss": 0.4742, + "step": 7298 + }, + { + "epoch": 1.075589390962672, + "grad_norm": 0.6066673994064331, + "learning_rate": 3.597267308988648e-06, + "loss": 0.5303, + "step": 7299 + }, + { + "epoch": 1.0757367387033399, + "grad_norm": 0.5800331234931946, + "learning_rate": 3.596918936727156e-06, + "loss": 0.5355, + "step": 7300 + }, + { + "epoch": 1.0758840864440078, + "grad_norm": 0.5880657434463501, + "learning_rate": 3.596570538085279e-06, + "loss": 0.5542, + "step": 7301 + }, + { + "epoch": 1.0760314341846757, + "grad_norm": 0.5769110918045044, + "learning_rate": 3.5962221130713943e-06, + "loss": 0.5321, + "step": 7302 + }, + { + "epoch": 1.0761787819253439, + "grad_norm": 0.5976635217666626, + "learning_rate": 3.595873661693882e-06, + "loss": 0.5505, + "step": 7303 + }, + { + "epoch": 1.0763261296660118, + "grad_norm": 0.614575982093811, + "learning_rate": 3.595525183961122e-06, + "loss": 0.5443, + "step": 7304 + }, + { + "epoch": 1.0764734774066798, + "grad_norm": 0.6239699721336365, + "learning_rate": 3.595176679881496e-06, + "loss": 0.5683, + "step": 7305 + }, + { + "epoch": 1.0766208251473477, + "grad_norm": 0.610190749168396, + "learning_rate": 3.594828149463384e-06, + "loss": 0.527, + "step": 7306 + }, + { + "epoch": 1.0767681728880156, + "grad_norm": 0.5800924301147461, + "learning_rate": 3.5944795927151683e-06, + "loss": 0.5493, + "step": 7307 + }, + { + "epoch": 1.0769155206286838, + "grad_norm": 0.5747696161270142, + "learning_rate": 3.594131009645232e-06, + "loss": 0.5254, + "step": 7308 + }, + { + "epoch": 1.0770628683693517, + "grad_norm": 0.5572823286056519, + "learning_rate": 3.5937824002619575e-06, + "loss": 0.5685, + "step": 7309 + }, + { + "epoch": 1.0772102161100197, + "grad_norm": 0.5629456043243408, + "learning_rate": 3.59343376457373e-06, + "loss": 0.5571, + "step": 7310 + }, + { + "epoch": 1.0773575638506876, + "grad_norm": 0.5841039419174194, + "learning_rate": 3.5930851025889326e-06, + "loss": 0.4997, + "step": 7311 + }, + { + "epoch": 1.0775049115913555, + "grad_norm": 0.597963273525238, + "learning_rate": 3.5927364143159514e-06, + "loss": 0.5562, + "step": 7312 + }, + { + "epoch": 1.0776522593320235, + "grad_norm": 0.5733063220977783, + "learning_rate": 3.5923876997631725e-06, + "loss": 0.5261, + "step": 7313 + }, + { + "epoch": 1.0777996070726916, + "grad_norm": 0.5999749302864075, + "learning_rate": 3.592038958938981e-06, + "loss": 0.5523, + "step": 7314 + }, + { + "epoch": 1.0779469548133596, + "grad_norm": 0.5711727738380432, + "learning_rate": 3.5916901918517657e-06, + "loss": 0.5229, + "step": 7315 + }, + { + "epoch": 1.0780943025540275, + "grad_norm": 0.6047905683517456, + "learning_rate": 3.591341398509912e-06, + "loss": 0.5289, + "step": 7316 + }, + { + "epoch": 1.0782416502946954, + "grad_norm": 0.6385325789451599, + "learning_rate": 3.5909925789218103e-06, + "loss": 0.5286, + "step": 7317 + }, + { + "epoch": 1.0783889980353634, + "grad_norm": 0.5937090516090393, + "learning_rate": 3.5906437330958487e-06, + "loss": 0.553, + "step": 7318 + }, + { + "epoch": 1.0785363457760315, + "grad_norm": 0.6389570236206055, + "learning_rate": 3.5902948610404175e-06, + "loss": 0.5383, + "step": 7319 + }, + { + "epoch": 1.0786836935166995, + "grad_norm": 0.6735303401947021, + "learning_rate": 3.5899459627639053e-06, + "loss": 0.5295, + "step": 7320 + }, + { + "epoch": 1.0788310412573674, + "grad_norm": 0.6059373021125793, + "learning_rate": 3.5895970382747046e-06, + "loss": 0.5618, + "step": 7321 + }, + { + "epoch": 1.0789783889980353, + "grad_norm": 0.5918625593185425, + "learning_rate": 3.589248087581206e-06, + "loss": 0.5467, + "step": 7322 + }, + { + "epoch": 1.0791257367387033, + "grad_norm": 0.5787927508354187, + "learning_rate": 3.5888991106918015e-06, + "loss": 0.535, + "step": 7323 + }, + { + "epoch": 1.0792730844793712, + "grad_norm": 0.6470239758491516, + "learning_rate": 3.5885501076148843e-06, + "loss": 0.5482, + "step": 7324 + }, + { + "epoch": 1.0794204322200394, + "grad_norm": 0.5849828720092773, + "learning_rate": 3.5882010783588477e-06, + "loss": 0.4881, + "step": 7325 + }, + { + "epoch": 1.0795677799607073, + "grad_norm": 0.5886238813400269, + "learning_rate": 3.587852022932086e-06, + "loss": 0.4974, + "step": 7326 + }, + { + "epoch": 1.0797151277013752, + "grad_norm": 0.6199536323547363, + "learning_rate": 3.5875029413429924e-06, + "loss": 0.5305, + "step": 7327 + }, + { + "epoch": 1.0798624754420432, + "grad_norm": 0.5925297737121582, + "learning_rate": 3.5871538335999635e-06, + "loss": 0.5527, + "step": 7328 + }, + { + "epoch": 1.080009823182711, + "grad_norm": 0.5974549055099487, + "learning_rate": 3.586804699711395e-06, + "loss": 0.5258, + "step": 7329 + }, + { + "epoch": 1.0801571709233793, + "grad_norm": 0.5798362493515015, + "learning_rate": 3.5864555396856826e-06, + "loss": 0.5262, + "step": 7330 + }, + { + "epoch": 1.0803045186640472, + "grad_norm": 0.5682826042175293, + "learning_rate": 3.5861063535312247e-06, + "loss": 0.5063, + "step": 7331 + }, + { + "epoch": 1.0804518664047151, + "grad_norm": 0.5602499842643738, + "learning_rate": 3.5857571412564184e-06, + "loss": 0.5504, + "step": 7332 + }, + { + "epoch": 1.080599214145383, + "grad_norm": 0.6210076808929443, + "learning_rate": 3.585407902869662e-06, + "loss": 0.5626, + "step": 7333 + }, + { + "epoch": 1.080746561886051, + "grad_norm": 0.6056528687477112, + "learning_rate": 3.5850586383793546e-06, + "loss": 0.5399, + "step": 7334 + }, + { + "epoch": 1.080893909626719, + "grad_norm": 0.5930976271629333, + "learning_rate": 3.5847093477938955e-06, + "loss": 0.5243, + "step": 7335 + }, + { + "epoch": 1.081041257367387, + "grad_norm": 0.6102489829063416, + "learning_rate": 3.584360031121686e-06, + "loss": 0.5533, + "step": 7336 + }, + { + "epoch": 1.081188605108055, + "grad_norm": 0.6077744960784912, + "learning_rate": 3.5840106883711255e-06, + "loss": 0.5149, + "step": 7337 + }, + { + "epoch": 1.081335952848723, + "grad_norm": 0.634323000907898, + "learning_rate": 3.5836613195506176e-06, + "loss": 0.4909, + "step": 7338 + }, + { + "epoch": 1.081483300589391, + "grad_norm": 0.5870230197906494, + "learning_rate": 3.583311924668562e-06, + "loss": 0.5396, + "step": 7339 + }, + { + "epoch": 1.0816306483300588, + "grad_norm": 0.640206515789032, + "learning_rate": 3.5829625037333635e-06, + "loss": 0.5576, + "step": 7340 + }, + { + "epoch": 1.081777996070727, + "grad_norm": 0.605709433555603, + "learning_rate": 3.5826130567534245e-06, + "loss": 0.53, + "step": 7341 + }, + { + "epoch": 1.081925343811395, + "grad_norm": 0.5932363271713257, + "learning_rate": 3.5822635837371488e-06, + "loss": 0.5374, + "step": 7342 + }, + { + "epoch": 1.0820726915520629, + "grad_norm": 0.5793914198875427, + "learning_rate": 3.581914084692942e-06, + "loss": 0.5494, + "step": 7343 + }, + { + "epoch": 1.0822200392927308, + "grad_norm": 0.560952365398407, + "learning_rate": 3.581564559629209e-06, + "loss": 0.5446, + "step": 7344 + }, + { + "epoch": 1.0823673870333987, + "grad_norm": 0.5806127190589905, + "learning_rate": 3.5812150085543555e-06, + "loss": 0.5137, + "step": 7345 + }, + { + "epoch": 1.082514734774067, + "grad_norm": 0.6117939949035645, + "learning_rate": 3.580865431476788e-06, + "loss": 0.5352, + "step": 7346 + }, + { + "epoch": 1.0826620825147348, + "grad_norm": 0.57564777135849, + "learning_rate": 3.580515828404914e-06, + "loss": 0.5603, + "step": 7347 + }, + { + "epoch": 1.0828094302554028, + "grad_norm": 0.6142427921295166, + "learning_rate": 3.5801661993471416e-06, + "loss": 0.5582, + "step": 7348 + }, + { + "epoch": 1.0829567779960707, + "grad_norm": 0.5720625519752502, + "learning_rate": 3.5798165443118783e-06, + "loss": 0.5951, + "step": 7349 + }, + { + "epoch": 1.0831041257367386, + "grad_norm": 0.5904316902160645, + "learning_rate": 3.5794668633075335e-06, + "loss": 0.5193, + "step": 7350 + }, + { + "epoch": 1.0832514734774066, + "grad_norm": 0.5905547142028809, + "learning_rate": 3.579117156342517e-06, + "loss": 0.5357, + "step": 7351 + }, + { + "epoch": 1.0833988212180747, + "grad_norm": 0.595649003982544, + "learning_rate": 3.578767423425239e-06, + "loss": 0.532, + "step": 7352 + }, + { + "epoch": 1.0835461689587427, + "grad_norm": 0.567411482334137, + "learning_rate": 3.57841766456411e-06, + "loss": 0.5379, + "step": 7353 + }, + { + "epoch": 1.0836935166994106, + "grad_norm": 0.5760126709938049, + "learning_rate": 3.578067879767543e-06, + "loss": 0.4935, + "step": 7354 + }, + { + "epoch": 1.0838408644400785, + "grad_norm": 0.5999034643173218, + "learning_rate": 3.577718069043948e-06, + "loss": 0.5293, + "step": 7355 + }, + { + "epoch": 1.0839882121807465, + "grad_norm": 0.5878951549530029, + "learning_rate": 3.5773682324017395e-06, + "loss": 0.473, + "step": 7356 + }, + { + "epoch": 1.0841355599214146, + "grad_norm": 0.5807485580444336, + "learning_rate": 3.5770183698493304e-06, + "loss": 0.5189, + "step": 7357 + }, + { + "epoch": 1.0842829076620826, + "grad_norm": 0.5971704721450806, + "learning_rate": 3.576668481395135e-06, + "loss": 0.5241, + "step": 7358 + }, + { + "epoch": 1.0844302554027505, + "grad_norm": 0.6245435476303101, + "learning_rate": 3.5763185670475665e-06, + "loss": 0.5224, + "step": 7359 + }, + { + "epoch": 1.0845776031434184, + "grad_norm": 0.545188307762146, + "learning_rate": 3.5759686268150423e-06, + "loss": 0.5512, + "step": 7360 + }, + { + "epoch": 1.0847249508840864, + "grad_norm": 0.5784313082695007, + "learning_rate": 3.5756186607059766e-06, + "loss": 0.5538, + "step": 7361 + }, + { + "epoch": 1.0848722986247543, + "grad_norm": 0.5973635315895081, + "learning_rate": 3.575268668728787e-06, + "loss": 0.5194, + "step": 7362 + }, + { + "epoch": 1.0850196463654225, + "grad_norm": 0.6022404432296753, + "learning_rate": 3.5749186508918905e-06, + "loss": 0.5225, + "step": 7363 + }, + { + "epoch": 1.0851669941060904, + "grad_norm": 0.561161994934082, + "learning_rate": 3.574568607203704e-06, + "loss": 0.5342, + "step": 7364 + }, + { + "epoch": 1.0853143418467583, + "grad_norm": 0.5998069047927856, + "learning_rate": 3.574218537672647e-06, + "loss": 0.535, + "step": 7365 + }, + { + "epoch": 1.0854616895874263, + "grad_norm": 0.5916635990142822, + "learning_rate": 3.573868442307137e-06, + "loss": 0.5667, + "step": 7366 + }, + { + "epoch": 1.0856090373280942, + "grad_norm": 0.595943808555603, + "learning_rate": 3.5735183211155955e-06, + "loss": 0.5319, + "step": 7367 + }, + { + "epoch": 1.0857563850687624, + "grad_norm": 0.5768951177597046, + "learning_rate": 3.573168174106442e-06, + "loss": 0.5181, + "step": 7368 + }, + { + "epoch": 1.0859037328094303, + "grad_norm": 0.5973921418190002, + "learning_rate": 3.572818001288097e-06, + "loss": 0.5632, + "step": 7369 + }, + { + "epoch": 1.0860510805500982, + "grad_norm": 0.5681929588317871, + "learning_rate": 3.5724678026689824e-06, + "loss": 0.5439, + "step": 7370 + }, + { + "epoch": 1.0861984282907662, + "grad_norm": 0.5721032023429871, + "learning_rate": 3.57211757825752e-06, + "loss": 0.5525, + "step": 7371 + }, + { + "epoch": 1.0863457760314341, + "grad_norm": 0.5663259029388428, + "learning_rate": 3.5717673280621336e-06, + "loss": 0.5586, + "step": 7372 + }, + { + "epoch": 1.0864931237721023, + "grad_norm": 0.5826587677001953, + "learning_rate": 3.5714170520912443e-06, + "loss": 0.5808, + "step": 7373 + }, + { + "epoch": 1.0866404715127702, + "grad_norm": 0.5955490469932556, + "learning_rate": 3.5710667503532783e-06, + "loss": 0.5509, + "step": 7374 + }, + { + "epoch": 1.0867878192534381, + "grad_norm": 0.6388561725616455, + "learning_rate": 3.5707164228566587e-06, + "loss": 0.5587, + "step": 7375 + }, + { + "epoch": 1.086935166994106, + "grad_norm": 0.6071009039878845, + "learning_rate": 3.570366069609812e-06, + "loss": 0.5166, + "step": 7376 + }, + { + "epoch": 1.087082514734774, + "grad_norm": 0.6370716094970703, + "learning_rate": 3.5700156906211624e-06, + "loss": 0.5757, + "step": 7377 + }, + { + "epoch": 1.087229862475442, + "grad_norm": 0.6389632225036621, + "learning_rate": 3.5696652858991382e-06, + "loss": 0.5448, + "step": 7378 + }, + { + "epoch": 1.0873772102161101, + "grad_norm": 0.5615331530570984, + "learning_rate": 3.569314855452165e-06, + "loss": 0.5673, + "step": 7379 + }, + { + "epoch": 1.087524557956778, + "grad_norm": 0.566323459148407, + "learning_rate": 3.568964399288671e-06, + "loss": 0.5247, + "step": 7380 + }, + { + "epoch": 1.087671905697446, + "grad_norm": 0.6425341963768005, + "learning_rate": 3.5686139174170854e-06, + "loss": 0.5119, + "step": 7381 + }, + { + "epoch": 1.087819253438114, + "grad_norm": 0.5915940403938293, + "learning_rate": 3.5682634098458356e-06, + "loss": 0.4893, + "step": 7382 + }, + { + "epoch": 1.0879666011787819, + "grad_norm": 0.6284379959106445, + "learning_rate": 3.567912876583352e-06, + "loss": 0.55, + "step": 7383 + }, + { + "epoch": 1.08811394891945, + "grad_norm": 0.5988163352012634, + "learning_rate": 3.5675623176380644e-06, + "loss": 0.5334, + "step": 7384 + }, + { + "epoch": 1.088261296660118, + "grad_norm": 0.597752571105957, + "learning_rate": 3.5672117330184043e-06, + "loss": 0.5745, + "step": 7385 + }, + { + "epoch": 1.0884086444007859, + "grad_norm": 0.5903252363204956, + "learning_rate": 3.566861122732802e-06, + "loss": 0.4977, + "step": 7386 + }, + { + "epoch": 1.0885559921414538, + "grad_norm": 0.6231931447982788, + "learning_rate": 3.5665104867896903e-06, + "loss": 0.513, + "step": 7387 + }, + { + "epoch": 1.0887033398821218, + "grad_norm": 0.6054072976112366, + "learning_rate": 3.5661598251975015e-06, + "loss": 0.5085, + "step": 7388 + }, + { + "epoch": 1.0888506876227897, + "grad_norm": 0.5946445465087891, + "learning_rate": 3.565809137964669e-06, + "loss": 0.5543, + "step": 7389 + }, + { + "epoch": 1.0889980353634579, + "grad_norm": 0.5693309903144836, + "learning_rate": 3.565458425099627e-06, + "loss": 0.5271, + "step": 7390 + }, + { + "epoch": 1.0891453831041258, + "grad_norm": 0.5729987025260925, + "learning_rate": 3.565107686610809e-06, + "loss": 0.5445, + "step": 7391 + }, + { + "epoch": 1.0892927308447937, + "grad_norm": 0.6295577883720398, + "learning_rate": 3.5647569225066514e-06, + "loss": 0.558, + "step": 7392 + }, + { + "epoch": 1.0894400785854617, + "grad_norm": 0.5713592767715454, + "learning_rate": 3.564406132795589e-06, + "loss": 0.5096, + "step": 7393 + }, + { + "epoch": 1.0895874263261296, + "grad_norm": 0.612934947013855, + "learning_rate": 3.5640553174860583e-06, + "loss": 0.5199, + "step": 7394 + }, + { + "epoch": 1.0897347740667978, + "grad_norm": 0.591647207736969, + "learning_rate": 3.5637044765864958e-06, + "loss": 0.5377, + "step": 7395 + }, + { + "epoch": 1.0898821218074657, + "grad_norm": 0.5789877772331238, + "learning_rate": 3.5633536101053405e-06, + "loss": 0.5587, + "step": 7396 + }, + { + "epoch": 1.0900294695481336, + "grad_norm": 0.6643548011779785, + "learning_rate": 3.5630027180510294e-06, + "loss": 0.5282, + "step": 7397 + }, + { + "epoch": 1.0901768172888016, + "grad_norm": 0.6115054488182068, + "learning_rate": 3.562651800432001e-06, + "loss": 0.5427, + "step": 7398 + }, + { + "epoch": 1.0903241650294695, + "grad_norm": 0.5940015316009521, + "learning_rate": 3.5623008572566952e-06, + "loss": 0.5354, + "step": 7399 + }, + { + "epoch": 1.0904715127701374, + "grad_norm": 0.5694457292556763, + "learning_rate": 3.5619498885335523e-06, + "loss": 0.5786, + "step": 7400 + }, + { + "epoch": 1.0906188605108056, + "grad_norm": 0.5745207071304321, + "learning_rate": 3.5615988942710128e-06, + "loss": 0.5285, + "step": 7401 + }, + { + "epoch": 1.0907662082514735, + "grad_norm": 0.5918727517127991, + "learning_rate": 3.5612478744775175e-06, + "loss": 0.5439, + "step": 7402 + }, + { + "epoch": 1.0909135559921415, + "grad_norm": 0.6257019639015198, + "learning_rate": 3.5608968291615083e-06, + "loss": 0.5706, + "step": 7403 + }, + { + "epoch": 1.0910609037328094, + "grad_norm": 0.5626807808876038, + "learning_rate": 3.5605457583314285e-06, + "loss": 0.5717, + "step": 7404 + }, + { + "epoch": 1.0912082514734773, + "grad_norm": 0.5931563973426819, + "learning_rate": 3.5601946619957205e-06, + "loss": 0.5449, + "step": 7405 + }, + { + "epoch": 1.0913555992141455, + "grad_norm": 0.6007542014122009, + "learning_rate": 3.559843540162828e-06, + "loss": 0.5687, + "step": 7406 + }, + { + "epoch": 1.0915029469548134, + "grad_norm": 0.620060920715332, + "learning_rate": 3.5594923928411955e-06, + "loss": 0.5469, + "step": 7407 + }, + { + "epoch": 1.0916502946954814, + "grad_norm": 0.626956582069397, + "learning_rate": 3.5591412200392676e-06, + "loss": 0.5283, + "step": 7408 + }, + { + "epoch": 1.0917976424361493, + "grad_norm": 0.5962748527526855, + "learning_rate": 3.558790021765491e-06, + "loss": 0.5379, + "step": 7409 + }, + { + "epoch": 1.0919449901768172, + "grad_norm": 0.6331344842910767, + "learning_rate": 3.5584387980283104e-06, + "loss": 0.5686, + "step": 7410 + }, + { + "epoch": 1.0920923379174852, + "grad_norm": 0.5884971022605896, + "learning_rate": 3.558087548836173e-06, + "loss": 0.5429, + "step": 7411 + }, + { + "epoch": 1.0922396856581533, + "grad_norm": 0.5868773460388184, + "learning_rate": 3.5577362741975263e-06, + "loss": 0.5563, + "step": 7412 + }, + { + "epoch": 1.0923870333988213, + "grad_norm": 0.6006597876548767, + "learning_rate": 3.557384974120819e-06, + "loss": 0.5663, + "step": 7413 + }, + { + "epoch": 1.0925343811394892, + "grad_norm": 0.6140289902687073, + "learning_rate": 3.5570336486144983e-06, + "loss": 0.5542, + "step": 7414 + }, + { + "epoch": 1.0926817288801571, + "grad_norm": 0.6132239699363708, + "learning_rate": 3.5566822976870143e-06, + "loss": 0.5363, + "step": 7415 + }, + { + "epoch": 1.092829076620825, + "grad_norm": 0.5970762372016907, + "learning_rate": 3.5563309213468168e-06, + "loss": 0.5564, + "step": 7416 + }, + { + "epoch": 1.0929764243614932, + "grad_norm": 0.5977652668952942, + "learning_rate": 3.555979519602356e-06, + "loss": 0.513, + "step": 7417 + }, + { + "epoch": 1.0931237721021612, + "grad_norm": 0.5559905767440796, + "learning_rate": 3.5556280924620833e-06, + "loss": 0.4982, + "step": 7418 + }, + { + "epoch": 1.093271119842829, + "grad_norm": 0.5873255133628845, + "learning_rate": 3.5552766399344494e-06, + "loss": 0.5475, + "step": 7419 + }, + { + "epoch": 1.093418467583497, + "grad_norm": 0.5957420468330383, + "learning_rate": 3.554925162027908e-06, + "loss": 0.547, + "step": 7420 + }, + { + "epoch": 1.093565815324165, + "grad_norm": 0.6156150102615356, + "learning_rate": 3.554573658750911e-06, + "loss": 0.5227, + "step": 7421 + }, + { + "epoch": 1.093713163064833, + "grad_norm": 0.5926945209503174, + "learning_rate": 3.5542221301119123e-06, + "loss": 0.5525, + "step": 7422 + }, + { + "epoch": 1.093860510805501, + "grad_norm": 0.6172235012054443, + "learning_rate": 3.5538705761193655e-06, + "loss": 0.5354, + "step": 7423 + }, + { + "epoch": 1.094007858546169, + "grad_norm": 0.5759809017181396, + "learning_rate": 3.5535189967817257e-06, + "loss": 0.55, + "step": 7424 + }, + { + "epoch": 1.094155206286837, + "grad_norm": 0.5901938676834106, + "learning_rate": 3.5531673921074483e-06, + "loss": 0.534, + "step": 7425 + }, + { + "epoch": 1.0943025540275049, + "grad_norm": 0.6052404046058655, + "learning_rate": 3.552815762104989e-06, + "loss": 0.5517, + "step": 7426 + }, + { + "epoch": 1.0944499017681728, + "grad_norm": 0.6317077279090881, + "learning_rate": 3.552464106782805e-06, + "loss": 0.5354, + "step": 7427 + }, + { + "epoch": 1.094597249508841, + "grad_norm": 0.5936709046363831, + "learning_rate": 3.5521124261493516e-06, + "loss": 0.5684, + "step": 7428 + }, + { + "epoch": 1.094744597249509, + "grad_norm": 0.5880652070045471, + "learning_rate": 3.5517607202130888e-06, + "loss": 0.5107, + "step": 7429 + }, + { + "epoch": 1.0948919449901768, + "grad_norm": 0.5865320563316345, + "learning_rate": 3.551408988982473e-06, + "loss": 0.5601, + "step": 7430 + }, + { + "epoch": 1.0950392927308448, + "grad_norm": 0.5742371678352356, + "learning_rate": 3.551057232465965e-06, + "loss": 0.5218, + "step": 7431 + }, + { + "epoch": 1.0951866404715127, + "grad_norm": 0.6388010382652283, + "learning_rate": 3.550705450672024e-06, + "loss": 0.5789, + "step": 7432 + }, + { + "epoch": 1.0953339882121806, + "grad_norm": 0.5971278548240662, + "learning_rate": 3.550353643609109e-06, + "loss": 0.551, + "step": 7433 + }, + { + "epoch": 1.0954813359528488, + "grad_norm": 0.5667780041694641, + "learning_rate": 3.5500018112856816e-06, + "loss": 0.5214, + "step": 7434 + }, + { + "epoch": 1.0956286836935167, + "grad_norm": 0.5968946814537048, + "learning_rate": 3.549649953710203e-06, + "loss": 0.5144, + "step": 7435 + }, + { + "epoch": 1.0957760314341847, + "grad_norm": 0.5526967644691467, + "learning_rate": 3.5492980708911355e-06, + "loss": 0.5688, + "step": 7436 + }, + { + "epoch": 1.0959233791748526, + "grad_norm": 0.588583767414093, + "learning_rate": 3.5489461628369414e-06, + "loss": 0.55, + "step": 7437 + }, + { + "epoch": 1.0960707269155205, + "grad_norm": 0.601176381111145, + "learning_rate": 3.5485942295560843e-06, + "loss": 0.5342, + "step": 7438 + }, + { + "epoch": 1.0962180746561887, + "grad_norm": 0.5758091807365417, + "learning_rate": 3.548242271057028e-06, + "loss": 0.5449, + "step": 7439 + }, + { + "epoch": 1.0963654223968566, + "grad_norm": 0.5880931615829468, + "learning_rate": 3.5478902873482364e-06, + "loss": 0.5115, + "step": 7440 + }, + { + "epoch": 1.0965127701375246, + "grad_norm": 0.5778232216835022, + "learning_rate": 3.5475382784381753e-06, + "loss": 0.5546, + "step": 7441 + }, + { + "epoch": 1.0966601178781925, + "grad_norm": 0.5900147557258606, + "learning_rate": 3.5471862443353088e-06, + "loss": 0.5722, + "step": 7442 + }, + { + "epoch": 1.0968074656188604, + "grad_norm": 0.5717021822929382, + "learning_rate": 3.5468341850481054e-06, + "loss": 0.5196, + "step": 7443 + }, + { + "epoch": 1.0969548133595284, + "grad_norm": 0.5747436881065369, + "learning_rate": 3.54648210058503e-06, + "loss": 0.5511, + "step": 7444 + }, + { + "epoch": 1.0971021611001965, + "grad_norm": 0.558871328830719, + "learning_rate": 3.5461299909545515e-06, + "loss": 0.5015, + "step": 7445 + }, + { + "epoch": 1.0972495088408645, + "grad_norm": 0.5816622972488403, + "learning_rate": 3.5457778561651373e-06, + "loss": 0.5517, + "step": 7446 + }, + { + "epoch": 1.0973968565815324, + "grad_norm": 0.5619738698005676, + "learning_rate": 3.5454256962252566e-06, + "loss": 0.5623, + "step": 7447 + }, + { + "epoch": 1.0975442043222003, + "grad_norm": 0.5809935927391052, + "learning_rate": 3.5450735111433776e-06, + "loss": 0.5399, + "step": 7448 + }, + { + "epoch": 1.0976915520628683, + "grad_norm": 0.5861467719078064, + "learning_rate": 3.5447213009279706e-06, + "loss": 0.5298, + "step": 7449 + }, + { + "epoch": 1.0978388998035364, + "grad_norm": 0.560295820236206, + "learning_rate": 3.544369065587507e-06, + "loss": 0.5087, + "step": 7450 + }, + { + "epoch": 1.0979862475442044, + "grad_norm": 0.573689341545105, + "learning_rate": 3.5440168051304568e-06, + "loss": 0.4851, + "step": 7451 + }, + { + "epoch": 1.0981335952848723, + "grad_norm": 0.5923088788986206, + "learning_rate": 3.5436645195652926e-06, + "loss": 0.5321, + "step": 7452 + }, + { + "epoch": 1.0982809430255402, + "grad_norm": 0.5942450761795044, + "learning_rate": 3.5433122089004856e-06, + "loss": 0.5085, + "step": 7453 + }, + { + "epoch": 1.0984282907662082, + "grad_norm": 0.6032446622848511, + "learning_rate": 3.542959873144509e-06, + "loss": 0.5767, + "step": 7454 + }, + { + "epoch": 1.0985756385068761, + "grad_norm": 0.593079149723053, + "learning_rate": 3.542607512305838e-06, + "loss": 0.5438, + "step": 7455 + }, + { + "epoch": 1.0987229862475443, + "grad_norm": 0.6761676073074341, + "learning_rate": 3.5422551263929437e-06, + "loss": 0.5599, + "step": 7456 + }, + { + "epoch": 1.0988703339882122, + "grad_norm": 0.5704506635665894, + "learning_rate": 3.541902715414304e-06, + "loss": 0.558, + "step": 7457 + }, + { + "epoch": 1.0990176817288801, + "grad_norm": 0.5832436680793762, + "learning_rate": 3.541550279378391e-06, + "loss": 0.5526, + "step": 7458 + }, + { + "epoch": 1.099165029469548, + "grad_norm": 0.5824304819107056, + "learning_rate": 3.5411978182936837e-06, + "loss": 0.5471, + "step": 7459 + }, + { + "epoch": 1.099312377210216, + "grad_norm": 0.5875660181045532, + "learning_rate": 3.5408453321686563e-06, + "loss": 0.5161, + "step": 7460 + }, + { + "epoch": 1.0994597249508842, + "grad_norm": 0.6034870147705078, + "learning_rate": 3.5404928210117877e-06, + "loss": 0.5368, + "step": 7461 + }, + { + "epoch": 1.0996070726915521, + "grad_norm": 0.5900062918663025, + "learning_rate": 3.540140284831554e-06, + "loss": 0.5573, + "step": 7462 + }, + { + "epoch": 1.09975442043222, + "grad_norm": 0.5792563557624817, + "learning_rate": 3.539787723636435e-06, + "loss": 0.5383, + "step": 7463 + }, + { + "epoch": 1.099901768172888, + "grad_norm": 0.58955317735672, + "learning_rate": 3.539435137434909e-06, + "loss": 0.5275, + "step": 7464 + }, + { + "epoch": 1.100049115913556, + "grad_norm": 0.6218745112419128, + "learning_rate": 3.5390825262354557e-06, + "loss": 0.529, + "step": 7465 + }, + { + "epoch": 1.1001964636542239, + "grad_norm": 0.6135135293006897, + "learning_rate": 3.5387298900465537e-06, + "loss": 0.541, + "step": 7466 + }, + { + "epoch": 1.100343811394892, + "grad_norm": 0.6345595717430115, + "learning_rate": 3.538377228876686e-06, + "loss": 0.5239, + "step": 7467 + }, + { + "epoch": 1.10049115913556, + "grad_norm": 0.6142773628234863, + "learning_rate": 3.538024542734333e-06, + "loss": 0.5526, + "step": 7468 + }, + { + "epoch": 1.1006385068762279, + "grad_norm": 0.577447235584259, + "learning_rate": 3.5376718316279763e-06, + "loss": 0.5413, + "step": 7469 + }, + { + "epoch": 1.1007858546168958, + "grad_norm": 0.607782781124115, + "learning_rate": 3.5373190955660987e-06, + "loss": 0.5343, + "step": 7470 + }, + { + "epoch": 1.1009332023575638, + "grad_norm": 0.574221670627594, + "learning_rate": 3.5369663345571836e-06, + "loss": 0.5265, + "step": 7471 + }, + { + "epoch": 1.101080550098232, + "grad_norm": 0.6247369647026062, + "learning_rate": 3.536613548609714e-06, + "loss": 0.5135, + "step": 7472 + }, + { + "epoch": 1.1012278978388998, + "grad_norm": 0.6139459013938904, + "learning_rate": 3.5362607377321754e-06, + "loss": 0.5281, + "step": 7473 + }, + { + "epoch": 1.1013752455795678, + "grad_norm": 0.6000802516937256, + "learning_rate": 3.5359079019330518e-06, + "loss": 0.5234, + "step": 7474 + }, + { + "epoch": 1.1015225933202357, + "grad_norm": 0.6343809366226196, + "learning_rate": 3.535555041220829e-06, + "loss": 0.5475, + "step": 7475 + }, + { + "epoch": 1.1016699410609037, + "grad_norm": 0.5704521536827087, + "learning_rate": 3.535202155603993e-06, + "loss": 0.5366, + "step": 7476 + }, + { + "epoch": 1.1018172888015716, + "grad_norm": 0.5712850093841553, + "learning_rate": 3.534849245091031e-06, + "loss": 0.5268, + "step": 7477 + }, + { + "epoch": 1.1019646365422398, + "grad_norm": 0.5959328413009644, + "learning_rate": 3.5344963096904295e-06, + "loss": 0.5369, + "step": 7478 + }, + { + "epoch": 1.1021119842829077, + "grad_norm": 0.5776106119155884, + "learning_rate": 3.5341433494106773e-06, + "loss": 0.5342, + "step": 7479 + }, + { + "epoch": 1.1022593320235756, + "grad_norm": 0.5710034370422363, + "learning_rate": 3.533790364260263e-06, + "loss": 0.5272, + "step": 7480 + }, + { + "epoch": 1.1024066797642436, + "grad_norm": 0.5841561555862427, + "learning_rate": 3.533437354247675e-06, + "loss": 0.5206, + "step": 7481 + }, + { + "epoch": 1.1025540275049115, + "grad_norm": 0.605259358882904, + "learning_rate": 3.5330843193814033e-06, + "loss": 0.5561, + "step": 7482 + }, + { + "epoch": 1.1027013752455797, + "grad_norm": 0.5629340410232544, + "learning_rate": 3.5327312596699383e-06, + "loss": 0.5549, + "step": 7483 + }, + { + "epoch": 1.1028487229862476, + "grad_norm": 0.6231169700622559, + "learning_rate": 3.532378175121771e-06, + "loss": 0.5363, + "step": 7484 + }, + { + "epoch": 1.1029960707269155, + "grad_norm": 0.5864153504371643, + "learning_rate": 3.5320250657453924e-06, + "loss": 0.5518, + "step": 7485 + }, + { + "epoch": 1.1031434184675835, + "grad_norm": 0.5959156155586243, + "learning_rate": 3.5316719315492953e-06, + "loss": 0.5497, + "step": 7486 + }, + { + "epoch": 1.1032907662082514, + "grad_norm": 0.5803437232971191, + "learning_rate": 3.531318772541973e-06, + "loss": 0.5192, + "step": 7487 + }, + { + "epoch": 1.1034381139489196, + "grad_norm": 0.5744080543518066, + "learning_rate": 3.530965588731917e-06, + "loss": 0.5633, + "step": 7488 + }, + { + "epoch": 1.1035854616895875, + "grad_norm": 0.6040222644805908, + "learning_rate": 3.5306123801276226e-06, + "loss": 0.5314, + "step": 7489 + }, + { + "epoch": 1.1037328094302554, + "grad_norm": 0.6022178530693054, + "learning_rate": 3.5302591467375837e-06, + "loss": 0.5228, + "step": 7490 + }, + { + "epoch": 1.1038801571709234, + "grad_norm": 0.585433840751648, + "learning_rate": 3.5299058885702946e-06, + "loss": 0.5645, + "step": 7491 + }, + { + "epoch": 1.1040275049115913, + "grad_norm": 0.5657128691673279, + "learning_rate": 3.5295526056342527e-06, + "loss": 0.5259, + "step": 7492 + }, + { + "epoch": 1.1041748526522592, + "grad_norm": 0.6182159185409546, + "learning_rate": 3.5291992979379534e-06, + "loss": 0.5447, + "step": 7493 + }, + { + "epoch": 1.1043222003929274, + "grad_norm": 0.5673832297325134, + "learning_rate": 3.5288459654898944e-06, + "loss": 0.5406, + "step": 7494 + }, + { + "epoch": 1.1044695481335953, + "grad_norm": 0.63082355260849, + "learning_rate": 3.528492608298571e-06, + "loss": 0.5503, + "step": 7495 + }, + { + "epoch": 1.1046168958742633, + "grad_norm": 0.6346955895423889, + "learning_rate": 3.5281392263724835e-06, + "loss": 0.5585, + "step": 7496 + }, + { + "epoch": 1.1047642436149312, + "grad_norm": 0.6236678957939148, + "learning_rate": 3.5277858197201303e-06, + "loss": 0.5426, + "step": 7497 + }, + { + "epoch": 1.1049115913555991, + "grad_norm": 0.5741454362869263, + "learning_rate": 3.5274323883500093e-06, + "loss": 0.5457, + "step": 7498 + }, + { + "epoch": 1.1050589390962673, + "grad_norm": 0.5950601100921631, + "learning_rate": 3.5270789322706223e-06, + "loss": 0.5258, + "step": 7499 + }, + { + "epoch": 1.1052062868369352, + "grad_norm": 0.5652013421058655, + "learning_rate": 3.5267254514904682e-06, + "loss": 0.5247, + "step": 7500 + }, + { + "epoch": 1.1053536345776032, + "grad_norm": 0.6083952784538269, + "learning_rate": 3.5263719460180485e-06, + "loss": 0.5445, + "step": 7501 + }, + { + "epoch": 1.105500982318271, + "grad_norm": 0.5850499272346497, + "learning_rate": 3.5260184158618647e-06, + "loss": 0.5536, + "step": 7502 + }, + { + "epoch": 1.105648330058939, + "grad_norm": 0.6151867508888245, + "learning_rate": 3.5256648610304202e-06, + "loss": 0.5432, + "step": 7503 + }, + { + "epoch": 1.1057956777996072, + "grad_norm": 0.5703746676445007, + "learning_rate": 3.525311281532216e-06, + "loss": 0.5443, + "step": 7504 + }, + { + "epoch": 1.1059430255402751, + "grad_norm": 0.5952315926551819, + "learning_rate": 3.5249576773757568e-06, + "loss": 0.4984, + "step": 7505 + }, + { + "epoch": 1.106090373280943, + "grad_norm": 0.5911837220191956, + "learning_rate": 3.5246040485695453e-06, + "loss": 0.5822, + "step": 7506 + }, + { + "epoch": 1.106237721021611, + "grad_norm": 0.6047247648239136, + "learning_rate": 3.524250395122088e-06, + "loss": 0.5311, + "step": 7507 + }, + { + "epoch": 1.106385068762279, + "grad_norm": 0.5956122875213623, + "learning_rate": 3.5238967170418884e-06, + "loss": 0.5352, + "step": 7508 + }, + { + "epoch": 1.1065324165029469, + "grad_norm": 0.5999953150749207, + "learning_rate": 3.523543014337454e-06, + "loss": 0.5365, + "step": 7509 + }, + { + "epoch": 1.106679764243615, + "grad_norm": 0.5698752999305725, + "learning_rate": 3.523189287017289e-06, + "loss": 0.5431, + "step": 7510 + }, + { + "epoch": 1.106827111984283, + "grad_norm": 0.5996482372283936, + "learning_rate": 3.522835535089903e-06, + "loss": 0.5553, + "step": 7511 + }, + { + "epoch": 1.106974459724951, + "grad_norm": 0.5936939716339111, + "learning_rate": 3.522481758563801e-06, + "loss": 0.5446, + "step": 7512 + }, + { + "epoch": 1.1071218074656188, + "grad_norm": 0.6051626205444336, + "learning_rate": 3.522127957447492e-06, + "loss": 0.5485, + "step": 7513 + }, + { + "epoch": 1.1072691552062868, + "grad_norm": 0.63742595911026, + "learning_rate": 3.5217741317494854e-06, + "loss": 0.5054, + "step": 7514 + }, + { + "epoch": 1.107416502946955, + "grad_norm": 0.5888234972953796, + "learning_rate": 3.5214202814782903e-06, + "loss": 0.5581, + "step": 7515 + }, + { + "epoch": 1.1075638506876229, + "grad_norm": 0.593828558921814, + "learning_rate": 3.5210664066424164e-06, + "loss": 0.5314, + "step": 7516 + }, + { + "epoch": 1.1077111984282908, + "grad_norm": 0.6062507629394531, + "learning_rate": 3.5207125072503746e-06, + "loss": 0.5418, + "step": 7517 + }, + { + "epoch": 1.1078585461689587, + "grad_norm": 0.6113807559013367, + "learning_rate": 3.5203585833106756e-06, + "loss": 0.5665, + "step": 7518 + }, + { + "epoch": 1.1080058939096267, + "grad_norm": 0.5959169864654541, + "learning_rate": 3.520004634831831e-06, + "loss": 0.5293, + "step": 7519 + }, + { + "epoch": 1.1081532416502946, + "grad_norm": 0.613614022731781, + "learning_rate": 3.519650661822354e-06, + "loss": 0.4895, + "step": 7520 + }, + { + "epoch": 1.1083005893909628, + "grad_norm": 0.6002504825592041, + "learning_rate": 3.519296664290756e-06, + "loss": 0.5323, + "step": 7521 + }, + { + "epoch": 1.1084479371316307, + "grad_norm": 0.6148261427879333, + "learning_rate": 3.5189426422455512e-06, + "loss": 0.5804, + "step": 7522 + }, + { + "epoch": 1.1085952848722986, + "grad_norm": 0.6006304025650024, + "learning_rate": 3.5185885956952547e-06, + "loss": 0.544, + "step": 7523 + }, + { + "epoch": 1.1087426326129666, + "grad_norm": 0.610553503036499, + "learning_rate": 3.5182345246483795e-06, + "loss": 0.5383, + "step": 7524 + }, + { + "epoch": 1.1088899803536345, + "grad_norm": 0.5930124521255493, + "learning_rate": 3.517880429113442e-06, + "loss": 0.5234, + "step": 7525 + }, + { + "epoch": 1.1090373280943027, + "grad_norm": 0.6078627705574036, + "learning_rate": 3.5175263090989574e-06, + "loss": 0.4925, + "step": 7526 + }, + { + "epoch": 1.1091846758349706, + "grad_norm": 0.6255064606666565, + "learning_rate": 3.5171721646134427e-06, + "loss": 0.5501, + "step": 7527 + }, + { + "epoch": 1.1093320235756385, + "grad_norm": 0.6180280447006226, + "learning_rate": 3.5168179956654135e-06, + "loss": 0.523, + "step": 7528 + }, + { + "epoch": 1.1094793713163065, + "grad_norm": 0.6244838237762451, + "learning_rate": 3.5164638022633895e-06, + "loss": 0.5484, + "step": 7529 + }, + { + "epoch": 1.1096267190569744, + "grad_norm": 0.5925837159156799, + "learning_rate": 3.516109584415887e-06, + "loss": 0.5419, + "step": 7530 + }, + { + "epoch": 1.1097740667976423, + "grad_norm": 0.5975171327590942, + "learning_rate": 3.515755342131426e-06, + "loss": 0.5265, + "step": 7531 + }, + { + "epoch": 1.1099214145383105, + "grad_norm": 0.6559366583824158, + "learning_rate": 3.5154010754185258e-06, + "loss": 0.54, + "step": 7532 + }, + { + "epoch": 1.1100687622789784, + "grad_norm": 0.5877535343170166, + "learning_rate": 3.5150467842857056e-06, + "loss": 0.5337, + "step": 7533 + }, + { + "epoch": 1.1102161100196464, + "grad_norm": 0.6090061068534851, + "learning_rate": 3.5146924687414865e-06, + "loss": 0.5356, + "step": 7534 + }, + { + "epoch": 1.1103634577603143, + "grad_norm": 0.5793532729148865, + "learning_rate": 3.5143381287943888e-06, + "loss": 0.5568, + "step": 7535 + }, + { + "epoch": 1.1105108055009822, + "grad_norm": 0.5941581726074219, + "learning_rate": 3.513983764452935e-06, + "loss": 0.5316, + "step": 7536 + }, + { + "epoch": 1.1106581532416504, + "grad_norm": 0.5862442255020142, + "learning_rate": 3.5136293757256477e-06, + "loss": 0.5416, + "step": 7537 + }, + { + "epoch": 1.1108055009823183, + "grad_norm": 0.5973157286643982, + "learning_rate": 3.5132749626210495e-06, + "loss": 0.5288, + "step": 7538 + }, + { + "epoch": 1.1109528487229863, + "grad_norm": 0.5799854397773743, + "learning_rate": 3.5129205251476633e-06, + "loss": 0.5302, + "step": 7539 + }, + { + "epoch": 1.1111001964636542, + "grad_norm": 0.7156983017921448, + "learning_rate": 3.5125660633140136e-06, + "loss": 0.5236, + "step": 7540 + }, + { + "epoch": 1.1112475442043221, + "grad_norm": 0.5739001035690308, + "learning_rate": 3.5122115771286253e-06, + "loss": 0.5332, + "step": 7541 + }, + { + "epoch": 1.11139489194499, + "grad_norm": 0.5982167720794678, + "learning_rate": 3.511857066600023e-06, + "loss": 0.5074, + "step": 7542 + }, + { + "epoch": 1.1115422396856582, + "grad_norm": 0.6175476312637329, + "learning_rate": 3.511502531736733e-06, + "loss": 0.5722, + "step": 7543 + }, + { + "epoch": 1.1116895874263262, + "grad_norm": 0.6006815433502197, + "learning_rate": 3.5111479725472812e-06, + "loss": 0.5169, + "step": 7544 + }, + { + "epoch": 1.1118369351669941, + "grad_norm": 0.5502755045890808, + "learning_rate": 3.510793389040195e-06, + "loss": 0.5651, + "step": 7545 + }, + { + "epoch": 1.111984282907662, + "grad_norm": 0.5757664442062378, + "learning_rate": 3.510438781224003e-06, + "loss": 0.5681, + "step": 7546 + }, + { + "epoch": 1.11213163064833, + "grad_norm": 0.5972004532814026, + "learning_rate": 3.5100841491072306e-06, + "loss": 0.5475, + "step": 7547 + }, + { + "epoch": 1.1122789783889981, + "grad_norm": 0.6062584519386292, + "learning_rate": 3.5097294926984086e-06, + "loss": 0.5308, + "step": 7548 + }, + { + "epoch": 1.112426326129666, + "grad_norm": 0.6399567127227783, + "learning_rate": 3.509374812006067e-06, + "loss": 0.5334, + "step": 7549 + }, + { + "epoch": 1.112573673870334, + "grad_norm": 0.5784479379653931, + "learning_rate": 3.509020107038733e-06, + "loss": 0.583, + "step": 7550 + }, + { + "epoch": 1.112721021611002, + "grad_norm": 0.5973838567733765, + "learning_rate": 3.508665377804939e-06, + "loss": 0.5502, + "step": 7551 + }, + { + "epoch": 1.1128683693516699, + "grad_norm": 0.5916189551353455, + "learning_rate": 3.5083106243132166e-06, + "loss": 0.5313, + "step": 7552 + }, + { + "epoch": 1.1130157170923378, + "grad_norm": 0.5786742568016052, + "learning_rate": 3.5079558465720965e-06, + "loss": 0.535, + "step": 7553 + }, + { + "epoch": 1.113163064833006, + "grad_norm": 0.6046843528747559, + "learning_rate": 3.507601044590111e-06, + "loss": 0.5539, + "step": 7554 + }, + { + "epoch": 1.113310412573674, + "grad_norm": 0.6431934833526611, + "learning_rate": 3.5072462183757927e-06, + "loss": 0.5213, + "step": 7555 + }, + { + "epoch": 1.1134577603143418, + "grad_norm": 0.5876781344413757, + "learning_rate": 3.5068913679376748e-06, + "loss": 0.5106, + "step": 7556 + }, + { + "epoch": 1.1136051080550098, + "grad_norm": 0.6387234926223755, + "learning_rate": 3.5065364932842926e-06, + "loss": 0.5497, + "step": 7557 + }, + { + "epoch": 1.1137524557956777, + "grad_norm": 0.6067906022071838, + "learning_rate": 3.5061815944241796e-06, + "loss": 0.5059, + "step": 7558 + }, + { + "epoch": 1.1138998035363459, + "grad_norm": 0.5738406181335449, + "learning_rate": 3.5058266713658706e-06, + "loss": 0.5188, + "step": 7559 + }, + { + "epoch": 1.1140471512770138, + "grad_norm": 0.5966986417770386, + "learning_rate": 3.5054717241179024e-06, + "loss": 0.5335, + "step": 7560 + }, + { + "epoch": 1.1141944990176817, + "grad_norm": 0.5904927849769592, + "learning_rate": 3.5051167526888113e-06, + "loss": 0.5524, + "step": 7561 + }, + { + "epoch": 1.1143418467583497, + "grad_norm": 0.5828259587287903, + "learning_rate": 3.504761757087133e-06, + "loss": 0.5447, + "step": 7562 + }, + { + "epoch": 1.1144891944990176, + "grad_norm": 0.5931050777435303, + "learning_rate": 3.5044067373214063e-06, + "loss": 0.535, + "step": 7563 + }, + { + "epoch": 1.1146365422396856, + "grad_norm": 0.5983620285987854, + "learning_rate": 3.5040516934001676e-06, + "loss": 0.5278, + "step": 7564 + }, + { + "epoch": 1.1147838899803537, + "grad_norm": 0.590279221534729, + "learning_rate": 3.5036966253319577e-06, + "loss": 0.5302, + "step": 7565 + }, + { + "epoch": 1.1149312377210217, + "grad_norm": 0.5917353630065918, + "learning_rate": 3.5033415331253145e-06, + "loss": 0.544, + "step": 7566 + }, + { + "epoch": 1.1150785854616896, + "grad_norm": 0.5788046717643738, + "learning_rate": 3.502986416788778e-06, + "loss": 0.5395, + "step": 7567 + }, + { + "epoch": 1.1152259332023575, + "grad_norm": 0.5935455560684204, + "learning_rate": 3.502631276330888e-06, + "loss": 0.5315, + "step": 7568 + }, + { + "epoch": 1.1153732809430255, + "grad_norm": 0.5792459845542908, + "learning_rate": 3.502276111760186e-06, + "loss": 0.5245, + "step": 7569 + }, + { + "epoch": 1.1155206286836936, + "grad_norm": 0.6023770570755005, + "learning_rate": 3.5019209230852146e-06, + "loss": 0.5202, + "step": 7570 + }, + { + "epoch": 1.1156679764243616, + "grad_norm": 0.5890241861343384, + "learning_rate": 3.5015657103145143e-06, + "loss": 0.5542, + "step": 7571 + }, + { + "epoch": 1.1158153241650295, + "grad_norm": 0.6042873859405518, + "learning_rate": 3.501210473456629e-06, + "loss": 0.539, + "step": 7572 + }, + { + "epoch": 1.1159626719056974, + "grad_norm": 0.6097132563591003, + "learning_rate": 3.5008552125201006e-06, + "loss": 0.5798, + "step": 7573 + }, + { + "epoch": 1.1161100196463654, + "grad_norm": 0.5940367579460144, + "learning_rate": 3.5004999275134744e-06, + "loss": 0.5358, + "step": 7574 + }, + { + "epoch": 1.1162573673870333, + "grad_norm": 0.6005069017410278, + "learning_rate": 3.5001446184452937e-06, + "loss": 0.5205, + "step": 7575 + }, + { + "epoch": 1.1164047151277015, + "grad_norm": 0.6596199870109558, + "learning_rate": 3.4997892853241044e-06, + "loss": 0.5316, + "step": 7576 + }, + { + "epoch": 1.1165520628683694, + "grad_norm": 0.5932339429855347, + "learning_rate": 3.4994339281584516e-06, + "loss": 0.5397, + "step": 7577 + }, + { + "epoch": 1.1166994106090373, + "grad_norm": 0.6028209924697876, + "learning_rate": 3.499078546956882e-06, + "loss": 0.5636, + "step": 7578 + }, + { + "epoch": 1.1168467583497053, + "grad_norm": 0.6036863923072815, + "learning_rate": 3.4987231417279414e-06, + "loss": 0.4971, + "step": 7579 + }, + { + "epoch": 1.1169941060903732, + "grad_norm": 0.5786066651344299, + "learning_rate": 3.4983677124801783e-06, + "loss": 0.5374, + "step": 7580 + }, + { + "epoch": 1.1171414538310414, + "grad_norm": 0.6354957222938538, + "learning_rate": 3.4980122592221394e-06, + "loss": 0.5563, + "step": 7581 + }, + { + "epoch": 1.1172888015717093, + "grad_norm": 0.5891615152359009, + "learning_rate": 3.497656781962374e-06, + "loss": 0.5538, + "step": 7582 + }, + { + "epoch": 1.1174361493123772, + "grad_norm": 0.6192718744277954, + "learning_rate": 3.4973012807094307e-06, + "loss": 0.5253, + "step": 7583 + }, + { + "epoch": 1.1175834970530452, + "grad_norm": 0.5811858177185059, + "learning_rate": 3.4969457554718596e-06, + "loss": 0.5337, + "step": 7584 + }, + { + "epoch": 1.117730844793713, + "grad_norm": 0.6013338565826416, + "learning_rate": 3.4965902062582103e-06, + "loss": 0.559, + "step": 7585 + }, + { + "epoch": 1.117878192534381, + "grad_norm": 0.5782637000083923, + "learning_rate": 3.496234633077034e-06, + "loss": 0.5383, + "step": 7586 + }, + { + "epoch": 1.1180255402750492, + "grad_norm": 0.5815911889076233, + "learning_rate": 3.495879035936882e-06, + "loss": 0.5457, + "step": 7587 + }, + { + "epoch": 1.1181728880157171, + "grad_norm": 0.5607064962387085, + "learning_rate": 3.4955234148463065e-06, + "loss": 0.5183, + "step": 7588 + }, + { + "epoch": 1.118320235756385, + "grad_norm": 0.5983226299285889, + "learning_rate": 3.4951677698138596e-06, + "loss": 0.5481, + "step": 7589 + }, + { + "epoch": 1.118467583497053, + "grad_norm": 0.595614492893219, + "learning_rate": 3.494812100848095e-06, + "loss": 0.5428, + "step": 7590 + }, + { + "epoch": 1.118614931237721, + "grad_norm": 0.5905253291130066, + "learning_rate": 3.494456407957566e-06, + "loss": 0.5128, + "step": 7591 + }, + { + "epoch": 1.118762278978389, + "grad_norm": 0.5749437212944031, + "learning_rate": 3.4941006911508263e-06, + "loss": 0.5204, + "step": 7592 + }, + { + "epoch": 1.118909626719057, + "grad_norm": 0.5596393346786499, + "learning_rate": 3.493744950436432e-06, + "loss": 0.5326, + "step": 7593 + }, + { + "epoch": 1.119056974459725, + "grad_norm": 0.6098390221595764, + "learning_rate": 3.493389185822937e-06, + "loss": 0.5413, + "step": 7594 + }, + { + "epoch": 1.119204322200393, + "grad_norm": 0.580016553401947, + "learning_rate": 3.493033397318898e-06, + "loss": 0.5483, + "step": 7595 + }, + { + "epoch": 1.1193516699410608, + "grad_norm": 0.6174038648605347, + "learning_rate": 3.492677584932872e-06, + "loss": 0.5422, + "step": 7596 + }, + { + "epoch": 1.1194990176817288, + "grad_norm": 0.5598392486572266, + "learning_rate": 3.492321748673416e-06, + "loss": 0.5221, + "step": 7597 + }, + { + "epoch": 1.119646365422397, + "grad_norm": 0.5867729783058167, + "learning_rate": 3.4919658885490863e-06, + "loss": 0.5235, + "step": 7598 + }, + { + "epoch": 1.1197937131630649, + "grad_norm": 0.5800284743309021, + "learning_rate": 3.491610004568443e-06, + "loss": 0.5432, + "step": 7599 + }, + { + "epoch": 1.1199410609037328, + "grad_norm": 0.5958842635154724, + "learning_rate": 3.4912540967400436e-06, + "loss": 0.523, + "step": 7600 + }, + { + "epoch": 1.1200884086444007, + "grad_norm": 0.5753934383392334, + "learning_rate": 3.4908981650724488e-06, + "loss": 0.5491, + "step": 7601 + }, + { + "epoch": 1.1202357563850687, + "grad_norm": 0.5753090381622314, + "learning_rate": 3.490542209574218e-06, + "loss": 0.5506, + "step": 7602 + }, + { + "epoch": 1.1203831041257368, + "grad_norm": 0.5959543585777283, + "learning_rate": 3.4901862302539115e-06, + "loss": 0.5496, + "step": 7603 + }, + { + "epoch": 1.1205304518664048, + "grad_norm": 0.5965360403060913, + "learning_rate": 3.4898302271200903e-06, + "loss": 0.5573, + "step": 7604 + }, + { + "epoch": 1.1206777996070727, + "grad_norm": 0.597707986831665, + "learning_rate": 3.4894742001813157e-06, + "loss": 0.5239, + "step": 7605 + }, + { + "epoch": 1.1208251473477406, + "grad_norm": 0.5931721925735474, + "learning_rate": 3.4891181494461516e-06, + "loss": 0.5407, + "step": 7606 + }, + { + "epoch": 1.1209724950884086, + "grad_norm": 0.5474986433982849, + "learning_rate": 3.48876207492316e-06, + "loss": 0.5235, + "step": 7607 + }, + { + "epoch": 1.1211198428290765, + "grad_norm": 0.5888906717300415, + "learning_rate": 3.488405976620904e-06, + "loss": 0.5634, + "step": 7608 + }, + { + "epoch": 1.1212671905697447, + "grad_norm": 0.6159812211990356, + "learning_rate": 3.4880498545479474e-06, + "loss": 0.5712, + "step": 7609 + }, + { + "epoch": 1.1214145383104126, + "grad_norm": 0.5828768014907837, + "learning_rate": 3.487693708712856e-06, + "loss": 0.529, + "step": 7610 + }, + { + "epoch": 1.1215618860510805, + "grad_norm": 0.5729447603225708, + "learning_rate": 3.4873375391241936e-06, + "loss": 0.5368, + "step": 7611 + }, + { + "epoch": 1.1217092337917485, + "grad_norm": 0.5746099352836609, + "learning_rate": 3.4869813457905268e-06, + "loss": 0.5023, + "step": 7612 + }, + { + "epoch": 1.1218565815324164, + "grad_norm": 0.5725248456001282, + "learning_rate": 3.4866251287204217e-06, + "loss": 0.5315, + "step": 7613 + }, + { + "epoch": 1.1220039292730846, + "grad_norm": 0.5958502292633057, + "learning_rate": 3.486268887922445e-06, + "loss": 0.5504, + "step": 7614 + }, + { + "epoch": 1.1221512770137525, + "grad_norm": 0.595846951007843, + "learning_rate": 3.485912623405163e-06, + "loss": 0.5276, + "step": 7615 + }, + { + "epoch": 1.1222986247544204, + "grad_norm": 0.564503014087677, + "learning_rate": 3.485556335177146e-06, + "loss": 0.5356, + "step": 7616 + }, + { + "epoch": 1.1224459724950884, + "grad_norm": 0.5758001208305359, + "learning_rate": 3.4852000232469612e-06, + "loss": 0.5472, + "step": 7617 + }, + { + "epoch": 1.1225933202357563, + "grad_norm": 0.6031704545021057, + "learning_rate": 3.4848436876231776e-06, + "loss": 0.515, + "step": 7618 + }, + { + "epoch": 1.1227406679764242, + "grad_norm": 0.595375120639801, + "learning_rate": 3.484487328314366e-06, + "loss": 0.5595, + "step": 7619 + }, + { + "epoch": 1.1228880157170924, + "grad_norm": 0.5481157898902893, + "learning_rate": 3.484130945329095e-06, + "loss": 0.5183, + "step": 7620 + }, + { + "epoch": 1.1230353634577603, + "grad_norm": 0.5961701273918152, + "learning_rate": 3.483774538675937e-06, + "loss": 0.5461, + "step": 7621 + }, + { + "epoch": 1.1231827111984283, + "grad_norm": 0.6170349717140198, + "learning_rate": 3.4834181083634627e-06, + "loss": 0.544, + "step": 7622 + }, + { + "epoch": 1.1233300589390962, + "grad_norm": 0.5962574481964111, + "learning_rate": 3.483061654400244e-06, + "loss": 0.5714, + "step": 7623 + }, + { + "epoch": 1.1234774066797641, + "grad_norm": 0.5932413339614868, + "learning_rate": 3.4827051767948534e-06, + "loss": 0.5682, + "step": 7624 + }, + { + "epoch": 1.1236247544204323, + "grad_norm": 0.5888411402702332, + "learning_rate": 3.4823486755558643e-06, + "loss": 0.5434, + "step": 7625 + }, + { + "epoch": 1.1237721021611002, + "grad_norm": 0.577101469039917, + "learning_rate": 3.48199215069185e-06, + "loss": 0.5229, + "step": 7626 + }, + { + "epoch": 1.1239194499017682, + "grad_norm": 0.5813589692115784, + "learning_rate": 3.4816356022113856e-06, + "loss": 0.5074, + "step": 7627 + }, + { + "epoch": 1.124066797642436, + "grad_norm": 0.5751444697380066, + "learning_rate": 3.4812790301230455e-06, + "loss": 0.5514, + "step": 7628 + }, + { + "epoch": 1.124214145383104, + "grad_norm": 0.6197606921195984, + "learning_rate": 3.4809224344354044e-06, + "loss": 0.571, + "step": 7629 + }, + { + "epoch": 1.1243614931237722, + "grad_norm": 0.604032576084137, + "learning_rate": 3.4805658151570394e-06, + "loss": 0.497, + "step": 7630 + }, + { + "epoch": 1.1245088408644401, + "grad_norm": 0.6179419755935669, + "learning_rate": 3.480209172296526e-06, + "loss": 0.5479, + "step": 7631 + }, + { + "epoch": 1.124656188605108, + "grad_norm": 0.589608907699585, + "learning_rate": 3.479852505862442e-06, + "loss": 0.5466, + "step": 7632 + }, + { + "epoch": 1.124803536345776, + "grad_norm": 0.5920473337173462, + "learning_rate": 3.4794958158633647e-06, + "loss": 0.5124, + "step": 7633 + }, + { + "epoch": 1.124950884086444, + "grad_norm": 0.5959464311599731, + "learning_rate": 3.4791391023078724e-06, + "loss": 0.5431, + "step": 7634 + }, + { + "epoch": 1.125098231827112, + "grad_norm": 0.627038300037384, + "learning_rate": 3.478782365204544e-06, + "loss": 0.534, + "step": 7635 + }, + { + "epoch": 1.12524557956778, + "grad_norm": 0.5624539256095886, + "learning_rate": 3.478425604561959e-06, + "loss": 0.5186, + "step": 7636 + }, + { + "epoch": 1.125392927308448, + "grad_norm": 0.6421216130256653, + "learning_rate": 3.478068820388697e-06, + "loss": 0.5416, + "step": 7637 + }, + { + "epoch": 1.125540275049116, + "grad_norm": 0.5515952706336975, + "learning_rate": 3.4777120126933385e-06, + "loss": 0.5399, + "step": 7638 + }, + { + "epoch": 1.1256876227897838, + "grad_norm": 0.6261618733406067, + "learning_rate": 3.4773551814844646e-06, + "loss": 0.5016, + "step": 7639 + }, + { + "epoch": 1.1258349705304518, + "grad_norm": 0.5890259146690369, + "learning_rate": 3.476998326770657e-06, + "loss": 0.5601, + "step": 7640 + }, + { + "epoch": 1.1259823182711197, + "grad_norm": 0.5820964574813843, + "learning_rate": 3.476641448560498e-06, + "loss": 0.4963, + "step": 7641 + }, + { + "epoch": 1.1261296660117879, + "grad_norm": 0.6003605723381042, + "learning_rate": 3.4762845468625694e-06, + "loss": 0.5159, + "step": 7642 + }, + { + "epoch": 1.1262770137524558, + "grad_norm": 0.5620367527008057, + "learning_rate": 3.4759276216854565e-06, + "loss": 0.52, + "step": 7643 + }, + { + "epoch": 1.1264243614931237, + "grad_norm": 0.6018159985542297, + "learning_rate": 3.475570673037742e-06, + "loss": 0.5391, + "step": 7644 + }, + { + "epoch": 1.1265717092337917, + "grad_norm": 0.5703460574150085, + "learning_rate": 3.47521370092801e-06, + "loss": 0.543, + "step": 7645 + }, + { + "epoch": 1.1267190569744598, + "grad_norm": 0.5868237018585205, + "learning_rate": 3.4748567053648462e-06, + "loss": 0.5478, + "step": 7646 + }, + { + "epoch": 1.1268664047151278, + "grad_norm": 0.6071873307228088, + "learning_rate": 3.4744996863568358e-06, + "loss": 0.5278, + "step": 7647 + }, + { + "epoch": 1.1270137524557957, + "grad_norm": 0.5778756737709045, + "learning_rate": 3.4741426439125646e-06, + "loss": 0.5273, + "step": 7648 + }, + { + "epoch": 1.1271611001964637, + "grad_norm": 0.5674449801445007, + "learning_rate": 3.47378557804062e-06, + "loss": 0.5192, + "step": 7649 + }, + { + "epoch": 1.1273084479371316, + "grad_norm": 0.6464263796806335, + "learning_rate": 3.473428488749589e-06, + "loss": 0.5633, + "step": 7650 + }, + { + "epoch": 1.1274557956777995, + "grad_norm": 0.5835638642311096, + "learning_rate": 3.4730713760480596e-06, + "loss": 0.5103, + "step": 7651 + }, + { + "epoch": 1.1276031434184677, + "grad_norm": 0.6146182417869568, + "learning_rate": 3.4727142399446195e-06, + "loss": 0.5251, + "step": 7652 + }, + { + "epoch": 1.1277504911591356, + "grad_norm": 0.6061192154884338, + "learning_rate": 3.4723570804478587e-06, + "loss": 0.5138, + "step": 7653 + }, + { + "epoch": 1.1278978388998036, + "grad_norm": 0.6027714610099792, + "learning_rate": 3.471999897566366e-06, + "loss": 0.504, + "step": 7654 + }, + { + "epoch": 1.1280451866404715, + "grad_norm": 0.5838251113891602, + "learning_rate": 3.4716426913087315e-06, + "loss": 0.5179, + "step": 7655 + }, + { + "epoch": 1.1281925343811394, + "grad_norm": 0.5837629437446594, + "learning_rate": 3.4712854616835456e-06, + "loss": 0.5567, + "step": 7656 + }, + { + "epoch": 1.1283398821218076, + "grad_norm": 0.6124322414398193, + "learning_rate": 3.4709282086994005e-06, + "loss": 0.5384, + "step": 7657 + }, + { + "epoch": 1.1284872298624755, + "grad_norm": 0.5930674076080322, + "learning_rate": 3.4705709323648873e-06, + "loss": 0.5412, + "step": 7658 + }, + { + "epoch": 1.1286345776031435, + "grad_norm": 0.5969535708427429, + "learning_rate": 3.470213632688598e-06, + "loss": 0.55, + "step": 7659 + }, + { + "epoch": 1.1287819253438114, + "grad_norm": 0.5837027430534363, + "learning_rate": 3.469856309679126e-06, + "loss": 0.52, + "step": 7660 + }, + { + "epoch": 1.1289292730844793, + "grad_norm": 0.6205540895462036, + "learning_rate": 3.469498963345065e-06, + "loss": 0.5263, + "step": 7661 + }, + { + "epoch": 1.1290766208251473, + "grad_norm": 0.6251105666160583, + "learning_rate": 3.4691415936950083e-06, + "loss": 0.5697, + "step": 7662 + }, + { + "epoch": 1.1292239685658154, + "grad_norm": 0.6076270341873169, + "learning_rate": 3.468784200737552e-06, + "loss": 0.559, + "step": 7663 + }, + { + "epoch": 1.1293713163064834, + "grad_norm": 0.6064066886901855, + "learning_rate": 3.4684267844812886e-06, + "loss": 0.5188, + "step": 7664 + }, + { + "epoch": 1.1295186640471513, + "grad_norm": 0.6202638745307922, + "learning_rate": 3.4680693449348157e-06, + "loss": 0.5399, + "step": 7665 + }, + { + "epoch": 1.1296660117878192, + "grad_norm": 0.5938944220542908, + "learning_rate": 3.467711882106729e-06, + "loss": 0.5456, + "step": 7666 + }, + { + "epoch": 1.1298133595284872, + "grad_norm": 0.6181368827819824, + "learning_rate": 3.4673543960056254e-06, + "loss": 0.5212, + "step": 7667 + }, + { + "epoch": 1.1299607072691553, + "grad_norm": 0.6168695688247681, + "learning_rate": 3.466996886640103e-06, + "loss": 0.5601, + "step": 7668 + }, + { + "epoch": 1.1301080550098233, + "grad_norm": 0.6210386157035828, + "learning_rate": 3.4666393540187586e-06, + "loss": 0.4913, + "step": 7669 + }, + { + "epoch": 1.1302554027504912, + "grad_norm": 0.5717772245407104, + "learning_rate": 3.4662817981501912e-06, + "loss": 0.5314, + "step": 7670 + }, + { + "epoch": 1.1304027504911591, + "grad_norm": 0.570131242275238, + "learning_rate": 3.4659242190429996e-06, + "loss": 0.5328, + "step": 7671 + }, + { + "epoch": 1.130550098231827, + "grad_norm": 0.6372429728507996, + "learning_rate": 3.4655666167057834e-06, + "loss": 0.532, + "step": 7672 + }, + { + "epoch": 1.130697445972495, + "grad_norm": 0.5895729064941406, + "learning_rate": 3.465208991147143e-06, + "loss": 0.5433, + "step": 7673 + }, + { + "epoch": 1.1308447937131632, + "grad_norm": 0.5833737850189209, + "learning_rate": 3.46485134237568e-06, + "loss": 0.5367, + "step": 7674 + }, + { + "epoch": 1.130992141453831, + "grad_norm": 0.5964848399162292, + "learning_rate": 3.4644936703999933e-06, + "loss": 0.5376, + "step": 7675 + }, + { + "epoch": 1.131139489194499, + "grad_norm": 0.6188317537307739, + "learning_rate": 3.464135975228687e-06, + "loss": 0.5634, + "step": 7676 + }, + { + "epoch": 1.131286836935167, + "grad_norm": 0.5708358883857727, + "learning_rate": 3.4637782568703624e-06, + "loss": 0.5504, + "step": 7677 + }, + { + "epoch": 1.131434184675835, + "grad_norm": 0.6205865740776062, + "learning_rate": 3.463420515333623e-06, + "loss": 0.5236, + "step": 7678 + }, + { + "epoch": 1.131581532416503, + "grad_norm": 0.5842613577842712, + "learning_rate": 3.463062750627072e-06, + "loss": 0.5188, + "step": 7679 + }, + { + "epoch": 1.131728880157171, + "grad_norm": 0.6200153827667236, + "learning_rate": 3.4627049627593133e-06, + "loss": 0.5493, + "step": 7680 + }, + { + "epoch": 1.131876227897839, + "grad_norm": 0.6078716516494751, + "learning_rate": 3.462347151738952e-06, + "loss": 0.5669, + "step": 7681 + }, + { + "epoch": 1.1320235756385069, + "grad_norm": 0.6352805495262146, + "learning_rate": 3.4619893175745934e-06, + "loss": 0.537, + "step": 7682 + }, + { + "epoch": 1.1321709233791748, + "grad_norm": 0.6207178235054016, + "learning_rate": 3.4616314602748425e-06, + "loss": 0.5199, + "step": 7683 + }, + { + "epoch": 1.1323182711198427, + "grad_norm": 0.5726854205131531, + "learning_rate": 3.4612735798483062e-06, + "loss": 0.5212, + "step": 7684 + }, + { + "epoch": 1.132465618860511, + "grad_norm": 0.5891842246055603, + "learning_rate": 3.4609156763035905e-06, + "loss": 0.5394, + "step": 7685 + }, + { + "epoch": 1.1326129666011788, + "grad_norm": 0.5789345502853394, + "learning_rate": 3.4605577496493044e-06, + "loss": 0.5455, + "step": 7686 + }, + { + "epoch": 1.1327603143418468, + "grad_norm": 0.5927568674087524, + "learning_rate": 3.4601997998940544e-06, + "loss": 0.515, + "step": 7687 + }, + { + "epoch": 1.1329076620825147, + "grad_norm": 0.6072763800621033, + "learning_rate": 3.45984182704645e-06, + "loss": 0.513, + "step": 7688 + }, + { + "epoch": 1.1330550098231826, + "grad_norm": 0.5992094278335571, + "learning_rate": 3.4594838311150995e-06, + "loss": 0.5218, + "step": 7689 + }, + { + "epoch": 1.1332023575638508, + "grad_norm": 0.5981463193893433, + "learning_rate": 3.459125812108613e-06, + "loss": 0.5839, + "step": 7690 + }, + { + "epoch": 1.1333497053045187, + "grad_norm": 0.6069618463516235, + "learning_rate": 3.458767770035601e-06, + "loss": 0.5454, + "step": 7691 + }, + { + "epoch": 1.1334970530451867, + "grad_norm": 0.6038497686386108, + "learning_rate": 3.4584097049046734e-06, + "loss": 0.5481, + "step": 7692 + }, + { + "epoch": 1.1336444007858546, + "grad_norm": 0.5831706523895264, + "learning_rate": 3.458051616724442e-06, + "loss": 0.5458, + "step": 7693 + }, + { + "epoch": 1.1337917485265225, + "grad_norm": 0.584854781627655, + "learning_rate": 3.4576935055035187e-06, + "loss": 0.5575, + "step": 7694 + }, + { + "epoch": 1.1339390962671905, + "grad_norm": 0.6109826564788818, + "learning_rate": 3.457335371250516e-06, + "loss": 0.5515, + "step": 7695 + }, + { + "epoch": 1.1340864440078586, + "grad_norm": 0.6104639768600464, + "learning_rate": 3.456977213974046e-06, + "loss": 0.5252, + "step": 7696 + }, + { + "epoch": 1.1342337917485266, + "grad_norm": 0.6036420464515686, + "learning_rate": 3.4566190336827225e-06, + "loss": 0.5212, + "step": 7697 + }, + { + "epoch": 1.1343811394891945, + "grad_norm": 0.5899425745010376, + "learning_rate": 3.456260830385161e-06, + "loss": 0.5403, + "step": 7698 + }, + { + "epoch": 1.1345284872298624, + "grad_norm": 0.5999977588653564, + "learning_rate": 3.455902604089974e-06, + "loss": 0.5502, + "step": 7699 + }, + { + "epoch": 1.1346758349705304, + "grad_norm": 0.6189834475517273, + "learning_rate": 3.455544354805778e-06, + "loss": 0.5039, + "step": 7700 + }, + { + "epoch": 1.1348231827111985, + "grad_norm": 0.5942191481590271, + "learning_rate": 3.4551860825411893e-06, + "loss": 0.5823, + "step": 7701 + }, + { + "epoch": 1.1349705304518665, + "grad_norm": 0.5716210603713989, + "learning_rate": 3.4548277873048226e-06, + "loss": 0.5453, + "step": 7702 + }, + { + "epoch": 1.1351178781925344, + "grad_norm": 0.6369682550430298, + "learning_rate": 3.4544694691052953e-06, + "loss": 0.5189, + "step": 7703 + }, + { + "epoch": 1.1352652259332023, + "grad_norm": 0.5863329768180847, + "learning_rate": 3.4541111279512255e-06, + "loss": 0.544, + "step": 7704 + }, + { + "epoch": 1.1354125736738703, + "grad_norm": 0.5886713266372681, + "learning_rate": 3.45375276385123e-06, + "loss": 0.5613, + "step": 7705 + }, + { + "epoch": 1.1355599214145382, + "grad_norm": 0.6425067186355591, + "learning_rate": 3.453394376813929e-06, + "loss": 0.526, + "step": 7706 + }, + { + "epoch": 1.1357072691552064, + "grad_norm": 0.6153994202613831, + "learning_rate": 3.453035966847939e-06, + "loss": 0.5291, + "step": 7707 + }, + { + "epoch": 1.1358546168958743, + "grad_norm": 0.5711575150489807, + "learning_rate": 3.452677533961882e-06, + "loss": 0.5426, + "step": 7708 + }, + { + "epoch": 1.1360019646365422, + "grad_norm": 0.5583837628364563, + "learning_rate": 3.452319078164377e-06, + "loss": 0.5214, + "step": 7709 + }, + { + "epoch": 1.1361493123772102, + "grad_norm": 0.6616848111152649, + "learning_rate": 3.4519605994640437e-06, + "loss": 0.5343, + "step": 7710 + }, + { + "epoch": 1.136296660117878, + "grad_norm": 0.5959407687187195, + "learning_rate": 3.451602097869506e-06, + "loss": 0.5208, + "step": 7711 + }, + { + "epoch": 1.1364440078585463, + "grad_norm": 0.5570586323738098, + "learning_rate": 3.4512435733893832e-06, + "loss": 0.5347, + "step": 7712 + }, + { + "epoch": 1.1365913555992142, + "grad_norm": 0.5764986276626587, + "learning_rate": 3.450885026032299e-06, + "loss": 0.5267, + "step": 7713 + }, + { + "epoch": 1.1367387033398821, + "grad_norm": 0.5800336599349976, + "learning_rate": 3.4505264558068757e-06, + "loss": 0.5497, + "step": 7714 + }, + { + "epoch": 1.13688605108055, + "grad_norm": 0.5847805738449097, + "learning_rate": 3.4501678627217374e-06, + "loss": 0.5033, + "step": 7715 + }, + { + "epoch": 1.137033398821218, + "grad_norm": 0.6139994263648987, + "learning_rate": 3.4498092467855073e-06, + "loss": 0.5626, + "step": 7716 + }, + { + "epoch": 1.137180746561886, + "grad_norm": 0.6255199313163757, + "learning_rate": 3.4494506080068103e-06, + "loss": 0.5415, + "step": 7717 + }, + { + "epoch": 1.137328094302554, + "grad_norm": 0.5983022451400757, + "learning_rate": 3.4490919463942718e-06, + "loss": 0.5569, + "step": 7718 + }, + { + "epoch": 1.137475442043222, + "grad_norm": 0.5897777080535889, + "learning_rate": 3.4487332619565166e-06, + "loss": 0.5248, + "step": 7719 + }, + { + "epoch": 1.13762278978389, + "grad_norm": 0.601180374622345, + "learning_rate": 3.448374554702172e-06, + "loss": 0.5184, + "step": 7720 + }, + { + "epoch": 1.137770137524558, + "grad_norm": 0.6122506260871887, + "learning_rate": 3.4480158246398636e-06, + "loss": 0.5546, + "step": 7721 + }, + { + "epoch": 1.1379174852652258, + "grad_norm": 0.6150729656219482, + "learning_rate": 3.44765707177822e-06, + "loss": 0.5385, + "step": 7722 + }, + { + "epoch": 1.138064833005894, + "grad_norm": 0.5935835838317871, + "learning_rate": 3.4472982961258676e-06, + "loss": 0.5334, + "step": 7723 + }, + { + "epoch": 1.138212180746562, + "grad_norm": 0.574142575263977, + "learning_rate": 3.446939497691436e-06, + "loss": 0.5588, + "step": 7724 + }, + { + "epoch": 1.1383595284872299, + "grad_norm": 0.5808748006820679, + "learning_rate": 3.446580676483553e-06, + "loss": 0.5469, + "step": 7725 + }, + { + "epoch": 1.1385068762278978, + "grad_norm": 0.5711080431938171, + "learning_rate": 3.446221832510849e-06, + "loss": 0.5385, + "step": 7726 + }, + { + "epoch": 1.1386542239685657, + "grad_norm": 0.5702672600746155, + "learning_rate": 3.4458629657819543e-06, + "loss": 0.5474, + "step": 7727 + }, + { + "epoch": 1.1388015717092337, + "grad_norm": 0.6158030033111572, + "learning_rate": 3.445504076305498e-06, + "loss": 0.5128, + "step": 7728 + }, + { + "epoch": 1.1389489194499018, + "grad_norm": 0.5769596099853516, + "learning_rate": 3.445145164090113e-06, + "loss": 0.5447, + "step": 7729 + }, + { + "epoch": 1.1390962671905698, + "grad_norm": 0.6139371991157532, + "learning_rate": 3.4447862291444297e-06, + "loss": 0.5218, + "step": 7730 + }, + { + "epoch": 1.1392436149312377, + "grad_norm": 0.5835952162742615, + "learning_rate": 3.4444272714770807e-06, + "loss": 0.5514, + "step": 7731 + }, + { + "epoch": 1.1393909626719056, + "grad_norm": 0.5852099657058716, + "learning_rate": 3.444068291096698e-06, + "loss": 0.5357, + "step": 7732 + }, + { + "epoch": 1.1395383104125736, + "grad_norm": 0.5869231820106506, + "learning_rate": 3.4437092880119173e-06, + "loss": 0.5382, + "step": 7733 + }, + { + "epoch": 1.1396856581532417, + "grad_norm": 0.584660530090332, + "learning_rate": 3.4433502622313693e-06, + "loss": 0.4941, + "step": 7734 + }, + { + "epoch": 1.1398330058939097, + "grad_norm": 0.6020726561546326, + "learning_rate": 3.4429912137636914e-06, + "loss": 0.5191, + "step": 7735 + }, + { + "epoch": 1.1399803536345776, + "grad_norm": 0.6016764640808105, + "learning_rate": 3.4426321426175165e-06, + "loss": 0.5369, + "step": 7736 + }, + { + "epoch": 1.1401277013752456, + "grad_norm": 0.5994013547897339, + "learning_rate": 3.44227304880148e-06, + "loss": 0.5044, + "step": 7737 + }, + { + "epoch": 1.1402750491159135, + "grad_norm": 0.6139448881149292, + "learning_rate": 3.4419139323242194e-06, + "loss": 0.5063, + "step": 7738 + }, + { + "epoch": 1.1404223968565814, + "grad_norm": 0.5390638113021851, + "learning_rate": 3.4415547931943704e-06, + "loss": 0.5375, + "step": 7739 + }, + { + "epoch": 1.1405697445972496, + "grad_norm": 0.6057170033454895, + "learning_rate": 3.44119563142057e-06, + "loss": 0.5508, + "step": 7740 + }, + { + "epoch": 1.1407170923379175, + "grad_norm": 0.6177294850349426, + "learning_rate": 3.4408364470114565e-06, + "loss": 0.5252, + "step": 7741 + }, + { + "epoch": 1.1408644400785855, + "grad_norm": 0.5702371001243591, + "learning_rate": 3.440477239975667e-06, + "loss": 0.5373, + "step": 7742 + }, + { + "epoch": 1.1410117878192534, + "grad_norm": 0.5828078985214233, + "learning_rate": 3.440118010321842e-06, + "loss": 0.51, + "step": 7743 + }, + { + "epoch": 1.1411591355599213, + "grad_norm": 0.5819292068481445, + "learning_rate": 3.439758758058619e-06, + "loss": 0.5285, + "step": 7744 + }, + { + "epoch": 1.1413064833005895, + "grad_norm": 0.6147122979164124, + "learning_rate": 3.439399483194639e-06, + "loss": 0.54, + "step": 7745 + }, + { + "epoch": 1.1414538310412574, + "grad_norm": 0.6075112819671631, + "learning_rate": 3.439040185738542e-06, + "loss": 0.5167, + "step": 7746 + }, + { + "epoch": 1.1416011787819254, + "grad_norm": 0.6051692366600037, + "learning_rate": 3.4386808656989688e-06, + "loss": 0.5557, + "step": 7747 + }, + { + "epoch": 1.1417485265225933, + "grad_norm": 0.6037001609802246, + "learning_rate": 3.4383215230845614e-06, + "loss": 0.5383, + "step": 7748 + }, + { + "epoch": 1.1418958742632612, + "grad_norm": 0.5851173400878906, + "learning_rate": 3.4379621579039614e-06, + "loss": 0.5608, + "step": 7749 + }, + { + "epoch": 1.1420432220039292, + "grad_norm": 0.5882610082626343, + "learning_rate": 3.437602770165811e-06, + "loss": 0.5309, + "step": 7750 + }, + { + "epoch": 1.1421905697445973, + "grad_norm": 0.5793803930282593, + "learning_rate": 3.4372433598787536e-06, + "loss": 0.5664, + "step": 7751 + }, + { + "epoch": 1.1423379174852653, + "grad_norm": 0.6228234767913818, + "learning_rate": 3.436883927051433e-06, + "loss": 0.4946, + "step": 7752 + }, + { + "epoch": 1.1424852652259332, + "grad_norm": 0.56019127368927, + "learning_rate": 3.436524471692494e-06, + "loss": 0.5451, + "step": 7753 + }, + { + "epoch": 1.1426326129666011, + "grad_norm": 0.6069178581237793, + "learning_rate": 3.4361649938105802e-06, + "loss": 0.519, + "step": 7754 + }, + { + "epoch": 1.142779960707269, + "grad_norm": 0.5808759927749634, + "learning_rate": 3.435805493414338e-06, + "loss": 0.5348, + "step": 7755 + }, + { + "epoch": 1.1429273084479372, + "grad_norm": 0.5723404288291931, + "learning_rate": 3.435445970512412e-06, + "loss": 0.5426, + "step": 7756 + }, + { + "epoch": 1.1430746561886052, + "grad_norm": 0.6097684502601624, + "learning_rate": 3.4350864251134496e-06, + "loss": 0.5485, + "step": 7757 + }, + { + "epoch": 1.143222003929273, + "grad_norm": 0.5807891488075256, + "learning_rate": 3.434726857226097e-06, + "loss": 0.5344, + "step": 7758 + }, + { + "epoch": 1.143369351669941, + "grad_norm": 0.5799002051353455, + "learning_rate": 3.434367266859003e-06, + "loss": 0.5565, + "step": 7759 + }, + { + "epoch": 1.143516699410609, + "grad_norm": 0.6190792918205261, + "learning_rate": 3.4340076540208134e-06, + "loss": 0.5528, + "step": 7760 + }, + { + "epoch": 1.143664047151277, + "grad_norm": 0.5878516435623169, + "learning_rate": 3.4336480187201774e-06, + "loss": 0.5186, + "step": 7761 + }, + { + "epoch": 1.143811394891945, + "grad_norm": 0.5926927328109741, + "learning_rate": 3.4332883609657454e-06, + "loss": 0.5657, + "step": 7762 + }, + { + "epoch": 1.143958742632613, + "grad_norm": 0.5783846974372864, + "learning_rate": 3.4329286807661655e-06, + "loss": 0.5321, + "step": 7763 + }, + { + "epoch": 1.144106090373281, + "grad_norm": 0.5785362124443054, + "learning_rate": 3.432568978130088e-06, + "loss": 0.5318, + "step": 7764 + }, + { + "epoch": 1.1442534381139489, + "grad_norm": 0.5748611092567444, + "learning_rate": 3.4322092530661642e-06, + "loss": 0.5458, + "step": 7765 + }, + { + "epoch": 1.144400785854617, + "grad_norm": 0.5744998455047607, + "learning_rate": 3.431849505583046e-06, + "loss": 0.544, + "step": 7766 + }, + { + "epoch": 1.144548133595285, + "grad_norm": 0.5519201755523682, + "learning_rate": 3.4314897356893834e-06, + "loss": 0.5352, + "step": 7767 + }, + { + "epoch": 1.144695481335953, + "grad_norm": 0.584000289440155, + "learning_rate": 3.43112994339383e-06, + "loss": 0.5176, + "step": 7768 + }, + { + "epoch": 1.1448428290766208, + "grad_norm": 0.5852591395378113, + "learning_rate": 3.4307701287050375e-06, + "loss": 0.5104, + "step": 7769 + }, + { + "epoch": 1.1449901768172888, + "grad_norm": 0.5716149806976318, + "learning_rate": 3.430410291631661e-06, + "loss": 0.5436, + "step": 7770 + }, + { + "epoch": 1.1451375245579567, + "grad_norm": 0.6073604226112366, + "learning_rate": 3.430050432182352e-06, + "loss": 0.5102, + "step": 7771 + }, + { + "epoch": 1.1452848722986246, + "grad_norm": 0.5701419115066528, + "learning_rate": 3.429690550365767e-06, + "loss": 0.5437, + "step": 7772 + }, + { + "epoch": 1.1454322200392928, + "grad_norm": 0.5984776616096497, + "learning_rate": 3.4293306461905607e-06, + "loss": 0.5353, + "step": 7773 + }, + { + "epoch": 1.1455795677799607, + "grad_norm": 0.6046543121337891, + "learning_rate": 3.4289707196653875e-06, + "loss": 0.4835, + "step": 7774 + }, + { + "epoch": 1.1457269155206287, + "grad_norm": 0.5777061581611633, + "learning_rate": 3.4286107707989045e-06, + "loss": 0.5511, + "step": 7775 + }, + { + "epoch": 1.1458742632612966, + "grad_norm": 0.5858554840087891, + "learning_rate": 3.428250799599768e-06, + "loss": 0.5103, + "step": 7776 + }, + { + "epoch": 1.1460216110019648, + "grad_norm": 0.5925667881965637, + "learning_rate": 3.427890806076636e-06, + "loss": 0.5413, + "step": 7777 + }, + { + "epoch": 1.1461689587426327, + "grad_norm": 0.640734076499939, + "learning_rate": 3.4275307902381644e-06, + "loss": 0.5547, + "step": 7778 + }, + { + "epoch": 1.1463163064833006, + "grad_norm": 0.582951009273529, + "learning_rate": 3.4271707520930125e-06, + "loss": 0.549, + "step": 7779 + }, + { + "epoch": 1.1464636542239686, + "grad_norm": 0.6090418100357056, + "learning_rate": 3.426810691649839e-06, + "loss": 0.5327, + "step": 7780 + }, + { + "epoch": 1.1466110019646365, + "grad_norm": 0.629491925239563, + "learning_rate": 3.4264506089173033e-06, + "loss": 0.5409, + "step": 7781 + }, + { + "epoch": 1.1467583497053044, + "grad_norm": 0.5734692811965942, + "learning_rate": 3.426090503904065e-06, + "loss": 0.5361, + "step": 7782 + }, + { + "epoch": 1.1469056974459724, + "grad_norm": 0.5539997220039368, + "learning_rate": 3.425730376618784e-06, + "loss": 0.5529, + "step": 7783 + }, + { + "epoch": 1.1470530451866405, + "grad_norm": 0.6034707427024841, + "learning_rate": 3.4253702270701216e-06, + "loss": 0.5588, + "step": 7784 + }, + { + "epoch": 1.1472003929273085, + "grad_norm": 0.6126664876937866, + "learning_rate": 3.42501005526674e-06, + "loss": 0.5217, + "step": 7785 + }, + { + "epoch": 1.1473477406679764, + "grad_norm": 0.6327771544456482, + "learning_rate": 3.4246498612172997e-06, + "loss": 0.5094, + "step": 7786 + }, + { + "epoch": 1.1474950884086443, + "grad_norm": 0.5925971865653992, + "learning_rate": 3.424289644930464e-06, + "loss": 0.5605, + "step": 7787 + }, + { + "epoch": 1.1476424361493125, + "grad_norm": 0.6012893319129944, + "learning_rate": 3.423929406414896e-06, + "loss": 0.5567, + "step": 7788 + }, + { + "epoch": 1.1477897838899804, + "grad_norm": 0.5948098301887512, + "learning_rate": 3.4235691456792585e-06, + "loss": 0.5304, + "step": 7789 + }, + { + "epoch": 1.1479371316306484, + "grad_norm": 0.6061700582504272, + "learning_rate": 3.423208862732217e-06, + "loss": 0.5386, + "step": 7790 + }, + { + "epoch": 1.1480844793713163, + "grad_norm": 0.5982822179794312, + "learning_rate": 3.422848557582435e-06, + "loss": 0.5442, + "step": 7791 + }, + { + "epoch": 1.1482318271119842, + "grad_norm": 0.5877569317817688, + "learning_rate": 3.422488230238579e-06, + "loss": 0.515, + "step": 7792 + }, + { + "epoch": 1.1483791748526522, + "grad_norm": 0.5695385932922363, + "learning_rate": 3.4221278807093123e-06, + "loss": 0.527, + "step": 7793 + }, + { + "epoch": 1.1485265225933203, + "grad_norm": 0.7033313512802124, + "learning_rate": 3.421767509003303e-06, + "loss": 0.5417, + "step": 7794 + }, + { + "epoch": 1.1486738703339883, + "grad_norm": 0.5902414321899414, + "learning_rate": 3.421407115129218e-06, + "loss": 0.5527, + "step": 7795 + }, + { + "epoch": 1.1488212180746562, + "grad_norm": 0.5928328037261963, + "learning_rate": 3.4210466990957237e-06, + "loss": 0.5328, + "step": 7796 + }, + { + "epoch": 1.1489685658153241, + "grad_norm": 0.5823041796684265, + "learning_rate": 3.420686260911488e-06, + "loss": 0.5386, + "step": 7797 + }, + { + "epoch": 1.149115913555992, + "grad_norm": 0.6078360676765442, + "learning_rate": 3.4203258005851804e-06, + "loss": 0.5417, + "step": 7798 + }, + { + "epoch": 1.1492632612966602, + "grad_norm": 0.5721399784088135, + "learning_rate": 3.4199653181254677e-06, + "loss": 0.536, + "step": 7799 + }, + { + "epoch": 1.1494106090373282, + "grad_norm": 0.5680684447288513, + "learning_rate": 3.4196048135410214e-06, + "loss": 0.5398, + "step": 7800 + }, + { + "epoch": 1.149557956777996, + "grad_norm": 0.6437044143676758, + "learning_rate": 3.4192442868405105e-06, + "loss": 0.5483, + "step": 7801 + }, + { + "epoch": 1.149705304518664, + "grad_norm": 0.566535472869873, + "learning_rate": 3.4188837380326058e-06, + "loss": 0.5192, + "step": 7802 + }, + { + "epoch": 1.149852652259332, + "grad_norm": 0.5452014803886414, + "learning_rate": 3.418523167125978e-06, + "loss": 0.529, + "step": 7803 + }, + { + "epoch": 1.15, + "grad_norm": 0.586347222328186, + "learning_rate": 3.418162574129299e-06, + "loss": 0.5547, + "step": 7804 + }, + { + "epoch": 1.150147347740668, + "grad_norm": 0.6035025119781494, + "learning_rate": 3.4178019590512405e-06, + "loss": 0.5285, + "step": 7805 + }, + { + "epoch": 1.150294695481336, + "grad_norm": 0.5570943355560303, + "learning_rate": 3.417441321900476e-06, + "loss": 0.5261, + "step": 7806 + }, + { + "epoch": 1.150442043222004, + "grad_norm": 0.5551353693008423, + "learning_rate": 3.417080662685677e-06, + "loss": 0.5166, + "step": 7807 + }, + { + "epoch": 1.1505893909626719, + "grad_norm": 0.6024887561798096, + "learning_rate": 3.416719981415519e-06, + "loss": 0.5156, + "step": 7808 + }, + { + "epoch": 1.1507367387033398, + "grad_norm": 0.6127015948295593, + "learning_rate": 3.4163592780986754e-06, + "loss": 0.5237, + "step": 7809 + }, + { + "epoch": 1.150884086444008, + "grad_norm": 0.5951183438301086, + "learning_rate": 3.4159985527438205e-06, + "loss": 0.5471, + "step": 7810 + }, + { + "epoch": 1.151031434184676, + "grad_norm": 0.5813073515892029, + "learning_rate": 3.41563780535963e-06, + "loss": 0.5326, + "step": 7811 + }, + { + "epoch": 1.1511787819253438, + "grad_norm": 0.6555100679397583, + "learning_rate": 3.4152770359547806e-06, + "loss": 0.5571, + "step": 7812 + }, + { + "epoch": 1.1513261296660118, + "grad_norm": 0.6052147150039673, + "learning_rate": 3.414916244537947e-06, + "loss": 0.5294, + "step": 7813 + }, + { + "epoch": 1.1514734774066797, + "grad_norm": 0.6135151386260986, + "learning_rate": 3.414555431117807e-06, + "loss": 0.5622, + "step": 7814 + }, + { + "epoch": 1.1516208251473476, + "grad_norm": 0.6008705496788025, + "learning_rate": 3.4141945957030376e-06, + "loss": 0.547, + "step": 7815 + }, + { + "epoch": 1.1517681728880158, + "grad_norm": 0.6224644780158997, + "learning_rate": 3.4138337383023175e-06, + "loss": 0.502, + "step": 7816 + }, + { + "epoch": 1.1519155206286837, + "grad_norm": 0.6047925353050232, + "learning_rate": 3.413472858924324e-06, + "loss": 0.5502, + "step": 7817 + }, + { + "epoch": 1.1520628683693517, + "grad_norm": 0.580732524394989, + "learning_rate": 3.413111957577737e-06, + "loss": 0.5525, + "step": 7818 + }, + { + "epoch": 1.1522102161100196, + "grad_norm": 0.6102653741836548, + "learning_rate": 3.4127510342712354e-06, + "loss": 0.5258, + "step": 7819 + }, + { + "epoch": 1.1523575638506875, + "grad_norm": 0.6195507049560547, + "learning_rate": 3.4123900890134997e-06, + "loss": 0.5504, + "step": 7820 + }, + { + "epoch": 1.1525049115913557, + "grad_norm": 0.5905807614326477, + "learning_rate": 3.4120291218132107e-06, + "loss": 0.5486, + "step": 7821 + }, + { + "epoch": 1.1526522593320236, + "grad_norm": 0.6629602313041687, + "learning_rate": 3.4116681326790493e-06, + "loss": 0.5425, + "step": 7822 + }, + { + "epoch": 1.1527996070726916, + "grad_norm": 0.6128150820732117, + "learning_rate": 3.411307121619696e-06, + "loss": 0.5215, + "step": 7823 + }, + { + "epoch": 1.1529469548133595, + "grad_norm": 0.6152788996696472, + "learning_rate": 3.410946088643834e-06, + "loss": 0.5316, + "step": 7824 + }, + { + "epoch": 1.1530943025540275, + "grad_norm": 0.6121789813041687, + "learning_rate": 3.4105850337601467e-06, + "loss": 0.563, + "step": 7825 + }, + { + "epoch": 1.1532416502946954, + "grad_norm": 0.5969893932342529, + "learning_rate": 3.4102239569773156e-06, + "loss": 0.5348, + "step": 7826 + }, + { + "epoch": 1.1533889980353635, + "grad_norm": 0.603484570980072, + "learning_rate": 3.409862858304025e-06, + "loss": 0.5266, + "step": 7827 + }, + { + "epoch": 1.1535363457760315, + "grad_norm": 0.5869206786155701, + "learning_rate": 3.4095017377489605e-06, + "loss": 0.5507, + "step": 7828 + }, + { + "epoch": 1.1536836935166994, + "grad_norm": 0.6071210503578186, + "learning_rate": 3.4091405953208045e-06, + "loss": 0.5421, + "step": 7829 + }, + { + "epoch": 1.1538310412573674, + "grad_norm": 0.6397535800933838, + "learning_rate": 3.4087794310282446e-06, + "loss": 0.5678, + "step": 7830 + }, + { + "epoch": 1.1539783889980353, + "grad_norm": 0.5980623960494995, + "learning_rate": 3.408418244879965e-06, + "loss": 0.5417, + "step": 7831 + }, + { + "epoch": 1.1541257367387034, + "grad_norm": 0.5942433476448059, + "learning_rate": 3.408057036884653e-06, + "loss": 0.5432, + "step": 7832 + }, + { + "epoch": 1.1542730844793714, + "grad_norm": 0.6409138441085815, + "learning_rate": 3.4076958070509953e-06, + "loss": 0.5277, + "step": 7833 + }, + { + "epoch": 1.1544204322200393, + "grad_norm": 0.5578483939170837, + "learning_rate": 3.407334555387679e-06, + "loss": 0.5419, + "step": 7834 + }, + { + "epoch": 1.1545677799607073, + "grad_norm": 0.58782958984375, + "learning_rate": 3.4069732819033925e-06, + "loss": 0.5408, + "step": 7835 + }, + { + "epoch": 1.1547151277013752, + "grad_norm": 0.5813010931015015, + "learning_rate": 3.4066119866068235e-06, + "loss": 0.5158, + "step": 7836 + }, + { + "epoch": 1.1548624754420431, + "grad_norm": 0.5897231101989746, + "learning_rate": 3.4062506695066616e-06, + "loss": 0.5263, + "step": 7837 + }, + { + "epoch": 1.1550098231827113, + "grad_norm": 0.5624765157699585, + "learning_rate": 3.4058893306115962e-06, + "loss": 0.5153, + "step": 7838 + }, + { + "epoch": 1.1551571709233792, + "grad_norm": 0.5671955347061157, + "learning_rate": 3.4055279699303174e-06, + "loss": 0.5594, + "step": 7839 + }, + { + "epoch": 1.1553045186640472, + "grad_norm": 0.594887375831604, + "learning_rate": 3.4051665874715158e-06, + "loss": 0.5865, + "step": 7840 + }, + { + "epoch": 1.155451866404715, + "grad_norm": 0.5963154435157776, + "learning_rate": 3.404805183243882e-06, + "loss": 0.5511, + "step": 7841 + }, + { + "epoch": 1.155599214145383, + "grad_norm": 0.6158878803253174, + "learning_rate": 3.4044437572561074e-06, + "loss": 0.5335, + "step": 7842 + }, + { + "epoch": 1.1557465618860512, + "grad_norm": 0.5926337242126465, + "learning_rate": 3.4040823095168856e-06, + "loss": 0.5249, + "step": 7843 + }, + { + "epoch": 1.1558939096267191, + "grad_norm": 0.573597252368927, + "learning_rate": 3.4037208400349074e-06, + "loss": 0.547, + "step": 7844 + }, + { + "epoch": 1.156041257367387, + "grad_norm": 0.5837386250495911, + "learning_rate": 3.4033593488188672e-06, + "loss": 0.538, + "step": 7845 + }, + { + "epoch": 1.156188605108055, + "grad_norm": 0.6438924074172974, + "learning_rate": 3.4029978358774585e-06, + "loss": 0.5286, + "step": 7846 + }, + { + "epoch": 1.156335952848723, + "grad_norm": 0.5898394584655762, + "learning_rate": 3.4026363012193753e-06, + "loss": 0.554, + "step": 7847 + }, + { + "epoch": 1.1564833005893909, + "grad_norm": 0.5709123015403748, + "learning_rate": 3.4022747448533127e-06, + "loss": 0.5277, + "step": 7848 + }, + { + "epoch": 1.156630648330059, + "grad_norm": 0.608397901058197, + "learning_rate": 3.4019131667879656e-06, + "loss": 0.5212, + "step": 7849 + }, + { + "epoch": 1.156777996070727, + "grad_norm": 0.6126312017440796, + "learning_rate": 3.4015515670320305e-06, + "loss": 0.5466, + "step": 7850 + }, + { + "epoch": 1.156925343811395, + "grad_norm": 0.55361008644104, + "learning_rate": 3.401189945594202e-06, + "loss": 0.5133, + "step": 7851 + }, + { + "epoch": 1.1570726915520628, + "grad_norm": 0.5851404666900635, + "learning_rate": 3.4008283024831786e-06, + "loss": 0.5183, + "step": 7852 + }, + { + "epoch": 1.1572200392927308, + "grad_norm": 0.6164841055870056, + "learning_rate": 3.4004666377076565e-06, + "loss": 0.5253, + "step": 7853 + }, + { + "epoch": 1.157367387033399, + "grad_norm": 0.5849376320838928, + "learning_rate": 3.4001049512763345e-06, + "loss": 0.5389, + "step": 7854 + }, + { + "epoch": 1.1575147347740669, + "grad_norm": 0.5994781851768494, + "learning_rate": 3.3997432431979106e-06, + "loss": 0.5604, + "step": 7855 + }, + { + "epoch": 1.1576620825147348, + "grad_norm": 0.5858972668647766, + "learning_rate": 3.3993815134810836e-06, + "loss": 0.5299, + "step": 7856 + }, + { + "epoch": 1.1578094302554027, + "grad_norm": 0.5875107645988464, + "learning_rate": 3.3990197621345528e-06, + "loss": 0.5593, + "step": 7857 + }, + { + "epoch": 1.1579567779960707, + "grad_norm": 0.5723569393157959, + "learning_rate": 3.3986579891670185e-06, + "loss": 0.5156, + "step": 7858 + }, + { + "epoch": 1.1581041257367386, + "grad_norm": 0.594313383102417, + "learning_rate": 3.398296194587181e-06, + "loss": 0.5253, + "step": 7859 + }, + { + "epoch": 1.1582514734774068, + "grad_norm": 0.5841257572174072, + "learning_rate": 3.3979343784037417e-06, + "loss": 0.5025, + "step": 7860 + }, + { + "epoch": 1.1583988212180747, + "grad_norm": 0.6130372285842896, + "learning_rate": 3.397572540625402e-06, + "loss": 0.5539, + "step": 7861 + }, + { + "epoch": 1.1585461689587426, + "grad_norm": 0.5952900052070618, + "learning_rate": 3.397210681260863e-06, + "loss": 0.5444, + "step": 7862 + }, + { + "epoch": 1.1586935166994106, + "grad_norm": 0.5609310865402222, + "learning_rate": 3.3968488003188277e-06, + "loss": 0.5111, + "step": 7863 + }, + { + "epoch": 1.1588408644400785, + "grad_norm": 0.6131823062896729, + "learning_rate": 3.3964868978079994e-06, + "loss": 0.5418, + "step": 7864 + }, + { + "epoch": 1.1589882121807467, + "grad_norm": 0.6053680777549744, + "learning_rate": 3.396124973737083e-06, + "loss": 0.5422, + "step": 7865 + }, + { + "epoch": 1.1591355599214146, + "grad_norm": 0.5803036689758301, + "learning_rate": 3.39576302811478e-06, + "loss": 0.5545, + "step": 7866 + }, + { + "epoch": 1.1592829076620825, + "grad_norm": 0.597663938999176, + "learning_rate": 3.395401060949797e-06, + "loss": 0.5358, + "step": 7867 + }, + { + "epoch": 1.1594302554027505, + "grad_norm": 0.5832355618476868, + "learning_rate": 3.395039072250838e-06, + "loss": 0.5515, + "step": 7868 + }, + { + "epoch": 1.1595776031434184, + "grad_norm": 0.6166790127754211, + "learning_rate": 3.394677062026609e-06, + "loss": 0.5431, + "step": 7869 + }, + { + "epoch": 1.1597249508840863, + "grad_norm": 0.5609540343284607, + "learning_rate": 3.394315030285817e-06, + "loss": 0.5314, + "step": 7870 + }, + { + "epoch": 1.1598722986247545, + "grad_norm": 0.5800032615661621, + "learning_rate": 3.393952977037168e-06, + "loss": 0.5292, + "step": 7871 + }, + { + "epoch": 1.1600196463654224, + "grad_norm": 0.5850493907928467, + "learning_rate": 3.3935909022893683e-06, + "loss": 0.5458, + "step": 7872 + }, + { + "epoch": 1.1601669941060904, + "grad_norm": 0.5952269434928894, + "learning_rate": 3.393228806051127e-06, + "loss": 0.5323, + "step": 7873 + }, + { + "epoch": 1.1603143418467583, + "grad_norm": 0.559017539024353, + "learning_rate": 3.3928666883311522e-06, + "loss": 0.5098, + "step": 7874 + }, + { + "epoch": 1.1604616895874262, + "grad_norm": 0.5938231945037842, + "learning_rate": 3.392504549138152e-06, + "loss": 0.5209, + "step": 7875 + }, + { + "epoch": 1.1606090373280944, + "grad_norm": 0.5867471694946289, + "learning_rate": 3.392142388480836e-06, + "loss": 0.5347, + "step": 7876 + }, + { + "epoch": 1.1607563850687623, + "grad_norm": 0.6347991824150085, + "learning_rate": 3.391780206367914e-06, + "loss": 0.5168, + "step": 7877 + }, + { + "epoch": 1.1609037328094303, + "grad_norm": 0.6097278594970703, + "learning_rate": 3.3914180028080967e-06, + "loss": 0.5547, + "step": 7878 + }, + { + "epoch": 1.1610510805500982, + "grad_norm": 0.5835239291191101, + "learning_rate": 3.391055777810094e-06, + "loss": 0.5523, + "step": 7879 + }, + { + "epoch": 1.1611984282907661, + "grad_norm": 0.5951179265975952, + "learning_rate": 3.390693531382618e-06, + "loss": 0.5291, + "step": 7880 + }, + { + "epoch": 1.161345776031434, + "grad_norm": 0.5686542391777039, + "learning_rate": 3.3903312635343795e-06, + "loss": 0.556, + "step": 7881 + }, + { + "epoch": 1.1614931237721022, + "grad_norm": 0.5900204181671143, + "learning_rate": 3.3899689742740927e-06, + "loss": 0.5394, + "step": 7882 + }, + { + "epoch": 1.1616404715127702, + "grad_norm": 0.577680230140686, + "learning_rate": 3.389606663610469e-06, + "loss": 0.5706, + "step": 7883 + }, + { + "epoch": 1.161787819253438, + "grad_norm": 0.6018599271774292, + "learning_rate": 3.3892443315522228e-06, + "loss": 0.5496, + "step": 7884 + }, + { + "epoch": 1.161935166994106, + "grad_norm": 0.5746740698814392, + "learning_rate": 3.3888819781080673e-06, + "loss": 0.5375, + "step": 7885 + }, + { + "epoch": 1.162082514734774, + "grad_norm": 0.5669230222702026, + "learning_rate": 3.3885196032867167e-06, + "loss": 0.561, + "step": 7886 + }, + { + "epoch": 1.1622298624754421, + "grad_norm": 0.5912509560585022, + "learning_rate": 3.388157207096887e-06, + "loss": 0.5356, + "step": 7887 + }, + { + "epoch": 1.16237721021611, + "grad_norm": 0.5882794260978699, + "learning_rate": 3.387794789547293e-06, + "loss": 0.5772, + "step": 7888 + }, + { + "epoch": 1.162524557956778, + "grad_norm": 0.5963994264602661, + "learning_rate": 3.38743235064665e-06, + "loss": 0.5435, + "step": 7889 + }, + { + "epoch": 1.162671905697446, + "grad_norm": 0.5403124094009399, + "learning_rate": 3.387069890403676e-06, + "loss": 0.5325, + "step": 7890 + }, + { + "epoch": 1.1628192534381139, + "grad_norm": 0.6039580702781677, + "learning_rate": 3.386707408827087e-06, + "loss": 0.5273, + "step": 7891 + }, + { + "epoch": 1.1629666011787818, + "grad_norm": 0.6466390490531921, + "learning_rate": 3.3863449059256006e-06, + "loss": 0.5547, + "step": 7892 + }, + { + "epoch": 1.16311394891945, + "grad_norm": 0.5919018387794495, + "learning_rate": 3.3859823817079347e-06, + "loss": 0.5305, + "step": 7893 + }, + { + "epoch": 1.163261296660118, + "grad_norm": 0.6329895257949829, + "learning_rate": 3.3856198361828085e-06, + "loss": 0.5451, + "step": 7894 + }, + { + "epoch": 1.1634086444007858, + "grad_norm": 0.5946719646453857, + "learning_rate": 3.3852572693589403e-06, + "loss": 0.517, + "step": 7895 + }, + { + "epoch": 1.1635559921414538, + "grad_norm": 0.5752217769622803, + "learning_rate": 3.38489468124505e-06, + "loss": 0.5167, + "step": 7896 + }, + { + "epoch": 1.1637033398821217, + "grad_norm": 0.5775926113128662, + "learning_rate": 3.3845320718498577e-06, + "loss": 0.5423, + "step": 7897 + }, + { + "epoch": 1.1638506876227899, + "grad_norm": 0.6079279780387878, + "learning_rate": 3.3841694411820843e-06, + "loss": 0.5444, + "step": 7898 + }, + { + "epoch": 1.1639980353634578, + "grad_norm": 0.6039761900901794, + "learning_rate": 3.3838067892504508e-06, + "loss": 0.5561, + "step": 7899 + }, + { + "epoch": 1.1641453831041257, + "grad_norm": 0.5749445557594299, + "learning_rate": 3.383444116063677e-06, + "loss": 0.5861, + "step": 7900 + }, + { + "epoch": 1.1642927308447937, + "grad_norm": 0.6010615229606628, + "learning_rate": 3.3830814216304874e-06, + "loss": 0.5705, + "step": 7901 + }, + { + "epoch": 1.1644400785854616, + "grad_norm": 0.591586172580719, + "learning_rate": 3.3827187059596047e-06, + "loss": 0.5533, + "step": 7902 + }, + { + "epoch": 1.1645874263261295, + "grad_norm": 0.6267173886299133, + "learning_rate": 3.38235596905975e-06, + "loss": 0.5386, + "step": 7903 + }, + { + "epoch": 1.1647347740667977, + "grad_norm": 0.5882344245910645, + "learning_rate": 3.3819932109396485e-06, + "loss": 0.5495, + "step": 7904 + }, + { + "epoch": 1.1648821218074656, + "grad_norm": 0.5813336968421936, + "learning_rate": 3.381630431608024e-06, + "loss": 0.5367, + "step": 7905 + }, + { + "epoch": 1.1650294695481336, + "grad_norm": 0.5941097736358643, + "learning_rate": 3.3812676310736014e-06, + "loss": 0.505, + "step": 7906 + }, + { + "epoch": 1.1651768172888015, + "grad_norm": 0.6127269864082336, + "learning_rate": 3.3809048093451054e-06, + "loss": 0.5187, + "step": 7907 + }, + { + "epoch": 1.1653241650294697, + "grad_norm": 0.6219330430030823, + "learning_rate": 3.380541966431262e-06, + "loss": 0.5254, + "step": 7908 + }, + { + "epoch": 1.1654715127701376, + "grad_norm": 0.6040239334106445, + "learning_rate": 3.3801791023407966e-06, + "loss": 0.5434, + "step": 7909 + }, + { + "epoch": 1.1656188605108055, + "grad_norm": 0.5664872527122498, + "learning_rate": 3.379816217082437e-06, + "loss": 0.5403, + "step": 7910 + }, + { + "epoch": 1.1657662082514735, + "grad_norm": 0.5916334390640259, + "learning_rate": 3.37945331066491e-06, + "loss": 0.5185, + "step": 7911 + }, + { + "epoch": 1.1659135559921414, + "grad_norm": 0.6305618286132812, + "learning_rate": 3.3790903830969437e-06, + "loss": 0.5453, + "step": 7912 + }, + { + "epoch": 1.1660609037328094, + "grad_norm": 0.5804756879806519, + "learning_rate": 3.378727434387266e-06, + "loss": 0.5442, + "step": 7913 + }, + { + "epoch": 1.1662082514734773, + "grad_norm": 0.575706422328949, + "learning_rate": 3.3783644645446057e-06, + "loss": 0.523, + "step": 7914 + }, + { + "epoch": 1.1663555992141454, + "grad_norm": 0.566775381565094, + "learning_rate": 3.3780014735776915e-06, + "loss": 0.5394, + "step": 7915 + }, + { + "epoch": 1.1665029469548134, + "grad_norm": 0.6030650734901428, + "learning_rate": 3.377638461495254e-06, + "loss": 0.5338, + "step": 7916 + }, + { + "epoch": 1.1666502946954813, + "grad_norm": 0.5702114701271057, + "learning_rate": 3.3772754283060233e-06, + "loss": 0.5432, + "step": 7917 + }, + { + "epoch": 1.1667976424361493, + "grad_norm": 0.6096566319465637, + "learning_rate": 3.376912374018729e-06, + "loss": 0.5565, + "step": 7918 + }, + { + "epoch": 1.1669449901768174, + "grad_norm": 0.5825981497764587, + "learning_rate": 3.376549298642105e-06, + "loss": 0.5506, + "step": 7919 + }, + { + "epoch": 1.1670923379174853, + "grad_norm": 0.6099618673324585, + "learning_rate": 3.3761862021848807e-06, + "loss": 0.5329, + "step": 7920 + }, + { + "epoch": 1.1672396856581533, + "grad_norm": 0.5917762517929077, + "learning_rate": 3.3758230846557892e-06, + "loss": 0.5453, + "step": 7921 + }, + { + "epoch": 1.1673870333988212, + "grad_norm": 0.5906172394752502, + "learning_rate": 3.375459946063563e-06, + "loss": 0.5215, + "step": 7922 + }, + { + "epoch": 1.1675343811394892, + "grad_norm": 0.6148322820663452, + "learning_rate": 3.375096786416936e-06, + "loss": 0.5776, + "step": 7923 + }, + { + "epoch": 1.167681728880157, + "grad_norm": 0.5928353667259216, + "learning_rate": 3.374733605724642e-06, + "loss": 0.5201, + "step": 7924 + }, + { + "epoch": 1.167829076620825, + "grad_norm": 0.6420735716819763, + "learning_rate": 3.374370403995415e-06, + "loss": 0.5258, + "step": 7925 + }, + { + "epoch": 1.1679764243614932, + "grad_norm": 0.5773313045501709, + "learning_rate": 3.3740071812379895e-06, + "loss": 0.5536, + "step": 7926 + }, + { + "epoch": 1.1681237721021611, + "grad_norm": 0.595805287361145, + "learning_rate": 3.3736439374611012e-06, + "loss": 0.542, + "step": 7927 + }, + { + "epoch": 1.168271119842829, + "grad_norm": 0.6135466694831848, + "learning_rate": 3.373280672673486e-06, + "loss": 0.5103, + "step": 7928 + }, + { + "epoch": 1.168418467583497, + "grad_norm": 0.5837194323539734, + "learning_rate": 3.3729173868838807e-06, + "loss": 0.5339, + "step": 7929 + }, + { + "epoch": 1.1685658153241651, + "grad_norm": 0.5806299448013306, + "learning_rate": 3.3725540801010214e-06, + "loss": 0.5217, + "step": 7930 + }, + { + "epoch": 1.168713163064833, + "grad_norm": 0.56281977891922, + "learning_rate": 3.3721907523336454e-06, + "loss": 0.5577, + "step": 7931 + }, + { + "epoch": 1.168860510805501, + "grad_norm": 0.6164603233337402, + "learning_rate": 3.3718274035904917e-06, + "loss": 0.5127, + "step": 7932 + }, + { + "epoch": 1.169007858546169, + "grad_norm": 0.6043016910552979, + "learning_rate": 3.3714640338802973e-06, + "loss": 0.5224, + "step": 7933 + }, + { + "epoch": 1.169155206286837, + "grad_norm": 0.6397182941436768, + "learning_rate": 3.3711006432118015e-06, + "loss": 0.5512, + "step": 7934 + }, + { + "epoch": 1.1693025540275048, + "grad_norm": 0.6076349020004272, + "learning_rate": 3.370737231593744e-06, + "loss": 0.5216, + "step": 7935 + }, + { + "epoch": 1.169449901768173, + "grad_norm": 0.6113758683204651, + "learning_rate": 3.3703737990348646e-06, + "loss": 0.5261, + "step": 7936 + }, + { + "epoch": 1.169597249508841, + "grad_norm": 0.5944374799728394, + "learning_rate": 3.370010345543903e-06, + "loss": 0.5083, + "step": 7937 + }, + { + "epoch": 1.1697445972495089, + "grad_norm": 0.5898892879486084, + "learning_rate": 3.369646871129601e-06, + "loss": 0.5187, + "step": 7938 + }, + { + "epoch": 1.1698919449901768, + "grad_norm": 0.5723863840103149, + "learning_rate": 3.3692833758006994e-06, + "loss": 0.5026, + "step": 7939 + }, + { + "epoch": 1.1700392927308447, + "grad_norm": 0.6302082538604736, + "learning_rate": 3.368919859565941e-06, + "loss": 0.5293, + "step": 7940 + }, + { + "epoch": 1.1701866404715129, + "grad_norm": 0.6183425784111023, + "learning_rate": 3.368556322434068e-06, + "loss": 0.5245, + "step": 7941 + }, + { + "epoch": 1.1703339882121808, + "grad_norm": 0.5836018323898315, + "learning_rate": 3.368192764413821e-06, + "loss": 0.5533, + "step": 7942 + }, + { + "epoch": 1.1704813359528488, + "grad_norm": 0.5776121616363525, + "learning_rate": 3.3678291855139466e-06, + "loss": 0.5229, + "step": 7943 + }, + { + "epoch": 1.1706286836935167, + "grad_norm": 0.579670786857605, + "learning_rate": 3.367465585743187e-06, + "loss": 0.5557, + "step": 7944 + }, + { + "epoch": 1.1707760314341846, + "grad_norm": 0.5649889707565308, + "learning_rate": 3.3671019651102864e-06, + "loss": 0.5203, + "step": 7945 + }, + { + "epoch": 1.1709233791748526, + "grad_norm": 0.5778752565383911, + "learning_rate": 3.3667383236239915e-06, + "loss": 0.5244, + "step": 7946 + }, + { + "epoch": 1.1710707269155207, + "grad_norm": 0.6022000312805176, + "learning_rate": 3.3663746612930454e-06, + "loss": 0.5106, + "step": 7947 + }, + { + "epoch": 1.1712180746561887, + "grad_norm": 0.6461508274078369, + "learning_rate": 3.366010978126195e-06, + "loss": 0.5623, + "step": 7948 + }, + { + "epoch": 1.1713654223968566, + "grad_norm": 0.580458402633667, + "learning_rate": 3.3656472741321873e-06, + "loss": 0.5564, + "step": 7949 + }, + { + "epoch": 1.1715127701375245, + "grad_norm": 0.5840852856636047, + "learning_rate": 3.365283549319768e-06, + "loss": 0.5593, + "step": 7950 + }, + { + "epoch": 1.1716601178781925, + "grad_norm": 0.5794164538383484, + "learning_rate": 3.364919803697686e-06, + "loss": 0.5226, + "step": 7951 + }, + { + "epoch": 1.1718074656188606, + "grad_norm": 0.5744820833206177, + "learning_rate": 3.3645560372746877e-06, + "loss": 0.5541, + "step": 7952 + }, + { + "epoch": 1.1719548133595286, + "grad_norm": 0.5694285035133362, + "learning_rate": 3.3641922500595227e-06, + "loss": 0.5519, + "step": 7953 + }, + { + "epoch": 1.1721021611001965, + "grad_norm": 0.5966677069664001, + "learning_rate": 3.363828442060939e-06, + "loss": 0.556, + "step": 7954 + }, + { + "epoch": 1.1722495088408644, + "grad_norm": 0.5627882480621338, + "learning_rate": 3.363464613287686e-06, + "loss": 0.5743, + "step": 7955 + }, + { + "epoch": 1.1723968565815324, + "grad_norm": 0.6031219959259033, + "learning_rate": 3.3631007637485153e-06, + "loss": 0.5134, + "step": 7956 + }, + { + "epoch": 1.1725442043222003, + "grad_norm": 0.6064467430114746, + "learning_rate": 3.3627368934521753e-06, + "loss": 0.5216, + "step": 7957 + }, + { + "epoch": 1.1726915520628685, + "grad_norm": 0.5851109623908997, + "learning_rate": 3.3623730024074176e-06, + "loss": 0.531, + "step": 7958 + }, + { + "epoch": 1.1728388998035364, + "grad_norm": 0.5786102414131165, + "learning_rate": 3.362009090622994e-06, + "loss": 0.5291, + "step": 7959 + }, + { + "epoch": 1.1729862475442043, + "grad_norm": 0.6360209584236145, + "learning_rate": 3.361645158107656e-06, + "loss": 0.5335, + "step": 7960 + }, + { + "epoch": 1.1731335952848723, + "grad_norm": 0.570669949054718, + "learning_rate": 3.361281204870156e-06, + "loss": 0.552, + "step": 7961 + }, + { + "epoch": 1.1732809430255402, + "grad_norm": 0.6019143462181091, + "learning_rate": 3.3609172309192466e-06, + "loss": 0.5566, + "step": 7962 + }, + { + "epoch": 1.1734282907662084, + "grad_norm": 0.5849248170852661, + "learning_rate": 3.360553236263682e-06, + "loss": 0.5515, + "step": 7963 + }, + { + "epoch": 1.1735756385068763, + "grad_norm": 0.6226122975349426, + "learning_rate": 3.3601892209122163e-06, + "loss": 0.504, + "step": 7964 + }, + { + "epoch": 1.1737229862475442, + "grad_norm": 0.6276815533638, + "learning_rate": 3.359825184873603e-06, + "loss": 0.5393, + "step": 7965 + }, + { + "epoch": 1.1738703339882122, + "grad_norm": 0.5955258011817932, + "learning_rate": 3.359461128156597e-06, + "loss": 0.5235, + "step": 7966 + }, + { + "epoch": 1.17401768172888, + "grad_norm": 0.607848584651947, + "learning_rate": 3.359097050769954e-06, + "loss": 0.557, + "step": 7967 + }, + { + "epoch": 1.174165029469548, + "grad_norm": 0.5686028599739075, + "learning_rate": 3.35873295272243e-06, + "loss": 0.5223, + "step": 7968 + }, + { + "epoch": 1.1743123772102162, + "grad_norm": 0.600206732749939, + "learning_rate": 3.3583688340227807e-06, + "loss": 0.5198, + "step": 7969 + }, + { + "epoch": 1.1744597249508841, + "grad_norm": 0.5972679257392883, + "learning_rate": 3.358004694679764e-06, + "loss": 0.5244, + "step": 7970 + }, + { + "epoch": 1.174607072691552, + "grad_norm": 0.6042858958244324, + "learning_rate": 3.3576405347021367e-06, + "loss": 0.5457, + "step": 7971 + }, + { + "epoch": 1.17475442043222, + "grad_norm": 0.6333156228065491, + "learning_rate": 3.357276354098657e-06, + "loss": 0.4972, + "step": 7972 + }, + { + "epoch": 1.174901768172888, + "grad_norm": 0.6014018654823303, + "learning_rate": 3.3569121528780822e-06, + "loss": 0.5206, + "step": 7973 + }, + { + "epoch": 1.175049115913556, + "grad_norm": 0.5786324143409729, + "learning_rate": 3.3565479310491727e-06, + "loss": 0.5406, + "step": 7974 + }, + { + "epoch": 1.175196463654224, + "grad_norm": 0.5775977373123169, + "learning_rate": 3.3561836886206874e-06, + "loss": 0.5395, + "step": 7975 + }, + { + "epoch": 1.175343811394892, + "grad_norm": 0.5651142001152039, + "learning_rate": 3.3558194256013855e-06, + "loss": 0.5643, + "step": 7976 + }, + { + "epoch": 1.17549115913556, + "grad_norm": 0.5784719586372375, + "learning_rate": 3.355455142000028e-06, + "loss": 0.5632, + "step": 7977 + }, + { + "epoch": 1.1756385068762278, + "grad_norm": 0.6070958375930786, + "learning_rate": 3.355090837825376e-06, + "loss": 0.5211, + "step": 7978 + }, + { + "epoch": 1.1757858546168958, + "grad_norm": 0.5977141261100769, + "learning_rate": 3.35472651308619e-06, + "loss": 0.5354, + "step": 7979 + }, + { + "epoch": 1.175933202357564, + "grad_norm": 0.5579076409339905, + "learning_rate": 3.3543621677912326e-06, + "loss": 0.511, + "step": 7980 + }, + { + "epoch": 1.1760805500982319, + "grad_norm": 0.5600191354751587, + "learning_rate": 3.353997801949265e-06, + "loss": 0.5474, + "step": 7981 + }, + { + "epoch": 1.1762278978388998, + "grad_norm": 0.6007577776908875, + "learning_rate": 3.353633415569052e-06, + "loss": 0.5186, + "step": 7982 + }, + { + "epoch": 1.1763752455795677, + "grad_norm": 0.5667813420295715, + "learning_rate": 3.353269008659355e-06, + "loss": 0.5231, + "step": 7983 + }, + { + "epoch": 1.1765225933202357, + "grad_norm": 0.6072981953620911, + "learning_rate": 3.3529045812289383e-06, + "loss": 0.4969, + "step": 7984 + }, + { + "epoch": 1.1766699410609038, + "grad_norm": 0.571844220161438, + "learning_rate": 3.3525401332865672e-06, + "loss": 0.5505, + "step": 7985 + }, + { + "epoch": 1.1768172888015718, + "grad_norm": 0.5694481134414673, + "learning_rate": 3.352175664841005e-06, + "loss": 0.5327, + "step": 7986 + }, + { + "epoch": 1.1769646365422397, + "grad_norm": 0.6361897587776184, + "learning_rate": 3.3518111759010184e-06, + "loss": 0.5123, + "step": 7987 + }, + { + "epoch": 1.1771119842829076, + "grad_norm": 0.547515869140625, + "learning_rate": 3.351446666475372e-06, + "loss": 0.5597, + "step": 7988 + }, + { + "epoch": 1.1772593320235756, + "grad_norm": 0.6319218873977661, + "learning_rate": 3.3510821365728335e-06, + "loss": 0.5348, + "step": 7989 + }, + { + "epoch": 1.1774066797642435, + "grad_norm": 0.6162993311882019, + "learning_rate": 3.3507175862021677e-06, + "loss": 0.5372, + "step": 7990 + }, + { + "epoch": 1.1775540275049117, + "grad_norm": 0.6163186430931091, + "learning_rate": 3.350353015372144e-06, + "loss": 0.5475, + "step": 7991 + }, + { + "epoch": 1.1777013752455796, + "grad_norm": 0.5663650631904602, + "learning_rate": 3.3499884240915283e-06, + "loss": 0.4877, + "step": 7992 + }, + { + "epoch": 1.1778487229862475, + "grad_norm": 0.600563108921051, + "learning_rate": 3.3496238123690896e-06, + "loss": 0.526, + "step": 7993 + }, + { + "epoch": 1.1779960707269155, + "grad_norm": 0.5733100175857544, + "learning_rate": 3.349259180213597e-06, + "loss": 0.5484, + "step": 7994 + }, + { + "epoch": 1.1781434184675834, + "grad_norm": 0.5636365413665771, + "learning_rate": 3.348894527633819e-06, + "loss": 0.5317, + "step": 7995 + }, + { + "epoch": 1.1782907662082516, + "grad_norm": 0.6095474362373352, + "learning_rate": 3.3485298546385268e-06, + "loss": 0.5617, + "step": 7996 + }, + { + "epoch": 1.1784381139489195, + "grad_norm": 0.649467408657074, + "learning_rate": 3.348165161236489e-06, + "loss": 0.5312, + "step": 7997 + }, + { + "epoch": 1.1785854616895874, + "grad_norm": 0.5759940147399902, + "learning_rate": 3.347800447436477e-06, + "loss": 0.5578, + "step": 7998 + }, + { + "epoch": 1.1787328094302554, + "grad_norm": 0.5945261120796204, + "learning_rate": 3.347435713247262e-06, + "loss": 0.5677, + "step": 7999 + }, + { + "epoch": 1.1788801571709233, + "grad_norm": 0.6015635132789612, + "learning_rate": 3.347070958677615e-06, + "loss": 0.5385, + "step": 8000 + }, + { + "epoch": 1.1790275049115913, + "grad_norm": 0.6288295984268188, + "learning_rate": 3.346706183736309e-06, + "loss": 0.5175, + "step": 8001 + }, + { + "epoch": 1.1791748526522594, + "grad_norm": 0.603992223739624, + "learning_rate": 3.3463413884321167e-06, + "loss": 0.5518, + "step": 8002 + }, + { + "epoch": 1.1793222003929273, + "grad_norm": 0.5878428220748901, + "learning_rate": 3.3459765727738107e-06, + "loss": 0.5432, + "step": 8003 + }, + { + "epoch": 1.1794695481335953, + "grad_norm": 0.5809419751167297, + "learning_rate": 3.345611736770165e-06, + "loss": 0.5491, + "step": 8004 + }, + { + "epoch": 1.1796168958742632, + "grad_norm": 0.5937219858169556, + "learning_rate": 3.345246880429954e-06, + "loss": 0.5089, + "step": 8005 + }, + { + "epoch": 1.1797642436149312, + "grad_norm": 0.5869588255882263, + "learning_rate": 3.344882003761951e-06, + "loss": 0.5252, + "step": 8006 + }, + { + "epoch": 1.1799115913555993, + "grad_norm": 0.5726460814476013, + "learning_rate": 3.3445171067749326e-06, + "loss": 0.5444, + "step": 8007 + }, + { + "epoch": 1.1800589390962672, + "grad_norm": 0.6001148223876953, + "learning_rate": 3.3441521894776743e-06, + "loss": 0.5235, + "step": 8008 + }, + { + "epoch": 1.1802062868369352, + "grad_norm": 0.6014829874038696, + "learning_rate": 3.3437872518789518e-06, + "loss": 0.5505, + "step": 8009 + }, + { + "epoch": 1.1803536345776031, + "grad_norm": 0.5937697887420654, + "learning_rate": 3.3434222939875412e-06, + "loss": 0.5595, + "step": 8010 + }, + { + "epoch": 1.180500982318271, + "grad_norm": 0.6101598143577576, + "learning_rate": 3.34305731581222e-06, + "loss": 0.536, + "step": 8011 + }, + { + "epoch": 1.180648330058939, + "grad_norm": 0.6168487668037415, + "learning_rate": 3.342692317361766e-06, + "loss": 0.5194, + "step": 8012 + }, + { + "epoch": 1.1807956777996071, + "grad_norm": 0.572346568107605, + "learning_rate": 3.3423272986449566e-06, + "loss": 0.5335, + "step": 8013 + }, + { + "epoch": 1.180943025540275, + "grad_norm": 0.5940268039703369, + "learning_rate": 3.3419622596705714e-06, + "loss": 0.5146, + "step": 8014 + }, + { + "epoch": 1.181090373280943, + "grad_norm": 0.5834726691246033, + "learning_rate": 3.3415972004473883e-06, + "loss": 0.5271, + "step": 8015 + }, + { + "epoch": 1.181237721021611, + "grad_norm": 0.6057354807853699, + "learning_rate": 3.341232120984187e-06, + "loss": 0.5368, + "step": 8016 + }, + { + "epoch": 1.181385068762279, + "grad_norm": 0.5820915699005127, + "learning_rate": 3.340867021289748e-06, + "loss": 0.5313, + "step": 8017 + }, + { + "epoch": 1.181532416502947, + "grad_norm": 0.5806114077568054, + "learning_rate": 3.3405019013728525e-06, + "loss": 0.4977, + "step": 8018 + }, + { + "epoch": 1.181679764243615, + "grad_norm": 0.5862131714820862, + "learning_rate": 3.3401367612422792e-06, + "loss": 0.5333, + "step": 8019 + }, + { + "epoch": 1.181827111984283, + "grad_norm": 0.5700843930244446, + "learning_rate": 3.3397716009068115e-06, + "loss": 0.5549, + "step": 8020 + }, + { + "epoch": 1.1819744597249509, + "grad_norm": 0.5616605877876282, + "learning_rate": 3.3394064203752302e-06, + "loss": 0.5612, + "step": 8021 + }, + { + "epoch": 1.1821218074656188, + "grad_norm": 0.654227077960968, + "learning_rate": 3.3390412196563182e-06, + "loss": 0.5198, + "step": 8022 + }, + { + "epoch": 1.1822691552062867, + "grad_norm": 0.6056110858917236, + "learning_rate": 3.3386759987588592e-06, + "loss": 0.5438, + "step": 8023 + }, + { + "epoch": 1.1824165029469549, + "grad_norm": 0.6125786900520325, + "learning_rate": 3.3383107576916353e-06, + "loss": 0.4938, + "step": 8024 + }, + { + "epoch": 1.1825638506876228, + "grad_norm": 0.5948806405067444, + "learning_rate": 3.3379454964634305e-06, + "loss": 0.5943, + "step": 8025 + }, + { + "epoch": 1.1827111984282908, + "grad_norm": 0.6194632053375244, + "learning_rate": 3.33758021508303e-06, + "loss": 0.5342, + "step": 8026 + }, + { + "epoch": 1.1828585461689587, + "grad_norm": 0.6177467107772827, + "learning_rate": 3.337214913559218e-06, + "loss": 0.5674, + "step": 8027 + }, + { + "epoch": 1.1830058939096266, + "grad_norm": 0.5917319655418396, + "learning_rate": 3.3368495919007797e-06, + "loss": 0.5358, + "step": 8028 + }, + { + "epoch": 1.1831532416502948, + "grad_norm": 0.5749235153198242, + "learning_rate": 3.3364842501165013e-06, + "loss": 0.5148, + "step": 8029 + }, + { + "epoch": 1.1833005893909627, + "grad_norm": 0.6031187176704407, + "learning_rate": 3.336118888215169e-06, + "loss": 0.5606, + "step": 8030 + }, + { + "epoch": 1.1834479371316307, + "grad_norm": 0.6063652038574219, + "learning_rate": 3.3357535062055707e-06, + "loss": 0.5021, + "step": 8031 + }, + { + "epoch": 1.1835952848722986, + "grad_norm": 0.6046050786972046, + "learning_rate": 3.3353881040964918e-06, + "loss": 0.534, + "step": 8032 + }, + { + "epoch": 1.1837426326129665, + "grad_norm": 0.5975744128227234, + "learning_rate": 3.3350226818967206e-06, + "loss": 0.5145, + "step": 8033 + }, + { + "epoch": 1.1838899803536345, + "grad_norm": 0.6045680046081543, + "learning_rate": 3.3346572396150455e-06, + "loss": 0.548, + "step": 8034 + }, + { + "epoch": 1.1840373280943026, + "grad_norm": 0.687103271484375, + "learning_rate": 3.3342917772602557e-06, + "loss": 0.5324, + "step": 8035 + }, + { + "epoch": 1.1841846758349706, + "grad_norm": 0.5765399932861328, + "learning_rate": 3.33392629484114e-06, + "loss": 0.538, + "step": 8036 + }, + { + "epoch": 1.1843320235756385, + "grad_norm": 0.6127504110336304, + "learning_rate": 3.3335607923664877e-06, + "loss": 0.5006, + "step": 8037 + }, + { + "epoch": 1.1844793713163064, + "grad_norm": 0.6170635223388672, + "learning_rate": 3.3331952698450892e-06, + "loss": 0.519, + "step": 8038 + }, + { + "epoch": 1.1846267190569744, + "grad_norm": 0.5585407614707947, + "learning_rate": 3.332829727285735e-06, + "loss": 0.5222, + "step": 8039 + }, + { + "epoch": 1.1847740667976425, + "grad_norm": 0.5943000316619873, + "learning_rate": 3.3324641646972173e-06, + "loss": 0.5628, + "step": 8040 + }, + { + "epoch": 1.1849214145383105, + "grad_norm": 0.5788171291351318, + "learning_rate": 3.332098582088327e-06, + "loss": 0.5164, + "step": 8041 + }, + { + "epoch": 1.1850687622789784, + "grad_norm": 0.628197193145752, + "learning_rate": 3.331732979467855e-06, + "loss": 0.5557, + "step": 8042 + }, + { + "epoch": 1.1852161100196463, + "grad_norm": 0.607140302658081, + "learning_rate": 3.3313673568445958e-06, + "loss": 0.5475, + "step": 8043 + }, + { + "epoch": 1.1853634577603143, + "grad_norm": 0.5581399202346802, + "learning_rate": 3.331001714227341e-06, + "loss": 0.5331, + "step": 8044 + }, + { + "epoch": 1.1855108055009822, + "grad_norm": 0.5668577551841736, + "learning_rate": 3.3306360516248854e-06, + "loss": 0.5048, + "step": 8045 + }, + { + "epoch": 1.1856581532416504, + "grad_norm": 0.6410255432128906, + "learning_rate": 3.330270369046022e-06, + "loss": 0.5295, + "step": 8046 + }, + { + "epoch": 1.1858055009823183, + "grad_norm": 0.577150821685791, + "learning_rate": 3.3299046664995447e-06, + "loss": 0.5342, + "step": 8047 + }, + { + "epoch": 1.1859528487229862, + "grad_norm": 0.5963589549064636, + "learning_rate": 3.3295389439942506e-06, + "loss": 0.5355, + "step": 8048 + }, + { + "epoch": 1.1861001964636542, + "grad_norm": 0.6206799149513245, + "learning_rate": 3.329173201538934e-06, + "loss": 0.4801, + "step": 8049 + }, + { + "epoch": 1.1862475442043223, + "grad_norm": 0.5692552924156189, + "learning_rate": 3.3288074391423903e-06, + "loss": 0.5037, + "step": 8050 + }, + { + "epoch": 1.1863948919449903, + "grad_norm": 0.5574491024017334, + "learning_rate": 3.328441656813417e-06, + "loss": 0.5334, + "step": 8051 + }, + { + "epoch": 1.1865422396856582, + "grad_norm": 0.5932641625404358, + "learning_rate": 3.3280758545608097e-06, + "loss": 0.5691, + "step": 8052 + }, + { + "epoch": 1.1866895874263261, + "grad_norm": 0.6191446185112, + "learning_rate": 3.327710032393367e-06, + "loss": 0.5328, + "step": 8053 + }, + { + "epoch": 1.186836935166994, + "grad_norm": 0.5735570788383484, + "learning_rate": 3.327344190319886e-06, + "loss": 0.546, + "step": 8054 + }, + { + "epoch": 1.186984282907662, + "grad_norm": 0.5893142223358154, + "learning_rate": 3.326978328349166e-06, + "loss": 0.5492, + "step": 8055 + }, + { + "epoch": 1.18713163064833, + "grad_norm": 0.6046679615974426, + "learning_rate": 3.3266124464900044e-06, + "loss": 0.5295, + "step": 8056 + }, + { + "epoch": 1.187278978388998, + "grad_norm": 0.586901843547821, + "learning_rate": 3.3262465447512016e-06, + "loss": 0.5118, + "step": 8057 + }, + { + "epoch": 1.187426326129666, + "grad_norm": 0.6722532510757446, + "learning_rate": 3.3258806231415565e-06, + "loss": 0.5618, + "step": 8058 + }, + { + "epoch": 1.187573673870334, + "grad_norm": 0.5995239615440369, + "learning_rate": 3.3255146816698696e-06, + "loss": 0.5588, + "step": 8059 + }, + { + "epoch": 1.187721021611002, + "grad_norm": 0.6364059448242188, + "learning_rate": 3.325148720344942e-06, + "loss": 0.5169, + "step": 8060 + }, + { + "epoch": 1.18786836935167, + "grad_norm": 0.599299967288971, + "learning_rate": 3.3247827391755748e-06, + "loss": 0.5639, + "step": 8061 + }, + { + "epoch": 1.188015717092338, + "grad_norm": 0.5836760997772217, + "learning_rate": 3.32441673817057e-06, + "loss": 0.5495, + "step": 8062 + }, + { + "epoch": 1.188163064833006, + "grad_norm": 0.6590287089347839, + "learning_rate": 3.324050717338729e-06, + "loss": 0.557, + "step": 8063 + }, + { + "epoch": 1.1883104125736739, + "grad_norm": 0.5486404895782471, + "learning_rate": 3.323684676688854e-06, + "loss": 0.5145, + "step": 8064 + }, + { + "epoch": 1.1884577603143418, + "grad_norm": 0.5724408030509949, + "learning_rate": 3.32331861622975e-06, + "loss": 0.5521, + "step": 8065 + }, + { + "epoch": 1.1886051080550097, + "grad_norm": 0.5980024933815002, + "learning_rate": 3.322952535970218e-06, + "loss": 0.5412, + "step": 8066 + }, + { + "epoch": 1.1887524557956777, + "grad_norm": 0.5964024066925049, + "learning_rate": 3.3225864359190644e-06, + "loss": 0.5177, + "step": 8067 + }, + { + "epoch": 1.1888998035363458, + "grad_norm": 0.5773582458496094, + "learning_rate": 3.322220316085093e-06, + "loss": 0.5183, + "step": 8068 + }, + { + "epoch": 1.1890471512770138, + "grad_norm": 0.5854175090789795, + "learning_rate": 3.3218541764771084e-06, + "loss": 0.5666, + "step": 8069 + }, + { + "epoch": 1.1891944990176817, + "grad_norm": 0.5972598195075989, + "learning_rate": 3.3214880171039165e-06, + "loss": 0.5284, + "step": 8070 + }, + { + "epoch": 1.1893418467583496, + "grad_norm": 0.5828613042831421, + "learning_rate": 3.3211218379743227e-06, + "loss": 0.5363, + "step": 8071 + }, + { + "epoch": 1.1894891944990178, + "grad_norm": 0.5976222157478333, + "learning_rate": 3.320755639097134e-06, + "loss": 0.5442, + "step": 8072 + }, + { + "epoch": 1.1896365422396857, + "grad_norm": 0.6006333231925964, + "learning_rate": 3.3203894204811572e-06, + "loss": 0.5242, + "step": 8073 + }, + { + "epoch": 1.1897838899803537, + "grad_norm": 0.6307390928268433, + "learning_rate": 3.3200231821351996e-06, + "loss": 0.5375, + "step": 8074 + }, + { + "epoch": 1.1899312377210216, + "grad_norm": 0.5971204042434692, + "learning_rate": 3.3196569240680694e-06, + "loss": 0.5132, + "step": 8075 + }, + { + "epoch": 1.1900785854616895, + "grad_norm": 0.5997664332389832, + "learning_rate": 3.3192906462885743e-06, + "loss": 0.5015, + "step": 8076 + }, + { + "epoch": 1.1902259332023575, + "grad_norm": 0.5958148241043091, + "learning_rate": 3.3189243488055236e-06, + "loss": 0.5345, + "step": 8077 + }, + { + "epoch": 1.1903732809430256, + "grad_norm": 0.6030668020248413, + "learning_rate": 3.3185580316277266e-06, + "loss": 0.5338, + "step": 8078 + }, + { + "epoch": 1.1905206286836936, + "grad_norm": 0.6004279255867004, + "learning_rate": 3.3181916947639924e-06, + "loss": 0.5089, + "step": 8079 + }, + { + "epoch": 1.1906679764243615, + "grad_norm": 0.5887733697891235, + "learning_rate": 3.3178253382231322e-06, + "loss": 0.5188, + "step": 8080 + }, + { + "epoch": 1.1908153241650294, + "grad_norm": 0.6217314600944519, + "learning_rate": 3.3174589620139557e-06, + "loss": 0.5467, + "step": 8081 + }, + { + "epoch": 1.1909626719056974, + "grad_norm": 0.5866604447364807, + "learning_rate": 3.3170925661452745e-06, + "loss": 0.5253, + "step": 8082 + }, + { + "epoch": 1.1911100196463655, + "grad_norm": 0.6091462969779968, + "learning_rate": 3.3167261506259007e-06, + "loss": 0.5067, + "step": 8083 + }, + { + "epoch": 1.1912573673870335, + "grad_norm": 0.5571064949035645, + "learning_rate": 3.3163597154646452e-06, + "loss": 0.5359, + "step": 8084 + }, + { + "epoch": 1.1914047151277014, + "grad_norm": 0.6134982705116272, + "learning_rate": 3.3159932606703226e-06, + "loss": 0.524, + "step": 8085 + }, + { + "epoch": 1.1915520628683693, + "grad_norm": 0.6024724245071411, + "learning_rate": 3.3156267862517446e-06, + "loss": 0.5566, + "step": 8086 + }, + { + "epoch": 1.1916994106090373, + "grad_norm": 0.570766806602478, + "learning_rate": 3.3152602922177245e-06, + "loss": 0.5501, + "step": 8087 + }, + { + "epoch": 1.1918467583497052, + "grad_norm": 0.5842997431755066, + "learning_rate": 3.3148937785770767e-06, + "loss": 0.5471, + "step": 8088 + }, + { + "epoch": 1.1919941060903734, + "grad_norm": 0.6167703866958618, + "learning_rate": 3.314527245338616e-06, + "loss": 0.5054, + "step": 8089 + }, + { + "epoch": 1.1921414538310413, + "grad_norm": 0.64316725730896, + "learning_rate": 3.3141606925111574e-06, + "loss": 0.5899, + "step": 8090 + }, + { + "epoch": 1.1922888015717092, + "grad_norm": 0.587127149105072, + "learning_rate": 3.3137941201035157e-06, + "loss": 0.5655, + "step": 8091 + }, + { + "epoch": 1.1924361493123772, + "grad_norm": 0.5922662615776062, + "learning_rate": 3.313427528124507e-06, + "loss": 0.52, + "step": 8092 + }, + { + "epoch": 1.1925834970530451, + "grad_norm": 0.6193089485168457, + "learning_rate": 3.313060916582948e-06, + "loss": 0.5146, + "step": 8093 + }, + { + "epoch": 1.1927308447937133, + "grad_norm": 0.6488621830940247, + "learning_rate": 3.3126942854876546e-06, + "loss": 0.5153, + "step": 8094 + }, + { + "epoch": 1.1928781925343812, + "grad_norm": 0.6229239106178284, + "learning_rate": 3.312327634847446e-06, + "loss": 0.5596, + "step": 8095 + }, + { + "epoch": 1.1930255402750491, + "grad_norm": 0.639250636100769, + "learning_rate": 3.311960964671138e-06, + "loss": 0.5556, + "step": 8096 + }, + { + "epoch": 1.193172888015717, + "grad_norm": 0.5867592096328735, + "learning_rate": 3.31159427496755e-06, + "loss": 0.5602, + "step": 8097 + }, + { + "epoch": 1.193320235756385, + "grad_norm": 0.6176556348800659, + "learning_rate": 3.3112275657454996e-06, + "loss": 0.5355, + "step": 8098 + }, + { + "epoch": 1.193467583497053, + "grad_norm": 0.5786208510398865, + "learning_rate": 3.3108608370138075e-06, + "loss": 0.5706, + "step": 8099 + }, + { + "epoch": 1.193614931237721, + "grad_norm": 0.628322958946228, + "learning_rate": 3.3104940887812927e-06, + "loss": 0.5294, + "step": 8100 + }, + { + "epoch": 1.193762278978389, + "grad_norm": 0.5925549268722534, + "learning_rate": 3.3101273210567747e-06, + "loss": 0.5855, + "step": 8101 + }, + { + "epoch": 1.193909626719057, + "grad_norm": 0.6052160263061523, + "learning_rate": 3.3097605338490747e-06, + "loss": 0.55, + "step": 8102 + }, + { + "epoch": 1.194056974459725, + "grad_norm": 0.6063608527183533, + "learning_rate": 3.3093937271670135e-06, + "loss": 0.5667, + "step": 8103 + }, + { + "epoch": 1.1942043222003929, + "grad_norm": 0.6651962399482727, + "learning_rate": 3.3090269010194132e-06, + "loss": 0.5465, + "step": 8104 + }, + { + "epoch": 1.194351669941061, + "grad_norm": 0.6060234904289246, + "learning_rate": 3.308660055415095e-06, + "loss": 0.5209, + "step": 8105 + }, + { + "epoch": 1.194499017681729, + "grad_norm": 0.5817711353302002, + "learning_rate": 3.3082931903628823e-06, + "loss": 0.5527, + "step": 8106 + }, + { + "epoch": 1.1946463654223969, + "grad_norm": 0.6295194029808044, + "learning_rate": 3.307926305871597e-06, + "loss": 0.5215, + "step": 8107 + }, + { + "epoch": 1.1947937131630648, + "grad_norm": 0.5867851376533508, + "learning_rate": 3.307559401950063e-06, + "loss": 0.4898, + "step": 8108 + }, + { + "epoch": 1.1949410609037328, + "grad_norm": 0.6370291709899902, + "learning_rate": 3.307192478607104e-06, + "loss": 0.537, + "step": 8109 + }, + { + "epoch": 1.1950884086444007, + "grad_norm": 0.6339362859725952, + "learning_rate": 3.306825535851545e-06, + "loss": 0.5401, + "step": 8110 + }, + { + "epoch": 1.1952357563850688, + "grad_norm": 0.616044819355011, + "learning_rate": 3.30645857369221e-06, + "loss": 0.5126, + "step": 8111 + }, + { + "epoch": 1.1953831041257368, + "grad_norm": 0.6309270262718201, + "learning_rate": 3.3060915921379245e-06, + "loss": 0.5406, + "step": 8112 + }, + { + "epoch": 1.1955304518664047, + "grad_norm": 0.5876346230506897, + "learning_rate": 3.3057245911975144e-06, + "loss": 0.5593, + "step": 8113 + }, + { + "epoch": 1.1956777996070727, + "grad_norm": 0.5983748435974121, + "learning_rate": 3.3053575708798057e-06, + "loss": 0.5266, + "step": 8114 + }, + { + "epoch": 1.1958251473477406, + "grad_norm": 0.6968710422515869, + "learning_rate": 3.304990531193625e-06, + "loss": 0.5574, + "step": 8115 + }, + { + "epoch": 1.1959724950884087, + "grad_norm": 0.587539792060852, + "learning_rate": 3.3046234721478006e-06, + "loss": 0.5597, + "step": 8116 + }, + { + "epoch": 1.1961198428290767, + "grad_norm": 0.5635225176811218, + "learning_rate": 3.304256393751158e-06, + "loss": 0.519, + "step": 8117 + }, + { + "epoch": 1.1962671905697446, + "grad_norm": 0.5782319903373718, + "learning_rate": 3.3038892960125267e-06, + "loss": 0.5408, + "step": 8118 + }, + { + "epoch": 1.1964145383104126, + "grad_norm": 0.562759280204773, + "learning_rate": 3.3035221789407347e-06, + "loss": 0.5584, + "step": 8119 + }, + { + "epoch": 1.1965618860510805, + "grad_norm": 0.6382114887237549, + "learning_rate": 3.3031550425446114e-06, + "loss": 0.5537, + "step": 8120 + }, + { + "epoch": 1.1967092337917484, + "grad_norm": 0.5673182010650635, + "learning_rate": 3.3027878868329856e-06, + "loss": 0.5431, + "step": 8121 + }, + { + "epoch": 1.1968565815324166, + "grad_norm": 0.602700412273407, + "learning_rate": 3.3024207118146883e-06, + "loss": 0.5405, + "step": 8122 + }, + { + "epoch": 1.1970039292730845, + "grad_norm": 0.5709049701690674, + "learning_rate": 3.302053517498549e-06, + "loss": 0.5412, + "step": 8123 + }, + { + "epoch": 1.1971512770137525, + "grad_norm": 0.6310555338859558, + "learning_rate": 3.3016863038933987e-06, + "loss": 0.5399, + "step": 8124 + }, + { + "epoch": 1.1972986247544204, + "grad_norm": 0.5707908272743225, + "learning_rate": 3.3013190710080687e-06, + "loss": 0.5044, + "step": 8125 + }, + { + "epoch": 1.1974459724950883, + "grad_norm": 0.6507416367530823, + "learning_rate": 3.300951818851391e-06, + "loss": 0.5547, + "step": 8126 + }, + { + "epoch": 1.1975933202357565, + "grad_norm": 0.61029052734375, + "learning_rate": 3.300584547432198e-06, + "loss": 0.5184, + "step": 8127 + }, + { + "epoch": 1.1977406679764244, + "grad_norm": 0.6117971539497375, + "learning_rate": 3.3002172567593215e-06, + "loss": 0.5198, + "step": 8128 + }, + { + "epoch": 1.1978880157170924, + "grad_norm": 0.575388491153717, + "learning_rate": 3.2998499468415957e-06, + "loss": 0.5596, + "step": 8129 + }, + { + "epoch": 1.1980353634577603, + "grad_norm": 0.5648589134216309, + "learning_rate": 3.2994826176878537e-06, + "loss": 0.5346, + "step": 8130 + }, + { + "epoch": 1.1981827111984282, + "grad_norm": 0.6069909930229187, + "learning_rate": 3.29911526930693e-06, + "loss": 0.5526, + "step": 8131 + }, + { + "epoch": 1.1983300589390962, + "grad_norm": 0.5922430157661438, + "learning_rate": 3.2987479017076586e-06, + "loss": 0.5149, + "step": 8132 + }, + { + "epoch": 1.1984774066797643, + "grad_norm": 0.5596959590911865, + "learning_rate": 3.298380514898875e-06, + "loss": 0.5601, + "step": 8133 + }, + { + "epoch": 1.1986247544204323, + "grad_norm": 0.6166183352470398, + "learning_rate": 3.298013108889414e-06, + "loss": 0.5362, + "step": 8134 + }, + { + "epoch": 1.1987721021611002, + "grad_norm": 0.6120092272758484, + "learning_rate": 3.297645683688113e-06, + "loss": 0.535, + "step": 8135 + }, + { + "epoch": 1.1989194499017681, + "grad_norm": 0.5818096399307251, + "learning_rate": 3.2972782393038067e-06, + "loss": 0.5492, + "step": 8136 + }, + { + "epoch": 1.199066797642436, + "grad_norm": 0.6200100183486938, + "learning_rate": 3.2969107757453326e-06, + "loss": 0.538, + "step": 8137 + }, + { + "epoch": 1.1992141453831042, + "grad_norm": 0.5902450084686279, + "learning_rate": 3.2965432930215285e-06, + "loss": 0.5244, + "step": 8138 + }, + { + "epoch": 1.1993614931237722, + "grad_norm": 0.5914726257324219, + "learning_rate": 3.296175791141231e-06, + "loss": 0.5297, + "step": 8139 + }, + { + "epoch": 1.19950884086444, + "grad_norm": 0.6001551747322083, + "learning_rate": 3.29580827011328e-06, + "loss": 0.53, + "step": 8140 + }, + { + "epoch": 1.199656188605108, + "grad_norm": 0.6004728078842163, + "learning_rate": 3.295440729946513e-06, + "loss": 0.5417, + "step": 8141 + }, + { + "epoch": 1.199803536345776, + "grad_norm": 0.571120023727417, + "learning_rate": 3.2950731706497694e-06, + "loss": 0.4999, + "step": 8142 + }, + { + "epoch": 1.199950884086444, + "grad_norm": 0.6273941993713379, + "learning_rate": 3.294705592231889e-06, + "loss": 0.5343, + "step": 8143 + }, + { + "epoch": 1.200098231827112, + "grad_norm": 0.6019561290740967, + "learning_rate": 3.294337994701713e-06, + "loss": 0.4935, + "step": 8144 + }, + { + "epoch": 1.20024557956778, + "grad_norm": 0.6037889122962952, + "learning_rate": 3.2939703780680795e-06, + "loss": 0.5469, + "step": 8145 + }, + { + "epoch": 1.200392927308448, + "grad_norm": 0.5623909831047058, + "learning_rate": 3.2936027423398314e-06, + "loss": 0.5277, + "step": 8146 + }, + { + "epoch": 1.2005402750491159, + "grad_norm": 0.6785317659378052, + "learning_rate": 3.2932350875258097e-06, + "loss": 0.5268, + "step": 8147 + }, + { + "epoch": 1.2006876227897838, + "grad_norm": 0.6004557609558105, + "learning_rate": 3.292867413634856e-06, + "loss": 0.5266, + "step": 8148 + }, + { + "epoch": 1.200834970530452, + "grad_norm": 0.6004703044891357, + "learning_rate": 3.2924997206758125e-06, + "loss": 0.5453, + "step": 8149 + }, + { + "epoch": 1.20098231827112, + "grad_norm": 0.6086671948432922, + "learning_rate": 3.2921320086575227e-06, + "loss": 0.5444, + "step": 8150 + }, + { + "epoch": 1.2011296660117878, + "grad_norm": 0.6924008727073669, + "learning_rate": 3.2917642775888292e-06, + "loss": 0.5473, + "step": 8151 + }, + { + "epoch": 1.2012770137524558, + "grad_norm": 0.5688605904579163, + "learning_rate": 3.291396527478577e-06, + "loss": 0.5192, + "step": 8152 + }, + { + "epoch": 1.2014243614931237, + "grad_norm": 0.603165864944458, + "learning_rate": 3.2910287583356087e-06, + "loss": 0.5248, + "step": 8153 + }, + { + "epoch": 1.2015717092337916, + "grad_norm": 0.5993923544883728, + "learning_rate": 3.2906609701687702e-06, + "loss": 0.5011, + "step": 8154 + }, + { + "epoch": 1.2017190569744598, + "grad_norm": 0.6148019433021545, + "learning_rate": 3.290293162986906e-06, + "loss": 0.5236, + "step": 8155 + }, + { + "epoch": 1.2018664047151277, + "grad_norm": 0.5916516780853271, + "learning_rate": 3.2899253367988622e-06, + "loss": 0.5584, + "step": 8156 + }, + { + "epoch": 1.2020137524557957, + "grad_norm": 0.6295434832572937, + "learning_rate": 3.2895574916134842e-06, + "loss": 0.5356, + "step": 8157 + }, + { + "epoch": 1.2021611001964636, + "grad_norm": 0.6716651320457458, + "learning_rate": 3.2891896274396185e-06, + "loss": 0.5468, + "step": 8158 + }, + { + "epoch": 1.2023084479371315, + "grad_norm": 0.6179236173629761, + "learning_rate": 3.288821744286113e-06, + "loss": 0.51, + "step": 8159 + }, + { + "epoch": 1.2024557956777997, + "grad_norm": 0.5742987990379333, + "learning_rate": 3.2884538421618146e-06, + "loss": 0.5445, + "step": 8160 + }, + { + "epoch": 1.2026031434184676, + "grad_norm": 0.6019303798675537, + "learning_rate": 3.288085921075571e-06, + "loss": 0.5161, + "step": 8161 + }, + { + "epoch": 1.2027504911591356, + "grad_norm": 0.5866471529006958, + "learning_rate": 3.2877179810362308e-06, + "loss": 0.547, + "step": 8162 + }, + { + "epoch": 1.2028978388998035, + "grad_norm": 0.6498085260391235, + "learning_rate": 3.287350022052642e-06, + "loss": 0.522, + "step": 8163 + }, + { + "epoch": 1.2030451866404714, + "grad_norm": 0.5885424613952637, + "learning_rate": 3.286982044133655e-06, + "loss": 0.5482, + "step": 8164 + }, + { + "epoch": 1.2031925343811394, + "grad_norm": 0.5913601517677307, + "learning_rate": 3.2866140472881194e-06, + "loss": 0.4972, + "step": 8165 + }, + { + "epoch": 1.2033398821218075, + "grad_norm": 0.578863263130188, + "learning_rate": 3.286246031524884e-06, + "loss": 0.5207, + "step": 8166 + }, + { + "epoch": 1.2034872298624755, + "grad_norm": 0.5864518284797668, + "learning_rate": 3.2858779968528008e-06, + "loss": 0.5321, + "step": 8167 + }, + { + "epoch": 1.2036345776031434, + "grad_norm": 0.5934474468231201, + "learning_rate": 3.28550994328072e-06, + "loss": 0.5119, + "step": 8168 + }, + { + "epoch": 1.2037819253438113, + "grad_norm": 0.5855165719985962, + "learning_rate": 3.2851418708174937e-06, + "loss": 0.4959, + "step": 8169 + }, + { + "epoch": 1.2039292730844793, + "grad_norm": 0.6204361319541931, + "learning_rate": 3.2847737794719737e-06, + "loss": 0.5439, + "step": 8170 + }, + { + "epoch": 1.2040766208251474, + "grad_norm": 0.6089562773704529, + "learning_rate": 3.284405669253013e-06, + "loss": 0.5496, + "step": 8171 + }, + { + "epoch": 1.2042239685658154, + "grad_norm": 0.6154527068138123, + "learning_rate": 3.2840375401694634e-06, + "loss": 0.5396, + "step": 8172 + }, + { + "epoch": 1.2043713163064833, + "grad_norm": 0.650314211845398, + "learning_rate": 3.283669392230178e-06, + "loss": 0.5179, + "step": 8173 + }, + { + "epoch": 1.2045186640471512, + "grad_norm": 0.6013843417167664, + "learning_rate": 3.283301225444012e-06, + "loss": 0.5498, + "step": 8174 + }, + { + "epoch": 1.2046660117878192, + "grad_norm": 0.6138674020767212, + "learning_rate": 3.282933039819819e-06, + "loss": 0.5308, + "step": 8175 + }, + { + "epoch": 1.2048133595284871, + "grad_norm": 0.5823964476585388, + "learning_rate": 3.282564835366453e-06, + "loss": 0.5373, + "step": 8176 + }, + { + "epoch": 1.2049607072691553, + "grad_norm": 0.5930694341659546, + "learning_rate": 3.2821966120927697e-06, + "loss": 0.5089, + "step": 8177 + }, + { + "epoch": 1.2051080550098232, + "grad_norm": 0.589938759803772, + "learning_rate": 3.2818283700076253e-06, + "loss": 0.5509, + "step": 8178 + }, + { + "epoch": 1.2052554027504911, + "grad_norm": 0.6375407576560974, + "learning_rate": 3.281460109119875e-06, + "loss": 0.508, + "step": 8179 + }, + { + "epoch": 1.205402750491159, + "grad_norm": 0.615541398525238, + "learning_rate": 3.281091829438376e-06, + "loss": 0.51, + "step": 8180 + }, + { + "epoch": 1.205550098231827, + "grad_norm": 0.6175752878189087, + "learning_rate": 3.280723530971984e-06, + "loss": 0.5362, + "step": 8181 + }, + { + "epoch": 1.2056974459724952, + "grad_norm": 0.646772027015686, + "learning_rate": 3.2803552137295584e-06, + "loss": 0.5723, + "step": 8182 + }, + { + "epoch": 1.205844793713163, + "grad_norm": 0.5781505107879639, + "learning_rate": 3.2799868777199552e-06, + "loss": 0.5544, + "step": 8183 + }, + { + "epoch": 1.205992141453831, + "grad_norm": 0.5642465949058533, + "learning_rate": 3.2796185229520337e-06, + "loss": 0.5356, + "step": 8184 + }, + { + "epoch": 1.206139489194499, + "grad_norm": 0.6094475984573364, + "learning_rate": 3.2792501494346524e-06, + "loss": 0.5677, + "step": 8185 + }, + { + "epoch": 1.206286836935167, + "grad_norm": 0.5904058814048767, + "learning_rate": 3.2788817571766706e-06, + "loss": 0.5515, + "step": 8186 + }, + { + "epoch": 1.2064341846758349, + "grad_norm": 0.5853831768035889, + "learning_rate": 3.2785133461869473e-06, + "loss": 0.5346, + "step": 8187 + }, + { + "epoch": 1.206581532416503, + "grad_norm": 0.5655584931373596, + "learning_rate": 3.278144916474344e-06, + "loss": 0.5143, + "step": 8188 + }, + { + "epoch": 1.206728880157171, + "grad_norm": 0.5929807424545288, + "learning_rate": 3.2777764680477197e-06, + "loss": 0.5216, + "step": 8189 + }, + { + "epoch": 1.2068762278978389, + "grad_norm": 0.5855666995048523, + "learning_rate": 3.2774080009159365e-06, + "loss": 0.5356, + "step": 8190 + }, + { + "epoch": 1.2070235756385068, + "grad_norm": 0.6118473410606384, + "learning_rate": 3.2770395150878555e-06, + "loss": 0.4866, + "step": 8191 + }, + { + "epoch": 1.207170923379175, + "grad_norm": 0.5936675667762756, + "learning_rate": 3.276671010572338e-06, + "loss": 0.5435, + "step": 8192 + }, + { + "epoch": 1.207318271119843, + "grad_norm": 0.6185727119445801, + "learning_rate": 3.2763024873782477e-06, + "loss": 0.5264, + "step": 8193 + }, + { + "epoch": 1.2074656188605108, + "grad_norm": 0.6052880883216858, + "learning_rate": 3.2759339455144468e-06, + "loss": 0.5168, + "step": 8194 + }, + { + "epoch": 1.2076129666011788, + "grad_norm": 0.575303852558136, + "learning_rate": 3.2755653849897976e-06, + "loss": 0.5709, + "step": 8195 + }, + { + "epoch": 1.2077603143418467, + "grad_norm": 0.6146560907363892, + "learning_rate": 3.2751968058131658e-06, + "loss": 0.5207, + "step": 8196 + }, + { + "epoch": 1.2079076620825147, + "grad_norm": 0.6091229915618896, + "learning_rate": 3.2748282079934137e-06, + "loss": 0.4789, + "step": 8197 + }, + { + "epoch": 1.2080550098231826, + "grad_norm": 0.59053635597229, + "learning_rate": 3.274459591539406e-06, + "loss": 0.5143, + "step": 8198 + }, + { + "epoch": 1.2082023575638507, + "grad_norm": 0.6134876012802124, + "learning_rate": 3.274090956460009e-06, + "loss": 0.5451, + "step": 8199 + }, + { + "epoch": 1.2083497053045187, + "grad_norm": 0.6067270636558533, + "learning_rate": 3.273722302764088e-06, + "loss": 0.5808, + "step": 8200 + }, + { + "epoch": 1.2084970530451866, + "grad_norm": 0.5801178812980652, + "learning_rate": 3.2733536304605085e-06, + "loss": 0.5479, + "step": 8201 + }, + { + "epoch": 1.2086444007858546, + "grad_norm": 0.5727700591087341, + "learning_rate": 3.2729849395581366e-06, + "loss": 0.5303, + "step": 8202 + }, + { + "epoch": 1.2087917485265227, + "grad_norm": 0.5994399785995483, + "learning_rate": 3.2726162300658395e-06, + "loss": 0.502, + "step": 8203 + }, + { + "epoch": 1.2089390962671906, + "grad_norm": 0.5938941836357117, + "learning_rate": 3.2722475019924838e-06, + "loss": 0.532, + "step": 8204 + }, + { + "epoch": 1.2090864440078586, + "grad_norm": 0.6308533549308777, + "learning_rate": 3.271878755346939e-06, + "loss": 0.5671, + "step": 8205 + }, + { + "epoch": 1.2092337917485265, + "grad_norm": 0.5565633773803711, + "learning_rate": 3.271509990138071e-06, + "loss": 0.559, + "step": 8206 + }, + { + "epoch": 1.2093811394891945, + "grad_norm": 0.6291263699531555, + "learning_rate": 3.2711412063747503e-06, + "loss": 0.5396, + "step": 8207 + }, + { + "epoch": 1.2095284872298624, + "grad_norm": 0.5662286877632141, + "learning_rate": 3.270772404065845e-06, + "loss": 0.5017, + "step": 8208 + }, + { + "epoch": 1.2096758349705303, + "grad_norm": 0.5920021533966064, + "learning_rate": 3.2704035832202252e-06, + "loss": 0.544, + "step": 8209 + }, + { + "epoch": 1.2098231827111985, + "grad_norm": 0.5885125994682312, + "learning_rate": 3.2700347438467606e-06, + "loss": 0.5522, + "step": 8210 + }, + { + "epoch": 1.2099705304518664, + "grad_norm": 0.5994765162467957, + "learning_rate": 3.269665885954322e-06, + "loss": 0.5484, + "step": 8211 + }, + { + "epoch": 1.2101178781925344, + "grad_norm": 0.6574380397796631, + "learning_rate": 3.2692970095517783e-06, + "loss": 0.512, + "step": 8212 + }, + { + "epoch": 1.2102652259332023, + "grad_norm": 0.6463035345077515, + "learning_rate": 3.2689281146480034e-06, + "loss": 0.5453, + "step": 8213 + }, + { + "epoch": 1.2104125736738705, + "grad_norm": 0.5693427324295044, + "learning_rate": 3.2685592012518687e-06, + "loss": 0.5409, + "step": 8214 + }, + { + "epoch": 1.2105599214145384, + "grad_norm": 0.5985028743743896, + "learning_rate": 3.2681902693722444e-06, + "loss": 0.5216, + "step": 8215 + }, + { + "epoch": 1.2107072691552063, + "grad_norm": 0.5996715426445007, + "learning_rate": 3.267821319018005e-06, + "loss": 0.5114, + "step": 8216 + }, + { + "epoch": 1.2108546168958743, + "grad_norm": 0.5742551684379578, + "learning_rate": 3.2674523501980225e-06, + "loss": 0.5289, + "step": 8217 + }, + { + "epoch": 1.2110019646365422, + "grad_norm": 0.5918591022491455, + "learning_rate": 3.2670833629211716e-06, + "loss": 0.5036, + "step": 8218 + }, + { + "epoch": 1.2111493123772101, + "grad_norm": 0.5649529695510864, + "learning_rate": 3.2667143571963256e-06, + "loss": 0.5335, + "step": 8219 + }, + { + "epoch": 1.2112966601178783, + "grad_norm": 0.573296844959259, + "learning_rate": 3.2663453330323588e-06, + "loss": 0.5383, + "step": 8220 + }, + { + "epoch": 1.2114440078585462, + "grad_norm": 0.634159505367279, + "learning_rate": 3.265976290438146e-06, + "loss": 0.5231, + "step": 8221 + }, + { + "epoch": 1.2115913555992142, + "grad_norm": 0.5939257144927979, + "learning_rate": 3.2656072294225627e-06, + "loss": 0.5211, + "step": 8222 + }, + { + "epoch": 1.211738703339882, + "grad_norm": 0.5691532492637634, + "learning_rate": 3.2652381499944848e-06, + "loss": 0.559, + "step": 8223 + }, + { + "epoch": 1.21188605108055, + "grad_norm": 0.6086956262588501, + "learning_rate": 3.264869052162788e-06, + "loss": 0.5365, + "step": 8224 + }, + { + "epoch": 1.2120333988212182, + "grad_norm": 0.6279048919677734, + "learning_rate": 3.2644999359363493e-06, + "loss": 0.5227, + "step": 8225 + }, + { + "epoch": 1.2121807465618861, + "grad_norm": 0.5823432207107544, + "learning_rate": 3.2641308013240456e-06, + "loss": 0.5148, + "step": 8226 + }, + { + "epoch": 1.212328094302554, + "grad_norm": 0.5947549939155579, + "learning_rate": 3.2637616483347552e-06, + "loss": 0.5541, + "step": 8227 + }, + { + "epoch": 1.212475442043222, + "grad_norm": 0.59806889295578, + "learning_rate": 3.2633924769773547e-06, + "loss": 0.523, + "step": 8228 + }, + { + "epoch": 1.21262278978389, + "grad_norm": 0.590999186038971, + "learning_rate": 3.2630232872607233e-06, + "loss": 0.5175, + "step": 8229 + }, + { + "epoch": 1.2127701375245579, + "grad_norm": 0.5806100964546204, + "learning_rate": 3.26265407919374e-06, + "loss": 0.5426, + "step": 8230 + }, + { + "epoch": 1.212917485265226, + "grad_norm": 0.585894763469696, + "learning_rate": 3.2622848527852827e-06, + "loss": 0.517, + "step": 8231 + }, + { + "epoch": 1.213064833005894, + "grad_norm": 0.5942673683166504, + "learning_rate": 3.2619156080442333e-06, + "loss": 0.5288, + "step": 8232 + }, + { + "epoch": 1.213212180746562, + "grad_norm": 0.5696855187416077, + "learning_rate": 3.261546344979471e-06, + "loss": 0.5519, + "step": 8233 + }, + { + "epoch": 1.2133595284872298, + "grad_norm": 0.5885685682296753, + "learning_rate": 3.2611770635998756e-06, + "loss": 0.5622, + "step": 8234 + }, + { + "epoch": 1.2135068762278978, + "grad_norm": 0.6184800267219543, + "learning_rate": 3.26080776391433e-06, + "loss": 0.537, + "step": 8235 + }, + { + "epoch": 1.213654223968566, + "grad_norm": 0.6197754740715027, + "learning_rate": 3.2604384459317135e-06, + "loss": 0.5607, + "step": 8236 + }, + { + "epoch": 1.2138015717092339, + "grad_norm": 0.5642370581626892, + "learning_rate": 3.2600691096609093e-06, + "loss": 0.5239, + "step": 8237 + }, + { + "epoch": 1.2139489194499018, + "grad_norm": 0.6008350253105164, + "learning_rate": 3.2596997551108e-06, + "loss": 0.4991, + "step": 8238 + }, + { + "epoch": 1.2140962671905697, + "grad_norm": 0.589034378528595, + "learning_rate": 3.2593303822902678e-06, + "loss": 0.5527, + "step": 8239 + }, + { + "epoch": 1.2142436149312377, + "grad_norm": 0.5847307443618774, + "learning_rate": 3.2589609912081953e-06, + "loss": 0.5399, + "step": 8240 + }, + { + "epoch": 1.2143909626719056, + "grad_norm": 0.5766740441322327, + "learning_rate": 3.258591581873467e-06, + "loss": 0.5612, + "step": 8241 + }, + { + "epoch": 1.2145383104125738, + "grad_norm": 0.5904167890548706, + "learning_rate": 3.258222154294968e-06, + "loss": 0.5423, + "step": 8242 + }, + { + "epoch": 1.2146856581532417, + "grad_norm": 0.5629783868789673, + "learning_rate": 3.2578527084815807e-06, + "loss": 0.5495, + "step": 8243 + }, + { + "epoch": 1.2148330058939096, + "grad_norm": 0.5782726407051086, + "learning_rate": 3.257483244442192e-06, + "loss": 0.575, + "step": 8244 + }, + { + "epoch": 1.2149803536345776, + "grad_norm": 0.577694833278656, + "learning_rate": 3.2571137621856856e-06, + "loss": 0.5096, + "step": 8245 + }, + { + "epoch": 1.2151277013752455, + "grad_norm": 0.5795260071754456, + "learning_rate": 3.2567442617209487e-06, + "loss": 0.5251, + "step": 8246 + }, + { + "epoch": 1.2152750491159137, + "grad_norm": 0.5790427923202515, + "learning_rate": 3.2563747430568683e-06, + "loss": 0.5433, + "step": 8247 + }, + { + "epoch": 1.2154223968565816, + "grad_norm": 0.5942614078521729, + "learning_rate": 3.256005206202329e-06, + "loss": 0.5537, + "step": 8248 + }, + { + "epoch": 1.2155697445972495, + "grad_norm": 0.5834277868270874, + "learning_rate": 3.255635651166219e-06, + "loss": 0.548, + "step": 8249 + }, + { + "epoch": 1.2157170923379175, + "grad_norm": 0.6058496832847595, + "learning_rate": 3.255266077957426e-06, + "loss": 0.5505, + "step": 8250 + }, + { + "epoch": 1.2158644400785854, + "grad_norm": 0.5825607180595398, + "learning_rate": 3.2548964865848383e-06, + "loss": 0.519, + "step": 8251 + }, + { + "epoch": 1.2160117878192533, + "grad_norm": 0.622333824634552, + "learning_rate": 3.2545268770573445e-06, + "loss": 0.5475, + "step": 8252 + }, + { + "epoch": 1.2161591355599215, + "grad_norm": 0.6518734097480774, + "learning_rate": 3.2541572493838325e-06, + "loss": 0.5332, + "step": 8253 + }, + { + "epoch": 1.2163064833005894, + "grad_norm": 0.6252349019050598, + "learning_rate": 3.253787603573193e-06, + "loss": 0.5023, + "step": 8254 + }, + { + "epoch": 1.2164538310412574, + "grad_norm": 0.5621390342712402, + "learning_rate": 3.253417939634315e-06, + "loss": 0.527, + "step": 8255 + }, + { + "epoch": 1.2166011787819253, + "grad_norm": 0.6075165867805481, + "learning_rate": 3.253048257576089e-06, + "loss": 0.5108, + "step": 8256 + }, + { + "epoch": 1.2167485265225932, + "grad_norm": 0.6206716299057007, + "learning_rate": 3.2526785574074053e-06, + "loss": 0.5299, + "step": 8257 + }, + { + "epoch": 1.2168958742632614, + "grad_norm": 0.6160521507263184, + "learning_rate": 3.252308839137156e-06, + "loss": 0.5588, + "step": 8258 + }, + { + "epoch": 1.2170432220039293, + "grad_norm": 0.626304030418396, + "learning_rate": 3.2519391027742312e-06, + "loss": 0.5536, + "step": 8259 + }, + { + "epoch": 1.2171905697445973, + "grad_norm": 0.5989513397216797, + "learning_rate": 3.251569348327524e-06, + "loss": 0.5504, + "step": 8260 + }, + { + "epoch": 1.2173379174852652, + "grad_norm": 0.6100839972496033, + "learning_rate": 3.2511995758059274e-06, + "loss": 0.5409, + "step": 8261 + }, + { + "epoch": 1.2174852652259331, + "grad_norm": 0.5750823020935059, + "learning_rate": 3.2508297852183323e-06, + "loss": 0.503, + "step": 8262 + }, + { + "epoch": 1.217632612966601, + "grad_norm": 0.5891824960708618, + "learning_rate": 3.250459976573633e-06, + "loss": 0.5259, + "step": 8263 + }, + { + "epoch": 1.2177799607072692, + "grad_norm": 0.5750365257263184, + "learning_rate": 3.2500901498807234e-06, + "loss": 0.5456, + "step": 8264 + }, + { + "epoch": 1.2179273084479372, + "grad_norm": 0.6240167617797852, + "learning_rate": 3.249720305148497e-06, + "loss": 0.5402, + "step": 8265 + }, + { + "epoch": 1.218074656188605, + "grad_norm": 0.5855587124824524, + "learning_rate": 3.2493504423858495e-06, + "loss": 0.5475, + "step": 8266 + }, + { + "epoch": 1.218222003929273, + "grad_norm": 0.5749973058700562, + "learning_rate": 3.2489805616016757e-06, + "loss": 0.5186, + "step": 8267 + }, + { + "epoch": 1.218369351669941, + "grad_norm": 0.57988440990448, + "learning_rate": 3.24861066280487e-06, + "loss": 0.5301, + "step": 8268 + }, + { + "epoch": 1.2185166994106091, + "grad_norm": 0.5791731476783752, + "learning_rate": 3.2482407460043286e-06, + "loss": 0.5168, + "step": 8269 + }, + { + "epoch": 1.218664047151277, + "grad_norm": 0.5848143100738525, + "learning_rate": 3.2478708112089486e-06, + "loss": 0.4769, + "step": 8270 + }, + { + "epoch": 1.218811394891945, + "grad_norm": 0.5828845500946045, + "learning_rate": 3.247500858427627e-06, + "loss": 0.5269, + "step": 8271 + }, + { + "epoch": 1.218958742632613, + "grad_norm": 0.6087738871574402, + "learning_rate": 3.24713088766926e-06, + "loss": 0.5803, + "step": 8272 + }, + { + "epoch": 1.2191060903732809, + "grad_norm": 0.6371464729309082, + "learning_rate": 3.2467608989427454e-06, + "loss": 0.5321, + "step": 8273 + }, + { + "epoch": 1.2192534381139488, + "grad_norm": 0.55734783411026, + "learning_rate": 3.2463908922569813e-06, + "loss": 0.5436, + "step": 8274 + }, + { + "epoch": 1.219400785854617, + "grad_norm": 0.585757315158844, + "learning_rate": 3.2460208676208666e-06, + "loss": 0.517, + "step": 8275 + }, + { + "epoch": 1.219548133595285, + "grad_norm": 0.5684197545051575, + "learning_rate": 3.2456508250433e-06, + "loss": 0.5634, + "step": 8276 + }, + { + "epoch": 1.2196954813359528, + "grad_norm": 0.5958117246627808, + "learning_rate": 3.2452807645331807e-06, + "loss": 0.5555, + "step": 8277 + }, + { + "epoch": 1.2198428290766208, + "grad_norm": 0.6269570589065552, + "learning_rate": 3.2449106860994083e-06, + "loss": 0.5208, + "step": 8278 + }, + { + "epoch": 1.2199901768172887, + "grad_norm": 0.6284036636352539, + "learning_rate": 3.2445405897508835e-06, + "loss": 0.5778, + "step": 8279 + }, + { + "epoch": 1.2201375245579569, + "grad_norm": 0.5849241614341736, + "learning_rate": 3.244170475496507e-06, + "loss": 0.5098, + "step": 8280 + }, + { + "epoch": 1.2202848722986248, + "grad_norm": 0.5885937809944153, + "learning_rate": 3.2438003433451793e-06, + "loss": 0.5591, + "step": 8281 + }, + { + "epoch": 1.2204322200392927, + "grad_norm": 0.569900393486023, + "learning_rate": 3.243430193305802e-06, + "loss": 0.5462, + "step": 8282 + }, + { + "epoch": 1.2205795677799607, + "grad_norm": 0.5797631740570068, + "learning_rate": 3.243060025387278e-06, + "loss": 0.4992, + "step": 8283 + }, + { + "epoch": 1.2207269155206286, + "grad_norm": 0.5893461108207703, + "learning_rate": 3.242689839598508e-06, + "loss": 0.5079, + "step": 8284 + }, + { + "epoch": 1.2208742632612966, + "grad_norm": 0.5700379610061646, + "learning_rate": 3.242319635948396e-06, + "loss": 0.5321, + "step": 8285 + }, + { + "epoch": 1.2210216110019647, + "grad_norm": 0.6092627644538879, + "learning_rate": 3.2419494144458457e-06, + "loss": 0.542, + "step": 8286 + }, + { + "epoch": 1.2211689587426326, + "grad_norm": 0.5868677496910095, + "learning_rate": 3.241579175099759e-06, + "loss": 0.5111, + "step": 8287 + }, + { + "epoch": 1.2213163064833006, + "grad_norm": 0.6349053382873535, + "learning_rate": 3.241208917919041e-06, + "loss": 0.536, + "step": 8288 + }, + { + "epoch": 1.2214636542239685, + "grad_norm": 0.6093603372573853, + "learning_rate": 3.2408386429125964e-06, + "loss": 0.5252, + "step": 8289 + }, + { + "epoch": 1.2216110019646365, + "grad_norm": 0.6282265782356262, + "learning_rate": 3.2404683500893297e-06, + "loss": 0.5622, + "step": 8290 + }, + { + "epoch": 1.2217583497053046, + "grad_norm": 0.5710651874542236, + "learning_rate": 3.2400980394581473e-06, + "loss": 0.5088, + "step": 8291 + }, + { + "epoch": 1.2219056974459725, + "grad_norm": 0.6068074107170105, + "learning_rate": 3.2397277110279533e-06, + "loss": 0.5521, + "step": 8292 + }, + { + "epoch": 1.2220530451866405, + "grad_norm": 0.581454336643219, + "learning_rate": 3.239357364807655e-06, + "loss": 0.5427, + "step": 8293 + }, + { + "epoch": 1.2222003929273084, + "grad_norm": 0.6125131249427795, + "learning_rate": 3.238987000806159e-06, + "loss": 0.5359, + "step": 8294 + }, + { + "epoch": 1.2223477406679764, + "grad_norm": 0.5927923321723938, + "learning_rate": 3.238616619032372e-06, + "loss": 0.5298, + "step": 8295 + }, + { + "epoch": 1.2224950884086443, + "grad_norm": 0.5942994356155396, + "learning_rate": 3.2382462194952024e-06, + "loss": 0.5619, + "step": 8296 + }, + { + "epoch": 1.2226424361493125, + "grad_norm": 0.5636431574821472, + "learning_rate": 3.237875802203557e-06, + "loss": 0.533, + "step": 8297 + }, + { + "epoch": 1.2227897838899804, + "grad_norm": 0.5847835540771484, + "learning_rate": 3.2375053671663446e-06, + "loss": 0.5728, + "step": 8298 + }, + { + "epoch": 1.2229371316306483, + "grad_norm": 0.6306821703910828, + "learning_rate": 3.237134914392474e-06, + "loss": 0.5276, + "step": 8299 + }, + { + "epoch": 1.2230844793713163, + "grad_norm": 0.5638253688812256, + "learning_rate": 3.2367644438908546e-06, + "loss": 0.5476, + "step": 8300 + }, + { + "epoch": 1.2232318271119842, + "grad_norm": 0.5730720162391663, + "learning_rate": 3.236393955670396e-06, + "loss": 0.5405, + "step": 8301 + }, + { + "epoch": 1.2233791748526524, + "grad_norm": 0.5613149404525757, + "learning_rate": 3.2360234497400076e-06, + "loss": 0.5222, + "step": 8302 + }, + { + "epoch": 1.2235265225933203, + "grad_norm": 0.5978950262069702, + "learning_rate": 3.235652926108601e-06, + "loss": 0.5667, + "step": 8303 + }, + { + "epoch": 1.2236738703339882, + "grad_norm": 0.5854465961456299, + "learning_rate": 3.2352823847850867e-06, + "loss": 0.5032, + "step": 8304 + }, + { + "epoch": 1.2238212180746562, + "grad_norm": 0.6004713177680969, + "learning_rate": 3.234911825778375e-06, + "loss": 0.5186, + "step": 8305 + }, + { + "epoch": 1.223968565815324, + "grad_norm": 0.5992001295089722, + "learning_rate": 3.2345412490973793e-06, + "loss": 0.5407, + "step": 8306 + }, + { + "epoch": 1.224115913555992, + "grad_norm": 0.6396079659461975, + "learning_rate": 3.234170654751011e-06, + "loss": 0.5347, + "step": 8307 + }, + { + "epoch": 1.2242632612966602, + "grad_norm": 0.5856828093528748, + "learning_rate": 3.233800042748183e-06, + "loss": 0.5319, + "step": 8308 + }, + { + "epoch": 1.2244106090373281, + "grad_norm": 0.5976799726486206, + "learning_rate": 3.233429413097808e-06, + "loss": 0.5652, + "step": 8309 + }, + { + "epoch": 1.224557956777996, + "grad_norm": 0.6064508557319641, + "learning_rate": 3.2330587658088e-06, + "loss": 0.5391, + "step": 8310 + }, + { + "epoch": 1.224705304518664, + "grad_norm": 0.5870406031608582, + "learning_rate": 3.232688100890071e-06, + "loss": 0.5254, + "step": 8311 + }, + { + "epoch": 1.224852652259332, + "grad_norm": 0.5918562412261963, + "learning_rate": 3.2323174183505385e-06, + "loss": 0.5017, + "step": 8312 + }, + { + "epoch": 1.225, + "grad_norm": 0.6196325421333313, + "learning_rate": 3.2319467181991148e-06, + "loss": 0.5588, + "step": 8313 + }, + { + "epoch": 1.225147347740668, + "grad_norm": 0.617682933807373, + "learning_rate": 3.231576000444716e-06, + "loss": 0.557, + "step": 8314 + }, + { + "epoch": 1.225294695481336, + "grad_norm": 0.617497980594635, + "learning_rate": 3.2312052650962577e-06, + "loss": 0.5785, + "step": 8315 + }, + { + "epoch": 1.225442043222004, + "grad_norm": 0.596121072769165, + "learning_rate": 3.2308345121626548e-06, + "loss": 0.5288, + "step": 8316 + }, + { + "epoch": 1.2255893909626718, + "grad_norm": 0.6583458185195923, + "learning_rate": 3.230463741652826e-06, + "loss": 0.5529, + "step": 8317 + }, + { + "epoch": 1.2257367387033398, + "grad_norm": 0.6037209033966064, + "learning_rate": 3.230092953575686e-06, + "loss": 0.5319, + "step": 8318 + }, + { + "epoch": 1.225884086444008, + "grad_norm": 0.5644702315330505, + "learning_rate": 3.229722147940153e-06, + "loss": 0.5415, + "step": 8319 + }, + { + "epoch": 1.2260314341846759, + "grad_norm": 0.5786057114601135, + "learning_rate": 3.229351324755145e-06, + "loss": 0.5543, + "step": 8320 + }, + { + "epoch": 1.2261787819253438, + "grad_norm": 0.6157020330429077, + "learning_rate": 3.228980484029579e-06, + "loss": 0.5406, + "step": 8321 + }, + { + "epoch": 1.2263261296660117, + "grad_norm": 0.5863932371139526, + "learning_rate": 3.2286096257723754e-06, + "loss": 0.5284, + "step": 8322 + }, + { + "epoch": 1.2264734774066797, + "grad_norm": 0.5865944027900696, + "learning_rate": 3.2282387499924504e-06, + "loss": 0.4991, + "step": 8323 + }, + { + "epoch": 1.2266208251473478, + "grad_norm": 0.6083717942237854, + "learning_rate": 3.227867856698726e-06, + "loss": 0.5132, + "step": 8324 + }, + { + "epoch": 1.2267681728880158, + "grad_norm": 0.5751996636390686, + "learning_rate": 3.227496945900121e-06, + "loss": 0.5482, + "step": 8325 + }, + { + "epoch": 1.2269155206286837, + "grad_norm": 0.6158565878868103, + "learning_rate": 3.227126017605556e-06, + "loss": 0.5297, + "step": 8326 + }, + { + "epoch": 1.2270628683693516, + "grad_norm": 0.6102267503738403, + "learning_rate": 3.2267550718239504e-06, + "loss": 0.5251, + "step": 8327 + }, + { + "epoch": 1.2272102161100196, + "grad_norm": 0.5962485671043396, + "learning_rate": 3.226384108564228e-06, + "loss": 0.5372, + "step": 8328 + }, + { + "epoch": 1.2273575638506875, + "grad_norm": 0.6022315621376038, + "learning_rate": 3.226013127835307e-06, + "loss": 0.5069, + "step": 8329 + }, + { + "epoch": 1.2275049115913557, + "grad_norm": 0.6366091966629028, + "learning_rate": 3.225642129646111e-06, + "loss": 0.5547, + "step": 8330 + }, + { + "epoch": 1.2276522593320236, + "grad_norm": 0.5769565105438232, + "learning_rate": 3.225271114005562e-06, + "loss": 0.5214, + "step": 8331 + }, + { + "epoch": 1.2277996070726915, + "grad_norm": 0.5868831872940063, + "learning_rate": 3.2249000809225833e-06, + "loss": 0.5411, + "step": 8332 + }, + { + "epoch": 1.2279469548133595, + "grad_norm": 0.6072171926498413, + "learning_rate": 3.224529030406097e-06, + "loss": 0.5543, + "step": 8333 + }, + { + "epoch": 1.2280943025540276, + "grad_norm": 0.5760124921798706, + "learning_rate": 3.224157962465028e-06, + "loss": 0.5825, + "step": 8334 + }, + { + "epoch": 1.2282416502946956, + "grad_norm": 0.5974665880203247, + "learning_rate": 3.2237868771082996e-06, + "loss": 0.5305, + "step": 8335 + }, + { + "epoch": 1.2283889980353635, + "grad_norm": 0.592433512210846, + "learning_rate": 3.2234157743448358e-06, + "loss": 0.523, + "step": 8336 + }, + { + "epoch": 1.2285363457760314, + "grad_norm": 0.5840322375297546, + "learning_rate": 3.2230446541835625e-06, + "loss": 0.5232, + "step": 8337 + }, + { + "epoch": 1.2286836935166994, + "grad_norm": 0.5888426899909973, + "learning_rate": 3.2226735166334046e-06, + "loss": 0.5316, + "step": 8338 + }, + { + "epoch": 1.2288310412573673, + "grad_norm": 0.5537952184677124, + "learning_rate": 3.222302361703287e-06, + "loss": 0.5397, + "step": 8339 + }, + { + "epoch": 1.2289783889980352, + "grad_norm": 0.5864986777305603, + "learning_rate": 3.221931189402136e-06, + "loss": 0.5442, + "step": 8340 + }, + { + "epoch": 1.2291257367387034, + "grad_norm": 0.5972114205360413, + "learning_rate": 3.221559999738879e-06, + "loss": 0.5059, + "step": 8341 + }, + { + "epoch": 1.2292730844793713, + "grad_norm": 0.633348286151886, + "learning_rate": 3.2211887927224418e-06, + "loss": 0.5363, + "step": 8342 + }, + { + "epoch": 1.2294204322200393, + "grad_norm": 0.566013514995575, + "learning_rate": 3.2208175683617533e-06, + "loss": 0.5474, + "step": 8343 + }, + { + "epoch": 1.2295677799607072, + "grad_norm": 0.6005882620811462, + "learning_rate": 3.2204463266657398e-06, + "loss": 0.5605, + "step": 8344 + }, + { + "epoch": 1.2297151277013754, + "grad_norm": 0.638939380645752, + "learning_rate": 3.2200750676433306e-06, + "loss": 0.5392, + "step": 8345 + }, + { + "epoch": 1.2298624754420433, + "grad_norm": 0.5647721290588379, + "learning_rate": 3.2197037913034535e-06, + "loss": 0.5643, + "step": 8346 + }, + { + "epoch": 1.2300098231827112, + "grad_norm": 0.6161673069000244, + "learning_rate": 3.2193324976550375e-06, + "loss": 0.4987, + "step": 8347 + }, + { + "epoch": 1.2301571709233792, + "grad_norm": 0.5960752964019775, + "learning_rate": 3.2189611867070125e-06, + "loss": 0.5579, + "step": 8348 + }, + { + "epoch": 1.230304518664047, + "grad_norm": 0.6139745116233826, + "learning_rate": 3.2185898584683082e-06, + "loss": 0.5542, + "step": 8349 + }, + { + "epoch": 1.230451866404715, + "grad_norm": 0.5802680253982544, + "learning_rate": 3.2182185129478555e-06, + "loss": 0.5425, + "step": 8350 + }, + { + "epoch": 1.230599214145383, + "grad_norm": 0.5770519375801086, + "learning_rate": 3.217847150154584e-06, + "loss": 0.5254, + "step": 8351 + }, + { + "epoch": 1.2307465618860511, + "grad_norm": 0.5978107452392578, + "learning_rate": 3.217475770097425e-06, + "loss": 0.5117, + "step": 8352 + }, + { + "epoch": 1.230893909626719, + "grad_norm": 0.6281406283378601, + "learning_rate": 3.2171043727853108e-06, + "loss": 0.5691, + "step": 8353 + }, + { + "epoch": 1.231041257367387, + "grad_norm": 0.580350935459137, + "learning_rate": 3.216732958227172e-06, + "loss": 0.5268, + "step": 8354 + }, + { + "epoch": 1.231188605108055, + "grad_norm": 0.5643450021743774, + "learning_rate": 3.216361526431942e-06, + "loss": 0.5279, + "step": 8355 + }, + { + "epoch": 1.231335952848723, + "grad_norm": 0.5945702195167542, + "learning_rate": 3.2159900774085533e-06, + "loss": 0.5617, + "step": 8356 + }, + { + "epoch": 1.231483300589391, + "grad_norm": 0.607675313949585, + "learning_rate": 3.2156186111659393e-06, + "loss": 0.547, + "step": 8357 + }, + { + "epoch": 1.231630648330059, + "grad_norm": 0.6228494048118591, + "learning_rate": 3.2152471277130333e-06, + "loss": 0.5372, + "step": 8358 + }, + { + "epoch": 1.231777996070727, + "grad_norm": 0.5955049991607666, + "learning_rate": 3.214875627058769e-06, + "loss": 0.4876, + "step": 8359 + }, + { + "epoch": 1.2319253438113948, + "grad_norm": 0.6209683418273926, + "learning_rate": 3.2145041092120804e-06, + "loss": 0.5285, + "step": 8360 + }, + { + "epoch": 1.2320726915520628, + "grad_norm": 0.5974156260490417, + "learning_rate": 3.214132574181904e-06, + "loss": 0.5413, + "step": 8361 + }, + { + "epoch": 1.232220039292731, + "grad_norm": 0.6183992624282837, + "learning_rate": 3.213761021977174e-06, + "loss": 0.5431, + "step": 8362 + }, + { + "epoch": 1.2323673870333989, + "grad_norm": 0.5901743769645691, + "learning_rate": 3.2133894526068266e-06, + "loss": 0.5357, + "step": 8363 + }, + { + "epoch": 1.2325147347740668, + "grad_norm": 0.5962323546409607, + "learning_rate": 3.213017866079797e-06, + "loss": 0.5501, + "step": 8364 + }, + { + "epoch": 1.2326620825147347, + "grad_norm": 0.6167760491371155, + "learning_rate": 3.2126462624050226e-06, + "loss": 0.5163, + "step": 8365 + }, + { + "epoch": 1.2328094302554027, + "grad_norm": 0.593396782875061, + "learning_rate": 3.2122746415914394e-06, + "loss": 0.5403, + "step": 8366 + }, + { + "epoch": 1.2329567779960708, + "grad_norm": 0.6051449775695801, + "learning_rate": 3.211903003647986e-06, + "loss": 0.4867, + "step": 8367 + }, + { + "epoch": 1.2331041257367388, + "grad_norm": 0.6126241683959961, + "learning_rate": 3.211531348583599e-06, + "loss": 0.5492, + "step": 8368 + }, + { + "epoch": 1.2332514734774067, + "grad_norm": 0.5976189970970154, + "learning_rate": 3.2111596764072154e-06, + "loss": 0.5407, + "step": 8369 + }, + { + "epoch": 1.2333988212180746, + "grad_norm": 0.59274822473526, + "learning_rate": 3.2107879871277766e-06, + "loss": 0.5487, + "step": 8370 + }, + { + "epoch": 1.2335461689587426, + "grad_norm": 0.5492615699768066, + "learning_rate": 3.2104162807542193e-06, + "loss": 0.5594, + "step": 8371 + }, + { + "epoch": 1.2336935166994105, + "grad_norm": 0.5798895955085754, + "learning_rate": 3.210044557295484e-06, + "loss": 0.5337, + "step": 8372 + }, + { + "epoch": 1.2338408644400787, + "grad_norm": 0.5854611396789551, + "learning_rate": 3.2096728167605107e-06, + "loss": 0.5687, + "step": 8373 + }, + { + "epoch": 1.2339882121807466, + "grad_norm": 0.5927178263664246, + "learning_rate": 3.209301059158238e-06, + "loss": 0.5299, + "step": 8374 + }, + { + "epoch": 1.2341355599214145, + "grad_norm": 0.5759567618370056, + "learning_rate": 3.2089292844976084e-06, + "loss": 0.5327, + "step": 8375 + }, + { + "epoch": 1.2342829076620825, + "grad_norm": 0.6036869287490845, + "learning_rate": 3.208557492787562e-06, + "loss": 0.5536, + "step": 8376 + }, + { + "epoch": 1.2344302554027504, + "grad_norm": 0.608279287815094, + "learning_rate": 3.2081856840370397e-06, + "loss": 0.5146, + "step": 8377 + }, + { + "epoch": 1.2345776031434186, + "grad_norm": 0.6090776324272156, + "learning_rate": 3.207813858254984e-06, + "loss": 0.5476, + "step": 8378 + }, + { + "epoch": 1.2347249508840865, + "grad_norm": 0.5797444581985474, + "learning_rate": 3.207442015450336e-06, + "loss": 0.5493, + "step": 8379 + }, + { + "epoch": 1.2348722986247544, + "grad_norm": 0.5994849801063538, + "learning_rate": 3.2070701556320403e-06, + "loss": 0.5469, + "step": 8380 + }, + { + "epoch": 1.2350196463654224, + "grad_norm": 0.5605560541152954, + "learning_rate": 3.206698278809039e-06, + "loss": 0.5114, + "step": 8381 + }, + { + "epoch": 1.2351669941060903, + "grad_norm": 0.5764309763908386, + "learning_rate": 3.206326384990276e-06, + "loss": 0.5461, + "step": 8382 + }, + { + "epoch": 1.2353143418467583, + "grad_norm": 0.5863512754440308, + "learning_rate": 3.2059544741846935e-06, + "loss": 0.5181, + "step": 8383 + }, + { + "epoch": 1.2354616895874264, + "grad_norm": 0.5836325883865356, + "learning_rate": 3.205582546401238e-06, + "loss": 0.5401, + "step": 8384 + }, + { + "epoch": 1.2356090373280944, + "grad_norm": 0.5831282734870911, + "learning_rate": 3.205210601648853e-06, + "loss": 0.5491, + "step": 8385 + }, + { + "epoch": 1.2357563850687623, + "grad_norm": 0.5764232873916626, + "learning_rate": 3.2048386399364837e-06, + "loss": 0.5138, + "step": 8386 + }, + { + "epoch": 1.2359037328094302, + "grad_norm": 0.5787984132766724, + "learning_rate": 3.204466661273076e-06, + "loss": 0.49, + "step": 8387 + }, + { + "epoch": 1.2360510805500982, + "grad_norm": 0.6035469770431519, + "learning_rate": 3.2040946656675754e-06, + "loss": 0.4769, + "step": 8388 + }, + { + "epoch": 1.2361984282907663, + "grad_norm": 0.5697550177574158, + "learning_rate": 3.203722653128928e-06, + "loss": 0.522, + "step": 8389 + }, + { + "epoch": 1.2363457760314343, + "grad_norm": 0.5836119055747986, + "learning_rate": 3.203350623666081e-06, + "loss": 0.5376, + "step": 8390 + }, + { + "epoch": 1.2364931237721022, + "grad_norm": 0.5908872485160828, + "learning_rate": 3.2029785772879817e-06, + "loss": 0.5337, + "step": 8391 + }, + { + "epoch": 1.2366404715127701, + "grad_norm": 0.6137061715126038, + "learning_rate": 3.2026065140035768e-06, + "loss": 0.5626, + "step": 8392 + }, + { + "epoch": 1.236787819253438, + "grad_norm": 0.6175933480262756, + "learning_rate": 3.2022344338218154e-06, + "loss": 0.5566, + "step": 8393 + }, + { + "epoch": 1.236935166994106, + "grad_norm": 0.5869764685630798, + "learning_rate": 3.2018623367516443e-06, + "loss": 0.5631, + "step": 8394 + }, + { + "epoch": 1.2370825147347742, + "grad_norm": 0.5826141238212585, + "learning_rate": 3.201490222802014e-06, + "loss": 0.5146, + "step": 8395 + }, + { + "epoch": 1.237229862475442, + "grad_norm": 0.5693265199661255, + "learning_rate": 3.2011180919818728e-06, + "loss": 0.5024, + "step": 8396 + }, + { + "epoch": 1.23737721021611, + "grad_norm": 0.6129409670829773, + "learning_rate": 3.2007459443001698e-06, + "loss": 0.5473, + "step": 8397 + }, + { + "epoch": 1.237524557956778, + "grad_norm": 0.6137910485267639, + "learning_rate": 3.2003737797658564e-06, + "loss": 0.551, + "step": 8398 + }, + { + "epoch": 1.237671905697446, + "grad_norm": 0.5662838220596313, + "learning_rate": 3.2000015983878813e-06, + "loss": 0.5526, + "step": 8399 + }, + { + "epoch": 1.237819253438114, + "grad_norm": 0.5793346166610718, + "learning_rate": 3.1996294001751967e-06, + "loss": 0.5301, + "step": 8400 + }, + { + "epoch": 1.237966601178782, + "grad_norm": 0.5705205202102661, + "learning_rate": 3.1992571851367527e-06, + "loss": 0.5331, + "step": 8401 + }, + { + "epoch": 1.23811394891945, + "grad_norm": 0.6101009249687195, + "learning_rate": 3.1988849532815016e-06, + "loss": 0.5358, + "step": 8402 + }, + { + "epoch": 1.2382612966601179, + "grad_norm": 0.5634797215461731, + "learning_rate": 3.198512704618395e-06, + "loss": 0.5356, + "step": 8403 + }, + { + "epoch": 1.2384086444007858, + "grad_norm": 0.5805830359458923, + "learning_rate": 3.1981404391563853e-06, + "loss": 0.5248, + "step": 8404 + }, + { + "epoch": 1.2385559921414537, + "grad_norm": 0.5822992324829102, + "learning_rate": 3.197768156904426e-06, + "loss": 0.5069, + "step": 8405 + }, + { + "epoch": 1.238703339882122, + "grad_norm": 0.5807539224624634, + "learning_rate": 3.197395857871469e-06, + "loss": 0.5581, + "step": 8406 + }, + { + "epoch": 1.2388506876227898, + "grad_norm": 0.5999597311019897, + "learning_rate": 3.1970235420664698e-06, + "loss": 0.5649, + "step": 8407 + }, + { + "epoch": 1.2389980353634578, + "grad_norm": 0.5946739912033081, + "learning_rate": 3.1966512094983804e-06, + "loss": 0.5286, + "step": 8408 + }, + { + "epoch": 1.2391453831041257, + "grad_norm": 0.5926889181137085, + "learning_rate": 3.1962788601761568e-06, + "loss": 0.5482, + "step": 8409 + }, + { + "epoch": 1.2392927308447936, + "grad_norm": 0.6008157134056091, + "learning_rate": 3.195906494108753e-06, + "loss": 0.5384, + "step": 8410 + }, + { + "epoch": 1.2394400785854618, + "grad_norm": 0.635796070098877, + "learning_rate": 3.195534111305124e-06, + "loss": 0.5475, + "step": 8411 + }, + { + "epoch": 1.2395874263261297, + "grad_norm": 0.5752394795417786, + "learning_rate": 3.1951617117742263e-06, + "loss": 0.5475, + "step": 8412 + }, + { + "epoch": 1.2397347740667977, + "grad_norm": 0.5686519742012024, + "learning_rate": 3.1947892955250153e-06, + "loss": 0.5518, + "step": 8413 + }, + { + "epoch": 1.2398821218074656, + "grad_norm": 0.6057783365249634, + "learning_rate": 3.194416862566448e-06, + "loss": 0.5334, + "step": 8414 + }, + { + "epoch": 1.2400294695481335, + "grad_norm": 0.5621753931045532, + "learning_rate": 3.1940444129074798e-06, + "loss": 0.5216, + "step": 8415 + }, + { + "epoch": 1.2401768172888015, + "grad_norm": 0.6068243384361267, + "learning_rate": 3.19367194655707e-06, + "loss": 0.5518, + "step": 8416 + }, + { + "epoch": 1.2403241650294696, + "grad_norm": 0.6264683604240417, + "learning_rate": 3.1932994635241747e-06, + "loss": 0.5455, + "step": 8417 + }, + { + "epoch": 1.2404715127701376, + "grad_norm": 0.5652531981468201, + "learning_rate": 3.1929269638177536e-06, + "loss": 0.5254, + "step": 8418 + }, + { + "epoch": 1.2406188605108055, + "grad_norm": 0.605726420879364, + "learning_rate": 3.192554447446763e-06, + "loss": 0.5579, + "step": 8419 + }, + { + "epoch": 1.2407662082514734, + "grad_norm": 0.6556408405303955, + "learning_rate": 3.1921819144201634e-06, + "loss": 0.4967, + "step": 8420 + }, + { + "epoch": 1.2409135559921414, + "grad_norm": 0.5681003928184509, + "learning_rate": 3.191809364746913e-06, + "loss": 0.5387, + "step": 8421 + }, + { + "epoch": 1.2410609037328095, + "grad_norm": 0.5950648784637451, + "learning_rate": 3.1914367984359716e-06, + "loss": 0.5688, + "step": 8422 + }, + { + "epoch": 1.2412082514734775, + "grad_norm": 0.5817951560020447, + "learning_rate": 3.1910642154962996e-06, + "loss": 0.5018, + "step": 8423 + }, + { + "epoch": 1.2413555992141454, + "grad_norm": 0.5822411775588989, + "learning_rate": 3.1906916159368574e-06, + "loss": 0.5156, + "step": 8424 + }, + { + "epoch": 1.2415029469548133, + "grad_norm": 0.6362036466598511, + "learning_rate": 3.190318999766606e-06, + "loss": 0.5356, + "step": 8425 + }, + { + "epoch": 1.2416502946954813, + "grad_norm": 0.6149347424507141, + "learning_rate": 3.1899463669945065e-06, + "loss": 0.5454, + "step": 8426 + }, + { + "epoch": 1.2417976424361492, + "grad_norm": 0.5839880704879761, + "learning_rate": 3.18957371762952e-06, + "loss": 0.5545, + "step": 8427 + }, + { + "epoch": 1.2419449901768174, + "grad_norm": 0.6245467662811279, + "learning_rate": 3.189201051680609e-06, + "loss": 0.5047, + "step": 8428 + }, + { + "epoch": 1.2420923379174853, + "grad_norm": 0.5589747428894043, + "learning_rate": 3.188828369156737e-06, + "loss": 0.5176, + "step": 8429 + }, + { + "epoch": 1.2422396856581532, + "grad_norm": 0.5936490297317505, + "learning_rate": 3.1884556700668643e-06, + "loss": 0.4972, + "step": 8430 + }, + { + "epoch": 1.2423870333988212, + "grad_norm": 0.6224019527435303, + "learning_rate": 3.1880829544199566e-06, + "loss": 0.5435, + "step": 8431 + }, + { + "epoch": 1.242534381139489, + "grad_norm": 0.5874687433242798, + "learning_rate": 3.1877102222249766e-06, + "loss": 0.5195, + "step": 8432 + }, + { + "epoch": 1.2426817288801573, + "grad_norm": 0.5812466144561768, + "learning_rate": 3.187337473490888e-06, + "loss": 0.5032, + "step": 8433 + }, + { + "epoch": 1.2428290766208252, + "grad_norm": 0.636776864528656, + "learning_rate": 3.1869647082266555e-06, + "loss": 0.5297, + "step": 8434 + }, + { + "epoch": 1.2429764243614931, + "grad_norm": 0.5524795651435852, + "learning_rate": 3.186591926441244e-06, + "loss": 0.5022, + "step": 8435 + }, + { + "epoch": 1.243123772102161, + "grad_norm": 0.598023533821106, + "learning_rate": 3.1862191281436183e-06, + "loss": 0.4884, + "step": 8436 + }, + { + "epoch": 1.243271119842829, + "grad_norm": 0.6003462076187134, + "learning_rate": 3.1858463133427453e-06, + "loss": 0.5262, + "step": 8437 + }, + { + "epoch": 1.243418467583497, + "grad_norm": 0.6272944808006287, + "learning_rate": 3.1854734820475897e-06, + "loss": 0.5761, + "step": 8438 + }, + { + "epoch": 1.243565815324165, + "grad_norm": 0.589004635810852, + "learning_rate": 3.185100634267119e-06, + "loss": 0.5607, + "step": 8439 + }, + { + "epoch": 1.243713163064833, + "grad_norm": 0.5850605964660645, + "learning_rate": 3.184727770010298e-06, + "loss": 0.542, + "step": 8440 + }, + { + "epoch": 1.243860510805501, + "grad_norm": 0.57340407371521, + "learning_rate": 3.1843548892860965e-06, + "loss": 0.4931, + "step": 8441 + }, + { + "epoch": 1.244007858546169, + "grad_norm": 0.6102762222290039, + "learning_rate": 3.1839819921034806e-06, + "loss": 0.5229, + "step": 8442 + }, + { + "epoch": 1.2441552062868368, + "grad_norm": 0.5961853861808777, + "learning_rate": 3.1836090784714187e-06, + "loss": 0.5318, + "step": 8443 + }, + { + "epoch": 1.244302554027505, + "grad_norm": 0.5678109526634216, + "learning_rate": 3.183236148398879e-06, + "loss": 0.5459, + "step": 8444 + }, + { + "epoch": 1.244449901768173, + "grad_norm": 0.5928691625595093, + "learning_rate": 3.18286320189483e-06, + "loss": 0.5055, + "step": 8445 + }, + { + "epoch": 1.2445972495088409, + "grad_norm": 0.5743581056594849, + "learning_rate": 3.1824902389682416e-06, + "loss": 0.5625, + "step": 8446 + }, + { + "epoch": 1.2447445972495088, + "grad_norm": 0.5928661227226257, + "learning_rate": 3.182117259628083e-06, + "loss": 0.5224, + "step": 8447 + }, + { + "epoch": 1.2448919449901767, + "grad_norm": 0.5793279409408569, + "learning_rate": 3.1817442638833246e-06, + "loss": 0.5071, + "step": 8448 + }, + { + "epoch": 1.2450392927308447, + "grad_norm": 0.6105471253395081, + "learning_rate": 3.1813712517429357e-06, + "loss": 0.5376, + "step": 8449 + }, + { + "epoch": 1.2451866404715128, + "grad_norm": 0.6028497219085693, + "learning_rate": 3.180998223215888e-06, + "loss": 0.5498, + "step": 8450 + }, + { + "epoch": 1.2453339882121808, + "grad_norm": 0.6064099669456482, + "learning_rate": 3.180625178311153e-06, + "loss": 0.5487, + "step": 8451 + }, + { + "epoch": 1.2454813359528487, + "grad_norm": 0.6040505170822144, + "learning_rate": 3.1802521170377003e-06, + "loss": 0.5651, + "step": 8452 + }, + { + "epoch": 1.2456286836935166, + "grad_norm": 0.5748438239097595, + "learning_rate": 3.179879039404504e-06, + "loss": 0.5117, + "step": 8453 + }, + { + "epoch": 1.2457760314341846, + "grad_norm": 0.6051508188247681, + "learning_rate": 3.1795059454205363e-06, + "loss": 0.573, + "step": 8454 + }, + { + "epoch": 1.2459233791748527, + "grad_norm": 0.6113705635070801, + "learning_rate": 3.1791328350947694e-06, + "loss": 0.5249, + "step": 8455 + }, + { + "epoch": 1.2460707269155207, + "grad_norm": 0.5948257446289062, + "learning_rate": 3.1787597084361753e-06, + "loss": 0.5134, + "step": 8456 + }, + { + "epoch": 1.2462180746561886, + "grad_norm": 0.5879716277122498, + "learning_rate": 3.1783865654537295e-06, + "loss": 0.5198, + "step": 8457 + }, + { + "epoch": 1.2463654223968565, + "grad_norm": 0.5578978657722473, + "learning_rate": 3.1780134061564042e-06, + "loss": 0.54, + "step": 8458 + }, + { + "epoch": 1.2465127701375245, + "grad_norm": 0.5895615220069885, + "learning_rate": 3.177640230553175e-06, + "loss": 0.5194, + "step": 8459 + }, + { + "epoch": 1.2466601178781924, + "grad_norm": 0.5994163155555725, + "learning_rate": 3.1772670386530164e-06, + "loss": 0.538, + "step": 8460 + }, + { + "epoch": 1.2468074656188606, + "grad_norm": 0.5498363971710205, + "learning_rate": 3.176893830464903e-06, + "loss": 0.5304, + "step": 8461 + }, + { + "epoch": 1.2469548133595285, + "grad_norm": 0.6069278717041016, + "learning_rate": 3.17652060599781e-06, + "loss": 0.526, + "step": 8462 + }, + { + "epoch": 1.2471021611001964, + "grad_norm": 0.5790780186653137, + "learning_rate": 3.176147365260714e-06, + "loss": 0.5412, + "step": 8463 + }, + { + "epoch": 1.2472495088408644, + "grad_norm": 0.5988009572029114, + "learning_rate": 3.175774108262591e-06, + "loss": 0.5556, + "step": 8464 + }, + { + "epoch": 1.2473968565815323, + "grad_norm": 0.5839880704879761, + "learning_rate": 3.175400835012418e-06, + "loss": 0.5384, + "step": 8465 + }, + { + "epoch": 1.2475442043222005, + "grad_norm": 0.5895832180976868, + "learning_rate": 3.17502754551917e-06, + "loss": 0.5248, + "step": 8466 + }, + { + "epoch": 1.2476915520628684, + "grad_norm": 0.5917171239852905, + "learning_rate": 3.174654239791829e-06, + "loss": 0.5524, + "step": 8467 + }, + { + "epoch": 1.2478388998035363, + "grad_norm": 0.5526681542396545, + "learning_rate": 3.174280917839368e-06, + "loss": 0.5228, + "step": 8468 + }, + { + "epoch": 1.2479862475442043, + "grad_norm": 0.6160796284675598, + "learning_rate": 3.1739075796707674e-06, + "loss": 0.5427, + "step": 8469 + }, + { + "epoch": 1.2481335952848722, + "grad_norm": 0.6002881526947021, + "learning_rate": 3.1735342252950065e-06, + "loss": 0.531, + "step": 8470 + }, + { + "epoch": 1.2482809430255402, + "grad_norm": 0.5850503444671631, + "learning_rate": 3.173160854721062e-06, + "loss": 0.5163, + "step": 8471 + }, + { + "epoch": 1.2484282907662083, + "grad_norm": 0.6513739824295044, + "learning_rate": 3.172787467957915e-06, + "loss": 0.5572, + "step": 8472 + }, + { + "epoch": 1.2485756385068763, + "grad_norm": 0.6107694506645203, + "learning_rate": 3.1724140650145452e-06, + "loss": 0.5139, + "step": 8473 + }, + { + "epoch": 1.2487229862475442, + "grad_norm": 0.6107122898101807, + "learning_rate": 3.172040645899933e-06, + "loss": 0.5436, + "step": 8474 + }, + { + "epoch": 1.2488703339882121, + "grad_norm": 0.5930836796760559, + "learning_rate": 3.171667210623058e-06, + "loss": 0.5297, + "step": 8475 + }, + { + "epoch": 1.2490176817288803, + "grad_norm": 0.5523433089256287, + "learning_rate": 3.1712937591929015e-06, + "loss": 0.5326, + "step": 8476 + }, + { + "epoch": 1.2491650294695482, + "grad_norm": 0.6367605924606323, + "learning_rate": 3.1709202916184455e-06, + "loss": 0.5432, + "step": 8477 + }, + { + "epoch": 1.2493123772102162, + "grad_norm": 0.6008528470993042, + "learning_rate": 3.170546807908671e-06, + "loss": 0.506, + "step": 8478 + }, + { + "epoch": 1.249459724950884, + "grad_norm": 0.6090682744979858, + "learning_rate": 3.17017330807256e-06, + "loss": 0.5398, + "step": 8479 + }, + { + "epoch": 1.249607072691552, + "grad_norm": 0.5892834067344666, + "learning_rate": 3.1697997921190947e-06, + "loss": 0.5578, + "step": 8480 + }, + { + "epoch": 1.24975442043222, + "grad_norm": 0.6216947436332703, + "learning_rate": 3.1694262600572595e-06, + "loss": 0.5418, + "step": 8481 + }, + { + "epoch": 1.249901768172888, + "grad_norm": 0.5586519241333008, + "learning_rate": 3.1690527118960367e-06, + "loss": 0.5539, + "step": 8482 + }, + { + "epoch": 1.250049115913556, + "grad_norm": 0.619270384311676, + "learning_rate": 3.1686791476444096e-06, + "loss": 0.54, + "step": 8483 + }, + { + "epoch": 1.250196463654224, + "grad_norm": 0.5610482692718506, + "learning_rate": 3.1683055673113627e-06, + "loss": 0.5186, + "step": 8484 + }, + { + "epoch": 1.250343811394892, + "grad_norm": 0.630726158618927, + "learning_rate": 3.167931970905881e-06, + "loss": 0.5314, + "step": 8485 + }, + { + "epoch": 1.2504911591355599, + "grad_norm": 0.6218194365501404, + "learning_rate": 3.1675583584369473e-06, + "loss": 0.5249, + "step": 8486 + }, + { + "epoch": 1.250638506876228, + "grad_norm": 0.6131093502044678, + "learning_rate": 3.1671847299135495e-06, + "loss": 0.574, + "step": 8487 + }, + { + "epoch": 1.250785854616896, + "grad_norm": 0.5926681756973267, + "learning_rate": 3.166811085344671e-06, + "loss": 0.5123, + "step": 8488 + }, + { + "epoch": 1.250933202357564, + "grad_norm": 0.6299012899398804, + "learning_rate": 3.1664374247392994e-06, + "loss": 0.5225, + "step": 8489 + }, + { + "epoch": 1.2510805500982318, + "grad_norm": 0.6432463526725769, + "learning_rate": 3.16606374810642e-06, + "loss": 0.5799, + "step": 8490 + }, + { + "epoch": 1.2512278978388998, + "grad_norm": 0.5691943168640137, + "learning_rate": 3.16569005545502e-06, + "loss": 0.5105, + "step": 8491 + }, + { + "epoch": 1.2513752455795677, + "grad_norm": 0.6072233319282532, + "learning_rate": 3.1653163467940856e-06, + "loss": 0.5228, + "step": 8492 + }, + { + "epoch": 1.2515225933202356, + "grad_norm": 0.6010671257972717, + "learning_rate": 3.1649426221326058e-06, + "loss": 0.5216, + "step": 8493 + }, + { + "epoch": 1.2516699410609038, + "grad_norm": 0.6083247065544128, + "learning_rate": 3.164568881479568e-06, + "loss": 0.5216, + "step": 8494 + }, + { + "epoch": 1.2518172888015717, + "grad_norm": 0.6072942614555359, + "learning_rate": 3.1641951248439597e-06, + "loss": 0.4856, + "step": 8495 + }, + { + "epoch": 1.2519646365422397, + "grad_norm": 0.559701681137085, + "learning_rate": 3.1638213522347705e-06, + "loss": 0.5414, + "step": 8496 + }, + { + "epoch": 1.2521119842829076, + "grad_norm": 0.6265007853507996, + "learning_rate": 3.163447563660989e-06, + "loss": 0.5318, + "step": 8497 + }, + { + "epoch": 1.2522593320235758, + "grad_norm": 0.6111485958099365, + "learning_rate": 3.1630737591316052e-06, + "loss": 0.5386, + "step": 8498 + }, + { + "epoch": 1.2524066797642437, + "grad_norm": 0.5891491174697876, + "learning_rate": 3.162699938655608e-06, + "loss": 0.547, + "step": 8499 + }, + { + "epoch": 1.2525540275049116, + "grad_norm": 0.5822084546089172, + "learning_rate": 3.1623261022419882e-06, + "loss": 0.5339, + "step": 8500 + }, + { + "epoch": 1.2527013752455796, + "grad_norm": 0.5851041078567505, + "learning_rate": 3.161952249899737e-06, + "loss": 0.5661, + "step": 8501 + }, + { + "epoch": 1.2528487229862475, + "grad_norm": 0.5547873973846436, + "learning_rate": 3.1615783816378435e-06, + "loss": 0.5166, + "step": 8502 + }, + { + "epoch": 1.2529960707269154, + "grad_norm": 0.591116726398468, + "learning_rate": 3.161204497465301e-06, + "loss": 0.5178, + "step": 8503 + }, + { + "epoch": 1.2531434184675834, + "grad_norm": 0.6178141236305237, + "learning_rate": 3.1608305973911003e-06, + "loss": 0.5528, + "step": 8504 + }, + { + "epoch": 1.2532907662082515, + "grad_norm": 0.5749914646148682, + "learning_rate": 3.1604566814242334e-06, + "loss": 0.5257, + "step": 8505 + }, + { + "epoch": 1.2534381139489195, + "grad_norm": 0.575995683670044, + "learning_rate": 3.160082749573693e-06, + "loss": 0.4996, + "step": 8506 + }, + { + "epoch": 1.2535854616895874, + "grad_norm": 0.5863906741142273, + "learning_rate": 3.159708801848472e-06, + "loss": 0.5323, + "step": 8507 + }, + { + "epoch": 1.2537328094302553, + "grad_norm": 0.5879504680633545, + "learning_rate": 3.159334838257564e-06, + "loss": 0.5254, + "step": 8508 + }, + { + "epoch": 1.2538801571709235, + "grad_norm": 0.592208743095398, + "learning_rate": 3.158960858809963e-06, + "loss": 0.5507, + "step": 8509 + }, + { + "epoch": 1.2540275049115914, + "grad_norm": 0.6007714867591858, + "learning_rate": 3.1585868635146623e-06, + "loss": 0.5313, + "step": 8510 + }, + { + "epoch": 1.2541748526522594, + "grad_norm": 0.5846205353736877, + "learning_rate": 3.1582128523806555e-06, + "loss": 0.5423, + "step": 8511 + }, + { + "epoch": 1.2543222003929273, + "grad_norm": 0.5844401121139526, + "learning_rate": 3.157838825416939e-06, + "loss": 0.5535, + "step": 8512 + }, + { + "epoch": 1.2544695481335952, + "grad_norm": 0.6162218451499939, + "learning_rate": 3.1574647826325072e-06, + "loss": 0.5242, + "step": 8513 + }, + { + "epoch": 1.2546168958742632, + "grad_norm": 0.5762893557548523, + "learning_rate": 3.1570907240363555e-06, + "loss": 0.5455, + "step": 8514 + }, + { + "epoch": 1.254764243614931, + "grad_norm": 0.6090127825737, + "learning_rate": 3.1567166496374803e-06, + "loss": 0.5371, + "step": 8515 + }, + { + "epoch": 1.2549115913555993, + "grad_norm": 0.6262156963348389, + "learning_rate": 3.156342559444878e-06, + "loss": 0.5163, + "step": 8516 + }, + { + "epoch": 1.2550589390962672, + "grad_norm": 0.5866003632545471, + "learning_rate": 3.1559684534675446e-06, + "loss": 0.5134, + "step": 8517 + }, + { + "epoch": 1.2552062868369351, + "grad_norm": 0.5990013480186462, + "learning_rate": 3.1555943317144776e-06, + "loss": 0.5321, + "step": 8518 + }, + { + "epoch": 1.255353634577603, + "grad_norm": 0.6005268692970276, + "learning_rate": 3.155220194194674e-06, + "loss": 0.5279, + "step": 8519 + }, + { + "epoch": 1.2555009823182712, + "grad_norm": 0.6039177179336548, + "learning_rate": 3.154846040917133e-06, + "loss": 0.5118, + "step": 8520 + }, + { + "epoch": 1.2556483300589392, + "grad_norm": 0.6027734875679016, + "learning_rate": 3.1544718718908514e-06, + "loss": 0.5127, + "step": 8521 + }, + { + "epoch": 1.255795677799607, + "grad_norm": 0.6197774410247803, + "learning_rate": 3.154097687124828e-06, + "loss": 0.541, + "step": 8522 + }, + { + "epoch": 1.255943025540275, + "grad_norm": 0.5928588509559631, + "learning_rate": 3.153723486628062e-06, + "loss": 0.5391, + "step": 8523 + }, + { + "epoch": 1.256090373280943, + "grad_norm": 0.601554274559021, + "learning_rate": 3.153349270409553e-06, + "loss": 0.5443, + "step": 8524 + }, + { + "epoch": 1.256237721021611, + "grad_norm": 0.6167022585868835, + "learning_rate": 3.1529750384783005e-06, + "loss": 0.5394, + "step": 8525 + }, + { + "epoch": 1.2563850687622788, + "grad_norm": 0.5934661626815796, + "learning_rate": 3.1526007908433037e-06, + "loss": 0.4923, + "step": 8526 + }, + { + "epoch": 1.256532416502947, + "grad_norm": 0.6075415015220642, + "learning_rate": 3.152226527513565e-06, + "loss": 0.503, + "step": 8527 + }, + { + "epoch": 1.256679764243615, + "grad_norm": 0.5982630252838135, + "learning_rate": 3.151852248498084e-06, + "loss": 0.4916, + "step": 8528 + }, + { + "epoch": 1.2568271119842829, + "grad_norm": 0.605685293674469, + "learning_rate": 3.1514779538058623e-06, + "loss": 0.5566, + "step": 8529 + }, + { + "epoch": 1.2569744597249508, + "grad_norm": 0.5812498331069946, + "learning_rate": 3.151103643445901e-06, + "loss": 0.5221, + "step": 8530 + }, + { + "epoch": 1.257121807465619, + "grad_norm": 0.5833204984664917, + "learning_rate": 3.150729317427203e-06, + "loss": 0.5151, + "step": 8531 + }, + { + "epoch": 1.257269155206287, + "grad_norm": 0.56996750831604, + "learning_rate": 3.1503549757587698e-06, + "loss": 0.5406, + "step": 8532 + }, + { + "epoch": 1.2574165029469548, + "grad_norm": 0.7135286331176758, + "learning_rate": 3.149980618449605e-06, + "loss": 0.5443, + "step": 8533 + }, + { + "epoch": 1.2575638506876228, + "grad_norm": 0.5944504141807556, + "learning_rate": 3.1496062455087106e-06, + "loss": 0.5554, + "step": 8534 + }, + { + "epoch": 1.2577111984282907, + "grad_norm": 0.6032397747039795, + "learning_rate": 3.1492318569450908e-06, + "loss": 0.5372, + "step": 8535 + }, + { + "epoch": 1.2578585461689586, + "grad_norm": 0.5623114109039307, + "learning_rate": 3.1488574527677497e-06, + "loss": 0.5528, + "step": 8536 + }, + { + "epoch": 1.2580058939096266, + "grad_norm": 0.5886235237121582, + "learning_rate": 3.1484830329856904e-06, + "loss": 0.5042, + "step": 8537 + }, + { + "epoch": 1.2581532416502947, + "grad_norm": 0.6152483224868774, + "learning_rate": 3.1481085976079182e-06, + "loss": 0.5616, + "step": 8538 + }, + { + "epoch": 1.2583005893909627, + "grad_norm": 0.668441116809845, + "learning_rate": 3.147734146643439e-06, + "loss": 0.5366, + "step": 8539 + }, + { + "epoch": 1.2584479371316306, + "grad_norm": 0.5571837425231934, + "learning_rate": 3.1473596801012573e-06, + "loss": 0.5332, + "step": 8540 + }, + { + "epoch": 1.2585952848722985, + "grad_norm": 0.5869909524917603, + "learning_rate": 3.1469851979903785e-06, + "loss": 0.5435, + "step": 8541 + }, + { + "epoch": 1.2587426326129667, + "grad_norm": 0.5986390709877014, + "learning_rate": 3.14661070031981e-06, + "loss": 0.5475, + "step": 8542 + }, + { + "epoch": 1.2588899803536346, + "grad_norm": 0.5880645513534546, + "learning_rate": 3.1462361870985562e-06, + "loss": 0.5295, + "step": 8543 + }, + { + "epoch": 1.2590373280943026, + "grad_norm": 0.5806154608726501, + "learning_rate": 3.1458616583356256e-06, + "loss": 0.5411, + "step": 8544 + }, + { + "epoch": 1.2591846758349705, + "grad_norm": 0.7471556663513184, + "learning_rate": 3.1454871140400255e-06, + "loss": 0.5312, + "step": 8545 + }, + { + "epoch": 1.2593320235756384, + "grad_norm": 0.5726155042648315, + "learning_rate": 3.145112554220763e-06, + "loss": 0.5654, + "step": 8546 + }, + { + "epoch": 1.2594793713163064, + "grad_norm": 0.5954543948173523, + "learning_rate": 3.1447379788868452e-06, + "loss": 0.5336, + "step": 8547 + }, + { + "epoch": 1.2596267190569745, + "grad_norm": 0.5855095386505127, + "learning_rate": 3.1443633880472823e-06, + "loss": 0.5179, + "step": 8548 + }, + { + "epoch": 1.2597740667976425, + "grad_norm": 0.5569456815719604, + "learning_rate": 3.1439887817110826e-06, + "loss": 0.5335, + "step": 8549 + }, + { + "epoch": 1.2599214145383104, + "grad_norm": 0.6035590767860413, + "learning_rate": 3.143614159887254e-06, + "loss": 0.5199, + "step": 8550 + }, + { + "epoch": 1.2600687622789783, + "grad_norm": 0.5712030529975891, + "learning_rate": 3.1432395225848066e-06, + "loss": 0.525, + "step": 8551 + }, + { + "epoch": 1.2602161100196463, + "grad_norm": 0.5862418413162231, + "learning_rate": 3.1428648698127505e-06, + "loss": 0.5489, + "step": 8552 + }, + { + "epoch": 1.2603634577603144, + "grad_norm": 0.6037749648094177, + "learning_rate": 3.1424902015800963e-06, + "loss": 0.5325, + "step": 8553 + }, + { + "epoch": 1.2605108055009824, + "grad_norm": 0.6033301949501038, + "learning_rate": 3.142115517895854e-06, + "loss": 0.5422, + "step": 8554 + }, + { + "epoch": 1.2606581532416503, + "grad_norm": 0.6015392541885376, + "learning_rate": 3.141740818769034e-06, + "loss": 0.554, + "step": 8555 + }, + { + "epoch": 1.2608055009823183, + "grad_norm": 0.6137667298316956, + "learning_rate": 3.141366104208649e-06, + "loss": 0.5456, + "step": 8556 + }, + { + "epoch": 1.2609528487229862, + "grad_norm": 0.6075030565261841, + "learning_rate": 3.14099137422371e-06, + "loss": 0.5491, + "step": 8557 + }, + { + "epoch": 1.2611001964636541, + "grad_norm": 0.5903962850570679, + "learning_rate": 3.1406166288232294e-06, + "loss": 0.5442, + "step": 8558 + }, + { + "epoch": 1.2612475442043223, + "grad_norm": 0.5963290333747864, + "learning_rate": 3.1402418680162184e-06, + "loss": 0.5559, + "step": 8559 + }, + { + "epoch": 1.2613948919449902, + "grad_norm": 0.595022976398468, + "learning_rate": 3.1398670918116913e-06, + "loss": 0.5077, + "step": 8560 + }, + { + "epoch": 1.2615422396856582, + "grad_norm": 0.5775117874145508, + "learning_rate": 3.1394923002186605e-06, + "loss": 0.5404, + "step": 8561 + }, + { + "epoch": 1.261689587426326, + "grad_norm": 0.6045454740524292, + "learning_rate": 3.1391174932461397e-06, + "loss": 0.5371, + "step": 8562 + }, + { + "epoch": 1.2618369351669942, + "grad_norm": 0.6680074334144592, + "learning_rate": 3.138742670903143e-06, + "loss": 0.5553, + "step": 8563 + }, + { + "epoch": 1.2619842829076622, + "grad_norm": 0.5821616649627686, + "learning_rate": 3.138367833198685e-06, + "loss": 0.5558, + "step": 8564 + }, + { + "epoch": 1.2621316306483301, + "grad_norm": 0.6183292269706726, + "learning_rate": 3.1379929801417796e-06, + "loss": 0.5386, + "step": 8565 + }, + { + "epoch": 1.262278978388998, + "grad_norm": 1.723841905593872, + "learning_rate": 3.1376181117414427e-06, + "loss": 0.5353, + "step": 8566 + }, + { + "epoch": 1.262426326129666, + "grad_norm": 0.6123490929603577, + "learning_rate": 3.137243228006689e-06, + "loss": 0.5477, + "step": 8567 + }, + { + "epoch": 1.262573673870334, + "grad_norm": 0.5668282508850098, + "learning_rate": 3.1368683289465346e-06, + "loss": 0.5337, + "step": 8568 + }, + { + "epoch": 1.2627210216110019, + "grad_norm": 0.5647643804550171, + "learning_rate": 3.1364934145699956e-06, + "loss": 0.5261, + "step": 8569 + }, + { + "epoch": 1.26286836935167, + "grad_norm": 0.6170791387557983, + "learning_rate": 3.136118484886089e-06, + "loss": 0.5008, + "step": 8570 + }, + { + "epoch": 1.263015717092338, + "grad_norm": 0.5941058397293091, + "learning_rate": 3.1357435399038307e-06, + "loss": 0.5254, + "step": 8571 + }, + { + "epoch": 1.2631630648330059, + "grad_norm": 0.6363881826400757, + "learning_rate": 3.1353685796322382e-06, + "loss": 0.5332, + "step": 8572 + }, + { + "epoch": 1.2633104125736738, + "grad_norm": 0.6035314798355103, + "learning_rate": 3.1349936040803298e-06, + "loss": 0.5696, + "step": 8573 + }, + { + "epoch": 1.263457760314342, + "grad_norm": 0.6142292618751526, + "learning_rate": 3.134618613257123e-06, + "loss": 0.5067, + "step": 8574 + }, + { + "epoch": 1.26360510805501, + "grad_norm": 0.5758331418037415, + "learning_rate": 3.134243607171636e-06, + "loss": 0.5541, + "step": 8575 + }, + { + "epoch": 1.2637524557956779, + "grad_norm": 0.5941159725189209, + "learning_rate": 3.1338685858328875e-06, + "loss": 0.5242, + "step": 8576 + }, + { + "epoch": 1.2638998035363458, + "grad_norm": 0.5822849273681641, + "learning_rate": 3.1334935492498975e-06, + "loss": 0.5449, + "step": 8577 + }, + { + "epoch": 1.2640471512770137, + "grad_norm": 0.5608890056610107, + "learning_rate": 3.133118497431684e-06, + "loss": 0.5263, + "step": 8578 + }, + { + "epoch": 1.2641944990176817, + "grad_norm": 0.6026806831359863, + "learning_rate": 3.1327434303872684e-06, + "loss": 0.5659, + "step": 8579 + }, + { + "epoch": 1.2643418467583496, + "grad_norm": 0.5902645587921143, + "learning_rate": 3.1323683481256694e-06, + "loss": 0.5495, + "step": 8580 + }, + { + "epoch": 1.2644891944990178, + "grad_norm": 0.6079269051551819, + "learning_rate": 3.1319932506559094e-06, + "loss": 0.5624, + "step": 8581 + }, + { + "epoch": 1.2646365422396857, + "grad_norm": 0.5963885188102722, + "learning_rate": 3.131618137987007e-06, + "loss": 0.5478, + "step": 8582 + }, + { + "epoch": 1.2647838899803536, + "grad_norm": 0.5831193923950195, + "learning_rate": 3.1312430101279843e-06, + "loss": 0.5206, + "step": 8583 + }, + { + "epoch": 1.2649312377210216, + "grad_norm": 0.5874456167221069, + "learning_rate": 3.1308678670878644e-06, + "loss": 0.5309, + "step": 8584 + }, + { + "epoch": 1.2650785854616897, + "grad_norm": 0.6567641496658325, + "learning_rate": 3.1304927088756675e-06, + "loss": 0.5151, + "step": 8585 + }, + { + "epoch": 1.2652259332023577, + "grad_norm": 0.613449215888977, + "learning_rate": 3.1301175355004166e-06, + "loss": 0.5037, + "step": 8586 + }, + { + "epoch": 1.2653732809430256, + "grad_norm": 0.5506025552749634, + "learning_rate": 3.129742346971135e-06, + "loss": 0.5481, + "step": 8587 + }, + { + "epoch": 1.2655206286836935, + "grad_norm": 0.5957449078559875, + "learning_rate": 3.1293671432968452e-06, + "loss": 0.4886, + "step": 8588 + }, + { + "epoch": 1.2656679764243615, + "grad_norm": 0.5546005368232727, + "learning_rate": 3.128991924486571e-06, + "loss": 0.5243, + "step": 8589 + }, + { + "epoch": 1.2658153241650294, + "grad_norm": 0.6036484241485596, + "learning_rate": 3.1286166905493354e-06, + "loss": 0.5433, + "step": 8590 + }, + { + "epoch": 1.2659626719056973, + "grad_norm": 0.6129400730133057, + "learning_rate": 3.128241441494163e-06, + "loss": 0.5219, + "step": 8591 + }, + { + "epoch": 1.2661100196463655, + "grad_norm": 0.5650593638420105, + "learning_rate": 3.1278661773300796e-06, + "loss": 0.4924, + "step": 8592 + }, + { + "epoch": 1.2662573673870334, + "grad_norm": 0.6087763905525208, + "learning_rate": 3.1274908980661083e-06, + "loss": 0.5047, + "step": 8593 + }, + { + "epoch": 1.2664047151277014, + "grad_norm": 0.5812458395957947, + "learning_rate": 3.127115603711276e-06, + "loss": 0.5518, + "step": 8594 + }, + { + "epoch": 1.2665520628683693, + "grad_norm": 0.6285799145698547, + "learning_rate": 3.126740294274607e-06, + "loss": 0.5325, + "step": 8595 + }, + { + "epoch": 1.2666994106090375, + "grad_norm": 0.5802414417266846, + "learning_rate": 3.1263649697651275e-06, + "loss": 0.575, + "step": 8596 + }, + { + "epoch": 1.2668467583497054, + "grad_norm": 0.588519811630249, + "learning_rate": 3.1259896301918653e-06, + "loss": 0.4953, + "step": 8597 + }, + { + "epoch": 1.2669941060903733, + "grad_norm": 0.6147618293762207, + "learning_rate": 3.1256142755638453e-06, + "loss": 0.5561, + "step": 8598 + }, + { + "epoch": 1.2671414538310413, + "grad_norm": 0.5886279940605164, + "learning_rate": 3.1252389058900954e-06, + "loss": 0.5071, + "step": 8599 + }, + { + "epoch": 1.2672888015717092, + "grad_norm": 0.5849077701568604, + "learning_rate": 3.124863521179644e-06, + "loss": 0.5206, + "step": 8600 + }, + { + "epoch": 1.2674361493123771, + "grad_norm": 0.5672047734260559, + "learning_rate": 3.1244881214415175e-06, + "loss": 0.5355, + "step": 8601 + }, + { + "epoch": 1.267583497053045, + "grad_norm": 0.5752578377723694, + "learning_rate": 3.1241127066847444e-06, + "loss": 0.5408, + "step": 8602 + }, + { + "epoch": 1.2677308447937132, + "grad_norm": 0.5723844766616821, + "learning_rate": 3.123737276918353e-06, + "loss": 0.5602, + "step": 8603 + }, + { + "epoch": 1.2678781925343812, + "grad_norm": 0.6256568431854248, + "learning_rate": 3.1233618321513733e-06, + "loss": 0.5186, + "step": 8604 + }, + { + "epoch": 1.268025540275049, + "grad_norm": 0.6042104363441467, + "learning_rate": 3.1229863723928332e-06, + "loss": 0.5798, + "step": 8605 + }, + { + "epoch": 1.268172888015717, + "grad_norm": 0.6084650754928589, + "learning_rate": 3.122610897651764e-06, + "loss": 0.4999, + "step": 8606 + }, + { + "epoch": 1.2683202357563852, + "grad_norm": 0.6101822257041931, + "learning_rate": 3.1222354079371943e-06, + "loss": 0.5533, + "step": 8607 + }, + { + "epoch": 1.2684675834970531, + "grad_norm": 0.5954177379608154, + "learning_rate": 3.121859903258155e-06, + "loss": 0.5286, + "step": 8608 + }, + { + "epoch": 1.268614931237721, + "grad_norm": 0.7784973382949829, + "learning_rate": 3.121484383623676e-06, + "loss": 0.5366, + "step": 8609 + }, + { + "epoch": 1.268762278978389, + "grad_norm": 0.5676101446151733, + "learning_rate": 3.12110884904279e-06, + "loss": 0.5423, + "step": 8610 + }, + { + "epoch": 1.268909626719057, + "grad_norm": 0.5810142755508423, + "learning_rate": 3.120733299524527e-06, + "loss": 0.5251, + "step": 8611 + }, + { + "epoch": 1.2690569744597249, + "grad_norm": 0.583730936050415, + "learning_rate": 3.1203577350779197e-06, + "loss": 0.5087, + "step": 8612 + }, + { + "epoch": 1.2692043222003928, + "grad_norm": 0.6442824602127075, + "learning_rate": 3.1199821557119993e-06, + "loss": 0.5565, + "step": 8613 + }, + { + "epoch": 1.269351669941061, + "grad_norm": 0.5914736390113831, + "learning_rate": 3.119606561435799e-06, + "loss": 0.5001, + "step": 8614 + }, + { + "epoch": 1.269499017681729, + "grad_norm": 0.5885108113288879, + "learning_rate": 3.119230952258352e-06, + "loss": 0.5245, + "step": 8615 + }, + { + "epoch": 1.2696463654223968, + "grad_norm": 0.6104685068130493, + "learning_rate": 3.11885532818869e-06, + "loss": 0.5264, + "step": 8616 + }, + { + "epoch": 1.2697937131630648, + "grad_norm": 0.6106048822402954, + "learning_rate": 3.118479689235849e-06, + "loss": 0.5013, + "step": 8617 + }, + { + "epoch": 1.269941060903733, + "grad_norm": 0.5883276462554932, + "learning_rate": 3.11810403540886e-06, + "loss": 0.5086, + "step": 8618 + }, + { + "epoch": 1.2700884086444009, + "grad_norm": 0.6015006303787231, + "learning_rate": 3.1177283667167597e-06, + "loss": 0.5707, + "step": 8619 + }, + { + "epoch": 1.2702357563850688, + "grad_norm": 0.6146100759506226, + "learning_rate": 3.117352683168582e-06, + "loss": 0.5735, + "step": 8620 + }, + { + "epoch": 1.2703831041257367, + "grad_norm": 0.5492962598800659, + "learning_rate": 3.1169769847733617e-06, + "loss": 0.533, + "step": 8621 + }, + { + "epoch": 1.2705304518664047, + "grad_norm": 0.6014704704284668, + "learning_rate": 3.116601271540134e-06, + "loss": 0.5215, + "step": 8622 + }, + { + "epoch": 1.2706777996070726, + "grad_norm": 0.6533591747283936, + "learning_rate": 3.1162255434779353e-06, + "loss": 0.5435, + "step": 8623 + }, + { + "epoch": 1.2708251473477405, + "grad_norm": 0.564132571220398, + "learning_rate": 3.1158498005958014e-06, + "loss": 0.5393, + "step": 8624 + }, + { + "epoch": 1.2709724950884087, + "grad_norm": 0.5841573476791382, + "learning_rate": 3.1154740429027687e-06, + "loss": 0.5587, + "step": 8625 + }, + { + "epoch": 1.2711198428290766, + "grad_norm": 0.6243511438369751, + "learning_rate": 3.115098270407874e-06, + "loss": 0.5371, + "step": 8626 + }, + { + "epoch": 1.2712671905697446, + "grad_norm": 0.5956541299819946, + "learning_rate": 3.114722483120154e-06, + "loss": 0.5031, + "step": 8627 + }, + { + "epoch": 1.2714145383104125, + "grad_norm": 0.5894856452941895, + "learning_rate": 3.1143466810486466e-06, + "loss": 0.5161, + "step": 8628 + }, + { + "epoch": 1.2715618860510807, + "grad_norm": 0.5829429626464844, + "learning_rate": 3.1139708642023896e-06, + "loss": 0.5816, + "step": 8629 + }, + { + "epoch": 1.2717092337917486, + "grad_norm": 0.625553548336029, + "learning_rate": 3.1135950325904217e-06, + "loss": 0.5355, + "step": 8630 + }, + { + "epoch": 1.2718565815324165, + "grad_norm": 0.6110446453094482, + "learning_rate": 3.113219186221781e-06, + "loss": 0.5408, + "step": 8631 + }, + { + "epoch": 1.2720039292730845, + "grad_norm": 0.5990308523178101, + "learning_rate": 3.1128433251055064e-06, + "loss": 0.5284, + "step": 8632 + }, + { + "epoch": 1.2721512770137524, + "grad_norm": 0.5790833830833435, + "learning_rate": 3.112467449250637e-06, + "loss": 0.5143, + "step": 8633 + }, + { + "epoch": 1.2722986247544203, + "grad_norm": 0.5924093723297119, + "learning_rate": 3.1120915586662125e-06, + "loss": 0.544, + "step": 8634 + }, + { + "epoch": 1.2724459724950883, + "grad_norm": 0.5997865796089172, + "learning_rate": 3.1117156533612737e-06, + "loss": 0.5533, + "step": 8635 + }, + { + "epoch": 1.2725933202357564, + "grad_norm": 0.6041004061698914, + "learning_rate": 3.11133973334486e-06, + "loss": 0.4818, + "step": 8636 + }, + { + "epoch": 1.2727406679764244, + "grad_norm": 0.6310515999794006, + "learning_rate": 3.110963798626012e-06, + "loss": 0.5774, + "step": 8637 + }, + { + "epoch": 1.2728880157170923, + "grad_norm": 0.5777691006660461, + "learning_rate": 3.110587849213772e-06, + "loss": 0.533, + "step": 8638 + }, + { + "epoch": 1.2730353634577602, + "grad_norm": 0.5830278992652893, + "learning_rate": 3.1102118851171803e-06, + "loss": 0.5243, + "step": 8639 + }, + { + "epoch": 1.2731827111984284, + "grad_norm": 0.5578452348709106, + "learning_rate": 3.1098359063452793e-06, + "loss": 0.5413, + "step": 8640 + }, + { + "epoch": 1.2733300589390963, + "grad_norm": 0.6238688230514526, + "learning_rate": 3.1094599129071108e-06, + "loss": 0.5273, + "step": 8641 + }, + { + "epoch": 1.2734774066797643, + "grad_norm": 0.5869956016540527, + "learning_rate": 3.109083904811717e-06, + "loss": 0.5304, + "step": 8642 + }, + { + "epoch": 1.2736247544204322, + "grad_norm": 0.6164597868919373, + "learning_rate": 3.108707882068141e-06, + "loss": 0.5427, + "step": 8643 + }, + { + "epoch": 1.2737721021611002, + "grad_norm": 0.5894786715507507, + "learning_rate": 3.108331844685426e-06, + "loss": 0.5393, + "step": 8644 + }, + { + "epoch": 1.273919449901768, + "grad_norm": 0.5959522724151611, + "learning_rate": 3.107955792672615e-06, + "loss": 0.5303, + "step": 8645 + }, + { + "epoch": 1.274066797642436, + "grad_norm": 0.5880894660949707, + "learning_rate": 3.107579726038753e-06, + "loss": 0.502, + "step": 8646 + }, + { + "epoch": 1.2742141453831042, + "grad_norm": 0.567548930644989, + "learning_rate": 3.107203644792883e-06, + "loss": 0.5696, + "step": 8647 + }, + { + "epoch": 1.2743614931237721, + "grad_norm": 0.6075037717819214, + "learning_rate": 3.1068275489440513e-06, + "loss": 0.5407, + "step": 8648 + }, + { + "epoch": 1.27450884086444, + "grad_norm": 0.596013605594635, + "learning_rate": 3.1064514385013004e-06, + "loss": 0.5304, + "step": 8649 + }, + { + "epoch": 1.274656188605108, + "grad_norm": 0.6014347076416016, + "learning_rate": 3.1060753134736777e-06, + "loss": 0.5328, + "step": 8650 + }, + { + "epoch": 1.2748035363457761, + "grad_norm": 0.5790832042694092, + "learning_rate": 3.105699173870227e-06, + "loss": 0.5436, + "step": 8651 + }, + { + "epoch": 1.274950884086444, + "grad_norm": 0.5833688974380493, + "learning_rate": 3.105323019699996e-06, + "loss": 0.5491, + "step": 8652 + }, + { + "epoch": 1.275098231827112, + "grad_norm": 0.573695957660675, + "learning_rate": 3.10494685097203e-06, + "loss": 0.5065, + "step": 8653 + }, + { + "epoch": 1.27524557956778, + "grad_norm": 0.5874945521354675, + "learning_rate": 3.104570667695376e-06, + "loss": 0.5331, + "step": 8654 + }, + { + "epoch": 1.2753929273084479, + "grad_norm": 0.5655403733253479, + "learning_rate": 3.1041944698790805e-06, + "loss": 0.5441, + "step": 8655 + }, + { + "epoch": 1.2755402750491158, + "grad_norm": 0.5913698673248291, + "learning_rate": 3.103818257532192e-06, + "loss": 0.5321, + "step": 8656 + }, + { + "epoch": 1.2756876227897838, + "grad_norm": 0.6121820211410522, + "learning_rate": 3.1034420306637575e-06, + "loss": 0.529, + "step": 8657 + }, + { + "epoch": 1.275834970530452, + "grad_norm": 0.6174578070640564, + "learning_rate": 3.1030657892828254e-06, + "loss": 0.5115, + "step": 8658 + }, + { + "epoch": 1.2759823182711199, + "grad_norm": 0.5729864239692688, + "learning_rate": 3.1026895333984434e-06, + "loss": 0.5627, + "step": 8659 + }, + { + "epoch": 1.2761296660117878, + "grad_norm": 0.5879015922546387, + "learning_rate": 3.102313263019661e-06, + "loss": 0.5444, + "step": 8660 + }, + { + "epoch": 1.2762770137524557, + "grad_norm": 0.616141676902771, + "learning_rate": 3.1019369781555275e-06, + "loss": 0.5487, + "step": 8661 + }, + { + "epoch": 1.2764243614931239, + "grad_norm": 0.6027697324752808, + "learning_rate": 3.101560678815092e-06, + "loss": 0.5348, + "step": 8662 + }, + { + "epoch": 1.2765717092337918, + "grad_norm": 0.5888727307319641, + "learning_rate": 3.1011843650074035e-06, + "loss": 0.5477, + "step": 8663 + }, + { + "epoch": 1.2767190569744598, + "grad_norm": 0.6179220676422119, + "learning_rate": 3.1008080367415138e-06, + "loss": 0.5374, + "step": 8664 + }, + { + "epoch": 1.2768664047151277, + "grad_norm": 0.5842709541320801, + "learning_rate": 3.1004316940264723e-06, + "loss": 0.5488, + "step": 8665 + }, + { + "epoch": 1.2770137524557956, + "grad_norm": 0.6148820519447327, + "learning_rate": 3.1000553368713305e-06, + "loss": 0.5303, + "step": 8666 + }, + { + "epoch": 1.2771611001964636, + "grad_norm": 0.6119087338447571, + "learning_rate": 3.099678965285139e-06, + "loss": 0.5484, + "step": 8667 + }, + { + "epoch": 1.2773084479371315, + "grad_norm": 0.5908785462379456, + "learning_rate": 3.09930257927695e-06, + "loss": 0.5109, + "step": 8668 + }, + { + "epoch": 1.2774557956777997, + "grad_norm": 0.5635044574737549, + "learning_rate": 3.0989261788558145e-06, + "loss": 0.5121, + "step": 8669 + }, + { + "epoch": 1.2776031434184676, + "grad_norm": 0.5705283880233765, + "learning_rate": 3.0985497640307856e-06, + "loss": 0.5266, + "step": 8670 + }, + { + "epoch": 1.2777504911591355, + "grad_norm": 0.595653772354126, + "learning_rate": 3.0981733348109153e-06, + "loss": 0.5525, + "step": 8671 + }, + { + "epoch": 1.2778978388998035, + "grad_norm": 0.5806884765625, + "learning_rate": 3.0977968912052573e-06, + "loss": 0.5099, + "step": 8672 + }, + { + "epoch": 1.2780451866404716, + "grad_norm": 0.5676390528678894, + "learning_rate": 3.0974204332228646e-06, + "loss": 0.4883, + "step": 8673 + }, + { + "epoch": 1.2781925343811396, + "grad_norm": 0.5729315280914307, + "learning_rate": 3.0970439608727903e-06, + "loss": 0.5057, + "step": 8674 + }, + { + "epoch": 1.2783398821218075, + "grad_norm": 0.6000615358352661, + "learning_rate": 3.096667474164089e-06, + "loss": 0.5219, + "step": 8675 + }, + { + "epoch": 1.2784872298624754, + "grad_norm": 0.6089318990707397, + "learning_rate": 3.0962909731058144e-06, + "loss": 0.5265, + "step": 8676 + }, + { + "epoch": 1.2786345776031434, + "grad_norm": 0.586046576499939, + "learning_rate": 3.0959144577070223e-06, + "loss": 0.5319, + "step": 8677 + }, + { + "epoch": 1.2787819253438113, + "grad_norm": 0.6040553450584412, + "learning_rate": 3.095537927976766e-06, + "loss": 0.5335, + "step": 8678 + }, + { + "epoch": 1.2789292730844792, + "grad_norm": 0.5740551352500916, + "learning_rate": 3.095161383924102e-06, + "loss": 0.5481, + "step": 8679 + }, + { + "epoch": 1.2790766208251474, + "grad_norm": 0.5899512767791748, + "learning_rate": 3.094784825558087e-06, + "loss": 0.5006, + "step": 8680 + }, + { + "epoch": 1.2792239685658153, + "grad_norm": 0.5838250517845154, + "learning_rate": 3.094408252887775e-06, + "loss": 0.5286, + "step": 8681 + }, + { + "epoch": 1.2793713163064833, + "grad_norm": 0.6124776601791382, + "learning_rate": 3.0940316659222236e-06, + "loss": 0.5153, + "step": 8682 + }, + { + "epoch": 1.2795186640471512, + "grad_norm": 0.5903453826904297, + "learning_rate": 3.093655064670489e-06, + "loss": 0.53, + "step": 8683 + }, + { + "epoch": 1.2796660117878194, + "grad_norm": 0.5893673300743103, + "learning_rate": 3.093278449141629e-06, + "loss": 0.489, + "step": 8684 + }, + { + "epoch": 1.2798133595284873, + "grad_norm": 0.6279658079147339, + "learning_rate": 3.0929018193447e-06, + "loss": 0.5242, + "step": 8685 + }, + { + "epoch": 1.2799607072691552, + "grad_norm": 0.5573398470878601, + "learning_rate": 3.0925251752887607e-06, + "loss": 0.5401, + "step": 8686 + }, + { + "epoch": 1.2801080550098232, + "grad_norm": 0.6011356711387634, + "learning_rate": 3.0921485169828685e-06, + "loss": 0.5386, + "step": 8687 + }, + { + "epoch": 1.280255402750491, + "grad_norm": 0.6259110569953918, + "learning_rate": 3.0917718444360823e-06, + "loss": 0.537, + "step": 8688 + }, + { + "epoch": 1.280402750491159, + "grad_norm": 0.5893895030021667, + "learning_rate": 3.091395157657461e-06, + "loss": 0.5353, + "step": 8689 + }, + { + "epoch": 1.2805500982318272, + "grad_norm": 0.6036189198493958, + "learning_rate": 3.0910184566560636e-06, + "loss": 0.5263, + "step": 8690 + }, + { + "epoch": 1.2806974459724951, + "grad_norm": 0.6119555234909058, + "learning_rate": 3.0906417414409495e-06, + "loss": 0.5316, + "step": 8691 + }, + { + "epoch": 1.280844793713163, + "grad_norm": 0.5957985520362854, + "learning_rate": 3.090265012021178e-06, + "loss": 0.5614, + "step": 8692 + }, + { + "epoch": 1.280992141453831, + "grad_norm": 0.629328727722168, + "learning_rate": 3.0898882684058106e-06, + "loss": 0.5266, + "step": 8693 + }, + { + "epoch": 1.281139489194499, + "grad_norm": 0.5820600986480713, + "learning_rate": 3.0895115106039064e-06, + "loss": 0.4996, + "step": 8694 + }, + { + "epoch": 1.281286836935167, + "grad_norm": 0.7762674689292908, + "learning_rate": 3.0891347386245274e-06, + "loss": 0.5463, + "step": 8695 + }, + { + "epoch": 1.281434184675835, + "grad_norm": 0.5866739153862, + "learning_rate": 3.0887579524767338e-06, + "loss": 0.5185, + "step": 8696 + }, + { + "epoch": 1.281581532416503, + "grad_norm": 0.5601721405982971, + "learning_rate": 3.0883811521695876e-06, + "loss": 0.5199, + "step": 8697 + }, + { + "epoch": 1.281728880157171, + "grad_norm": 0.597936749458313, + "learning_rate": 3.088004337712151e-06, + "loss": 0.5133, + "step": 8698 + }, + { + "epoch": 1.2818762278978388, + "grad_norm": 0.6018572449684143, + "learning_rate": 3.0876275091134855e-06, + "loss": 0.5781, + "step": 8699 + }, + { + "epoch": 1.2820235756385068, + "grad_norm": 0.5718172788619995, + "learning_rate": 3.0872506663826544e-06, + "loss": 0.541, + "step": 8700 + }, + { + "epoch": 1.282170923379175, + "grad_norm": 0.5892800688743591, + "learning_rate": 3.08687380952872e-06, + "loss": 0.5479, + "step": 8701 + }, + { + "epoch": 1.2823182711198429, + "grad_norm": 0.64223313331604, + "learning_rate": 3.0864969385607457e-06, + "loss": 0.5534, + "step": 8702 + }, + { + "epoch": 1.2824656188605108, + "grad_norm": 0.5957775115966797, + "learning_rate": 3.0861200534877955e-06, + "loss": 0.5297, + "step": 8703 + }, + { + "epoch": 1.2826129666011787, + "grad_norm": 0.6036246418952942, + "learning_rate": 3.085743154318932e-06, + "loss": 0.5469, + "step": 8704 + }, + { + "epoch": 1.282760314341847, + "grad_norm": 0.6031821966171265, + "learning_rate": 3.0853662410632212e-06, + "loss": 0.4887, + "step": 8705 + }, + { + "epoch": 1.2829076620825148, + "grad_norm": 0.6489701867103577, + "learning_rate": 3.0849893137297267e-06, + "loss": 0.5406, + "step": 8706 + }, + { + "epoch": 1.2830550098231828, + "grad_norm": 0.5735771656036377, + "learning_rate": 3.0846123723275134e-06, + "loss": 0.5549, + "step": 8707 + }, + { + "epoch": 1.2832023575638507, + "grad_norm": 0.5865871906280518, + "learning_rate": 3.0842354168656467e-06, + "loss": 0.5101, + "step": 8708 + }, + { + "epoch": 1.2833497053045186, + "grad_norm": 0.5966026186943054, + "learning_rate": 3.0838584473531924e-06, + "loss": 0.5618, + "step": 8709 + }, + { + "epoch": 1.2834970530451866, + "grad_norm": 0.5794899463653564, + "learning_rate": 3.0834814637992162e-06, + "loss": 0.5111, + "step": 8710 + }, + { + "epoch": 1.2836444007858545, + "grad_norm": 0.592012345790863, + "learning_rate": 3.083104466212784e-06, + "loss": 0.5529, + "step": 8711 + }, + { + "epoch": 1.2837917485265227, + "grad_norm": 0.6267836689949036, + "learning_rate": 3.082727454602964e-06, + "loss": 0.5495, + "step": 8712 + }, + { + "epoch": 1.2839390962671906, + "grad_norm": 0.5781642198562622, + "learning_rate": 3.0823504289788208e-06, + "loss": 0.5549, + "step": 8713 + }, + { + "epoch": 1.2840864440078585, + "grad_norm": 0.5873689651489258, + "learning_rate": 3.081973389349424e-06, + "loss": 0.5433, + "step": 8714 + }, + { + "epoch": 1.2842337917485265, + "grad_norm": 0.5915819406509399, + "learning_rate": 3.0815963357238403e-06, + "loss": 0.5113, + "step": 8715 + }, + { + "epoch": 1.2843811394891946, + "grad_norm": 0.54740971326828, + "learning_rate": 3.081219268111137e-06, + "loss": 0.5316, + "step": 8716 + }, + { + "epoch": 1.2845284872298626, + "grad_norm": 0.5794938802719116, + "learning_rate": 3.0808421865203825e-06, + "loss": 0.4754, + "step": 8717 + }, + { + "epoch": 1.2846758349705305, + "grad_norm": 0.5813953280448914, + "learning_rate": 3.0804650909606465e-06, + "loss": 0.522, + "step": 8718 + }, + { + "epoch": 1.2848231827111984, + "grad_norm": 0.5610129237174988, + "learning_rate": 3.0800879814409966e-06, + "loss": 0.5047, + "step": 8719 + }, + { + "epoch": 1.2849705304518664, + "grad_norm": 0.6027259230613708, + "learning_rate": 3.079710857970504e-06, + "loss": 0.5514, + "step": 8720 + }, + { + "epoch": 1.2851178781925343, + "grad_norm": 0.5984082221984863, + "learning_rate": 3.0793337205582356e-06, + "loss": 0.5355, + "step": 8721 + }, + { + "epoch": 1.2852652259332022, + "grad_norm": 0.6025797128677368, + "learning_rate": 3.078956569213264e-06, + "loss": 0.5334, + "step": 8722 + }, + { + "epoch": 1.2854125736738704, + "grad_norm": 0.5956814289093018, + "learning_rate": 3.078579403944658e-06, + "loss": 0.5385, + "step": 8723 + }, + { + "epoch": 1.2855599214145383, + "grad_norm": 0.583236575126648, + "learning_rate": 3.078202224761489e-06, + "loss": 0.5184, + "step": 8724 + }, + { + "epoch": 1.2857072691552063, + "grad_norm": 0.5856406092643738, + "learning_rate": 3.0778250316728273e-06, + "loss": 0.552, + "step": 8725 + }, + { + "epoch": 1.2858546168958742, + "grad_norm": 0.5989221334457397, + "learning_rate": 3.0774478246877452e-06, + "loss": 0.5388, + "step": 8726 + }, + { + "epoch": 1.2860019646365424, + "grad_norm": 0.6218112707138062, + "learning_rate": 3.0770706038153136e-06, + "loss": 0.5251, + "step": 8727 + }, + { + "epoch": 1.2861493123772103, + "grad_norm": 0.6424568891525269, + "learning_rate": 3.0766933690646046e-06, + "loss": 0.5465, + "step": 8728 + }, + { + "epoch": 1.2862966601178782, + "grad_norm": 0.6494314074516296, + "learning_rate": 3.07631612044469e-06, + "loss": 0.5464, + "step": 8729 + }, + { + "epoch": 1.2864440078585462, + "grad_norm": 0.5605334043502808, + "learning_rate": 3.0759388579646433e-06, + "loss": 0.5552, + "step": 8730 + }, + { + "epoch": 1.2865913555992141, + "grad_norm": 0.6392388939857483, + "learning_rate": 3.075561581633537e-06, + "loss": 0.5446, + "step": 8731 + }, + { + "epoch": 1.286738703339882, + "grad_norm": 0.5676104426383972, + "learning_rate": 3.075184291460445e-06, + "loss": 0.5155, + "step": 8732 + }, + { + "epoch": 1.28688605108055, + "grad_norm": 0.6178354620933533, + "learning_rate": 3.0748069874544406e-06, + "loss": 0.5509, + "step": 8733 + }, + { + "epoch": 1.2870333988212181, + "grad_norm": 0.6072490215301514, + "learning_rate": 3.074429669624598e-06, + "loss": 0.5086, + "step": 8734 + }, + { + "epoch": 1.287180746561886, + "grad_norm": 0.5830989480018616, + "learning_rate": 3.074052337979991e-06, + "loss": 0.547, + "step": 8735 + }, + { + "epoch": 1.287328094302554, + "grad_norm": 0.5786193013191223, + "learning_rate": 3.0736749925296944e-06, + "loss": 0.5223, + "step": 8736 + }, + { + "epoch": 1.287475442043222, + "grad_norm": 0.569706916809082, + "learning_rate": 3.0732976332827837e-06, + "loss": 0.559, + "step": 8737 + }, + { + "epoch": 1.28762278978389, + "grad_norm": 0.593514621257782, + "learning_rate": 3.072920260248333e-06, + "loss": 0.521, + "step": 8738 + }, + { + "epoch": 1.287770137524558, + "grad_norm": 0.6087044477462769, + "learning_rate": 3.07254287343542e-06, + "loss": 0.5142, + "step": 8739 + }, + { + "epoch": 1.287917485265226, + "grad_norm": 0.5859372019767761, + "learning_rate": 3.0721654728531186e-06, + "loss": 0.5201, + "step": 8740 + }, + { + "epoch": 1.288064833005894, + "grad_norm": 0.5904368162155151, + "learning_rate": 3.0717880585105064e-06, + "loss": 0.5234, + "step": 8741 + }, + { + "epoch": 1.2882121807465619, + "grad_norm": 0.5850622653961182, + "learning_rate": 3.071410630416659e-06, + "loss": 0.5053, + "step": 8742 + }, + { + "epoch": 1.2883595284872298, + "grad_norm": 0.6079801321029663, + "learning_rate": 3.0710331885806542e-06, + "loss": 0.5616, + "step": 8743 + }, + { + "epoch": 1.2885068762278977, + "grad_norm": 0.6412246823310852, + "learning_rate": 3.0706557330115687e-06, + "loss": 0.5208, + "step": 8744 + }, + { + "epoch": 1.2886542239685659, + "grad_norm": 0.639042854309082, + "learning_rate": 3.070278263718481e-06, + "loss": 0.5171, + "step": 8745 + }, + { + "epoch": 1.2888015717092338, + "grad_norm": 0.6182823181152344, + "learning_rate": 3.069900780710468e-06, + "loss": 0.5548, + "step": 8746 + }, + { + "epoch": 1.2889489194499018, + "grad_norm": 0.6105717420578003, + "learning_rate": 3.0695232839966082e-06, + "loss": 0.5422, + "step": 8747 + }, + { + "epoch": 1.2890962671905697, + "grad_norm": 0.5876570343971252, + "learning_rate": 3.0691457735859815e-06, + "loss": 0.5257, + "step": 8748 + }, + { + "epoch": 1.2892436149312378, + "grad_norm": 0.582768440246582, + "learning_rate": 3.0687682494876657e-06, + "loss": 0.5139, + "step": 8749 + }, + { + "epoch": 1.2893909626719058, + "grad_norm": 0.5701588988304138, + "learning_rate": 3.06839071171074e-06, + "loss": 0.5644, + "step": 8750 + }, + { + "epoch": 1.2895383104125737, + "grad_norm": 0.5768118500709534, + "learning_rate": 3.0680131602642844e-06, + "loss": 0.4986, + "step": 8751 + }, + { + "epoch": 1.2896856581532417, + "grad_norm": 0.5983806252479553, + "learning_rate": 3.067635595157379e-06, + "loss": 0.5444, + "step": 8752 + }, + { + "epoch": 1.2898330058939096, + "grad_norm": 0.577210545539856, + "learning_rate": 3.0672580163991033e-06, + "loss": 0.5441, + "step": 8753 + }, + { + "epoch": 1.2899803536345775, + "grad_norm": 0.5734119415283203, + "learning_rate": 3.0668804239985384e-06, + "loss": 0.5538, + "step": 8754 + }, + { + "epoch": 1.2901277013752455, + "grad_norm": 0.6160844564437866, + "learning_rate": 3.0665028179647656e-06, + "loss": 0.5249, + "step": 8755 + }, + { + "epoch": 1.2902750491159136, + "grad_norm": 0.562147855758667, + "learning_rate": 3.066125198306865e-06, + "loss": 0.5552, + "step": 8756 + }, + { + "epoch": 1.2904223968565816, + "grad_norm": 0.5785574316978455, + "learning_rate": 3.0657475650339196e-06, + "loss": 0.5367, + "step": 8757 + }, + { + "epoch": 1.2905697445972495, + "grad_norm": 0.6155684590339661, + "learning_rate": 3.0653699181550107e-06, + "loss": 0.5356, + "step": 8758 + }, + { + "epoch": 1.2907170923379174, + "grad_norm": 0.5946189165115356, + "learning_rate": 3.06499225767922e-06, + "loss": 0.4912, + "step": 8759 + }, + { + "epoch": 1.2908644400785856, + "grad_norm": 0.654255747795105, + "learning_rate": 3.0646145836156307e-06, + "loss": 0.5488, + "step": 8760 + }, + { + "epoch": 1.2910117878192535, + "grad_norm": 0.5911099910736084, + "learning_rate": 3.0642368959733255e-06, + "loss": 0.5388, + "step": 8761 + }, + { + "epoch": 1.2911591355599215, + "grad_norm": 0.5900323390960693, + "learning_rate": 3.0638591947613873e-06, + "loss": 0.5481, + "step": 8762 + }, + { + "epoch": 1.2913064833005894, + "grad_norm": 0.5814879536628723, + "learning_rate": 3.0634814799889006e-06, + "loss": 0.554, + "step": 8763 + }, + { + "epoch": 1.2914538310412573, + "grad_norm": 0.5881166458129883, + "learning_rate": 3.063103751664948e-06, + "loss": 0.5153, + "step": 8764 + }, + { + "epoch": 1.2916011787819253, + "grad_norm": 0.5881333947181702, + "learning_rate": 3.0627260097986137e-06, + "loss": 0.5294, + "step": 8765 + }, + { + "epoch": 1.2917485265225932, + "grad_norm": 0.6007373929023743, + "learning_rate": 3.062348254398984e-06, + "loss": 0.5475, + "step": 8766 + }, + { + "epoch": 1.2918958742632614, + "grad_norm": 0.5890542268753052, + "learning_rate": 3.061970485475142e-06, + "loss": 0.5041, + "step": 8767 + }, + { + "epoch": 1.2920432220039293, + "grad_norm": 0.5807287096977234, + "learning_rate": 3.0615927030361738e-06, + "loss": 0.5562, + "step": 8768 + }, + { + "epoch": 1.2921905697445972, + "grad_norm": 0.6191222667694092, + "learning_rate": 3.061214907091164e-06, + "loss": 0.5375, + "step": 8769 + }, + { + "epoch": 1.2923379174852652, + "grad_norm": 0.5770451426506042, + "learning_rate": 3.0608370976491993e-06, + "loss": 0.5334, + "step": 8770 + }, + { + "epoch": 1.2924852652259333, + "grad_norm": 0.617811918258667, + "learning_rate": 3.0604592747193652e-06, + "loss": 0.541, + "step": 8771 + }, + { + "epoch": 1.2926326129666013, + "grad_norm": 0.6150445342063904, + "learning_rate": 3.060081438310748e-06, + "loss": 0.548, + "step": 8772 + }, + { + "epoch": 1.2927799607072692, + "grad_norm": 0.5933538675308228, + "learning_rate": 3.0597035884324357e-06, + "loss": 0.5248, + "step": 8773 + }, + { + "epoch": 1.2929273084479371, + "grad_norm": 0.5715265870094299, + "learning_rate": 3.059325725093514e-06, + "loss": 0.5033, + "step": 8774 + }, + { + "epoch": 1.293074656188605, + "grad_norm": 0.6207888126373291, + "learning_rate": 3.058947848303071e-06, + "loss": 0.5186, + "step": 8775 + }, + { + "epoch": 1.293222003929273, + "grad_norm": 0.5919658541679382, + "learning_rate": 3.058569958070195e-06, + "loss": 0.5047, + "step": 8776 + }, + { + "epoch": 1.293369351669941, + "grad_norm": 0.6074035167694092, + "learning_rate": 3.058192054403973e-06, + "loss": 0.5417, + "step": 8777 + }, + { + "epoch": 1.293516699410609, + "grad_norm": 0.6004620790481567, + "learning_rate": 3.0578141373134938e-06, + "loss": 0.532, + "step": 8778 + }, + { + "epoch": 1.293664047151277, + "grad_norm": 0.6283327341079712, + "learning_rate": 3.0574362068078467e-06, + "loss": 0.5044, + "step": 8779 + }, + { + "epoch": 1.293811394891945, + "grad_norm": 0.5785051584243774, + "learning_rate": 3.0570582628961197e-06, + "loss": 0.5528, + "step": 8780 + }, + { + "epoch": 1.293958742632613, + "grad_norm": 0.5880938172340393, + "learning_rate": 3.0566803055874033e-06, + "loss": 0.5094, + "step": 8781 + }, + { + "epoch": 1.294106090373281, + "grad_norm": 0.6084617972373962, + "learning_rate": 3.056302334890786e-06, + "loss": 0.5275, + "step": 8782 + }, + { + "epoch": 1.294253438113949, + "grad_norm": 0.5751499533653259, + "learning_rate": 3.0559243508153593e-06, + "loss": 0.5232, + "step": 8783 + }, + { + "epoch": 1.294400785854617, + "grad_norm": 0.6329664587974548, + "learning_rate": 3.0555463533702124e-06, + "loss": 0.4996, + "step": 8784 + }, + { + "epoch": 1.2945481335952849, + "grad_norm": 0.6211548447608948, + "learning_rate": 3.055168342564436e-06, + "loss": 0.5074, + "step": 8785 + }, + { + "epoch": 1.2946954813359528, + "grad_norm": 0.6313790678977966, + "learning_rate": 3.0547903184071215e-06, + "loss": 0.55, + "step": 8786 + }, + { + "epoch": 1.2948428290766207, + "grad_norm": 0.6030611395835876, + "learning_rate": 3.05441228090736e-06, + "loss": 0.5279, + "step": 8787 + }, + { + "epoch": 1.2949901768172887, + "grad_norm": 0.5932065844535828, + "learning_rate": 3.054034230074243e-06, + "loss": 0.5263, + "step": 8788 + }, + { + "epoch": 1.2951375245579568, + "grad_norm": 0.5688305497169495, + "learning_rate": 3.0536561659168634e-06, + "loss": 0.5517, + "step": 8789 + }, + { + "epoch": 1.2952848722986248, + "grad_norm": 0.6013655662536621, + "learning_rate": 3.0532780884443117e-06, + "loss": 0.5482, + "step": 8790 + }, + { + "epoch": 1.2954322200392927, + "grad_norm": 0.6038967370986938, + "learning_rate": 3.0528999976656827e-06, + "loss": 0.5333, + "step": 8791 + }, + { + "epoch": 1.2955795677799606, + "grad_norm": 0.6069411039352417, + "learning_rate": 3.052521893590067e-06, + "loss": 0.5462, + "step": 8792 + }, + { + "epoch": 1.2957269155206288, + "grad_norm": 0.6132251620292664, + "learning_rate": 3.0521437762265592e-06, + "loss": 0.517, + "step": 8793 + }, + { + "epoch": 1.2958742632612967, + "grad_norm": 0.5741100907325745, + "learning_rate": 3.0517656455842528e-06, + "loss": 0.5446, + "step": 8794 + }, + { + "epoch": 1.2960216110019647, + "grad_norm": 0.584550678730011, + "learning_rate": 3.051387501672241e-06, + "loss": 0.5208, + "step": 8795 + }, + { + "epoch": 1.2961689587426326, + "grad_norm": 0.6142943501472473, + "learning_rate": 3.0510093444996187e-06, + "loss": 0.5264, + "step": 8796 + }, + { + "epoch": 1.2963163064833005, + "grad_norm": 0.5999569892883301, + "learning_rate": 3.0506311740754794e-06, + "loss": 0.527, + "step": 8797 + }, + { + "epoch": 1.2964636542239685, + "grad_norm": 0.6049243807792664, + "learning_rate": 3.0502529904089193e-06, + "loss": 0.5404, + "step": 8798 + }, + { + "epoch": 1.2966110019646364, + "grad_norm": 0.5932764410972595, + "learning_rate": 3.0498747935090335e-06, + "loss": 0.5472, + "step": 8799 + }, + { + "epoch": 1.2967583497053046, + "grad_norm": 0.5992287397384644, + "learning_rate": 3.0494965833849157e-06, + "loss": 0.5082, + "step": 8800 + }, + { + "epoch": 1.2969056974459725, + "grad_norm": 0.605226457118988, + "learning_rate": 3.049118360045663e-06, + "loss": 0.5324, + "step": 8801 + }, + { + "epoch": 1.2970530451866404, + "grad_norm": 0.5881320834159851, + "learning_rate": 3.0487401235003713e-06, + "loss": 0.5565, + "step": 8802 + }, + { + "epoch": 1.2972003929273084, + "grad_norm": 0.6173729300498962, + "learning_rate": 3.0483618737581372e-06, + "loss": 0.555, + "step": 8803 + }, + { + "epoch": 1.2973477406679765, + "grad_norm": 0.6458489894866943, + "learning_rate": 3.0479836108280568e-06, + "loss": 0.5396, + "step": 8804 + }, + { + "epoch": 1.2974950884086445, + "grad_norm": 0.6021649837493896, + "learning_rate": 3.047605334719229e-06, + "loss": 0.4788, + "step": 8805 + }, + { + "epoch": 1.2976424361493124, + "grad_norm": 0.5821858048439026, + "learning_rate": 3.047227045440749e-06, + "loss": 0.5266, + "step": 8806 + }, + { + "epoch": 1.2977897838899803, + "grad_norm": 0.6179593205451965, + "learning_rate": 3.0468487430017145e-06, + "loss": 0.4972, + "step": 8807 + }, + { + "epoch": 1.2979371316306483, + "grad_norm": 0.6278488039970398, + "learning_rate": 3.046470427411225e-06, + "loss": 0.5579, + "step": 8808 + }, + { + "epoch": 1.2980844793713162, + "grad_norm": 0.5620841979980469, + "learning_rate": 3.0460920986783775e-06, + "loss": 0.5596, + "step": 8809 + }, + { + "epoch": 1.2982318271119841, + "grad_norm": 0.5882749557495117, + "learning_rate": 3.045713756812272e-06, + "loss": 0.5695, + "step": 8810 + }, + { + "epoch": 1.2983791748526523, + "grad_norm": 0.589259684085846, + "learning_rate": 3.0453354018220055e-06, + "loss": 0.5107, + "step": 8811 + }, + { + "epoch": 1.2985265225933202, + "grad_norm": 0.617760181427002, + "learning_rate": 3.0449570337166788e-06, + "loss": 0.5173, + "step": 8812 + }, + { + "epoch": 1.2986738703339882, + "grad_norm": 0.5918892025947571, + "learning_rate": 3.044578652505391e-06, + "loss": 0.5258, + "step": 8813 + }, + { + "epoch": 1.2988212180746561, + "grad_norm": 0.5834391713142395, + "learning_rate": 3.0442002581972425e-06, + "loss": 0.5293, + "step": 8814 + }, + { + "epoch": 1.2989685658153243, + "grad_norm": 0.6170793771743774, + "learning_rate": 3.043821850801333e-06, + "loss": 0.532, + "step": 8815 + }, + { + "epoch": 1.2991159135559922, + "grad_norm": 0.5757250189781189, + "learning_rate": 3.043443430326762e-06, + "loss": 0.5359, + "step": 8816 + }, + { + "epoch": 1.2992632612966601, + "grad_norm": 0.6227938532829285, + "learning_rate": 3.0430649967826324e-06, + "loss": 0.5466, + "step": 8817 + }, + { + "epoch": 1.299410609037328, + "grad_norm": 0.6111860275268555, + "learning_rate": 3.042686550178044e-06, + "loss": 0.4785, + "step": 8818 + }, + { + "epoch": 1.299557956777996, + "grad_norm": 0.5749853849411011, + "learning_rate": 3.042308090522099e-06, + "loss": 0.4852, + "step": 8819 + }, + { + "epoch": 1.299705304518664, + "grad_norm": 0.5690816044807434, + "learning_rate": 3.041929617823898e-06, + "loss": 0.5363, + "step": 8820 + }, + { + "epoch": 1.2998526522593319, + "grad_norm": 0.5963253378868103, + "learning_rate": 3.041551132092544e-06, + "loss": 0.5329, + "step": 8821 + }, + { + "epoch": 1.3, + "grad_norm": 0.5987982749938965, + "learning_rate": 3.0411726333371394e-06, + "loss": 0.5678, + "step": 8822 + }, + { + "epoch": 1.300147347740668, + "grad_norm": 0.600109875202179, + "learning_rate": 3.040794121566787e-06, + "loss": 0.5165, + "step": 8823 + }, + { + "epoch": 1.300294695481336, + "grad_norm": 0.5943282246589661, + "learning_rate": 3.0404155967905896e-06, + "loss": 0.5302, + "step": 8824 + }, + { + "epoch": 1.3004420432220039, + "grad_norm": 0.5802594423294067, + "learning_rate": 3.040037059017651e-06, + "loss": 0.5357, + "step": 8825 + }, + { + "epoch": 1.300589390962672, + "grad_norm": 0.5839782357215881, + "learning_rate": 3.039658508257074e-06, + "loss": 0.5283, + "step": 8826 + }, + { + "epoch": 1.30073673870334, + "grad_norm": 0.60861736536026, + "learning_rate": 3.0392799445179627e-06, + "loss": 0.5431, + "step": 8827 + }, + { + "epoch": 1.3008840864440079, + "grad_norm": 0.5882900357246399, + "learning_rate": 3.038901367809422e-06, + "loss": 0.5137, + "step": 8828 + }, + { + "epoch": 1.3010314341846758, + "grad_norm": 0.5771350264549255, + "learning_rate": 3.0385227781405563e-06, + "loss": 0.5437, + "step": 8829 + }, + { + "epoch": 1.3011787819253438, + "grad_norm": 0.5942164659500122, + "learning_rate": 3.0381441755204704e-06, + "loss": 0.5202, + "step": 8830 + }, + { + "epoch": 1.3013261296660117, + "grad_norm": 0.5904426574707031, + "learning_rate": 3.0377655599582696e-06, + "loss": 0.5394, + "step": 8831 + }, + { + "epoch": 1.3014734774066798, + "grad_norm": 0.5971014499664307, + "learning_rate": 3.037386931463059e-06, + "loss": 0.5625, + "step": 8832 + }, + { + "epoch": 1.3016208251473478, + "grad_norm": 0.5740324258804321, + "learning_rate": 3.0370082900439446e-06, + "loss": 0.5194, + "step": 8833 + }, + { + "epoch": 1.3017681728880157, + "grad_norm": 0.6105207204818726, + "learning_rate": 3.036629635710033e-06, + "loss": 0.5583, + "step": 8834 + }, + { + "epoch": 1.3019155206286837, + "grad_norm": 0.5841798782348633, + "learning_rate": 3.03625096847043e-06, + "loss": 0.5197, + "step": 8835 + }, + { + "epoch": 1.3020628683693516, + "grad_norm": 0.576716423034668, + "learning_rate": 3.035872288334243e-06, + "loss": 0.5428, + "step": 8836 + }, + { + "epoch": 1.3022102161100197, + "grad_norm": 0.5984541177749634, + "learning_rate": 3.0354935953105783e-06, + "loss": 0.5161, + "step": 8837 + }, + { + "epoch": 1.3023575638506877, + "grad_norm": 0.5944229960441589, + "learning_rate": 3.035114889408545e-06, + "loss": 0.5125, + "step": 8838 + }, + { + "epoch": 1.3025049115913556, + "grad_norm": 0.5978067517280579, + "learning_rate": 3.034736170637248e-06, + "loss": 0.496, + "step": 8839 + }, + { + "epoch": 1.3026522593320236, + "grad_norm": 0.614058256149292, + "learning_rate": 3.034357439005797e-06, + "loss": 0.5539, + "step": 8840 + }, + { + "epoch": 1.3027996070726915, + "grad_norm": 0.6379024386405945, + "learning_rate": 3.0339786945233013e-06, + "loss": 0.5503, + "step": 8841 + }, + { + "epoch": 1.3029469548133594, + "grad_norm": 0.6580848097801208, + "learning_rate": 3.0335999371988682e-06, + "loss": 0.5395, + "step": 8842 + }, + { + "epoch": 1.3030943025540276, + "grad_norm": 0.5843019485473633, + "learning_rate": 3.033221167041607e-06, + "loss": 0.4945, + "step": 8843 + }, + { + "epoch": 1.3032416502946955, + "grad_norm": 0.580093502998352, + "learning_rate": 3.0328423840606264e-06, + "loss": 0.5438, + "step": 8844 + }, + { + "epoch": 1.3033889980353635, + "grad_norm": 0.6123616695404053, + "learning_rate": 3.0324635882650364e-06, + "loss": 0.527, + "step": 8845 + }, + { + "epoch": 1.3035363457760314, + "grad_norm": 0.5673072934150696, + "learning_rate": 3.0320847796639473e-06, + "loss": 0.5562, + "step": 8846 + }, + { + "epoch": 1.3036836935166995, + "grad_norm": 0.5697593092918396, + "learning_rate": 3.031705958266468e-06, + "loss": 0.5273, + "step": 8847 + }, + { + "epoch": 1.3038310412573675, + "grad_norm": 0.6132513880729675, + "learning_rate": 3.0313271240817105e-06, + "loss": 0.5407, + "step": 8848 + }, + { + "epoch": 1.3039783889980354, + "grad_norm": 0.5833691358566284, + "learning_rate": 3.0309482771187847e-06, + "loss": 0.5427, + "step": 8849 + }, + { + "epoch": 1.3041257367387034, + "grad_norm": 0.5959423780441284, + "learning_rate": 3.030569417386802e-06, + "loss": 0.5466, + "step": 8850 + }, + { + "epoch": 1.3042730844793713, + "grad_norm": 0.609770655632019, + "learning_rate": 3.0301905448948736e-06, + "loss": 0.5187, + "step": 8851 + }, + { + "epoch": 1.3044204322200392, + "grad_norm": 0.5854249596595764, + "learning_rate": 3.029811659652111e-06, + "loss": 0.5296, + "step": 8852 + }, + { + "epoch": 1.3045677799607072, + "grad_norm": 0.5598500967025757, + "learning_rate": 3.0294327616676266e-06, + "loss": 0.5396, + "step": 8853 + }, + { + "epoch": 1.3047151277013753, + "grad_norm": 0.6869417428970337, + "learning_rate": 3.0290538509505336e-06, + "loss": 0.537, + "step": 8854 + }, + { + "epoch": 1.3048624754420433, + "grad_norm": 0.5862147808074951, + "learning_rate": 3.0286749275099423e-06, + "loss": 0.5106, + "step": 8855 + }, + { + "epoch": 1.3050098231827112, + "grad_norm": 0.6264998912811279, + "learning_rate": 3.0282959913549676e-06, + "loss": 0.483, + "step": 8856 + }, + { + "epoch": 1.3051571709233791, + "grad_norm": 0.6184912919998169, + "learning_rate": 3.0279170424947215e-06, + "loss": 0.5298, + "step": 8857 + }, + { + "epoch": 1.3053045186640473, + "grad_norm": 0.6276779770851135, + "learning_rate": 3.027538080938318e-06, + "loss": 0.5309, + "step": 8858 + }, + { + "epoch": 1.3054518664047152, + "grad_norm": 0.6012272834777832, + "learning_rate": 3.027159106694872e-06, + "loss": 0.5288, + "step": 8859 + }, + { + "epoch": 1.3055992141453832, + "grad_norm": 0.6094817519187927, + "learning_rate": 3.026780119773496e-06, + "loss": 0.5436, + "step": 8860 + }, + { + "epoch": 1.305746561886051, + "grad_norm": 0.6046571135520935, + "learning_rate": 3.0264011201833053e-06, + "loss": 0.5273, + "step": 8861 + }, + { + "epoch": 1.305893909626719, + "grad_norm": 0.5661907196044922, + "learning_rate": 3.0260221079334152e-06, + "loss": 0.5577, + "step": 8862 + }, + { + "epoch": 1.306041257367387, + "grad_norm": 0.6118594408035278, + "learning_rate": 3.02564308303294e-06, + "loss": 0.5169, + "step": 8863 + }, + { + "epoch": 1.306188605108055, + "grad_norm": 0.599680483341217, + "learning_rate": 3.0252640454909945e-06, + "loss": 0.5213, + "step": 8864 + }, + { + "epoch": 1.306335952848723, + "grad_norm": 0.6113905906677246, + "learning_rate": 3.0248849953166954e-06, + "loss": 0.514, + "step": 8865 + }, + { + "epoch": 1.306483300589391, + "grad_norm": 0.561649739742279, + "learning_rate": 3.0245059325191583e-06, + "loss": 0.5583, + "step": 8866 + }, + { + "epoch": 1.306630648330059, + "grad_norm": 0.6099203824996948, + "learning_rate": 3.0241268571075e-06, + "loss": 0.5327, + "step": 8867 + }, + { + "epoch": 1.3067779960707269, + "grad_norm": 0.5886783599853516, + "learning_rate": 3.0237477690908357e-06, + "loss": 0.5364, + "step": 8868 + }, + { + "epoch": 1.306925343811395, + "grad_norm": 0.5978950262069702, + "learning_rate": 3.0233686684782835e-06, + "loss": 0.5373, + "step": 8869 + }, + { + "epoch": 1.307072691552063, + "grad_norm": 0.6237046718597412, + "learning_rate": 3.022989555278961e-06, + "loss": 0.505, + "step": 8870 + }, + { + "epoch": 1.307220039292731, + "grad_norm": 0.5646064281463623, + "learning_rate": 3.0226104295019844e-06, + "loss": 0.4928, + "step": 8871 + }, + { + "epoch": 1.3073673870333988, + "grad_norm": 0.610940158367157, + "learning_rate": 3.022231291156472e-06, + "loss": 0.5277, + "step": 8872 + }, + { + "epoch": 1.3075147347740668, + "grad_norm": 0.6119354963302612, + "learning_rate": 3.021852140251543e-06, + "loss": 0.5334, + "step": 8873 + }, + { + "epoch": 1.3076620825147347, + "grad_norm": 0.5980158448219299, + "learning_rate": 3.0214729767963135e-06, + "loss": 0.5353, + "step": 8874 + }, + { + "epoch": 1.3078094302554026, + "grad_norm": 0.6269562840461731, + "learning_rate": 3.021093800799904e-06, + "loss": 0.536, + "step": 8875 + }, + { + "epoch": 1.3079567779960708, + "grad_norm": 0.5890834927558899, + "learning_rate": 3.020714612271433e-06, + "loss": 0.5443, + "step": 8876 + }, + { + "epoch": 1.3081041257367387, + "grad_norm": 0.5700015425682068, + "learning_rate": 3.02033541122002e-06, + "loss": 0.5307, + "step": 8877 + }, + { + "epoch": 1.3082514734774067, + "grad_norm": 0.5967332124710083, + "learning_rate": 3.019956197654784e-06, + "loss": 0.5304, + "step": 8878 + }, + { + "epoch": 1.3083988212180746, + "grad_norm": 0.5684713125228882, + "learning_rate": 3.0195769715848456e-06, + "loss": 0.5643, + "step": 8879 + }, + { + "epoch": 1.3085461689587428, + "grad_norm": 0.6106557250022888, + "learning_rate": 3.0191977330193246e-06, + "loss": 0.5614, + "step": 8880 + }, + { + "epoch": 1.3086935166994107, + "grad_norm": 0.6324998736381531, + "learning_rate": 3.0188184819673415e-06, + "loss": 0.5071, + "step": 8881 + }, + { + "epoch": 1.3088408644400786, + "grad_norm": 0.5839027166366577, + "learning_rate": 3.018439218438018e-06, + "loss": 0.5108, + "step": 8882 + }, + { + "epoch": 1.3089882121807466, + "grad_norm": 0.6398817896842957, + "learning_rate": 3.0180599424404738e-06, + "loss": 0.549, + "step": 8883 + }, + { + "epoch": 1.3091355599214145, + "grad_norm": 0.6110808849334717, + "learning_rate": 3.017680653983831e-06, + "loss": 0.5176, + "step": 8884 + }, + { + "epoch": 1.3092829076620824, + "grad_norm": 0.5972000360488892, + "learning_rate": 3.017301353077211e-06, + "loss": 0.5295, + "step": 8885 + }, + { + "epoch": 1.3094302554027504, + "grad_norm": 0.5559650659561157, + "learning_rate": 3.0169220397297365e-06, + "loss": 0.5223, + "step": 8886 + }, + { + "epoch": 1.3095776031434185, + "grad_norm": 0.6038998365402222, + "learning_rate": 3.0165427139505294e-06, + "loss": 0.5308, + "step": 8887 + }, + { + "epoch": 1.3097249508840865, + "grad_norm": 0.6223336458206177, + "learning_rate": 3.0161633757487126e-06, + "loss": 0.521, + "step": 8888 + }, + { + "epoch": 1.3098722986247544, + "grad_norm": 0.5528155565261841, + "learning_rate": 3.015784025133408e-06, + "loss": 0.4874, + "step": 8889 + }, + { + "epoch": 1.3100196463654223, + "grad_norm": 0.5793650150299072, + "learning_rate": 3.01540466211374e-06, + "loss": 0.551, + "step": 8890 + }, + { + "epoch": 1.3101669941060905, + "grad_norm": 0.6116727590560913, + "learning_rate": 3.015025286698832e-06, + "loss": 0.5283, + "step": 8891 + }, + { + "epoch": 1.3103143418467584, + "grad_norm": 0.5787086486816406, + "learning_rate": 3.014645898897807e-06, + "loss": 0.5378, + "step": 8892 + }, + { + "epoch": 1.3104616895874264, + "grad_norm": 0.5998529195785522, + "learning_rate": 3.014266498719789e-06, + "loss": 0.5267, + "step": 8893 + }, + { + "epoch": 1.3106090373280943, + "grad_norm": 0.6086849570274353, + "learning_rate": 3.0138870861739027e-06, + "loss": 0.5073, + "step": 8894 + }, + { + "epoch": 1.3107563850687622, + "grad_norm": 0.5767064690589905, + "learning_rate": 3.0135076612692737e-06, + "loss": 0.5288, + "step": 8895 + }, + { + "epoch": 1.3109037328094302, + "grad_norm": 0.6061546206474304, + "learning_rate": 3.0131282240150267e-06, + "loss": 0.5236, + "step": 8896 + }, + { + "epoch": 1.3110510805500981, + "grad_norm": 0.5793053507804871, + "learning_rate": 3.012748774420286e-06, + "loss": 0.5294, + "step": 8897 + }, + { + "epoch": 1.3111984282907663, + "grad_norm": 0.570218563079834, + "learning_rate": 3.012369312494178e-06, + "loss": 0.4859, + "step": 8898 + }, + { + "epoch": 1.3113457760314342, + "grad_norm": 0.5982742309570312, + "learning_rate": 3.011989838245828e-06, + "loss": 0.5505, + "step": 8899 + }, + { + "epoch": 1.3114931237721021, + "grad_norm": 0.6418405175209045, + "learning_rate": 3.0116103516843633e-06, + "loss": 0.5374, + "step": 8900 + }, + { + "epoch": 1.31164047151277, + "grad_norm": 0.6121988892555237, + "learning_rate": 3.0112308528189087e-06, + "loss": 0.5278, + "step": 8901 + }, + { + "epoch": 1.3117878192534382, + "grad_norm": 0.6153398156166077, + "learning_rate": 3.010851341658592e-06, + "loss": 0.521, + "step": 8902 + }, + { + "epoch": 1.3119351669941062, + "grad_norm": 0.6150961518287659, + "learning_rate": 3.0104718182125403e-06, + "loss": 0.5324, + "step": 8903 + }, + { + "epoch": 1.312082514734774, + "grad_norm": 0.5889748930931091, + "learning_rate": 3.010092282489881e-06, + "loss": 0.5629, + "step": 8904 + }, + { + "epoch": 1.312229862475442, + "grad_norm": 0.5768395662307739, + "learning_rate": 3.0097127344997414e-06, + "loss": 0.5388, + "step": 8905 + }, + { + "epoch": 1.31237721021611, + "grad_norm": 0.6118963360786438, + "learning_rate": 3.00933317425125e-06, + "loss": 0.5308, + "step": 8906 + }, + { + "epoch": 1.312524557956778, + "grad_norm": 0.5806705355644226, + "learning_rate": 3.0089536017535337e-06, + "loss": 0.5511, + "step": 8907 + }, + { + "epoch": 1.3126719056974459, + "grad_norm": 0.6054975986480713, + "learning_rate": 3.008574017015723e-06, + "loss": 0.5189, + "step": 8908 + }, + { + "epoch": 1.312819253438114, + "grad_norm": 0.5591014623641968, + "learning_rate": 3.008194420046945e-06, + "loss": 0.4975, + "step": 8909 + }, + { + "epoch": 1.312966601178782, + "grad_norm": 0.5625701546669006, + "learning_rate": 3.00781481085633e-06, + "loss": 0.5514, + "step": 8910 + }, + { + "epoch": 1.3131139489194499, + "grad_norm": 0.580840528011322, + "learning_rate": 3.0074351894530066e-06, + "loss": 0.5268, + "step": 8911 + }, + { + "epoch": 1.3132612966601178, + "grad_norm": 0.6353463530540466, + "learning_rate": 3.0070555558461052e-06, + "loss": 0.5202, + "step": 8912 + }, + { + "epoch": 1.313408644400786, + "grad_norm": 0.5717399716377258, + "learning_rate": 3.006675910044755e-06, + "loss": 0.5533, + "step": 8913 + }, + { + "epoch": 1.313555992141454, + "grad_norm": 0.5786566734313965, + "learning_rate": 3.0062962520580873e-06, + "loss": 0.5388, + "step": 8914 + }, + { + "epoch": 1.3137033398821218, + "grad_norm": 0.5761737823486328, + "learning_rate": 3.005916581895232e-06, + "loss": 0.5404, + "step": 8915 + }, + { + "epoch": 1.3138506876227898, + "grad_norm": 0.6113357543945312, + "learning_rate": 3.0055368995653208e-06, + "loss": 0.545, + "step": 8916 + }, + { + "epoch": 1.3139980353634577, + "grad_norm": 0.5827839970588684, + "learning_rate": 3.0051572050774837e-06, + "loss": 0.5232, + "step": 8917 + }, + { + "epoch": 1.3141453831041257, + "grad_norm": 0.5834389328956604, + "learning_rate": 3.004777498440852e-06, + "loss": 0.511, + "step": 8918 + }, + { + "epoch": 1.3142927308447936, + "grad_norm": 0.6136263012886047, + "learning_rate": 3.0043977796645597e-06, + "loss": 0.585, + "step": 8919 + }, + { + "epoch": 1.3144400785854617, + "grad_norm": 0.5735808610916138, + "learning_rate": 3.0040180487577377e-06, + "loss": 0.5489, + "step": 8920 + }, + { + "epoch": 1.3145874263261297, + "grad_norm": 0.5877261757850647, + "learning_rate": 3.0036383057295167e-06, + "loss": 0.5356, + "step": 8921 + }, + { + "epoch": 1.3147347740667976, + "grad_norm": 0.5667177438735962, + "learning_rate": 3.003258550589031e-06, + "loss": 0.5404, + "step": 8922 + }, + { + "epoch": 1.3148821218074656, + "grad_norm": 0.5949438214302063, + "learning_rate": 3.002878783345413e-06, + "loss": 0.5504, + "step": 8923 + }, + { + "epoch": 1.3150294695481337, + "grad_norm": 0.5722221732139587, + "learning_rate": 3.0024990040077968e-06, + "loss": 0.4986, + "step": 8924 + }, + { + "epoch": 1.3151768172888016, + "grad_norm": 0.5696892738342285, + "learning_rate": 3.0021192125853144e-06, + "loss": 0.5212, + "step": 8925 + }, + { + "epoch": 1.3153241650294696, + "grad_norm": 0.6122156381607056, + "learning_rate": 3.001739409087101e-06, + "loss": 0.5369, + "step": 8926 + }, + { + "epoch": 1.3154715127701375, + "grad_norm": 0.5608416795730591, + "learning_rate": 3.00135959352229e-06, + "loss": 0.5433, + "step": 8927 + }, + { + "epoch": 1.3156188605108055, + "grad_norm": 0.5963548421859741, + "learning_rate": 3.000979765900016e-06, + "loss": 0.5433, + "step": 8928 + }, + { + "epoch": 1.3157662082514734, + "grad_norm": 0.596711277961731, + "learning_rate": 3.0005999262294134e-06, + "loss": 0.522, + "step": 8929 + }, + { + "epoch": 1.3159135559921413, + "grad_norm": 0.5946344137191772, + "learning_rate": 3.0002200745196173e-06, + "loss": 0.5497, + "step": 8930 + }, + { + "epoch": 1.3160609037328095, + "grad_norm": 0.5941744446754456, + "learning_rate": 2.999840210779763e-06, + "loss": 0.5495, + "step": 8931 + }, + { + "epoch": 1.3162082514734774, + "grad_norm": 0.5997826457023621, + "learning_rate": 2.999460335018986e-06, + "loss": 0.5256, + "step": 8932 + }, + { + "epoch": 1.3163555992141454, + "grad_norm": 0.6023252606391907, + "learning_rate": 2.999080447246423e-06, + "loss": 0.5246, + "step": 8933 + }, + { + "epoch": 1.3165029469548133, + "grad_norm": 0.6221656203269958, + "learning_rate": 2.9987005474712084e-06, + "loss": 0.529, + "step": 8934 + }, + { + "epoch": 1.3166502946954814, + "grad_norm": 0.6220554113388062, + "learning_rate": 2.99832063570248e-06, + "loss": 0.5481, + "step": 8935 + }, + { + "epoch": 1.3167976424361494, + "grad_norm": 0.6111207008361816, + "learning_rate": 2.997940711949374e-06, + "loss": 0.5408, + "step": 8936 + }, + { + "epoch": 1.3169449901768173, + "grad_norm": 0.601446807384491, + "learning_rate": 2.9975607762210272e-06, + "loss": 0.5386, + "step": 8937 + }, + { + "epoch": 1.3170923379174853, + "grad_norm": 0.6305886507034302, + "learning_rate": 2.997180828526578e-06, + "loss": 0.5411, + "step": 8938 + }, + { + "epoch": 1.3172396856581532, + "grad_norm": 0.6331508159637451, + "learning_rate": 2.996800868875162e-06, + "loss": 0.5614, + "step": 8939 + }, + { + "epoch": 1.3173870333988211, + "grad_norm": 0.5798312425613403, + "learning_rate": 2.9964208972759186e-06, + "loss": 0.5377, + "step": 8940 + }, + { + "epoch": 1.317534381139489, + "grad_norm": 0.5882259607315063, + "learning_rate": 2.9960409137379854e-06, + "loss": 0.5405, + "step": 8941 + }, + { + "epoch": 1.3176817288801572, + "grad_norm": 0.5944419503211975, + "learning_rate": 2.995660918270501e-06, + "loss": 0.5325, + "step": 8942 + }, + { + "epoch": 1.3178290766208252, + "grad_norm": 0.6169295907020569, + "learning_rate": 2.9952809108826036e-06, + "loss": 0.5396, + "step": 8943 + }, + { + "epoch": 1.317976424361493, + "grad_norm": 0.6273781657218933, + "learning_rate": 2.994900891583433e-06, + "loss": 0.5215, + "step": 8944 + }, + { + "epoch": 1.318123772102161, + "grad_norm": 0.5879075527191162, + "learning_rate": 2.9945208603821285e-06, + "loss": 0.5417, + "step": 8945 + }, + { + "epoch": 1.3182711198428292, + "grad_norm": 0.5877619981765747, + "learning_rate": 2.994140817287828e-06, + "loss": 0.5056, + "step": 8946 + }, + { + "epoch": 1.3184184675834971, + "grad_norm": 0.5976608395576477, + "learning_rate": 2.9937607623096738e-06, + "loss": 0.5376, + "step": 8947 + }, + { + "epoch": 1.318565815324165, + "grad_norm": 0.6236481666564941, + "learning_rate": 2.9933806954568044e-06, + "loss": 0.5244, + "step": 8948 + }, + { + "epoch": 1.318713163064833, + "grad_norm": 0.6378622055053711, + "learning_rate": 2.993000616738361e-06, + "loss": 0.5538, + "step": 8949 + }, + { + "epoch": 1.318860510805501, + "grad_norm": 0.6266913414001465, + "learning_rate": 2.992620526163483e-06, + "loss": 0.544, + "step": 8950 + }, + { + "epoch": 1.3190078585461689, + "grad_norm": 0.6085349321365356, + "learning_rate": 2.9922404237413128e-06, + "loss": 0.5227, + "step": 8951 + }, + { + "epoch": 1.3191552062868368, + "grad_norm": 0.5958927869796753, + "learning_rate": 2.991860309480991e-06, + "loss": 0.5373, + "step": 8952 + }, + { + "epoch": 1.319302554027505, + "grad_norm": 0.6331906318664551, + "learning_rate": 2.9914801833916603e-06, + "loss": 0.5437, + "step": 8953 + }, + { + "epoch": 1.319449901768173, + "grad_norm": 0.5804860591888428, + "learning_rate": 2.991100045482461e-06, + "loss": 0.5122, + "step": 8954 + }, + { + "epoch": 1.3195972495088408, + "grad_norm": 0.5853872299194336, + "learning_rate": 2.9907198957625366e-06, + "loss": 0.5118, + "step": 8955 + }, + { + "epoch": 1.3197445972495088, + "grad_norm": 0.599092960357666, + "learning_rate": 2.990339734241029e-06, + "loss": 0.5557, + "step": 8956 + }, + { + "epoch": 1.319891944990177, + "grad_norm": 0.5690260529518127, + "learning_rate": 2.9899595609270797e-06, + "loss": 0.545, + "step": 8957 + }, + { + "epoch": 1.3200392927308449, + "grad_norm": 0.5968871712684631, + "learning_rate": 2.9895793758298335e-06, + "loss": 0.5443, + "step": 8958 + }, + { + "epoch": 1.3201866404715128, + "grad_norm": 0.5990027189254761, + "learning_rate": 2.989199178958433e-06, + "loss": 0.5085, + "step": 8959 + }, + { + "epoch": 1.3203339882121807, + "grad_norm": 0.6135165691375732, + "learning_rate": 2.9888189703220217e-06, + "loss": 0.53, + "step": 8960 + }, + { + "epoch": 1.3204813359528487, + "grad_norm": 0.5898999571800232, + "learning_rate": 2.988438749929743e-06, + "loss": 0.5146, + "step": 8961 + }, + { + "epoch": 1.3206286836935166, + "grad_norm": 0.6258790493011475, + "learning_rate": 2.9880585177907417e-06, + "loss": 0.5385, + "step": 8962 + }, + { + "epoch": 1.3207760314341845, + "grad_norm": 0.5773698687553406, + "learning_rate": 2.9876782739141625e-06, + "loss": 0.5327, + "step": 8963 + }, + { + "epoch": 1.3209233791748527, + "grad_norm": 0.6058011054992676, + "learning_rate": 2.987298018309149e-06, + "loss": 0.5229, + "step": 8964 + }, + { + "epoch": 1.3210707269155206, + "grad_norm": 0.6023280620574951, + "learning_rate": 2.986917750984847e-06, + "loss": 0.5334, + "step": 8965 + }, + { + "epoch": 1.3212180746561886, + "grad_norm": 0.6693758368492126, + "learning_rate": 2.986537471950401e-06, + "loss": 0.5403, + "step": 8966 + }, + { + "epoch": 1.3213654223968565, + "grad_norm": 0.6057134866714478, + "learning_rate": 2.986157181214957e-06, + "loss": 0.571, + "step": 8967 + }, + { + "epoch": 1.3215127701375247, + "grad_norm": 0.6450909972190857, + "learning_rate": 2.985776878787661e-06, + "loss": 0.5078, + "step": 8968 + }, + { + "epoch": 1.3216601178781926, + "grad_norm": 0.5815439224243164, + "learning_rate": 2.985396564677659e-06, + "loss": 0.5078, + "step": 8969 + }, + { + "epoch": 1.3218074656188605, + "grad_norm": 0.6305100321769714, + "learning_rate": 2.985016238894097e-06, + "loss": 0.5415, + "step": 8970 + }, + { + "epoch": 1.3219548133595285, + "grad_norm": 0.5542739629745483, + "learning_rate": 2.9846359014461224e-06, + "loss": 0.5101, + "step": 8971 + }, + { + "epoch": 1.3221021611001964, + "grad_norm": 0.608872652053833, + "learning_rate": 2.984255552342881e-06, + "loss": 0.5259, + "step": 8972 + }, + { + "epoch": 1.3222495088408643, + "grad_norm": 0.5790343880653381, + "learning_rate": 2.9838751915935217e-06, + "loss": 0.5538, + "step": 8973 + }, + { + "epoch": 1.3223968565815325, + "grad_norm": 0.6028466820716858, + "learning_rate": 2.9834948192071905e-06, + "loss": 0.549, + "step": 8974 + }, + { + "epoch": 1.3225442043222004, + "grad_norm": 0.5964512228965759, + "learning_rate": 2.9831144351930352e-06, + "loss": 0.5298, + "step": 8975 + }, + { + "epoch": 1.3226915520628684, + "grad_norm": 0.6074384450912476, + "learning_rate": 2.982734039560205e-06, + "loss": 0.5417, + "step": 8976 + }, + { + "epoch": 1.3228388998035363, + "grad_norm": 0.6062073707580566, + "learning_rate": 2.982353632317847e-06, + "loss": 0.5415, + "step": 8977 + }, + { + "epoch": 1.3229862475442042, + "grad_norm": 0.5902124047279358, + "learning_rate": 2.981973213475111e-06, + "loss": 0.5309, + "step": 8978 + }, + { + "epoch": 1.3231335952848724, + "grad_norm": 0.6049677133560181, + "learning_rate": 2.981592783041145e-06, + "loss": 0.4974, + "step": 8979 + }, + { + "epoch": 1.3232809430255403, + "grad_norm": 0.5763800740242004, + "learning_rate": 2.981212341025098e-06, + "loss": 0.5447, + "step": 8980 + }, + { + "epoch": 1.3234282907662083, + "grad_norm": 0.5976239442825317, + "learning_rate": 2.980831887436121e-06, + "loss": 0.5299, + "step": 8981 + }, + { + "epoch": 1.3235756385068762, + "grad_norm": 0.6115972995758057, + "learning_rate": 2.980451422283361e-06, + "loss": 0.5648, + "step": 8982 + }, + { + "epoch": 1.3237229862475441, + "grad_norm": 0.6052427887916565, + "learning_rate": 2.9800709455759714e-06, + "loss": 0.5328, + "step": 8983 + }, + { + "epoch": 1.323870333988212, + "grad_norm": 0.6190658211708069, + "learning_rate": 2.9796904573230995e-06, + "loss": 0.5364, + "step": 8984 + }, + { + "epoch": 1.3240176817288802, + "grad_norm": 0.6129457950592041, + "learning_rate": 2.979309957533898e-06, + "loss": 0.5144, + "step": 8985 + }, + { + "epoch": 1.3241650294695482, + "grad_norm": 0.5896188616752625, + "learning_rate": 2.9789294462175154e-06, + "loss": 0.5164, + "step": 8986 + }, + { + "epoch": 1.324312377210216, + "grad_norm": 0.6557327508926392, + "learning_rate": 2.9785489233831054e-06, + "loss": 0.5184, + "step": 8987 + }, + { + "epoch": 1.324459724950884, + "grad_norm": 0.6120759844779968, + "learning_rate": 2.9781683890398183e-06, + "loss": 0.5113, + "step": 8988 + }, + { + "epoch": 1.3246070726915522, + "grad_norm": 0.5750967860221863, + "learning_rate": 2.9777878431968053e-06, + "loss": 0.532, + "step": 8989 + }, + { + "epoch": 1.3247544204322201, + "grad_norm": 0.5724018216133118, + "learning_rate": 2.977407285863219e-06, + "loss": 0.545, + "step": 8990 + }, + { + "epoch": 1.324901768172888, + "grad_norm": 0.6867124438285828, + "learning_rate": 2.9770267170482113e-06, + "loss": 0.5374, + "step": 8991 + }, + { + "epoch": 1.325049115913556, + "grad_norm": 0.5905114412307739, + "learning_rate": 2.976646136760935e-06, + "loss": 0.5677, + "step": 8992 + }, + { + "epoch": 1.325196463654224, + "grad_norm": 0.5823444128036499, + "learning_rate": 2.9762655450105426e-06, + "loss": 0.5025, + "step": 8993 + }, + { + "epoch": 1.3253438113948919, + "grad_norm": 0.5715795159339905, + "learning_rate": 2.975884941806187e-06, + "loss": 0.5364, + "step": 8994 + }, + { + "epoch": 1.3254911591355598, + "grad_norm": 0.5865716338157654, + "learning_rate": 2.9755043271570215e-06, + "loss": 0.5303, + "step": 8995 + }, + { + "epoch": 1.325638506876228, + "grad_norm": 0.5997296571731567, + "learning_rate": 2.9751237010722007e-06, + "loss": 0.545, + "step": 8996 + }, + { + "epoch": 1.325785854616896, + "grad_norm": 0.6161254048347473, + "learning_rate": 2.974743063560877e-06, + "loss": 0.5211, + "step": 8997 + }, + { + "epoch": 1.3259332023575638, + "grad_norm": 0.5926781296730042, + "learning_rate": 2.974362414632206e-06, + "loss": 0.5483, + "step": 8998 + }, + { + "epoch": 1.3260805500982318, + "grad_norm": 0.5885804295539856, + "learning_rate": 2.973981754295341e-06, + "loss": 0.5262, + "step": 8999 + }, + { + "epoch": 1.3262278978389, + "grad_norm": 0.5867140293121338, + "learning_rate": 2.973601082559437e-06, + "loss": 0.5576, + "step": 9000 + }, + { + "epoch": 1.3263752455795679, + "grad_norm": 0.5845410823822021, + "learning_rate": 2.9732203994336493e-06, + "loss": 0.5384, + "step": 9001 + }, + { + "epoch": 1.3265225933202358, + "grad_norm": 0.6074578166007996, + "learning_rate": 2.972839704927133e-06, + "loss": 0.5215, + "step": 9002 + }, + { + "epoch": 1.3266699410609037, + "grad_norm": 0.641106903553009, + "learning_rate": 2.972458999049043e-06, + "loss": 0.5303, + "step": 9003 + }, + { + "epoch": 1.3268172888015717, + "grad_norm": 0.5829891562461853, + "learning_rate": 2.9720782818085363e-06, + "loss": 0.5378, + "step": 9004 + }, + { + "epoch": 1.3269646365422396, + "grad_norm": 0.6092849373817444, + "learning_rate": 2.971697553214768e-06, + "loss": 0.5589, + "step": 9005 + }, + { + "epoch": 1.3271119842829076, + "grad_norm": 0.6663932204246521, + "learning_rate": 2.971316813276895e-06, + "loss": 0.5363, + "step": 9006 + }, + { + "epoch": 1.3272593320235757, + "grad_norm": 0.5992453694343567, + "learning_rate": 2.9709360620040733e-06, + "loss": 0.5156, + "step": 9007 + }, + { + "epoch": 1.3274066797642436, + "grad_norm": 0.64316326379776, + "learning_rate": 2.970555299405461e-06, + "loss": 0.5239, + "step": 9008 + }, + { + "epoch": 1.3275540275049116, + "grad_norm": 0.5564925074577332, + "learning_rate": 2.9701745254902137e-06, + "loss": 0.5351, + "step": 9009 + }, + { + "epoch": 1.3277013752455795, + "grad_norm": 0.5757303237915039, + "learning_rate": 2.9697937402674897e-06, + "loss": 0.5624, + "step": 9010 + }, + { + "epoch": 1.3278487229862477, + "grad_norm": 0.5971621870994568, + "learning_rate": 2.9694129437464473e-06, + "loss": 0.5229, + "step": 9011 + }, + { + "epoch": 1.3279960707269156, + "grad_norm": 0.5996527075767517, + "learning_rate": 2.9690321359362434e-06, + "loss": 0.5693, + "step": 9012 + }, + { + "epoch": 1.3281434184675835, + "grad_norm": 0.606825590133667, + "learning_rate": 2.968651316846037e-06, + "loss": 0.5412, + "step": 9013 + }, + { + "epoch": 1.3282907662082515, + "grad_norm": 0.5449331402778625, + "learning_rate": 2.9682704864849856e-06, + "loss": 0.5149, + "step": 9014 + }, + { + "epoch": 1.3284381139489194, + "grad_norm": 0.5807191729545593, + "learning_rate": 2.9678896448622483e-06, + "loss": 0.5143, + "step": 9015 + }, + { + "epoch": 1.3285854616895874, + "grad_norm": 0.6084197759628296, + "learning_rate": 2.967508791986985e-06, + "loss": 0.5394, + "step": 9016 + }, + { + "epoch": 1.3287328094302553, + "grad_norm": 0.5825796723365784, + "learning_rate": 2.9671279278683546e-06, + "loss": 0.5187, + "step": 9017 + }, + { + "epoch": 1.3288801571709234, + "grad_norm": 0.5941682457923889, + "learning_rate": 2.966747052515516e-06, + "loss": 0.5454, + "step": 9018 + }, + { + "epoch": 1.3290275049115914, + "grad_norm": 0.6111936569213867, + "learning_rate": 2.966366165937631e-06, + "loss": 0.541, + "step": 9019 + }, + { + "epoch": 1.3291748526522593, + "grad_norm": 0.6036586165428162, + "learning_rate": 2.965985268143857e-06, + "loss": 0.5424, + "step": 9020 + }, + { + "epoch": 1.3293222003929273, + "grad_norm": 0.5657582879066467, + "learning_rate": 2.965604359143357e-06, + "loss": 0.5209, + "step": 9021 + }, + { + "epoch": 1.3294695481335954, + "grad_norm": 0.6057657599449158, + "learning_rate": 2.96522343894529e-06, + "loss": 0.5054, + "step": 9022 + }, + { + "epoch": 1.3296168958742633, + "grad_norm": 0.5741898417472839, + "learning_rate": 2.9648425075588174e-06, + "loss": 0.5378, + "step": 9023 + }, + { + "epoch": 1.3297642436149313, + "grad_norm": 0.5832265615463257, + "learning_rate": 2.9644615649931e-06, + "loss": 0.5287, + "step": 9024 + }, + { + "epoch": 1.3299115913555992, + "grad_norm": 0.5867762565612793, + "learning_rate": 2.9640806112573004e-06, + "loss": 0.5409, + "step": 9025 + }, + { + "epoch": 1.3300589390962672, + "grad_norm": 0.5750309228897095, + "learning_rate": 2.9636996463605805e-06, + "loss": 0.5221, + "step": 9026 + }, + { + "epoch": 1.330206286836935, + "grad_norm": 0.6267939209938049, + "learning_rate": 2.963318670312101e-06, + "loss": 0.5474, + "step": 9027 + }, + { + "epoch": 1.330353634577603, + "grad_norm": 0.5784497261047363, + "learning_rate": 2.9629376831210245e-06, + "loss": 0.5464, + "step": 9028 + }, + { + "epoch": 1.3305009823182712, + "grad_norm": 0.614446759223938, + "learning_rate": 2.9625566847965144e-06, + "loss": 0.5125, + "step": 9029 + }, + { + "epoch": 1.3306483300589391, + "grad_norm": 0.576218843460083, + "learning_rate": 2.962175675347732e-06, + "loss": 0.533, + "step": 9030 + }, + { + "epoch": 1.330795677799607, + "grad_norm": 0.6085537075996399, + "learning_rate": 2.9617946547838422e-06, + "loss": 0.5446, + "step": 9031 + }, + { + "epoch": 1.330943025540275, + "grad_norm": 0.5777566432952881, + "learning_rate": 2.9614136231140078e-06, + "loss": 0.548, + "step": 9032 + }, + { + "epoch": 1.3310903732809432, + "grad_norm": 0.5860437154769897, + "learning_rate": 2.9610325803473916e-06, + "loss": 0.5387, + "step": 9033 + }, + { + "epoch": 1.331237721021611, + "grad_norm": 0.5906823873519897, + "learning_rate": 2.960651526493158e-06, + "loss": 0.5152, + "step": 9034 + }, + { + "epoch": 1.331385068762279, + "grad_norm": 0.6096025705337524, + "learning_rate": 2.960270461560472e-06, + "loss": 0.5554, + "step": 9035 + }, + { + "epoch": 1.331532416502947, + "grad_norm": 0.5599738955497742, + "learning_rate": 2.959889385558497e-06, + "loss": 0.5275, + "step": 9036 + }, + { + "epoch": 1.331679764243615, + "grad_norm": 0.6118701696395874, + "learning_rate": 2.959508298496398e-06, + "loss": 0.5288, + "step": 9037 + }, + { + "epoch": 1.3318271119842828, + "grad_norm": 0.5883637070655823, + "learning_rate": 2.9591272003833393e-06, + "loss": 0.5006, + "step": 9038 + }, + { + "epoch": 1.3319744597249508, + "grad_norm": 0.5819154977798462, + "learning_rate": 2.9587460912284873e-06, + "loss": 0.5341, + "step": 9039 + }, + { + "epoch": 1.332121807465619, + "grad_norm": 0.6089463829994202, + "learning_rate": 2.9583649710410066e-06, + "loss": 0.5434, + "step": 9040 + }, + { + "epoch": 1.3322691552062869, + "grad_norm": 0.5911954045295715, + "learning_rate": 2.957983839830064e-06, + "loss": 0.5428, + "step": 9041 + }, + { + "epoch": 1.3324165029469548, + "grad_norm": 0.5978178381919861, + "learning_rate": 2.957602697604824e-06, + "loss": 0.5452, + "step": 9042 + }, + { + "epoch": 1.3325638506876227, + "grad_norm": 0.5882730484008789, + "learning_rate": 2.957221544374454e-06, + "loss": 0.5227, + "step": 9043 + }, + { + "epoch": 1.3327111984282909, + "grad_norm": 0.5927220582962036, + "learning_rate": 2.9568403801481204e-06, + "loss": 0.5349, + "step": 9044 + }, + { + "epoch": 1.3328585461689588, + "grad_norm": 0.5930784940719604, + "learning_rate": 2.9564592049349903e-06, + "loss": 0.5238, + "step": 9045 + }, + { + "epoch": 1.3330058939096268, + "grad_norm": 0.5842193961143494, + "learning_rate": 2.95607801874423e-06, + "loss": 0.5526, + "step": 9046 + }, + { + "epoch": 1.3331532416502947, + "grad_norm": 0.6114212274551392, + "learning_rate": 2.9556968215850073e-06, + "loss": 0.5547, + "step": 9047 + }, + { + "epoch": 1.3333005893909626, + "grad_norm": 0.5930878520011902, + "learning_rate": 2.95531561346649e-06, + "loss": 0.55, + "step": 9048 + }, + { + "epoch": 1.3334479371316306, + "grad_norm": 0.5834406018257141, + "learning_rate": 2.9549343943978458e-06, + "loss": 0.534, + "step": 9049 + }, + { + "epoch": 1.3335952848722985, + "grad_norm": 0.5863726735115051, + "learning_rate": 2.9545531643882426e-06, + "loss": 0.5297, + "step": 9050 + }, + { + "epoch": 1.3337426326129667, + "grad_norm": 0.6288934350013733, + "learning_rate": 2.954171923446849e-06, + "loss": 0.5547, + "step": 9051 + }, + { + "epoch": 1.3338899803536346, + "grad_norm": 0.6254673600196838, + "learning_rate": 2.9537906715828333e-06, + "loss": 0.5412, + "step": 9052 + }, + { + "epoch": 1.3340373280943025, + "grad_norm": 0.6043513417243958, + "learning_rate": 2.9534094088053655e-06, + "loss": 0.5766, + "step": 9053 + }, + { + "epoch": 1.3341846758349705, + "grad_norm": 0.6236569881439209, + "learning_rate": 2.953028135123614e-06, + "loss": 0.5403, + "step": 9054 + }, + { + "epoch": 1.3343320235756386, + "grad_norm": 0.5959658026695251, + "learning_rate": 2.9526468505467477e-06, + "loss": 0.5122, + "step": 9055 + }, + { + "epoch": 1.3344793713163066, + "grad_norm": 0.594663679599762, + "learning_rate": 2.9522655550839373e-06, + "loss": 0.5469, + "step": 9056 + }, + { + "epoch": 1.3346267190569745, + "grad_norm": 0.6033875942230225, + "learning_rate": 2.9518842487443527e-06, + "loss": 0.5611, + "step": 9057 + }, + { + "epoch": 1.3347740667976424, + "grad_norm": 0.6157247424125671, + "learning_rate": 2.9515029315371635e-06, + "loss": 0.543, + "step": 9058 + }, + { + "epoch": 1.3349214145383104, + "grad_norm": 0.5855475068092346, + "learning_rate": 2.951121603471541e-06, + "loss": 0.5407, + "step": 9059 + }, + { + "epoch": 1.3350687622789783, + "grad_norm": 0.6062626838684082, + "learning_rate": 2.950740264556655e-06, + "loss": 0.5406, + "step": 9060 + }, + { + "epoch": 1.3352161100196462, + "grad_norm": 0.584519624710083, + "learning_rate": 2.950358914801677e-06, + "loss": 0.5443, + "step": 9061 + }, + { + "epoch": 1.3353634577603144, + "grad_norm": 0.5981712937355042, + "learning_rate": 2.949977554215779e-06, + "loss": 0.5604, + "step": 9062 + }, + { + "epoch": 1.3355108055009823, + "grad_norm": 0.5820282697677612, + "learning_rate": 2.9495961828081317e-06, + "loss": 0.5579, + "step": 9063 + }, + { + "epoch": 1.3356581532416503, + "grad_norm": 0.5840325355529785, + "learning_rate": 2.949214800587906e-06, + "loss": 0.5492, + "step": 9064 + }, + { + "epoch": 1.3358055009823182, + "grad_norm": 0.6211901903152466, + "learning_rate": 2.9488334075642766e-06, + "loss": 0.545, + "step": 9065 + }, + { + "epoch": 1.3359528487229864, + "grad_norm": 0.5986806154251099, + "learning_rate": 2.9484520037464133e-06, + "loss": 0.5252, + "step": 9066 + }, + { + "epoch": 1.3361001964636543, + "grad_norm": 0.5725258588790894, + "learning_rate": 2.9480705891434896e-06, + "loss": 0.5167, + "step": 9067 + }, + { + "epoch": 1.3362475442043222, + "grad_norm": 0.6705788969993591, + "learning_rate": 2.947689163764679e-06, + "loss": 0.5816, + "step": 9068 + }, + { + "epoch": 1.3363948919449902, + "grad_norm": 0.5977813601493835, + "learning_rate": 2.9473077276191535e-06, + "loss": 0.5168, + "step": 9069 + }, + { + "epoch": 1.336542239685658, + "grad_norm": 0.5717437267303467, + "learning_rate": 2.946926280716087e-06, + "loss": 0.5306, + "step": 9070 + }, + { + "epoch": 1.336689587426326, + "grad_norm": 0.6283136010169983, + "learning_rate": 2.9465448230646533e-06, + "loss": 0.546, + "step": 9071 + }, + { + "epoch": 1.336836935166994, + "grad_norm": 0.6060190200805664, + "learning_rate": 2.9461633546740257e-06, + "loss": 0.5345, + "step": 9072 + }, + { + "epoch": 1.3369842829076621, + "grad_norm": 0.5887794494628906, + "learning_rate": 2.9457818755533796e-06, + "loss": 0.5591, + "step": 9073 + }, + { + "epoch": 1.33713163064833, + "grad_norm": 0.591161847114563, + "learning_rate": 2.9454003857118874e-06, + "loss": 0.5511, + "step": 9074 + }, + { + "epoch": 1.337278978388998, + "grad_norm": 0.5850930213928223, + "learning_rate": 2.945018885158725e-06, + "loss": 0.5566, + "step": 9075 + }, + { + "epoch": 1.337426326129666, + "grad_norm": 0.5895281434059143, + "learning_rate": 2.9446373739030677e-06, + "loss": 0.5391, + "step": 9076 + }, + { + "epoch": 1.337573673870334, + "grad_norm": 0.6017562747001648, + "learning_rate": 2.94425585195409e-06, + "loss": 0.556, + "step": 9077 + }, + { + "epoch": 1.337721021611002, + "grad_norm": 0.6352524757385254, + "learning_rate": 2.9438743193209666e-06, + "loss": 0.5354, + "step": 9078 + }, + { + "epoch": 1.33786836935167, + "grad_norm": 0.6344918012619019, + "learning_rate": 2.9434927760128752e-06, + "loss": 0.5205, + "step": 9079 + }, + { + "epoch": 1.338015717092338, + "grad_norm": 0.5372716784477234, + "learning_rate": 2.9431112220389894e-06, + "loss": 0.4885, + "step": 9080 + }, + { + "epoch": 1.3381630648330058, + "grad_norm": 0.6221559047698975, + "learning_rate": 2.9427296574084873e-06, + "loss": 0.5269, + "step": 9081 + }, + { + "epoch": 1.3383104125736738, + "grad_norm": 0.5635979771614075, + "learning_rate": 2.9423480821305446e-06, + "loss": 0.5677, + "step": 9082 + }, + { + "epoch": 1.3384577603143417, + "grad_norm": 0.6075918674468994, + "learning_rate": 2.941966496214338e-06, + "loss": 0.5416, + "step": 9083 + }, + { + "epoch": 1.3386051080550099, + "grad_norm": 0.6060293912887573, + "learning_rate": 2.9415848996690437e-06, + "loss": 0.5261, + "step": 9084 + }, + { + "epoch": 1.3387524557956778, + "grad_norm": 0.593189001083374, + "learning_rate": 2.94120329250384e-06, + "loss": 0.5066, + "step": 9085 + }, + { + "epoch": 1.3388998035363457, + "grad_norm": 0.6191211938858032, + "learning_rate": 2.940821674727905e-06, + "loss": 0.5124, + "step": 9086 + }, + { + "epoch": 1.3390471512770137, + "grad_norm": 0.5885722637176514, + "learning_rate": 2.9404400463504146e-06, + "loss": 0.5584, + "step": 9087 + }, + { + "epoch": 1.3391944990176818, + "grad_norm": 0.5857310891151428, + "learning_rate": 2.9400584073805478e-06, + "loss": 0.5265, + "step": 9088 + }, + { + "epoch": 1.3393418467583498, + "grad_norm": 0.6139823794364929, + "learning_rate": 2.9396767578274824e-06, + "loss": 0.5619, + "step": 9089 + }, + { + "epoch": 1.3394891944990177, + "grad_norm": 0.5513720512390137, + "learning_rate": 2.9392950977003983e-06, + "loss": 0.4916, + "step": 9090 + }, + { + "epoch": 1.3396365422396856, + "grad_norm": 0.6134729981422424, + "learning_rate": 2.9389134270084716e-06, + "loss": 0.519, + "step": 9091 + }, + { + "epoch": 1.3397838899803536, + "grad_norm": 0.5706292986869812, + "learning_rate": 2.9385317457608844e-06, + "loss": 0.5439, + "step": 9092 + }, + { + "epoch": 1.3399312377210215, + "grad_norm": 0.5866262912750244, + "learning_rate": 2.9381500539668132e-06, + "loss": 0.5356, + "step": 9093 + }, + { + "epoch": 1.3400785854616895, + "grad_norm": 0.6253057718276978, + "learning_rate": 2.9377683516354396e-06, + "loss": 0.5345, + "step": 9094 + }, + { + "epoch": 1.3402259332023576, + "grad_norm": 0.65108722448349, + "learning_rate": 2.9373866387759415e-06, + "loss": 0.5353, + "step": 9095 + }, + { + "epoch": 1.3403732809430255, + "grad_norm": 0.5707628726959229, + "learning_rate": 2.9370049153975004e-06, + "loss": 0.5202, + "step": 9096 + }, + { + "epoch": 1.3405206286836935, + "grad_norm": 0.6284119486808777, + "learning_rate": 2.9366231815092956e-06, + "loss": 0.4978, + "step": 9097 + }, + { + "epoch": 1.3406679764243614, + "grad_norm": 0.6039829254150391, + "learning_rate": 2.9362414371205084e-06, + "loss": 0.5311, + "step": 9098 + }, + { + "epoch": 1.3408153241650296, + "grad_norm": 0.5927602648735046, + "learning_rate": 2.9358596822403195e-06, + "loss": 0.5347, + "step": 9099 + }, + { + "epoch": 1.3409626719056975, + "grad_norm": 0.6014181971549988, + "learning_rate": 2.935477916877909e-06, + "loss": 0.5424, + "step": 9100 + }, + { + "epoch": 1.3411100196463654, + "grad_norm": 0.6227336525917053, + "learning_rate": 2.9350961410424596e-06, + "loss": 0.5277, + "step": 9101 + }, + { + "epoch": 1.3412573673870334, + "grad_norm": 0.5643850564956665, + "learning_rate": 2.934714354743152e-06, + "loss": 0.5305, + "step": 9102 + }, + { + "epoch": 1.3414047151277013, + "grad_norm": 0.584120512008667, + "learning_rate": 2.9343325579891683e-06, + "loss": 0.5577, + "step": 9103 + }, + { + "epoch": 1.3415520628683693, + "grad_norm": 0.6018670201301575, + "learning_rate": 2.93395075078969e-06, + "loss": 0.5409, + "step": 9104 + }, + { + "epoch": 1.3416994106090372, + "grad_norm": 0.5777137875556946, + "learning_rate": 2.9335689331538996e-06, + "loss": 0.5106, + "step": 9105 + }, + { + "epoch": 1.3418467583497053, + "grad_norm": 0.6267194747924805, + "learning_rate": 2.93318710509098e-06, + "loss": 0.5448, + "step": 9106 + }, + { + "epoch": 1.3419941060903733, + "grad_norm": 0.5932929515838623, + "learning_rate": 2.932805266610113e-06, + "loss": 0.4851, + "step": 9107 + }, + { + "epoch": 1.3421414538310412, + "grad_norm": 0.6041550636291504, + "learning_rate": 2.932423417720483e-06, + "loss": 0.5298, + "step": 9108 + }, + { + "epoch": 1.3422888015717092, + "grad_norm": 0.5588039755821228, + "learning_rate": 2.932041558431272e-06, + "loss": 0.5227, + "step": 9109 + }, + { + "epoch": 1.3424361493123773, + "grad_norm": 0.5928577780723572, + "learning_rate": 2.9316596887516648e-06, + "loss": 0.5175, + "step": 9110 + }, + { + "epoch": 1.3425834970530452, + "grad_norm": 0.5955532193183899, + "learning_rate": 2.931277808690844e-06, + "loss": 0.5362, + "step": 9111 + }, + { + "epoch": 1.3427308447937132, + "grad_norm": 0.6063712239265442, + "learning_rate": 2.9308959182579944e-06, + "loss": 0.5567, + "step": 9112 + }, + { + "epoch": 1.3428781925343811, + "grad_norm": 0.6163181662559509, + "learning_rate": 2.9305140174623005e-06, + "loss": 0.5377, + "step": 9113 + }, + { + "epoch": 1.343025540275049, + "grad_norm": 0.6010279059410095, + "learning_rate": 2.9301321063129455e-06, + "loss": 0.5405, + "step": 9114 + }, + { + "epoch": 1.343172888015717, + "grad_norm": 0.6266573071479797, + "learning_rate": 2.929750184819115e-06, + "loss": 0.5034, + "step": 9115 + }, + { + "epoch": 1.3433202357563852, + "grad_norm": 0.5940106511116028, + "learning_rate": 2.9293682529899946e-06, + "loss": 0.5008, + "step": 9116 + }, + { + "epoch": 1.343467583497053, + "grad_norm": 0.6227166652679443, + "learning_rate": 2.9289863108347695e-06, + "loss": 0.5198, + "step": 9117 + }, + { + "epoch": 1.343614931237721, + "grad_norm": 0.5727578997612, + "learning_rate": 2.9286043583626243e-06, + "loss": 0.5566, + "step": 9118 + }, + { + "epoch": 1.343762278978389, + "grad_norm": 0.6131631731987, + "learning_rate": 2.928222395582746e-06, + "loss": 0.5441, + "step": 9119 + }, + { + "epoch": 1.343909626719057, + "grad_norm": 0.5634047389030457, + "learning_rate": 2.9278404225043196e-06, + "loss": 0.5376, + "step": 9120 + }, + { + "epoch": 1.344056974459725, + "grad_norm": 0.5761250257492065, + "learning_rate": 2.927458439136531e-06, + "loss": 0.5143, + "step": 9121 + }, + { + "epoch": 1.344204322200393, + "grad_norm": 0.620173990726471, + "learning_rate": 2.9270764454885685e-06, + "loss": 0.5336, + "step": 9122 + }, + { + "epoch": 1.344351669941061, + "grad_norm": 0.6242156624794006, + "learning_rate": 2.9266944415696173e-06, + "loss": 0.5309, + "step": 9123 + }, + { + "epoch": 1.3444990176817289, + "grad_norm": 0.5954384803771973, + "learning_rate": 2.926312427388865e-06, + "loss": 0.5322, + "step": 9124 + }, + { + "epoch": 1.3446463654223968, + "grad_norm": 0.5750898122787476, + "learning_rate": 2.9259304029554987e-06, + "loss": 0.5637, + "step": 9125 + }, + { + "epoch": 1.3447937131630647, + "grad_norm": 0.5958247184753418, + "learning_rate": 2.9255483682787068e-06, + "loss": 0.536, + "step": 9126 + }, + { + "epoch": 1.3449410609037329, + "grad_norm": 0.5967221856117249, + "learning_rate": 2.9251663233676753e-06, + "loss": 0.5508, + "step": 9127 + }, + { + "epoch": 1.3450884086444008, + "grad_norm": 0.5743563771247864, + "learning_rate": 2.924784268231593e-06, + "loss": 0.4969, + "step": 9128 + }, + { + "epoch": 1.3452357563850688, + "grad_norm": 0.6154966354370117, + "learning_rate": 2.924402202879649e-06, + "loss": 0.5282, + "step": 9129 + }, + { + "epoch": 1.3453831041257367, + "grad_norm": 0.5951300263404846, + "learning_rate": 2.92402012732103e-06, + "loss": 0.5371, + "step": 9130 + }, + { + "epoch": 1.3455304518664049, + "grad_norm": 0.6139770150184631, + "learning_rate": 2.923638041564927e-06, + "loss": 0.5402, + "step": 9131 + }, + { + "epoch": 1.3456777996070728, + "grad_norm": 0.6586671471595764, + "learning_rate": 2.923255945620527e-06, + "loss": 0.5567, + "step": 9132 + }, + { + "epoch": 1.3458251473477407, + "grad_norm": 0.6081450581550598, + "learning_rate": 2.9228738394970204e-06, + "loss": 0.5326, + "step": 9133 + }, + { + "epoch": 1.3459724950884087, + "grad_norm": 0.5748171210289001, + "learning_rate": 2.9224917232035955e-06, + "loss": 0.5324, + "step": 9134 + }, + { + "epoch": 1.3461198428290766, + "grad_norm": 0.5931693911552429, + "learning_rate": 2.9221095967494433e-06, + "loss": 0.579, + "step": 9135 + }, + { + "epoch": 1.3462671905697445, + "grad_norm": 0.6225234866142273, + "learning_rate": 2.9217274601437538e-06, + "loss": 0.5491, + "step": 9136 + }, + { + "epoch": 1.3464145383104125, + "grad_norm": 0.5849649906158447, + "learning_rate": 2.9213453133957167e-06, + "loss": 0.4892, + "step": 9137 + }, + { + "epoch": 1.3465618860510806, + "grad_norm": 0.5825097560882568, + "learning_rate": 2.9209631565145215e-06, + "loss": 0.5113, + "step": 9138 + }, + { + "epoch": 1.3467092337917486, + "grad_norm": 0.5489261150360107, + "learning_rate": 2.9205809895093607e-06, + "loss": 0.5796, + "step": 9139 + }, + { + "epoch": 1.3468565815324165, + "grad_norm": 0.5710273385047913, + "learning_rate": 2.920198812389424e-06, + "loss": 0.5117, + "step": 9140 + }, + { + "epoch": 1.3470039292730844, + "grad_norm": 0.6182997822761536, + "learning_rate": 2.919816625163903e-06, + "loss": 0.5453, + "step": 9141 + }, + { + "epoch": 1.3471512770137526, + "grad_norm": 0.5907214283943176, + "learning_rate": 2.919434427841989e-06, + "loss": 0.5352, + "step": 9142 + }, + { + "epoch": 1.3472986247544205, + "grad_norm": 0.6258677244186401, + "learning_rate": 2.919052220432874e-06, + "loss": 0.519, + "step": 9143 + }, + { + "epoch": 1.3474459724950885, + "grad_norm": 0.5857449173927307, + "learning_rate": 2.9186700029457495e-06, + "loss": 0.5279, + "step": 9144 + }, + { + "epoch": 1.3475933202357564, + "grad_norm": 0.6005064845085144, + "learning_rate": 2.9182877753898074e-06, + "loss": 0.5308, + "step": 9145 + }, + { + "epoch": 1.3477406679764243, + "grad_norm": 0.6082890629768372, + "learning_rate": 2.917905537774241e-06, + "loss": 0.5832, + "step": 9146 + }, + { + "epoch": 1.3478880157170923, + "grad_norm": 0.5880202054977417, + "learning_rate": 2.9175232901082424e-06, + "loss": 0.5016, + "step": 9147 + }, + { + "epoch": 1.3480353634577602, + "grad_norm": 0.6279739141464233, + "learning_rate": 2.9171410324010046e-06, + "loss": 0.5163, + "step": 9148 + }, + { + "epoch": 1.3481827111984284, + "grad_norm": 0.6427912712097168, + "learning_rate": 2.91675876466172e-06, + "loss": 0.545, + "step": 9149 + }, + { + "epoch": 1.3483300589390963, + "grad_norm": 0.5999623537063599, + "learning_rate": 2.9163764868995835e-06, + "loss": 0.5484, + "step": 9150 + }, + { + "epoch": 1.3484774066797642, + "grad_norm": 0.6030926704406738, + "learning_rate": 2.915994199123788e-06, + "loss": 0.5433, + "step": 9151 + }, + { + "epoch": 1.3486247544204322, + "grad_norm": 0.5682308673858643, + "learning_rate": 2.9156119013435256e-06, + "loss": 0.5558, + "step": 9152 + }, + { + "epoch": 1.3487721021611003, + "grad_norm": 0.5979129076004028, + "learning_rate": 2.9152295935679925e-06, + "loss": 0.5345, + "step": 9153 + }, + { + "epoch": 1.3489194499017683, + "grad_norm": 0.6094662547111511, + "learning_rate": 2.9148472758063827e-06, + "loss": 0.5357, + "step": 9154 + }, + { + "epoch": 1.3490667976424362, + "grad_norm": 0.6646454930305481, + "learning_rate": 2.9144649480678907e-06, + "loss": 0.5345, + "step": 9155 + }, + { + "epoch": 1.3492141453831041, + "grad_norm": 0.5924991965293884, + "learning_rate": 2.914082610361711e-06, + "loss": 0.5144, + "step": 9156 + }, + { + "epoch": 1.349361493123772, + "grad_norm": 0.6058846116065979, + "learning_rate": 2.9137002626970383e-06, + "loss": 0.536, + "step": 9157 + }, + { + "epoch": 1.34950884086444, + "grad_norm": 0.5963457822799683, + "learning_rate": 2.9133179050830683e-06, + "loss": 0.5388, + "step": 9158 + }, + { + "epoch": 1.349656188605108, + "grad_norm": 0.627909243106842, + "learning_rate": 2.912935537528997e-06, + "loss": 0.5031, + "step": 9159 + }, + { + "epoch": 1.349803536345776, + "grad_norm": 0.5845488905906677, + "learning_rate": 2.91255316004402e-06, + "loss": 0.5305, + "step": 9160 + }, + { + "epoch": 1.349950884086444, + "grad_norm": 0.6159253120422363, + "learning_rate": 2.9121707726373323e-06, + "loss": 0.5246, + "step": 9161 + }, + { + "epoch": 1.350098231827112, + "grad_norm": 0.6312740445137024, + "learning_rate": 2.911788375318131e-06, + "loss": 0.4844, + "step": 9162 + }, + { + "epoch": 1.35024557956778, + "grad_norm": 0.5442208647727966, + "learning_rate": 2.9114059680956125e-06, + "loss": 0.5251, + "step": 9163 + }, + { + "epoch": 1.350392927308448, + "grad_norm": 0.6022818684577942, + "learning_rate": 2.9110235509789734e-06, + "loss": 0.548, + "step": 9164 + }, + { + "epoch": 1.350540275049116, + "grad_norm": 0.6006143689155579, + "learning_rate": 2.9106411239774107e-06, + "loss": 0.5317, + "step": 9165 + }, + { + "epoch": 1.350687622789784, + "grad_norm": 0.5975980758666992, + "learning_rate": 2.9102586871001215e-06, + "loss": 0.5135, + "step": 9166 + }, + { + "epoch": 1.3508349705304519, + "grad_norm": 0.6497657895088196, + "learning_rate": 2.9098762403563036e-06, + "loss": 0.5355, + "step": 9167 + }, + { + "epoch": 1.3509823182711198, + "grad_norm": 0.5945002436637878, + "learning_rate": 2.909493783755154e-06, + "loss": 0.5466, + "step": 9168 + }, + { + "epoch": 1.3511296660117877, + "grad_norm": 0.6010496616363525, + "learning_rate": 2.909111317305871e-06, + "loss": 0.5228, + "step": 9169 + }, + { + "epoch": 1.3512770137524557, + "grad_norm": 0.608785092830658, + "learning_rate": 2.9087288410176527e-06, + "loss": 0.5288, + "step": 9170 + }, + { + "epoch": 1.3514243614931238, + "grad_norm": 0.6040342450141907, + "learning_rate": 2.9083463548996976e-06, + "loss": 0.5596, + "step": 9171 + }, + { + "epoch": 1.3515717092337918, + "grad_norm": 0.5918456315994263, + "learning_rate": 2.907963858961204e-06, + "loss": 0.5224, + "step": 9172 + }, + { + "epoch": 1.3517190569744597, + "grad_norm": 0.5888094305992126, + "learning_rate": 2.9075813532113715e-06, + "loss": 0.5313, + "step": 9173 + }, + { + "epoch": 1.3518664047151276, + "grad_norm": 0.6265828013420105, + "learning_rate": 2.907198837659398e-06, + "loss": 0.5314, + "step": 9174 + }, + { + "epoch": 1.3520137524557958, + "grad_norm": 0.6124598383903503, + "learning_rate": 2.906816312314484e-06, + "loss": 0.5014, + "step": 9175 + }, + { + "epoch": 1.3521611001964637, + "grad_norm": 0.6206343173980713, + "learning_rate": 2.9064337771858283e-06, + "loss": 0.5115, + "step": 9176 + }, + { + "epoch": 1.3523084479371317, + "grad_norm": 0.9874244928359985, + "learning_rate": 2.9060512322826305e-06, + "loss": 0.5414, + "step": 9177 + }, + { + "epoch": 1.3524557956777996, + "grad_norm": 0.619661271572113, + "learning_rate": 2.9056686776140914e-06, + "loss": 0.5444, + "step": 9178 + }, + { + "epoch": 1.3526031434184675, + "grad_norm": 0.6001849174499512, + "learning_rate": 2.9052861131894104e-06, + "loss": 0.5425, + "step": 9179 + }, + { + "epoch": 1.3527504911591355, + "grad_norm": 0.5981675982475281, + "learning_rate": 2.9049035390177887e-06, + "loss": 0.5271, + "step": 9180 + }, + { + "epoch": 1.3528978388998034, + "grad_norm": 0.5944292545318604, + "learning_rate": 2.9045209551084276e-06, + "loss": 0.5558, + "step": 9181 + }, + { + "epoch": 1.3530451866404716, + "grad_norm": 0.5742719769477844, + "learning_rate": 2.904138361470526e-06, + "loss": 0.5364, + "step": 9182 + }, + { + "epoch": 1.3531925343811395, + "grad_norm": 0.5951060056686401, + "learning_rate": 2.9037557581132867e-06, + "loss": 0.5315, + "step": 9183 + }, + { + "epoch": 1.3533398821218074, + "grad_norm": 0.5669539570808411, + "learning_rate": 2.903373145045911e-06, + "loss": 0.5096, + "step": 9184 + }, + { + "epoch": 1.3534872298624754, + "grad_norm": 0.5788537263870239, + "learning_rate": 2.9029905222776008e-06, + "loss": 0.5479, + "step": 9185 + }, + { + "epoch": 1.3536345776031435, + "grad_norm": 0.6175554394721985, + "learning_rate": 2.9026078898175567e-06, + "loss": 0.5317, + "step": 9186 + }, + { + "epoch": 1.3537819253438115, + "grad_norm": 0.6065375208854675, + "learning_rate": 2.9022252476749825e-06, + "loss": 0.5056, + "step": 9187 + }, + { + "epoch": 1.3539292730844794, + "grad_norm": 0.582814633846283, + "learning_rate": 2.9018425958590794e-06, + "loss": 0.536, + "step": 9188 + }, + { + "epoch": 1.3540766208251473, + "grad_norm": 0.6023467779159546, + "learning_rate": 2.9014599343790496e-06, + "loss": 0.5557, + "step": 9189 + }, + { + "epoch": 1.3542239685658153, + "grad_norm": 0.5974326729774475, + "learning_rate": 2.9010772632440974e-06, + "loss": 0.5429, + "step": 9190 + }, + { + "epoch": 1.3543713163064832, + "grad_norm": 0.5907664895057678, + "learning_rate": 2.9006945824634253e-06, + "loss": 0.5369, + "step": 9191 + }, + { + "epoch": 1.3545186640471512, + "grad_norm": 0.5599088072776794, + "learning_rate": 2.900311892046237e-06, + "loss": 0.5243, + "step": 9192 + }, + { + "epoch": 1.3546660117878193, + "grad_norm": 0.5948097705841064, + "learning_rate": 2.8999291920017347e-06, + "loss": 0.5534, + "step": 9193 + }, + { + "epoch": 1.3548133595284872, + "grad_norm": 0.5827615261077881, + "learning_rate": 2.899546482339123e-06, + "loss": 0.5199, + "step": 9194 + }, + { + "epoch": 1.3549607072691552, + "grad_norm": 0.5948676466941833, + "learning_rate": 2.899163763067605e-06, + "loss": 0.5127, + "step": 9195 + }, + { + "epoch": 1.3551080550098231, + "grad_norm": 0.5624764561653137, + "learning_rate": 2.898781034196387e-06, + "loss": 0.5347, + "step": 9196 + }, + { + "epoch": 1.3552554027504913, + "grad_norm": 0.5901741981506348, + "learning_rate": 2.8983982957346714e-06, + "loss": 0.5506, + "step": 9197 + }, + { + "epoch": 1.3554027504911592, + "grad_norm": 0.6000170707702637, + "learning_rate": 2.898015547691664e-06, + "loss": 0.5247, + "step": 9198 + }, + { + "epoch": 1.3555500982318271, + "grad_norm": 0.611346423625946, + "learning_rate": 2.897632790076569e-06, + "loss": 0.5307, + "step": 9199 + }, + { + "epoch": 1.355697445972495, + "grad_norm": 0.6202356219291687, + "learning_rate": 2.8972500228985922e-06, + "loss": 0.5224, + "step": 9200 + }, + { + "epoch": 1.355844793713163, + "grad_norm": 0.6233431696891785, + "learning_rate": 2.8968672461669385e-06, + "loss": 0.5118, + "step": 9201 + }, + { + "epoch": 1.355992141453831, + "grad_norm": 0.5739148855209351, + "learning_rate": 2.8964844598908137e-06, + "loss": 0.5445, + "step": 9202 + }, + { + "epoch": 1.356139489194499, + "grad_norm": 0.6127246618270874, + "learning_rate": 2.8961016640794235e-06, + "loss": 0.5355, + "step": 9203 + }, + { + "epoch": 1.356286836935167, + "grad_norm": 0.5835132598876953, + "learning_rate": 2.8957188587419743e-06, + "loss": 0.5318, + "step": 9204 + }, + { + "epoch": 1.356434184675835, + "grad_norm": 0.5996487140655518, + "learning_rate": 2.895336043887672e-06, + "loss": 0.5682, + "step": 9205 + }, + { + "epoch": 1.356581532416503, + "grad_norm": 0.6090771555900574, + "learning_rate": 2.8949532195257233e-06, + "loss": 0.5267, + "step": 9206 + }, + { + "epoch": 1.3567288801571709, + "grad_norm": 0.5753695368766785, + "learning_rate": 2.8945703856653345e-06, + "loss": 0.5308, + "step": 9207 + }, + { + "epoch": 1.356876227897839, + "grad_norm": 0.5913002490997314, + "learning_rate": 2.8941875423157134e-06, + "loss": 0.5352, + "step": 9208 + }, + { + "epoch": 1.357023575638507, + "grad_norm": 0.5871933102607727, + "learning_rate": 2.8938046894860666e-06, + "loss": 0.5304, + "step": 9209 + }, + { + "epoch": 1.3571709233791749, + "grad_norm": 0.5989846587181091, + "learning_rate": 2.8934218271856025e-06, + "loss": 0.541, + "step": 9210 + }, + { + "epoch": 1.3573182711198428, + "grad_norm": 0.6009491086006165, + "learning_rate": 2.893038955423527e-06, + "loss": 0.5357, + "step": 9211 + }, + { + "epoch": 1.3574656188605108, + "grad_norm": 0.5717447996139526, + "learning_rate": 2.8926560742090494e-06, + "loss": 0.5253, + "step": 9212 + }, + { + "epoch": 1.3576129666011787, + "grad_norm": 0.6267629265785217, + "learning_rate": 2.8922731835513774e-06, + "loss": 0.5574, + "step": 9213 + }, + { + "epoch": 1.3577603143418466, + "grad_norm": 0.5956650972366333, + "learning_rate": 2.8918902834597197e-06, + "loss": 0.5248, + "step": 9214 + }, + { + "epoch": 1.3579076620825148, + "grad_norm": 0.5884466171264648, + "learning_rate": 2.8915073739432836e-06, + "loss": 0.5146, + "step": 9215 + }, + { + "epoch": 1.3580550098231827, + "grad_norm": 0.6384528875350952, + "learning_rate": 2.8911244550112795e-06, + "loss": 0.5354, + "step": 9216 + }, + { + "epoch": 1.3582023575638507, + "grad_norm": 0.6148222088813782, + "learning_rate": 2.890741526672916e-06, + "loss": 0.5326, + "step": 9217 + }, + { + "epoch": 1.3583497053045186, + "grad_norm": 0.6194519400596619, + "learning_rate": 2.8903585889374012e-06, + "loss": 0.5445, + "step": 9218 + }, + { + "epoch": 1.3584970530451868, + "grad_norm": 0.5915213227272034, + "learning_rate": 2.8899756418139456e-06, + "loss": 0.5286, + "step": 9219 + }, + { + "epoch": 1.3586444007858547, + "grad_norm": 0.5813429355621338, + "learning_rate": 2.8895926853117597e-06, + "loss": 0.4942, + "step": 9220 + }, + { + "epoch": 1.3587917485265226, + "grad_norm": 0.6029638051986694, + "learning_rate": 2.8892097194400514e-06, + "loss": 0.5135, + "step": 9221 + }, + { + "epoch": 1.3589390962671906, + "grad_norm": 0.5697064399719238, + "learning_rate": 2.888826744208032e-06, + "loss": 0.5514, + "step": 9222 + }, + { + "epoch": 1.3590864440078585, + "grad_norm": 0.5879167318344116, + "learning_rate": 2.888443759624912e-06, + "loss": 0.5626, + "step": 9223 + }, + { + "epoch": 1.3592337917485264, + "grad_norm": 0.6089670658111572, + "learning_rate": 2.8880607656999014e-06, + "loss": 0.5279, + "step": 9224 + }, + { + "epoch": 1.3593811394891944, + "grad_norm": 0.5796257257461548, + "learning_rate": 2.887677762442212e-06, + "loss": 0.4898, + "step": 9225 + }, + { + "epoch": 1.3595284872298625, + "grad_norm": 0.6031965017318726, + "learning_rate": 2.8872947498610532e-06, + "loss": 0.4882, + "step": 9226 + }, + { + "epoch": 1.3596758349705305, + "grad_norm": 0.5618414282798767, + "learning_rate": 2.8869117279656376e-06, + "loss": 0.5235, + "step": 9227 + }, + { + "epoch": 1.3598231827111984, + "grad_norm": 0.5733559131622314, + "learning_rate": 2.8865286967651775e-06, + "loss": 0.5011, + "step": 9228 + }, + { + "epoch": 1.3599705304518663, + "grad_norm": 0.5975080132484436, + "learning_rate": 2.8861456562688828e-06, + "loss": 0.5698, + "step": 9229 + }, + { + "epoch": 1.3601178781925345, + "grad_norm": 0.649029552936554, + "learning_rate": 2.885762606485966e-06, + "loss": 0.5127, + "step": 9230 + }, + { + "epoch": 1.3602652259332024, + "grad_norm": 0.5891387462615967, + "learning_rate": 2.8853795474256395e-06, + "loss": 0.5151, + "step": 9231 + }, + { + "epoch": 1.3604125736738704, + "grad_norm": 0.582589328289032, + "learning_rate": 2.8849964790971165e-06, + "loss": 0.5389, + "step": 9232 + }, + { + "epoch": 1.3605599214145383, + "grad_norm": 0.5920457243919373, + "learning_rate": 2.8846134015096077e-06, + "loss": 0.5577, + "step": 9233 + }, + { + "epoch": 1.3607072691552062, + "grad_norm": 0.5852382183074951, + "learning_rate": 2.8842303146723272e-06, + "loss": 0.5338, + "step": 9234 + }, + { + "epoch": 1.3608546168958742, + "grad_norm": 0.5955714583396912, + "learning_rate": 2.883847218594488e-06, + "loss": 0.5464, + "step": 9235 + }, + { + "epoch": 1.361001964636542, + "grad_norm": 0.6274173855781555, + "learning_rate": 2.8834641132853037e-06, + "loss": 0.5272, + "step": 9236 + }, + { + "epoch": 1.3611493123772103, + "grad_norm": 0.5905704498291016, + "learning_rate": 2.8830809987539867e-06, + "loss": 0.5129, + "step": 9237 + }, + { + "epoch": 1.3612966601178782, + "grad_norm": 0.582211971282959, + "learning_rate": 2.882697875009752e-06, + "loss": 0.532, + "step": 9238 + }, + { + "epoch": 1.3614440078585461, + "grad_norm": 0.5518123507499695, + "learning_rate": 2.882314742061812e-06, + "loss": 0.4981, + "step": 9239 + }, + { + "epoch": 1.361591355599214, + "grad_norm": 0.6133272647857666, + "learning_rate": 2.881931599919382e-06, + "loss": 0.5542, + "step": 9240 + }, + { + "epoch": 1.3617387033398822, + "grad_norm": 0.595106303691864, + "learning_rate": 2.8815484485916773e-06, + "loss": 0.5442, + "step": 9241 + }, + { + "epoch": 1.3618860510805502, + "grad_norm": 0.6069771647453308, + "learning_rate": 2.8811652880879105e-06, + "loss": 0.5148, + "step": 9242 + }, + { + "epoch": 1.362033398821218, + "grad_norm": 0.6001421809196472, + "learning_rate": 2.880782118417297e-06, + "loss": 0.5578, + "step": 9243 + }, + { + "epoch": 1.362180746561886, + "grad_norm": 0.5773428678512573, + "learning_rate": 2.8803989395890518e-06, + "loss": 0.5627, + "step": 9244 + }, + { + "epoch": 1.362328094302554, + "grad_norm": 0.5742875933647156, + "learning_rate": 2.8800157516123913e-06, + "loss": 0.5445, + "step": 9245 + }, + { + "epoch": 1.362475442043222, + "grad_norm": 0.5953474044799805, + "learning_rate": 2.87963255449653e-06, + "loss": 0.5121, + "step": 9246 + }, + { + "epoch": 1.3626227897838898, + "grad_norm": 0.6136475801467896, + "learning_rate": 2.879249348250684e-06, + "loss": 0.525, + "step": 9247 + }, + { + "epoch": 1.362770137524558, + "grad_norm": 0.5696567296981812, + "learning_rate": 2.878866132884069e-06, + "loss": 0.5211, + "step": 9248 + }, + { + "epoch": 1.362917485265226, + "grad_norm": 0.6007147431373596, + "learning_rate": 2.8784829084059006e-06, + "loss": 0.5499, + "step": 9249 + }, + { + "epoch": 1.3630648330058939, + "grad_norm": 0.5810540318489075, + "learning_rate": 2.8780996748253962e-06, + "loss": 0.5243, + "step": 9250 + }, + { + "epoch": 1.3632121807465618, + "grad_norm": 0.5866184830665588, + "learning_rate": 2.8777164321517725e-06, + "loss": 0.5357, + "step": 9251 + }, + { + "epoch": 1.36335952848723, + "grad_norm": 0.5939967632293701, + "learning_rate": 2.877333180394245e-06, + "loss": 0.5301, + "step": 9252 + }, + { + "epoch": 1.363506876227898, + "grad_norm": 0.5932905673980713, + "learning_rate": 2.876949919562032e-06, + "loss": 0.528, + "step": 9253 + }, + { + "epoch": 1.3636542239685658, + "grad_norm": 0.5976588726043701, + "learning_rate": 2.87656664966435e-06, + "loss": 0.549, + "step": 9254 + }, + { + "epoch": 1.3638015717092338, + "grad_norm": 0.5944226384162903, + "learning_rate": 2.876183370710417e-06, + "loss": 0.5306, + "step": 9255 + }, + { + "epoch": 1.3639489194499017, + "grad_norm": 0.6011059284210205, + "learning_rate": 2.87580008270945e-06, + "loss": 0.512, + "step": 9256 + }, + { + "epoch": 1.3640962671905696, + "grad_norm": 0.6121737360954285, + "learning_rate": 2.8754167856706674e-06, + "loss": 0.5545, + "step": 9257 + }, + { + "epoch": 1.3642436149312378, + "grad_norm": 0.6068456768989563, + "learning_rate": 2.875033479603287e-06, + "loss": 0.5197, + "step": 9258 + }, + { + "epoch": 1.3643909626719057, + "grad_norm": 0.5703155994415283, + "learning_rate": 2.8746501645165277e-06, + "loss": 0.5679, + "step": 9259 + }, + { + "epoch": 1.3645383104125737, + "grad_norm": 0.6172766089439392, + "learning_rate": 2.874266840419607e-06, + "loss": 0.5403, + "step": 9260 + }, + { + "epoch": 1.3646856581532416, + "grad_norm": 0.5661584734916687, + "learning_rate": 2.8738835073217447e-06, + "loss": 0.5107, + "step": 9261 + }, + { + "epoch": 1.3648330058939095, + "grad_norm": 0.5769006609916687, + "learning_rate": 2.8735001652321596e-06, + "loss": 0.5403, + "step": 9262 + }, + { + "epoch": 1.3649803536345777, + "grad_norm": 0.6118279695510864, + "learning_rate": 2.873116814160071e-06, + "loss": 0.5475, + "step": 9263 + }, + { + "epoch": 1.3651277013752456, + "grad_norm": 0.6276359558105469, + "learning_rate": 2.8727334541146972e-06, + "loss": 0.5414, + "step": 9264 + }, + { + "epoch": 1.3652750491159136, + "grad_norm": 0.573936402797699, + "learning_rate": 2.8723500851052594e-06, + "loss": 0.5453, + "step": 9265 + }, + { + "epoch": 1.3654223968565815, + "grad_norm": 0.5988132953643799, + "learning_rate": 2.871966707140976e-06, + "loss": 0.5608, + "step": 9266 + }, + { + "epoch": 1.3655697445972494, + "grad_norm": 0.5934633016586304, + "learning_rate": 2.8715833202310683e-06, + "loss": 0.5408, + "step": 9267 + }, + { + "epoch": 1.3657170923379174, + "grad_norm": 0.5831035375595093, + "learning_rate": 2.8711999243847557e-06, + "loss": 0.5122, + "step": 9268 + }, + { + "epoch": 1.3658644400785855, + "grad_norm": 0.5954656600952148, + "learning_rate": 2.870816519611259e-06, + "loss": 0.5483, + "step": 9269 + }, + { + "epoch": 1.3660117878192535, + "grad_norm": 0.6065540909767151, + "learning_rate": 2.8704331059197992e-06, + "loss": 0.534, + "step": 9270 + }, + { + "epoch": 1.3661591355599214, + "grad_norm": 0.5936463475227356, + "learning_rate": 2.870049683319597e-06, + "loss": 0.5216, + "step": 9271 + }, + { + "epoch": 1.3663064833005893, + "grad_norm": 0.6038873195648193, + "learning_rate": 2.869666251819873e-06, + "loss": 0.5158, + "step": 9272 + }, + { + "epoch": 1.3664538310412575, + "grad_norm": 0.5770688652992249, + "learning_rate": 2.86928281142985e-06, + "loss": 0.5192, + "step": 9273 + }, + { + "epoch": 1.3666011787819254, + "grad_norm": 0.5750858187675476, + "learning_rate": 2.8688993621587473e-06, + "loss": 0.5363, + "step": 9274 + }, + { + "epoch": 1.3667485265225934, + "grad_norm": 0.6301537156105042, + "learning_rate": 2.868515904015789e-06, + "loss": 0.5058, + "step": 9275 + }, + { + "epoch": 1.3668958742632613, + "grad_norm": 0.590512216091156, + "learning_rate": 2.8681324370101953e-06, + "loss": 0.5458, + "step": 9276 + }, + { + "epoch": 1.3670432220039292, + "grad_norm": 0.5967755317687988, + "learning_rate": 2.86774896115119e-06, + "loss": 0.5332, + "step": 9277 + }, + { + "epoch": 1.3671905697445972, + "grad_norm": 0.5911713242530823, + "learning_rate": 2.867365476447994e-06, + "loss": 0.5502, + "step": 9278 + }, + { + "epoch": 1.3673379174852651, + "grad_norm": 0.6076062917709351, + "learning_rate": 2.8669819829098306e-06, + "loss": 0.5672, + "step": 9279 + }, + { + "epoch": 1.3674852652259333, + "grad_norm": 0.6020063757896423, + "learning_rate": 2.866598480545923e-06, + "loss": 0.5184, + "step": 9280 + }, + { + "epoch": 1.3676326129666012, + "grad_norm": 0.5924240946769714, + "learning_rate": 2.8662149693654928e-06, + "loss": 0.5097, + "step": 9281 + }, + { + "epoch": 1.3677799607072691, + "grad_norm": 0.5912865400314331, + "learning_rate": 2.8658314493777655e-06, + "loss": 0.5373, + "step": 9282 + }, + { + "epoch": 1.367927308447937, + "grad_norm": 0.6121885180473328, + "learning_rate": 2.8654479205919637e-06, + "loss": 0.5243, + "step": 9283 + }, + { + "epoch": 1.3680746561886052, + "grad_norm": 0.5871703624725342, + "learning_rate": 2.8650643830173104e-06, + "loss": 0.5356, + "step": 9284 + }, + { + "epoch": 1.3682220039292732, + "grad_norm": 0.6139234900474548, + "learning_rate": 2.8646808366630297e-06, + "loss": 0.5141, + "step": 9285 + }, + { + "epoch": 1.3683693516699411, + "grad_norm": 0.5901411175727844, + "learning_rate": 2.8642972815383462e-06, + "loss": 0.5549, + "step": 9286 + }, + { + "epoch": 1.368516699410609, + "grad_norm": 0.5745776295661926, + "learning_rate": 2.8639137176524834e-06, + "loss": 0.5403, + "step": 9287 + }, + { + "epoch": 1.368664047151277, + "grad_norm": 0.615553081035614, + "learning_rate": 2.863530145014667e-06, + "loss": 0.5389, + "step": 9288 + }, + { + "epoch": 1.368811394891945, + "grad_norm": 0.6081511974334717, + "learning_rate": 2.863146563634121e-06, + "loss": 0.5417, + "step": 9289 + }, + { + "epoch": 1.3689587426326129, + "grad_norm": 0.5957953929901123, + "learning_rate": 2.86276297352007e-06, + "loss": 0.5207, + "step": 9290 + }, + { + "epoch": 1.369106090373281, + "grad_norm": 0.5897999405860901, + "learning_rate": 2.8623793746817402e-06, + "loss": 0.5187, + "step": 9291 + }, + { + "epoch": 1.369253438113949, + "grad_norm": 0.6009309887886047, + "learning_rate": 2.861995767128356e-06, + "loss": 0.5417, + "step": 9292 + }, + { + "epoch": 1.3694007858546169, + "grad_norm": 0.5940969586372375, + "learning_rate": 2.8616121508691436e-06, + "loss": 0.5315, + "step": 9293 + }, + { + "epoch": 1.3695481335952848, + "grad_norm": 0.5774976015090942, + "learning_rate": 2.861228525913329e-06, + "loss": 0.5299, + "step": 9294 + }, + { + "epoch": 1.369695481335953, + "grad_norm": 0.5807084441184998, + "learning_rate": 2.860844892270137e-06, + "loss": 0.5021, + "step": 9295 + }, + { + "epoch": 1.369842829076621, + "grad_norm": 0.5982988476753235, + "learning_rate": 2.860461249948795e-06, + "loss": 0.4919, + "step": 9296 + }, + { + "epoch": 1.3699901768172889, + "grad_norm": 0.6057292819023132, + "learning_rate": 2.860077598958529e-06, + "loss": 0.5305, + "step": 9297 + }, + { + "epoch": 1.3701375245579568, + "grad_norm": 0.6108629703521729, + "learning_rate": 2.8596939393085653e-06, + "loss": 0.5745, + "step": 9298 + }, + { + "epoch": 1.3702848722986247, + "grad_norm": 0.5856649279594421, + "learning_rate": 2.859310271008131e-06, + "loss": 0.5461, + "step": 9299 + }, + { + "epoch": 1.3704322200392927, + "grad_norm": 0.588903546333313, + "learning_rate": 2.8589265940664535e-06, + "loss": 0.5153, + "step": 9300 + }, + { + "epoch": 1.3705795677799606, + "grad_norm": 0.5710193514823914, + "learning_rate": 2.8585429084927598e-06, + "loss": 0.552, + "step": 9301 + }, + { + "epoch": 1.3707269155206288, + "grad_norm": 0.61052405834198, + "learning_rate": 2.8581592142962774e-06, + "loss": 0.5496, + "step": 9302 + }, + { + "epoch": 1.3708742632612967, + "grad_norm": 0.595568835735321, + "learning_rate": 2.857775511486234e-06, + "loss": 0.49, + "step": 9303 + }, + { + "epoch": 1.3710216110019646, + "grad_norm": 0.6233636140823364, + "learning_rate": 2.8573918000718566e-06, + "loss": 0.527, + "step": 9304 + }, + { + "epoch": 1.3711689587426326, + "grad_norm": 0.5593267679214478, + "learning_rate": 2.8570080800623747e-06, + "loss": 0.5404, + "step": 9305 + }, + { + "epoch": 1.3713163064833007, + "grad_norm": 0.5734500885009766, + "learning_rate": 2.8566243514670157e-06, + "loss": 0.5284, + "step": 9306 + }, + { + "epoch": 1.3714636542239687, + "grad_norm": 0.6220394372940063, + "learning_rate": 2.856240614295009e-06, + "loss": 0.5443, + "step": 9307 + }, + { + "epoch": 1.3716110019646366, + "grad_norm": 0.5800078511238098, + "learning_rate": 2.855856868555582e-06, + "loss": 0.5394, + "step": 9308 + }, + { + "epoch": 1.3717583497053045, + "grad_norm": 0.6205489039421082, + "learning_rate": 2.8554731142579644e-06, + "loss": 0.5244, + "step": 9309 + }, + { + "epoch": 1.3719056974459725, + "grad_norm": 0.6063452363014221, + "learning_rate": 2.855089351411385e-06, + "loss": 0.5426, + "step": 9310 + }, + { + "epoch": 1.3720530451866404, + "grad_norm": 0.6024577617645264, + "learning_rate": 2.854705580025073e-06, + "loss": 0.5363, + "step": 9311 + }, + { + "epoch": 1.3722003929273083, + "grad_norm": 0.6012864112854004, + "learning_rate": 2.8543218001082582e-06, + "loss": 0.5412, + "step": 9312 + }, + { + "epoch": 1.3723477406679765, + "grad_norm": 0.5806453824043274, + "learning_rate": 2.853938011670171e-06, + "loss": 0.5446, + "step": 9313 + }, + { + "epoch": 1.3724950884086444, + "grad_norm": 0.5932196974754333, + "learning_rate": 2.8535542147200396e-06, + "loss": 0.5425, + "step": 9314 + }, + { + "epoch": 1.3726424361493124, + "grad_norm": 0.5975964069366455, + "learning_rate": 2.853170409267096e-06, + "loss": 0.5462, + "step": 9315 + }, + { + "epoch": 1.3727897838899803, + "grad_norm": 0.611703634262085, + "learning_rate": 2.852786595320569e-06, + "loss": 0.5369, + "step": 9316 + }, + { + "epoch": 1.3729371316306485, + "grad_norm": 0.6191617250442505, + "learning_rate": 2.8524027728896903e-06, + "loss": 0.5117, + "step": 9317 + }, + { + "epoch": 1.3730844793713164, + "grad_norm": 0.5708540081977844, + "learning_rate": 2.85201894198369e-06, + "loss": 0.5477, + "step": 9318 + }, + { + "epoch": 1.3732318271119843, + "grad_norm": 0.6274484395980835, + "learning_rate": 2.8516351026117988e-06, + "loss": 0.5419, + "step": 9319 + }, + { + "epoch": 1.3733791748526523, + "grad_norm": 0.6146249175071716, + "learning_rate": 2.8512512547832494e-06, + "loss": 0.5498, + "step": 9320 + }, + { + "epoch": 1.3735265225933202, + "grad_norm": 0.6036845445632935, + "learning_rate": 2.850867398507271e-06, + "loss": 0.5482, + "step": 9321 + }, + { + "epoch": 1.3736738703339881, + "grad_norm": 0.5823309421539307, + "learning_rate": 2.850483533793097e-06, + "loss": 0.5407, + "step": 9322 + }, + { + "epoch": 1.373821218074656, + "grad_norm": 0.573357105255127, + "learning_rate": 2.850099660649958e-06, + "loss": 0.5557, + "step": 9323 + }, + { + "epoch": 1.3739685658153242, + "grad_norm": 0.5820261836051941, + "learning_rate": 2.8497157790870866e-06, + "loss": 0.5305, + "step": 9324 + }, + { + "epoch": 1.3741159135559922, + "grad_norm": 0.6069490313529968, + "learning_rate": 2.849331889113715e-06, + "loss": 0.5339, + "step": 9325 + }, + { + "epoch": 1.37426326129666, + "grad_norm": 0.588880181312561, + "learning_rate": 2.848947990739075e-06, + "loss": 0.5311, + "step": 9326 + }, + { + "epoch": 1.374410609037328, + "grad_norm": 0.5658535957336426, + "learning_rate": 2.8485640839723993e-06, + "loss": 0.4962, + "step": 9327 + }, + { + "epoch": 1.3745579567779962, + "grad_norm": 0.5992171764373779, + "learning_rate": 2.848180168822921e-06, + "loss": 0.5355, + "step": 9328 + }, + { + "epoch": 1.3747053045186641, + "grad_norm": 0.6054739952087402, + "learning_rate": 2.8477962452998733e-06, + "loss": 0.5287, + "step": 9329 + }, + { + "epoch": 1.374852652259332, + "grad_norm": 0.598115086555481, + "learning_rate": 2.8474123134124887e-06, + "loss": 0.5457, + "step": 9330 + }, + { + "epoch": 1.375, + "grad_norm": 0.6044753789901733, + "learning_rate": 2.847028373170001e-06, + "loss": 0.5309, + "step": 9331 + }, + { + "epoch": 1.375147347740668, + "grad_norm": 0.5700915455818176, + "learning_rate": 2.8466444245816437e-06, + "loss": 0.5309, + "step": 9332 + }, + { + "epoch": 1.3752946954813359, + "grad_norm": 0.6225998997688293, + "learning_rate": 2.846260467656651e-06, + "loss": 0.5635, + "step": 9333 + }, + { + "epoch": 1.3754420432220038, + "grad_norm": 0.5867668390274048, + "learning_rate": 2.8458765024042556e-06, + "loss": 0.5359, + "step": 9334 + }, + { + "epoch": 1.375589390962672, + "grad_norm": 0.607252836227417, + "learning_rate": 2.845492528833693e-06, + "loss": 0.5071, + "step": 9335 + }, + { + "epoch": 1.37573673870334, + "grad_norm": 0.5906837582588196, + "learning_rate": 2.8451085469541965e-06, + "loss": 0.5632, + "step": 9336 + }, + { + "epoch": 1.3758840864440078, + "grad_norm": 0.6236119866371155, + "learning_rate": 2.844724556775002e-06, + "loss": 0.5531, + "step": 9337 + }, + { + "epoch": 1.3760314341846758, + "grad_norm": 0.5660154819488525, + "learning_rate": 2.8443405583053435e-06, + "loss": 0.5421, + "step": 9338 + }, + { + "epoch": 1.376178781925344, + "grad_norm": 0.6118252873420715, + "learning_rate": 2.8439565515544557e-06, + "loss": 0.5036, + "step": 9339 + }, + { + "epoch": 1.3763261296660119, + "grad_norm": 0.5919593572616577, + "learning_rate": 2.8435725365315747e-06, + "loss": 0.5349, + "step": 9340 + }, + { + "epoch": 1.3764734774066798, + "grad_norm": 0.5935997366905212, + "learning_rate": 2.843188513245935e-06, + "loss": 0.5581, + "step": 9341 + }, + { + "epoch": 1.3766208251473477, + "grad_norm": 0.569215714931488, + "learning_rate": 2.842804481706773e-06, + "loss": 0.5391, + "step": 9342 + }, + { + "epoch": 1.3767681728880157, + "grad_norm": 0.6099120378494263, + "learning_rate": 2.842420441923323e-06, + "loss": 0.5432, + "step": 9343 + }, + { + "epoch": 1.3769155206286836, + "grad_norm": 0.5760146379470825, + "learning_rate": 2.842036393904823e-06, + "loss": 0.5469, + "step": 9344 + }, + { + "epoch": 1.3770628683693515, + "grad_norm": 0.5901094079017639, + "learning_rate": 2.8416523376605074e-06, + "loss": 0.5574, + "step": 9345 + }, + { + "epoch": 1.3772102161100197, + "grad_norm": 0.6170943975448608, + "learning_rate": 2.8412682731996137e-06, + "loss": 0.5445, + "step": 9346 + }, + { + "epoch": 1.3773575638506876, + "grad_norm": 0.5855755805969238, + "learning_rate": 2.8408842005313785e-06, + "loss": 0.5218, + "step": 9347 + }, + { + "epoch": 1.3775049115913556, + "grad_norm": 0.5943534970283508, + "learning_rate": 2.8405001196650376e-06, + "loss": 0.5016, + "step": 9348 + }, + { + "epoch": 1.3776522593320235, + "grad_norm": 0.6184993982315063, + "learning_rate": 2.8401160306098286e-06, + "loss": 0.511, + "step": 9349 + }, + { + "epoch": 1.3777996070726917, + "grad_norm": 0.6091743111610413, + "learning_rate": 2.839731933374989e-06, + "loss": 0.509, + "step": 9350 + }, + { + "epoch": 1.3779469548133596, + "grad_norm": 0.6232195496559143, + "learning_rate": 2.839347827969755e-06, + "loss": 0.5407, + "step": 9351 + }, + { + "epoch": 1.3780943025540275, + "grad_norm": 0.557489275932312, + "learning_rate": 2.8389637144033657e-06, + "loss": 0.5582, + "step": 9352 + }, + { + "epoch": 1.3782416502946955, + "grad_norm": 0.5897760987281799, + "learning_rate": 2.8385795926850573e-06, + "loss": 0.5469, + "step": 9353 + }, + { + "epoch": 1.3783889980353634, + "grad_norm": 0.6339132189750671, + "learning_rate": 2.8381954628240695e-06, + "loss": 0.5551, + "step": 9354 + }, + { + "epoch": 1.3785363457760313, + "grad_norm": 0.6054017543792725, + "learning_rate": 2.8378113248296384e-06, + "loss": 0.5408, + "step": 9355 + }, + { + "epoch": 1.3786836935166993, + "grad_norm": 0.5979182124137878, + "learning_rate": 2.8374271787110035e-06, + "loss": 0.5553, + "step": 9356 + }, + { + "epoch": 1.3788310412573674, + "grad_norm": 0.6349807381629944, + "learning_rate": 2.8370430244774038e-06, + "loss": 0.5435, + "step": 9357 + }, + { + "epoch": 1.3789783889980354, + "grad_norm": 0.6021391749382019, + "learning_rate": 2.836658862138077e-06, + "loss": 0.5282, + "step": 9358 + }, + { + "epoch": 1.3791257367387033, + "grad_norm": 0.6087602972984314, + "learning_rate": 2.8362746917022628e-06, + "loss": 0.5614, + "step": 9359 + }, + { + "epoch": 1.3792730844793712, + "grad_norm": 0.579860508441925, + "learning_rate": 2.8358905131791996e-06, + "loss": 0.528, + "step": 9360 + }, + { + "epoch": 1.3794204322200394, + "grad_norm": 0.5996345281600952, + "learning_rate": 2.8355063265781276e-06, + "loss": 0.4879, + "step": 9361 + }, + { + "epoch": 1.3795677799607073, + "grad_norm": 0.6037408709526062, + "learning_rate": 2.8351221319082854e-06, + "loss": 0.543, + "step": 9362 + }, + { + "epoch": 1.3797151277013753, + "grad_norm": 0.617408037185669, + "learning_rate": 2.834737929178913e-06, + "loss": 0.4946, + "step": 9363 + }, + { + "epoch": 1.3798624754420432, + "grad_norm": 0.5780909061431885, + "learning_rate": 2.834353718399251e-06, + "loss": 0.5075, + "step": 9364 + }, + { + "epoch": 1.3800098231827111, + "grad_norm": 0.6053791046142578, + "learning_rate": 2.8339694995785382e-06, + "loss": 0.558, + "step": 9365 + }, + { + "epoch": 1.380157170923379, + "grad_norm": 0.5996767282485962, + "learning_rate": 2.833585272726016e-06, + "loss": 0.5471, + "step": 9366 + }, + { + "epoch": 1.380304518664047, + "grad_norm": 0.5934649705886841, + "learning_rate": 2.8332010378509243e-06, + "loss": 0.5223, + "step": 9367 + }, + { + "epoch": 1.3804518664047152, + "grad_norm": 0.5979804396629333, + "learning_rate": 2.8328167949625042e-06, + "loss": 0.5104, + "step": 9368 + }, + { + "epoch": 1.3805992141453831, + "grad_norm": 0.6194608211517334, + "learning_rate": 2.8324325440699955e-06, + "loss": 0.5421, + "step": 9369 + }, + { + "epoch": 1.380746561886051, + "grad_norm": 0.6077864766120911, + "learning_rate": 2.832048285182641e-06, + "loss": 0.526, + "step": 9370 + }, + { + "epoch": 1.380893909626719, + "grad_norm": 0.5865467190742493, + "learning_rate": 2.8316640183096804e-06, + "loss": 0.5393, + "step": 9371 + }, + { + "epoch": 1.3810412573673871, + "grad_norm": 0.6232814192771912, + "learning_rate": 2.8312797434603554e-06, + "loss": 0.4798, + "step": 9372 + }, + { + "epoch": 1.381188605108055, + "grad_norm": 0.5742001533508301, + "learning_rate": 2.830895460643908e-06, + "loss": 0.5041, + "step": 9373 + }, + { + "epoch": 1.381335952848723, + "grad_norm": 0.6024831533432007, + "learning_rate": 2.8305111698695814e-06, + "loss": 0.5443, + "step": 9374 + }, + { + "epoch": 1.381483300589391, + "grad_norm": 0.5969687104225159, + "learning_rate": 2.830126871146615e-06, + "loss": 0.5498, + "step": 9375 + }, + { + "epoch": 1.3816306483300589, + "grad_norm": 0.5864236950874329, + "learning_rate": 2.8297425644842526e-06, + "loss": 0.5462, + "step": 9376 + }, + { + "epoch": 1.3817779960707268, + "grad_norm": 0.5909931659698486, + "learning_rate": 2.829358249891736e-06, + "loss": 0.5503, + "step": 9377 + }, + { + "epoch": 1.3819253438113948, + "grad_norm": 0.6514539122581482, + "learning_rate": 2.8289739273783077e-06, + "loss": 0.5345, + "step": 9378 + }, + { + "epoch": 1.382072691552063, + "grad_norm": 0.58072829246521, + "learning_rate": 2.8285895969532113e-06, + "loss": 0.5549, + "step": 9379 + }, + { + "epoch": 1.3822200392927309, + "grad_norm": 0.6150267720222473, + "learning_rate": 2.8282052586256885e-06, + "loss": 0.5587, + "step": 9380 + }, + { + "epoch": 1.3823673870333988, + "grad_norm": 0.5852316617965698, + "learning_rate": 2.8278209124049837e-06, + "loss": 0.5127, + "step": 9381 + }, + { + "epoch": 1.3825147347740667, + "grad_norm": 0.5675297379493713, + "learning_rate": 2.8274365583003393e-06, + "loss": 0.5279, + "step": 9382 + }, + { + "epoch": 1.3826620825147349, + "grad_norm": 0.5967981219291687, + "learning_rate": 2.8270521963209995e-06, + "loss": 0.5163, + "step": 9383 + }, + { + "epoch": 1.3828094302554028, + "grad_norm": 0.609145998954773, + "learning_rate": 2.8266678264762082e-06, + "loss": 0.5493, + "step": 9384 + }, + { + "epoch": 1.3829567779960708, + "grad_norm": 0.6243659853935242, + "learning_rate": 2.8262834487752084e-06, + "loss": 0.5206, + "step": 9385 + }, + { + "epoch": 1.3831041257367387, + "grad_norm": 0.5819743275642395, + "learning_rate": 2.825899063227245e-06, + "loss": 0.5206, + "step": 9386 + }, + { + "epoch": 1.3832514734774066, + "grad_norm": 0.5944938063621521, + "learning_rate": 2.8255146698415616e-06, + "loss": 0.5369, + "step": 9387 + }, + { + "epoch": 1.3833988212180746, + "grad_norm": 0.6194116473197937, + "learning_rate": 2.8251302686274028e-06, + "loss": 0.536, + "step": 9388 + }, + { + "epoch": 1.3835461689587425, + "grad_norm": 0.6174446940422058, + "learning_rate": 2.8247458595940143e-06, + "loss": 0.5391, + "step": 9389 + }, + { + "epoch": 1.3836935166994107, + "grad_norm": 0.6358305215835571, + "learning_rate": 2.8243614427506394e-06, + "loss": 0.5609, + "step": 9390 + }, + { + "epoch": 1.3838408644400786, + "grad_norm": 0.590300977230072, + "learning_rate": 2.8239770181065244e-06, + "loss": 0.5235, + "step": 9391 + }, + { + "epoch": 1.3839882121807465, + "grad_norm": 0.5900238752365112, + "learning_rate": 2.823592585670914e-06, + "loss": 0.5219, + "step": 9392 + }, + { + "epoch": 1.3841355599214145, + "grad_norm": 0.6213523149490356, + "learning_rate": 2.8232081454530535e-06, + "loss": 0.5485, + "step": 9393 + }, + { + "epoch": 1.3842829076620826, + "grad_norm": 0.5619959831237793, + "learning_rate": 2.8228236974621892e-06, + "loss": 0.554, + "step": 9394 + }, + { + "epoch": 1.3844302554027506, + "grad_norm": 0.5837584733963013, + "learning_rate": 2.822439241707566e-06, + "loss": 0.5532, + "step": 9395 + }, + { + "epoch": 1.3845776031434185, + "grad_norm": 0.5798329710960388, + "learning_rate": 2.8220547781984303e-06, + "loss": 0.5216, + "step": 9396 + }, + { + "epoch": 1.3847249508840864, + "grad_norm": 0.6035570502281189, + "learning_rate": 2.821670306944028e-06, + "loss": 0.5265, + "step": 9397 + }, + { + "epoch": 1.3848722986247544, + "grad_norm": 0.6133591532707214, + "learning_rate": 2.821285827953607e-06, + "loss": 0.5487, + "step": 9398 + }, + { + "epoch": 1.3850196463654223, + "grad_norm": 0.5892952680587769, + "learning_rate": 2.8209013412364116e-06, + "loss": 0.5688, + "step": 9399 + }, + { + "epoch": 1.3851669941060905, + "grad_norm": 0.5711392164230347, + "learning_rate": 2.820516846801689e-06, + "loss": 0.5476, + "step": 9400 + }, + { + "epoch": 1.3853143418467584, + "grad_norm": 0.5560675263404846, + "learning_rate": 2.820132344658687e-06, + "loss": 0.5172, + "step": 9401 + }, + { + "epoch": 1.3854616895874263, + "grad_norm": 0.5867301821708679, + "learning_rate": 2.8197478348166524e-06, + "loss": 0.5055, + "step": 9402 + }, + { + "epoch": 1.3856090373280943, + "grad_norm": 0.6277143955230713, + "learning_rate": 2.819363317284832e-06, + "loss": 0.544, + "step": 9403 + }, + { + "epoch": 1.3857563850687622, + "grad_norm": 0.6088715195655823, + "learning_rate": 2.8189787920724744e-06, + "loss": 0.5361, + "step": 9404 + }, + { + "epoch": 1.3859037328094304, + "grad_norm": 0.5745201110839844, + "learning_rate": 2.8185942591888253e-06, + "loss": 0.5496, + "step": 9405 + }, + { + "epoch": 1.3860510805500983, + "grad_norm": 0.5771067142486572, + "learning_rate": 2.8182097186431346e-06, + "loss": 0.5225, + "step": 9406 + }, + { + "epoch": 1.3861984282907662, + "grad_norm": 0.5892012715339661, + "learning_rate": 2.817825170444649e-06, + "loss": 0.5363, + "step": 9407 + }, + { + "epoch": 1.3863457760314342, + "grad_norm": 0.603553831577301, + "learning_rate": 2.8174406146026166e-06, + "loss": 0.5585, + "step": 9408 + }, + { + "epoch": 1.386493123772102, + "grad_norm": 0.5820625424385071, + "learning_rate": 2.817056051126287e-06, + "loss": 0.5379, + "step": 9409 + }, + { + "epoch": 1.38664047151277, + "grad_norm": 0.6417454481124878, + "learning_rate": 2.816671480024908e-06, + "loss": 0.5407, + "step": 9410 + }, + { + "epoch": 1.3867878192534382, + "grad_norm": 0.6190982460975647, + "learning_rate": 2.816286901307728e-06, + "loss": 0.534, + "step": 9411 + }, + { + "epoch": 1.3869351669941061, + "grad_norm": 0.5609854459762573, + "learning_rate": 2.815902314983997e-06, + "loss": 0.545, + "step": 9412 + }, + { + "epoch": 1.387082514734774, + "grad_norm": 0.5863822102546692, + "learning_rate": 2.815517721062963e-06, + "loss": 0.541, + "step": 9413 + }, + { + "epoch": 1.387229862475442, + "grad_norm": 0.5830988883972168, + "learning_rate": 2.815133119553876e-06, + "loss": 0.543, + "step": 9414 + }, + { + "epoch": 1.3873772102161102, + "grad_norm": 0.5728414058685303, + "learning_rate": 2.8147485104659854e-06, + "loss": 0.52, + "step": 9415 + }, + { + "epoch": 1.387524557956778, + "grad_norm": 0.6090782284736633, + "learning_rate": 2.814363893808541e-06, + "loss": 0.5099, + "step": 9416 + }, + { + "epoch": 1.387671905697446, + "grad_norm": 0.6419918537139893, + "learning_rate": 2.813979269590792e-06, + "loss": 0.5094, + "step": 9417 + }, + { + "epoch": 1.387819253438114, + "grad_norm": 0.5941023230552673, + "learning_rate": 2.813594637821989e-06, + "loss": 0.5385, + "step": 9418 + }, + { + "epoch": 1.387966601178782, + "grad_norm": 0.5881707072257996, + "learning_rate": 2.813209998511382e-06, + "loss": 0.5065, + "step": 9419 + }, + { + "epoch": 1.3881139489194498, + "grad_norm": 0.5615877509117126, + "learning_rate": 2.8128253516682215e-06, + "loss": 0.5422, + "step": 9420 + }, + { + "epoch": 1.3882612966601178, + "grad_norm": 0.6122201085090637, + "learning_rate": 2.8124406973017577e-06, + "loss": 0.5393, + "step": 9421 + }, + { + "epoch": 1.388408644400786, + "grad_norm": 0.6128215193748474, + "learning_rate": 2.812056035421242e-06, + "loss": 0.5448, + "step": 9422 + }, + { + "epoch": 1.3885559921414539, + "grad_norm": 0.577695369720459, + "learning_rate": 2.811671366035925e-06, + "loss": 0.5266, + "step": 9423 + }, + { + "epoch": 1.3887033398821218, + "grad_norm": 0.6184230446815491, + "learning_rate": 2.8112866891550583e-06, + "loss": 0.5392, + "step": 9424 + }, + { + "epoch": 1.3888506876227897, + "grad_norm": 0.5928309559822083, + "learning_rate": 2.810902004787892e-06, + "loss": 0.5393, + "step": 9425 + }, + { + "epoch": 1.388998035363458, + "grad_norm": 0.6218499541282654, + "learning_rate": 2.8105173129436787e-06, + "loss": 0.5089, + "step": 9426 + }, + { + "epoch": 1.3891453831041258, + "grad_norm": 0.6371063590049744, + "learning_rate": 2.8101326136316693e-06, + "loss": 0.5237, + "step": 9427 + }, + { + "epoch": 1.3892927308447938, + "grad_norm": 0.6056108474731445, + "learning_rate": 2.8097479068611156e-06, + "loss": 0.5516, + "step": 9428 + }, + { + "epoch": 1.3894400785854617, + "grad_norm": 0.6098662614822388, + "learning_rate": 2.80936319264127e-06, + "loss": 0.5422, + "step": 9429 + }, + { + "epoch": 1.3895874263261296, + "grad_norm": 0.5636065006256104, + "learning_rate": 2.8089784709813857e-06, + "loss": 0.5143, + "step": 9430 + }, + { + "epoch": 1.3897347740667976, + "grad_norm": 0.6105050444602966, + "learning_rate": 2.8085937418907137e-06, + "loss": 0.5054, + "step": 9431 + }, + { + "epoch": 1.3898821218074655, + "grad_norm": 0.6347804665565491, + "learning_rate": 2.8082090053785064e-06, + "loss": 0.5463, + "step": 9432 + }, + { + "epoch": 1.3900294695481337, + "grad_norm": 0.5676878690719604, + "learning_rate": 2.807824261454018e-06, + "loss": 0.5064, + "step": 9433 + }, + { + "epoch": 1.3901768172888016, + "grad_norm": 0.6288904547691345, + "learning_rate": 2.8074395101265e-06, + "loss": 0.5342, + "step": 9434 + }, + { + "epoch": 1.3903241650294695, + "grad_norm": 0.5875034332275391, + "learning_rate": 2.807054751405206e-06, + "loss": 0.4848, + "step": 9435 + }, + { + "epoch": 1.3904715127701375, + "grad_norm": 0.5946412682533264, + "learning_rate": 2.8066699852993893e-06, + "loss": 0.5573, + "step": 9436 + }, + { + "epoch": 1.3906188605108056, + "grad_norm": 0.6210314035415649, + "learning_rate": 2.8062852118183037e-06, + "loss": 0.5413, + "step": 9437 + }, + { + "epoch": 1.3907662082514736, + "grad_norm": 0.6131764054298401, + "learning_rate": 2.805900430971202e-06, + "loss": 0.5057, + "step": 9438 + }, + { + "epoch": 1.3909135559921415, + "grad_norm": 0.5590509176254272, + "learning_rate": 2.805515642767338e-06, + "loss": 0.5147, + "step": 9439 + }, + { + "epoch": 1.3910609037328094, + "grad_norm": 0.5788787603378296, + "learning_rate": 2.8051308472159665e-06, + "loss": 0.5029, + "step": 9440 + }, + { + "epoch": 1.3912082514734774, + "grad_norm": 0.5792824625968933, + "learning_rate": 2.8047460443263412e-06, + "loss": 0.4936, + "step": 9441 + }, + { + "epoch": 1.3913555992141453, + "grad_norm": 0.617714524269104, + "learning_rate": 2.804361234107717e-06, + "loss": 0.5391, + "step": 9442 + }, + { + "epoch": 1.3915029469548132, + "grad_norm": 0.6052968502044678, + "learning_rate": 2.8039764165693473e-06, + "loss": 0.5232, + "step": 9443 + }, + { + "epoch": 1.3916502946954814, + "grad_norm": 0.5859036445617676, + "learning_rate": 2.8035915917204877e-06, + "loss": 0.55, + "step": 9444 + }, + { + "epoch": 1.3917976424361493, + "grad_norm": 0.6142366528511047, + "learning_rate": 2.803206759570392e-06, + "loss": 0.5271, + "step": 9445 + }, + { + "epoch": 1.3919449901768173, + "grad_norm": 0.6427129507064819, + "learning_rate": 2.802821920128316e-06, + "loss": 0.5331, + "step": 9446 + }, + { + "epoch": 1.3920923379174852, + "grad_norm": 0.5545939207077026, + "learning_rate": 2.8024370734035157e-06, + "loss": 0.4981, + "step": 9447 + }, + { + "epoch": 1.3922396856581534, + "grad_norm": 0.5979474186897278, + "learning_rate": 2.8020522194052454e-06, + "loss": 0.56, + "step": 9448 + }, + { + "epoch": 1.3923870333988213, + "grad_norm": 0.649522602558136, + "learning_rate": 2.8016673581427604e-06, + "loss": 0.5561, + "step": 9449 + }, + { + "epoch": 1.3925343811394892, + "grad_norm": 0.5876821279525757, + "learning_rate": 2.801282489625317e-06, + "loss": 0.547, + "step": 9450 + }, + { + "epoch": 1.3926817288801572, + "grad_norm": 0.5749251246452332, + "learning_rate": 2.800897613862172e-06, + "loss": 0.5004, + "step": 9451 + }, + { + "epoch": 1.3928290766208251, + "grad_norm": 0.609677255153656, + "learning_rate": 2.8005127308625797e-06, + "loss": 0.5244, + "step": 9452 + }, + { + "epoch": 1.392976424361493, + "grad_norm": 0.565189003944397, + "learning_rate": 2.8001278406357968e-06, + "loss": 0.5465, + "step": 9453 + }, + { + "epoch": 1.393123772102161, + "grad_norm": 0.5837709903717041, + "learning_rate": 2.7997429431910806e-06, + "loss": 0.5471, + "step": 9454 + }, + { + "epoch": 1.3932711198428291, + "grad_norm": 0.6302018165588379, + "learning_rate": 2.7993580385376875e-06, + "loss": 0.5615, + "step": 9455 + }, + { + "epoch": 1.393418467583497, + "grad_norm": 0.6993605494499207, + "learning_rate": 2.7989731266848735e-06, + "loss": 0.5327, + "step": 9456 + }, + { + "epoch": 1.393565815324165, + "grad_norm": 0.6577961444854736, + "learning_rate": 2.798588207641896e-06, + "loss": 0.5433, + "step": 9457 + }, + { + "epoch": 1.393713163064833, + "grad_norm": 0.5859469175338745, + "learning_rate": 2.798203281418012e-06, + "loss": 0.5059, + "step": 9458 + }, + { + "epoch": 1.393860510805501, + "grad_norm": 0.6231498718261719, + "learning_rate": 2.7978183480224796e-06, + "loss": 0.5359, + "step": 9459 + }, + { + "epoch": 1.394007858546169, + "grad_norm": 0.6108489036560059, + "learning_rate": 2.7974334074645555e-06, + "loss": 0.5065, + "step": 9460 + }, + { + "epoch": 1.394155206286837, + "grad_norm": 0.5829463601112366, + "learning_rate": 2.797048459753497e-06, + "loss": 0.5487, + "step": 9461 + }, + { + "epoch": 1.394302554027505, + "grad_norm": 0.5885910987854004, + "learning_rate": 2.7966635048985626e-06, + "loss": 0.51, + "step": 9462 + }, + { + "epoch": 1.3944499017681729, + "grad_norm": 0.5796955823898315, + "learning_rate": 2.79627854290901e-06, + "loss": 0.5488, + "step": 9463 + }, + { + "epoch": 1.3945972495088408, + "grad_norm": 0.629566490650177, + "learning_rate": 2.7958935737940973e-06, + "loss": 0.4936, + "step": 9464 + }, + { + "epoch": 1.3947445972495087, + "grad_norm": 0.5980973243713379, + "learning_rate": 2.795508597563083e-06, + "loss": 0.4878, + "step": 9465 + }, + { + "epoch": 1.3948919449901769, + "grad_norm": 0.5908229351043701, + "learning_rate": 2.7951236142252257e-06, + "loss": 0.5359, + "step": 9466 + }, + { + "epoch": 1.3950392927308448, + "grad_norm": 0.619108259677887, + "learning_rate": 2.794738623789784e-06, + "loss": 0.5205, + "step": 9467 + }, + { + "epoch": 1.3951866404715128, + "grad_norm": 0.6139633059501648, + "learning_rate": 2.7943536262660166e-06, + "loss": 0.5061, + "step": 9468 + }, + { + "epoch": 1.3953339882121807, + "grad_norm": 0.6157007217407227, + "learning_rate": 2.793968621663182e-06, + "loss": 0.5294, + "step": 9469 + }, + { + "epoch": 1.3954813359528488, + "grad_norm": 0.5809003710746765, + "learning_rate": 2.7935836099905406e-06, + "loss": 0.5443, + "step": 9470 + }, + { + "epoch": 1.3956286836935168, + "grad_norm": 0.5719122290611267, + "learning_rate": 2.7931985912573505e-06, + "loss": 0.5458, + "step": 9471 + }, + { + "epoch": 1.3957760314341847, + "grad_norm": 0.6240162253379822, + "learning_rate": 2.7928135654728724e-06, + "loss": 0.511, + "step": 9472 + }, + { + "epoch": 1.3959233791748527, + "grad_norm": 0.6389714479446411, + "learning_rate": 2.7924285326463652e-06, + "loss": 0.4741, + "step": 9473 + }, + { + "epoch": 1.3960707269155206, + "grad_norm": 0.5853534936904907, + "learning_rate": 2.7920434927870888e-06, + "loss": 0.5058, + "step": 9474 + }, + { + "epoch": 1.3962180746561885, + "grad_norm": 0.6280527114868164, + "learning_rate": 2.791658445904303e-06, + "loss": 0.5615, + "step": 9475 + }, + { + "epoch": 1.3963654223968565, + "grad_norm": 0.6069963574409485, + "learning_rate": 2.7912733920072687e-06, + "loss": 0.5038, + "step": 9476 + }, + { + "epoch": 1.3965127701375246, + "grad_norm": 0.5841155052185059, + "learning_rate": 2.7908883311052463e-06, + "loss": 0.5434, + "step": 9477 + }, + { + "epoch": 1.3966601178781926, + "grad_norm": 0.5848388075828552, + "learning_rate": 2.790503263207496e-06, + "loss": 0.5358, + "step": 9478 + }, + { + "epoch": 1.3968074656188605, + "grad_norm": 0.5890612602233887, + "learning_rate": 2.7901181883232783e-06, + "loss": 0.5614, + "step": 9479 + }, + { + "epoch": 1.3969548133595284, + "grad_norm": 0.609767496585846, + "learning_rate": 2.789733106461854e-06, + "loss": 0.517, + "step": 9480 + }, + { + "epoch": 1.3971021611001966, + "grad_norm": 0.6057938933372498, + "learning_rate": 2.7893480176324843e-06, + "loss": 0.5662, + "step": 9481 + }, + { + "epoch": 1.3972495088408645, + "grad_norm": 0.5797199606895447, + "learning_rate": 2.788962921844431e-06, + "loss": 0.5147, + "step": 9482 + }, + { + "epoch": 1.3973968565815325, + "grad_norm": 0.6227172017097473, + "learning_rate": 2.788577819106954e-06, + "loss": 0.5198, + "step": 9483 + }, + { + "epoch": 1.3975442043222004, + "grad_norm": 0.5821150541305542, + "learning_rate": 2.788192709429316e-06, + "loss": 0.5216, + "step": 9484 + }, + { + "epoch": 1.3976915520628683, + "grad_norm": 0.5996490120887756, + "learning_rate": 2.7878075928207794e-06, + "loss": 0.5204, + "step": 9485 + }, + { + "epoch": 1.3978388998035363, + "grad_norm": 0.6055722832679749, + "learning_rate": 2.787422469290604e-06, + "loss": 0.5347, + "step": 9486 + }, + { + "epoch": 1.3979862475442042, + "grad_norm": 0.5812922120094299, + "learning_rate": 2.7870373388480543e-06, + "loss": 0.5521, + "step": 9487 + }, + { + "epoch": 1.3981335952848724, + "grad_norm": 0.6201383471488953, + "learning_rate": 2.7866522015023906e-06, + "loss": 0.5538, + "step": 9488 + }, + { + "epoch": 1.3982809430255403, + "grad_norm": 0.594943642616272, + "learning_rate": 2.7862670572628764e-06, + "loss": 0.4948, + "step": 9489 + }, + { + "epoch": 1.3984282907662082, + "grad_norm": 0.6291036605834961, + "learning_rate": 2.7858819061387733e-06, + "loss": 0.5215, + "step": 9490 + }, + { + "epoch": 1.3985756385068762, + "grad_norm": 0.5884178876876831, + "learning_rate": 2.785496748139345e-06, + "loss": 0.5211, + "step": 9491 + }, + { + "epoch": 1.3987229862475443, + "grad_norm": 0.5886706709861755, + "learning_rate": 2.785111583273853e-06, + "loss": 0.5105, + "step": 9492 + }, + { + "epoch": 1.3988703339882123, + "grad_norm": 0.562516450881958, + "learning_rate": 2.7847264115515616e-06, + "loss": 0.486, + "step": 9493 + }, + { + "epoch": 1.3990176817288802, + "grad_norm": 0.6316235065460205, + "learning_rate": 2.784341232981733e-06, + "loss": 0.5375, + "step": 9494 + }, + { + "epoch": 1.3991650294695481, + "grad_norm": 0.5539649128913879, + "learning_rate": 2.7839560475736317e-06, + "loss": 0.4868, + "step": 9495 + }, + { + "epoch": 1.399312377210216, + "grad_norm": 0.6432821154594421, + "learning_rate": 2.7835708553365204e-06, + "loss": 0.5515, + "step": 9496 + }, + { + "epoch": 1.399459724950884, + "grad_norm": 0.6153750419616699, + "learning_rate": 2.783185656279663e-06, + "loss": 0.5364, + "step": 9497 + }, + { + "epoch": 1.399607072691552, + "grad_norm": 0.6216462254524231, + "learning_rate": 2.7828004504123234e-06, + "loss": 0.5482, + "step": 9498 + }, + { + "epoch": 1.39975442043222, + "grad_norm": 0.5746115446090698, + "learning_rate": 2.7824152377437656e-06, + "loss": 0.5224, + "step": 9499 + }, + { + "epoch": 1.399901768172888, + "grad_norm": 0.6147276759147644, + "learning_rate": 2.782030018283254e-06, + "loss": 0.5259, + "step": 9500 + }, + { + "epoch": 1.400049115913556, + "grad_norm": 0.6188173890113831, + "learning_rate": 2.7816447920400524e-06, + "loss": 0.5299, + "step": 9501 + }, + { + "epoch": 1.400196463654224, + "grad_norm": 0.6220896244049072, + "learning_rate": 2.7812595590234254e-06, + "loss": 0.5639, + "step": 9502 + }, + { + "epoch": 1.400343811394892, + "grad_norm": 0.6257064938545227, + "learning_rate": 2.7808743192426386e-06, + "loss": 0.5265, + "step": 9503 + }, + { + "epoch": 1.40049115913556, + "grad_norm": 0.5962709188461304, + "learning_rate": 2.7804890727069554e-06, + "loss": 0.5303, + "step": 9504 + }, + { + "epoch": 1.400638506876228, + "grad_norm": 0.5955157279968262, + "learning_rate": 2.7801038194256424e-06, + "loss": 0.5197, + "step": 9505 + }, + { + "epoch": 1.4007858546168959, + "grad_norm": 0.5943162441253662, + "learning_rate": 2.779718559407964e-06, + "loss": 0.5566, + "step": 9506 + }, + { + "epoch": 1.4009332023575638, + "grad_norm": 0.5868822336196899, + "learning_rate": 2.779333292663185e-06, + "loss": 0.534, + "step": 9507 + }, + { + "epoch": 1.4010805500982317, + "grad_norm": 0.6088932156562805, + "learning_rate": 2.7789480192005715e-06, + "loss": 0.5299, + "step": 9508 + }, + { + "epoch": 1.4012278978388997, + "grad_norm": 0.5772897005081177, + "learning_rate": 2.7785627390293882e-06, + "loss": 0.508, + "step": 9509 + }, + { + "epoch": 1.4013752455795678, + "grad_norm": 0.567450761795044, + "learning_rate": 2.778177452158903e-06, + "loss": 0.5166, + "step": 9510 + }, + { + "epoch": 1.4015225933202358, + "grad_norm": 0.5671507120132446, + "learning_rate": 2.7777921585983797e-06, + "loss": 0.5288, + "step": 9511 + }, + { + "epoch": 1.4016699410609037, + "grad_norm": 0.5894911289215088, + "learning_rate": 2.777406858357085e-06, + "loss": 0.5334, + "step": 9512 + }, + { + "epoch": 1.4018172888015716, + "grad_norm": 0.599221408367157, + "learning_rate": 2.7770215514442868e-06, + "loss": 0.5317, + "step": 9513 + }, + { + "epoch": 1.4019646365422398, + "grad_norm": 0.597547709941864, + "learning_rate": 2.7766362378692492e-06, + "loss": 0.5213, + "step": 9514 + }, + { + "epoch": 1.4021119842829077, + "grad_norm": 0.5804005861282349, + "learning_rate": 2.77625091764124e-06, + "loss": 0.5242, + "step": 9515 + }, + { + "epoch": 1.4022593320235757, + "grad_norm": 0.5714170932769775, + "learning_rate": 2.775865590769526e-06, + "loss": 0.5445, + "step": 9516 + }, + { + "epoch": 1.4024066797642436, + "grad_norm": 0.590028703212738, + "learning_rate": 2.7754802572633737e-06, + "loss": 0.5427, + "step": 9517 + }, + { + "epoch": 1.4025540275049115, + "grad_norm": 0.6158977746963501, + "learning_rate": 2.7750949171320503e-06, + "loss": 0.5397, + "step": 9518 + }, + { + "epoch": 1.4027013752455795, + "grad_norm": 0.5990768671035767, + "learning_rate": 2.7747095703848232e-06, + "loss": 0.5179, + "step": 9519 + }, + { + "epoch": 1.4028487229862474, + "grad_norm": 0.5649538636207581, + "learning_rate": 2.7743242170309594e-06, + "loss": 0.5367, + "step": 9520 + }, + { + "epoch": 1.4029960707269156, + "grad_norm": 0.561164915561676, + "learning_rate": 2.773938857079727e-06, + "loss": 0.5177, + "step": 9521 + }, + { + "epoch": 1.4031434184675835, + "grad_norm": 0.6194385290145874, + "learning_rate": 2.7735534905403944e-06, + "loss": 0.5421, + "step": 9522 + }, + { + "epoch": 1.4032907662082514, + "grad_norm": 0.5993775129318237, + "learning_rate": 2.773168117422228e-06, + "loss": 0.5052, + "step": 9523 + }, + { + "epoch": 1.4034381139489194, + "grad_norm": 0.6443929672241211, + "learning_rate": 2.772782737734497e-06, + "loss": 0.5503, + "step": 9524 + }, + { + "epoch": 1.4035854616895875, + "grad_norm": 0.6067727208137512, + "learning_rate": 2.772397351486468e-06, + "loss": 0.5304, + "step": 9525 + }, + { + "epoch": 1.4037328094302555, + "grad_norm": 0.6435922384262085, + "learning_rate": 2.7720119586874116e-06, + "loss": 0.5484, + "step": 9526 + }, + { + "epoch": 1.4038801571709234, + "grad_norm": 0.6033235788345337, + "learning_rate": 2.7716265593465946e-06, + "loss": 0.5348, + "step": 9527 + }, + { + "epoch": 1.4040275049115913, + "grad_norm": 0.6415777802467346, + "learning_rate": 2.7712411534732864e-06, + "loss": 0.5281, + "step": 9528 + }, + { + "epoch": 1.4041748526522593, + "grad_norm": 0.5912123322486877, + "learning_rate": 2.7708557410767563e-06, + "loss": 0.5138, + "step": 9529 + }, + { + "epoch": 1.4043222003929272, + "grad_norm": 0.5922355651855469, + "learning_rate": 2.7704703221662714e-06, + "loss": 0.5209, + "step": 9530 + }, + { + "epoch": 1.4044695481335951, + "grad_norm": 0.6279308795928955, + "learning_rate": 2.770084896751103e-06, + "loss": 0.4958, + "step": 9531 + }, + { + "epoch": 1.4046168958742633, + "grad_norm": 0.5982269644737244, + "learning_rate": 2.7696994648405194e-06, + "loss": 0.517, + "step": 9532 + }, + { + "epoch": 1.4047642436149312, + "grad_norm": 0.5781853795051575, + "learning_rate": 2.76931402644379e-06, + "loss": 0.5187, + "step": 9533 + }, + { + "epoch": 1.4049115913555992, + "grad_norm": 0.6153016090393066, + "learning_rate": 2.768928581570185e-06, + "loss": 0.5122, + "step": 9534 + }, + { + "epoch": 1.4050589390962671, + "grad_norm": 0.5971723198890686, + "learning_rate": 2.7685431302289736e-06, + "loss": 0.5346, + "step": 9535 + }, + { + "epoch": 1.4052062868369353, + "grad_norm": 0.5784233212471008, + "learning_rate": 2.768157672429425e-06, + "loss": 0.5425, + "step": 9536 + }, + { + "epoch": 1.4053536345776032, + "grad_norm": 0.5937912464141846, + "learning_rate": 2.767772208180812e-06, + "loss": 0.5118, + "step": 9537 + }, + { + "epoch": 1.4055009823182711, + "grad_norm": 0.5841678380966187, + "learning_rate": 2.7673867374924015e-06, + "loss": 0.5234, + "step": 9538 + }, + { + "epoch": 1.405648330058939, + "grad_norm": 0.5766240358352661, + "learning_rate": 2.767001260373465e-06, + "loss": 0.5172, + "step": 9539 + }, + { + "epoch": 1.405795677799607, + "grad_norm": 0.6087648868560791, + "learning_rate": 2.766615776833274e-06, + "loss": 0.4981, + "step": 9540 + }, + { + "epoch": 1.405943025540275, + "grad_norm": 0.5757223963737488, + "learning_rate": 2.7662302868810988e-06, + "loss": 0.5454, + "step": 9541 + }, + { + "epoch": 1.406090373280943, + "grad_norm": 0.6108905673027039, + "learning_rate": 2.76584479052621e-06, + "loss": 0.5295, + "step": 9542 + }, + { + "epoch": 1.406237721021611, + "grad_norm": 0.6332886219024658, + "learning_rate": 2.7654592877778785e-06, + "loss": 0.521, + "step": 9543 + }, + { + "epoch": 1.406385068762279, + "grad_norm": 0.5900530815124512, + "learning_rate": 2.7650737786453755e-06, + "loss": 0.5249, + "step": 9544 + }, + { + "epoch": 1.406532416502947, + "grad_norm": 0.6074915528297424, + "learning_rate": 2.7646882631379735e-06, + "loss": 0.5214, + "step": 9545 + }, + { + "epoch": 1.4066797642436148, + "grad_norm": 0.5886110663414001, + "learning_rate": 2.764302741264942e-06, + "loss": 0.5175, + "step": 9546 + }, + { + "epoch": 1.406827111984283, + "grad_norm": 0.5796636343002319, + "learning_rate": 2.7639172130355534e-06, + "loss": 0.5334, + "step": 9547 + }, + { + "epoch": 1.406974459724951, + "grad_norm": 0.6096106767654419, + "learning_rate": 2.76353167845908e-06, + "loss": 0.5123, + "step": 9548 + }, + { + "epoch": 1.4071218074656189, + "grad_norm": 0.5733877420425415, + "learning_rate": 2.763146137544793e-06, + "loss": 0.5255, + "step": 9549 + }, + { + "epoch": 1.4072691552062868, + "grad_norm": 0.6000850200653076, + "learning_rate": 2.7627605903019656e-06, + "loss": 0.4886, + "step": 9550 + }, + { + "epoch": 1.4074165029469548, + "grad_norm": 0.6189525723457336, + "learning_rate": 2.762375036739868e-06, + "loss": 0.5094, + "step": 9551 + }, + { + "epoch": 1.4075638506876227, + "grad_norm": 0.5988956689834595, + "learning_rate": 2.761989476867775e-06, + "loss": 0.5166, + "step": 9552 + }, + { + "epoch": 1.4077111984282908, + "grad_norm": 0.6160733699798584, + "learning_rate": 2.761603910694958e-06, + "loss": 0.5762, + "step": 9553 + }, + { + "epoch": 1.4078585461689588, + "grad_norm": 0.5825312733650208, + "learning_rate": 2.761218338230689e-06, + "loss": 0.5398, + "step": 9554 + }, + { + "epoch": 1.4080058939096267, + "grad_norm": 0.6430700421333313, + "learning_rate": 2.7608327594842414e-06, + "loss": 0.5331, + "step": 9555 + }, + { + "epoch": 1.4081532416502947, + "grad_norm": 0.5677822828292847, + "learning_rate": 2.7604471744648886e-06, + "loss": 0.4947, + "step": 9556 + }, + { + "epoch": 1.4083005893909628, + "grad_norm": 0.6313102841377258, + "learning_rate": 2.7600615831819034e-06, + "loss": 0.5042, + "step": 9557 + }, + { + "epoch": 1.4084479371316307, + "grad_norm": 0.602257490158081, + "learning_rate": 2.7596759856445587e-06, + "loss": 0.5341, + "step": 9558 + }, + { + "epoch": 1.4085952848722987, + "grad_norm": 0.6236230134963989, + "learning_rate": 2.7592903818621285e-06, + "loss": 0.5453, + "step": 9559 + }, + { + "epoch": 1.4087426326129666, + "grad_norm": 0.5972323417663574, + "learning_rate": 2.7589047718438867e-06, + "loss": 0.5481, + "step": 9560 + }, + { + "epoch": 1.4088899803536346, + "grad_norm": 0.5924033522605896, + "learning_rate": 2.7585191555991065e-06, + "loss": 0.5083, + "step": 9561 + }, + { + "epoch": 1.4090373280943025, + "grad_norm": 0.6261858940124512, + "learning_rate": 2.7581335331370613e-06, + "loss": 0.5378, + "step": 9562 + }, + { + "epoch": 1.4091846758349704, + "grad_norm": 0.5659173130989075, + "learning_rate": 2.7577479044670264e-06, + "loss": 0.5267, + "step": 9563 + }, + { + "epoch": 1.4093320235756386, + "grad_norm": 0.5941291451454163, + "learning_rate": 2.7573622695982745e-06, + "loss": 0.5249, + "step": 9564 + }, + { + "epoch": 1.4094793713163065, + "grad_norm": 0.5880107879638672, + "learning_rate": 2.7569766285400813e-06, + "loss": 0.5676, + "step": 9565 + }, + { + "epoch": 1.4096267190569745, + "grad_norm": 0.6165246367454529, + "learning_rate": 2.7565909813017205e-06, + "loss": 0.5544, + "step": 9566 + }, + { + "epoch": 1.4097740667976424, + "grad_norm": 0.5813858509063721, + "learning_rate": 2.7562053278924673e-06, + "loss": 0.5557, + "step": 9567 + }, + { + "epoch": 1.4099214145383105, + "grad_norm": 0.5732730031013489, + "learning_rate": 2.7558196683215953e-06, + "loss": 0.551, + "step": 9568 + }, + { + "epoch": 1.4100687622789785, + "grad_norm": 0.6252087950706482, + "learning_rate": 2.755434002598381e-06, + "loss": 0.5247, + "step": 9569 + }, + { + "epoch": 1.4102161100196464, + "grad_norm": 0.6161335706710815, + "learning_rate": 2.7550483307320987e-06, + "loss": 0.5048, + "step": 9570 + }, + { + "epoch": 1.4103634577603144, + "grad_norm": 0.6199514269828796, + "learning_rate": 2.7546626527320238e-06, + "loss": 0.5347, + "step": 9571 + }, + { + "epoch": 1.4105108055009823, + "grad_norm": 0.6004640460014343, + "learning_rate": 2.754276968607431e-06, + "loss": 0.5437, + "step": 9572 + }, + { + "epoch": 1.4106581532416502, + "grad_norm": 0.5645808577537537, + "learning_rate": 2.7538912783675966e-06, + "loss": 0.5669, + "step": 9573 + }, + { + "epoch": 1.4108055009823182, + "grad_norm": 0.6040757298469543, + "learning_rate": 2.753505582021796e-06, + "loss": 0.5249, + "step": 9574 + }, + { + "epoch": 1.4109528487229863, + "grad_norm": 0.566525936126709, + "learning_rate": 2.753119879579305e-06, + "loss": 0.5372, + "step": 9575 + }, + { + "epoch": 1.4111001964636543, + "grad_norm": 0.6072534918785095, + "learning_rate": 2.7527341710493993e-06, + "loss": 0.574, + "step": 9576 + }, + { + "epoch": 1.4112475442043222, + "grad_norm": 0.6177161931991577, + "learning_rate": 2.7523484564413553e-06, + "loss": 0.549, + "step": 9577 + }, + { + "epoch": 1.4113948919449901, + "grad_norm": 0.5744690895080566, + "learning_rate": 2.7519627357644503e-06, + "loss": 0.5366, + "step": 9578 + }, + { + "epoch": 1.4115422396856583, + "grad_norm": 0.5906718969345093, + "learning_rate": 2.7515770090279593e-06, + "loss": 0.5232, + "step": 9579 + }, + { + "epoch": 1.4116895874263262, + "grad_norm": 0.6098536252975464, + "learning_rate": 2.7511912762411593e-06, + "loss": 0.5258, + "step": 9580 + }, + { + "epoch": 1.4118369351669942, + "grad_norm": 0.5988755226135254, + "learning_rate": 2.7508055374133263e-06, + "loss": 0.5459, + "step": 9581 + }, + { + "epoch": 1.411984282907662, + "grad_norm": 0.5949608087539673, + "learning_rate": 2.7504197925537387e-06, + "loss": 0.5429, + "step": 9582 + }, + { + "epoch": 1.41213163064833, + "grad_norm": 0.6174249053001404, + "learning_rate": 2.7500340416716714e-06, + "loss": 0.5261, + "step": 9583 + }, + { + "epoch": 1.412278978388998, + "grad_norm": 0.6119457483291626, + "learning_rate": 2.7496482847764032e-06, + "loss": 0.5477, + "step": 9584 + }, + { + "epoch": 1.412426326129666, + "grad_norm": 0.5903874039649963, + "learning_rate": 2.749262521877211e-06, + "loss": 0.5275, + "step": 9585 + }, + { + "epoch": 1.412573673870334, + "grad_norm": 0.6120256781578064, + "learning_rate": 2.7488767529833722e-06, + "loss": 0.5293, + "step": 9586 + }, + { + "epoch": 1.412721021611002, + "grad_norm": 0.5748682022094727, + "learning_rate": 2.748490978104164e-06, + "loss": 0.5042, + "step": 9587 + }, + { + "epoch": 1.41286836935167, + "grad_norm": 0.61686772108078, + "learning_rate": 2.7481051972488643e-06, + "loss": 0.516, + "step": 9588 + }, + { + "epoch": 1.4130157170923379, + "grad_norm": 0.5975081324577332, + "learning_rate": 2.7477194104267507e-06, + "loss": 0.5086, + "step": 9589 + }, + { + "epoch": 1.413163064833006, + "grad_norm": 0.6480733752250671, + "learning_rate": 2.747333617647102e-06, + "loss": 0.5468, + "step": 9590 + }, + { + "epoch": 1.413310412573674, + "grad_norm": 0.582266628742218, + "learning_rate": 2.7469478189191954e-06, + "loss": 0.5171, + "step": 9591 + }, + { + "epoch": 1.413457760314342, + "grad_norm": 0.5953171849250793, + "learning_rate": 2.7465620142523097e-06, + "loss": 0.5367, + "step": 9592 + }, + { + "epoch": 1.4136051080550098, + "grad_norm": 0.615422785282135, + "learning_rate": 2.746176203655723e-06, + "loss": 0.5346, + "step": 9593 + }, + { + "epoch": 1.4137524557956778, + "grad_norm": 0.5823226571083069, + "learning_rate": 2.7457903871387144e-06, + "loss": 0.5298, + "step": 9594 + }, + { + "epoch": 1.4138998035363457, + "grad_norm": 0.584498405456543, + "learning_rate": 2.745404564710562e-06, + "loss": 0.5133, + "step": 9595 + }, + { + "epoch": 1.4140471512770136, + "grad_norm": 0.6403849124908447, + "learning_rate": 2.7450187363805446e-06, + "loss": 0.5387, + "step": 9596 + }, + { + "epoch": 1.4141944990176818, + "grad_norm": 0.5734889507293701, + "learning_rate": 2.744632902157942e-06, + "loss": 0.5271, + "step": 9597 + }, + { + "epoch": 1.4143418467583497, + "grad_norm": 0.5892496109008789, + "learning_rate": 2.744247062052033e-06, + "loss": 0.5534, + "step": 9598 + }, + { + "epoch": 1.4144891944990177, + "grad_norm": 0.5709632635116577, + "learning_rate": 2.743861216072097e-06, + "loss": 0.5379, + "step": 9599 + }, + { + "epoch": 1.4146365422396856, + "grad_norm": 0.6093901991844177, + "learning_rate": 2.7434753642274127e-06, + "loss": 0.5451, + "step": 9600 + }, + { + "epoch": 1.4147838899803538, + "grad_norm": 0.5651069283485413, + "learning_rate": 2.7430895065272597e-06, + "loss": 0.5515, + "step": 9601 + }, + { + "epoch": 1.4149312377210217, + "grad_norm": 0.6580312252044678, + "learning_rate": 2.7427036429809182e-06, + "loss": 0.5471, + "step": 9602 + }, + { + "epoch": 1.4150785854616896, + "grad_norm": 0.591532826423645, + "learning_rate": 2.7423177735976685e-06, + "loss": 0.5076, + "step": 9603 + }, + { + "epoch": 1.4152259332023576, + "grad_norm": 0.6091835498809814, + "learning_rate": 2.7419318983867902e-06, + "loss": 0.5418, + "step": 9604 + }, + { + "epoch": 1.4153732809430255, + "grad_norm": 0.5937901139259338, + "learning_rate": 2.7415460173575626e-06, + "loss": 0.5188, + "step": 9605 + }, + { + "epoch": 1.4155206286836934, + "grad_norm": 0.5698951482772827, + "learning_rate": 2.7411601305192665e-06, + "loss": 0.5306, + "step": 9606 + }, + { + "epoch": 1.4156679764243614, + "grad_norm": 0.6070894598960876, + "learning_rate": 2.740774237881183e-06, + "loss": 0.5256, + "step": 9607 + }, + { + "epoch": 1.4158153241650295, + "grad_norm": 0.6311027407646179, + "learning_rate": 2.7403883394525915e-06, + "loss": 0.5308, + "step": 9608 + }, + { + "epoch": 1.4159626719056975, + "grad_norm": 0.5881789922714233, + "learning_rate": 2.740002435242773e-06, + "loss": 0.545, + "step": 9609 + }, + { + "epoch": 1.4161100196463654, + "grad_norm": 0.5968655943870544, + "learning_rate": 2.7396165252610087e-06, + "loss": 0.5426, + "step": 9610 + }, + { + "epoch": 1.4162573673870333, + "grad_norm": 0.5954957008361816, + "learning_rate": 2.7392306095165795e-06, + "loss": 0.5179, + "step": 9611 + }, + { + "epoch": 1.4164047151277015, + "grad_norm": 0.5814993977546692, + "learning_rate": 2.7388446880187663e-06, + "loss": 0.5246, + "step": 9612 + }, + { + "epoch": 1.4165520628683694, + "grad_norm": 0.5973030924797058, + "learning_rate": 2.73845876077685e-06, + "loss": 0.4871, + "step": 9613 + }, + { + "epoch": 1.4166994106090374, + "grad_norm": 0.601584255695343, + "learning_rate": 2.7380728278001123e-06, + "loss": 0.5599, + "step": 9614 + }, + { + "epoch": 1.4168467583497053, + "grad_norm": 0.6301667094230652, + "learning_rate": 2.737686889097835e-06, + "loss": 0.5467, + "step": 9615 + }, + { + "epoch": 1.4169941060903732, + "grad_norm": 0.5933640003204346, + "learning_rate": 2.7373009446792997e-06, + "loss": 0.5302, + "step": 9616 + }, + { + "epoch": 1.4171414538310412, + "grad_norm": 0.6422939896583557, + "learning_rate": 2.7369149945537876e-06, + "loss": 0.5349, + "step": 9617 + }, + { + "epoch": 1.4172888015717091, + "grad_norm": 0.5847200155258179, + "learning_rate": 2.7365290387305817e-06, + "loss": 0.5156, + "step": 9618 + }, + { + "epoch": 1.4174361493123773, + "grad_norm": 0.6091415286064148, + "learning_rate": 2.736143077218963e-06, + "loss": 0.5298, + "step": 9619 + }, + { + "epoch": 1.4175834970530452, + "grad_norm": 0.6311068534851074, + "learning_rate": 2.7357571100282136e-06, + "loss": 0.5077, + "step": 9620 + }, + { + "epoch": 1.4177308447937131, + "grad_norm": 0.6078808903694153, + "learning_rate": 2.7353711371676167e-06, + "loss": 0.5415, + "step": 9621 + }, + { + "epoch": 1.417878192534381, + "grad_norm": 0.6032773852348328, + "learning_rate": 2.7349851586464545e-06, + "loss": 0.5305, + "step": 9622 + }, + { + "epoch": 1.4180255402750492, + "grad_norm": 0.6203250288963318, + "learning_rate": 2.734599174474009e-06, + "loss": 0.5042, + "step": 9623 + }, + { + "epoch": 1.4181728880157172, + "grad_norm": 0.5972741842269897, + "learning_rate": 2.7342131846595638e-06, + "loss": 0.543, + "step": 9624 + }, + { + "epoch": 1.418320235756385, + "grad_norm": 0.5960015654563904, + "learning_rate": 2.7338271892124014e-06, + "loss": 0.5441, + "step": 9625 + }, + { + "epoch": 1.418467583497053, + "grad_norm": 0.6659784317016602, + "learning_rate": 2.733441188141804e-06, + "loss": 0.539, + "step": 9626 + }, + { + "epoch": 1.418614931237721, + "grad_norm": 0.6104554533958435, + "learning_rate": 2.7330551814570565e-06, + "loss": 0.5451, + "step": 9627 + }, + { + "epoch": 1.418762278978389, + "grad_norm": 0.6208983659744263, + "learning_rate": 2.732669169167442e-06, + "loss": 0.5365, + "step": 9628 + }, + { + "epoch": 1.4189096267190568, + "grad_norm": 0.5848246216773987, + "learning_rate": 2.732283151282241e-06, + "loss": 0.5439, + "step": 9629 + }, + { + "epoch": 1.419056974459725, + "grad_norm": 0.5836443305015564, + "learning_rate": 2.7318971278107404e-06, + "loss": 0.5464, + "step": 9630 + }, + { + "epoch": 1.419204322200393, + "grad_norm": 0.63386470079422, + "learning_rate": 2.731511098762222e-06, + "loss": 0.5348, + "step": 9631 + }, + { + "epoch": 1.4193516699410609, + "grad_norm": 0.626867949962616, + "learning_rate": 2.7311250641459703e-06, + "loss": 0.5102, + "step": 9632 + }, + { + "epoch": 1.4194990176817288, + "grad_norm": 0.6057164669036865, + "learning_rate": 2.7307390239712696e-06, + "loss": 0.5585, + "step": 9633 + }, + { + "epoch": 1.419646365422397, + "grad_norm": 0.6009288430213928, + "learning_rate": 2.7303529782474035e-06, + "loss": 0.5051, + "step": 9634 + }, + { + "epoch": 1.419793713163065, + "grad_norm": 0.637935221195221, + "learning_rate": 2.7299669269836566e-06, + "loss": 0.5807, + "step": 9635 + }, + { + "epoch": 1.4199410609037328, + "grad_norm": 0.6108421087265015, + "learning_rate": 2.7295808701893122e-06, + "loss": 0.5425, + "step": 9636 + }, + { + "epoch": 1.4200884086444008, + "grad_norm": 0.5643652081489563, + "learning_rate": 2.7291948078736564e-06, + "loss": 0.53, + "step": 9637 + }, + { + "epoch": 1.4202357563850687, + "grad_norm": 0.6201995015144348, + "learning_rate": 2.7288087400459723e-06, + "loss": 0.5252, + "step": 9638 + }, + { + "epoch": 1.4203831041257367, + "grad_norm": 0.5823082327842712, + "learning_rate": 2.7284226667155458e-06, + "loss": 0.532, + "step": 9639 + }, + { + "epoch": 1.4205304518664046, + "grad_norm": 0.6290830373764038, + "learning_rate": 2.728036587891661e-06, + "loss": 0.5549, + "step": 9640 + }, + { + "epoch": 1.4206777996070727, + "grad_norm": 0.6281077861785889, + "learning_rate": 2.727650503583603e-06, + "loss": 0.5373, + "step": 9641 + }, + { + "epoch": 1.4208251473477407, + "grad_norm": 0.5722319483757019, + "learning_rate": 2.7272644138006576e-06, + "loss": 0.5277, + "step": 9642 + }, + { + "epoch": 1.4209724950884086, + "grad_norm": 0.5794869661331177, + "learning_rate": 2.7268783185521096e-06, + "loss": 0.5176, + "step": 9643 + }, + { + "epoch": 1.4211198428290766, + "grad_norm": 0.5995215177536011, + "learning_rate": 2.726492217847245e-06, + "loss": 0.4992, + "step": 9644 + }, + { + "epoch": 1.4212671905697447, + "grad_norm": 0.6146522760391235, + "learning_rate": 2.726106111695348e-06, + "loss": 0.5037, + "step": 9645 + }, + { + "epoch": 1.4214145383104126, + "grad_norm": 0.6238453984260559, + "learning_rate": 2.7257200001057048e-06, + "loss": 0.5168, + "step": 9646 + }, + { + "epoch": 1.4215618860510806, + "grad_norm": 0.5982551574707031, + "learning_rate": 2.725333883087602e-06, + "loss": 0.5499, + "step": 9647 + }, + { + "epoch": 1.4217092337917485, + "grad_norm": 0.5934234857559204, + "learning_rate": 2.7249477606503248e-06, + "loss": 0.5378, + "step": 9648 + }, + { + "epoch": 1.4218565815324165, + "grad_norm": 0.5860940217971802, + "learning_rate": 2.72456163280316e-06, + "loss": 0.5224, + "step": 9649 + }, + { + "epoch": 1.4220039292730844, + "grad_norm": 0.6125759482383728, + "learning_rate": 2.7241754995553926e-06, + "loss": 0.522, + "step": 9650 + }, + { + "epoch": 1.4221512770137523, + "grad_norm": 0.6023329496383667, + "learning_rate": 2.7237893609163097e-06, + "loss": 0.5339, + "step": 9651 + }, + { + "epoch": 1.4222986247544205, + "grad_norm": 0.637100338935852, + "learning_rate": 2.723403216895198e-06, + "loss": 0.5229, + "step": 9652 + }, + { + "epoch": 1.4224459724950884, + "grad_norm": 0.5903239846229553, + "learning_rate": 2.7230170675013433e-06, + "loss": 0.5151, + "step": 9653 + }, + { + "epoch": 1.4225933202357564, + "grad_norm": 0.5726285576820374, + "learning_rate": 2.722630912744033e-06, + "loss": 0.5152, + "step": 9654 + }, + { + "epoch": 1.4227406679764243, + "grad_norm": 0.6107499599456787, + "learning_rate": 2.7222447526325537e-06, + "loss": 0.5451, + "step": 9655 + }, + { + "epoch": 1.4228880157170924, + "grad_norm": 0.5932846069335938, + "learning_rate": 2.721858587176192e-06, + "loss": 0.5361, + "step": 9656 + }, + { + "epoch": 1.4230353634577604, + "grad_norm": 0.5854629278182983, + "learning_rate": 2.7214724163842353e-06, + "loss": 0.5559, + "step": 9657 + }, + { + "epoch": 1.4231827111984283, + "grad_norm": 0.6059659123420715, + "learning_rate": 2.721086240265971e-06, + "loss": 0.5071, + "step": 9658 + }, + { + "epoch": 1.4233300589390963, + "grad_norm": 0.5818633437156677, + "learning_rate": 2.7207000588306865e-06, + "loss": 0.5244, + "step": 9659 + }, + { + "epoch": 1.4234774066797642, + "grad_norm": 0.5895152688026428, + "learning_rate": 2.720313872087669e-06, + "loss": 0.5512, + "step": 9660 + }, + { + "epoch": 1.4236247544204321, + "grad_norm": 0.5918038487434387, + "learning_rate": 2.7199276800462065e-06, + "loss": 0.5365, + "step": 9661 + }, + { + "epoch": 1.4237721021611, + "grad_norm": 0.6287980079650879, + "learning_rate": 2.719541482715586e-06, + "loss": 0.539, + "step": 9662 + }, + { + "epoch": 1.4239194499017682, + "grad_norm": 0.5628067851066589, + "learning_rate": 2.7191552801050958e-06, + "loss": 0.5491, + "step": 9663 + }, + { + "epoch": 1.4240667976424362, + "grad_norm": 0.600619912147522, + "learning_rate": 2.7187690722240247e-06, + "loss": 0.537, + "step": 9664 + }, + { + "epoch": 1.424214145383104, + "grad_norm": 0.6075250506401062, + "learning_rate": 2.718382859081659e-06, + "loss": 0.5338, + "step": 9665 + }, + { + "epoch": 1.424361493123772, + "grad_norm": 0.6285223960876465, + "learning_rate": 2.717996640687288e-06, + "loss": 0.4916, + "step": 9666 + }, + { + "epoch": 1.4245088408644402, + "grad_norm": 0.6011330485343933, + "learning_rate": 2.717610417050201e-06, + "loss": 0.5625, + "step": 9667 + }, + { + "epoch": 1.4246561886051081, + "grad_norm": 0.6098334193229675, + "learning_rate": 2.717224188179684e-06, + "loss": 0.5107, + "step": 9668 + }, + { + "epoch": 1.424803536345776, + "grad_norm": 0.5873013734817505, + "learning_rate": 2.7168379540850284e-06, + "loss": 0.5127, + "step": 9669 + }, + { + "epoch": 1.424950884086444, + "grad_norm": 0.6135823130607605, + "learning_rate": 2.716451714775522e-06, + "loss": 0.5254, + "step": 9670 + }, + { + "epoch": 1.425098231827112, + "grad_norm": 0.6158457398414612, + "learning_rate": 2.716065470260453e-06, + "loss": 0.5284, + "step": 9671 + }, + { + "epoch": 1.4252455795677799, + "grad_norm": 0.5720446705818176, + "learning_rate": 2.71567922054911e-06, + "loss": 0.5306, + "step": 9672 + }, + { + "epoch": 1.4253929273084478, + "grad_norm": 0.5930591821670532, + "learning_rate": 2.7152929656507837e-06, + "loss": 0.542, + "step": 9673 + }, + { + "epoch": 1.425540275049116, + "grad_norm": 0.6222829222679138, + "learning_rate": 2.7149067055747624e-06, + "loss": 0.5279, + "step": 9674 + }, + { + "epoch": 1.425687622789784, + "grad_norm": 0.6274460554122925, + "learning_rate": 2.7145204403303353e-06, + "loss": 0.5302, + "step": 9675 + }, + { + "epoch": 1.4258349705304518, + "grad_norm": 0.6165236234664917, + "learning_rate": 2.7141341699267927e-06, + "loss": 0.506, + "step": 9676 + }, + { + "epoch": 1.4259823182711198, + "grad_norm": 0.589307963848114, + "learning_rate": 2.7137478943734236e-06, + "loss": 0.4948, + "step": 9677 + }, + { + "epoch": 1.426129666011788, + "grad_norm": 0.5702733397483826, + "learning_rate": 2.7133616136795175e-06, + "loss": 0.5099, + "step": 9678 + }, + { + "epoch": 1.4262770137524559, + "grad_norm": 0.6035054922103882, + "learning_rate": 2.7129753278543654e-06, + "loss": 0.5232, + "step": 9679 + }, + { + "epoch": 1.4264243614931238, + "grad_norm": 0.6199454665184021, + "learning_rate": 2.7125890369072563e-06, + "loss": 0.5243, + "step": 9680 + }, + { + "epoch": 1.4265717092337917, + "grad_norm": 0.5622614026069641, + "learning_rate": 2.71220274084748e-06, + "loss": 0.5321, + "step": 9681 + }, + { + "epoch": 1.4267190569744597, + "grad_norm": 0.5909480452537537, + "learning_rate": 2.7118164396843277e-06, + "loss": 0.5424, + "step": 9682 + }, + { + "epoch": 1.4268664047151276, + "grad_norm": 0.585114598274231, + "learning_rate": 2.711430133427089e-06, + "loss": 0.5287, + "step": 9683 + }, + { + "epoch": 1.4270137524557958, + "grad_norm": 0.6278077960014343, + "learning_rate": 2.7110438220850556e-06, + "loss": 0.5919, + "step": 9684 + }, + { + "epoch": 1.4271611001964637, + "grad_norm": 0.6518099308013916, + "learning_rate": 2.7106575056675167e-06, + "loss": 0.5171, + "step": 9685 + }, + { + "epoch": 1.4273084479371316, + "grad_norm": 0.6148005127906799, + "learning_rate": 2.710271184183763e-06, + "loss": 0.53, + "step": 9686 + }, + { + "epoch": 1.4274557956777996, + "grad_norm": 0.5875228047370911, + "learning_rate": 2.709884857643087e-06, + "loss": 0.5189, + "step": 9687 + }, + { + "epoch": 1.4276031434184675, + "grad_norm": 0.5812709927558899, + "learning_rate": 2.7094985260547785e-06, + "loss": 0.5079, + "step": 9688 + }, + { + "epoch": 1.4277504911591357, + "grad_norm": 0.6103475093841553, + "learning_rate": 2.7091121894281282e-06, + "loss": 0.5488, + "step": 9689 + }, + { + "epoch": 1.4278978388998036, + "grad_norm": 0.6155165433883667, + "learning_rate": 2.7087258477724282e-06, + "loss": 0.5277, + "step": 9690 + }, + { + "epoch": 1.4280451866404715, + "grad_norm": 0.6440425515174866, + "learning_rate": 2.708339501096969e-06, + "loss": 0.562, + "step": 9691 + }, + { + "epoch": 1.4281925343811395, + "grad_norm": 0.6192280650138855, + "learning_rate": 2.7079531494110434e-06, + "loss": 0.5548, + "step": 9692 + }, + { + "epoch": 1.4283398821218074, + "grad_norm": 0.6015728712081909, + "learning_rate": 2.7075667927239412e-06, + "loss": 0.5185, + "step": 9693 + }, + { + "epoch": 1.4284872298624753, + "grad_norm": 0.5893257260322571, + "learning_rate": 2.7071804310449562e-06, + "loss": 0.5223, + "step": 9694 + }, + { + "epoch": 1.4286345776031435, + "grad_norm": 0.6277155876159668, + "learning_rate": 2.706794064383378e-06, + "loss": 0.5452, + "step": 9695 + }, + { + "epoch": 1.4287819253438114, + "grad_norm": 0.5820591449737549, + "learning_rate": 2.7064076927485e-06, + "loss": 0.5402, + "step": 9696 + }, + { + "epoch": 1.4289292730844794, + "grad_norm": 0.6010319590568542, + "learning_rate": 2.7060213161496134e-06, + "loss": 0.5008, + "step": 9697 + }, + { + "epoch": 1.4290766208251473, + "grad_norm": 0.6037759184837341, + "learning_rate": 2.7056349345960108e-06, + "loss": 0.5585, + "step": 9698 + }, + { + "epoch": 1.4292239685658155, + "grad_norm": 0.573081374168396, + "learning_rate": 2.7052485480969847e-06, + "loss": 0.5251, + "step": 9699 + }, + { + "epoch": 1.4293713163064834, + "grad_norm": 0.6407430768013, + "learning_rate": 2.704862156661827e-06, + "loss": 0.5599, + "step": 9700 + }, + { + "epoch": 1.4295186640471513, + "grad_norm": 0.5743011832237244, + "learning_rate": 2.7044757602998305e-06, + "loss": 0.5406, + "step": 9701 + }, + { + "epoch": 1.4296660117878193, + "grad_norm": 0.6150754690170288, + "learning_rate": 2.7040893590202883e-06, + "loss": 0.5364, + "step": 9702 + }, + { + "epoch": 1.4298133595284872, + "grad_norm": 0.6165287494659424, + "learning_rate": 2.7037029528324925e-06, + "loss": 0.5147, + "step": 9703 + }, + { + "epoch": 1.4299607072691551, + "grad_norm": 0.5912858247756958, + "learning_rate": 2.7033165417457365e-06, + "loss": 0.5149, + "step": 9704 + }, + { + "epoch": 1.430108055009823, + "grad_norm": 0.5928008556365967, + "learning_rate": 2.702930125769312e-06, + "loss": 0.5604, + "step": 9705 + }, + { + "epoch": 1.4302554027504912, + "grad_norm": 0.6111850738525391, + "learning_rate": 2.7025437049125138e-06, + "loss": 0.5468, + "step": 9706 + }, + { + "epoch": 1.4304027504911592, + "grad_norm": 0.5972082018852234, + "learning_rate": 2.7021572791846344e-06, + "loss": 0.5244, + "step": 9707 + }, + { + "epoch": 1.430550098231827, + "grad_norm": 0.5990304946899414, + "learning_rate": 2.701770848594968e-06, + "loss": 0.538, + "step": 9708 + }, + { + "epoch": 1.430697445972495, + "grad_norm": 0.5746538043022156, + "learning_rate": 2.7013844131528066e-06, + "loss": 0.53, + "step": 9709 + }, + { + "epoch": 1.4308447937131632, + "grad_norm": 0.5853259563446045, + "learning_rate": 2.7009979728674446e-06, + "loss": 0.5451, + "step": 9710 + }, + { + "epoch": 1.4309921414538311, + "grad_norm": 0.5963356494903564, + "learning_rate": 2.7006115277481753e-06, + "loss": 0.521, + "step": 9711 + }, + { + "epoch": 1.431139489194499, + "grad_norm": 0.5519407391548157, + "learning_rate": 2.7002250778042936e-06, + "loss": 0.5303, + "step": 9712 + }, + { + "epoch": 1.431286836935167, + "grad_norm": 0.5925446152687073, + "learning_rate": 2.6998386230450922e-06, + "loss": 0.5039, + "step": 9713 + }, + { + "epoch": 1.431434184675835, + "grad_norm": 0.590106725692749, + "learning_rate": 2.699452163479866e-06, + "loss": 0.5576, + "step": 9714 + }, + { + "epoch": 1.4315815324165029, + "grad_norm": 0.6009145975112915, + "learning_rate": 2.6990656991179086e-06, + "loss": 0.5405, + "step": 9715 + }, + { + "epoch": 1.4317288801571708, + "grad_norm": 0.6307013630867004, + "learning_rate": 2.6986792299685144e-06, + "loss": 0.5491, + "step": 9716 + }, + { + "epoch": 1.431876227897839, + "grad_norm": 0.6106905341148376, + "learning_rate": 2.6982927560409782e-06, + "loss": 0.5344, + "step": 9717 + }, + { + "epoch": 1.432023575638507, + "grad_norm": 0.5925889611244202, + "learning_rate": 2.697906277344594e-06, + "loss": 0.541, + "step": 9718 + }, + { + "epoch": 1.4321709233791748, + "grad_norm": 0.6035111546516418, + "learning_rate": 2.697519793888657e-06, + "loss": 0.5304, + "step": 9719 + }, + { + "epoch": 1.4323182711198428, + "grad_norm": 0.6131261587142944, + "learning_rate": 2.6971333056824616e-06, + "loss": 0.5329, + "step": 9720 + }, + { + "epoch": 1.432465618860511, + "grad_norm": 0.5779381394386292, + "learning_rate": 2.6967468127353025e-06, + "loss": 0.5221, + "step": 9721 + }, + { + "epoch": 1.4326129666011789, + "grad_norm": 0.6069056391716003, + "learning_rate": 2.6963603150564745e-06, + "loss": 0.5313, + "step": 9722 + }, + { + "epoch": 1.4327603143418468, + "grad_norm": 0.590976893901825, + "learning_rate": 2.6959738126552727e-06, + "loss": 0.5073, + "step": 9723 + }, + { + "epoch": 1.4329076620825147, + "grad_norm": 0.6395171284675598, + "learning_rate": 2.6955873055409933e-06, + "loss": 0.5011, + "step": 9724 + }, + { + "epoch": 1.4330550098231827, + "grad_norm": 0.5914829969406128, + "learning_rate": 2.6952007937229306e-06, + "loss": 0.522, + "step": 9725 + }, + { + "epoch": 1.4332023575638506, + "grad_norm": 0.586307942867279, + "learning_rate": 2.69481427721038e-06, + "loss": 0.5315, + "step": 9726 + }, + { + "epoch": 1.4333497053045186, + "grad_norm": 0.6004577875137329, + "learning_rate": 2.6944277560126385e-06, + "loss": 0.5492, + "step": 9727 + }, + { + "epoch": 1.4334970530451867, + "grad_norm": 0.5637063384056091, + "learning_rate": 2.694041230139e-06, + "loss": 0.5241, + "step": 9728 + }, + { + "epoch": 1.4336444007858546, + "grad_norm": 0.5804138779640198, + "learning_rate": 2.693654699598761e-06, + "loss": 0.5202, + "step": 9729 + }, + { + "epoch": 1.4337917485265226, + "grad_norm": 0.5989297032356262, + "learning_rate": 2.6932681644012177e-06, + "loss": 0.502, + "step": 9730 + }, + { + "epoch": 1.4339390962671905, + "grad_norm": 0.5989737510681152, + "learning_rate": 2.692881624555665e-06, + "loss": 0.5526, + "step": 9731 + }, + { + "epoch": 1.4340864440078587, + "grad_norm": 0.5837454199790955, + "learning_rate": 2.6924950800714002e-06, + "loss": 0.5574, + "step": 9732 + }, + { + "epoch": 1.4342337917485266, + "grad_norm": 0.5980965495109558, + "learning_rate": 2.692108530957719e-06, + "loss": 0.5265, + "step": 9733 + }, + { + "epoch": 1.4343811394891945, + "grad_norm": 0.5887786149978638, + "learning_rate": 2.6917219772239174e-06, + "loss": 0.5304, + "step": 9734 + }, + { + "epoch": 1.4345284872298625, + "grad_norm": 0.5793076157569885, + "learning_rate": 2.691335418879292e-06, + "loss": 0.5454, + "step": 9735 + }, + { + "epoch": 1.4346758349705304, + "grad_norm": 0.5954176187515259, + "learning_rate": 2.69094885593314e-06, + "loss": 0.5361, + "step": 9736 + }, + { + "epoch": 1.4348231827111984, + "grad_norm": 0.6005052924156189, + "learning_rate": 2.6905622883947573e-06, + "loss": 0.5134, + "step": 9737 + }, + { + "epoch": 1.4349705304518663, + "grad_norm": 0.5995551347732544, + "learning_rate": 2.6901757162734414e-06, + "loss": 0.5422, + "step": 9738 + }, + { + "epoch": 1.4351178781925344, + "grad_norm": 0.6106781959533691, + "learning_rate": 2.689789139578488e-06, + "loss": 0.5351, + "step": 9739 + }, + { + "epoch": 1.4352652259332024, + "grad_norm": 0.6114991903305054, + "learning_rate": 2.6894025583191957e-06, + "loss": 0.5275, + "step": 9740 + }, + { + "epoch": 1.4354125736738703, + "grad_norm": 0.5796871185302734, + "learning_rate": 2.6890159725048604e-06, + "loss": 0.5245, + "step": 9741 + }, + { + "epoch": 1.4355599214145383, + "grad_norm": 0.6135414242744446, + "learning_rate": 2.688629382144779e-06, + "loss": 0.5457, + "step": 9742 + }, + { + "epoch": 1.4357072691552064, + "grad_norm": 0.5956376791000366, + "learning_rate": 2.6882427872482495e-06, + "loss": 0.5289, + "step": 9743 + }, + { + "epoch": 1.4358546168958743, + "grad_norm": 0.5833061337471008, + "learning_rate": 2.68785618782457e-06, + "loss": 0.5161, + "step": 9744 + }, + { + "epoch": 1.4360019646365423, + "grad_norm": 0.5592572093009949, + "learning_rate": 2.6874695838830373e-06, + "loss": 0.5397, + "step": 9745 + }, + { + "epoch": 1.4361493123772102, + "grad_norm": 0.5750744938850403, + "learning_rate": 2.6870829754329484e-06, + "loss": 0.5316, + "step": 9746 + }, + { + "epoch": 1.4362966601178782, + "grad_norm": 0.617902934551239, + "learning_rate": 2.686696362483602e-06, + "loss": 0.5242, + "step": 9747 + }, + { + "epoch": 1.436444007858546, + "grad_norm": 0.5737513899803162, + "learning_rate": 2.6863097450442953e-06, + "loss": 0.5424, + "step": 9748 + }, + { + "epoch": 1.436591355599214, + "grad_norm": 0.5919638276100159, + "learning_rate": 2.685923123124327e-06, + "loss": 0.5486, + "step": 9749 + }, + { + "epoch": 1.4367387033398822, + "grad_norm": 0.6117244362831116, + "learning_rate": 2.685536496732995e-06, + "loss": 0.5222, + "step": 9750 + }, + { + "epoch": 1.4368860510805501, + "grad_norm": 0.5924229025840759, + "learning_rate": 2.6851498658795966e-06, + "loss": 0.5026, + "step": 9751 + }, + { + "epoch": 1.437033398821218, + "grad_norm": 0.5880529880523682, + "learning_rate": 2.684763230573431e-06, + "loss": 0.5542, + "step": 9752 + }, + { + "epoch": 1.437180746561886, + "grad_norm": 0.5973674058914185, + "learning_rate": 2.684376590823797e-06, + "loss": 0.533, + "step": 9753 + }, + { + "epoch": 1.4373280943025541, + "grad_norm": 0.5985232591629028, + "learning_rate": 2.6839899466399916e-06, + "loss": 0.5195, + "step": 9754 + }, + { + "epoch": 1.437475442043222, + "grad_norm": 0.5857942700386047, + "learning_rate": 2.6836032980313147e-06, + "loss": 0.5324, + "step": 9755 + }, + { + "epoch": 1.43762278978389, + "grad_norm": 0.5929141640663147, + "learning_rate": 2.6832166450070644e-06, + "loss": 0.5124, + "step": 9756 + }, + { + "epoch": 1.437770137524558, + "grad_norm": 0.5972726345062256, + "learning_rate": 2.68282998757654e-06, + "loss": 0.5206, + "step": 9757 + }, + { + "epoch": 1.437917485265226, + "grad_norm": 0.5787148475646973, + "learning_rate": 2.68244332574904e-06, + "loss": 0.5566, + "step": 9758 + }, + { + "epoch": 1.4380648330058938, + "grad_norm": 0.5805413722991943, + "learning_rate": 2.682056659533864e-06, + "loss": 0.5394, + "step": 9759 + }, + { + "epoch": 1.4382121807465618, + "grad_norm": 0.5846542119979858, + "learning_rate": 2.6816699889403096e-06, + "loss": 0.5189, + "step": 9760 + }, + { + "epoch": 1.43835952848723, + "grad_norm": 0.5618135333061218, + "learning_rate": 2.6812833139776784e-06, + "loss": 0.4863, + "step": 9761 + }, + { + "epoch": 1.4385068762278979, + "grad_norm": 0.5746179819107056, + "learning_rate": 2.680896634655268e-06, + "loss": 0.4947, + "step": 9762 + }, + { + "epoch": 1.4386542239685658, + "grad_norm": 0.5934920310974121, + "learning_rate": 2.6805099509823785e-06, + "loss": 0.5235, + "step": 9763 + }, + { + "epoch": 1.4388015717092337, + "grad_norm": 0.589591920375824, + "learning_rate": 2.6801232629683093e-06, + "loss": 0.5256, + "step": 9764 + }, + { + "epoch": 1.4389489194499019, + "grad_norm": 0.5675579309463501, + "learning_rate": 2.67973657062236e-06, + "loss": 0.5403, + "step": 9765 + }, + { + "epoch": 1.4390962671905698, + "grad_norm": 0.6098610162734985, + "learning_rate": 2.6793498739538306e-06, + "loss": 0.549, + "step": 9766 + }, + { + "epoch": 1.4392436149312378, + "grad_norm": 0.5964248776435852, + "learning_rate": 2.6789631729720206e-06, + "loss": 0.5403, + "step": 9767 + }, + { + "epoch": 1.4393909626719057, + "grad_norm": 0.5871145129203796, + "learning_rate": 2.678576467686231e-06, + "loss": 0.5474, + "step": 9768 + }, + { + "epoch": 1.4395383104125736, + "grad_norm": 0.6014453768730164, + "learning_rate": 2.6781897581057605e-06, + "loss": 0.5455, + "step": 9769 + }, + { + "epoch": 1.4396856581532416, + "grad_norm": 0.604356050491333, + "learning_rate": 2.6778030442399094e-06, + "loss": 0.5274, + "step": 9770 + }, + { + "epoch": 1.4398330058939095, + "grad_norm": 0.5815904140472412, + "learning_rate": 2.677416326097979e-06, + "loss": 0.5221, + "step": 9771 + }, + { + "epoch": 1.4399803536345777, + "grad_norm": 0.6054075360298157, + "learning_rate": 2.6770296036892695e-06, + "loss": 0.5338, + "step": 9772 + }, + { + "epoch": 1.4401277013752456, + "grad_norm": 0.6202554702758789, + "learning_rate": 2.6766428770230807e-06, + "loss": 0.5211, + "step": 9773 + }, + { + "epoch": 1.4402750491159135, + "grad_norm": 0.558098316192627, + "learning_rate": 2.6762561461087133e-06, + "loss": 0.5647, + "step": 9774 + }, + { + "epoch": 1.4404223968565815, + "grad_norm": 0.5896269679069519, + "learning_rate": 2.675869410955469e-06, + "loss": 0.5078, + "step": 9775 + }, + { + "epoch": 1.4405697445972496, + "grad_norm": 0.577897310256958, + "learning_rate": 2.675482671572647e-06, + "loss": 0.532, + "step": 9776 + }, + { + "epoch": 1.4407170923379176, + "grad_norm": 0.5562728643417358, + "learning_rate": 2.6750959279695497e-06, + "loss": 0.5152, + "step": 9777 + }, + { + "epoch": 1.4408644400785855, + "grad_norm": 0.5819399952888489, + "learning_rate": 2.674709180155477e-06, + "loss": 0.5302, + "step": 9778 + }, + { + "epoch": 1.4410117878192534, + "grad_norm": 0.625589907169342, + "learning_rate": 2.674322428139731e-06, + "loss": 0.5144, + "step": 9779 + }, + { + "epoch": 1.4411591355599214, + "grad_norm": 0.6158449053764343, + "learning_rate": 2.673935671931611e-06, + "loss": 0.5659, + "step": 9780 + }, + { + "epoch": 1.4413064833005893, + "grad_norm": 0.607903003692627, + "learning_rate": 2.673548911540421e-06, + "loss": 0.5364, + "step": 9781 + }, + { + "epoch": 1.4414538310412572, + "grad_norm": 0.5989524126052856, + "learning_rate": 2.673162146975461e-06, + "loss": 0.5298, + "step": 9782 + }, + { + "epoch": 1.4416011787819254, + "grad_norm": 0.5845589637756348, + "learning_rate": 2.6727753782460325e-06, + "loss": 0.5211, + "step": 9783 + }, + { + "epoch": 1.4417485265225933, + "grad_norm": 0.5929787755012512, + "learning_rate": 2.6723886053614367e-06, + "loss": 0.5286, + "step": 9784 + }, + { + "epoch": 1.4418958742632613, + "grad_norm": 0.6013107299804688, + "learning_rate": 2.6720018283309767e-06, + "loss": 0.5181, + "step": 9785 + }, + { + "epoch": 1.4420432220039292, + "grad_norm": 0.6232383847236633, + "learning_rate": 2.671615047163953e-06, + "loss": 0.536, + "step": 9786 + }, + { + "epoch": 1.4421905697445974, + "grad_norm": 0.5698485374450684, + "learning_rate": 2.6712282618696676e-06, + "loss": 0.5366, + "step": 9787 + }, + { + "epoch": 1.4423379174852653, + "grad_norm": 0.5870968699455261, + "learning_rate": 2.670841472457423e-06, + "loss": 0.5192, + "step": 9788 + }, + { + "epoch": 1.4424852652259332, + "grad_norm": 0.5973191857337952, + "learning_rate": 2.6704546789365214e-06, + "loss": 0.5013, + "step": 9789 + }, + { + "epoch": 1.4426326129666012, + "grad_norm": 0.594054102897644, + "learning_rate": 2.6700678813162647e-06, + "loss": 0.5393, + "step": 9790 + }, + { + "epoch": 1.442779960707269, + "grad_norm": 0.6024547219276428, + "learning_rate": 2.6696810796059548e-06, + "loss": 0.533, + "step": 9791 + }, + { + "epoch": 1.442927308447937, + "grad_norm": 0.6028982400894165, + "learning_rate": 2.6692942738148947e-06, + "loss": 0.4839, + "step": 9792 + }, + { + "epoch": 1.443074656188605, + "grad_norm": 0.5771982669830322, + "learning_rate": 2.6689074639523865e-06, + "loss": 0.5493, + "step": 9793 + }, + { + "epoch": 1.4432220039292731, + "grad_norm": 0.6075153350830078, + "learning_rate": 2.6685206500277332e-06, + "loss": 0.5596, + "step": 9794 + }, + { + "epoch": 1.443369351669941, + "grad_norm": 0.6063494086265564, + "learning_rate": 2.6681338320502378e-06, + "loss": 0.5527, + "step": 9795 + }, + { + "epoch": 1.443516699410609, + "grad_norm": 0.58017498254776, + "learning_rate": 2.667747010029202e-06, + "loss": 0.5187, + "step": 9796 + }, + { + "epoch": 1.443664047151277, + "grad_norm": 0.6026691794395447, + "learning_rate": 2.6673601839739284e-06, + "loss": 0.5462, + "step": 9797 + }, + { + "epoch": 1.443811394891945, + "grad_norm": 0.5786339640617371, + "learning_rate": 2.666973353893722e-06, + "loss": 0.5196, + "step": 9798 + }, + { + "epoch": 1.443958742632613, + "grad_norm": 0.5733527541160583, + "learning_rate": 2.6665865197978846e-06, + "loss": 0.5029, + "step": 9799 + }, + { + "epoch": 1.444106090373281, + "grad_norm": 0.576805591583252, + "learning_rate": 2.66619968169572e-06, + "loss": 0.5148, + "step": 9800 + }, + { + "epoch": 1.444253438113949, + "grad_norm": 0.5839876532554626, + "learning_rate": 2.6658128395965298e-06, + "loss": 0.5207, + "step": 9801 + }, + { + "epoch": 1.4444007858546168, + "grad_norm": 0.5980414152145386, + "learning_rate": 2.66542599350962e-06, + "loss": 0.5409, + "step": 9802 + }, + { + "epoch": 1.4445481335952848, + "grad_norm": 0.5723055601119995, + "learning_rate": 2.6650391434442918e-06, + "loss": 0.5136, + "step": 9803 + }, + { + "epoch": 1.4446954813359527, + "grad_norm": 0.5973418951034546, + "learning_rate": 2.6646522894098497e-06, + "loss": 0.5269, + "step": 9804 + }, + { + "epoch": 1.4448428290766209, + "grad_norm": 0.6041827201843262, + "learning_rate": 2.6642654314155973e-06, + "loss": 0.5498, + "step": 9805 + }, + { + "epoch": 1.4449901768172888, + "grad_norm": 0.604426920413971, + "learning_rate": 2.6638785694708386e-06, + "loss": 0.5337, + "step": 9806 + }, + { + "epoch": 1.4451375245579567, + "grad_norm": 0.5866217613220215, + "learning_rate": 2.6634917035848767e-06, + "loss": 0.538, + "step": 9807 + }, + { + "epoch": 1.4452848722986247, + "grad_norm": 0.6608548760414124, + "learning_rate": 2.6631048337670163e-06, + "loss": 0.5433, + "step": 9808 + }, + { + "epoch": 1.4454322200392928, + "grad_norm": 0.6046935319900513, + "learning_rate": 2.662717960026562e-06, + "loss": 0.5478, + "step": 9809 + }, + { + "epoch": 1.4455795677799608, + "grad_norm": 0.6024928689002991, + "learning_rate": 2.662331082372816e-06, + "loss": 0.5501, + "step": 9810 + }, + { + "epoch": 1.4457269155206287, + "grad_norm": 0.6227796673774719, + "learning_rate": 2.661944200815084e-06, + "loss": 0.53, + "step": 9811 + }, + { + "epoch": 1.4458742632612966, + "grad_norm": 0.6106438040733337, + "learning_rate": 2.6615573153626694e-06, + "loss": 0.5102, + "step": 9812 + }, + { + "epoch": 1.4460216110019646, + "grad_norm": 0.5695505738258362, + "learning_rate": 2.661170426024878e-06, + "loss": 0.5547, + "step": 9813 + }, + { + "epoch": 1.4461689587426325, + "grad_norm": 0.6092890501022339, + "learning_rate": 2.660783532811013e-06, + "loss": 0.5436, + "step": 9814 + }, + { + "epoch": 1.4463163064833005, + "grad_norm": 0.5804924368858337, + "learning_rate": 2.6603966357303785e-06, + "loss": 0.5329, + "step": 9815 + }, + { + "epoch": 1.4464636542239686, + "grad_norm": 0.5745814442634583, + "learning_rate": 2.660009734792281e-06, + "loss": 0.5349, + "step": 9816 + }, + { + "epoch": 1.4466110019646365, + "grad_norm": 0.6082786321640015, + "learning_rate": 2.659622830006024e-06, + "loss": 0.5212, + "step": 9817 + }, + { + "epoch": 1.4467583497053045, + "grad_norm": 0.6182600259780884, + "learning_rate": 2.6592359213809133e-06, + "loss": 0.5262, + "step": 9818 + }, + { + "epoch": 1.4469056974459724, + "grad_norm": 0.5897960662841797, + "learning_rate": 2.6588490089262536e-06, + "loss": 0.5354, + "step": 9819 + }, + { + "epoch": 1.4470530451866406, + "grad_norm": 0.6251097321510315, + "learning_rate": 2.6584620926513495e-06, + "loss": 0.5236, + "step": 9820 + }, + { + "epoch": 1.4472003929273085, + "grad_norm": 0.5949124693870544, + "learning_rate": 2.6580751725655065e-06, + "loss": 0.5517, + "step": 9821 + }, + { + "epoch": 1.4473477406679764, + "grad_norm": 0.6332907676696777, + "learning_rate": 2.6576882486780292e-06, + "loss": 0.4976, + "step": 9822 + }, + { + "epoch": 1.4474950884086444, + "grad_norm": 0.5415046811103821, + "learning_rate": 2.657301320998224e-06, + "loss": 0.5319, + "step": 9823 + }, + { + "epoch": 1.4476424361493123, + "grad_norm": 0.6027477383613586, + "learning_rate": 2.6569143895353955e-06, + "loss": 0.5422, + "step": 9824 + }, + { + "epoch": 1.4477897838899803, + "grad_norm": 0.6007509827613831, + "learning_rate": 2.6565274542988495e-06, + "loss": 0.5001, + "step": 9825 + }, + { + "epoch": 1.4479371316306484, + "grad_norm": 0.5905018448829651, + "learning_rate": 2.656140515297892e-06, + "loss": 0.528, + "step": 9826 + }, + { + "epoch": 1.4480844793713163, + "grad_norm": 0.6036232113838196, + "learning_rate": 2.655753572541828e-06, + "loss": 0.4933, + "step": 9827 + }, + { + "epoch": 1.4482318271119843, + "grad_norm": 0.6064690947532654, + "learning_rate": 2.655366626039964e-06, + "loss": 0.5489, + "step": 9828 + }, + { + "epoch": 1.4483791748526522, + "grad_norm": 0.6089418530464172, + "learning_rate": 2.6549796758016054e-06, + "loss": 0.5075, + "step": 9829 + }, + { + "epoch": 1.4485265225933202, + "grad_norm": 0.6079117059707642, + "learning_rate": 2.6545927218360585e-06, + "loss": 0.5594, + "step": 9830 + }, + { + "epoch": 1.4486738703339883, + "grad_norm": 0.6213726997375488, + "learning_rate": 2.6542057641526286e-06, + "loss": 0.5231, + "step": 9831 + }, + { + "epoch": 1.4488212180746562, + "grad_norm": 0.5561091303825378, + "learning_rate": 2.6538188027606226e-06, + "loss": 0.5554, + "step": 9832 + }, + { + "epoch": 1.4489685658153242, + "grad_norm": 0.5799715518951416, + "learning_rate": 2.6534318376693467e-06, + "loss": 0.574, + "step": 9833 + }, + { + "epoch": 1.4491159135559921, + "grad_norm": 0.5843555927276611, + "learning_rate": 2.6530448688881072e-06, + "loss": 0.5148, + "step": 9834 + }, + { + "epoch": 1.44926326129666, + "grad_norm": 0.5849975347518921, + "learning_rate": 2.6526578964262102e-06, + "loss": 0.559, + "step": 9835 + }, + { + "epoch": 1.449410609037328, + "grad_norm": 0.5900014638900757, + "learning_rate": 2.652270920292962e-06, + "loss": 0.5041, + "step": 9836 + }, + { + "epoch": 1.4495579567779961, + "grad_norm": 0.5815402269363403, + "learning_rate": 2.6518839404976697e-06, + "loss": 0.5236, + "step": 9837 + }, + { + "epoch": 1.449705304518664, + "grad_norm": 0.6161077618598938, + "learning_rate": 2.6514969570496406e-06, + "loss": 0.5731, + "step": 9838 + }, + { + "epoch": 1.449852652259332, + "grad_norm": 0.6003743410110474, + "learning_rate": 2.6511099699581797e-06, + "loss": 0.5392, + "step": 9839 + }, + { + "epoch": 1.45, + "grad_norm": 0.5855796337127686, + "learning_rate": 2.650722979232596e-06, + "loss": 0.5462, + "step": 9840 + }, + { + "epoch": 1.4501473477406681, + "grad_norm": 0.5890954732894897, + "learning_rate": 2.6503359848821945e-06, + "loss": 0.5102, + "step": 9841 + }, + { + "epoch": 1.450294695481336, + "grad_norm": 0.601511538028717, + "learning_rate": 2.6499489869162837e-06, + "loss": 0.5554, + "step": 9842 + }, + { + "epoch": 1.450442043222004, + "grad_norm": 0.6020495295524597, + "learning_rate": 2.6495619853441695e-06, + "loss": 0.5191, + "step": 9843 + }, + { + "epoch": 1.450589390962672, + "grad_norm": 0.5871845483779907, + "learning_rate": 2.64917498017516e-06, + "loss": 0.5359, + "step": 9844 + }, + { + "epoch": 1.4507367387033399, + "grad_norm": 0.6026452779769897, + "learning_rate": 2.6487879714185622e-06, + "loss": 0.4742, + "step": 9845 + }, + { + "epoch": 1.4508840864440078, + "grad_norm": 0.6103945970535278, + "learning_rate": 2.6484009590836836e-06, + "loss": 0.5231, + "step": 9846 + }, + { + "epoch": 1.4510314341846757, + "grad_norm": 0.558080792427063, + "learning_rate": 2.648013943179831e-06, + "loss": 0.5287, + "step": 9847 + }, + { + "epoch": 1.4511787819253439, + "grad_norm": 0.6031992435455322, + "learning_rate": 2.647626923716313e-06, + "loss": 0.5405, + "step": 9848 + }, + { + "epoch": 1.4513261296660118, + "grad_norm": 0.5647971034049988, + "learning_rate": 2.647239900702437e-06, + "loss": 0.556, + "step": 9849 + }, + { + "epoch": 1.4514734774066798, + "grad_norm": 0.6248372197151184, + "learning_rate": 2.64685287414751e-06, + "loss": 0.5149, + "step": 9850 + }, + { + "epoch": 1.4516208251473477, + "grad_norm": 0.6469186544418335, + "learning_rate": 2.64646584406084e-06, + "loss": 0.5583, + "step": 9851 + }, + { + "epoch": 1.4517681728880159, + "grad_norm": 0.6374300122261047, + "learning_rate": 2.6460788104517353e-06, + "loss": 0.5169, + "step": 9852 + }, + { + "epoch": 1.4519155206286838, + "grad_norm": 0.5713105797767639, + "learning_rate": 2.645691773329504e-06, + "loss": 0.5476, + "step": 9853 + }, + { + "epoch": 1.4520628683693517, + "grad_norm": 0.6026155948638916, + "learning_rate": 2.6453047327034538e-06, + "loss": 0.5529, + "step": 9854 + }, + { + "epoch": 1.4522102161100197, + "grad_norm": 0.6054568290710449, + "learning_rate": 2.6449176885828926e-06, + "loss": 0.5318, + "step": 9855 + }, + { + "epoch": 1.4523575638506876, + "grad_norm": 0.5980650186538696, + "learning_rate": 2.644530640977129e-06, + "loss": 0.5288, + "step": 9856 + }, + { + "epoch": 1.4525049115913555, + "grad_norm": 0.5892629623413086, + "learning_rate": 2.644143589895472e-06, + "loss": 0.5593, + "step": 9857 + }, + { + "epoch": 1.4526522593320235, + "grad_norm": 0.6045873761177063, + "learning_rate": 2.643756535347229e-06, + "loss": 0.5256, + "step": 9858 + }, + { + "epoch": 1.4527996070726916, + "grad_norm": 0.5535716414451599, + "learning_rate": 2.643369477341708e-06, + "loss": 0.5409, + "step": 9859 + }, + { + "epoch": 1.4529469548133596, + "grad_norm": 0.5843128561973572, + "learning_rate": 2.642982415888219e-06, + "loss": 0.5496, + "step": 9860 + }, + { + "epoch": 1.4530943025540275, + "grad_norm": 0.5803544521331787, + "learning_rate": 2.64259535099607e-06, + "loss": 0.5213, + "step": 9861 + }, + { + "epoch": 1.4532416502946954, + "grad_norm": 0.5970826148986816, + "learning_rate": 2.6422082826745695e-06, + "loss": 0.5348, + "step": 9862 + }, + { + "epoch": 1.4533889980353636, + "grad_norm": 0.6021209359169006, + "learning_rate": 2.641821210933026e-06, + "loss": 0.5115, + "step": 9863 + }, + { + "epoch": 1.4535363457760315, + "grad_norm": 0.5865763425827026, + "learning_rate": 2.6414341357807497e-06, + "loss": 0.532, + "step": 9864 + }, + { + "epoch": 1.4536836935166995, + "grad_norm": 0.648102879524231, + "learning_rate": 2.6410470572270486e-06, + "loss": 0.5345, + "step": 9865 + }, + { + "epoch": 1.4538310412573674, + "grad_norm": 0.6077001690864563, + "learning_rate": 2.6406599752812316e-06, + "loss": 0.5454, + "step": 9866 + }, + { + "epoch": 1.4539783889980353, + "grad_norm": 0.5586757659912109, + "learning_rate": 2.640272889952608e-06, + "loss": 0.5261, + "step": 9867 + }, + { + "epoch": 1.4541257367387033, + "grad_norm": 0.6119193434715271, + "learning_rate": 2.639885801250488e-06, + "loss": 0.5456, + "step": 9868 + }, + { + "epoch": 1.4542730844793712, + "grad_norm": 0.5802199840545654, + "learning_rate": 2.6394987091841794e-06, + "loss": 0.5269, + "step": 9869 + }, + { + "epoch": 1.4544204322200394, + "grad_norm": 0.5878089666366577, + "learning_rate": 2.639111613762992e-06, + "loss": 0.5101, + "step": 9870 + }, + { + "epoch": 1.4545677799607073, + "grad_norm": 0.588291347026825, + "learning_rate": 2.638724514996236e-06, + "loss": 0.5423, + "step": 9871 + }, + { + "epoch": 1.4547151277013752, + "grad_norm": 0.5849677920341492, + "learning_rate": 2.6383374128932204e-06, + "loss": 0.5521, + "step": 9872 + }, + { + "epoch": 1.4548624754420432, + "grad_norm": 0.6072477698326111, + "learning_rate": 2.6379503074632553e-06, + "loss": 0.5337, + "step": 9873 + }, + { + "epoch": 1.4550098231827113, + "grad_norm": 0.5766562223434448, + "learning_rate": 2.6375631987156497e-06, + "loss": 0.4749, + "step": 9874 + }, + { + "epoch": 1.4551571709233793, + "grad_norm": 0.6235811710357666, + "learning_rate": 2.637176086659714e-06, + "loss": 0.5484, + "step": 9875 + }, + { + "epoch": 1.4553045186640472, + "grad_norm": 0.5837340950965881, + "learning_rate": 2.636788971304758e-06, + "loss": 0.5444, + "step": 9876 + }, + { + "epoch": 1.4554518664047151, + "grad_norm": 0.5813325643539429, + "learning_rate": 2.6364018526600905e-06, + "loss": 0.5508, + "step": 9877 + }, + { + "epoch": 1.455599214145383, + "grad_norm": 0.5928691029548645, + "learning_rate": 2.636014730735023e-06, + "loss": 0.5489, + "step": 9878 + }, + { + "epoch": 1.455746561886051, + "grad_norm": 1.049030065536499, + "learning_rate": 2.6356276055388645e-06, + "loss": 0.4979, + "step": 9879 + }, + { + "epoch": 1.455893909626719, + "grad_norm": 0.6039294600486755, + "learning_rate": 2.6352404770809263e-06, + "loss": 0.5303, + "step": 9880 + }, + { + "epoch": 1.456041257367387, + "grad_norm": 0.5552292466163635, + "learning_rate": 2.6348533453705176e-06, + "loss": 0.4878, + "step": 9881 + }, + { + "epoch": 1.456188605108055, + "grad_norm": 0.5900421142578125, + "learning_rate": 2.6344662104169494e-06, + "loss": 0.4909, + "step": 9882 + }, + { + "epoch": 1.456335952848723, + "grad_norm": 0.6260613799095154, + "learning_rate": 2.634079072229532e-06, + "loss": 0.5355, + "step": 9883 + }, + { + "epoch": 1.456483300589391, + "grad_norm": 0.6030600070953369, + "learning_rate": 2.6336919308175757e-06, + "loss": 0.5273, + "step": 9884 + }, + { + "epoch": 1.456630648330059, + "grad_norm": 0.6174973845481873, + "learning_rate": 2.633304786190391e-06, + "loss": 0.537, + "step": 9885 + }, + { + "epoch": 1.456777996070727, + "grad_norm": 0.5545453429222107, + "learning_rate": 2.632917638357289e-06, + "loss": 0.5394, + "step": 9886 + }, + { + "epoch": 1.456925343811395, + "grad_norm": 0.6350937485694885, + "learning_rate": 2.6325304873275798e-06, + "loss": 0.5224, + "step": 9887 + }, + { + "epoch": 1.4570726915520629, + "grad_norm": 0.6390053033828735, + "learning_rate": 2.632143333110575e-06, + "loss": 0.5562, + "step": 9888 + }, + { + "epoch": 1.4572200392927308, + "grad_norm": 0.6283150911331177, + "learning_rate": 2.631756175715584e-06, + "loss": 0.5537, + "step": 9889 + }, + { + "epoch": 1.4573673870333987, + "grad_norm": 0.6175167560577393, + "learning_rate": 2.6313690151519194e-06, + "loss": 0.5545, + "step": 9890 + }, + { + "epoch": 1.4575147347740667, + "grad_norm": 0.6084418296813965, + "learning_rate": 2.630981851428892e-06, + "loss": 0.5109, + "step": 9891 + }, + { + "epoch": 1.4576620825147348, + "grad_norm": 0.5583937168121338, + "learning_rate": 2.630594684555812e-06, + "loss": 0.493, + "step": 9892 + }, + { + "epoch": 1.4578094302554028, + "grad_norm": 0.5830779075622559, + "learning_rate": 2.6302075145419916e-06, + "loss": 0.5307, + "step": 9893 + }, + { + "epoch": 1.4579567779960707, + "grad_norm": 0.5984534025192261, + "learning_rate": 2.629820341396741e-06, + "loss": 0.5371, + "step": 9894 + }, + { + "epoch": 1.4581041257367386, + "grad_norm": 0.5933192372322083, + "learning_rate": 2.6294331651293725e-06, + "loss": 0.5743, + "step": 9895 + }, + { + "epoch": 1.4582514734774068, + "grad_norm": 0.5770457983016968, + "learning_rate": 2.6290459857491972e-06, + "loss": 0.5415, + "step": 9896 + }, + { + "epoch": 1.4583988212180747, + "grad_norm": 0.626039445400238, + "learning_rate": 2.6286588032655263e-06, + "loss": 0.5447, + "step": 9897 + }, + { + "epoch": 1.4585461689587427, + "grad_norm": 0.5891436338424683, + "learning_rate": 2.628271617687671e-06, + "loss": 0.5465, + "step": 9898 + }, + { + "epoch": 1.4586935166994106, + "grad_norm": 0.6091305613517761, + "learning_rate": 2.627884429024944e-06, + "loss": 0.526, + "step": 9899 + }, + { + "epoch": 1.4588408644400785, + "grad_norm": 0.5445797443389893, + "learning_rate": 2.627497237286657e-06, + "loss": 0.5285, + "step": 9900 + }, + { + "epoch": 1.4589882121807465, + "grad_norm": 0.6189185976982117, + "learning_rate": 2.6271100424821206e-06, + "loss": 0.5187, + "step": 9901 + }, + { + "epoch": 1.4591355599214144, + "grad_norm": 0.5987495183944702, + "learning_rate": 2.6267228446206473e-06, + "loss": 0.5276, + "step": 9902 + }, + { + "epoch": 1.4592829076620826, + "grad_norm": 0.5725798606872559, + "learning_rate": 2.6263356437115487e-06, + "loss": 0.5557, + "step": 9903 + }, + { + "epoch": 1.4594302554027505, + "grad_norm": 0.5853303074836731, + "learning_rate": 2.625948439764138e-06, + "loss": 0.5227, + "step": 9904 + }, + { + "epoch": 1.4595776031434184, + "grad_norm": 0.6072374582290649, + "learning_rate": 2.6255612327877257e-06, + "loss": 0.5396, + "step": 9905 + }, + { + "epoch": 1.4597249508840864, + "grad_norm": 0.5768663287162781, + "learning_rate": 2.6251740227916246e-06, + "loss": 0.4989, + "step": 9906 + }, + { + "epoch": 1.4598722986247545, + "grad_norm": 0.596519410610199, + "learning_rate": 2.624786809785147e-06, + "loss": 0.5146, + "step": 9907 + }, + { + "epoch": 1.4600196463654225, + "grad_norm": 0.6046650409698486, + "learning_rate": 2.624399593777605e-06, + "loss": 0.541, + "step": 9908 + }, + { + "epoch": 1.4601669941060904, + "grad_norm": 0.5707663297653198, + "learning_rate": 2.6240123747783115e-06, + "loss": 0.5164, + "step": 9909 + }, + { + "epoch": 1.4603143418467583, + "grad_norm": 0.6081482172012329, + "learning_rate": 2.6236251527965785e-06, + "loss": 0.5472, + "step": 9910 + }, + { + "epoch": 1.4604616895874263, + "grad_norm": 0.609214723110199, + "learning_rate": 2.6232379278417184e-06, + "loss": 0.5241, + "step": 9911 + }, + { + "epoch": 1.4606090373280942, + "grad_norm": 0.5973914861679077, + "learning_rate": 2.622850699923044e-06, + "loss": 0.5375, + "step": 9912 + }, + { + "epoch": 1.4607563850687622, + "grad_norm": 0.6349538564682007, + "learning_rate": 2.622463469049868e-06, + "loss": 0.5254, + "step": 9913 + }, + { + "epoch": 1.4609037328094303, + "grad_norm": 0.591354250907898, + "learning_rate": 2.622076235231503e-06, + "loss": 0.5396, + "step": 9914 + }, + { + "epoch": 1.4610510805500982, + "grad_norm": 0.5974034070968628, + "learning_rate": 2.621688998477262e-06, + "loss": 0.5141, + "step": 9915 + }, + { + "epoch": 1.4611984282907662, + "grad_norm": 0.5794710516929626, + "learning_rate": 2.6213017587964575e-06, + "loss": 0.5104, + "step": 9916 + }, + { + "epoch": 1.4613457760314341, + "grad_norm": 0.5901190042495728, + "learning_rate": 2.620914516198402e-06, + "loss": 0.5454, + "step": 9917 + }, + { + "epoch": 1.4614931237721023, + "grad_norm": 0.626093327999115, + "learning_rate": 2.6205272706924102e-06, + "loss": 0.5378, + "step": 9918 + }, + { + "epoch": 1.4616404715127702, + "grad_norm": 0.6106665134429932, + "learning_rate": 2.6201400222877932e-06, + "loss": 0.5283, + "step": 9919 + }, + { + "epoch": 1.4617878192534381, + "grad_norm": 0.5919543504714966, + "learning_rate": 2.619752770993865e-06, + "loss": 0.5051, + "step": 9920 + }, + { + "epoch": 1.461935166994106, + "grad_norm": 0.6056196093559265, + "learning_rate": 2.6193655168199395e-06, + "loss": 0.5345, + "step": 9921 + }, + { + "epoch": 1.462082514734774, + "grad_norm": 0.6011831760406494, + "learning_rate": 2.618978259775329e-06, + "loss": 0.5443, + "step": 9922 + }, + { + "epoch": 1.462229862475442, + "grad_norm": 0.6045325994491577, + "learning_rate": 2.618590999869347e-06, + "loss": 0.5424, + "step": 9923 + }, + { + "epoch": 1.46237721021611, + "grad_norm": 0.6268953680992126, + "learning_rate": 2.618203737111308e-06, + "loss": 0.5435, + "step": 9924 + }, + { + "epoch": 1.462524557956778, + "grad_norm": 0.5844424962997437, + "learning_rate": 2.617816471510523e-06, + "loss": 0.5124, + "step": 9925 + }, + { + "epoch": 1.462671905697446, + "grad_norm": 0.6022363305091858, + "learning_rate": 2.617429203076308e-06, + "loss": 0.5189, + "step": 9926 + }, + { + "epoch": 1.462819253438114, + "grad_norm": 0.5652041435241699, + "learning_rate": 2.6170419318179753e-06, + "loss": 0.5322, + "step": 9927 + }, + { + "epoch": 1.4629666011787819, + "grad_norm": 0.5852246284484863, + "learning_rate": 2.6166546577448393e-06, + "loss": 0.5203, + "step": 9928 + }, + { + "epoch": 1.46311394891945, + "grad_norm": 0.6315856575965881, + "learning_rate": 2.6162673808662137e-06, + "loss": 0.567, + "step": 9929 + }, + { + "epoch": 1.463261296660118, + "grad_norm": 0.608891487121582, + "learning_rate": 2.6158801011914116e-06, + "loss": 0.5373, + "step": 9930 + }, + { + "epoch": 1.4634086444007859, + "grad_norm": 0.5887822508811951, + "learning_rate": 2.615492818729748e-06, + "loss": 0.5165, + "step": 9931 + }, + { + "epoch": 1.4635559921414538, + "grad_norm": 0.572455108165741, + "learning_rate": 2.6151055334905356e-06, + "loss": 0.5344, + "step": 9932 + }, + { + "epoch": 1.4637033398821218, + "grad_norm": 0.5913503766059875, + "learning_rate": 2.61471824548309e-06, + "loss": 0.5452, + "step": 9933 + }, + { + "epoch": 1.4638506876227897, + "grad_norm": 0.6317070126533508, + "learning_rate": 2.614330954716724e-06, + "loss": 0.5392, + "step": 9934 + }, + { + "epoch": 1.4639980353634576, + "grad_norm": 0.5990109443664551, + "learning_rate": 2.613943661200752e-06, + "loss": 0.5449, + "step": 9935 + }, + { + "epoch": 1.4641453831041258, + "grad_norm": 0.5892694592475891, + "learning_rate": 2.6135563649444883e-06, + "loss": 0.5284, + "step": 9936 + }, + { + "epoch": 1.4642927308447937, + "grad_norm": 0.5950057506561279, + "learning_rate": 2.6131690659572478e-06, + "loss": 0.5272, + "step": 9937 + }, + { + "epoch": 1.4644400785854617, + "grad_norm": 0.5876561999320984, + "learning_rate": 2.612781764248344e-06, + "loss": 0.5144, + "step": 9938 + }, + { + "epoch": 1.4645874263261296, + "grad_norm": 0.574324905872345, + "learning_rate": 2.6123944598270918e-06, + "loss": 0.4958, + "step": 9939 + }, + { + "epoch": 1.4647347740667978, + "grad_norm": 0.6349040865898132, + "learning_rate": 2.612007152702805e-06, + "loss": 0.5194, + "step": 9940 + }, + { + "epoch": 1.4648821218074657, + "grad_norm": 0.5906997919082642, + "learning_rate": 2.6116198428847995e-06, + "loss": 0.5696, + "step": 9941 + }, + { + "epoch": 1.4650294695481336, + "grad_norm": 0.6295965909957886, + "learning_rate": 2.611232530382389e-06, + "loss": 0.5435, + "step": 9942 + }, + { + "epoch": 1.4651768172888016, + "grad_norm": 0.6153285503387451, + "learning_rate": 2.610845215204888e-06, + "loss": 0.5253, + "step": 9943 + }, + { + "epoch": 1.4653241650294695, + "grad_norm": 0.5828400254249573, + "learning_rate": 2.6104578973616114e-06, + "loss": 0.5136, + "step": 9944 + }, + { + "epoch": 1.4654715127701374, + "grad_norm": 0.5891258716583252, + "learning_rate": 2.6100705768618744e-06, + "loss": 0.5565, + "step": 9945 + }, + { + "epoch": 1.4656188605108054, + "grad_norm": 0.6146863698959351, + "learning_rate": 2.609683253714992e-06, + "loss": 0.5483, + "step": 9946 + }, + { + "epoch": 1.4657662082514735, + "grad_norm": 0.6208190321922302, + "learning_rate": 2.6092959279302784e-06, + "loss": 0.5237, + "step": 9947 + }, + { + "epoch": 1.4659135559921415, + "grad_norm": 0.5941185355186462, + "learning_rate": 2.608908599517049e-06, + "loss": 0.5434, + "step": 9948 + }, + { + "epoch": 1.4660609037328094, + "grad_norm": 0.6153148412704468, + "learning_rate": 2.608521268484619e-06, + "loss": 0.5271, + "step": 9949 + }, + { + "epoch": 1.4662082514734773, + "grad_norm": 0.6015383005142212, + "learning_rate": 2.6081339348423037e-06, + "loss": 0.5051, + "step": 9950 + }, + { + "epoch": 1.4663555992141455, + "grad_norm": 0.6236249804496765, + "learning_rate": 2.607746598599417e-06, + "loss": 0.5349, + "step": 9951 + }, + { + "epoch": 1.4665029469548134, + "grad_norm": 0.6245143413543701, + "learning_rate": 2.607359259765276e-06, + "loss": 0.5315, + "step": 9952 + }, + { + "epoch": 1.4666502946954814, + "grad_norm": 0.6336596608161926, + "learning_rate": 2.606971918349195e-06, + "loss": 0.5405, + "step": 9953 + }, + { + "epoch": 1.4667976424361493, + "grad_norm": 0.617985188961029, + "learning_rate": 2.6065845743604895e-06, + "loss": 0.5126, + "step": 9954 + }, + { + "epoch": 1.4669449901768172, + "grad_norm": 0.6423707008361816, + "learning_rate": 2.6061972278084747e-06, + "loss": 0.5555, + "step": 9955 + }, + { + "epoch": 1.4670923379174852, + "grad_norm": 0.5593560338020325, + "learning_rate": 2.6058098787024666e-06, + "loss": 0.5009, + "step": 9956 + }, + { + "epoch": 1.467239685658153, + "grad_norm": 0.5895771980285645, + "learning_rate": 2.605422527051781e-06, + "loss": 0.5071, + "step": 9957 + }, + { + "epoch": 1.4673870333988213, + "grad_norm": 0.6016409993171692, + "learning_rate": 2.6050351728657326e-06, + "loss": 0.5467, + "step": 9958 + }, + { + "epoch": 1.4675343811394892, + "grad_norm": 0.5913192629814148, + "learning_rate": 2.6046478161536376e-06, + "loss": 0.5074, + "step": 9959 + }, + { + "epoch": 1.4676817288801571, + "grad_norm": 0.6050670742988586, + "learning_rate": 2.6042604569248112e-06, + "loss": 0.5526, + "step": 9960 + }, + { + "epoch": 1.467829076620825, + "grad_norm": 0.6069830656051636, + "learning_rate": 2.6038730951885704e-06, + "loss": 0.5628, + "step": 9961 + }, + { + "epoch": 1.4679764243614932, + "grad_norm": 0.6136523485183716, + "learning_rate": 2.60348573095423e-06, + "loss": 0.501, + "step": 9962 + }, + { + "epoch": 1.4681237721021612, + "grad_norm": 0.6104743480682373, + "learning_rate": 2.6030983642311063e-06, + "loss": 0.5229, + "step": 9963 + }, + { + "epoch": 1.468271119842829, + "grad_norm": 0.5980548858642578, + "learning_rate": 2.6027109950285156e-06, + "loss": 0.5417, + "step": 9964 + }, + { + "epoch": 1.468418467583497, + "grad_norm": 0.5875688195228577, + "learning_rate": 2.6023236233557736e-06, + "loss": 0.5558, + "step": 9965 + }, + { + "epoch": 1.468565815324165, + "grad_norm": 0.6151111721992493, + "learning_rate": 2.6019362492221968e-06, + "loss": 0.5217, + "step": 9966 + }, + { + "epoch": 1.468713163064833, + "grad_norm": 0.5682883858680725, + "learning_rate": 2.6015488726371007e-06, + "loss": 0.5351, + "step": 9967 + }, + { + "epoch": 1.468860510805501, + "grad_norm": 0.5931665301322937, + "learning_rate": 2.6011614936098016e-06, + "loss": 0.5556, + "step": 9968 + }, + { + "epoch": 1.469007858546169, + "grad_norm": 0.6644761562347412, + "learning_rate": 2.600774112149616e-06, + "loss": 0.4977, + "step": 9969 + }, + { + "epoch": 1.469155206286837, + "grad_norm": 0.6262829899787903, + "learning_rate": 2.6003867282658606e-06, + "loss": 0.5229, + "step": 9970 + }, + { + "epoch": 1.4693025540275049, + "grad_norm": 0.6095042824745178, + "learning_rate": 2.5999993419678513e-06, + "loss": 0.5441, + "step": 9971 + }, + { + "epoch": 1.4694499017681728, + "grad_norm": 0.6209861636161804, + "learning_rate": 2.599611953264905e-06, + "loss": 0.5452, + "step": 9972 + }, + { + "epoch": 1.469597249508841, + "grad_norm": 0.5965262055397034, + "learning_rate": 2.5992245621663378e-06, + "loss": 0.5411, + "step": 9973 + }, + { + "epoch": 1.469744597249509, + "grad_norm": 0.6038243770599365, + "learning_rate": 2.598837168681466e-06, + "loss": 0.558, + "step": 9974 + }, + { + "epoch": 1.4698919449901768, + "grad_norm": 0.6585659980773926, + "learning_rate": 2.5984497728196074e-06, + "loss": 0.5195, + "step": 9975 + }, + { + "epoch": 1.4700392927308448, + "grad_norm": 0.5875405073165894, + "learning_rate": 2.5980623745900773e-06, + "loss": 0.5415, + "step": 9976 + }, + { + "epoch": 1.4701866404715127, + "grad_norm": 0.6867613196372986, + "learning_rate": 2.597674974002193e-06, + "loss": 0.4993, + "step": 9977 + }, + { + "epoch": 1.4703339882121806, + "grad_norm": 0.6035363674163818, + "learning_rate": 2.597287571065272e-06, + "loss": 0.5441, + "step": 9978 + }, + { + "epoch": 1.4704813359528488, + "grad_norm": 0.6264950633049011, + "learning_rate": 2.59690016578863e-06, + "loss": 0.5555, + "step": 9979 + }, + { + "epoch": 1.4706286836935167, + "grad_norm": 0.5715134739875793, + "learning_rate": 2.5965127581815848e-06, + "loss": 0.4877, + "step": 9980 + }, + { + "epoch": 1.4707760314341847, + "grad_norm": 0.5780227780342102, + "learning_rate": 2.5961253482534522e-06, + "loss": 0.5154, + "step": 9981 + }, + { + "epoch": 1.4709233791748526, + "grad_norm": 0.6042702198028564, + "learning_rate": 2.5957379360135508e-06, + "loss": 0.5302, + "step": 9982 + }, + { + "epoch": 1.4710707269155208, + "grad_norm": 0.6045791506767273, + "learning_rate": 2.595350521471197e-06, + "loss": 0.5119, + "step": 9983 + }, + { + "epoch": 1.4712180746561887, + "grad_norm": 0.6366142630577087, + "learning_rate": 2.5949631046357077e-06, + "loss": 0.5583, + "step": 9984 + }, + { + "epoch": 1.4713654223968566, + "grad_norm": 0.5981544256210327, + "learning_rate": 2.5945756855164007e-06, + "loss": 0.5089, + "step": 9985 + }, + { + "epoch": 1.4715127701375246, + "grad_norm": 0.6127652525901794, + "learning_rate": 2.594188264122592e-06, + "loss": 0.4864, + "step": 9986 + }, + { + "epoch": 1.4716601178781925, + "grad_norm": 0.5793547630310059, + "learning_rate": 2.5938008404636007e-06, + "loss": 0.5377, + "step": 9987 + }, + { + "epoch": 1.4718074656188604, + "grad_norm": 0.5820690393447876, + "learning_rate": 2.593413414548743e-06, + "loss": 0.5294, + "step": 9988 + }, + { + "epoch": 1.4719548133595284, + "grad_norm": 0.6025925874710083, + "learning_rate": 2.593025986387336e-06, + "loss": 0.5201, + "step": 9989 + }, + { + "epoch": 1.4721021611001965, + "grad_norm": 0.60872882604599, + "learning_rate": 2.5926385559886973e-06, + "loss": 0.5376, + "step": 9990 + }, + { + "epoch": 1.4722495088408645, + "grad_norm": 0.619564414024353, + "learning_rate": 2.592251123362146e-06, + "loss": 0.5551, + "step": 9991 + }, + { + "epoch": 1.4723968565815324, + "grad_norm": 0.6202471852302551, + "learning_rate": 2.5918636885169977e-06, + "loss": 0.5535, + "step": 9992 + }, + { + "epoch": 1.4725442043222003, + "grad_norm": 0.5841695666313171, + "learning_rate": 2.5914762514625715e-06, + "loss": 0.5433, + "step": 9993 + }, + { + "epoch": 1.4726915520628685, + "grad_norm": 0.6435644030570984, + "learning_rate": 2.5910888122081835e-06, + "loss": 0.4953, + "step": 9994 + }, + { + "epoch": 1.4728388998035364, + "grad_norm": 0.558293342590332, + "learning_rate": 2.5907013707631523e-06, + "loss": 0.5299, + "step": 9995 + }, + { + "epoch": 1.4729862475442044, + "grad_norm": 0.6302747130393982, + "learning_rate": 2.590313927136796e-06, + "loss": 0.5379, + "step": 9996 + }, + { + "epoch": 1.4731335952848723, + "grad_norm": 0.5539103746414185, + "learning_rate": 2.589926481338432e-06, + "loss": 0.5217, + "step": 9997 + }, + { + "epoch": 1.4732809430255402, + "grad_norm": 0.6226930022239685, + "learning_rate": 2.5895390333773786e-06, + "loss": 0.4871, + "step": 9998 + }, + { + "epoch": 1.4734282907662082, + "grad_norm": 0.6194667220115662, + "learning_rate": 2.589151583262952e-06, + "loss": 0.5204, + "step": 9999 + }, + { + "epoch": 1.4735756385068761, + "grad_norm": 0.5744219422340393, + "learning_rate": 2.5887641310044733e-06, + "loss": 0.4931, + "step": 10000 + }, + { + "epoch": 1.4737229862475443, + "grad_norm": 0.5780558586120605, + "learning_rate": 2.5883766766112577e-06, + "loss": 0.5219, + "step": 10001 + }, + { + "epoch": 1.4738703339882122, + "grad_norm": 0.6071460247039795, + "learning_rate": 2.5879892200926255e-06, + "loss": 0.5181, + "step": 10002 + }, + { + "epoch": 1.4740176817288801, + "grad_norm": 0.6100936532020569, + "learning_rate": 2.5876017614578937e-06, + "loss": 0.5717, + "step": 10003 + }, + { + "epoch": 1.474165029469548, + "grad_norm": 0.6069823503494263, + "learning_rate": 2.5872143007163804e-06, + "loss": 0.5411, + "step": 10004 + }, + { + "epoch": 1.4743123772102162, + "grad_norm": 0.6238568425178528, + "learning_rate": 2.5868268378774035e-06, + "loss": 0.5226, + "step": 10005 + }, + { + "epoch": 1.4744597249508842, + "grad_norm": 0.6290948987007141, + "learning_rate": 2.586439372950283e-06, + "loss": 0.5252, + "step": 10006 + }, + { + "epoch": 1.4746070726915521, + "grad_norm": 0.6006831526756287, + "learning_rate": 2.586051905944335e-06, + "loss": 0.5532, + "step": 10007 + }, + { + "epoch": 1.47475442043222, + "grad_norm": 0.6118170619010925, + "learning_rate": 2.58566443686888e-06, + "loss": 0.5187, + "step": 10008 + }, + { + "epoch": 1.474901768172888, + "grad_norm": 0.5705695748329163, + "learning_rate": 2.5852769657332347e-06, + "loss": 0.5306, + "step": 10009 + }, + { + "epoch": 1.475049115913556, + "grad_norm": 0.6078392267227173, + "learning_rate": 2.5848894925467187e-06, + "loss": 0.535, + "step": 10010 + }, + { + "epoch": 1.4751964636542239, + "grad_norm": 0.5939925312995911, + "learning_rate": 2.584502017318651e-06, + "loss": 0.532, + "step": 10011 + }, + { + "epoch": 1.475343811394892, + "grad_norm": 0.613732099533081, + "learning_rate": 2.5841145400583485e-06, + "loss": 0.5579, + "step": 10012 + }, + { + "epoch": 1.47549115913556, + "grad_norm": 0.5972065925598145, + "learning_rate": 2.5837270607751307e-06, + "loss": 0.5187, + "step": 10013 + }, + { + "epoch": 1.4756385068762279, + "grad_norm": 0.5833370089530945, + "learning_rate": 2.5833395794783165e-06, + "loss": 0.5312, + "step": 10014 + }, + { + "epoch": 1.4757858546168958, + "grad_norm": 0.6040092706680298, + "learning_rate": 2.5829520961772255e-06, + "loss": 0.5249, + "step": 10015 + }, + { + "epoch": 1.475933202357564, + "grad_norm": 0.6326467394828796, + "learning_rate": 2.582564610881174e-06, + "loss": 0.5181, + "step": 10016 + }, + { + "epoch": 1.476080550098232, + "grad_norm": 0.6252024173736572, + "learning_rate": 2.582177123599483e-06, + "loss": 0.5425, + "step": 10017 + }, + { + "epoch": 1.4762278978388998, + "grad_norm": 0.5614728927612305, + "learning_rate": 2.58178963434147e-06, + "loss": 0.5242, + "step": 10018 + }, + { + "epoch": 1.4763752455795678, + "grad_norm": 0.6079885363578796, + "learning_rate": 2.5814021431164554e-06, + "loss": 0.5355, + "step": 10019 + }, + { + "epoch": 1.4765225933202357, + "grad_norm": 0.5666729807853699, + "learning_rate": 2.5810146499337573e-06, + "loss": 0.5179, + "step": 10020 + }, + { + "epoch": 1.4766699410609037, + "grad_norm": 0.5674146413803101, + "learning_rate": 2.580627154802695e-06, + "loss": 0.5235, + "step": 10021 + }, + { + "epoch": 1.4768172888015716, + "grad_norm": 0.5792256593704224, + "learning_rate": 2.5802396577325874e-06, + "loss": 0.4927, + "step": 10022 + }, + { + "epoch": 1.4769646365422398, + "grad_norm": 0.5754746794700623, + "learning_rate": 2.5798521587327533e-06, + "loss": 0.5189, + "step": 10023 + }, + { + "epoch": 1.4771119842829077, + "grad_norm": 0.6012855768203735, + "learning_rate": 2.5794646578125126e-06, + "loss": 0.5293, + "step": 10024 + }, + { + "epoch": 1.4772593320235756, + "grad_norm": 0.60455721616745, + "learning_rate": 2.5790771549811843e-06, + "loss": 0.5162, + "step": 10025 + }, + { + "epoch": 1.4774066797642436, + "grad_norm": 0.5657702088356018, + "learning_rate": 2.5786896502480874e-06, + "loss": 0.5499, + "step": 10026 + }, + { + "epoch": 1.4775540275049117, + "grad_norm": 0.587780773639679, + "learning_rate": 2.5783021436225415e-06, + "loss": 0.5386, + "step": 10027 + }, + { + "epoch": 1.4777013752455797, + "grad_norm": 0.5746779441833496, + "learning_rate": 2.577914635113865e-06, + "loss": 0.5121, + "step": 10028 + }, + { + "epoch": 1.4778487229862476, + "grad_norm": 0.5926362872123718, + "learning_rate": 2.5775271247313787e-06, + "loss": 0.5136, + "step": 10029 + }, + { + "epoch": 1.4779960707269155, + "grad_norm": 0.6004006862640381, + "learning_rate": 2.577139612484401e-06, + "loss": 0.5177, + "step": 10030 + }, + { + "epoch": 1.4781434184675835, + "grad_norm": 0.5714066624641418, + "learning_rate": 2.5767520983822526e-06, + "loss": 0.5457, + "step": 10031 + }, + { + "epoch": 1.4782907662082514, + "grad_norm": 0.5854382514953613, + "learning_rate": 2.5763645824342514e-06, + "loss": 0.5354, + "step": 10032 + }, + { + "epoch": 1.4784381139489193, + "grad_norm": 0.6055911779403687, + "learning_rate": 2.5759770646497185e-06, + "loss": 0.5588, + "step": 10033 + }, + { + "epoch": 1.4785854616895875, + "grad_norm": 0.5727078914642334, + "learning_rate": 2.575589545037972e-06, + "loss": 0.5453, + "step": 10034 + }, + { + "epoch": 1.4787328094302554, + "grad_norm": 0.6085164546966553, + "learning_rate": 2.575202023608333e-06, + "loss": 0.5088, + "step": 10035 + }, + { + "epoch": 1.4788801571709234, + "grad_norm": 0.5956229567527771, + "learning_rate": 2.5748145003701208e-06, + "loss": 0.54, + "step": 10036 + }, + { + "epoch": 1.4790275049115913, + "grad_norm": 0.599794864654541, + "learning_rate": 2.5744269753326544e-06, + "loss": 0.5222, + "step": 10037 + }, + { + "epoch": 1.4791748526522595, + "grad_norm": 0.5899403691291809, + "learning_rate": 2.5740394485052545e-06, + "loss": 0.5281, + "step": 10038 + }, + { + "epoch": 1.4793222003929274, + "grad_norm": 0.579741358757019, + "learning_rate": 2.57365191989724e-06, + "loss": 0.5282, + "step": 10039 + }, + { + "epoch": 1.4794695481335953, + "grad_norm": 0.6185555458068848, + "learning_rate": 2.573264389517932e-06, + "loss": 0.535, + "step": 10040 + }, + { + "epoch": 1.4796168958742633, + "grad_norm": 0.6111336350440979, + "learning_rate": 2.57287685737665e-06, + "loss": 0.5369, + "step": 10041 + }, + { + "epoch": 1.4797642436149312, + "grad_norm": 0.5992207527160645, + "learning_rate": 2.572489323482713e-06, + "loss": 0.5183, + "step": 10042 + }, + { + "epoch": 1.4799115913555991, + "grad_norm": 0.6428816914558411, + "learning_rate": 2.5721017878454424e-06, + "loss": 0.5406, + "step": 10043 + }, + { + "epoch": 1.480058939096267, + "grad_norm": 0.6048517823219299, + "learning_rate": 2.5717142504741573e-06, + "loss": 0.5349, + "step": 10044 + }, + { + "epoch": 1.4802062868369352, + "grad_norm": 0.6084042191505432, + "learning_rate": 2.571326711378178e-06, + "loss": 0.5188, + "step": 10045 + }, + { + "epoch": 1.4803536345776032, + "grad_norm": 0.5848962068557739, + "learning_rate": 2.5709391705668254e-06, + "loss": 0.4861, + "step": 10046 + }, + { + "epoch": 1.480500982318271, + "grad_norm": 0.6029049754142761, + "learning_rate": 2.570551628049418e-06, + "loss": 0.559, + "step": 10047 + }, + { + "epoch": 1.480648330058939, + "grad_norm": 0.6201281547546387, + "learning_rate": 2.5701640838352776e-06, + "loss": 0.5373, + "step": 10048 + }, + { + "epoch": 1.4807956777996072, + "grad_norm": 0.600612461566925, + "learning_rate": 2.569776537933724e-06, + "loss": 0.5316, + "step": 10049 + }, + { + "epoch": 1.4809430255402751, + "grad_norm": 0.5837006568908691, + "learning_rate": 2.5693889903540776e-06, + "loss": 0.5126, + "step": 10050 + }, + { + "epoch": 1.481090373280943, + "grad_norm": 0.6132205724716187, + "learning_rate": 2.569001441105658e-06, + "loss": 0.5162, + "step": 10051 + }, + { + "epoch": 1.481237721021611, + "grad_norm": 0.5858865976333618, + "learning_rate": 2.5686138901977865e-06, + "loss": 0.5542, + "step": 10052 + }, + { + "epoch": 1.481385068762279, + "grad_norm": 0.5971036553382874, + "learning_rate": 2.5682263376397826e-06, + "loss": 0.5717, + "step": 10053 + }, + { + "epoch": 1.4815324165029469, + "grad_norm": 0.6227021217346191, + "learning_rate": 2.5678387834409674e-06, + "loss": 0.5217, + "step": 10054 + }, + { + "epoch": 1.4816797642436148, + "grad_norm": 0.5711679458618164, + "learning_rate": 2.567451227610661e-06, + "loss": 0.5437, + "step": 10055 + }, + { + "epoch": 1.481827111984283, + "grad_norm": 0.5914393663406372, + "learning_rate": 2.5670636701581848e-06, + "loss": 0.5306, + "step": 10056 + }, + { + "epoch": 1.481974459724951, + "grad_norm": 0.5981053709983826, + "learning_rate": 2.5666761110928596e-06, + "loss": 0.5427, + "step": 10057 + }, + { + "epoch": 1.4821218074656188, + "grad_norm": 0.6171571612358093, + "learning_rate": 2.5662885504240036e-06, + "loss": 0.5125, + "step": 10058 + }, + { + "epoch": 1.4822691552062868, + "grad_norm": 0.586504340171814, + "learning_rate": 2.5659009881609406e-06, + "loss": 0.5606, + "step": 10059 + }, + { + "epoch": 1.482416502946955, + "grad_norm": 0.6025530099868774, + "learning_rate": 2.5655134243129882e-06, + "loss": 0.5541, + "step": 10060 + }, + { + "epoch": 1.4825638506876229, + "grad_norm": 0.6206779479980469, + "learning_rate": 2.5651258588894695e-06, + "loss": 0.5281, + "step": 10061 + }, + { + "epoch": 1.4827111984282908, + "grad_norm": 0.5724141597747803, + "learning_rate": 2.5647382918997043e-06, + "loss": 0.5457, + "step": 10062 + }, + { + "epoch": 1.4828585461689587, + "grad_norm": 0.5864885449409485, + "learning_rate": 2.5643507233530133e-06, + "loss": 0.5596, + "step": 10063 + }, + { + "epoch": 1.4830058939096267, + "grad_norm": 0.6063650250434875, + "learning_rate": 2.5639631532587177e-06, + "loss": 0.5089, + "step": 10064 + }, + { + "epoch": 1.4831532416502946, + "grad_norm": 0.5797774791717529, + "learning_rate": 2.5635755816261388e-06, + "loss": 0.5551, + "step": 10065 + }, + { + "epoch": 1.4833005893909625, + "grad_norm": 0.5981155633926392, + "learning_rate": 2.5631880084645967e-06, + "loss": 0.4998, + "step": 10066 + }, + { + "epoch": 1.4834479371316307, + "grad_norm": 0.6179545521736145, + "learning_rate": 2.5628004337834123e-06, + "loss": 0.5411, + "step": 10067 + }, + { + "epoch": 1.4835952848722986, + "grad_norm": 0.6068146824836731, + "learning_rate": 2.5624128575919072e-06, + "loss": 0.5375, + "step": 10068 + }, + { + "epoch": 1.4837426326129666, + "grad_norm": 0.606562077999115, + "learning_rate": 2.562025279899402e-06, + "loss": 0.5431, + "step": 10069 + }, + { + "epoch": 1.4838899803536345, + "grad_norm": 0.6154018044471741, + "learning_rate": 2.5616377007152182e-06, + "loss": 0.5683, + "step": 10070 + }, + { + "epoch": 1.4840373280943027, + "grad_norm": 0.5995402336120605, + "learning_rate": 2.5612501200486763e-06, + "loss": 0.5214, + "step": 10071 + }, + { + "epoch": 1.4841846758349706, + "grad_norm": 0.5599244236946106, + "learning_rate": 2.560862537909098e-06, + "loss": 0.5106, + "step": 10072 + }, + { + "epoch": 1.4843320235756385, + "grad_norm": 0.6351637244224548, + "learning_rate": 2.5604749543058033e-06, + "loss": 0.5167, + "step": 10073 + }, + { + "epoch": 1.4844793713163065, + "grad_norm": 0.6006361246109009, + "learning_rate": 2.5600873692481147e-06, + "loss": 0.5564, + "step": 10074 + }, + { + "epoch": 1.4846267190569744, + "grad_norm": 0.579949676990509, + "learning_rate": 2.5596997827453535e-06, + "loss": 0.5312, + "step": 10075 + }, + { + "epoch": 1.4847740667976423, + "grad_norm": 0.5758150219917297, + "learning_rate": 2.5593121948068406e-06, + "loss": 0.5412, + "step": 10076 + }, + { + "epoch": 1.4849214145383103, + "grad_norm": 0.6173452734947205, + "learning_rate": 2.558924605441897e-06, + "loss": 0.5033, + "step": 10077 + }, + { + "epoch": 1.4850687622789784, + "grad_norm": 0.5908030867576599, + "learning_rate": 2.558537014659844e-06, + "loss": 0.5157, + "step": 10078 + }, + { + "epoch": 1.4852161100196464, + "grad_norm": 0.58383709192276, + "learning_rate": 2.558149422470004e-06, + "loss": 0.5726, + "step": 10079 + }, + { + "epoch": 1.4853634577603143, + "grad_norm": 0.6121160387992859, + "learning_rate": 2.5577618288816973e-06, + "loss": 0.4985, + "step": 10080 + }, + { + "epoch": 1.4855108055009822, + "grad_norm": 0.6004834771156311, + "learning_rate": 2.557374233904246e-06, + "loss": 0.5465, + "step": 10081 + }, + { + "epoch": 1.4856581532416504, + "grad_norm": 0.6069691777229309, + "learning_rate": 2.556986637546971e-06, + "loss": 0.5568, + "step": 10082 + }, + { + "epoch": 1.4858055009823183, + "grad_norm": 0.6074449419975281, + "learning_rate": 2.556599039819194e-06, + "loss": 0.5403, + "step": 10083 + }, + { + "epoch": 1.4859528487229863, + "grad_norm": 0.5949353575706482, + "learning_rate": 2.556211440730237e-06, + "loss": 0.5407, + "step": 10084 + }, + { + "epoch": 1.4861001964636542, + "grad_norm": 0.6475723385810852, + "learning_rate": 2.555823840289421e-06, + "loss": 0.5162, + "step": 10085 + }, + { + "epoch": 1.4862475442043221, + "grad_norm": 0.5755516290664673, + "learning_rate": 2.555436238506067e-06, + "loss": 0.4729, + "step": 10086 + }, + { + "epoch": 1.48639489194499, + "grad_norm": 0.6287891268730164, + "learning_rate": 2.555048635389499e-06, + "loss": 0.5427, + "step": 10087 + }, + { + "epoch": 1.486542239685658, + "grad_norm": 0.6511856913566589, + "learning_rate": 2.5546610309490362e-06, + "loss": 0.5031, + "step": 10088 + }, + { + "epoch": 1.4866895874263262, + "grad_norm": 0.600928544998169, + "learning_rate": 2.554273425194001e-06, + "loss": 0.5342, + "step": 10089 + }, + { + "epoch": 1.4868369351669941, + "grad_norm": 0.5927289724349976, + "learning_rate": 2.5538858181337154e-06, + "loss": 0.528, + "step": 10090 + }, + { + "epoch": 1.486984282907662, + "grad_norm": 0.5730476379394531, + "learning_rate": 2.5534982097775012e-06, + "loss": 0.5495, + "step": 10091 + }, + { + "epoch": 1.48713163064833, + "grad_norm": 0.591219961643219, + "learning_rate": 2.5531106001346804e-06, + "loss": 0.5248, + "step": 10092 + }, + { + "epoch": 1.4872789783889981, + "grad_norm": 0.5749034881591797, + "learning_rate": 2.552722989214575e-06, + "loss": 0.5001, + "step": 10093 + }, + { + "epoch": 1.487426326129666, + "grad_norm": 0.6213936805725098, + "learning_rate": 2.5523353770265058e-06, + "loss": 0.5383, + "step": 10094 + }, + { + "epoch": 1.487573673870334, + "grad_norm": 0.600908100605011, + "learning_rate": 2.5519477635797956e-06, + "loss": 0.5318, + "step": 10095 + }, + { + "epoch": 1.487721021611002, + "grad_norm": 0.6069400906562805, + "learning_rate": 2.551560148883766e-06, + "loss": 0.5369, + "step": 10096 + }, + { + "epoch": 1.4878683693516699, + "grad_norm": 0.5784490704536438, + "learning_rate": 2.5511725329477394e-06, + "loss": 0.558, + "step": 10097 + }, + { + "epoch": 1.4880157170923378, + "grad_norm": 0.5908248424530029, + "learning_rate": 2.5507849157810366e-06, + "loss": 0.5366, + "step": 10098 + }, + { + "epoch": 1.4881630648330058, + "grad_norm": 0.574958860874176, + "learning_rate": 2.550397297392981e-06, + "loss": 0.5122, + "step": 10099 + }, + { + "epoch": 1.488310412573674, + "grad_norm": 0.6053071618080139, + "learning_rate": 2.550009677792894e-06, + "loss": 0.5529, + "step": 10100 + }, + { + "epoch": 1.4884577603143418, + "grad_norm": 0.6132107973098755, + "learning_rate": 2.5496220569900977e-06, + "loss": 0.5116, + "step": 10101 + }, + { + "epoch": 1.4886051080550098, + "grad_norm": 0.5951073169708252, + "learning_rate": 2.5492344349939134e-06, + "loss": 0.5357, + "step": 10102 + }, + { + "epoch": 1.4887524557956777, + "grad_norm": 0.6057087182998657, + "learning_rate": 2.548846811813665e-06, + "loss": 0.5535, + "step": 10103 + }, + { + "epoch": 1.4888998035363459, + "grad_norm": 0.5759057402610779, + "learning_rate": 2.5484591874586735e-06, + "loss": 0.5483, + "step": 10104 + }, + { + "epoch": 1.4890471512770138, + "grad_norm": 0.5740559697151184, + "learning_rate": 2.5480715619382612e-06, + "loss": 0.5212, + "step": 10105 + }, + { + "epoch": 1.4891944990176817, + "grad_norm": 0.6479734182357788, + "learning_rate": 2.5476839352617506e-06, + "loss": 0.5213, + "step": 10106 + }, + { + "epoch": 1.4893418467583497, + "grad_norm": 0.6037486791610718, + "learning_rate": 2.5472963074384636e-06, + "loss": 0.5105, + "step": 10107 + }, + { + "epoch": 1.4894891944990176, + "grad_norm": 0.5881115198135376, + "learning_rate": 2.5469086784777224e-06, + "loss": 0.5496, + "step": 10108 + }, + { + "epoch": 1.4896365422396856, + "grad_norm": 0.5985898971557617, + "learning_rate": 2.5465210483888497e-06, + "loss": 0.499, + "step": 10109 + }, + { + "epoch": 1.4897838899803537, + "grad_norm": 0.5970534682273865, + "learning_rate": 2.546133417181167e-06, + "loss": 0.5263, + "step": 10110 + }, + { + "epoch": 1.4899312377210217, + "grad_norm": 0.6005857586860657, + "learning_rate": 2.545745784863998e-06, + "loss": 0.5058, + "step": 10111 + }, + { + "epoch": 1.4900785854616896, + "grad_norm": 0.6735231876373291, + "learning_rate": 2.5453581514466635e-06, + "loss": 0.5581, + "step": 10112 + }, + { + "epoch": 1.4902259332023575, + "grad_norm": 0.5707589983940125, + "learning_rate": 2.544970516938487e-06, + "loss": 0.5421, + "step": 10113 + }, + { + "epoch": 1.4903732809430255, + "grad_norm": 0.5774886608123779, + "learning_rate": 2.544582881348791e-06, + "loss": 0.5278, + "step": 10114 + }, + { + "epoch": 1.4905206286836936, + "grad_norm": 0.5943716764450073, + "learning_rate": 2.544195244686898e-06, + "loss": 0.544, + "step": 10115 + }, + { + "epoch": 1.4906679764243616, + "grad_norm": 0.5878989696502686, + "learning_rate": 2.5438076069621294e-06, + "loss": 0.5267, + "step": 10116 + }, + { + "epoch": 1.4908153241650295, + "grad_norm": 0.6015142798423767, + "learning_rate": 2.5434199681838084e-06, + "loss": 0.5679, + "step": 10117 + }, + { + "epoch": 1.4909626719056974, + "grad_norm": 0.650373101234436, + "learning_rate": 2.543032328361258e-06, + "loss": 0.5244, + "step": 10118 + }, + { + "epoch": 1.4911100196463654, + "grad_norm": 0.5807651877403259, + "learning_rate": 2.5426446875037995e-06, + "loss": 0.5464, + "step": 10119 + }, + { + "epoch": 1.4912573673870333, + "grad_norm": 0.6165382266044617, + "learning_rate": 2.5422570456207575e-06, + "loss": 0.5175, + "step": 10120 + }, + { + "epoch": 1.4914047151277015, + "grad_norm": 0.5993167757987976, + "learning_rate": 2.5418694027214526e-06, + "loss": 0.5162, + "step": 10121 + }, + { + "epoch": 1.4915520628683694, + "grad_norm": 0.5882503390312195, + "learning_rate": 2.541481758815208e-06, + "loss": 0.5161, + "step": 10122 + }, + { + "epoch": 1.4916994106090373, + "grad_norm": 0.5880311131477356, + "learning_rate": 2.541094113911347e-06, + "loss": 0.5463, + "step": 10123 + }, + { + "epoch": 1.4918467583497053, + "grad_norm": 0.5815681219100952, + "learning_rate": 2.540706468019192e-06, + "loss": 0.5471, + "step": 10124 + }, + { + "epoch": 1.4919941060903734, + "grad_norm": 0.5963301062583923, + "learning_rate": 2.5403188211480644e-06, + "loss": 0.5382, + "step": 10125 + }, + { + "epoch": 1.4921414538310414, + "grad_norm": 0.6263893246650696, + "learning_rate": 2.539931173307289e-06, + "loss": 0.5173, + "step": 10126 + }, + { + "epoch": 1.4922888015717093, + "grad_norm": 0.5704562664031982, + "learning_rate": 2.5395435245061876e-06, + "loss": 0.553, + "step": 10127 + }, + { + "epoch": 1.4924361493123772, + "grad_norm": 0.60550856590271, + "learning_rate": 2.539155874754083e-06, + "loss": 0.5407, + "step": 10128 + }, + { + "epoch": 1.4925834970530452, + "grad_norm": 0.577599048614502, + "learning_rate": 2.538768224060298e-06, + "loss": 0.5355, + "step": 10129 + }, + { + "epoch": 1.492730844793713, + "grad_norm": 0.6213477849960327, + "learning_rate": 2.5383805724341547e-06, + "loss": 0.5463, + "step": 10130 + }, + { + "epoch": 1.492878192534381, + "grad_norm": 0.5974999666213989, + "learning_rate": 2.5379929198849774e-06, + "loss": 0.5511, + "step": 10131 + }, + { + "epoch": 1.4930255402750492, + "grad_norm": 0.662420392036438, + "learning_rate": 2.537605266422088e-06, + "loss": 0.5174, + "step": 10132 + }, + { + "epoch": 1.4931728880157171, + "grad_norm": 0.5895794034004211, + "learning_rate": 2.5372176120548098e-06, + "loss": 0.5122, + "step": 10133 + }, + { + "epoch": 1.493320235756385, + "grad_norm": 0.6046537756919861, + "learning_rate": 2.536829956792465e-06, + "loss": 0.5443, + "step": 10134 + }, + { + "epoch": 1.493467583497053, + "grad_norm": 0.6031988859176636, + "learning_rate": 2.536442300644377e-06, + "loss": 0.5287, + "step": 10135 + }, + { + "epoch": 1.4936149312377212, + "grad_norm": 0.6107053160667419, + "learning_rate": 2.536054643619869e-06, + "loss": 0.4705, + "step": 10136 + }, + { + "epoch": 1.493762278978389, + "grad_norm": 0.6281952261924744, + "learning_rate": 2.535666985728264e-06, + "loss": 0.525, + "step": 10137 + }, + { + "epoch": 1.493909626719057, + "grad_norm": 0.6193602681159973, + "learning_rate": 2.5352793269788845e-06, + "loss": 0.4858, + "step": 10138 + }, + { + "epoch": 1.494056974459725, + "grad_norm": 0.5369738936424255, + "learning_rate": 2.5348916673810535e-06, + "loss": 0.5055, + "step": 10139 + }, + { + "epoch": 1.494204322200393, + "grad_norm": 0.6060987114906311, + "learning_rate": 2.5345040069440944e-06, + "loss": 0.5121, + "step": 10140 + }, + { + "epoch": 1.4943516699410608, + "grad_norm": 0.6007633805274963, + "learning_rate": 2.5341163456773307e-06, + "loss": 0.5266, + "step": 10141 + }, + { + "epoch": 1.4944990176817288, + "grad_norm": 0.6350640654563904, + "learning_rate": 2.533728683590084e-06, + "loss": 0.5385, + "step": 10142 + }, + { + "epoch": 1.494646365422397, + "grad_norm": 0.6219783425331116, + "learning_rate": 2.5333410206916787e-06, + "loss": 0.5348, + "step": 10143 + }, + { + "epoch": 1.4947937131630649, + "grad_norm": 0.6128910183906555, + "learning_rate": 2.532953356991437e-06, + "loss": 0.5347, + "step": 10144 + }, + { + "epoch": 1.4949410609037328, + "grad_norm": 0.5925172567367554, + "learning_rate": 2.5325656924986836e-06, + "loss": 0.5598, + "step": 10145 + }, + { + "epoch": 1.4950884086444007, + "grad_norm": 0.6189956665039062, + "learning_rate": 2.5321780272227397e-06, + "loss": 0.5322, + "step": 10146 + }, + { + "epoch": 1.495235756385069, + "grad_norm": 0.6521819829940796, + "learning_rate": 2.5317903611729287e-06, + "loss": 0.5333, + "step": 10147 + }, + { + "epoch": 1.4953831041257368, + "grad_norm": 0.5967217683792114, + "learning_rate": 2.5314026943585764e-06, + "loss": 0.5478, + "step": 10148 + }, + { + "epoch": 1.4955304518664048, + "grad_norm": 0.6054741144180298, + "learning_rate": 2.531015026789003e-06, + "loss": 0.5274, + "step": 10149 + }, + { + "epoch": 1.4956777996070727, + "grad_norm": 0.6146218776702881, + "learning_rate": 2.5306273584735324e-06, + "loss": 0.5422, + "step": 10150 + }, + { + "epoch": 1.4958251473477406, + "grad_norm": 0.6348437070846558, + "learning_rate": 2.530239689421488e-06, + "loss": 0.5267, + "step": 10151 + }, + { + "epoch": 1.4959724950884086, + "grad_norm": 0.5617045164108276, + "learning_rate": 2.529852019642194e-06, + "loss": 0.5504, + "step": 10152 + }, + { + "epoch": 1.4961198428290765, + "grad_norm": 0.6225736737251282, + "learning_rate": 2.529464349144973e-06, + "loss": 0.5243, + "step": 10153 + }, + { + "epoch": 1.4962671905697447, + "grad_norm": 0.6187669038772583, + "learning_rate": 2.5290766779391474e-06, + "loss": 0.5292, + "step": 10154 + }, + { + "epoch": 1.4964145383104126, + "grad_norm": 0.5875985622406006, + "learning_rate": 2.528689006034042e-06, + "loss": 0.5357, + "step": 10155 + }, + { + "epoch": 1.4965618860510805, + "grad_norm": 0.6059481501579285, + "learning_rate": 2.5283013334389796e-06, + "loss": 0.5273, + "step": 10156 + }, + { + "epoch": 1.4967092337917485, + "grad_norm": 0.6142833828926086, + "learning_rate": 2.5279136601632825e-06, + "loss": 0.5161, + "step": 10157 + }, + { + "epoch": 1.4968565815324166, + "grad_norm": 0.6324189305305481, + "learning_rate": 2.527525986216276e-06, + "loss": 0.561, + "step": 10158 + }, + { + "epoch": 1.4970039292730846, + "grad_norm": 0.6165001392364502, + "learning_rate": 2.5271383116072816e-06, + "loss": 0.5361, + "step": 10159 + }, + { + "epoch": 1.4971512770137525, + "grad_norm": 0.6069198846817017, + "learning_rate": 2.526750636345624e-06, + "loss": 0.5106, + "step": 10160 + }, + { + "epoch": 1.4972986247544204, + "grad_norm": 0.5930930972099304, + "learning_rate": 2.526362960440626e-06, + "loss": 0.5133, + "step": 10161 + }, + { + "epoch": 1.4974459724950884, + "grad_norm": 0.5911492109298706, + "learning_rate": 2.525975283901611e-06, + "loss": 0.528, + "step": 10162 + }, + { + "epoch": 1.4975933202357563, + "grad_norm": 0.640221118927002, + "learning_rate": 2.525587606737903e-06, + "loss": 0.5529, + "step": 10163 + }, + { + "epoch": 1.4977406679764242, + "grad_norm": 0.5628364682197571, + "learning_rate": 2.5251999289588245e-06, + "loss": 0.5221, + "step": 10164 + }, + { + "epoch": 1.4978880157170924, + "grad_norm": 0.5784949660301208, + "learning_rate": 2.5248122505736998e-06, + "loss": 0.5009, + "step": 10165 + }, + { + "epoch": 1.4980353634577603, + "grad_norm": 0.6031850576400757, + "learning_rate": 2.5244245715918526e-06, + "loss": 0.5345, + "step": 10166 + }, + { + "epoch": 1.4981827111984283, + "grad_norm": 0.5889051556587219, + "learning_rate": 2.524036892022605e-06, + "loss": 0.5166, + "step": 10167 + }, + { + "epoch": 1.4983300589390962, + "grad_norm": 0.602304220199585, + "learning_rate": 2.523649211875282e-06, + "loss": 0.5181, + "step": 10168 + }, + { + "epoch": 1.4984774066797644, + "grad_norm": 0.593313455581665, + "learning_rate": 2.523261531159207e-06, + "loss": 0.5163, + "step": 10169 + }, + { + "epoch": 1.4986247544204323, + "grad_norm": 0.598702073097229, + "learning_rate": 2.5228738498837026e-06, + "loss": 0.5333, + "step": 10170 + }, + { + "epoch": 1.4987721021611002, + "grad_norm": 0.6156904101371765, + "learning_rate": 2.5224861680580927e-06, + "loss": 0.5379, + "step": 10171 + }, + { + "epoch": 1.4989194499017682, + "grad_norm": 0.6051478981971741, + "learning_rate": 2.5220984856917014e-06, + "loss": 0.5614, + "step": 10172 + }, + { + "epoch": 1.499066797642436, + "grad_norm": 0.6317456960678101, + "learning_rate": 2.5217108027938515e-06, + "loss": 0.5181, + "step": 10173 + }, + { + "epoch": 1.499214145383104, + "grad_norm": 0.5905874967575073, + "learning_rate": 2.5213231193738672e-06, + "loss": 0.5167, + "step": 10174 + }, + { + "epoch": 1.499361493123772, + "grad_norm": 0.6179569959640503, + "learning_rate": 2.520935435441072e-06, + "loss": 0.5095, + "step": 10175 + }, + { + "epoch": 1.4995088408644401, + "grad_norm": 0.6140065789222717, + "learning_rate": 2.5205477510047886e-06, + "loss": 0.5084, + "step": 10176 + }, + { + "epoch": 1.499656188605108, + "grad_norm": 0.639605700969696, + "learning_rate": 2.5201600660743426e-06, + "loss": 0.5409, + "step": 10177 + }, + { + "epoch": 1.499803536345776, + "grad_norm": 0.5748205184936523, + "learning_rate": 2.5197723806590558e-06, + "loss": 0.5429, + "step": 10178 + }, + { + "epoch": 1.499950884086444, + "grad_norm": 0.5841798186302185, + "learning_rate": 2.5193846947682522e-06, + "loss": 0.5328, + "step": 10179 + }, + { + "epoch": 1.500098231827112, + "grad_norm": 0.6033486127853394, + "learning_rate": 2.518997008411256e-06, + "loss": 0.5152, + "step": 10180 + }, + { + "epoch": 1.50024557956778, + "grad_norm": 0.6150385737419128, + "learning_rate": 2.5186093215973913e-06, + "loss": 0.5583, + "step": 10181 + }, + { + "epoch": 1.500392927308448, + "grad_norm": 0.6148741841316223, + "learning_rate": 2.5182216343359805e-06, + "loss": 0.5225, + "step": 10182 + }, + { + "epoch": 1.500540275049116, + "grad_norm": 0.5991114377975464, + "learning_rate": 2.517833946636348e-06, + "loss": 0.5193, + "step": 10183 + }, + { + "epoch": 1.5006876227897838, + "grad_norm": 0.5718602538108826, + "learning_rate": 2.5174462585078167e-06, + "loss": 0.5276, + "step": 10184 + }, + { + "epoch": 1.5008349705304518, + "grad_norm": 0.6201191544532776, + "learning_rate": 2.517058569959712e-06, + "loss": 0.5536, + "step": 10185 + }, + { + "epoch": 1.5009823182711197, + "grad_norm": 0.6135426163673401, + "learning_rate": 2.5166708810013566e-06, + "loss": 0.5302, + "step": 10186 + }, + { + "epoch": 1.5011296660117877, + "grad_norm": 0.636661171913147, + "learning_rate": 2.5162831916420743e-06, + "loss": 0.5191, + "step": 10187 + }, + { + "epoch": 1.5012770137524558, + "grad_norm": 0.6372947692871094, + "learning_rate": 2.515895501891189e-06, + "loss": 0.5652, + "step": 10188 + }, + { + "epoch": 1.5014243614931237, + "grad_norm": 0.6222315430641174, + "learning_rate": 2.5155078117580246e-06, + "loss": 0.5126, + "step": 10189 + }, + { + "epoch": 1.501571709233792, + "grad_norm": 0.567379891872406, + "learning_rate": 2.5151201212519037e-06, + "loss": 0.5559, + "step": 10190 + }, + { + "epoch": 1.5017190569744598, + "grad_norm": 0.6240565776824951, + "learning_rate": 2.514732430382152e-06, + "loss": 0.5416, + "step": 10191 + }, + { + "epoch": 1.5018664047151278, + "grad_norm": 0.6097725033760071, + "learning_rate": 2.5143447391580915e-06, + "loss": 0.5089, + "step": 10192 + }, + { + "epoch": 1.5020137524557957, + "grad_norm": 0.6066035032272339, + "learning_rate": 2.513957047589047e-06, + "loss": 0.5019, + "step": 10193 + }, + { + "epoch": 1.5021611001964637, + "grad_norm": 0.5932560563087463, + "learning_rate": 2.513569355684342e-06, + "loss": 0.522, + "step": 10194 + }, + { + "epoch": 1.5023084479371316, + "grad_norm": 0.6683105826377869, + "learning_rate": 2.5131816634533007e-06, + "loss": 0.5057, + "step": 10195 + }, + { + "epoch": 1.5024557956777995, + "grad_norm": 0.5704898238182068, + "learning_rate": 2.512793970905247e-06, + "loss": 0.5353, + "step": 10196 + }, + { + "epoch": 1.5026031434184675, + "grad_norm": 0.6015740036964417, + "learning_rate": 2.5124062780495036e-06, + "loss": 0.533, + "step": 10197 + }, + { + "epoch": 1.5027504911591356, + "grad_norm": 0.5944229364395142, + "learning_rate": 2.512018584895395e-06, + "loss": 0.5217, + "step": 10198 + }, + { + "epoch": 1.5028978388998036, + "grad_norm": 0.6010437607765198, + "learning_rate": 2.5116308914522454e-06, + "loss": 0.5013, + "step": 10199 + }, + { + "epoch": 1.5030451866404715, + "grad_norm": 0.6128653883934021, + "learning_rate": 2.5112431977293776e-06, + "loss": 0.5304, + "step": 10200 + }, + { + "epoch": 1.5031925343811396, + "grad_norm": 0.6027241349220276, + "learning_rate": 2.5108555037361165e-06, + "loss": 0.5304, + "step": 10201 + }, + { + "epoch": 1.5033398821218076, + "grad_norm": 0.6017232537269592, + "learning_rate": 2.510467809481785e-06, + "loss": 0.5566, + "step": 10202 + }, + { + "epoch": 1.5034872298624755, + "grad_norm": 0.596106767654419, + "learning_rate": 2.5100801149757087e-06, + "loss": 0.526, + "step": 10203 + }, + { + "epoch": 1.5036345776031435, + "grad_norm": 0.602689266204834, + "learning_rate": 2.50969242022721e-06, + "loss": 0.5485, + "step": 10204 + }, + { + "epoch": 1.5037819253438114, + "grad_norm": 0.6550092101097107, + "learning_rate": 2.5093047252456132e-06, + "loss": 0.5448, + "step": 10205 + }, + { + "epoch": 1.5039292730844793, + "grad_norm": 0.6251450181007385, + "learning_rate": 2.508917030040242e-06, + "loss": 0.534, + "step": 10206 + }, + { + "epoch": 1.5040766208251473, + "grad_norm": 0.5864893198013306, + "learning_rate": 2.5085293346204204e-06, + "loss": 0.5456, + "step": 10207 + }, + { + "epoch": 1.5042239685658152, + "grad_norm": 0.6060082912445068, + "learning_rate": 2.5081416389954727e-06, + "loss": 0.5019, + "step": 10208 + }, + { + "epoch": 1.5043713163064834, + "grad_norm": 0.6160162091255188, + "learning_rate": 2.5077539431747216e-06, + "loss": 0.5273, + "step": 10209 + }, + { + "epoch": 1.5045186640471513, + "grad_norm": 0.5693908929824829, + "learning_rate": 2.5073662471674927e-06, + "loss": 0.522, + "step": 10210 + }, + { + "epoch": 1.5046660117878192, + "grad_norm": 0.6336097717285156, + "learning_rate": 2.5069785509831086e-06, + "loss": 0.5387, + "step": 10211 + }, + { + "epoch": 1.5048133595284874, + "grad_norm": 0.5715206265449524, + "learning_rate": 2.5065908546308938e-06, + "loss": 0.5581, + "step": 10212 + }, + { + "epoch": 1.5049607072691553, + "grad_norm": 0.6174238920211792, + "learning_rate": 2.506203158120171e-06, + "loss": 0.5083, + "step": 10213 + }, + { + "epoch": 1.5051080550098233, + "grad_norm": 0.638642430305481, + "learning_rate": 2.5058154614602666e-06, + "loss": 0.5233, + "step": 10214 + }, + { + "epoch": 1.5052554027504912, + "grad_norm": 0.6082945466041565, + "learning_rate": 2.5054277646605024e-06, + "loss": 0.5052, + "step": 10215 + }, + { + "epoch": 1.5054027504911591, + "grad_norm": 0.6139020323753357, + "learning_rate": 2.505040067730203e-06, + "loss": 0.5549, + "step": 10216 + }, + { + "epoch": 1.505550098231827, + "grad_norm": 0.6112037301063538, + "learning_rate": 2.5046523706786925e-06, + "loss": 0.55, + "step": 10217 + }, + { + "epoch": 1.505697445972495, + "grad_norm": 0.5847442150115967, + "learning_rate": 2.504264673515295e-06, + "loss": 0.5067, + "step": 10218 + }, + { + "epoch": 1.505844793713163, + "grad_norm": 0.5607470273971558, + "learning_rate": 2.5038769762493332e-06, + "loss": 0.5096, + "step": 10219 + }, + { + "epoch": 1.505992141453831, + "grad_norm": 0.6064713597297668, + "learning_rate": 2.5034892788901326e-06, + "loss": 0.5551, + "step": 10220 + }, + { + "epoch": 1.506139489194499, + "grad_norm": 0.5816140174865723, + "learning_rate": 2.503101581447016e-06, + "loss": 0.5426, + "step": 10221 + }, + { + "epoch": 1.506286836935167, + "grad_norm": 0.6286744475364685, + "learning_rate": 2.502713883929308e-06, + "loss": 0.536, + "step": 10222 + }, + { + "epoch": 1.5064341846758351, + "grad_norm": 0.627019464969635, + "learning_rate": 2.502326186346333e-06, + "loss": 0.5277, + "step": 10223 + }, + { + "epoch": 1.506581532416503, + "grad_norm": 0.6077402830123901, + "learning_rate": 2.501938488707413e-06, + "loss": 0.5196, + "step": 10224 + }, + { + "epoch": 1.506728880157171, + "grad_norm": 0.6216160655021667, + "learning_rate": 2.5015507910218744e-06, + "loss": 0.4979, + "step": 10225 + }, + { + "epoch": 1.506876227897839, + "grad_norm": 0.5782046318054199, + "learning_rate": 2.50116309329904e-06, + "loss": 0.516, + "step": 10226 + }, + { + "epoch": 1.5070235756385069, + "grad_norm": 0.5724177956581116, + "learning_rate": 2.5007753955482332e-06, + "loss": 0.5328, + "step": 10227 + }, + { + "epoch": 1.5071709233791748, + "grad_norm": 0.6134186387062073, + "learning_rate": 2.500387697778779e-06, + "loss": 0.5419, + "step": 10228 + }, + { + "epoch": 1.5073182711198427, + "grad_norm": 0.586065948009491, + "learning_rate": 2.5e-06, + "loss": 0.4923, + "step": 10229 + }, + { + "epoch": 1.5074656188605107, + "grad_norm": 0.6241225600242615, + "learning_rate": 2.499612302221222e-06, + "loss": 0.5348, + "step": 10230 + }, + { + "epoch": 1.5076129666011788, + "grad_norm": 0.6008968949317932, + "learning_rate": 2.499224604451767e-06, + "loss": 0.517, + "step": 10231 + }, + { + "epoch": 1.5077603143418468, + "grad_norm": 0.559579074382782, + "learning_rate": 2.498836906700961e-06, + "loss": 0.5324, + "step": 10232 + }, + { + "epoch": 1.5079076620825147, + "grad_norm": 0.602648138999939, + "learning_rate": 2.4984492089781264e-06, + "loss": 0.5326, + "step": 10233 + }, + { + "epoch": 1.5080550098231829, + "grad_norm": 0.6021631360054016, + "learning_rate": 2.4980615112925873e-06, + "loss": 0.5043, + "step": 10234 + }, + { + "epoch": 1.5082023575638508, + "grad_norm": 0.588191568851471, + "learning_rate": 2.497673813653668e-06, + "loss": 0.5216, + "step": 10235 + }, + { + "epoch": 1.5083497053045187, + "grad_norm": 0.5948814153671265, + "learning_rate": 2.4972861160706923e-06, + "loss": 0.5238, + "step": 10236 + }, + { + "epoch": 1.5084970530451867, + "grad_norm": 0.5815559029579163, + "learning_rate": 2.4968984185529848e-06, + "loss": 0.5544, + "step": 10237 + }, + { + "epoch": 1.5086444007858546, + "grad_norm": 0.5934638977050781, + "learning_rate": 2.496510721109868e-06, + "loss": 0.5371, + "step": 10238 + }, + { + "epoch": 1.5087917485265225, + "grad_norm": 0.6308760046958923, + "learning_rate": 2.4961230237506676e-06, + "loss": 0.5364, + "step": 10239 + }, + { + "epoch": 1.5089390962671905, + "grad_norm": 0.5927485823631287, + "learning_rate": 2.4957353264847063e-06, + "loss": 0.5284, + "step": 10240 + }, + { + "epoch": 1.5090864440078584, + "grad_norm": 0.6171257495880127, + "learning_rate": 2.4953476293213083e-06, + "loss": 0.5406, + "step": 10241 + }, + { + "epoch": 1.5092337917485266, + "grad_norm": 0.6048797965049744, + "learning_rate": 2.4949599322697975e-06, + "loss": 0.5535, + "step": 10242 + }, + { + "epoch": 1.5093811394891945, + "grad_norm": 0.5738576650619507, + "learning_rate": 2.4945722353394984e-06, + "loss": 0.5303, + "step": 10243 + }, + { + "epoch": 1.5095284872298624, + "grad_norm": 0.5916306376457214, + "learning_rate": 2.4941845385397343e-06, + "loss": 0.5205, + "step": 10244 + }, + { + "epoch": 1.5096758349705306, + "grad_norm": 0.578652560710907, + "learning_rate": 2.4937968418798297e-06, + "loss": 0.5259, + "step": 10245 + }, + { + "epoch": 1.5098231827111985, + "grad_norm": 0.6232569813728333, + "learning_rate": 2.493409145369108e-06, + "loss": 0.5256, + "step": 10246 + }, + { + "epoch": 1.5099705304518665, + "grad_norm": 0.5967174172401428, + "learning_rate": 2.4930214490168927e-06, + "loss": 0.5116, + "step": 10247 + }, + { + "epoch": 1.5101178781925344, + "grad_norm": 0.6015446782112122, + "learning_rate": 2.4926337528325077e-06, + "loss": 0.5401, + "step": 10248 + }, + { + "epoch": 1.5102652259332023, + "grad_norm": 0.5864649415016174, + "learning_rate": 2.492246056825278e-06, + "loss": 0.5293, + "step": 10249 + }, + { + "epoch": 1.5104125736738703, + "grad_norm": 0.6076340675354004, + "learning_rate": 2.4918583610045277e-06, + "loss": 0.503, + "step": 10250 + }, + { + "epoch": 1.5105599214145382, + "grad_norm": 0.6084544658660889, + "learning_rate": 2.4914706653795796e-06, + "loss": 0.5447, + "step": 10251 + }, + { + "epoch": 1.5107072691552061, + "grad_norm": 0.6045971512794495, + "learning_rate": 2.491082969959758e-06, + "loss": 0.5191, + "step": 10252 + }, + { + "epoch": 1.5108546168958743, + "grad_norm": 0.5992733836174011, + "learning_rate": 2.4906952747543868e-06, + "loss": 0.5138, + "step": 10253 + }, + { + "epoch": 1.5110019646365422, + "grad_norm": 0.6176450252532959, + "learning_rate": 2.4903075797727903e-06, + "loss": 0.5609, + "step": 10254 + }, + { + "epoch": 1.5111493123772102, + "grad_norm": 0.5881679058074951, + "learning_rate": 2.4899198850242913e-06, + "loss": 0.5263, + "step": 10255 + }, + { + "epoch": 1.5112966601178783, + "grad_norm": 0.7635016441345215, + "learning_rate": 2.4895321905182145e-06, + "loss": 0.4978, + "step": 10256 + }, + { + "epoch": 1.5114440078585463, + "grad_norm": 0.6047636866569519, + "learning_rate": 2.489144496263884e-06, + "loss": 0.5223, + "step": 10257 + }, + { + "epoch": 1.5115913555992142, + "grad_norm": 0.6081615686416626, + "learning_rate": 2.488756802270623e-06, + "loss": 0.5518, + "step": 10258 + }, + { + "epoch": 1.5117387033398821, + "grad_norm": 0.5957133769989014, + "learning_rate": 2.488369108547756e-06, + "loss": 0.5322, + "step": 10259 + }, + { + "epoch": 1.51188605108055, + "grad_norm": 0.5661551356315613, + "learning_rate": 2.4879814151046054e-06, + "loss": 0.5201, + "step": 10260 + }, + { + "epoch": 1.512033398821218, + "grad_norm": 0.588688313961029, + "learning_rate": 2.4875937219504973e-06, + "loss": 0.5388, + "step": 10261 + }, + { + "epoch": 1.512180746561886, + "grad_norm": 0.6243281364440918, + "learning_rate": 2.487206029094754e-06, + "loss": 0.5149, + "step": 10262 + }, + { + "epoch": 1.5123280943025539, + "grad_norm": 0.5835373401641846, + "learning_rate": 2.4868183365467e-06, + "loss": 0.5371, + "step": 10263 + }, + { + "epoch": 1.512475442043222, + "grad_norm": 0.5698044300079346, + "learning_rate": 2.4864306443156585e-06, + "loss": 0.5302, + "step": 10264 + }, + { + "epoch": 1.51262278978389, + "grad_norm": 0.5796380043029785, + "learning_rate": 2.4860429524109537e-06, + "loss": 0.5145, + "step": 10265 + }, + { + "epoch": 1.512770137524558, + "grad_norm": 0.603929877281189, + "learning_rate": 2.485655260841909e-06, + "loss": 0.5352, + "step": 10266 + }, + { + "epoch": 1.512917485265226, + "grad_norm": 0.5876373648643494, + "learning_rate": 2.485267569617849e-06, + "loss": 0.5149, + "step": 10267 + }, + { + "epoch": 1.513064833005894, + "grad_norm": 0.5975383520126343, + "learning_rate": 2.4848798787480967e-06, + "loss": 0.5124, + "step": 10268 + }, + { + "epoch": 1.513212180746562, + "grad_norm": 0.5925776958465576, + "learning_rate": 2.4844921882419763e-06, + "loss": 0.5385, + "step": 10269 + }, + { + "epoch": 1.5133595284872299, + "grad_norm": 0.5971775054931641, + "learning_rate": 2.4841044981088116e-06, + "loss": 0.53, + "step": 10270 + }, + { + "epoch": 1.5135068762278978, + "grad_norm": 0.5895811319351196, + "learning_rate": 2.483716808357926e-06, + "loss": 0.5035, + "step": 10271 + }, + { + "epoch": 1.5136542239685657, + "grad_norm": 0.6034707427024841, + "learning_rate": 2.4833291189986438e-06, + "loss": 0.5324, + "step": 10272 + }, + { + "epoch": 1.5138015717092337, + "grad_norm": 0.6302515268325806, + "learning_rate": 2.4829414300402884e-06, + "loss": 0.5518, + "step": 10273 + }, + { + "epoch": 1.5139489194499016, + "grad_norm": 0.6136506199836731, + "learning_rate": 2.4825537414921837e-06, + "loss": 0.5448, + "step": 10274 + }, + { + "epoch": 1.5140962671905698, + "grad_norm": 0.5966064929962158, + "learning_rate": 2.4821660533636534e-06, + "loss": 0.5381, + "step": 10275 + }, + { + "epoch": 1.5142436149312377, + "grad_norm": 0.5802631378173828, + "learning_rate": 2.4817783656640208e-06, + "loss": 0.5543, + "step": 10276 + }, + { + "epoch": 1.5143909626719056, + "grad_norm": 0.6357364654541016, + "learning_rate": 2.48139067840261e-06, + "loss": 0.5404, + "step": 10277 + }, + { + "epoch": 1.5145383104125738, + "grad_norm": 0.5775880813598633, + "learning_rate": 2.481002991588745e-06, + "loss": 0.5363, + "step": 10278 + }, + { + "epoch": 1.5146856581532417, + "grad_norm": 0.6057843565940857, + "learning_rate": 2.480615305231749e-06, + "loss": 0.5364, + "step": 10279 + }, + { + "epoch": 1.5148330058939097, + "grad_norm": 0.5886603593826294, + "learning_rate": 2.480227619340946e-06, + "loss": 0.5366, + "step": 10280 + }, + { + "epoch": 1.5149803536345776, + "grad_norm": 0.6026503443717957, + "learning_rate": 2.479839933925659e-06, + "loss": 0.5062, + "step": 10281 + }, + { + "epoch": 1.5151277013752456, + "grad_norm": 0.6005035042762756, + "learning_rate": 2.4794522489952126e-06, + "loss": 0.5228, + "step": 10282 + }, + { + "epoch": 1.5152750491159135, + "grad_norm": 0.603428840637207, + "learning_rate": 2.4790645645589294e-06, + "loss": 0.5397, + "step": 10283 + }, + { + "epoch": 1.5154223968565814, + "grad_norm": 0.607301652431488, + "learning_rate": 2.478676880626134e-06, + "loss": 0.5253, + "step": 10284 + }, + { + "epoch": 1.5155697445972494, + "grad_norm": 0.623644232749939, + "learning_rate": 2.478289197206149e-06, + "loss": 0.5388, + "step": 10285 + }, + { + "epoch": 1.5157170923379175, + "grad_norm": 0.6253346800804138, + "learning_rate": 2.4779015143082995e-06, + "loss": 0.5644, + "step": 10286 + }, + { + "epoch": 1.5158644400785855, + "grad_norm": 0.6410650014877319, + "learning_rate": 2.4775138319419077e-06, + "loss": 0.525, + "step": 10287 + }, + { + "epoch": 1.5160117878192534, + "grad_norm": 0.6499364376068115, + "learning_rate": 2.477126150116298e-06, + "loss": 0.5322, + "step": 10288 + }, + { + "epoch": 1.5161591355599215, + "grad_norm": 0.6083891987800598, + "learning_rate": 2.4767384688407934e-06, + "loss": 0.5393, + "step": 10289 + }, + { + "epoch": 1.5163064833005895, + "grad_norm": 0.5999969840049744, + "learning_rate": 2.4763507881247183e-06, + "loss": 0.5211, + "step": 10290 + }, + { + "epoch": 1.5164538310412574, + "grad_norm": 0.6010143160820007, + "learning_rate": 2.475963107977395e-06, + "loss": 0.5134, + "step": 10291 + }, + { + "epoch": 1.5166011787819254, + "grad_norm": 0.6048627495765686, + "learning_rate": 2.4755754284081478e-06, + "loss": 0.5086, + "step": 10292 + }, + { + "epoch": 1.5167485265225933, + "grad_norm": 0.6134151220321655, + "learning_rate": 2.4751877494263006e-06, + "loss": 0.5165, + "step": 10293 + }, + { + "epoch": 1.5168958742632612, + "grad_norm": 0.575219452381134, + "learning_rate": 2.474800071041176e-06, + "loss": 0.5584, + "step": 10294 + }, + { + "epoch": 1.5170432220039292, + "grad_norm": 0.5997015833854675, + "learning_rate": 2.4744123932620973e-06, + "loss": 0.5413, + "step": 10295 + }, + { + "epoch": 1.517190569744597, + "grad_norm": 0.5841656923294067, + "learning_rate": 2.4740247160983895e-06, + "loss": 0.5217, + "step": 10296 + }, + { + "epoch": 1.5173379174852653, + "grad_norm": 0.6016933917999268, + "learning_rate": 2.4736370395593748e-06, + "loss": 0.5325, + "step": 10297 + }, + { + "epoch": 1.5174852652259332, + "grad_norm": 0.6138267517089844, + "learning_rate": 2.4732493636543765e-06, + "loss": 0.514, + "step": 10298 + }, + { + "epoch": 1.5176326129666011, + "grad_norm": 0.5981290340423584, + "learning_rate": 2.472861688392719e-06, + "loss": 0.5528, + "step": 10299 + }, + { + "epoch": 1.5177799607072693, + "grad_norm": 0.5635799169540405, + "learning_rate": 2.472474013783725e-06, + "loss": 0.5396, + "step": 10300 + }, + { + "epoch": 1.5179273084479372, + "grad_norm": 0.5866395235061646, + "learning_rate": 2.4720863398367183e-06, + "loss": 0.5357, + "step": 10301 + }, + { + "epoch": 1.5180746561886052, + "grad_norm": 0.5970284342765808, + "learning_rate": 2.4716986665610217e-06, + "loss": 0.5185, + "step": 10302 + }, + { + "epoch": 1.518222003929273, + "grad_norm": 0.5693259239196777, + "learning_rate": 2.471310993965959e-06, + "loss": 0.547, + "step": 10303 + }, + { + "epoch": 1.518369351669941, + "grad_norm": 0.6376387476921082, + "learning_rate": 2.470923322060853e-06, + "loss": 0.522, + "step": 10304 + }, + { + "epoch": 1.518516699410609, + "grad_norm": 0.6003425121307373, + "learning_rate": 2.470535650855028e-06, + "loss": 0.5433, + "step": 10305 + }, + { + "epoch": 1.518664047151277, + "grad_norm": 0.5769048929214478, + "learning_rate": 2.4701479803578065e-06, + "loss": 0.5107, + "step": 10306 + }, + { + "epoch": 1.5188113948919448, + "grad_norm": 0.6042068600654602, + "learning_rate": 2.4697603105785124e-06, + "loss": 0.5532, + "step": 10307 + }, + { + "epoch": 1.518958742632613, + "grad_norm": 0.6384438276290894, + "learning_rate": 2.4693726415264684e-06, + "loss": 0.5182, + "step": 10308 + }, + { + "epoch": 1.519106090373281, + "grad_norm": 0.6242793202400208, + "learning_rate": 2.468984973210998e-06, + "loss": 0.5327, + "step": 10309 + }, + { + "epoch": 1.519253438113949, + "grad_norm": 0.6230701208114624, + "learning_rate": 2.4685973056414244e-06, + "loss": 0.5112, + "step": 10310 + }, + { + "epoch": 1.519400785854617, + "grad_norm": 0.6073266267776489, + "learning_rate": 2.4682096388270717e-06, + "loss": 0.5066, + "step": 10311 + }, + { + "epoch": 1.519548133595285, + "grad_norm": 0.6042713522911072, + "learning_rate": 2.4678219727772616e-06, + "loss": 0.505, + "step": 10312 + }, + { + "epoch": 1.519695481335953, + "grad_norm": 0.6160421967506409, + "learning_rate": 2.467434307501318e-06, + "loss": 0.5581, + "step": 10313 + }, + { + "epoch": 1.5198428290766208, + "grad_norm": 0.612213671207428, + "learning_rate": 2.4670466430085637e-06, + "loss": 0.5631, + "step": 10314 + }, + { + "epoch": 1.5199901768172888, + "grad_norm": 0.5815005898475647, + "learning_rate": 2.4666589793083225e-06, + "loss": 0.5624, + "step": 10315 + }, + { + "epoch": 1.5201375245579567, + "grad_norm": 0.6178110241889954, + "learning_rate": 2.466271316409917e-06, + "loss": 0.5159, + "step": 10316 + }, + { + "epoch": 1.5202848722986246, + "grad_norm": 0.5850718021392822, + "learning_rate": 2.4658836543226706e-06, + "loss": 0.521, + "step": 10317 + }, + { + "epoch": 1.5204322200392926, + "grad_norm": 0.62416011095047, + "learning_rate": 2.465495993055907e-06, + "loss": 0.5509, + "step": 10318 + }, + { + "epoch": 1.5205795677799607, + "grad_norm": 0.6398894190788269, + "learning_rate": 2.465108332618948e-06, + "loss": 0.5243, + "step": 10319 + }, + { + "epoch": 1.5207269155206287, + "grad_norm": 0.5828613638877869, + "learning_rate": 2.4647206730211168e-06, + "loss": 0.5211, + "step": 10320 + }, + { + "epoch": 1.5208742632612968, + "grad_norm": 0.6042398810386658, + "learning_rate": 2.4643330142717375e-06, + "loss": 0.5675, + "step": 10321 + }, + { + "epoch": 1.5210216110019648, + "grad_norm": 0.6427307724952698, + "learning_rate": 2.4639453563801306e-06, + "loss": 0.5402, + "step": 10322 + }, + { + "epoch": 1.5211689587426327, + "grad_norm": 0.60838383436203, + "learning_rate": 2.463557699355623e-06, + "loss": 0.4872, + "step": 10323 + }, + { + "epoch": 1.5213163064833006, + "grad_norm": 0.6332340836524963, + "learning_rate": 2.463170043207535e-06, + "loss": 0.5079, + "step": 10324 + }, + { + "epoch": 1.5214636542239686, + "grad_norm": 0.583267331123352, + "learning_rate": 2.4627823879451906e-06, + "loss": 0.5166, + "step": 10325 + }, + { + "epoch": 1.5216110019646365, + "grad_norm": 0.5988248586654663, + "learning_rate": 2.462394733577912e-06, + "loss": 0.5461, + "step": 10326 + }, + { + "epoch": 1.5217583497053044, + "grad_norm": 0.6122534871101379, + "learning_rate": 2.4620070801150226e-06, + "loss": 0.5359, + "step": 10327 + }, + { + "epoch": 1.5219056974459724, + "grad_norm": 0.5843744874000549, + "learning_rate": 2.4616194275658452e-06, + "loss": 0.5487, + "step": 10328 + }, + { + "epoch": 1.5220530451866403, + "grad_norm": 0.5882205963134766, + "learning_rate": 2.461231775939703e-06, + "loss": 0.5621, + "step": 10329 + }, + { + "epoch": 1.5222003929273085, + "grad_norm": 0.580457866191864, + "learning_rate": 2.460844125245918e-06, + "loss": 0.5354, + "step": 10330 + }, + { + "epoch": 1.5223477406679764, + "grad_norm": 0.592848539352417, + "learning_rate": 2.460456475493813e-06, + "loss": 0.5385, + "step": 10331 + }, + { + "epoch": 1.5224950884086446, + "grad_norm": 0.5819212794303894, + "learning_rate": 2.4600688266927113e-06, + "loss": 0.5205, + "step": 10332 + }, + { + "epoch": 1.5226424361493125, + "grad_norm": 0.5791220664978027, + "learning_rate": 2.459681178851936e-06, + "loss": 0.5144, + "step": 10333 + }, + { + "epoch": 1.5227897838899804, + "grad_norm": 0.5850048661231995, + "learning_rate": 2.4592935319808094e-06, + "loss": 0.5485, + "step": 10334 + }, + { + "epoch": 1.5229371316306484, + "grad_norm": 0.6021028161048889, + "learning_rate": 2.4589058860886537e-06, + "loss": 0.5187, + "step": 10335 + }, + { + "epoch": 1.5230844793713163, + "grad_norm": 0.5621654987335205, + "learning_rate": 2.4585182411847928e-06, + "loss": 0.5196, + "step": 10336 + }, + { + "epoch": 1.5232318271119842, + "grad_norm": 0.6020181179046631, + "learning_rate": 2.458130597278548e-06, + "loss": 0.5134, + "step": 10337 + }, + { + "epoch": 1.5233791748526522, + "grad_norm": 0.6355478763580322, + "learning_rate": 2.4577429543792437e-06, + "loss": 0.5608, + "step": 10338 + }, + { + "epoch": 1.52352652259332, + "grad_norm": 0.6045737266540527, + "learning_rate": 2.457355312496201e-06, + "loss": 0.5255, + "step": 10339 + }, + { + "epoch": 1.5236738703339883, + "grad_norm": 0.6090301871299744, + "learning_rate": 2.456967671638743e-06, + "loss": 0.5072, + "step": 10340 + }, + { + "epoch": 1.5238212180746562, + "grad_norm": 0.5968594551086426, + "learning_rate": 2.456580031816192e-06, + "loss": 0.5002, + "step": 10341 + }, + { + "epoch": 1.5239685658153241, + "grad_norm": 0.5815266966819763, + "learning_rate": 2.4561923930378714e-06, + "loss": 0.5282, + "step": 10342 + }, + { + "epoch": 1.5241159135559923, + "grad_norm": 0.58380126953125, + "learning_rate": 2.455804755313103e-06, + "loss": 0.5016, + "step": 10343 + }, + { + "epoch": 1.5242632612966602, + "grad_norm": 0.5884482860565186, + "learning_rate": 2.45541711865121e-06, + "loss": 0.5331, + "step": 10344 + }, + { + "epoch": 1.5244106090373282, + "grad_norm": 0.6059187650680542, + "learning_rate": 2.4550294830615133e-06, + "loss": 0.5484, + "step": 10345 + }, + { + "epoch": 1.524557956777996, + "grad_norm": 0.6389738321304321, + "learning_rate": 2.4546418485533373e-06, + "loss": 0.5201, + "step": 10346 + }, + { + "epoch": 1.524705304518664, + "grad_norm": 0.6066357493400574, + "learning_rate": 2.4542542151360034e-06, + "loss": 0.5167, + "step": 10347 + }, + { + "epoch": 1.524852652259332, + "grad_norm": 0.6271058917045593, + "learning_rate": 2.453866582818834e-06, + "loss": 0.5069, + "step": 10348 + }, + { + "epoch": 1.525, + "grad_norm": 0.5752118825912476, + "learning_rate": 2.453478951611152e-06, + "loss": 0.4975, + "step": 10349 + }, + { + "epoch": 1.5251473477406678, + "grad_norm": 0.5797145962715149, + "learning_rate": 2.453091321522279e-06, + "loss": 0.5497, + "step": 10350 + }, + { + "epoch": 1.525294695481336, + "grad_norm": 0.6106210350990295, + "learning_rate": 2.4527036925615376e-06, + "loss": 0.5401, + "step": 10351 + }, + { + "epoch": 1.525442043222004, + "grad_norm": 0.5964381098747253, + "learning_rate": 2.4523160647382502e-06, + "loss": 0.5256, + "step": 10352 + }, + { + "epoch": 1.5255893909626719, + "grad_norm": 0.6164708137512207, + "learning_rate": 2.4519284380617396e-06, + "loss": 0.5296, + "step": 10353 + }, + { + "epoch": 1.52573673870334, + "grad_norm": 0.6219168305397034, + "learning_rate": 2.4515408125413273e-06, + "loss": 0.5376, + "step": 10354 + }, + { + "epoch": 1.525884086444008, + "grad_norm": 0.6081355810165405, + "learning_rate": 2.4511531881863358e-06, + "loss": 0.5103, + "step": 10355 + }, + { + "epoch": 1.526031434184676, + "grad_norm": 0.6060886383056641, + "learning_rate": 2.450765565006087e-06, + "loss": 0.5304, + "step": 10356 + }, + { + "epoch": 1.5261787819253438, + "grad_norm": 0.604508101940155, + "learning_rate": 2.4503779430099035e-06, + "loss": 0.5222, + "step": 10357 + }, + { + "epoch": 1.5263261296660118, + "grad_norm": 0.5781116485595703, + "learning_rate": 2.4499903222071064e-06, + "loss": 0.5051, + "step": 10358 + }, + { + "epoch": 1.5264734774066797, + "grad_norm": 0.6141273379325867, + "learning_rate": 2.4496027026070195e-06, + "loss": 0.5268, + "step": 10359 + }, + { + "epoch": 1.5266208251473476, + "grad_norm": 0.6080543398857117, + "learning_rate": 2.4492150842189634e-06, + "loss": 0.5118, + "step": 10360 + }, + { + "epoch": 1.5267681728880156, + "grad_norm": 0.6257089376449585, + "learning_rate": 2.448827467052261e-06, + "loss": 0.5301, + "step": 10361 + }, + { + "epoch": 1.5269155206286837, + "grad_norm": 0.5671752691268921, + "learning_rate": 2.448439851116234e-06, + "loss": 0.5299, + "step": 10362 + }, + { + "epoch": 1.5270628683693517, + "grad_norm": 0.6012353897094727, + "learning_rate": 2.4480522364202044e-06, + "loss": 0.5301, + "step": 10363 + }, + { + "epoch": 1.5272102161100196, + "grad_norm": 0.617817759513855, + "learning_rate": 2.447664622973494e-06, + "loss": 0.5196, + "step": 10364 + }, + { + "epoch": 1.5273575638506878, + "grad_norm": 0.5978717803955078, + "learning_rate": 2.4472770107854254e-06, + "loss": 0.5418, + "step": 10365 + }, + { + "epoch": 1.5275049115913557, + "grad_norm": 0.5973769426345825, + "learning_rate": 2.4468893998653195e-06, + "loss": 0.5191, + "step": 10366 + }, + { + "epoch": 1.5276522593320236, + "grad_norm": 0.588554322719574, + "learning_rate": 2.4465017902224988e-06, + "loss": 0.5328, + "step": 10367 + }, + { + "epoch": 1.5277996070726916, + "grad_norm": 0.6107785105705261, + "learning_rate": 2.446114181866285e-06, + "loss": 0.5229, + "step": 10368 + }, + { + "epoch": 1.5279469548133595, + "grad_norm": 0.6158796548843384, + "learning_rate": 2.4457265748059995e-06, + "loss": 0.5496, + "step": 10369 + }, + { + "epoch": 1.5280943025540275, + "grad_norm": 0.6038158535957336, + "learning_rate": 2.445338969050965e-06, + "loss": 0.5183, + "step": 10370 + }, + { + "epoch": 1.5282416502946954, + "grad_norm": 0.6070204377174377, + "learning_rate": 2.444951364610502e-06, + "loss": 0.502, + "step": 10371 + }, + { + "epoch": 1.5283889980353633, + "grad_norm": 0.5935075283050537, + "learning_rate": 2.4445637614939333e-06, + "loss": 0.5217, + "step": 10372 + }, + { + "epoch": 1.5285363457760315, + "grad_norm": 0.6031075119972229, + "learning_rate": 2.44417615971058e-06, + "loss": 0.5348, + "step": 10373 + }, + { + "epoch": 1.5286836935166994, + "grad_norm": 0.6182942390441895, + "learning_rate": 2.443788559269764e-06, + "loss": 0.5148, + "step": 10374 + }, + { + "epoch": 1.5288310412573674, + "grad_norm": 0.5783098340034485, + "learning_rate": 2.4434009601808067e-06, + "loss": 0.5043, + "step": 10375 + }, + { + "epoch": 1.5289783889980355, + "grad_norm": 0.6262577772140503, + "learning_rate": 2.4430133624530298e-06, + "loss": 0.5335, + "step": 10376 + }, + { + "epoch": 1.5291257367387034, + "grad_norm": 0.6098250150680542, + "learning_rate": 2.4426257660957547e-06, + "loss": 0.523, + "step": 10377 + }, + { + "epoch": 1.5292730844793714, + "grad_norm": 0.5813658833503723, + "learning_rate": 2.4422381711183036e-06, + "loss": 0.4906, + "step": 10378 + }, + { + "epoch": 1.5294204322200393, + "grad_norm": 0.5806041955947876, + "learning_rate": 2.4418505775299966e-06, + "loss": 0.5252, + "step": 10379 + }, + { + "epoch": 1.5295677799607073, + "grad_norm": 0.5948179960250854, + "learning_rate": 2.4414629853401563e-06, + "loss": 0.5358, + "step": 10380 + }, + { + "epoch": 1.5297151277013752, + "grad_norm": 0.5945817232131958, + "learning_rate": 2.4410753945581038e-06, + "loss": 0.4871, + "step": 10381 + }, + { + "epoch": 1.5298624754420431, + "grad_norm": 0.602263867855072, + "learning_rate": 2.4406878051931603e-06, + "loss": 0.5574, + "step": 10382 + }, + { + "epoch": 1.530009823182711, + "grad_norm": 0.5891357064247131, + "learning_rate": 2.4403002172546473e-06, + "loss": 0.5374, + "step": 10383 + }, + { + "epoch": 1.5301571709233792, + "grad_norm": 0.5641748309135437, + "learning_rate": 2.4399126307518857e-06, + "loss": 0.495, + "step": 10384 + }, + { + "epoch": 1.5303045186640472, + "grad_norm": 0.607006311416626, + "learning_rate": 2.4395250456941975e-06, + "loss": 0.4898, + "step": 10385 + }, + { + "epoch": 1.530451866404715, + "grad_norm": 0.639699399471283, + "learning_rate": 2.439137462090904e-06, + "loss": 0.5141, + "step": 10386 + }, + { + "epoch": 1.5305992141453832, + "grad_norm": 0.6019107103347778, + "learning_rate": 2.438749879951325e-06, + "loss": 0.5209, + "step": 10387 + }, + { + "epoch": 1.5307465618860512, + "grad_norm": 0.5951138138771057, + "learning_rate": 2.4383622992847834e-06, + "loss": 0.542, + "step": 10388 + }, + { + "epoch": 1.5308939096267191, + "grad_norm": 0.600635290145874, + "learning_rate": 2.437974720100599e-06, + "loss": 0.5004, + "step": 10389 + }, + { + "epoch": 1.531041257367387, + "grad_norm": 0.6258196234703064, + "learning_rate": 2.437587142408094e-06, + "loss": 0.5194, + "step": 10390 + }, + { + "epoch": 1.531188605108055, + "grad_norm": 0.5818198919296265, + "learning_rate": 2.437199566216589e-06, + "loss": 0.5371, + "step": 10391 + }, + { + "epoch": 1.531335952848723, + "grad_norm": 0.6504175662994385, + "learning_rate": 2.4368119915354045e-06, + "loss": 0.5123, + "step": 10392 + }, + { + "epoch": 1.5314833005893909, + "grad_norm": 0.6013475656509399, + "learning_rate": 2.4364244183738625e-06, + "loss": 0.5239, + "step": 10393 + }, + { + "epoch": 1.5316306483300588, + "grad_norm": 0.6073537468910217, + "learning_rate": 2.436036846741283e-06, + "loss": 0.5225, + "step": 10394 + }, + { + "epoch": 1.531777996070727, + "grad_norm": 0.5733551383018494, + "learning_rate": 2.435649276646987e-06, + "loss": 0.5522, + "step": 10395 + }, + { + "epoch": 1.531925343811395, + "grad_norm": 0.6039338111877441, + "learning_rate": 2.435261708100296e-06, + "loss": 0.5215, + "step": 10396 + }, + { + "epoch": 1.5320726915520628, + "grad_norm": 0.6211051344871521, + "learning_rate": 2.4348741411105313e-06, + "loss": 0.5344, + "step": 10397 + }, + { + "epoch": 1.532220039292731, + "grad_norm": 0.6174623370170593, + "learning_rate": 2.434486575687012e-06, + "loss": 0.4888, + "step": 10398 + }, + { + "epoch": 1.532367387033399, + "grad_norm": 0.5985472798347473, + "learning_rate": 2.4340990118390607e-06, + "loss": 0.4992, + "step": 10399 + }, + { + "epoch": 1.5325147347740669, + "grad_norm": 0.6105830073356628, + "learning_rate": 2.433711449575997e-06, + "loss": 0.5506, + "step": 10400 + }, + { + "epoch": 1.5326620825147348, + "grad_norm": 0.5821838974952698, + "learning_rate": 2.4333238889071413e-06, + "loss": 0.5424, + "step": 10401 + }, + { + "epoch": 1.5328094302554027, + "grad_norm": 0.6283087134361267, + "learning_rate": 2.4329363298418156e-06, + "loss": 0.5517, + "step": 10402 + }, + { + "epoch": 1.5329567779960707, + "grad_norm": 0.6067033410072327, + "learning_rate": 2.432548772389339e-06, + "loss": 0.5534, + "step": 10403 + }, + { + "epoch": 1.5331041257367386, + "grad_norm": 0.5898045897483826, + "learning_rate": 2.432161216559033e-06, + "loss": 0.5045, + "step": 10404 + }, + { + "epoch": 1.5332514734774065, + "grad_norm": 0.6271289587020874, + "learning_rate": 2.4317736623602182e-06, + "loss": 0.5038, + "step": 10405 + }, + { + "epoch": 1.5333988212180747, + "grad_norm": 0.5973233580589294, + "learning_rate": 2.4313861098022148e-06, + "loss": 0.5657, + "step": 10406 + }, + { + "epoch": 1.5335461689587426, + "grad_norm": 0.580278754234314, + "learning_rate": 2.430998558894343e-06, + "loss": 0.5493, + "step": 10407 + }, + { + "epoch": 1.5336935166994106, + "grad_norm": 0.5605477690696716, + "learning_rate": 2.4306110096459233e-06, + "loss": 0.5311, + "step": 10408 + }, + { + "epoch": 1.5338408644400787, + "grad_norm": 0.6245855093002319, + "learning_rate": 2.4302234620662768e-06, + "loss": 0.5328, + "step": 10409 + }, + { + "epoch": 1.5339882121807467, + "grad_norm": 0.5524704456329346, + "learning_rate": 2.4298359161647228e-06, + "loss": 0.5349, + "step": 10410 + }, + { + "epoch": 1.5341355599214146, + "grad_norm": 0.5886157155036926, + "learning_rate": 2.4294483719505825e-06, + "loss": 0.5503, + "step": 10411 + }, + { + "epoch": 1.5342829076620825, + "grad_norm": 0.6082873344421387, + "learning_rate": 2.429060829433176e-06, + "loss": 0.5483, + "step": 10412 + }, + { + "epoch": 1.5344302554027505, + "grad_norm": 0.5845164060592651, + "learning_rate": 2.4286732886218224e-06, + "loss": 0.5114, + "step": 10413 + }, + { + "epoch": 1.5345776031434184, + "grad_norm": 0.6198939085006714, + "learning_rate": 2.4282857495258435e-06, + "loss": 0.537, + "step": 10414 + }, + { + "epoch": 1.5347249508840863, + "grad_norm": 0.6369621157646179, + "learning_rate": 2.4278982121545585e-06, + "loss": 0.5068, + "step": 10415 + }, + { + "epoch": 1.5348722986247543, + "grad_norm": 0.6066128015518188, + "learning_rate": 2.4275106765172875e-06, + "loss": 0.5399, + "step": 10416 + }, + { + "epoch": 1.5350196463654224, + "grad_norm": 0.619169294834137, + "learning_rate": 2.427123142623351e-06, + "loss": 0.546, + "step": 10417 + }, + { + "epoch": 1.5351669941060904, + "grad_norm": 0.6008298397064209, + "learning_rate": 2.4267356104820687e-06, + "loss": 0.5425, + "step": 10418 + }, + { + "epoch": 1.5353143418467583, + "grad_norm": 0.6332057118415833, + "learning_rate": 2.4263480801027606e-06, + "loss": 0.5144, + "step": 10419 + }, + { + "epoch": 1.5354616895874265, + "grad_norm": 0.5784188508987427, + "learning_rate": 2.4259605514947463e-06, + "loss": 0.5033, + "step": 10420 + }, + { + "epoch": 1.5356090373280944, + "grad_norm": 0.5968025922775269, + "learning_rate": 2.4255730246673464e-06, + "loss": 0.5542, + "step": 10421 + }, + { + "epoch": 1.5357563850687623, + "grad_norm": 0.610436737537384, + "learning_rate": 2.42518549962988e-06, + "loss": 0.5258, + "step": 10422 + }, + { + "epoch": 1.5359037328094303, + "grad_norm": 0.6058443784713745, + "learning_rate": 2.4247979763916675e-06, + "loss": 0.5171, + "step": 10423 + }, + { + "epoch": 1.5360510805500982, + "grad_norm": 0.5889146327972412, + "learning_rate": 2.4244104549620283e-06, + "loss": 0.5264, + "step": 10424 + }, + { + "epoch": 1.5361984282907661, + "grad_norm": 0.7096870541572571, + "learning_rate": 2.4240229353502827e-06, + "loss": 0.5629, + "step": 10425 + }, + { + "epoch": 1.536345776031434, + "grad_norm": 0.6060745716094971, + "learning_rate": 2.42363541756575e-06, + "loss": 0.5284, + "step": 10426 + }, + { + "epoch": 1.536493123772102, + "grad_norm": 0.5846174359321594, + "learning_rate": 2.4232479016177487e-06, + "loss": 0.5117, + "step": 10427 + }, + { + "epoch": 1.5366404715127702, + "grad_norm": 0.5885968208312988, + "learning_rate": 2.4228603875155997e-06, + "loss": 0.5262, + "step": 10428 + }, + { + "epoch": 1.536787819253438, + "grad_norm": 0.6360501050949097, + "learning_rate": 2.4224728752686226e-06, + "loss": 0.5177, + "step": 10429 + }, + { + "epoch": 1.536935166994106, + "grad_norm": 0.6043634414672852, + "learning_rate": 2.422085364886136e-06, + "loss": 0.514, + "step": 10430 + }, + { + "epoch": 1.5370825147347742, + "grad_norm": 0.5772643685340881, + "learning_rate": 2.4216978563774602e-06, + "loss": 0.4994, + "step": 10431 + }, + { + "epoch": 1.5372298624754421, + "grad_norm": 0.6030833721160889, + "learning_rate": 2.4213103497519126e-06, + "loss": 0.5036, + "step": 10432 + }, + { + "epoch": 1.53737721021611, + "grad_norm": 0.6042342782020569, + "learning_rate": 2.4209228450188157e-06, + "loss": 0.4941, + "step": 10433 + }, + { + "epoch": 1.537524557956778, + "grad_norm": 0.6195099353790283, + "learning_rate": 2.4205353421874874e-06, + "loss": 0.5309, + "step": 10434 + }, + { + "epoch": 1.537671905697446, + "grad_norm": 0.5744369029998779, + "learning_rate": 2.4201478412672467e-06, + "loss": 0.5504, + "step": 10435 + }, + { + "epoch": 1.5378192534381139, + "grad_norm": 0.6446452736854553, + "learning_rate": 2.4197603422674126e-06, + "loss": 0.54, + "step": 10436 + }, + { + "epoch": 1.5379666011787818, + "grad_norm": 0.6038150191307068, + "learning_rate": 2.419372845197305e-06, + "loss": 0.4965, + "step": 10437 + }, + { + "epoch": 1.5381139489194497, + "grad_norm": 0.6201282739639282, + "learning_rate": 2.4189853500662427e-06, + "loss": 0.5271, + "step": 10438 + }, + { + "epoch": 1.538261296660118, + "grad_norm": 0.5676858425140381, + "learning_rate": 2.4185978568835446e-06, + "loss": 0.5381, + "step": 10439 + }, + { + "epoch": 1.5384086444007858, + "grad_norm": 0.6013271808624268, + "learning_rate": 2.41821036565853e-06, + "loss": 0.5208, + "step": 10440 + }, + { + "epoch": 1.5385559921414538, + "grad_norm": 0.5722228288650513, + "learning_rate": 2.4178228764005173e-06, + "loss": 0.4954, + "step": 10441 + }, + { + "epoch": 1.538703339882122, + "grad_norm": 0.6437250375747681, + "learning_rate": 2.4174353891188267e-06, + "loss": 0.5146, + "step": 10442 + }, + { + "epoch": 1.5388506876227899, + "grad_norm": 0.5819971561431885, + "learning_rate": 2.4170479038227757e-06, + "loss": 0.5601, + "step": 10443 + }, + { + "epoch": 1.5389980353634578, + "grad_norm": 0.5847922563552856, + "learning_rate": 2.4166604205216843e-06, + "loss": 0.5568, + "step": 10444 + }, + { + "epoch": 1.5391453831041257, + "grad_norm": 0.6111276149749756, + "learning_rate": 2.41627293922487e-06, + "loss": 0.5041, + "step": 10445 + }, + { + "epoch": 1.5392927308447937, + "grad_norm": 0.6011260747909546, + "learning_rate": 2.4158854599416528e-06, + "loss": 0.4946, + "step": 10446 + }, + { + "epoch": 1.5394400785854616, + "grad_norm": 0.6002845764160156, + "learning_rate": 2.41549798268135e-06, + "loss": 0.5195, + "step": 10447 + }, + { + "epoch": 1.5395874263261295, + "grad_norm": 0.5952722430229187, + "learning_rate": 2.4151105074532817e-06, + "loss": 0.5362, + "step": 10448 + }, + { + "epoch": 1.5397347740667975, + "grad_norm": 0.605627715587616, + "learning_rate": 2.4147230342667657e-06, + "loss": 0.5104, + "step": 10449 + }, + { + "epoch": 1.5398821218074656, + "grad_norm": 0.6000638604164124, + "learning_rate": 2.414335563131121e-06, + "loss": 0.5498, + "step": 10450 + }, + { + "epoch": 1.5400294695481336, + "grad_norm": 0.6293776631355286, + "learning_rate": 2.4139480940556654e-06, + "loss": 0.5131, + "step": 10451 + }, + { + "epoch": 1.5401768172888017, + "grad_norm": 0.6329897046089172, + "learning_rate": 2.413560627049718e-06, + "loss": 0.4874, + "step": 10452 + }, + { + "epoch": 1.5403241650294697, + "grad_norm": 0.5933842658996582, + "learning_rate": 2.413173162122597e-06, + "loss": 0.488, + "step": 10453 + }, + { + "epoch": 1.5404715127701376, + "grad_norm": 0.6165576577186584, + "learning_rate": 2.4127856992836204e-06, + "loss": 0.5189, + "step": 10454 + }, + { + "epoch": 1.5406188605108055, + "grad_norm": 0.6284331679344177, + "learning_rate": 2.412398238542107e-06, + "loss": 0.5054, + "step": 10455 + }, + { + "epoch": 1.5407662082514735, + "grad_norm": 0.6143565773963928, + "learning_rate": 2.4120107799073753e-06, + "loss": 0.5429, + "step": 10456 + }, + { + "epoch": 1.5409135559921414, + "grad_norm": 0.5738587975502014, + "learning_rate": 2.4116233233887427e-06, + "loss": 0.5069, + "step": 10457 + }, + { + "epoch": 1.5410609037328094, + "grad_norm": 0.6015323996543884, + "learning_rate": 2.411235868995528e-06, + "loss": 0.5422, + "step": 10458 + }, + { + "epoch": 1.5412082514734773, + "grad_norm": 0.5762231349945068, + "learning_rate": 2.4108484167370484e-06, + "loss": 0.5386, + "step": 10459 + }, + { + "epoch": 1.5413555992141452, + "grad_norm": 0.5917509198188782, + "learning_rate": 2.410460966622623e-06, + "loss": 0.5342, + "step": 10460 + }, + { + "epoch": 1.5415029469548134, + "grad_norm": 0.6120691895484924, + "learning_rate": 2.4100735186615695e-06, + "loss": 0.511, + "step": 10461 + }, + { + "epoch": 1.5416502946954813, + "grad_norm": 0.6014742851257324, + "learning_rate": 2.4096860728632056e-06, + "loss": 0.5201, + "step": 10462 + }, + { + "epoch": 1.5417976424361495, + "grad_norm": 0.6205368041992188, + "learning_rate": 2.409298629236849e-06, + "loss": 0.5464, + "step": 10463 + }, + { + "epoch": 1.5419449901768174, + "grad_norm": 0.5833765864372253, + "learning_rate": 2.4089111877918178e-06, + "loss": 0.5622, + "step": 10464 + }, + { + "epoch": 1.5420923379174853, + "grad_norm": 0.6443162560462952, + "learning_rate": 2.40852374853743e-06, + "loss": 0.5178, + "step": 10465 + }, + { + "epoch": 1.5422396856581533, + "grad_norm": 0.5532763600349426, + "learning_rate": 2.4081363114830036e-06, + "loss": 0.4982, + "step": 10466 + }, + { + "epoch": 1.5423870333988212, + "grad_norm": 0.5633993744850159, + "learning_rate": 2.4077488766378553e-06, + "loss": 0.5001, + "step": 10467 + }, + { + "epoch": 1.5425343811394892, + "grad_norm": 0.6109498739242554, + "learning_rate": 2.407361444011303e-06, + "loss": 0.5501, + "step": 10468 + }, + { + "epoch": 1.542681728880157, + "grad_norm": 0.6282305717468262, + "learning_rate": 2.4069740136126645e-06, + "loss": 0.5488, + "step": 10469 + }, + { + "epoch": 1.542829076620825, + "grad_norm": 0.5962167382240295, + "learning_rate": 2.4065865854512575e-06, + "loss": 0.5078, + "step": 10470 + }, + { + "epoch": 1.542976424361493, + "grad_norm": 0.5968943238258362, + "learning_rate": 2.4061991595363997e-06, + "loss": 0.5402, + "step": 10471 + }, + { + "epoch": 1.5431237721021611, + "grad_norm": 0.6174858808517456, + "learning_rate": 2.405811735877408e-06, + "loss": 0.5512, + "step": 10472 + }, + { + "epoch": 1.543271119842829, + "grad_norm": 0.5973556041717529, + "learning_rate": 2.4054243144835997e-06, + "loss": 0.5266, + "step": 10473 + }, + { + "epoch": 1.5434184675834972, + "grad_norm": 0.6103199124336243, + "learning_rate": 2.4050368953642923e-06, + "loss": 0.5348, + "step": 10474 + }, + { + "epoch": 1.5435658153241651, + "grad_norm": 0.594063937664032, + "learning_rate": 2.4046494785288035e-06, + "loss": 0.5225, + "step": 10475 + }, + { + "epoch": 1.543713163064833, + "grad_norm": 0.6133121848106384, + "learning_rate": 2.404262063986449e-06, + "loss": 0.5437, + "step": 10476 + }, + { + "epoch": 1.543860510805501, + "grad_norm": 0.5571784377098083, + "learning_rate": 2.4038746517465478e-06, + "loss": 0.5086, + "step": 10477 + }, + { + "epoch": 1.544007858546169, + "grad_norm": 0.6365574598312378, + "learning_rate": 2.403487241818416e-06, + "loss": 0.5497, + "step": 10478 + }, + { + "epoch": 1.544155206286837, + "grad_norm": 0.6071714162826538, + "learning_rate": 2.4030998342113707e-06, + "loss": 0.5532, + "step": 10479 + }, + { + "epoch": 1.5443025540275048, + "grad_norm": 0.6148251295089722, + "learning_rate": 2.402712428934729e-06, + "loss": 0.5274, + "step": 10480 + }, + { + "epoch": 1.5444499017681728, + "grad_norm": 0.6704176664352417, + "learning_rate": 2.4023250259978077e-06, + "loss": 0.5226, + "step": 10481 + }, + { + "epoch": 1.544597249508841, + "grad_norm": 0.5948224067687988, + "learning_rate": 2.4019376254099235e-06, + "loss": 0.4918, + "step": 10482 + }, + { + "epoch": 1.5447445972495089, + "grad_norm": 0.5652888417243958, + "learning_rate": 2.401550227180394e-06, + "loss": 0.4967, + "step": 10483 + }, + { + "epoch": 1.5448919449901768, + "grad_norm": 0.6257306933403015, + "learning_rate": 2.4011628313185343e-06, + "loss": 0.5373, + "step": 10484 + }, + { + "epoch": 1.545039292730845, + "grad_norm": 0.6394097805023193, + "learning_rate": 2.400775437833663e-06, + "loss": 0.5386, + "step": 10485 + }, + { + "epoch": 1.5451866404715129, + "grad_norm": 0.572754979133606, + "learning_rate": 2.400388046735096e-06, + "loss": 0.5106, + "step": 10486 + }, + { + "epoch": 1.5453339882121808, + "grad_norm": 0.5857800841331482, + "learning_rate": 2.400000658032149e-06, + "loss": 0.5192, + "step": 10487 + }, + { + "epoch": 1.5454813359528488, + "grad_norm": 0.5818976163864136, + "learning_rate": 2.39961327173414e-06, + "loss": 0.5173, + "step": 10488 + }, + { + "epoch": 1.5456286836935167, + "grad_norm": 0.610838770866394, + "learning_rate": 2.3992258878503845e-06, + "loss": 0.554, + "step": 10489 + }, + { + "epoch": 1.5457760314341846, + "grad_norm": 0.6353291273117065, + "learning_rate": 2.398838506390199e-06, + "loss": 0.5313, + "step": 10490 + }, + { + "epoch": 1.5459233791748526, + "grad_norm": 0.6485588550567627, + "learning_rate": 2.3984511273629e-06, + "loss": 0.5281, + "step": 10491 + }, + { + "epoch": 1.5460707269155205, + "grad_norm": 0.5709021687507629, + "learning_rate": 2.398063750777804e-06, + "loss": 0.5025, + "step": 10492 + }, + { + "epoch": 1.5462180746561887, + "grad_norm": 0.5981251001358032, + "learning_rate": 2.397676376644227e-06, + "loss": 0.5586, + "step": 10493 + }, + { + "epoch": 1.5463654223968566, + "grad_norm": 0.6560633182525635, + "learning_rate": 2.397289004971485e-06, + "loss": 0.5175, + "step": 10494 + }, + { + "epoch": 1.5465127701375245, + "grad_norm": 0.5983068943023682, + "learning_rate": 2.396901635768894e-06, + "loss": 0.4493, + "step": 10495 + }, + { + "epoch": 1.5466601178781927, + "grad_norm": 0.6060875058174133, + "learning_rate": 2.396514269045771e-06, + "loss": 0.5133, + "step": 10496 + }, + { + "epoch": 1.5468074656188606, + "grad_norm": 0.6366392374038696, + "learning_rate": 2.396126904811431e-06, + "loss": 0.5515, + "step": 10497 + }, + { + "epoch": 1.5469548133595286, + "grad_norm": 0.6186490654945374, + "learning_rate": 2.3957395430751896e-06, + "loss": 0.5314, + "step": 10498 + }, + { + "epoch": 1.5471021611001965, + "grad_norm": 0.5848577618598938, + "learning_rate": 2.395352183846364e-06, + "loss": 0.5477, + "step": 10499 + }, + { + "epoch": 1.5472495088408644, + "grad_norm": 0.6210240721702576, + "learning_rate": 2.3949648271342686e-06, + "loss": 0.5009, + "step": 10500 + }, + { + "epoch": 1.5473968565815324, + "grad_norm": 0.6206060647964478, + "learning_rate": 2.3945774729482203e-06, + "loss": 0.5132, + "step": 10501 + }, + { + "epoch": 1.5475442043222003, + "grad_norm": 0.6038814187049866, + "learning_rate": 2.3941901212975342e-06, + "loss": 0.5364, + "step": 10502 + }, + { + "epoch": 1.5476915520628682, + "grad_norm": 0.6375616788864136, + "learning_rate": 2.393802772191526e-06, + "loss": 0.4923, + "step": 10503 + }, + { + "epoch": 1.5478388998035364, + "grad_norm": 0.5769535303115845, + "learning_rate": 2.3934154256395118e-06, + "loss": 0.5188, + "step": 10504 + }, + { + "epoch": 1.5479862475442043, + "grad_norm": 0.6547877788543701, + "learning_rate": 2.3930280816508062e-06, + "loss": 0.5013, + "step": 10505 + }, + { + "epoch": 1.5481335952848723, + "grad_norm": 0.5864390134811401, + "learning_rate": 2.392640740234724e-06, + "loss": 0.536, + "step": 10506 + }, + { + "epoch": 1.5482809430255404, + "grad_norm": 0.5894542336463928, + "learning_rate": 2.392253401400583e-06, + "loss": 0.5505, + "step": 10507 + }, + { + "epoch": 1.5484282907662084, + "grad_norm": 0.5829493999481201, + "learning_rate": 2.3918660651576967e-06, + "loss": 0.5267, + "step": 10508 + }, + { + "epoch": 1.5485756385068763, + "grad_norm": 0.57991623878479, + "learning_rate": 2.3914787315153814e-06, + "loss": 0.5345, + "step": 10509 + }, + { + "epoch": 1.5487229862475442, + "grad_norm": 0.5677309036254883, + "learning_rate": 2.3910914004829515e-06, + "loss": 0.5199, + "step": 10510 + }, + { + "epoch": 1.5488703339882122, + "grad_norm": 0.5785871744155884, + "learning_rate": 2.3907040720697224e-06, + "loss": 0.5377, + "step": 10511 + }, + { + "epoch": 1.54901768172888, + "grad_norm": 0.5715954899787903, + "learning_rate": 2.390316746285009e-06, + "loss": 0.5342, + "step": 10512 + }, + { + "epoch": 1.549165029469548, + "grad_norm": 0.6185896396636963, + "learning_rate": 2.3899294231381264e-06, + "loss": 0.5658, + "step": 10513 + }, + { + "epoch": 1.549312377210216, + "grad_norm": 0.5857637524604797, + "learning_rate": 2.389542102638389e-06, + "loss": 0.5163, + "step": 10514 + }, + { + "epoch": 1.5494597249508841, + "grad_norm": 0.5812265872955322, + "learning_rate": 2.389154784795113e-06, + "loss": 0.5239, + "step": 10515 + }, + { + "epoch": 1.549607072691552, + "grad_norm": 0.5959134697914124, + "learning_rate": 2.3887674696176123e-06, + "loss": 0.5419, + "step": 10516 + }, + { + "epoch": 1.54975442043222, + "grad_norm": 0.6016830205917358, + "learning_rate": 2.3883801571152013e-06, + "loss": 0.5466, + "step": 10517 + }, + { + "epoch": 1.5499017681728882, + "grad_norm": 0.598275363445282, + "learning_rate": 2.3879928472971957e-06, + "loss": 0.5334, + "step": 10518 + }, + { + "epoch": 1.550049115913556, + "grad_norm": 0.5951681733131409, + "learning_rate": 2.387605540172909e-06, + "loss": 0.5421, + "step": 10519 + }, + { + "epoch": 1.550196463654224, + "grad_norm": 0.5777075290679932, + "learning_rate": 2.387218235751657e-06, + "loss": 0.5075, + "step": 10520 + }, + { + "epoch": 1.550343811394892, + "grad_norm": 0.6792351007461548, + "learning_rate": 2.386830934042753e-06, + "loss": 0.5146, + "step": 10521 + }, + { + "epoch": 1.55049115913556, + "grad_norm": 0.6113483309745789, + "learning_rate": 2.386443635055512e-06, + "loss": 0.5331, + "step": 10522 + }, + { + "epoch": 1.5506385068762278, + "grad_norm": 0.603032648563385, + "learning_rate": 2.386056338799249e-06, + "loss": 0.4934, + "step": 10523 + }, + { + "epoch": 1.5507858546168958, + "grad_norm": 0.6211456060409546, + "learning_rate": 2.385669045283277e-06, + "loss": 0.5415, + "step": 10524 + }, + { + "epoch": 1.5509332023575637, + "grad_norm": 0.5929624438285828, + "learning_rate": 2.385281754516911e-06, + "loss": 0.496, + "step": 10525 + }, + { + "epoch": 1.5510805500982319, + "grad_norm": 0.6098231077194214, + "learning_rate": 2.3848944665094648e-06, + "loss": 0.5235, + "step": 10526 + }, + { + "epoch": 1.5512278978388998, + "grad_norm": 0.6386737823486328, + "learning_rate": 2.384507181270253e-06, + "loss": 0.5269, + "step": 10527 + }, + { + "epoch": 1.5513752455795677, + "grad_norm": 0.6013073325157166, + "learning_rate": 2.384119898808589e-06, + "loss": 0.4947, + "step": 10528 + }, + { + "epoch": 1.551522593320236, + "grad_norm": 0.5992372035980225, + "learning_rate": 2.383732619133787e-06, + "loss": 0.5398, + "step": 10529 + }, + { + "epoch": 1.5516699410609038, + "grad_norm": 0.5895220637321472, + "learning_rate": 2.3833453422551615e-06, + "loss": 0.5326, + "step": 10530 + }, + { + "epoch": 1.5518172888015718, + "grad_norm": 0.583000898361206, + "learning_rate": 2.3829580681820255e-06, + "loss": 0.4928, + "step": 10531 + }, + { + "epoch": 1.5519646365422397, + "grad_norm": 0.6192269325256348, + "learning_rate": 2.382570796923693e-06, + "loss": 0.5511, + "step": 10532 + }, + { + "epoch": 1.5521119842829076, + "grad_norm": 0.6301325559616089, + "learning_rate": 2.3821835284894777e-06, + "loss": 0.5009, + "step": 10533 + }, + { + "epoch": 1.5522593320235756, + "grad_norm": 0.58245849609375, + "learning_rate": 2.381796262888694e-06, + "loss": 0.5271, + "step": 10534 + }, + { + "epoch": 1.5524066797642435, + "grad_norm": 0.6137112975120544, + "learning_rate": 2.3814090001306538e-06, + "loss": 0.5386, + "step": 10535 + }, + { + "epoch": 1.5525540275049114, + "grad_norm": 0.5932170152664185, + "learning_rate": 2.381021740224672e-06, + "loss": 0.4908, + "step": 10536 + }, + { + "epoch": 1.5527013752455796, + "grad_norm": 0.5983676910400391, + "learning_rate": 2.3806344831800614e-06, + "loss": 0.5194, + "step": 10537 + }, + { + "epoch": 1.5528487229862475, + "grad_norm": 0.6405593156814575, + "learning_rate": 2.3802472290061353e-06, + "loss": 0.5282, + "step": 10538 + }, + { + "epoch": 1.5529960707269155, + "grad_norm": 0.57049959897995, + "learning_rate": 2.3798599777122076e-06, + "loss": 0.5419, + "step": 10539 + }, + { + "epoch": 1.5531434184675836, + "grad_norm": 0.5762197375297546, + "learning_rate": 2.379472729307591e-06, + "loss": 0.5094, + "step": 10540 + }, + { + "epoch": 1.5532907662082516, + "grad_norm": 0.5953972935676575, + "learning_rate": 2.379085483801599e-06, + "loss": 0.534, + "step": 10541 + }, + { + "epoch": 1.5534381139489195, + "grad_norm": 0.596409261226654, + "learning_rate": 2.3786982412035438e-06, + "loss": 0.5527, + "step": 10542 + }, + { + "epoch": 1.5535854616895874, + "grad_norm": 0.630831241607666, + "learning_rate": 2.3783110015227385e-06, + "loss": 0.4879, + "step": 10543 + }, + { + "epoch": 1.5537328094302554, + "grad_norm": 0.6045762896537781, + "learning_rate": 2.3779237647684973e-06, + "loss": 0.5246, + "step": 10544 + }, + { + "epoch": 1.5538801571709233, + "grad_norm": 0.573896586894989, + "learning_rate": 2.3775365309501324e-06, + "loss": 0.564, + "step": 10545 + }, + { + "epoch": 1.5540275049115913, + "grad_norm": 0.590350329875946, + "learning_rate": 2.377149300076956e-06, + "loss": 0.532, + "step": 10546 + }, + { + "epoch": 1.5541748526522592, + "grad_norm": 0.6213998794555664, + "learning_rate": 2.376762072158282e-06, + "loss": 0.5475, + "step": 10547 + }, + { + "epoch": 1.5543222003929273, + "grad_norm": 0.6028419137001038, + "learning_rate": 2.376374847203422e-06, + "loss": 0.5301, + "step": 10548 + }, + { + "epoch": 1.5544695481335953, + "grad_norm": 0.5963233709335327, + "learning_rate": 2.375987625221689e-06, + "loss": 0.5722, + "step": 10549 + }, + { + "epoch": 1.5546168958742632, + "grad_norm": 0.5977103114128113, + "learning_rate": 2.375600406222395e-06, + "loss": 0.511, + "step": 10550 + }, + { + "epoch": 1.5547642436149314, + "grad_norm": 0.5584907531738281, + "learning_rate": 2.3752131902148532e-06, + "loss": 0.499, + "step": 10551 + }, + { + "epoch": 1.5549115913555993, + "grad_norm": 0.622757613658905, + "learning_rate": 2.374825977208376e-06, + "loss": 0.561, + "step": 10552 + }, + { + "epoch": 1.5550589390962672, + "grad_norm": 0.6055571436882019, + "learning_rate": 2.374438767212275e-06, + "loss": 0.5604, + "step": 10553 + }, + { + "epoch": 1.5552062868369352, + "grad_norm": 0.5779094099998474, + "learning_rate": 2.374051560235863e-06, + "loss": 0.5232, + "step": 10554 + }, + { + "epoch": 1.5553536345776031, + "grad_norm": 0.5988250970840454, + "learning_rate": 2.3736643562884517e-06, + "loss": 0.5452, + "step": 10555 + }, + { + "epoch": 1.555500982318271, + "grad_norm": 0.6237763166427612, + "learning_rate": 2.3732771553793535e-06, + "loss": 0.5449, + "step": 10556 + }, + { + "epoch": 1.555648330058939, + "grad_norm": 0.6034322381019592, + "learning_rate": 2.3728899575178802e-06, + "loss": 0.5295, + "step": 10557 + }, + { + "epoch": 1.555795677799607, + "grad_norm": 0.6073933839797974, + "learning_rate": 2.372502762713344e-06, + "loss": 0.5142, + "step": 10558 + }, + { + "epoch": 1.555943025540275, + "grad_norm": 0.5980381965637207, + "learning_rate": 2.3721155709750565e-06, + "loss": 0.5589, + "step": 10559 + }, + { + "epoch": 1.556090373280943, + "grad_norm": 0.6379940509796143, + "learning_rate": 2.3717283823123293e-06, + "loss": 0.5648, + "step": 10560 + }, + { + "epoch": 1.556237721021611, + "grad_norm": 0.6364835500717163, + "learning_rate": 2.3713411967344746e-06, + "loss": 0.5284, + "step": 10561 + }, + { + "epoch": 1.5563850687622791, + "grad_norm": 0.6101773977279663, + "learning_rate": 2.3709540142508036e-06, + "loss": 0.4926, + "step": 10562 + }, + { + "epoch": 1.556532416502947, + "grad_norm": 0.602307140827179, + "learning_rate": 2.3705668348706283e-06, + "loss": 0.5217, + "step": 10563 + }, + { + "epoch": 1.556679764243615, + "grad_norm": 0.6260330677032471, + "learning_rate": 2.3701796586032593e-06, + "loss": 0.5212, + "step": 10564 + }, + { + "epoch": 1.556827111984283, + "grad_norm": 0.6148066520690918, + "learning_rate": 2.369792485458009e-06, + "loss": 0.5153, + "step": 10565 + }, + { + "epoch": 1.5569744597249509, + "grad_norm": 0.6121525764465332, + "learning_rate": 2.3694053154441884e-06, + "loss": 0.514, + "step": 10566 + }, + { + "epoch": 1.5571218074656188, + "grad_norm": 0.5920483469963074, + "learning_rate": 2.3690181485711084e-06, + "loss": 0.5418, + "step": 10567 + }, + { + "epoch": 1.5572691552062867, + "grad_norm": 0.6212109327316284, + "learning_rate": 2.368630984848081e-06, + "loss": 0.4933, + "step": 10568 + }, + { + "epoch": 1.5574165029469547, + "grad_norm": 0.6437981128692627, + "learning_rate": 2.3682438242844165e-06, + "loss": 0.509, + "step": 10569 + }, + { + "epoch": 1.5575638506876228, + "grad_norm": 0.5802285075187683, + "learning_rate": 2.367856666889426e-06, + "loss": 0.5213, + "step": 10570 + }, + { + "epoch": 1.5577111984282908, + "grad_norm": 0.5986752510070801, + "learning_rate": 2.367469512672421e-06, + "loss": 0.5158, + "step": 10571 + }, + { + "epoch": 1.5578585461689587, + "grad_norm": 0.6260605454444885, + "learning_rate": 2.367082361642712e-06, + "loss": 0.5076, + "step": 10572 + }, + { + "epoch": 1.5580058939096268, + "grad_norm": 0.5743899345397949, + "learning_rate": 2.36669521380961e-06, + "loss": 0.5013, + "step": 10573 + }, + { + "epoch": 1.5581532416502948, + "grad_norm": 0.6333783268928528, + "learning_rate": 2.3663080691824255e-06, + "loss": 0.5211, + "step": 10574 + }, + { + "epoch": 1.5583005893909627, + "grad_norm": 0.576344907283783, + "learning_rate": 2.365920927770469e-06, + "loss": 0.5575, + "step": 10575 + }, + { + "epoch": 1.5584479371316307, + "grad_norm": 0.5797743201255798, + "learning_rate": 2.3655337895830514e-06, + "loss": 0.549, + "step": 10576 + }, + { + "epoch": 1.5585952848722986, + "grad_norm": 0.6220982074737549, + "learning_rate": 2.3651466546294836e-06, + "loss": 0.5127, + "step": 10577 + }, + { + "epoch": 1.5587426326129665, + "grad_norm": 0.6005342602729797, + "learning_rate": 2.364759522919075e-06, + "loss": 0.5114, + "step": 10578 + }, + { + "epoch": 1.5588899803536345, + "grad_norm": 0.6307000517845154, + "learning_rate": 2.364372394461136e-06, + "loss": 0.5154, + "step": 10579 + }, + { + "epoch": 1.5590373280943024, + "grad_norm": 0.5778676271438599, + "learning_rate": 2.3639852692649777e-06, + "loss": 0.5109, + "step": 10580 + }, + { + "epoch": 1.5591846758349706, + "grad_norm": 0.6142286062240601, + "learning_rate": 2.36359814733991e-06, + "loss": 0.5106, + "step": 10581 + }, + { + "epoch": 1.5593320235756385, + "grad_norm": 0.6064006090164185, + "learning_rate": 2.363211028695243e-06, + "loss": 0.5681, + "step": 10582 + }, + { + "epoch": 1.5594793713163064, + "grad_norm": 0.614570677280426, + "learning_rate": 2.3628239133402865e-06, + "loss": 0.4908, + "step": 10583 + }, + { + "epoch": 1.5596267190569746, + "grad_norm": 0.5967398285865784, + "learning_rate": 2.3624368012843502e-06, + "loss": 0.5136, + "step": 10584 + }, + { + "epoch": 1.5597740667976425, + "grad_norm": 0.5898983478546143, + "learning_rate": 2.3620496925367447e-06, + "loss": 0.538, + "step": 10585 + }, + { + "epoch": 1.5599214145383105, + "grad_norm": 0.5851373672485352, + "learning_rate": 2.3616625871067796e-06, + "loss": 0.5347, + "step": 10586 + }, + { + "epoch": 1.5600687622789784, + "grad_norm": 0.6108033061027527, + "learning_rate": 2.361275485003764e-06, + "loss": 0.5269, + "step": 10587 + }, + { + "epoch": 1.5602161100196463, + "grad_norm": 0.5930831432342529, + "learning_rate": 2.360888386237008e-06, + "loss": 0.4861, + "step": 10588 + }, + { + "epoch": 1.5603634577603143, + "grad_norm": 0.5859571695327759, + "learning_rate": 2.3605012908158214e-06, + "loss": 0.5346, + "step": 10589 + }, + { + "epoch": 1.5605108055009822, + "grad_norm": 0.6090549230575562, + "learning_rate": 2.360114198749513e-06, + "loss": 0.5182, + "step": 10590 + }, + { + "epoch": 1.5606581532416501, + "grad_norm": 0.5914034843444824, + "learning_rate": 2.3597271100473923e-06, + "loss": 0.5239, + "step": 10591 + }, + { + "epoch": 1.5608055009823183, + "grad_norm": 0.5769892334938049, + "learning_rate": 2.3593400247187692e-06, + "loss": 0.5243, + "step": 10592 + }, + { + "epoch": 1.5609528487229862, + "grad_norm": 0.5930962562561035, + "learning_rate": 2.3589529427729523e-06, + "loss": 0.5176, + "step": 10593 + }, + { + "epoch": 1.5611001964636544, + "grad_norm": 0.613606333732605, + "learning_rate": 2.358565864219251e-06, + "loss": 0.5353, + "step": 10594 + }, + { + "epoch": 1.5612475442043223, + "grad_norm": 0.5808413624763489, + "learning_rate": 2.3581787890669742e-06, + "loss": 0.5084, + "step": 10595 + }, + { + "epoch": 1.5613948919449903, + "grad_norm": 0.5866539478302002, + "learning_rate": 2.3577917173254314e-06, + "loss": 0.5435, + "step": 10596 + }, + { + "epoch": 1.5615422396856582, + "grad_norm": 0.6110586524009705, + "learning_rate": 2.357404649003931e-06, + "loss": 0.514, + "step": 10597 + }, + { + "epoch": 1.5616895874263261, + "grad_norm": 0.5784856677055359, + "learning_rate": 2.357017584111782e-06, + "loss": 0.5346, + "step": 10598 + }, + { + "epoch": 1.561836935166994, + "grad_norm": 0.5957366228103638, + "learning_rate": 2.3566305226582924e-06, + "loss": 0.5229, + "step": 10599 + }, + { + "epoch": 1.561984282907662, + "grad_norm": 0.6105712056159973, + "learning_rate": 2.356243464652772e-06, + "loss": 0.4965, + "step": 10600 + }, + { + "epoch": 1.56213163064833, + "grad_norm": 0.5287002921104431, + "learning_rate": 2.3558564101045288e-06, + "loss": 0.5067, + "step": 10601 + }, + { + "epoch": 1.5622789783889979, + "grad_norm": 0.5756508111953735, + "learning_rate": 2.3554693590228712e-06, + "loss": 0.5416, + "step": 10602 + }, + { + "epoch": 1.562426326129666, + "grad_norm": 0.6004253625869751, + "learning_rate": 2.355082311417108e-06, + "loss": 0.5027, + "step": 10603 + }, + { + "epoch": 1.562573673870334, + "grad_norm": 0.6469578146934509, + "learning_rate": 2.354695267296547e-06, + "loss": 0.5236, + "step": 10604 + }, + { + "epoch": 1.5627210216110021, + "grad_norm": 0.5948167443275452, + "learning_rate": 2.3543082266704965e-06, + "loss": 0.5197, + "step": 10605 + }, + { + "epoch": 1.56286836935167, + "grad_norm": 0.5856058597564697, + "learning_rate": 2.353921189548265e-06, + "loss": 0.507, + "step": 10606 + }, + { + "epoch": 1.563015717092338, + "grad_norm": 0.600296139717102, + "learning_rate": 2.353534155939161e-06, + "loss": 0.5338, + "step": 10607 + }, + { + "epoch": 1.563163064833006, + "grad_norm": 0.7305501103401184, + "learning_rate": 2.3531471258524914e-06, + "loss": 0.5611, + "step": 10608 + }, + { + "epoch": 1.5633104125736739, + "grad_norm": 0.6128466129302979, + "learning_rate": 2.3527600992975647e-06, + "loss": 0.5416, + "step": 10609 + }, + { + "epoch": 1.5634577603143418, + "grad_norm": 0.590783417224884, + "learning_rate": 2.352373076283688e-06, + "loss": 0.571, + "step": 10610 + }, + { + "epoch": 1.5636051080550097, + "grad_norm": 0.6036388874053955, + "learning_rate": 2.35198605682017e-06, + "loss": 0.5039, + "step": 10611 + }, + { + "epoch": 1.5637524557956777, + "grad_norm": 0.5948769450187683, + "learning_rate": 2.3515990409163177e-06, + "loss": 0.5435, + "step": 10612 + }, + { + "epoch": 1.5638998035363456, + "grad_norm": 0.6038820147514343, + "learning_rate": 2.351212028581439e-06, + "loss": 0.4955, + "step": 10613 + }, + { + "epoch": 1.5640471512770138, + "grad_norm": 0.6267384886741638, + "learning_rate": 2.350825019824841e-06, + "loss": 0.5395, + "step": 10614 + }, + { + "epoch": 1.5641944990176817, + "grad_norm": 0.5974903702735901, + "learning_rate": 2.350438014655832e-06, + "loss": 0.539, + "step": 10615 + }, + { + "epoch": 1.5643418467583499, + "grad_norm": 0.5616816878318787, + "learning_rate": 2.3500510130837167e-06, + "loss": 0.5347, + "step": 10616 + }, + { + "epoch": 1.5644891944990178, + "grad_norm": 0.6474602222442627, + "learning_rate": 2.3496640151178055e-06, + "loss": 0.565, + "step": 10617 + }, + { + "epoch": 1.5646365422396857, + "grad_norm": 0.5879522562026978, + "learning_rate": 2.3492770207674044e-06, + "loss": 0.5427, + "step": 10618 + }, + { + "epoch": 1.5647838899803537, + "grad_norm": 0.6099388599395752, + "learning_rate": 2.34889003004182e-06, + "loss": 0.4937, + "step": 10619 + }, + { + "epoch": 1.5649312377210216, + "grad_norm": 0.6023955941200256, + "learning_rate": 2.34850304295036e-06, + "loss": 0.4956, + "step": 10620 + }, + { + "epoch": 1.5650785854616895, + "grad_norm": 0.5808203220367432, + "learning_rate": 2.34811605950233e-06, + "loss": 0.546, + "step": 10621 + }, + { + "epoch": 1.5652259332023575, + "grad_norm": 0.5937490463256836, + "learning_rate": 2.347729079707038e-06, + "loss": 0.5119, + "step": 10622 + }, + { + "epoch": 1.5653732809430254, + "grad_norm": 0.5838586091995239, + "learning_rate": 2.34734210357379e-06, + "loss": 0.5288, + "step": 10623 + }, + { + "epoch": 1.5655206286836936, + "grad_norm": 0.5782347321510315, + "learning_rate": 2.346955131111893e-06, + "loss": 0.5332, + "step": 10624 + }, + { + "epoch": 1.5656679764243615, + "grad_norm": 0.6086188554763794, + "learning_rate": 2.346568162330654e-06, + "loss": 0.5384, + "step": 10625 + }, + { + "epoch": 1.5658153241650294, + "grad_norm": 0.616266667842865, + "learning_rate": 2.3461811972393782e-06, + "loss": 0.5096, + "step": 10626 + }, + { + "epoch": 1.5659626719056976, + "grad_norm": 0.6295241713523865, + "learning_rate": 2.345794235847372e-06, + "loss": 0.5305, + "step": 10627 + }, + { + "epoch": 1.5661100196463655, + "grad_norm": 0.6239032745361328, + "learning_rate": 2.3454072781639423e-06, + "loss": 0.5503, + "step": 10628 + }, + { + "epoch": 1.5662573673870335, + "grad_norm": 0.6058738827705383, + "learning_rate": 2.345020324198395e-06, + "loss": 0.5036, + "step": 10629 + }, + { + "epoch": 1.5664047151277014, + "grad_norm": 0.5896150469779968, + "learning_rate": 2.3446333739600367e-06, + "loss": 0.5244, + "step": 10630 + }, + { + "epoch": 1.5665520628683693, + "grad_norm": 0.6289317011833191, + "learning_rate": 2.3442464274581723e-06, + "loss": 0.5465, + "step": 10631 + }, + { + "epoch": 1.5666994106090373, + "grad_norm": 0.5682757496833801, + "learning_rate": 2.3438594847021087e-06, + "loss": 0.5169, + "step": 10632 + }, + { + "epoch": 1.5668467583497052, + "grad_norm": 0.6160773634910583, + "learning_rate": 2.343472545701151e-06, + "loss": 0.5359, + "step": 10633 + }, + { + "epoch": 1.5669941060903732, + "grad_norm": 0.6095417141914368, + "learning_rate": 2.3430856104646054e-06, + "loss": 0.5321, + "step": 10634 + }, + { + "epoch": 1.5671414538310413, + "grad_norm": 0.5771042704582214, + "learning_rate": 2.342698679001777e-06, + "loss": 0.5129, + "step": 10635 + }, + { + "epoch": 1.5672888015717092, + "grad_norm": 0.6112717986106873, + "learning_rate": 2.3423117513219716e-06, + "loss": 0.5233, + "step": 10636 + }, + { + "epoch": 1.5674361493123772, + "grad_norm": 0.6054883599281311, + "learning_rate": 2.341924827434495e-06, + "loss": 0.5286, + "step": 10637 + }, + { + "epoch": 1.5675834970530453, + "grad_norm": 0.6945099234580994, + "learning_rate": 2.3415379073486514e-06, + "loss": 0.5237, + "step": 10638 + }, + { + "epoch": 1.5677308447937133, + "grad_norm": 0.6029690504074097, + "learning_rate": 2.3411509910737473e-06, + "loss": 0.5386, + "step": 10639 + }, + { + "epoch": 1.5678781925343812, + "grad_norm": 0.5873031616210938, + "learning_rate": 2.340764078619087e-06, + "loss": 0.5344, + "step": 10640 + }, + { + "epoch": 1.5680255402750491, + "grad_norm": 0.5914747714996338, + "learning_rate": 2.3403771699939764e-06, + "loss": 0.5456, + "step": 10641 + }, + { + "epoch": 1.568172888015717, + "grad_norm": 0.5816425085067749, + "learning_rate": 2.3399902652077198e-06, + "loss": 0.5077, + "step": 10642 + }, + { + "epoch": 1.568320235756385, + "grad_norm": 0.5944057703018188, + "learning_rate": 2.3396033642696224e-06, + "loss": 0.5394, + "step": 10643 + }, + { + "epoch": 1.568467583497053, + "grad_norm": 0.600948691368103, + "learning_rate": 2.339216467188989e-06, + "loss": 0.522, + "step": 10644 + }, + { + "epoch": 1.5686149312377209, + "grad_norm": 0.5986623167991638, + "learning_rate": 2.3388295739751238e-06, + "loss": 0.5268, + "step": 10645 + }, + { + "epoch": 1.568762278978389, + "grad_norm": 0.6158726811408997, + "learning_rate": 2.338442684637332e-06, + "loss": 0.5235, + "step": 10646 + }, + { + "epoch": 1.568909626719057, + "grad_norm": 0.5837901830673218, + "learning_rate": 2.3380557991849174e-06, + "loss": 0.5204, + "step": 10647 + }, + { + "epoch": 1.569056974459725, + "grad_norm": 0.6136919856071472, + "learning_rate": 2.3376689176271854e-06, + "loss": 0.5191, + "step": 10648 + }, + { + "epoch": 1.569204322200393, + "grad_norm": 0.5920243859291077, + "learning_rate": 2.3372820399734393e-06, + "loss": 0.5352, + "step": 10649 + }, + { + "epoch": 1.569351669941061, + "grad_norm": 0.6259016394615173, + "learning_rate": 2.3368951662329845e-06, + "loss": 0.5095, + "step": 10650 + }, + { + "epoch": 1.569499017681729, + "grad_norm": 0.5850388407707214, + "learning_rate": 2.336508296415124e-06, + "loss": 0.5168, + "step": 10651 + }, + { + "epoch": 1.5696463654223969, + "grad_norm": 0.6174200773239136, + "learning_rate": 2.3361214305291622e-06, + "loss": 0.5566, + "step": 10652 + }, + { + "epoch": 1.5697937131630648, + "grad_norm": 0.6273850202560425, + "learning_rate": 2.3357345685844027e-06, + "loss": 0.531, + "step": 10653 + }, + { + "epoch": 1.5699410609037328, + "grad_norm": 0.6069431900978088, + "learning_rate": 2.3353477105901503e-06, + "loss": 0.5036, + "step": 10654 + }, + { + "epoch": 1.5700884086444007, + "grad_norm": 0.6120507717132568, + "learning_rate": 2.3349608565557086e-06, + "loss": 0.5362, + "step": 10655 + }, + { + "epoch": 1.5702357563850686, + "grad_norm": 0.5514083504676819, + "learning_rate": 2.3345740064903804e-06, + "loss": 0.5204, + "step": 10656 + }, + { + "epoch": 1.5703831041257368, + "grad_norm": 0.5975586771965027, + "learning_rate": 2.3341871604034698e-06, + "loss": 0.5225, + "step": 10657 + }, + { + "epoch": 1.5705304518664047, + "grad_norm": 0.6081207990646362, + "learning_rate": 2.3338003183042806e-06, + "loss": 0.5325, + "step": 10658 + }, + { + "epoch": 1.5706777996070727, + "grad_norm": 0.5904489159584045, + "learning_rate": 2.3334134802021154e-06, + "loss": 0.5112, + "step": 10659 + }, + { + "epoch": 1.5708251473477408, + "grad_norm": 0.6060272455215454, + "learning_rate": 2.333026646106278e-06, + "loss": 0.5371, + "step": 10660 + }, + { + "epoch": 1.5709724950884087, + "grad_norm": 0.5769744515419006, + "learning_rate": 2.3326398160260716e-06, + "loss": 0.5191, + "step": 10661 + }, + { + "epoch": 1.5711198428290767, + "grad_norm": 0.6072629690170288, + "learning_rate": 2.332252989970799e-06, + "loss": 0.5384, + "step": 10662 + }, + { + "epoch": 1.5712671905697446, + "grad_norm": 0.5681524872779846, + "learning_rate": 2.3318661679497635e-06, + "loss": 0.524, + "step": 10663 + }, + { + "epoch": 1.5714145383104126, + "grad_norm": 0.6276409029960632, + "learning_rate": 2.331479349972267e-06, + "loss": 0.5121, + "step": 10664 + }, + { + "epoch": 1.5715618860510805, + "grad_norm": 0.6324780583381653, + "learning_rate": 2.3310925360476143e-06, + "loss": 0.51, + "step": 10665 + }, + { + "epoch": 1.5717092337917484, + "grad_norm": 0.5860362648963928, + "learning_rate": 2.330705726185106e-06, + "loss": 0.5296, + "step": 10666 + }, + { + "epoch": 1.5718565815324164, + "grad_norm": 0.5887131094932556, + "learning_rate": 2.330318920394046e-06, + "loss": 0.5277, + "step": 10667 + }, + { + "epoch": 1.5720039292730845, + "grad_norm": 0.581780195236206, + "learning_rate": 2.329932118683736e-06, + "loss": 0.517, + "step": 10668 + }, + { + "epoch": 1.5721512770137525, + "grad_norm": 0.5689706802368164, + "learning_rate": 2.3295453210634795e-06, + "loss": 0.5162, + "step": 10669 + }, + { + "epoch": 1.5722986247544204, + "grad_norm": 0.5929564833641052, + "learning_rate": 2.329158527542577e-06, + "loss": 0.5364, + "step": 10670 + }, + { + "epoch": 1.5724459724950886, + "grad_norm": 0.6088452339172363, + "learning_rate": 2.3287717381303328e-06, + "loss": 0.567, + "step": 10671 + }, + { + "epoch": 1.5725933202357565, + "grad_norm": 0.6076845526695251, + "learning_rate": 2.3283849528360474e-06, + "loss": 0.5284, + "step": 10672 + }, + { + "epoch": 1.5727406679764244, + "grad_norm": 0.6697649359703064, + "learning_rate": 2.3279981716690237e-06, + "loss": 0.5063, + "step": 10673 + }, + { + "epoch": 1.5728880157170924, + "grad_norm": 0.6405940055847168, + "learning_rate": 2.3276113946385637e-06, + "loss": 0.5242, + "step": 10674 + }, + { + "epoch": 1.5730353634577603, + "grad_norm": 0.585854172706604, + "learning_rate": 2.3272246217539684e-06, + "loss": 0.5191, + "step": 10675 + }, + { + "epoch": 1.5731827111984282, + "grad_norm": 0.6230186223983765, + "learning_rate": 2.3268378530245397e-06, + "loss": 0.5279, + "step": 10676 + }, + { + "epoch": 1.5733300589390962, + "grad_norm": 0.6168109774589539, + "learning_rate": 2.3264510884595794e-06, + "loss": 0.559, + "step": 10677 + }, + { + "epoch": 1.573477406679764, + "grad_norm": 0.5646570324897766, + "learning_rate": 2.3260643280683893e-06, + "loss": 0.4935, + "step": 10678 + }, + { + "epoch": 1.5736247544204323, + "grad_norm": 0.557883620262146, + "learning_rate": 2.3256775718602705e-06, + "loss": 0.4915, + "step": 10679 + }, + { + "epoch": 1.5737721021611002, + "grad_norm": 0.6532228589057922, + "learning_rate": 2.325290819844524e-06, + "loss": 0.5286, + "step": 10680 + }, + { + "epoch": 1.5739194499017681, + "grad_norm": 0.6268886923789978, + "learning_rate": 2.3249040720304516e-06, + "loss": 0.4998, + "step": 10681 + }, + { + "epoch": 1.5740667976424363, + "grad_norm": 0.5977069735527039, + "learning_rate": 2.324517328427354e-06, + "loss": 0.5332, + "step": 10682 + }, + { + "epoch": 1.5742141453831042, + "grad_norm": 0.6116479635238647, + "learning_rate": 2.324130589044532e-06, + "loss": 0.4806, + "step": 10683 + }, + { + "epoch": 1.5743614931237722, + "grad_norm": 0.5801721215248108, + "learning_rate": 2.3237438538912875e-06, + "loss": 0.5166, + "step": 10684 + }, + { + "epoch": 1.57450884086444, + "grad_norm": 0.6011000871658325, + "learning_rate": 2.3233571229769205e-06, + "loss": 0.5379, + "step": 10685 + }, + { + "epoch": 1.574656188605108, + "grad_norm": 0.5982815623283386, + "learning_rate": 2.322970396310732e-06, + "loss": 0.4954, + "step": 10686 + }, + { + "epoch": 1.574803536345776, + "grad_norm": 0.6071864366531372, + "learning_rate": 2.322583673902022e-06, + "loss": 0.5211, + "step": 10687 + }, + { + "epoch": 1.574950884086444, + "grad_norm": 0.6062960624694824, + "learning_rate": 2.3221969557600914e-06, + "loss": 0.5268, + "step": 10688 + }, + { + "epoch": 1.5750982318271118, + "grad_norm": 0.5671746134757996, + "learning_rate": 2.321810241894241e-06, + "loss": 0.5127, + "step": 10689 + }, + { + "epoch": 1.57524557956778, + "grad_norm": 0.6327191591262817, + "learning_rate": 2.3214235323137695e-06, + "loss": 0.5341, + "step": 10690 + }, + { + "epoch": 1.575392927308448, + "grad_norm": 0.6594485640525818, + "learning_rate": 2.3210368270279794e-06, + "loss": 0.543, + "step": 10691 + }, + { + "epoch": 1.5755402750491159, + "grad_norm": 0.6067283749580383, + "learning_rate": 2.32065012604617e-06, + "loss": 0.5492, + "step": 10692 + }, + { + "epoch": 1.575687622789784, + "grad_norm": 0.6378638744354248, + "learning_rate": 2.3202634293776406e-06, + "loss": 0.537, + "step": 10693 + }, + { + "epoch": 1.575834970530452, + "grad_norm": 0.6017686724662781, + "learning_rate": 2.319876737031691e-06, + "loss": 0.5175, + "step": 10694 + }, + { + "epoch": 1.57598231827112, + "grad_norm": 0.5969119668006897, + "learning_rate": 2.3194900490176223e-06, + "loss": 0.5416, + "step": 10695 + }, + { + "epoch": 1.5761296660117878, + "grad_norm": 0.6249895691871643, + "learning_rate": 2.3191033653447324e-06, + "loss": 0.5112, + "step": 10696 + }, + { + "epoch": 1.5762770137524558, + "grad_norm": 0.6073744297027588, + "learning_rate": 2.3187166860223224e-06, + "loss": 0.5137, + "step": 10697 + }, + { + "epoch": 1.5764243614931237, + "grad_norm": 0.6058841347694397, + "learning_rate": 2.318330011059691e-06, + "loss": 0.4948, + "step": 10698 + }, + { + "epoch": 1.5765717092337916, + "grad_norm": 0.5922934412956238, + "learning_rate": 2.317943340466137e-06, + "loss": 0.5162, + "step": 10699 + }, + { + "epoch": 1.5767190569744596, + "grad_norm": 0.6371204257011414, + "learning_rate": 2.3175566742509604e-06, + "loss": 0.5457, + "step": 10700 + }, + { + "epoch": 1.5768664047151277, + "grad_norm": 0.5701794624328613, + "learning_rate": 2.3171700124234607e-06, + "loss": 0.5389, + "step": 10701 + }, + { + "epoch": 1.5770137524557957, + "grad_norm": 0.5907616019248962, + "learning_rate": 2.316783354992936e-06, + "loss": 0.5344, + "step": 10702 + }, + { + "epoch": 1.5771611001964636, + "grad_norm": 0.5864935517311096, + "learning_rate": 2.3163967019686857e-06, + "loss": 0.4972, + "step": 10703 + }, + { + "epoch": 1.5773084479371318, + "grad_norm": 0.6154171228408813, + "learning_rate": 2.3160100533600088e-06, + "loss": 0.529, + "step": 10704 + }, + { + "epoch": 1.5774557956777997, + "grad_norm": 0.6274032592773438, + "learning_rate": 2.315623409176204e-06, + "loss": 0.524, + "step": 10705 + }, + { + "epoch": 1.5776031434184676, + "grad_norm": 0.6115827560424805, + "learning_rate": 2.3152367694265693e-06, + "loss": 0.5039, + "step": 10706 + }, + { + "epoch": 1.5777504911591356, + "grad_norm": 0.626069962978363, + "learning_rate": 2.314850134120404e-06, + "loss": 0.5207, + "step": 10707 + }, + { + "epoch": 1.5778978388998035, + "grad_norm": 0.5912005305290222, + "learning_rate": 2.314463503267006e-06, + "loss": 0.5276, + "step": 10708 + }, + { + "epoch": 1.5780451866404714, + "grad_norm": 0.5755892395973206, + "learning_rate": 2.3140768768756736e-06, + "loss": 0.5543, + "step": 10709 + }, + { + "epoch": 1.5781925343811394, + "grad_norm": 0.5662397146224976, + "learning_rate": 2.313690254955705e-06, + "loss": 0.5326, + "step": 10710 + }, + { + "epoch": 1.5783398821218073, + "grad_norm": 0.6059046387672424, + "learning_rate": 2.313303637516399e-06, + "loss": 0.5253, + "step": 10711 + }, + { + "epoch": 1.5784872298624755, + "grad_norm": 0.5834491848945618, + "learning_rate": 2.3129170245670524e-06, + "loss": 0.5135, + "step": 10712 + }, + { + "epoch": 1.5786345776031434, + "grad_norm": 0.6021634340286255, + "learning_rate": 2.312530416116964e-06, + "loss": 0.521, + "step": 10713 + }, + { + "epoch": 1.5787819253438113, + "grad_norm": 0.6058568358421326, + "learning_rate": 2.3121438121754306e-06, + "loss": 0.5196, + "step": 10714 + }, + { + "epoch": 1.5789292730844795, + "grad_norm": 0.5734556913375854, + "learning_rate": 2.311757212751751e-06, + "loss": 0.5264, + "step": 10715 + }, + { + "epoch": 1.5790766208251474, + "grad_norm": 0.5840901732444763, + "learning_rate": 2.3113706178552217e-06, + "loss": 0.5229, + "step": 10716 + }, + { + "epoch": 1.5792239685658154, + "grad_norm": 1.1190557479858398, + "learning_rate": 2.3109840274951405e-06, + "loss": 0.5297, + "step": 10717 + }, + { + "epoch": 1.5793713163064833, + "grad_norm": 0.5976383090019226, + "learning_rate": 2.310597441680805e-06, + "loss": 0.5497, + "step": 10718 + }, + { + "epoch": 1.5795186640471512, + "grad_norm": 0.5996979475021362, + "learning_rate": 2.3102108604215123e-06, + "loss": 0.519, + "step": 10719 + }, + { + "epoch": 1.5796660117878192, + "grad_norm": 0.641431450843811, + "learning_rate": 2.3098242837265594e-06, + "loss": 0.5519, + "step": 10720 + }, + { + "epoch": 1.5798133595284871, + "grad_norm": 0.5938864350318909, + "learning_rate": 2.309437711605243e-06, + "loss": 0.5705, + "step": 10721 + }, + { + "epoch": 1.579960707269155, + "grad_norm": 0.5912948250770569, + "learning_rate": 2.3090511440668604e-06, + "loss": 0.4986, + "step": 10722 + }, + { + "epoch": 1.5801080550098232, + "grad_norm": 0.6020652651786804, + "learning_rate": 2.308664581120709e-06, + "loss": 0.5346, + "step": 10723 + }, + { + "epoch": 1.5802554027504911, + "grad_norm": 0.5436289310455322, + "learning_rate": 2.308278022776084e-06, + "loss": 0.4836, + "step": 10724 + }, + { + "epoch": 1.580402750491159, + "grad_norm": 0.6169669032096863, + "learning_rate": 2.307891469042283e-06, + "loss": 0.5284, + "step": 10725 + }, + { + "epoch": 1.5805500982318272, + "grad_norm": 0.6096964478492737, + "learning_rate": 2.307504919928601e-06, + "loss": 0.5461, + "step": 10726 + }, + { + "epoch": 1.5806974459724952, + "grad_norm": 0.6646755933761597, + "learning_rate": 2.307118375444335e-06, + "loss": 0.5232, + "step": 10727 + }, + { + "epoch": 1.580844793713163, + "grad_norm": 0.5949763059616089, + "learning_rate": 2.3067318355987827e-06, + "loss": 0.5156, + "step": 10728 + }, + { + "epoch": 1.580992141453831, + "grad_norm": 0.5788768529891968, + "learning_rate": 2.306345300401239e-06, + "loss": 0.5013, + "step": 10729 + }, + { + "epoch": 1.581139489194499, + "grad_norm": 0.5917132496833801, + "learning_rate": 2.305958769861e-06, + "loss": 0.536, + "step": 10730 + }, + { + "epoch": 1.581286836935167, + "grad_norm": 0.5988339185714722, + "learning_rate": 2.305572243987362e-06, + "loss": 0.533, + "step": 10731 + }, + { + "epoch": 1.5814341846758349, + "grad_norm": 0.6234259605407715, + "learning_rate": 2.3051857227896194e-06, + "loss": 0.5385, + "step": 10732 + }, + { + "epoch": 1.5815815324165028, + "grad_norm": 0.6523886919021606, + "learning_rate": 2.30479920627707e-06, + "loss": 0.4958, + "step": 10733 + }, + { + "epoch": 1.581728880157171, + "grad_norm": 0.5639915466308594, + "learning_rate": 2.304412694459007e-06, + "loss": 0.5424, + "step": 10734 + }, + { + "epoch": 1.5818762278978389, + "grad_norm": 0.6157767176628113, + "learning_rate": 2.3040261873447277e-06, + "loss": 0.5391, + "step": 10735 + }, + { + "epoch": 1.582023575638507, + "grad_norm": 0.5907439589500427, + "learning_rate": 2.3036396849435263e-06, + "loss": 0.4885, + "step": 10736 + }, + { + "epoch": 1.582170923379175, + "grad_norm": 0.6120783090591431, + "learning_rate": 2.3032531872646984e-06, + "loss": 0.5048, + "step": 10737 + }, + { + "epoch": 1.582318271119843, + "grad_norm": 0.6163152456283569, + "learning_rate": 2.302866694317539e-06, + "loss": 0.5641, + "step": 10738 + }, + { + "epoch": 1.5824656188605108, + "grad_norm": 0.6251341104507446, + "learning_rate": 2.3024802061113437e-06, + "loss": 0.5539, + "step": 10739 + }, + { + "epoch": 1.5826129666011788, + "grad_norm": 0.6161477565765381, + "learning_rate": 2.302093722655407e-06, + "loss": 0.549, + "step": 10740 + }, + { + "epoch": 1.5827603143418467, + "grad_norm": 0.6154907941818237, + "learning_rate": 2.3017072439590226e-06, + "loss": 0.5332, + "step": 10741 + }, + { + "epoch": 1.5829076620825147, + "grad_norm": 0.5997831225395203, + "learning_rate": 2.3013207700314864e-06, + "loss": 0.5221, + "step": 10742 + }, + { + "epoch": 1.5830550098231826, + "grad_norm": 0.5902646780014038, + "learning_rate": 2.3009343008820923e-06, + "loss": 0.5539, + "step": 10743 + }, + { + "epoch": 1.5832023575638505, + "grad_norm": 0.6014850735664368, + "learning_rate": 2.3005478365201347e-06, + "loss": 0.5732, + "step": 10744 + }, + { + "epoch": 1.5833497053045187, + "grad_norm": 0.6623765826225281, + "learning_rate": 2.300161376954908e-06, + "loss": 0.5222, + "step": 10745 + }, + { + "epoch": 1.5834970530451866, + "grad_norm": 0.5860768556594849, + "learning_rate": 2.2997749221957072e-06, + "loss": 0.5021, + "step": 10746 + }, + { + "epoch": 1.5836444007858548, + "grad_norm": 0.6289964914321899, + "learning_rate": 2.299388472251825e-06, + "loss": 0.5272, + "step": 10747 + }, + { + "epoch": 1.5837917485265227, + "grad_norm": 0.5542706251144409, + "learning_rate": 2.299002027132556e-06, + "loss": 0.5174, + "step": 10748 + }, + { + "epoch": 1.5839390962671906, + "grad_norm": 0.605686604976654, + "learning_rate": 2.298615586847194e-06, + "loss": 0.5452, + "step": 10749 + }, + { + "epoch": 1.5840864440078586, + "grad_norm": 0.6017974019050598, + "learning_rate": 2.298229151405033e-06, + "loss": 0.5498, + "step": 10750 + }, + { + "epoch": 1.5842337917485265, + "grad_norm": 0.6114950180053711, + "learning_rate": 2.297842720815366e-06, + "loss": 0.5314, + "step": 10751 + }, + { + "epoch": 1.5843811394891945, + "grad_norm": 0.5806621313095093, + "learning_rate": 2.297456295087487e-06, + "loss": 0.5425, + "step": 10752 + }, + { + "epoch": 1.5845284872298624, + "grad_norm": 0.6098533868789673, + "learning_rate": 2.2970698742306888e-06, + "loss": 0.5406, + "step": 10753 + }, + { + "epoch": 1.5846758349705303, + "grad_norm": 0.593768298625946, + "learning_rate": 2.296683458254265e-06, + "loss": 0.5104, + "step": 10754 + }, + { + "epoch": 1.5848231827111983, + "grad_norm": 0.5781470537185669, + "learning_rate": 2.2962970471675087e-06, + "loss": 0.5466, + "step": 10755 + }, + { + "epoch": 1.5849705304518664, + "grad_norm": 0.613411009311676, + "learning_rate": 2.295910640979713e-06, + "loss": 0.5445, + "step": 10756 + }, + { + "epoch": 1.5851178781925344, + "grad_norm": 0.6270749568939209, + "learning_rate": 2.295524239700171e-06, + "loss": 0.5201, + "step": 10757 + }, + { + "epoch": 1.5852652259332025, + "grad_norm": 0.596798837184906, + "learning_rate": 2.2951378433381745e-06, + "loss": 0.5294, + "step": 10758 + }, + { + "epoch": 1.5854125736738705, + "grad_norm": 0.6145007610321045, + "learning_rate": 2.2947514519030165e-06, + "loss": 0.5716, + "step": 10759 + }, + { + "epoch": 1.5855599214145384, + "grad_norm": 0.6252945065498352, + "learning_rate": 2.2943650654039905e-06, + "loss": 0.5524, + "step": 10760 + }, + { + "epoch": 1.5857072691552063, + "grad_norm": 0.6079338192939758, + "learning_rate": 2.293978683850388e-06, + "loss": 0.5185, + "step": 10761 + }, + { + "epoch": 1.5858546168958743, + "grad_norm": 0.5881878137588501, + "learning_rate": 2.2935923072515015e-06, + "loss": 0.536, + "step": 10762 + }, + { + "epoch": 1.5860019646365422, + "grad_norm": 0.5709467530250549, + "learning_rate": 2.293205935616623e-06, + "loss": 0.5234, + "step": 10763 + }, + { + "epoch": 1.5861493123772101, + "grad_norm": 0.6110209822654724, + "learning_rate": 2.2928195689550446e-06, + "loss": 0.5318, + "step": 10764 + }, + { + "epoch": 1.586296660117878, + "grad_norm": 0.6137253642082214, + "learning_rate": 2.2924332072760588e-06, + "loss": 0.53, + "step": 10765 + }, + { + "epoch": 1.5864440078585462, + "grad_norm": 0.6161969304084778, + "learning_rate": 2.292046850588957e-06, + "loss": 0.4969, + "step": 10766 + }, + { + "epoch": 1.5865913555992142, + "grad_norm": 0.6308338046073914, + "learning_rate": 2.291660498903031e-06, + "loss": 0.5032, + "step": 10767 + }, + { + "epoch": 1.586738703339882, + "grad_norm": 0.6315396428108215, + "learning_rate": 2.2912741522275717e-06, + "loss": 0.5395, + "step": 10768 + }, + { + "epoch": 1.5868860510805503, + "grad_norm": 0.595104455947876, + "learning_rate": 2.2908878105718717e-06, + "loss": 0.5472, + "step": 10769 + }, + { + "epoch": 1.5870333988212182, + "grad_norm": 0.5831519961357117, + "learning_rate": 2.290501473945222e-06, + "loss": 0.5422, + "step": 10770 + }, + { + "epoch": 1.5871807465618861, + "grad_norm": 0.6380118131637573, + "learning_rate": 2.290115142356913e-06, + "loss": 0.5594, + "step": 10771 + }, + { + "epoch": 1.587328094302554, + "grad_norm": 0.5947901010513306, + "learning_rate": 2.289728815816237e-06, + "loss": 0.5011, + "step": 10772 + }, + { + "epoch": 1.587475442043222, + "grad_norm": 0.5997841954231262, + "learning_rate": 2.2893424943324838e-06, + "loss": 0.5408, + "step": 10773 + }, + { + "epoch": 1.58762278978389, + "grad_norm": 0.5992057919502258, + "learning_rate": 2.288956177914945e-06, + "loss": 0.584, + "step": 10774 + }, + { + "epoch": 1.5877701375245579, + "grad_norm": 0.5953646302223206, + "learning_rate": 2.2885698665729112e-06, + "loss": 0.5017, + "step": 10775 + }, + { + "epoch": 1.5879174852652258, + "grad_norm": 0.6015706062316895, + "learning_rate": 2.288183560315673e-06, + "loss": 0.5436, + "step": 10776 + }, + { + "epoch": 1.588064833005894, + "grad_norm": 0.58596271276474, + "learning_rate": 2.287797259152521e-06, + "loss": 0.5142, + "step": 10777 + }, + { + "epoch": 1.588212180746562, + "grad_norm": 0.5826263427734375, + "learning_rate": 2.287410963092745e-06, + "loss": 0.5295, + "step": 10778 + }, + { + "epoch": 1.5883595284872298, + "grad_norm": 0.6127050518989563, + "learning_rate": 2.2870246721456355e-06, + "loss": 0.5246, + "step": 10779 + }, + { + "epoch": 1.588506876227898, + "grad_norm": 0.6324231624603271, + "learning_rate": 2.286638386320483e-06, + "loss": 0.5183, + "step": 10780 + }, + { + "epoch": 1.588654223968566, + "grad_norm": 0.6038939356803894, + "learning_rate": 2.286252105626577e-06, + "loss": 0.533, + "step": 10781 + }, + { + "epoch": 1.5888015717092339, + "grad_norm": 0.5794168710708618, + "learning_rate": 2.285865830073208e-06, + "loss": 0.5461, + "step": 10782 + }, + { + "epoch": 1.5889489194499018, + "grad_norm": 0.6243623495101929, + "learning_rate": 2.285479559669665e-06, + "loss": 0.4913, + "step": 10783 + }, + { + "epoch": 1.5890962671905697, + "grad_norm": 0.5910075902938843, + "learning_rate": 2.2850932944252385e-06, + "loss": 0.5131, + "step": 10784 + }, + { + "epoch": 1.5892436149312377, + "grad_norm": 0.6130678057670593, + "learning_rate": 2.284707034349217e-06, + "loss": 0.539, + "step": 10785 + }, + { + "epoch": 1.5893909626719056, + "grad_norm": 0.5961058139801025, + "learning_rate": 2.2843207794508904e-06, + "loss": 0.5197, + "step": 10786 + }, + { + "epoch": 1.5895383104125735, + "grad_norm": 0.5985195636749268, + "learning_rate": 2.2839345297395483e-06, + "loss": 0.5241, + "step": 10787 + }, + { + "epoch": 1.5896856581532417, + "grad_norm": 0.5923731327056885, + "learning_rate": 2.283548285224479e-06, + "loss": 0.5125, + "step": 10788 + }, + { + "epoch": 1.5898330058939096, + "grad_norm": 0.620554506778717, + "learning_rate": 2.2831620459149725e-06, + "loss": 0.5165, + "step": 10789 + }, + { + "epoch": 1.5899803536345776, + "grad_norm": 0.6014866232872009, + "learning_rate": 2.2827758118203167e-06, + "loss": 0.507, + "step": 10790 + }, + { + "epoch": 1.5901277013752457, + "grad_norm": 0.6015968322753906, + "learning_rate": 2.2823895829498006e-06, + "loss": 0.5315, + "step": 10791 + }, + { + "epoch": 1.5902750491159137, + "grad_norm": 0.6476470828056335, + "learning_rate": 2.2820033593127126e-06, + "loss": 0.5614, + "step": 10792 + }, + { + "epoch": 1.5904223968565816, + "grad_norm": 0.6883425116539001, + "learning_rate": 2.2816171409183423e-06, + "loss": 0.5728, + "step": 10793 + }, + { + "epoch": 1.5905697445972495, + "grad_norm": 0.5920664668083191, + "learning_rate": 2.281230927775977e-06, + "loss": 0.5326, + "step": 10794 + }, + { + "epoch": 1.5907170923379175, + "grad_norm": 0.6084862351417542, + "learning_rate": 2.280844719894905e-06, + "loss": 0.5326, + "step": 10795 + }, + { + "epoch": 1.5908644400785854, + "grad_norm": 0.5889013409614563, + "learning_rate": 2.2804585172844152e-06, + "loss": 0.5306, + "step": 10796 + }, + { + "epoch": 1.5910117878192533, + "grad_norm": 0.5945448279380798, + "learning_rate": 2.280072319953795e-06, + "loss": 0.5511, + "step": 10797 + }, + { + "epoch": 1.5911591355599213, + "grad_norm": 0.6006492972373962, + "learning_rate": 2.279686127912332e-06, + "loss": 0.5275, + "step": 10798 + }, + { + "epoch": 1.5913064833005894, + "grad_norm": 0.5960615873336792, + "learning_rate": 2.2792999411693147e-06, + "loss": 0.5186, + "step": 10799 + }, + { + "epoch": 1.5914538310412574, + "grad_norm": 0.6736083030700684, + "learning_rate": 2.278913759734029e-06, + "loss": 0.5008, + "step": 10800 + }, + { + "epoch": 1.5916011787819253, + "grad_norm": 0.6116936206817627, + "learning_rate": 2.2785275836157647e-06, + "loss": 0.5394, + "step": 10801 + }, + { + "epoch": 1.5917485265225935, + "grad_norm": 0.5795106887817383, + "learning_rate": 2.2781414128238083e-06, + "loss": 0.4894, + "step": 10802 + }, + { + "epoch": 1.5918958742632614, + "grad_norm": 0.5818837881088257, + "learning_rate": 2.2777552473674467e-06, + "loss": 0.5374, + "step": 10803 + }, + { + "epoch": 1.5920432220039293, + "grad_norm": 0.6042067408561707, + "learning_rate": 2.277369087255967e-06, + "loss": 0.5395, + "step": 10804 + }, + { + "epoch": 1.5921905697445973, + "grad_norm": 0.5691345930099487, + "learning_rate": 2.276982932498657e-06, + "loss": 0.5801, + "step": 10805 + }, + { + "epoch": 1.5923379174852652, + "grad_norm": 0.6075572967529297, + "learning_rate": 2.2765967831048023e-06, + "loss": 0.5328, + "step": 10806 + }, + { + "epoch": 1.5924852652259331, + "grad_norm": 0.6260449886322021, + "learning_rate": 2.2762106390836907e-06, + "loss": 0.5265, + "step": 10807 + }, + { + "epoch": 1.592632612966601, + "grad_norm": 0.6089715957641602, + "learning_rate": 2.2758245004446083e-06, + "loss": 0.5602, + "step": 10808 + }, + { + "epoch": 1.592779960707269, + "grad_norm": 0.5989148020744324, + "learning_rate": 2.275438367196841e-06, + "loss": 0.4984, + "step": 10809 + }, + { + "epoch": 1.5929273084479372, + "grad_norm": 0.5720031261444092, + "learning_rate": 2.2750522393496756e-06, + "loss": 0.5113, + "step": 10810 + }, + { + "epoch": 1.593074656188605, + "grad_norm": 0.6014187932014465, + "learning_rate": 2.2746661169123984e-06, + "loss": 0.5429, + "step": 10811 + }, + { + "epoch": 1.593222003929273, + "grad_norm": 0.607669472694397, + "learning_rate": 2.2742799998942956e-06, + "loss": 0.5499, + "step": 10812 + }, + { + "epoch": 1.5933693516699412, + "grad_norm": 0.613805890083313, + "learning_rate": 2.273893888304653e-06, + "loss": 0.4925, + "step": 10813 + }, + { + "epoch": 1.5935166994106091, + "grad_norm": 0.5783565640449524, + "learning_rate": 2.2735077821527564e-06, + "loss": 0.5148, + "step": 10814 + }, + { + "epoch": 1.593664047151277, + "grad_norm": 0.6060866117477417, + "learning_rate": 2.2731216814478908e-06, + "loss": 0.5249, + "step": 10815 + }, + { + "epoch": 1.593811394891945, + "grad_norm": 0.5740613341331482, + "learning_rate": 2.272735586199343e-06, + "loss": 0.5162, + "step": 10816 + }, + { + "epoch": 1.593958742632613, + "grad_norm": 0.5683912038803101, + "learning_rate": 2.2723494964163974e-06, + "loss": 0.538, + "step": 10817 + }, + { + "epoch": 1.5941060903732809, + "grad_norm": 0.581203818321228, + "learning_rate": 2.2719634121083396e-06, + "loss": 0.5177, + "step": 10818 + }, + { + "epoch": 1.5942534381139488, + "grad_norm": 0.6024909615516663, + "learning_rate": 2.2715773332844547e-06, + "loss": 0.5268, + "step": 10819 + }, + { + "epoch": 1.5944007858546168, + "grad_norm": 0.573757529258728, + "learning_rate": 2.2711912599540286e-06, + "loss": 0.5128, + "step": 10820 + }, + { + "epoch": 1.594548133595285, + "grad_norm": 0.5862977504730225, + "learning_rate": 2.270805192126345e-06, + "loss": 0.5016, + "step": 10821 + }, + { + "epoch": 1.5946954813359528, + "grad_norm": 0.5759912729263306, + "learning_rate": 2.2704191298106886e-06, + "loss": 0.529, + "step": 10822 + }, + { + "epoch": 1.5948428290766208, + "grad_norm": 0.5813285112380981, + "learning_rate": 2.2700330730163446e-06, + "loss": 0.5537, + "step": 10823 + }, + { + "epoch": 1.594990176817289, + "grad_norm": 0.6079335808753967, + "learning_rate": 2.2696470217525973e-06, + "loss": 0.4898, + "step": 10824 + }, + { + "epoch": 1.5951375245579569, + "grad_norm": 0.6007696390151978, + "learning_rate": 2.269260976028731e-06, + "loss": 0.524, + "step": 10825 + }, + { + "epoch": 1.5952848722986248, + "grad_norm": 0.5772767663002014, + "learning_rate": 2.26887493585403e-06, + "loss": 0.5342, + "step": 10826 + }, + { + "epoch": 1.5954322200392927, + "grad_norm": 0.5628086924552917, + "learning_rate": 2.268488901237779e-06, + "loss": 0.5498, + "step": 10827 + }, + { + "epoch": 1.5955795677799607, + "grad_norm": 0.5938635468482971, + "learning_rate": 2.268102872189261e-06, + "loss": 0.4865, + "step": 10828 + }, + { + "epoch": 1.5957269155206286, + "grad_norm": 0.5769317150115967, + "learning_rate": 2.2677168487177597e-06, + "loss": 0.547, + "step": 10829 + }, + { + "epoch": 1.5958742632612966, + "grad_norm": 0.6203666925430298, + "learning_rate": 2.26733083083256e-06, + "loss": 0.5463, + "step": 10830 + }, + { + "epoch": 1.5960216110019645, + "grad_norm": 0.5915158987045288, + "learning_rate": 2.266944818542944e-06, + "loss": 0.5305, + "step": 10831 + }, + { + "epoch": 1.5961689587426326, + "grad_norm": 0.5690820217132568, + "learning_rate": 2.2665588118581962e-06, + "loss": 0.527, + "step": 10832 + }, + { + "epoch": 1.5963163064833006, + "grad_norm": 0.6107688546180725, + "learning_rate": 2.2661728107875994e-06, + "loss": 0.5491, + "step": 10833 + }, + { + "epoch": 1.5964636542239685, + "grad_norm": 0.6183532476425171, + "learning_rate": 2.2657868153404366e-06, + "loss": 0.5473, + "step": 10834 + }, + { + "epoch": 1.5966110019646367, + "grad_norm": 0.5940412878990173, + "learning_rate": 2.2654008255259917e-06, + "loss": 0.529, + "step": 10835 + }, + { + "epoch": 1.5967583497053046, + "grad_norm": 0.6332665085792542, + "learning_rate": 2.265014841353546e-06, + "loss": 0.5128, + "step": 10836 + }, + { + "epoch": 1.5969056974459725, + "grad_norm": 0.6039103865623474, + "learning_rate": 2.2646288628323833e-06, + "loss": 0.5291, + "step": 10837 + }, + { + "epoch": 1.5970530451866405, + "grad_norm": 0.6082881093025208, + "learning_rate": 2.2642428899717864e-06, + "loss": 0.4922, + "step": 10838 + }, + { + "epoch": 1.5972003929273084, + "grad_norm": 0.5810093879699707, + "learning_rate": 2.263856922781037e-06, + "loss": 0.5136, + "step": 10839 + }, + { + "epoch": 1.5973477406679764, + "grad_norm": 0.6419953107833862, + "learning_rate": 2.2634709612694188e-06, + "loss": 0.5401, + "step": 10840 + }, + { + "epoch": 1.5974950884086443, + "grad_norm": 0.5993397831916809, + "learning_rate": 2.2630850054462124e-06, + "loss": 0.5425, + "step": 10841 + }, + { + "epoch": 1.5976424361493122, + "grad_norm": 0.5931317210197449, + "learning_rate": 2.2626990553207002e-06, + "loss": 0.5244, + "step": 10842 + }, + { + "epoch": 1.5977897838899804, + "grad_norm": 0.5977848172187805, + "learning_rate": 2.2623131109021653e-06, + "loss": 0.5031, + "step": 10843 + }, + { + "epoch": 1.5979371316306483, + "grad_norm": 0.6171492338180542, + "learning_rate": 2.2619271721998877e-06, + "loss": 0.5577, + "step": 10844 + }, + { + "epoch": 1.5980844793713163, + "grad_norm": 0.5947583317756653, + "learning_rate": 2.2615412392231505e-06, + "loss": 0.5259, + "step": 10845 + }, + { + "epoch": 1.5982318271119844, + "grad_norm": 0.5847504734992981, + "learning_rate": 2.2611553119812346e-06, + "loss": 0.4952, + "step": 10846 + }, + { + "epoch": 1.5983791748526524, + "grad_norm": 0.6017939448356628, + "learning_rate": 2.2607693904834213e-06, + "loss": 0.5305, + "step": 10847 + }, + { + "epoch": 1.5985265225933203, + "grad_norm": 0.6243972778320312, + "learning_rate": 2.2603834747389917e-06, + "loss": 0.5029, + "step": 10848 + }, + { + "epoch": 1.5986738703339882, + "grad_norm": 0.5544679760932922, + "learning_rate": 2.259997564757228e-06, + "loss": 0.5347, + "step": 10849 + }, + { + "epoch": 1.5988212180746562, + "grad_norm": 0.6002576351165771, + "learning_rate": 2.2596116605474093e-06, + "loss": 0.4979, + "step": 10850 + }, + { + "epoch": 1.598968565815324, + "grad_norm": 0.585659921169281, + "learning_rate": 2.259225762118818e-06, + "loss": 0.5102, + "step": 10851 + }, + { + "epoch": 1.599115913555992, + "grad_norm": 0.5882190465927124, + "learning_rate": 2.258839869480734e-06, + "loss": 0.5215, + "step": 10852 + }, + { + "epoch": 1.59926326129666, + "grad_norm": 0.5808500051498413, + "learning_rate": 2.2584539826424383e-06, + "loss": 0.5051, + "step": 10853 + }, + { + "epoch": 1.5994106090373281, + "grad_norm": 0.6090992093086243, + "learning_rate": 2.2580681016132106e-06, + "loss": 0.5388, + "step": 10854 + }, + { + "epoch": 1.599557956777996, + "grad_norm": 0.6103779673576355, + "learning_rate": 2.2576822264023323e-06, + "loss": 0.5288, + "step": 10855 + }, + { + "epoch": 1.599705304518664, + "grad_norm": 0.5798584818840027, + "learning_rate": 2.257296357019082e-06, + "loss": 0.5402, + "step": 10856 + }, + { + "epoch": 1.5998526522593322, + "grad_norm": 0.5823350548744202, + "learning_rate": 2.2569104934727407e-06, + "loss": 0.5459, + "step": 10857 + }, + { + "epoch": 1.6, + "grad_norm": 0.5997583866119385, + "learning_rate": 2.2565246357725885e-06, + "loss": 0.5482, + "step": 10858 + }, + { + "epoch": 1.600147347740668, + "grad_norm": 0.5844020843505859, + "learning_rate": 2.2561387839279043e-06, + "loss": 0.5291, + "step": 10859 + }, + { + "epoch": 1.600294695481336, + "grad_norm": 0.6026061773300171, + "learning_rate": 2.2557529379479674e-06, + "loss": 0.5396, + "step": 10860 + }, + { + "epoch": 1.600442043222004, + "grad_norm": 0.5904895067214966, + "learning_rate": 2.2553670978420583e-06, + "loss": 0.493, + "step": 10861 + }, + { + "epoch": 1.6005893909626718, + "grad_norm": 0.6020025610923767, + "learning_rate": 2.254981263619456e-06, + "loss": 0.515, + "step": 10862 + }, + { + "epoch": 1.6007367387033398, + "grad_norm": 0.5677732825279236, + "learning_rate": 2.254595435289439e-06, + "loss": 0.5194, + "step": 10863 + }, + { + "epoch": 1.6008840864440077, + "grad_norm": 0.5877519845962524, + "learning_rate": 2.254209612861287e-06, + "loss": 0.5585, + "step": 10864 + }, + { + "epoch": 1.6010314341846759, + "grad_norm": 0.589480459690094, + "learning_rate": 2.253823796344278e-06, + "loss": 0.5262, + "step": 10865 + }, + { + "epoch": 1.6011787819253438, + "grad_norm": 0.6029322743415833, + "learning_rate": 2.2534379857476916e-06, + "loss": 0.5034, + "step": 10866 + }, + { + "epoch": 1.6013261296660117, + "grad_norm": 0.5997851490974426, + "learning_rate": 2.253052181080806e-06, + "loss": 0.5414, + "step": 10867 + }, + { + "epoch": 1.60147347740668, + "grad_norm": 0.6061923503875732, + "learning_rate": 2.252666382352899e-06, + "loss": 0.527, + "step": 10868 + }, + { + "epoch": 1.6016208251473478, + "grad_norm": 0.6103389859199524, + "learning_rate": 2.25228058957325e-06, + "loss": 0.5468, + "step": 10869 + }, + { + "epoch": 1.6017681728880158, + "grad_norm": 0.5975282788276672, + "learning_rate": 2.251894802751137e-06, + "loss": 0.5091, + "step": 10870 + }, + { + "epoch": 1.6019155206286837, + "grad_norm": 0.5941799283027649, + "learning_rate": 2.2515090218958373e-06, + "loss": 0.5092, + "step": 10871 + }, + { + "epoch": 1.6020628683693516, + "grad_norm": 0.6174789667129517, + "learning_rate": 2.251123247016629e-06, + "loss": 0.5453, + "step": 10872 + }, + { + "epoch": 1.6022102161100196, + "grad_norm": 0.6029117703437805, + "learning_rate": 2.2507374781227902e-06, + "loss": 0.5445, + "step": 10873 + }, + { + "epoch": 1.6023575638506875, + "grad_norm": 0.5980428457260132, + "learning_rate": 2.250351715223597e-06, + "loss": 0.54, + "step": 10874 + }, + { + "epoch": 1.6025049115913554, + "grad_norm": 0.586764395236969, + "learning_rate": 2.249965958328329e-06, + "loss": 0.5504, + "step": 10875 + }, + { + "epoch": 1.6026522593320236, + "grad_norm": 0.599651575088501, + "learning_rate": 2.249580207446262e-06, + "loss": 0.5352, + "step": 10876 + }, + { + "epoch": 1.6027996070726915, + "grad_norm": 0.6095916628837585, + "learning_rate": 2.249194462586674e-06, + "loss": 0.553, + "step": 10877 + }, + { + "epoch": 1.6029469548133597, + "grad_norm": 0.604871392250061, + "learning_rate": 2.2488087237588415e-06, + "loss": 0.5392, + "step": 10878 + }, + { + "epoch": 1.6030943025540276, + "grad_norm": 0.6311751008033752, + "learning_rate": 2.248422990972041e-06, + "loss": 0.5121, + "step": 10879 + }, + { + "epoch": 1.6032416502946956, + "grad_norm": 0.5913708209991455, + "learning_rate": 2.24803726423555e-06, + "loss": 0.515, + "step": 10880 + }, + { + "epoch": 1.6033889980353635, + "grad_norm": 0.5796368718147278, + "learning_rate": 2.2476515435586447e-06, + "loss": 0.5309, + "step": 10881 + }, + { + "epoch": 1.6035363457760314, + "grad_norm": 0.6125421524047852, + "learning_rate": 2.247265828950601e-06, + "loss": 0.5287, + "step": 10882 + }, + { + "epoch": 1.6036836935166994, + "grad_norm": 0.6111894845962524, + "learning_rate": 2.2468801204206958e-06, + "loss": 0.5266, + "step": 10883 + }, + { + "epoch": 1.6038310412573673, + "grad_norm": 0.6083178520202637, + "learning_rate": 2.246494417978205e-06, + "loss": 0.5477, + "step": 10884 + }, + { + "epoch": 1.6039783889980352, + "grad_norm": 0.5757671594619751, + "learning_rate": 2.2461087216324043e-06, + "loss": 0.5163, + "step": 10885 + }, + { + "epoch": 1.6041257367387032, + "grad_norm": 0.6051121354103088, + "learning_rate": 2.24572303139257e-06, + "loss": 0.5267, + "step": 10886 + }, + { + "epoch": 1.6042730844793713, + "grad_norm": 0.5850569605827332, + "learning_rate": 2.2453373472679775e-06, + "loss": 0.5383, + "step": 10887 + }, + { + "epoch": 1.6044204322200393, + "grad_norm": 0.6126118898391724, + "learning_rate": 2.244951669267902e-06, + "loss": 0.519, + "step": 10888 + }, + { + "epoch": 1.6045677799607074, + "grad_norm": 0.5727962851524353, + "learning_rate": 2.2445659974016197e-06, + "loss": 0.5361, + "step": 10889 + }, + { + "epoch": 1.6047151277013754, + "grad_norm": 0.618584930896759, + "learning_rate": 2.244180331678405e-06, + "loss": 0.5225, + "step": 10890 + }, + { + "epoch": 1.6048624754420433, + "grad_norm": 0.6276893019676208, + "learning_rate": 2.243794672107534e-06, + "loss": 0.5207, + "step": 10891 + }, + { + "epoch": 1.6050098231827112, + "grad_norm": 0.568620502948761, + "learning_rate": 2.2434090186982803e-06, + "loss": 0.5202, + "step": 10892 + }, + { + "epoch": 1.6051571709233792, + "grad_norm": 0.571708619594574, + "learning_rate": 2.2430233714599195e-06, + "loss": 0.511, + "step": 10893 + }, + { + "epoch": 1.605304518664047, + "grad_norm": 0.5906604528427124, + "learning_rate": 2.242637730401726e-06, + "loss": 0.5274, + "step": 10894 + }, + { + "epoch": 1.605451866404715, + "grad_norm": 0.6061241030693054, + "learning_rate": 2.242252095532975e-06, + "loss": 0.5379, + "step": 10895 + }, + { + "epoch": 1.605599214145383, + "grad_norm": 0.6314187049865723, + "learning_rate": 2.2418664668629396e-06, + "loss": 0.5202, + "step": 10896 + }, + { + "epoch": 1.605746561886051, + "grad_norm": 0.5803888440132141, + "learning_rate": 2.2414808444008944e-06, + "loss": 0.4904, + "step": 10897 + }, + { + "epoch": 1.605893909626719, + "grad_norm": 0.5995601415634155, + "learning_rate": 2.241095228156114e-06, + "loss": 0.5271, + "step": 10898 + }, + { + "epoch": 1.606041257367387, + "grad_norm": 0.5644673109054565, + "learning_rate": 2.240709618137872e-06, + "loss": 0.5195, + "step": 10899 + }, + { + "epoch": 1.6061886051080552, + "grad_norm": 0.6129732131958008, + "learning_rate": 2.2403240143554417e-06, + "loss": 0.5064, + "step": 10900 + }, + { + "epoch": 1.606335952848723, + "grad_norm": 0.59978187084198, + "learning_rate": 2.239938416818098e-06, + "loss": 0.5338, + "step": 10901 + }, + { + "epoch": 1.606483300589391, + "grad_norm": 0.6019730567932129, + "learning_rate": 2.239552825535112e-06, + "loss": 0.5206, + "step": 10902 + }, + { + "epoch": 1.606630648330059, + "grad_norm": 0.5850425362586975, + "learning_rate": 2.2391672405157595e-06, + "loss": 0.5425, + "step": 10903 + }, + { + "epoch": 1.606777996070727, + "grad_norm": 0.5998349189758301, + "learning_rate": 2.2387816617693125e-06, + "loss": 0.5094, + "step": 10904 + }, + { + "epoch": 1.6069253438113948, + "grad_norm": 0.5789154767990112, + "learning_rate": 2.238396089305044e-06, + "loss": 0.5077, + "step": 10905 + }, + { + "epoch": 1.6070726915520628, + "grad_norm": 0.6179758906364441, + "learning_rate": 2.2380105231322262e-06, + "loss": 0.5399, + "step": 10906 + }, + { + "epoch": 1.6072200392927307, + "grad_norm": 0.6040871143341064, + "learning_rate": 2.2376249632601327e-06, + "loss": 0.5177, + "step": 10907 + }, + { + "epoch": 1.6073673870333989, + "grad_norm": 0.6059592962265015, + "learning_rate": 2.237239409698036e-06, + "loss": 0.5294, + "step": 10908 + }, + { + "epoch": 1.6075147347740668, + "grad_norm": 0.5975524187088013, + "learning_rate": 2.236853862455208e-06, + "loss": 0.5483, + "step": 10909 + }, + { + "epoch": 1.6076620825147347, + "grad_norm": 0.6017861366271973, + "learning_rate": 2.236468321540921e-06, + "loss": 0.5251, + "step": 10910 + }, + { + "epoch": 1.607809430255403, + "grad_norm": 0.58635413646698, + "learning_rate": 2.2360827869644466e-06, + "loss": 0.5189, + "step": 10911 + }, + { + "epoch": 1.6079567779960708, + "grad_norm": 0.618898868560791, + "learning_rate": 2.235697258735058e-06, + "loss": 0.5648, + "step": 10912 + }, + { + "epoch": 1.6081041257367388, + "grad_norm": 0.6103026866912842, + "learning_rate": 2.235311736862027e-06, + "loss": 0.549, + "step": 10913 + }, + { + "epoch": 1.6082514734774067, + "grad_norm": 0.5965602993965149, + "learning_rate": 2.234926221354624e-06, + "loss": 0.5385, + "step": 10914 + }, + { + "epoch": 1.6083988212180746, + "grad_norm": 0.5906350612640381, + "learning_rate": 2.2345407122221215e-06, + "loss": 0.5581, + "step": 10915 + }, + { + "epoch": 1.6085461689587426, + "grad_norm": 0.581619143486023, + "learning_rate": 2.23415520947379e-06, + "loss": 0.534, + "step": 10916 + }, + { + "epoch": 1.6086935166994105, + "grad_norm": 0.5821840763092041, + "learning_rate": 2.2337697131189012e-06, + "loss": 0.5052, + "step": 10917 + }, + { + "epoch": 1.6088408644400785, + "grad_norm": 0.5558578372001648, + "learning_rate": 2.233384223166726e-06, + "loss": 0.499, + "step": 10918 + }, + { + "epoch": 1.6089882121807466, + "grad_norm": 0.5821650624275208, + "learning_rate": 2.2329987396265353e-06, + "loss": 0.5375, + "step": 10919 + }, + { + "epoch": 1.6091355599214145, + "grad_norm": 0.5925263166427612, + "learning_rate": 2.2326132625075994e-06, + "loss": 0.5346, + "step": 10920 + }, + { + "epoch": 1.6092829076620825, + "grad_norm": 0.5786210894584656, + "learning_rate": 2.2322277918191894e-06, + "loss": 0.5282, + "step": 10921 + }, + { + "epoch": 1.6094302554027506, + "grad_norm": 0.597231924533844, + "learning_rate": 2.2318423275705757e-06, + "loss": 0.5555, + "step": 10922 + }, + { + "epoch": 1.6095776031434186, + "grad_norm": 0.5718169808387756, + "learning_rate": 2.2314568697710277e-06, + "loss": 0.5335, + "step": 10923 + }, + { + "epoch": 1.6097249508840865, + "grad_norm": 0.6089250445365906, + "learning_rate": 2.231071418429816e-06, + "loss": 0.5462, + "step": 10924 + }, + { + "epoch": 1.6098722986247544, + "grad_norm": 0.6137478351593018, + "learning_rate": 2.2306859735562105e-06, + "loss": 0.5397, + "step": 10925 + }, + { + "epoch": 1.6100196463654224, + "grad_norm": 0.5906621813774109, + "learning_rate": 2.2303005351594815e-06, + "loss": 0.5001, + "step": 10926 + }, + { + "epoch": 1.6101669941060903, + "grad_norm": 0.581083357334137, + "learning_rate": 2.2299151032488975e-06, + "loss": 0.5028, + "step": 10927 + }, + { + "epoch": 1.6103143418467583, + "grad_norm": 0.5751511454582214, + "learning_rate": 2.229529677833729e-06, + "loss": 0.5201, + "step": 10928 + }, + { + "epoch": 1.6104616895874262, + "grad_norm": 0.5992443561553955, + "learning_rate": 2.229144258923245e-06, + "loss": 0.5212, + "step": 10929 + }, + { + "epoch": 1.6106090373280944, + "grad_norm": 0.605466365814209, + "learning_rate": 2.228758846526714e-06, + "loss": 0.5455, + "step": 10930 + }, + { + "epoch": 1.6107563850687623, + "grad_norm": 0.6181374788284302, + "learning_rate": 2.228373440653406e-06, + "loss": 0.5463, + "step": 10931 + }, + { + "epoch": 1.6109037328094302, + "grad_norm": 0.5999366641044617, + "learning_rate": 2.227988041312589e-06, + "loss": 0.5187, + "step": 10932 + }, + { + "epoch": 1.6110510805500984, + "grad_norm": 0.5851277709007263, + "learning_rate": 2.2276026485135323e-06, + "loss": 0.5063, + "step": 10933 + }, + { + "epoch": 1.6111984282907663, + "grad_norm": 0.5679987072944641, + "learning_rate": 2.2272172622655044e-06, + "loss": 0.5324, + "step": 10934 + }, + { + "epoch": 1.6113457760314343, + "grad_norm": 0.6388627886772156, + "learning_rate": 2.226831882577773e-06, + "loss": 0.5551, + "step": 10935 + }, + { + "epoch": 1.6114931237721022, + "grad_norm": 0.5916426777839661, + "learning_rate": 2.2264465094596065e-06, + "loss": 0.5253, + "step": 10936 + }, + { + "epoch": 1.6116404715127701, + "grad_norm": 0.5967548489570618, + "learning_rate": 2.2260611429202734e-06, + "loss": 0.5371, + "step": 10937 + }, + { + "epoch": 1.611787819253438, + "grad_norm": 0.6281325221061707, + "learning_rate": 2.225675782969041e-06, + "loss": 0.5446, + "step": 10938 + }, + { + "epoch": 1.611935166994106, + "grad_norm": 0.6086993217468262, + "learning_rate": 2.225290429615178e-06, + "loss": 0.5551, + "step": 10939 + }, + { + "epoch": 1.612082514734774, + "grad_norm": 0.614322304725647, + "learning_rate": 2.224905082867951e-06, + "loss": 0.5452, + "step": 10940 + }, + { + "epoch": 1.612229862475442, + "grad_norm": 0.5772532820701599, + "learning_rate": 2.2245197427366276e-06, + "loss": 0.5171, + "step": 10941 + }, + { + "epoch": 1.61237721021611, + "grad_norm": 0.6378366351127625, + "learning_rate": 2.2241344092304753e-06, + "loss": 0.537, + "step": 10942 + }, + { + "epoch": 1.612524557956778, + "grad_norm": 0.5866855382919312, + "learning_rate": 2.223749082358761e-06, + "loss": 0.5215, + "step": 10943 + }, + { + "epoch": 1.6126719056974461, + "grad_norm": 0.5947437286376953, + "learning_rate": 2.223363762130752e-06, + "loss": 0.5031, + "step": 10944 + }, + { + "epoch": 1.612819253438114, + "grad_norm": 0.5984221696853638, + "learning_rate": 2.2229784485557145e-06, + "loss": 0.5505, + "step": 10945 + }, + { + "epoch": 1.612966601178782, + "grad_norm": 0.5907178521156311, + "learning_rate": 2.2225931416429155e-06, + "loss": 0.5252, + "step": 10946 + }, + { + "epoch": 1.61311394891945, + "grad_norm": 0.6286479234695435, + "learning_rate": 2.222207841401621e-06, + "loss": 0.5077, + "step": 10947 + }, + { + "epoch": 1.6132612966601179, + "grad_norm": 0.5492534637451172, + "learning_rate": 2.2218225478410976e-06, + "loss": 0.5327, + "step": 10948 + }, + { + "epoch": 1.6134086444007858, + "grad_norm": 0.6333783268928528, + "learning_rate": 2.2214372609706118e-06, + "loss": 0.5315, + "step": 10949 + }, + { + "epoch": 1.6135559921414537, + "grad_norm": 0.5889266729354858, + "learning_rate": 2.221051980799429e-06, + "loss": 0.5162, + "step": 10950 + }, + { + "epoch": 1.6137033398821217, + "grad_norm": 0.6123552918434143, + "learning_rate": 2.2206667073368153e-06, + "loss": 0.5218, + "step": 10951 + }, + { + "epoch": 1.6138506876227898, + "grad_norm": 0.5991240739822388, + "learning_rate": 2.2202814405920364e-06, + "loss": 0.5039, + "step": 10952 + }, + { + "epoch": 1.6139980353634578, + "grad_norm": 0.6110100746154785, + "learning_rate": 2.2198961805743576e-06, + "loss": 0.5329, + "step": 10953 + }, + { + "epoch": 1.6141453831041257, + "grad_norm": 0.5929593443870544, + "learning_rate": 2.219510927293044e-06, + "loss": 0.5341, + "step": 10954 + }, + { + "epoch": 1.6142927308447939, + "grad_norm": 0.5766206979751587, + "learning_rate": 2.2191256807573614e-06, + "loss": 0.5424, + "step": 10955 + }, + { + "epoch": 1.6144400785854618, + "grad_norm": 0.5923124551773071, + "learning_rate": 2.2187404409765745e-06, + "loss": 0.5463, + "step": 10956 + }, + { + "epoch": 1.6145874263261297, + "grad_norm": 0.579356849193573, + "learning_rate": 2.218355207959948e-06, + "loss": 0.5328, + "step": 10957 + }, + { + "epoch": 1.6147347740667977, + "grad_norm": 0.5936928987503052, + "learning_rate": 2.2179699817167466e-06, + "loss": 0.5168, + "step": 10958 + }, + { + "epoch": 1.6148821218074656, + "grad_norm": 0.6045268177986145, + "learning_rate": 2.217584762256235e-06, + "loss": 0.4983, + "step": 10959 + }, + { + "epoch": 1.6150294695481335, + "grad_norm": 0.6051517128944397, + "learning_rate": 2.217199549587677e-06, + "loss": 0.5251, + "step": 10960 + }, + { + "epoch": 1.6151768172888015, + "grad_norm": 0.6126794219017029, + "learning_rate": 2.2168143437203377e-06, + "loss": 0.5378, + "step": 10961 + }, + { + "epoch": 1.6153241650294694, + "grad_norm": 0.5947140455245972, + "learning_rate": 2.2164291446634804e-06, + "loss": 0.5139, + "step": 10962 + }, + { + "epoch": 1.6154715127701376, + "grad_norm": 0.585483729839325, + "learning_rate": 2.216043952426369e-06, + "loss": 0.5056, + "step": 10963 + }, + { + "epoch": 1.6156188605108055, + "grad_norm": 0.5815266966819763, + "learning_rate": 2.2156587670182674e-06, + "loss": 0.5243, + "step": 10964 + }, + { + "epoch": 1.6157662082514734, + "grad_norm": 0.588790237903595, + "learning_rate": 2.2152735884484393e-06, + "loss": 0.542, + "step": 10965 + }, + { + "epoch": 1.6159135559921416, + "grad_norm": 0.6158576011657715, + "learning_rate": 2.214888416726148e-06, + "loss": 0.5322, + "step": 10966 + }, + { + "epoch": 1.6160609037328095, + "grad_norm": 0.6308504343032837, + "learning_rate": 2.214503251860656e-06, + "loss": 0.4978, + "step": 10967 + }, + { + "epoch": 1.6162082514734775, + "grad_norm": 0.6132919192314148, + "learning_rate": 2.214118093861227e-06, + "loss": 0.516, + "step": 10968 + }, + { + "epoch": 1.6163555992141454, + "grad_norm": 0.5780192613601685, + "learning_rate": 2.213732942737124e-06, + "loss": 0.5346, + "step": 10969 + }, + { + "epoch": 1.6165029469548133, + "grad_norm": 0.5787243843078613, + "learning_rate": 2.2133477984976103e-06, + "loss": 0.5463, + "step": 10970 + }, + { + "epoch": 1.6166502946954813, + "grad_norm": 0.6018535494804382, + "learning_rate": 2.2129626611519465e-06, + "loss": 0.5532, + "step": 10971 + }, + { + "epoch": 1.6167976424361492, + "grad_norm": 0.615318775177002, + "learning_rate": 2.2125775307093963e-06, + "loss": 0.5221, + "step": 10972 + }, + { + "epoch": 1.6169449901768171, + "grad_norm": 0.5947490334510803, + "learning_rate": 2.212192407179222e-06, + "loss": 0.5085, + "step": 10973 + }, + { + "epoch": 1.6170923379174853, + "grad_norm": 0.5919204354286194, + "learning_rate": 2.2118072905706843e-06, + "loss": 0.5327, + "step": 10974 + }, + { + "epoch": 1.6172396856581532, + "grad_norm": 0.6330759525299072, + "learning_rate": 2.211422180893047e-06, + "loss": 0.5674, + "step": 10975 + }, + { + "epoch": 1.6173870333988212, + "grad_norm": 0.6143143177032471, + "learning_rate": 2.211037078155571e-06, + "loss": 0.5469, + "step": 10976 + }, + { + "epoch": 1.6175343811394893, + "grad_norm": 0.5982959866523743, + "learning_rate": 2.210651982367517e-06, + "loss": 0.4908, + "step": 10977 + }, + { + "epoch": 1.6176817288801573, + "grad_norm": 0.5713812112808228, + "learning_rate": 2.2102668935381475e-06, + "loss": 0.5176, + "step": 10978 + }, + { + "epoch": 1.6178290766208252, + "grad_norm": 0.6183671951293945, + "learning_rate": 2.2098818116767234e-06, + "loss": 0.5568, + "step": 10979 + }, + { + "epoch": 1.6179764243614931, + "grad_norm": 0.6118852496147156, + "learning_rate": 2.2094967367925054e-06, + "loss": 0.5314, + "step": 10980 + }, + { + "epoch": 1.618123772102161, + "grad_norm": 0.6038554906845093, + "learning_rate": 2.209111668894755e-06, + "loss": 0.5277, + "step": 10981 + }, + { + "epoch": 1.618271119842829, + "grad_norm": 0.650945246219635, + "learning_rate": 2.208726607992732e-06, + "loss": 0.5447, + "step": 10982 + }, + { + "epoch": 1.618418467583497, + "grad_norm": 0.6002305150032043, + "learning_rate": 2.208341554095698e-06, + "loss": 0.4951, + "step": 10983 + }, + { + "epoch": 1.6185658153241649, + "grad_norm": 0.5879220962524414, + "learning_rate": 2.207956507212913e-06, + "loss": 0.535, + "step": 10984 + }, + { + "epoch": 1.618713163064833, + "grad_norm": 0.6137065291404724, + "learning_rate": 2.207571467353635e-06, + "loss": 0.5484, + "step": 10985 + }, + { + "epoch": 1.618860510805501, + "grad_norm": 0.6486377716064453, + "learning_rate": 2.207186434527128e-06, + "loss": 0.5383, + "step": 10986 + }, + { + "epoch": 1.619007858546169, + "grad_norm": 0.5913034677505493, + "learning_rate": 2.2068014087426495e-06, + "loss": 0.5454, + "step": 10987 + }, + { + "epoch": 1.619155206286837, + "grad_norm": 0.5780973434448242, + "learning_rate": 2.20641639000946e-06, + "loss": 0.5468, + "step": 10988 + }, + { + "epoch": 1.619302554027505, + "grad_norm": 0.5994802117347717, + "learning_rate": 2.2060313783368187e-06, + "loss": 0.5337, + "step": 10989 + }, + { + "epoch": 1.619449901768173, + "grad_norm": 0.6054990291595459, + "learning_rate": 2.2056463737339846e-06, + "loss": 0.5383, + "step": 10990 + }, + { + "epoch": 1.6195972495088409, + "grad_norm": 0.5910143256187439, + "learning_rate": 2.2052613762102166e-06, + "loss": 0.5432, + "step": 10991 + }, + { + "epoch": 1.6197445972495088, + "grad_norm": 0.6312951445579529, + "learning_rate": 2.2048763857747747e-06, + "loss": 0.5405, + "step": 10992 + }, + { + "epoch": 1.6198919449901767, + "grad_norm": 0.5869192481040955, + "learning_rate": 2.2044914024369176e-06, + "loss": 0.5167, + "step": 10993 + }, + { + "epoch": 1.6200392927308447, + "grad_norm": 0.6178472638130188, + "learning_rate": 2.2041064262059036e-06, + "loss": 0.4959, + "step": 10994 + }, + { + "epoch": 1.6201866404715126, + "grad_norm": 0.5889374613761902, + "learning_rate": 2.203721457090991e-06, + "loss": 0.51, + "step": 10995 + }, + { + "epoch": 1.6203339882121808, + "grad_norm": 0.6083022952079773, + "learning_rate": 2.203336495101438e-06, + "loss": 0.5215, + "step": 10996 + }, + { + "epoch": 1.6204813359528487, + "grad_norm": 0.5939242839813232, + "learning_rate": 2.202951540246504e-06, + "loss": 0.5098, + "step": 10997 + }, + { + "epoch": 1.6206286836935166, + "grad_norm": 0.5672206878662109, + "learning_rate": 2.2025665925354458e-06, + "loss": 0.512, + "step": 10998 + }, + { + "epoch": 1.6207760314341848, + "grad_norm": 0.5958739519119263, + "learning_rate": 2.2021816519775212e-06, + "loss": 0.488, + "step": 10999 + }, + { + "epoch": 1.6209233791748527, + "grad_norm": 0.6115172505378723, + "learning_rate": 2.2017967185819883e-06, + "loss": 0.5248, + "step": 11000 + }, + { + "epoch": 1.6210707269155207, + "grad_norm": 0.596372663974762, + "learning_rate": 2.2014117923581046e-06, + "loss": 0.5017, + "step": 11001 + }, + { + "epoch": 1.6212180746561886, + "grad_norm": 0.6485625505447388, + "learning_rate": 2.2010268733151273e-06, + "loss": 0.5387, + "step": 11002 + }, + { + "epoch": 1.6213654223968565, + "grad_norm": 0.6395941376686096, + "learning_rate": 2.2006419614623133e-06, + "loss": 0.5265, + "step": 11003 + }, + { + "epoch": 1.6215127701375245, + "grad_norm": 0.605262279510498, + "learning_rate": 2.2002570568089203e-06, + "loss": 0.5346, + "step": 11004 + }, + { + "epoch": 1.6216601178781924, + "grad_norm": 0.6185641288757324, + "learning_rate": 2.199872159364204e-06, + "loss": 0.5393, + "step": 11005 + }, + { + "epoch": 1.6218074656188604, + "grad_norm": 0.6180818676948547, + "learning_rate": 2.1994872691374215e-06, + "loss": 0.5108, + "step": 11006 + }, + { + "epoch": 1.6219548133595285, + "grad_norm": 0.6001430153846741, + "learning_rate": 2.199102386137829e-06, + "loss": 0.5259, + "step": 11007 + }, + { + "epoch": 1.6221021611001964, + "grad_norm": 0.627541184425354, + "learning_rate": 2.1987175103746833e-06, + "loss": 0.5748, + "step": 11008 + }, + { + "epoch": 1.6222495088408644, + "grad_norm": 0.6114258170127869, + "learning_rate": 2.19833264185724e-06, + "loss": 0.4977, + "step": 11009 + }, + { + "epoch": 1.6223968565815325, + "grad_norm": 0.5999030470848083, + "learning_rate": 2.1979477805947555e-06, + "loss": 0.5339, + "step": 11010 + }, + { + "epoch": 1.6225442043222005, + "grad_norm": 0.6211379766464233, + "learning_rate": 2.1975629265964847e-06, + "loss": 0.5113, + "step": 11011 + }, + { + "epoch": 1.6226915520628684, + "grad_norm": 0.5996118187904358, + "learning_rate": 2.1971780798716843e-06, + "loss": 0.528, + "step": 11012 + }, + { + "epoch": 1.6228388998035363, + "grad_norm": 0.6030031442642212, + "learning_rate": 2.1967932404296087e-06, + "loss": 0.498, + "step": 11013 + }, + { + "epoch": 1.6229862475442043, + "grad_norm": 0.5883554816246033, + "learning_rate": 2.1964084082795135e-06, + "loss": 0.5328, + "step": 11014 + }, + { + "epoch": 1.6231335952848722, + "grad_norm": 0.6369892358779907, + "learning_rate": 2.1960235834306535e-06, + "loss": 0.5339, + "step": 11015 + }, + { + "epoch": 1.6232809430255402, + "grad_norm": 0.6192852258682251, + "learning_rate": 2.195638765892284e-06, + "loss": 0.5214, + "step": 11016 + }, + { + "epoch": 1.623428290766208, + "grad_norm": 0.604861319065094, + "learning_rate": 2.195253955673659e-06, + "loss": 0.5337, + "step": 11017 + }, + { + "epoch": 1.6235756385068763, + "grad_norm": 0.6330520510673523, + "learning_rate": 2.194869152784034e-06, + "loss": 0.5162, + "step": 11018 + }, + { + "epoch": 1.6237229862475442, + "grad_norm": 0.6612882614135742, + "learning_rate": 2.194484357232663e-06, + "loss": 0.5466, + "step": 11019 + }, + { + "epoch": 1.6238703339882123, + "grad_norm": 0.579501211643219, + "learning_rate": 2.1940995690287994e-06, + "loss": 0.5175, + "step": 11020 + }, + { + "epoch": 1.6240176817288803, + "grad_norm": 0.6015431880950928, + "learning_rate": 2.1937147881816967e-06, + "loss": 0.5234, + "step": 11021 + }, + { + "epoch": 1.6241650294695482, + "grad_norm": 0.5765331387519836, + "learning_rate": 2.1933300147006107e-06, + "loss": 0.5349, + "step": 11022 + }, + { + "epoch": 1.6243123772102162, + "grad_norm": 0.6472790241241455, + "learning_rate": 2.1929452485947938e-06, + "loss": 0.5552, + "step": 11023 + }, + { + "epoch": 1.624459724950884, + "grad_norm": 0.6146993637084961, + "learning_rate": 2.1925604898735e-06, + "loss": 0.5129, + "step": 11024 + }, + { + "epoch": 1.624607072691552, + "grad_norm": 0.5949875116348267, + "learning_rate": 2.192175738545982e-06, + "loss": 0.5261, + "step": 11025 + }, + { + "epoch": 1.62475442043222, + "grad_norm": 0.58220374584198, + "learning_rate": 2.191790994621493e-06, + "loss": 0.4966, + "step": 11026 + }, + { + "epoch": 1.624901768172888, + "grad_norm": 0.6190231442451477, + "learning_rate": 2.1914062581092867e-06, + "loss": 0.506, + "step": 11027 + }, + { + "epoch": 1.6250491159135558, + "grad_norm": 0.5686320066452026, + "learning_rate": 2.1910215290186147e-06, + "loss": 0.534, + "step": 11028 + }, + { + "epoch": 1.625196463654224, + "grad_norm": 0.6083618402481079, + "learning_rate": 2.19063680735873e-06, + "loss": 0.5004, + "step": 11029 + }, + { + "epoch": 1.625343811394892, + "grad_norm": 0.6073130369186401, + "learning_rate": 2.190252093138885e-06, + "loss": 0.5265, + "step": 11030 + }, + { + "epoch": 1.62549115913556, + "grad_norm": 0.5955968499183655, + "learning_rate": 2.189867386368332e-06, + "loss": 0.527, + "step": 11031 + }, + { + "epoch": 1.625638506876228, + "grad_norm": 0.5922759771347046, + "learning_rate": 2.1894826870563226e-06, + "loss": 0.5317, + "step": 11032 + }, + { + "epoch": 1.625785854616896, + "grad_norm": 0.6251760721206665, + "learning_rate": 2.189097995212109e-06, + "loss": 0.5192, + "step": 11033 + }, + { + "epoch": 1.625933202357564, + "grad_norm": 0.6013765931129456, + "learning_rate": 2.1887133108449426e-06, + "loss": 0.4894, + "step": 11034 + }, + { + "epoch": 1.6260805500982318, + "grad_norm": 0.5693352818489075, + "learning_rate": 2.1883286339640754e-06, + "loss": 0.5405, + "step": 11035 + }, + { + "epoch": 1.6262278978388998, + "grad_norm": 0.5933298468589783, + "learning_rate": 2.187943964578758e-06, + "loss": 0.5221, + "step": 11036 + }, + { + "epoch": 1.6263752455795677, + "grad_norm": 0.5877081751823425, + "learning_rate": 2.187559302698243e-06, + "loss": 0.5542, + "step": 11037 + }, + { + "epoch": 1.6265225933202356, + "grad_norm": 0.5905691981315613, + "learning_rate": 2.1871746483317798e-06, + "loss": 0.5092, + "step": 11038 + }, + { + "epoch": 1.6266699410609036, + "grad_norm": 0.5708624720573425, + "learning_rate": 2.186790001488619e-06, + "loss": 0.5084, + "step": 11039 + }, + { + "epoch": 1.6268172888015717, + "grad_norm": 0.652804434299469, + "learning_rate": 2.1864053621780117e-06, + "loss": 0.5618, + "step": 11040 + }, + { + "epoch": 1.6269646365422397, + "grad_norm": 0.6339282989501953, + "learning_rate": 2.1860207304092087e-06, + "loss": 0.5143, + "step": 11041 + }, + { + "epoch": 1.6271119842829078, + "grad_norm": 0.6300268173217773, + "learning_rate": 2.18563610619146e-06, + "loss": 0.5117, + "step": 11042 + }, + { + "epoch": 1.6272593320235758, + "grad_norm": 0.6143361330032349, + "learning_rate": 2.185251489534015e-06, + "loss": 0.5127, + "step": 11043 + }, + { + "epoch": 1.6274066797642437, + "grad_norm": 0.6310532689094543, + "learning_rate": 2.1848668804461244e-06, + "loss": 0.4913, + "step": 11044 + }, + { + "epoch": 1.6275540275049116, + "grad_norm": 0.5796357989311218, + "learning_rate": 2.1844822789370377e-06, + "loss": 0.5309, + "step": 11045 + }, + { + "epoch": 1.6277013752455796, + "grad_norm": 0.6373587846755981, + "learning_rate": 2.1840976850160038e-06, + "loss": 0.5449, + "step": 11046 + }, + { + "epoch": 1.6278487229862475, + "grad_norm": 0.5750013589859009, + "learning_rate": 2.1837130986922726e-06, + "loss": 0.5442, + "step": 11047 + }, + { + "epoch": 1.6279960707269154, + "grad_norm": 0.6770113706588745, + "learning_rate": 2.183328519975093e-06, + "loss": 0.5161, + "step": 11048 + }, + { + "epoch": 1.6281434184675834, + "grad_norm": 0.6189544796943665, + "learning_rate": 2.1829439488737137e-06, + "loss": 0.5112, + "step": 11049 + }, + { + "epoch": 1.6282907662082515, + "grad_norm": 0.5822144746780396, + "learning_rate": 2.1825593853973843e-06, + "loss": 0.5264, + "step": 11050 + }, + { + "epoch": 1.6284381139489195, + "grad_norm": 0.5855816602706909, + "learning_rate": 2.1821748295553526e-06, + "loss": 0.5487, + "step": 11051 + }, + { + "epoch": 1.6285854616895874, + "grad_norm": 0.5663124918937683, + "learning_rate": 2.181790281356867e-06, + "loss": 0.5259, + "step": 11052 + }, + { + "epoch": 1.6287328094302556, + "grad_norm": 0.5936033129692078, + "learning_rate": 2.181405740811176e-06, + "loss": 0.5362, + "step": 11053 + }, + { + "epoch": 1.6288801571709235, + "grad_norm": 0.5981681942939758, + "learning_rate": 2.1810212079275272e-06, + "loss": 0.5493, + "step": 11054 + }, + { + "epoch": 1.6290275049115914, + "grad_norm": 0.5804879069328308, + "learning_rate": 2.180636682715169e-06, + "loss": 0.5247, + "step": 11055 + }, + { + "epoch": 1.6291748526522594, + "grad_norm": 0.6133402585983276, + "learning_rate": 2.180252165183349e-06, + "loss": 0.5342, + "step": 11056 + }, + { + "epoch": 1.6293222003929273, + "grad_norm": 0.6314303874969482, + "learning_rate": 2.179867655341314e-06, + "loss": 0.5123, + "step": 11057 + }, + { + "epoch": 1.6294695481335952, + "grad_norm": 0.6585528254508972, + "learning_rate": 2.1794831531983114e-06, + "loss": 0.5386, + "step": 11058 + }, + { + "epoch": 1.6296168958742632, + "grad_norm": 0.6066458821296692, + "learning_rate": 2.1790986587635892e-06, + "loss": 0.5563, + "step": 11059 + }, + { + "epoch": 1.629764243614931, + "grad_norm": 0.6364437341690063, + "learning_rate": 2.178714172046394e-06, + "loss": 0.5428, + "step": 11060 + }, + { + "epoch": 1.6299115913555993, + "grad_norm": 0.6249551177024841, + "learning_rate": 2.178329693055972e-06, + "loss": 0.5325, + "step": 11061 + }, + { + "epoch": 1.6300589390962672, + "grad_norm": 0.6293502449989319, + "learning_rate": 2.1779452218015697e-06, + "loss": 0.4973, + "step": 11062 + }, + { + "epoch": 1.6302062868369351, + "grad_norm": 0.5963471531867981, + "learning_rate": 2.177560758292434e-06, + "loss": 0.5209, + "step": 11063 + }, + { + "epoch": 1.6303536345776033, + "grad_norm": 0.613935112953186, + "learning_rate": 2.177176302537811e-06, + "loss": 0.4984, + "step": 11064 + }, + { + "epoch": 1.6305009823182712, + "grad_norm": 0.5816716551780701, + "learning_rate": 2.1767918545469465e-06, + "loss": 0.529, + "step": 11065 + }, + { + "epoch": 1.6306483300589392, + "grad_norm": 0.6252373456954956, + "learning_rate": 2.1764074143290863e-06, + "loss": 0.5207, + "step": 11066 + }, + { + "epoch": 1.630795677799607, + "grad_norm": 0.6428282856941223, + "learning_rate": 2.176022981893476e-06, + "loss": 0.513, + "step": 11067 + }, + { + "epoch": 1.630943025540275, + "grad_norm": 0.6003892421722412, + "learning_rate": 2.175638557249361e-06, + "loss": 0.5413, + "step": 11068 + }, + { + "epoch": 1.631090373280943, + "grad_norm": 0.6050590872764587, + "learning_rate": 2.1752541404059865e-06, + "loss": 0.5206, + "step": 11069 + }, + { + "epoch": 1.631237721021611, + "grad_norm": 0.6068952679634094, + "learning_rate": 2.1748697313725976e-06, + "loss": 0.4963, + "step": 11070 + }, + { + "epoch": 1.6313850687622788, + "grad_norm": 0.5949049592018127, + "learning_rate": 2.1744853301584392e-06, + "loss": 0.5375, + "step": 11071 + }, + { + "epoch": 1.631532416502947, + "grad_norm": 0.6271416544914246, + "learning_rate": 2.1741009367727563e-06, + "loss": 0.5293, + "step": 11072 + }, + { + "epoch": 1.631679764243615, + "grad_norm": 0.5677645802497864, + "learning_rate": 2.1737165512247924e-06, + "loss": 0.5381, + "step": 11073 + }, + { + "epoch": 1.6318271119842829, + "grad_norm": 0.6279079914093018, + "learning_rate": 2.1733321735237926e-06, + "loss": 0.5241, + "step": 11074 + }, + { + "epoch": 1.631974459724951, + "grad_norm": 0.5868801474571228, + "learning_rate": 2.172947803679001e-06, + "loss": 0.5026, + "step": 11075 + }, + { + "epoch": 1.632121807465619, + "grad_norm": 0.6219079494476318, + "learning_rate": 2.1725634416996615e-06, + "loss": 0.5323, + "step": 11076 + }, + { + "epoch": 1.632269155206287, + "grad_norm": 0.6170361042022705, + "learning_rate": 2.172179087595017e-06, + "loss": 0.5393, + "step": 11077 + }, + { + "epoch": 1.6324165029469548, + "grad_norm": 0.5783489346504211, + "learning_rate": 2.171794741374312e-06, + "loss": 0.5308, + "step": 11078 + }, + { + "epoch": 1.6325638506876228, + "grad_norm": 0.6257763504981995, + "learning_rate": 2.17141040304679e-06, + "loss": 0.527, + "step": 11079 + }, + { + "epoch": 1.6327111984282907, + "grad_norm": 0.6292795538902283, + "learning_rate": 2.171026072621693e-06, + "loss": 0.5392, + "step": 11080 + }, + { + "epoch": 1.6328585461689586, + "grad_norm": 0.6089624762535095, + "learning_rate": 2.170641750108265e-06, + "loss": 0.533, + "step": 11081 + }, + { + "epoch": 1.6330058939096266, + "grad_norm": 0.6111420392990112, + "learning_rate": 2.1702574355157483e-06, + "loss": 0.5211, + "step": 11082 + }, + { + "epoch": 1.6331532416502947, + "grad_norm": 0.6041256785392761, + "learning_rate": 2.1698731288533854e-06, + "loss": 0.5575, + "step": 11083 + }, + { + "epoch": 1.6333005893909627, + "grad_norm": 0.5809090733528137, + "learning_rate": 2.1694888301304194e-06, + "loss": 0.5085, + "step": 11084 + }, + { + "epoch": 1.6334479371316306, + "grad_norm": 0.590444803237915, + "learning_rate": 2.1691045393560924e-06, + "loss": 0.5134, + "step": 11085 + }, + { + "epoch": 1.6335952848722988, + "grad_norm": 0.6548458337783813, + "learning_rate": 2.1687202565396454e-06, + "loss": 0.5347, + "step": 11086 + }, + { + "epoch": 1.6337426326129667, + "grad_norm": 0.5971848368644714, + "learning_rate": 2.168335981690321e-06, + "loss": 0.5487, + "step": 11087 + }, + { + "epoch": 1.6338899803536346, + "grad_norm": 0.5931848287582397, + "learning_rate": 2.1679517148173603e-06, + "loss": 0.5291, + "step": 11088 + }, + { + "epoch": 1.6340373280943026, + "grad_norm": 0.5854246616363525, + "learning_rate": 2.1675674559300058e-06, + "loss": 0.502, + "step": 11089 + }, + { + "epoch": 1.6341846758349705, + "grad_norm": 0.5912979245185852, + "learning_rate": 2.1671832050374974e-06, + "loss": 0.5376, + "step": 11090 + }, + { + "epoch": 1.6343320235756384, + "grad_norm": 0.5906209349632263, + "learning_rate": 2.166798962149077e-06, + "loss": 0.4968, + "step": 11091 + }, + { + "epoch": 1.6344793713163064, + "grad_norm": 0.6105968356132507, + "learning_rate": 2.1664147272739854e-06, + "loss": 0.5249, + "step": 11092 + }, + { + "epoch": 1.6346267190569743, + "grad_norm": 0.6409052610397339, + "learning_rate": 2.166030500421463e-06, + "loss": 0.5636, + "step": 11093 + }, + { + "epoch": 1.6347740667976425, + "grad_norm": 0.5945939421653748, + "learning_rate": 2.1656462816007506e-06, + "loss": 0.5131, + "step": 11094 + }, + { + "epoch": 1.6349214145383104, + "grad_norm": 0.6017090678215027, + "learning_rate": 2.165262070821087e-06, + "loss": 0.528, + "step": 11095 + }, + { + "epoch": 1.6350687622789783, + "grad_norm": 0.6041409969329834, + "learning_rate": 2.164877868091715e-06, + "loss": 0.5037, + "step": 11096 + }, + { + "epoch": 1.6352161100196465, + "grad_norm": 0.6447442173957825, + "learning_rate": 2.164493673421873e-06, + "loss": 0.5446, + "step": 11097 + }, + { + "epoch": 1.6353634577603144, + "grad_norm": 0.574122428894043, + "learning_rate": 2.1641094868208004e-06, + "loss": 0.5423, + "step": 11098 + }, + { + "epoch": 1.6355108055009824, + "grad_norm": 0.5865603089332581, + "learning_rate": 2.1637253082977376e-06, + "loss": 0.5397, + "step": 11099 + }, + { + "epoch": 1.6356581532416503, + "grad_norm": 0.5968482494354248, + "learning_rate": 2.163341137861923e-06, + "loss": 0.5007, + "step": 11100 + }, + { + "epoch": 1.6358055009823183, + "grad_norm": 0.5861513614654541, + "learning_rate": 2.1629569755225966e-06, + "loss": 0.5092, + "step": 11101 + }, + { + "epoch": 1.6359528487229862, + "grad_norm": 0.6096494793891907, + "learning_rate": 2.1625728212889965e-06, + "loss": 0.5269, + "step": 11102 + }, + { + "epoch": 1.6361001964636541, + "grad_norm": 0.5951991677284241, + "learning_rate": 2.1621886751703624e-06, + "loss": 0.5175, + "step": 11103 + }, + { + "epoch": 1.636247544204322, + "grad_norm": 0.5707665681838989, + "learning_rate": 2.1618045371759317e-06, + "loss": 0.4914, + "step": 11104 + }, + { + "epoch": 1.6363948919449902, + "grad_norm": 0.5753116011619568, + "learning_rate": 2.161420407314943e-06, + "loss": 0.5164, + "step": 11105 + }, + { + "epoch": 1.6365422396856582, + "grad_norm": 0.6003001928329468, + "learning_rate": 2.161036285596635e-06, + "loss": 0.4969, + "step": 11106 + }, + { + "epoch": 1.636689587426326, + "grad_norm": 0.6463905572891235, + "learning_rate": 2.1606521720302455e-06, + "loss": 0.5201, + "step": 11107 + }, + { + "epoch": 1.6368369351669942, + "grad_norm": 0.5822681188583374, + "learning_rate": 2.160268066625012e-06, + "loss": 0.5067, + "step": 11108 + }, + { + "epoch": 1.6369842829076622, + "grad_norm": 0.5715650916099548, + "learning_rate": 2.159883969390172e-06, + "loss": 0.5026, + "step": 11109 + }, + { + "epoch": 1.6371316306483301, + "grad_norm": 0.5740357637405396, + "learning_rate": 2.1594998803349632e-06, + "loss": 0.522, + "step": 11110 + }, + { + "epoch": 1.637278978388998, + "grad_norm": 0.6067920327186584, + "learning_rate": 2.1591157994686223e-06, + "loss": 0.5267, + "step": 11111 + }, + { + "epoch": 1.637426326129666, + "grad_norm": 0.6096743941307068, + "learning_rate": 2.1587317268003867e-06, + "loss": 0.4974, + "step": 11112 + }, + { + "epoch": 1.637573673870334, + "grad_norm": 0.6358999013900757, + "learning_rate": 2.158347662339493e-06, + "loss": 0.5425, + "step": 11113 + }, + { + "epoch": 1.6377210216110019, + "grad_norm": 0.6103094220161438, + "learning_rate": 2.157963606095178e-06, + "loss": 0.5293, + "step": 11114 + }, + { + "epoch": 1.6378683693516698, + "grad_norm": 0.5965175628662109, + "learning_rate": 2.1575795580766773e-06, + "loss": 0.5188, + "step": 11115 + }, + { + "epoch": 1.638015717092338, + "grad_norm": 0.602108895778656, + "learning_rate": 2.157195518293228e-06, + "loss": 0.5228, + "step": 11116 + }, + { + "epoch": 1.6381630648330059, + "grad_norm": 0.6060830950737, + "learning_rate": 2.1568114867540658e-06, + "loss": 0.5132, + "step": 11117 + }, + { + "epoch": 1.6383104125736738, + "grad_norm": 0.6038975715637207, + "learning_rate": 2.156427463468426e-06, + "loss": 0.5356, + "step": 11118 + }, + { + "epoch": 1.638457760314342, + "grad_norm": 0.617359459400177, + "learning_rate": 2.156043448445545e-06, + "loss": 0.5307, + "step": 11119 + }, + { + "epoch": 1.63860510805501, + "grad_norm": 0.5959164500236511, + "learning_rate": 2.1556594416946573e-06, + "loss": 0.539, + "step": 11120 + }, + { + "epoch": 1.6387524557956779, + "grad_norm": 0.5702115297317505, + "learning_rate": 2.155275443224999e-06, + "loss": 0.5413, + "step": 11121 + }, + { + "epoch": 1.6388998035363458, + "grad_norm": 0.5856035947799683, + "learning_rate": 2.1548914530458043e-06, + "loss": 0.4873, + "step": 11122 + }, + { + "epoch": 1.6390471512770137, + "grad_norm": 0.6364746689796448, + "learning_rate": 2.154507471166308e-06, + "loss": 0.5571, + "step": 11123 + }, + { + "epoch": 1.6391944990176817, + "grad_norm": 0.5792127847671509, + "learning_rate": 2.1541234975957452e-06, + "loss": 0.545, + "step": 11124 + }, + { + "epoch": 1.6393418467583496, + "grad_norm": 0.5894178152084351, + "learning_rate": 2.1537395323433504e-06, + "loss": 0.5681, + "step": 11125 + }, + { + "epoch": 1.6394891944990175, + "grad_norm": 0.6279268860816956, + "learning_rate": 2.153355575418357e-06, + "loss": 0.5134, + "step": 11126 + }, + { + "epoch": 1.6396365422396857, + "grad_norm": 0.605336606502533, + "learning_rate": 2.15297162683e-06, + "loss": 0.5198, + "step": 11127 + }, + { + "epoch": 1.6397838899803536, + "grad_norm": 0.6447799205780029, + "learning_rate": 2.152587686587512e-06, + "loss": 0.5207, + "step": 11128 + }, + { + "epoch": 1.6399312377210216, + "grad_norm": 0.5993359088897705, + "learning_rate": 2.1522037547001276e-06, + "loss": 0.5386, + "step": 11129 + }, + { + "epoch": 1.6400785854616897, + "grad_norm": 0.6162787675857544, + "learning_rate": 2.1518198311770798e-06, + "loss": 0.5203, + "step": 11130 + }, + { + "epoch": 1.6402259332023577, + "grad_norm": 0.6013906002044678, + "learning_rate": 2.1514359160276015e-06, + "loss": 0.5238, + "step": 11131 + }, + { + "epoch": 1.6403732809430256, + "grad_norm": 0.5901201367378235, + "learning_rate": 2.151052009260925e-06, + "loss": 0.5451, + "step": 11132 + }, + { + "epoch": 1.6405206286836935, + "grad_norm": 0.5981432795524597, + "learning_rate": 2.1506681108862855e-06, + "loss": 0.54, + "step": 11133 + }, + { + "epoch": 1.6406679764243615, + "grad_norm": 0.6227648854255676, + "learning_rate": 2.1502842209129134e-06, + "loss": 0.5271, + "step": 11134 + }, + { + "epoch": 1.6408153241650294, + "grad_norm": 0.5989921689033508, + "learning_rate": 2.149900339350042e-06, + "loss": 0.5405, + "step": 11135 + }, + { + "epoch": 1.6409626719056973, + "grad_norm": 0.6266641616821289, + "learning_rate": 2.1495164662069028e-06, + "loss": 0.5369, + "step": 11136 + }, + { + "epoch": 1.6411100196463653, + "grad_norm": 0.6041465401649475, + "learning_rate": 2.1491326014927287e-06, + "loss": 0.5346, + "step": 11137 + }, + { + "epoch": 1.6412573673870334, + "grad_norm": 0.5835134983062744, + "learning_rate": 2.148748745216751e-06, + "loss": 0.5358, + "step": 11138 + }, + { + "epoch": 1.6414047151277014, + "grad_norm": 0.5864649415016174, + "learning_rate": 2.148364897388201e-06, + "loss": 0.5374, + "step": 11139 + }, + { + "epoch": 1.6415520628683693, + "grad_norm": 0.5913028120994568, + "learning_rate": 2.1479810580163106e-06, + "loss": 0.5369, + "step": 11140 + }, + { + "epoch": 1.6416994106090375, + "grad_norm": 0.6023693680763245, + "learning_rate": 2.14759722711031e-06, + "loss": 0.5039, + "step": 11141 + }, + { + "epoch": 1.6418467583497054, + "grad_norm": 0.6145163774490356, + "learning_rate": 2.1472134046794317e-06, + "loss": 0.503, + "step": 11142 + }, + { + "epoch": 1.6419941060903733, + "grad_norm": 0.5864872932434082, + "learning_rate": 2.146829590732905e-06, + "loss": 0.5237, + "step": 11143 + }, + { + "epoch": 1.6421414538310413, + "grad_norm": 0.6527603268623352, + "learning_rate": 2.1464457852799608e-06, + "loss": 0.5396, + "step": 11144 + }, + { + "epoch": 1.6422888015717092, + "grad_norm": 0.5922210216522217, + "learning_rate": 2.1460619883298302e-06, + "loss": 0.5036, + "step": 11145 + }, + { + "epoch": 1.6424361493123771, + "grad_norm": 0.5932883620262146, + "learning_rate": 2.145678199891742e-06, + "loss": 0.5401, + "step": 11146 + }, + { + "epoch": 1.642583497053045, + "grad_norm": 0.6086810827255249, + "learning_rate": 2.1452944199749275e-06, + "loss": 0.5045, + "step": 11147 + }, + { + "epoch": 1.642730844793713, + "grad_norm": 0.6178856492042542, + "learning_rate": 2.144910648588616e-06, + "loss": 0.507, + "step": 11148 + }, + { + "epoch": 1.6428781925343812, + "grad_norm": 0.5955756902694702, + "learning_rate": 2.1445268857420364e-06, + "loss": 0.5375, + "step": 11149 + }, + { + "epoch": 1.643025540275049, + "grad_norm": 0.6082937717437744, + "learning_rate": 2.1441431314444185e-06, + "loss": 0.5361, + "step": 11150 + }, + { + "epoch": 1.643172888015717, + "grad_norm": 0.6465573906898499, + "learning_rate": 2.1437593857049915e-06, + "loss": 0.5325, + "step": 11151 + }, + { + "epoch": 1.6433202357563852, + "grad_norm": 0.5958907604217529, + "learning_rate": 2.1433756485329847e-06, + "loss": 0.5511, + "step": 11152 + }, + { + "epoch": 1.6434675834970531, + "grad_norm": 0.5960927605628967, + "learning_rate": 2.1429919199376257e-06, + "loss": 0.5402, + "step": 11153 + }, + { + "epoch": 1.643614931237721, + "grad_norm": 0.6015209555625916, + "learning_rate": 2.142608199928144e-06, + "loss": 0.5141, + "step": 11154 + }, + { + "epoch": 1.643762278978389, + "grad_norm": 0.6017306447029114, + "learning_rate": 2.1422244885137673e-06, + "loss": 0.5299, + "step": 11155 + }, + { + "epoch": 1.643909626719057, + "grad_norm": 0.6398621797561646, + "learning_rate": 2.1418407857037235e-06, + "loss": 0.5544, + "step": 11156 + }, + { + "epoch": 1.6440569744597249, + "grad_norm": 0.6279236078262329, + "learning_rate": 2.141457091507241e-06, + "loss": 0.5323, + "step": 11157 + }, + { + "epoch": 1.6442043222003928, + "grad_norm": 0.5880197286605835, + "learning_rate": 2.1410734059335473e-06, + "loss": 0.5232, + "step": 11158 + }, + { + "epoch": 1.6443516699410607, + "grad_norm": 0.5872476696968079, + "learning_rate": 2.1406897289918698e-06, + "loss": 0.5185, + "step": 11159 + }, + { + "epoch": 1.644499017681729, + "grad_norm": 0.5815001726150513, + "learning_rate": 2.140306060691436e-06, + "loss": 0.5461, + "step": 11160 + }, + { + "epoch": 1.6446463654223968, + "grad_norm": 0.625606894493103, + "learning_rate": 2.1399224010414723e-06, + "loss": 0.5501, + "step": 11161 + }, + { + "epoch": 1.644793713163065, + "grad_norm": 0.5912275910377502, + "learning_rate": 2.1395387500512063e-06, + "loss": 0.518, + "step": 11162 + }, + { + "epoch": 1.644941060903733, + "grad_norm": 0.6081578731536865, + "learning_rate": 2.1391551077298642e-06, + "loss": 0.4957, + "step": 11163 + }, + { + "epoch": 1.6450884086444009, + "grad_norm": 0.6077240705490112, + "learning_rate": 2.1387714740866725e-06, + "loss": 0.5341, + "step": 11164 + }, + { + "epoch": 1.6452357563850688, + "grad_norm": 0.6256056427955627, + "learning_rate": 2.1383878491308573e-06, + "loss": 0.5405, + "step": 11165 + }, + { + "epoch": 1.6453831041257367, + "grad_norm": 0.6175428628921509, + "learning_rate": 2.138004232871645e-06, + "loss": 0.5378, + "step": 11166 + }, + { + "epoch": 1.6455304518664047, + "grad_norm": 0.6248172521591187, + "learning_rate": 2.137620625318261e-06, + "loss": 0.5676, + "step": 11167 + }, + { + "epoch": 1.6456777996070726, + "grad_norm": 0.5772278904914856, + "learning_rate": 2.137237026479931e-06, + "loss": 0.5277, + "step": 11168 + }, + { + "epoch": 1.6458251473477405, + "grad_norm": 0.6299455761909485, + "learning_rate": 2.13685343636588e-06, + "loss": 0.5334, + "step": 11169 + }, + { + "epoch": 1.6459724950884085, + "grad_norm": 0.5909412503242493, + "learning_rate": 2.1364698549853334e-06, + "loss": 0.5163, + "step": 11170 + }, + { + "epoch": 1.6461198428290766, + "grad_norm": 0.6104263663291931, + "learning_rate": 2.136086282347517e-06, + "loss": 0.5453, + "step": 11171 + }, + { + "epoch": 1.6462671905697446, + "grad_norm": 0.6347589492797852, + "learning_rate": 2.1357027184616546e-06, + "loss": 0.5374, + "step": 11172 + }, + { + "epoch": 1.6464145383104127, + "grad_norm": 0.5855777263641357, + "learning_rate": 2.135319163336971e-06, + "loss": 0.5256, + "step": 11173 + }, + { + "epoch": 1.6465618860510807, + "grad_norm": 0.5786951184272766, + "learning_rate": 2.1349356169826905e-06, + "loss": 0.5277, + "step": 11174 + }, + { + "epoch": 1.6467092337917486, + "grad_norm": 0.5988860130310059, + "learning_rate": 2.134552079408037e-06, + "loss": 0.5016, + "step": 11175 + }, + { + "epoch": 1.6468565815324165, + "grad_norm": 0.6302437782287598, + "learning_rate": 2.1341685506222345e-06, + "loss": 0.5601, + "step": 11176 + }, + { + "epoch": 1.6470039292730845, + "grad_norm": 0.5816003084182739, + "learning_rate": 2.1337850306345072e-06, + "loss": 0.5015, + "step": 11177 + }, + { + "epoch": 1.6471512770137524, + "grad_norm": 0.5979393124580383, + "learning_rate": 2.1334015194540777e-06, + "loss": 0.524, + "step": 11178 + }, + { + "epoch": 1.6472986247544203, + "grad_norm": 0.6059600710868835, + "learning_rate": 2.13301801709017e-06, + "loss": 0.5475, + "step": 11179 + }, + { + "epoch": 1.6474459724950883, + "grad_norm": 0.6269234418869019, + "learning_rate": 2.132634523552007e-06, + "loss": 0.5113, + "step": 11180 + }, + { + "epoch": 1.6475933202357562, + "grad_norm": 0.6298476457595825, + "learning_rate": 2.132251038848811e-06, + "loss": 0.5426, + "step": 11181 + }, + { + "epoch": 1.6477406679764244, + "grad_norm": 0.6226649880409241, + "learning_rate": 2.131867562989805e-06, + "loss": 0.5077, + "step": 11182 + }, + { + "epoch": 1.6478880157170923, + "grad_norm": 0.6141617894172668, + "learning_rate": 2.131484095984212e-06, + "loss": 0.5281, + "step": 11183 + }, + { + "epoch": 1.6480353634577605, + "grad_norm": 0.5906453728675842, + "learning_rate": 2.131100637841253e-06, + "loss": 0.5399, + "step": 11184 + }, + { + "epoch": 1.6481827111984284, + "grad_norm": 0.5834736227989197, + "learning_rate": 2.130717188570151e-06, + "loss": 0.5304, + "step": 11185 + }, + { + "epoch": 1.6483300589390963, + "grad_norm": 0.604975700378418, + "learning_rate": 2.1303337481801273e-06, + "loss": 0.525, + "step": 11186 + }, + { + "epoch": 1.6484774066797643, + "grad_norm": 0.6400066614151001, + "learning_rate": 2.1299503166804035e-06, + "loss": 0.5156, + "step": 11187 + }, + { + "epoch": 1.6486247544204322, + "grad_norm": 0.5782424211502075, + "learning_rate": 2.129566894080201e-06, + "loss": 0.5139, + "step": 11188 + }, + { + "epoch": 1.6487721021611002, + "grad_norm": 0.6137928366661072, + "learning_rate": 2.1291834803887416e-06, + "loss": 0.5026, + "step": 11189 + }, + { + "epoch": 1.648919449901768, + "grad_norm": 0.6268151998519897, + "learning_rate": 2.1288000756152447e-06, + "loss": 0.5149, + "step": 11190 + }, + { + "epoch": 1.649066797642436, + "grad_norm": 0.6088536977767944, + "learning_rate": 2.1284166797689326e-06, + "loss": 0.5441, + "step": 11191 + }, + { + "epoch": 1.6492141453831042, + "grad_norm": 0.5997933745384216, + "learning_rate": 2.1280332928590247e-06, + "loss": 0.5647, + "step": 11192 + }, + { + "epoch": 1.6493614931237721, + "grad_norm": 0.5988098978996277, + "learning_rate": 2.1276499148947414e-06, + "loss": 0.5139, + "step": 11193 + }, + { + "epoch": 1.64950884086444, + "grad_norm": 0.5988739132881165, + "learning_rate": 2.127266545885303e-06, + "loss": 0.5307, + "step": 11194 + }, + { + "epoch": 1.6496561886051082, + "grad_norm": 0.628730058670044, + "learning_rate": 2.12688318583993e-06, + "loss": 0.5194, + "step": 11195 + }, + { + "epoch": 1.6498035363457761, + "grad_norm": 0.609990656375885, + "learning_rate": 2.1264998347678412e-06, + "loss": 0.5304, + "step": 11196 + }, + { + "epoch": 1.649950884086444, + "grad_norm": 0.580651044845581, + "learning_rate": 2.1261164926782557e-06, + "loss": 0.5062, + "step": 11197 + }, + { + "epoch": 1.650098231827112, + "grad_norm": 0.5941289067268372, + "learning_rate": 2.1257331595803937e-06, + "loss": 0.5211, + "step": 11198 + }, + { + "epoch": 1.65024557956778, + "grad_norm": 0.6099201440811157, + "learning_rate": 2.1253498354834735e-06, + "loss": 0.5357, + "step": 11199 + }, + { + "epoch": 1.6503929273084479, + "grad_norm": 0.5914556384086609, + "learning_rate": 2.1249665203967143e-06, + "loss": 0.5259, + "step": 11200 + }, + { + "epoch": 1.6505402750491158, + "grad_norm": 0.5937098860740662, + "learning_rate": 2.124583214329334e-06, + "loss": 0.5384, + "step": 11201 + }, + { + "epoch": 1.6506876227897838, + "grad_norm": 0.6265343427658081, + "learning_rate": 2.1241999172905513e-06, + "loss": 0.5357, + "step": 11202 + }, + { + "epoch": 1.650834970530452, + "grad_norm": 0.5889278650283813, + "learning_rate": 2.1238166292895844e-06, + "loss": 0.5179, + "step": 11203 + }, + { + "epoch": 1.6509823182711199, + "grad_norm": 0.6021231412887573, + "learning_rate": 2.1234333503356516e-06, + "loss": 0.5371, + "step": 11204 + }, + { + "epoch": 1.6511296660117878, + "grad_norm": 0.5933511853218079, + "learning_rate": 2.1230500804379696e-06, + "loss": 0.5099, + "step": 11205 + }, + { + "epoch": 1.651277013752456, + "grad_norm": 0.6109631657600403, + "learning_rate": 2.1226668196057552e-06, + "loss": 0.5098, + "step": 11206 + }, + { + "epoch": 1.6514243614931239, + "grad_norm": 0.5981847643852234, + "learning_rate": 2.122283567848228e-06, + "loss": 0.51, + "step": 11207 + }, + { + "epoch": 1.6515717092337918, + "grad_norm": 0.6267642378807068, + "learning_rate": 2.1219003251746037e-06, + "loss": 0.512, + "step": 11208 + }, + { + "epoch": 1.6517190569744598, + "grad_norm": 0.5559785962104797, + "learning_rate": 2.1215170915940994e-06, + "loss": 0.4908, + "step": 11209 + }, + { + "epoch": 1.6518664047151277, + "grad_norm": 0.6382353901863098, + "learning_rate": 2.1211338671159313e-06, + "loss": 0.5388, + "step": 11210 + }, + { + "epoch": 1.6520137524557956, + "grad_norm": 0.6098406910896301, + "learning_rate": 2.1207506517493166e-06, + "loss": 0.5297, + "step": 11211 + }, + { + "epoch": 1.6521611001964636, + "grad_norm": 0.5955070853233337, + "learning_rate": 2.12036744550347e-06, + "loss": 0.5171, + "step": 11212 + }, + { + "epoch": 1.6523084479371315, + "grad_norm": 0.6351247429847717, + "learning_rate": 2.119984248387609e-06, + "loss": 0.5418, + "step": 11213 + }, + { + "epoch": 1.6524557956777997, + "grad_norm": 0.5860016942024231, + "learning_rate": 2.1196010604109482e-06, + "loss": 0.5444, + "step": 11214 + }, + { + "epoch": 1.6526031434184676, + "grad_norm": 0.6398259401321411, + "learning_rate": 2.1192178815827035e-06, + "loss": 0.553, + "step": 11215 + }, + { + "epoch": 1.6527504911591355, + "grad_norm": 0.6135919094085693, + "learning_rate": 2.1188347119120908e-06, + "loss": 0.5254, + "step": 11216 + }, + { + "epoch": 1.6528978388998037, + "grad_norm": 0.6164790391921997, + "learning_rate": 2.118451551408324e-06, + "loss": 0.5491, + "step": 11217 + }, + { + "epoch": 1.6530451866404716, + "grad_norm": 0.5923084020614624, + "learning_rate": 2.1180684000806183e-06, + "loss": 0.523, + "step": 11218 + }, + { + "epoch": 1.6531925343811396, + "grad_norm": 0.6176260709762573, + "learning_rate": 2.1176852579381888e-06, + "loss": 0.5427, + "step": 11219 + }, + { + "epoch": 1.6533398821218075, + "grad_norm": 0.5864949822425842, + "learning_rate": 2.1173021249902494e-06, + "loss": 0.5141, + "step": 11220 + }, + { + "epoch": 1.6534872298624754, + "grad_norm": 0.6294904351234436, + "learning_rate": 2.1169190012460137e-06, + "loss": 0.5398, + "step": 11221 + }, + { + "epoch": 1.6536345776031434, + "grad_norm": 0.592812180519104, + "learning_rate": 2.1165358867146975e-06, + "loss": 0.5347, + "step": 11222 + }, + { + "epoch": 1.6537819253438113, + "grad_norm": 0.5996875762939453, + "learning_rate": 2.1161527814055122e-06, + "loss": 0.515, + "step": 11223 + }, + { + "epoch": 1.6539292730844792, + "grad_norm": 0.6014246344566345, + "learning_rate": 2.115769685327673e-06, + "loss": 0.503, + "step": 11224 + }, + { + "epoch": 1.6540766208251474, + "grad_norm": 0.6025770306587219, + "learning_rate": 2.115386598490393e-06, + "loss": 0.5561, + "step": 11225 + }, + { + "epoch": 1.6542239685658153, + "grad_norm": 0.6241815090179443, + "learning_rate": 2.1150035209028847e-06, + "loss": 0.5599, + "step": 11226 + }, + { + "epoch": 1.6543713163064833, + "grad_norm": 0.6149230003356934, + "learning_rate": 2.114620452574361e-06, + "loss": 0.5458, + "step": 11227 + }, + { + "epoch": 1.6545186640471514, + "grad_norm": 0.6090419292449951, + "learning_rate": 2.1142373935140347e-06, + "loss": 0.5086, + "step": 11228 + }, + { + "epoch": 1.6546660117878194, + "grad_norm": 0.5591367483139038, + "learning_rate": 2.113854343731118e-06, + "loss": 0.5224, + "step": 11229 + }, + { + "epoch": 1.6548133595284873, + "grad_norm": 0.6045783758163452, + "learning_rate": 2.1134713032348233e-06, + "loss": 0.5412, + "step": 11230 + }, + { + "epoch": 1.6549607072691552, + "grad_norm": 0.5810937285423279, + "learning_rate": 2.113088272034363e-06, + "loss": 0.5195, + "step": 11231 + }, + { + "epoch": 1.6551080550098232, + "grad_norm": 0.5976927280426025, + "learning_rate": 2.1127052501389476e-06, + "loss": 0.5331, + "step": 11232 + }, + { + "epoch": 1.655255402750491, + "grad_norm": 0.5951833128929138, + "learning_rate": 2.1123222375577896e-06, + "loss": 0.5383, + "step": 11233 + }, + { + "epoch": 1.655402750491159, + "grad_norm": 0.5645279288291931, + "learning_rate": 2.1119392343001e-06, + "loss": 0.5254, + "step": 11234 + }, + { + "epoch": 1.655550098231827, + "grad_norm": 0.658301055431366, + "learning_rate": 2.1115562403750894e-06, + "loss": 0.5087, + "step": 11235 + }, + { + "epoch": 1.6556974459724951, + "grad_norm": 0.5983149409294128, + "learning_rate": 2.1111732557919693e-06, + "loss": 0.5231, + "step": 11236 + }, + { + "epoch": 1.655844793713163, + "grad_norm": 0.6592406034469604, + "learning_rate": 2.11079028055995e-06, + "loss": 0.5361, + "step": 11237 + }, + { + "epoch": 1.655992141453831, + "grad_norm": 0.6082925200462341, + "learning_rate": 2.110407314688242e-06, + "loss": 0.5276, + "step": 11238 + }, + { + "epoch": 1.6561394891944992, + "grad_norm": 0.6365618705749512, + "learning_rate": 2.110024358186055e-06, + "loss": 0.5454, + "step": 11239 + }, + { + "epoch": 1.656286836935167, + "grad_norm": 0.6153686046600342, + "learning_rate": 2.1096414110626e-06, + "loss": 0.5338, + "step": 11240 + }, + { + "epoch": 1.656434184675835, + "grad_norm": 0.5929551720619202, + "learning_rate": 2.1092584733270855e-06, + "loss": 0.5469, + "step": 11241 + }, + { + "epoch": 1.656581532416503, + "grad_norm": 0.6315720081329346, + "learning_rate": 2.108875544988721e-06, + "loss": 0.4974, + "step": 11242 + }, + { + "epoch": 1.656728880157171, + "grad_norm": 0.6734260320663452, + "learning_rate": 2.1084926260567164e-06, + "loss": 0.5327, + "step": 11243 + }, + { + "epoch": 1.6568762278978388, + "grad_norm": 0.6004292964935303, + "learning_rate": 2.1081097165402807e-06, + "loss": 0.5409, + "step": 11244 + }, + { + "epoch": 1.6570235756385068, + "grad_norm": 0.6250290274620056, + "learning_rate": 2.1077268164486226e-06, + "loss": 0.5229, + "step": 11245 + }, + { + "epoch": 1.6571709233791747, + "grad_norm": 0.6492753028869629, + "learning_rate": 2.1073439257909506e-06, + "loss": 0.5342, + "step": 11246 + }, + { + "epoch": 1.6573182711198429, + "grad_norm": 0.5742637515068054, + "learning_rate": 2.1069610445764733e-06, + "loss": 0.5285, + "step": 11247 + }, + { + "epoch": 1.6574656188605108, + "grad_norm": 0.6261954307556152, + "learning_rate": 2.106578172814398e-06, + "loss": 0.5187, + "step": 11248 + }, + { + "epoch": 1.6576129666011787, + "grad_norm": 0.6035549640655518, + "learning_rate": 2.1061953105139334e-06, + "loss": 0.5396, + "step": 11249 + }, + { + "epoch": 1.657760314341847, + "grad_norm": 0.6049785614013672, + "learning_rate": 2.1058124576842866e-06, + "loss": 0.5419, + "step": 11250 + }, + { + "epoch": 1.6579076620825148, + "grad_norm": 0.5542066097259521, + "learning_rate": 2.1054296143346655e-06, + "loss": 0.5248, + "step": 11251 + }, + { + "epoch": 1.6580550098231828, + "grad_norm": 0.6240285038948059, + "learning_rate": 2.1050467804742775e-06, + "loss": 0.5201, + "step": 11252 + }, + { + "epoch": 1.6582023575638507, + "grad_norm": 0.627313494682312, + "learning_rate": 2.1046639561123285e-06, + "loss": 0.518, + "step": 11253 + }, + { + "epoch": 1.6583497053045186, + "grad_norm": 0.579695999622345, + "learning_rate": 2.104281141258026e-06, + "loss": 0.4712, + "step": 11254 + }, + { + "epoch": 1.6584970530451866, + "grad_norm": 0.6109254360198975, + "learning_rate": 2.1038983359205773e-06, + "loss": 0.5602, + "step": 11255 + }, + { + "epoch": 1.6586444007858545, + "grad_norm": 0.6138448119163513, + "learning_rate": 2.103515540109187e-06, + "loss": 0.528, + "step": 11256 + }, + { + "epoch": 1.6587917485265224, + "grad_norm": 0.6227081418037415, + "learning_rate": 2.1031327538330624e-06, + "loss": 0.5349, + "step": 11257 + }, + { + "epoch": 1.6589390962671906, + "grad_norm": 0.6402378678321838, + "learning_rate": 2.1027499771014086e-06, + "loss": 0.5307, + "step": 11258 + }, + { + "epoch": 1.6590864440078585, + "grad_norm": 0.5668720006942749, + "learning_rate": 2.1023672099234314e-06, + "loss": 0.5238, + "step": 11259 + }, + { + "epoch": 1.6592337917485265, + "grad_norm": 0.5936813354492188, + "learning_rate": 2.101984452308337e-06, + "loss": 0.5321, + "step": 11260 + }, + { + "epoch": 1.6593811394891946, + "grad_norm": 0.6141794919967651, + "learning_rate": 2.1016017042653294e-06, + "loss": 0.5485, + "step": 11261 + }, + { + "epoch": 1.6595284872298626, + "grad_norm": 0.6340028047561646, + "learning_rate": 2.101218965803614e-06, + "loss": 0.5132, + "step": 11262 + }, + { + "epoch": 1.6596758349705305, + "grad_norm": 0.5934224128723145, + "learning_rate": 2.1008362369323952e-06, + "loss": 0.5187, + "step": 11263 + }, + { + "epoch": 1.6598231827111984, + "grad_norm": 0.6431472301483154, + "learning_rate": 2.1004535176608783e-06, + "loss": 0.5199, + "step": 11264 + }, + { + "epoch": 1.6599705304518664, + "grad_norm": 0.663366436958313, + "learning_rate": 2.100070807998266e-06, + "loss": 0.5191, + "step": 11265 + }, + { + "epoch": 1.6601178781925343, + "grad_norm": 0.571725606918335, + "learning_rate": 2.099688107953764e-06, + "loss": 0.5028, + "step": 11266 + }, + { + "epoch": 1.6602652259332022, + "grad_norm": 0.5853416919708252, + "learning_rate": 2.0993054175365755e-06, + "loss": 0.5357, + "step": 11267 + }, + { + "epoch": 1.6604125736738702, + "grad_norm": 0.6084394454956055, + "learning_rate": 2.098922736755903e-06, + "loss": 0.5297, + "step": 11268 + }, + { + "epoch": 1.6605599214145383, + "grad_norm": 0.6031394600868225, + "learning_rate": 2.098540065620951e-06, + "loss": 0.5088, + "step": 11269 + }, + { + "epoch": 1.6607072691552063, + "grad_norm": 0.566952109336853, + "learning_rate": 2.098157404140922e-06, + "loss": 0.5739, + "step": 11270 + }, + { + "epoch": 1.6608546168958742, + "grad_norm": 0.616141140460968, + "learning_rate": 2.0977747523250188e-06, + "loss": 0.5299, + "step": 11271 + }, + { + "epoch": 1.6610019646365424, + "grad_norm": 0.6015310883522034, + "learning_rate": 2.097392110182444e-06, + "loss": 0.5489, + "step": 11272 + }, + { + "epoch": 1.6611493123772103, + "grad_norm": 0.6256542205810547, + "learning_rate": 2.0970094777224005e-06, + "loss": 0.5096, + "step": 11273 + }, + { + "epoch": 1.6612966601178782, + "grad_norm": 0.6019378900527954, + "learning_rate": 2.0966268549540896e-06, + "loss": 0.5494, + "step": 11274 + }, + { + "epoch": 1.6614440078585462, + "grad_norm": 0.6018205881118774, + "learning_rate": 2.096244241886714e-06, + "loss": 0.5365, + "step": 11275 + }, + { + "epoch": 1.6615913555992141, + "grad_norm": 0.6337941884994507, + "learning_rate": 2.095861638529475e-06, + "loss": 0.511, + "step": 11276 + }, + { + "epoch": 1.661738703339882, + "grad_norm": 0.5900132656097412, + "learning_rate": 2.095479044891574e-06, + "loss": 0.5406, + "step": 11277 + }, + { + "epoch": 1.66188605108055, + "grad_norm": 0.6492214798927307, + "learning_rate": 2.095096460982212e-06, + "loss": 0.4978, + "step": 11278 + }, + { + "epoch": 1.662033398821218, + "grad_norm": 0.5561586022377014, + "learning_rate": 2.0947138868105896e-06, + "loss": 0.5411, + "step": 11279 + }, + { + "epoch": 1.662180746561886, + "grad_norm": 0.6036345958709717, + "learning_rate": 2.094331322385909e-06, + "loss": 0.5518, + "step": 11280 + }, + { + "epoch": 1.662328094302554, + "grad_norm": 0.620385468006134, + "learning_rate": 2.09394876771737e-06, + "loss": 0.5071, + "step": 11281 + }, + { + "epoch": 1.662475442043222, + "grad_norm": 0.6034526824951172, + "learning_rate": 2.093566222814172e-06, + "loss": 0.5256, + "step": 11282 + }, + { + "epoch": 1.66262278978389, + "grad_norm": 0.6041215658187866, + "learning_rate": 2.0931836876855164e-06, + "loss": 0.5301, + "step": 11283 + }, + { + "epoch": 1.662770137524558, + "grad_norm": 0.6081304550170898, + "learning_rate": 2.0928011623406024e-06, + "loss": 0.5241, + "step": 11284 + }, + { + "epoch": 1.662917485265226, + "grad_norm": 0.5731514096260071, + "learning_rate": 2.0924186467886293e-06, + "loss": 0.5296, + "step": 11285 + }, + { + "epoch": 1.663064833005894, + "grad_norm": 0.6079497337341309, + "learning_rate": 2.0920361410387964e-06, + "loss": 0.5366, + "step": 11286 + }, + { + "epoch": 1.6632121807465619, + "grad_norm": 0.590358316898346, + "learning_rate": 2.0916536451003032e-06, + "loss": 0.5426, + "step": 11287 + }, + { + "epoch": 1.6633595284872298, + "grad_norm": 0.638238251209259, + "learning_rate": 2.0912711589823477e-06, + "loss": 0.4892, + "step": 11288 + }, + { + "epoch": 1.6635068762278977, + "grad_norm": 0.5720162391662598, + "learning_rate": 2.0908886826941295e-06, + "loss": 0.4996, + "step": 11289 + }, + { + "epoch": 1.6636542239685657, + "grad_norm": 0.5995236039161682, + "learning_rate": 2.0905062162448466e-06, + "loss": 0.5078, + "step": 11290 + }, + { + "epoch": 1.6638015717092338, + "grad_norm": 0.5984612703323364, + "learning_rate": 2.0901237596436976e-06, + "loss": 0.5515, + "step": 11291 + }, + { + "epoch": 1.6639489194499018, + "grad_norm": 0.574663519859314, + "learning_rate": 2.0897413128998794e-06, + "loss": 0.519, + "step": 11292 + }, + { + "epoch": 1.66409626719057, + "grad_norm": 0.622238278388977, + "learning_rate": 2.08935887602259e-06, + "loss": 0.4943, + "step": 11293 + }, + { + "epoch": 1.6642436149312378, + "grad_norm": 0.5977858304977417, + "learning_rate": 2.0889764490210274e-06, + "loss": 0.5433, + "step": 11294 + }, + { + "epoch": 1.6643909626719058, + "grad_norm": 0.6513185501098633, + "learning_rate": 2.0885940319043884e-06, + "loss": 0.5652, + "step": 11295 + }, + { + "epoch": 1.6645383104125737, + "grad_norm": 0.6132174730300903, + "learning_rate": 2.0882116246818696e-06, + "loss": 0.5199, + "step": 11296 + }, + { + "epoch": 1.6646856581532417, + "grad_norm": 0.5975970029830933, + "learning_rate": 2.0878292273626686e-06, + "loss": 0.4884, + "step": 11297 + }, + { + "epoch": 1.6648330058939096, + "grad_norm": 0.6159466505050659, + "learning_rate": 2.087446839955981e-06, + "loss": 0.5424, + "step": 11298 + }, + { + "epoch": 1.6649803536345775, + "grad_norm": 0.6213060617446899, + "learning_rate": 2.0870644624710036e-06, + "loss": 0.5252, + "step": 11299 + }, + { + "epoch": 1.6651277013752455, + "grad_norm": 0.6429519653320312, + "learning_rate": 2.086682094916932e-06, + "loss": 0.4926, + "step": 11300 + }, + { + "epoch": 1.6652750491159134, + "grad_norm": 0.6124944686889648, + "learning_rate": 2.0862997373029625e-06, + "loss": 0.5416, + "step": 11301 + }, + { + "epoch": 1.6654223968565816, + "grad_norm": 0.6397833824157715, + "learning_rate": 2.0859173896382902e-06, + "loss": 0.516, + "step": 11302 + }, + { + "epoch": 1.6655697445972495, + "grad_norm": 0.5928331613540649, + "learning_rate": 2.08553505193211e-06, + "loss": 0.5357, + "step": 11303 + }, + { + "epoch": 1.6657170923379176, + "grad_norm": 0.5880200862884521, + "learning_rate": 2.0851527241936177e-06, + "loss": 0.5263, + "step": 11304 + }, + { + "epoch": 1.6658644400785856, + "grad_norm": 0.5847657322883606, + "learning_rate": 2.084770406432008e-06, + "loss": 0.5369, + "step": 11305 + }, + { + "epoch": 1.6660117878192535, + "grad_norm": 0.587394118309021, + "learning_rate": 2.0843880986564752e-06, + "loss": 0.5261, + "step": 11306 + }, + { + "epoch": 1.6661591355599215, + "grad_norm": 0.581632137298584, + "learning_rate": 2.0840058008762134e-06, + "loss": 0.5422, + "step": 11307 + }, + { + "epoch": 1.6663064833005894, + "grad_norm": 0.619168221950531, + "learning_rate": 2.0836235131004173e-06, + "loss": 0.5148, + "step": 11308 + }, + { + "epoch": 1.6664538310412573, + "grad_norm": 0.5852216482162476, + "learning_rate": 2.0832412353382804e-06, + "loss": 0.5266, + "step": 11309 + }, + { + "epoch": 1.6666011787819253, + "grad_norm": 0.6074285507202148, + "learning_rate": 2.082858967598996e-06, + "loss": 0.521, + "step": 11310 + }, + { + "epoch": 1.6667485265225932, + "grad_norm": 0.6179641485214233, + "learning_rate": 2.082476709891758e-06, + "loss": 0.5708, + "step": 11311 + }, + { + "epoch": 1.6668958742632611, + "grad_norm": 0.6324393153190613, + "learning_rate": 2.0820944622257592e-06, + "loss": 0.5195, + "step": 11312 + }, + { + "epoch": 1.6670432220039293, + "grad_norm": 0.6579806804656982, + "learning_rate": 2.081712224610193e-06, + "loss": 0.5431, + "step": 11313 + }, + { + "epoch": 1.6671905697445972, + "grad_norm": 0.5742238163948059, + "learning_rate": 2.081329997054251e-06, + "loss": 0.533, + "step": 11314 + }, + { + "epoch": 1.6673379174852654, + "grad_norm": 0.5733186602592468, + "learning_rate": 2.080947779567127e-06, + "loss": 0.526, + "step": 11315 + }, + { + "epoch": 1.6674852652259333, + "grad_norm": 0.6012062430381775, + "learning_rate": 2.080565572158011e-06, + "loss": 0.5429, + "step": 11316 + }, + { + "epoch": 1.6676326129666013, + "grad_norm": 0.5785420536994934, + "learning_rate": 2.0801833748360974e-06, + "loss": 0.547, + "step": 11317 + }, + { + "epoch": 1.6677799607072692, + "grad_norm": 0.6392502784729004, + "learning_rate": 2.0798011876105762e-06, + "loss": 0.5314, + "step": 11318 + }, + { + "epoch": 1.6679273084479371, + "grad_norm": 0.6057840585708618, + "learning_rate": 2.0794190104906397e-06, + "loss": 0.5322, + "step": 11319 + }, + { + "epoch": 1.668074656188605, + "grad_norm": 0.6344149708747864, + "learning_rate": 2.0790368434854785e-06, + "loss": 0.529, + "step": 11320 + }, + { + "epoch": 1.668222003929273, + "grad_norm": 0.619214653968811, + "learning_rate": 2.0786546866042837e-06, + "loss": 0.5246, + "step": 11321 + }, + { + "epoch": 1.668369351669941, + "grad_norm": 0.5945306420326233, + "learning_rate": 2.0782725398562466e-06, + "loss": 0.5482, + "step": 11322 + }, + { + "epoch": 1.6685166994106089, + "grad_norm": 0.6040920615196228, + "learning_rate": 2.0778904032505566e-06, + "loss": 0.4792, + "step": 11323 + }, + { + "epoch": 1.668664047151277, + "grad_norm": 0.6016905307769775, + "learning_rate": 2.077508276796405e-06, + "loss": 0.5002, + "step": 11324 + }, + { + "epoch": 1.668811394891945, + "grad_norm": 0.6461159586906433, + "learning_rate": 2.0771261605029805e-06, + "loss": 0.5099, + "step": 11325 + }, + { + "epoch": 1.6689587426326131, + "grad_norm": 0.6003037691116333, + "learning_rate": 2.0767440543794736e-06, + "loss": 0.5334, + "step": 11326 + }, + { + "epoch": 1.669106090373281, + "grad_norm": 0.5854881405830383, + "learning_rate": 2.076361958435074e-06, + "loss": 0.5524, + "step": 11327 + }, + { + "epoch": 1.669253438113949, + "grad_norm": 0.5996814966201782, + "learning_rate": 2.0759798726789705e-06, + "loss": 0.5417, + "step": 11328 + }, + { + "epoch": 1.669400785854617, + "grad_norm": 0.6201242804527283, + "learning_rate": 2.075597797120352e-06, + "loss": 0.5152, + "step": 11329 + }, + { + "epoch": 1.6695481335952849, + "grad_norm": 0.6079175472259521, + "learning_rate": 2.0752157317684076e-06, + "loss": 0.5226, + "step": 11330 + }, + { + "epoch": 1.6696954813359528, + "grad_norm": 0.5903156399726868, + "learning_rate": 2.0748336766323256e-06, + "loss": 0.5026, + "step": 11331 + }, + { + "epoch": 1.6698428290766207, + "grad_norm": 0.6089279055595398, + "learning_rate": 2.074451631721294e-06, + "loss": 0.5315, + "step": 11332 + }, + { + "epoch": 1.6699901768172887, + "grad_norm": 0.6025678515434265, + "learning_rate": 2.074069597044502e-06, + "loss": 0.4942, + "step": 11333 + }, + { + "epoch": 1.6701375245579568, + "grad_norm": 0.5963343977928162, + "learning_rate": 2.073687572611136e-06, + "loss": 0.5181, + "step": 11334 + }, + { + "epoch": 1.6702848722986248, + "grad_norm": 0.6188786625862122, + "learning_rate": 2.0733055584303835e-06, + "loss": 0.5515, + "step": 11335 + }, + { + "epoch": 1.6704322200392927, + "grad_norm": 0.6128718256950378, + "learning_rate": 2.0729235545114323e-06, + "loss": 0.5256, + "step": 11336 + }, + { + "epoch": 1.6705795677799609, + "grad_norm": 0.6204689741134644, + "learning_rate": 2.0725415608634692e-06, + "loss": 0.5462, + "step": 11337 + }, + { + "epoch": 1.6707269155206288, + "grad_norm": 0.5873001217842102, + "learning_rate": 2.072159577495681e-06, + "loss": 0.525, + "step": 11338 + }, + { + "epoch": 1.6708742632612967, + "grad_norm": 0.5847495198249817, + "learning_rate": 2.0717776044172547e-06, + "loss": 0.5312, + "step": 11339 + }, + { + "epoch": 1.6710216110019647, + "grad_norm": 0.5856199264526367, + "learning_rate": 2.071395641637376e-06, + "loss": 0.5172, + "step": 11340 + }, + { + "epoch": 1.6711689587426326, + "grad_norm": 0.5881211161613464, + "learning_rate": 2.071013689165231e-06, + "loss": 0.5469, + "step": 11341 + }, + { + "epoch": 1.6713163064833005, + "grad_norm": 0.5990616083145142, + "learning_rate": 2.0706317470100058e-06, + "loss": 0.5361, + "step": 11342 + }, + { + "epoch": 1.6714636542239685, + "grad_norm": 0.6250334978103638, + "learning_rate": 2.0702498151808852e-06, + "loss": 0.5336, + "step": 11343 + }, + { + "epoch": 1.6716110019646364, + "grad_norm": 0.5902557969093323, + "learning_rate": 2.0698678936870557e-06, + "loss": 0.533, + "step": 11344 + }, + { + "epoch": 1.6717583497053046, + "grad_norm": 0.5966247320175171, + "learning_rate": 2.069485982537701e-06, + "loss": 0.5222, + "step": 11345 + }, + { + "epoch": 1.6719056974459725, + "grad_norm": 0.5985121726989746, + "learning_rate": 2.0691040817420065e-06, + "loss": 0.5233, + "step": 11346 + }, + { + "epoch": 1.6720530451866404, + "grad_norm": 0.6210648417472839, + "learning_rate": 2.068722191309157e-06, + "loss": 0.5415, + "step": 11347 + }, + { + "epoch": 1.6722003929273086, + "grad_norm": 0.6182976365089417, + "learning_rate": 2.0683403112483365e-06, + "loss": 0.5272, + "step": 11348 + }, + { + "epoch": 1.6723477406679765, + "grad_norm": 0.5762332677841187, + "learning_rate": 2.067958441568729e-06, + "loss": 0.5339, + "step": 11349 + }, + { + "epoch": 1.6724950884086445, + "grad_norm": 0.6320669651031494, + "learning_rate": 2.0675765822795186e-06, + "loss": 0.5482, + "step": 11350 + }, + { + "epoch": 1.6726424361493124, + "grad_norm": 0.5890970230102539, + "learning_rate": 2.067194733389888e-06, + "loss": 0.5054, + "step": 11351 + }, + { + "epoch": 1.6727897838899803, + "grad_norm": 0.6003643870353699, + "learning_rate": 2.0668128949090216e-06, + "loss": 0.5289, + "step": 11352 + }, + { + "epoch": 1.6729371316306483, + "grad_norm": 0.6255325078964233, + "learning_rate": 2.0664310668461013e-06, + "loss": 0.5009, + "step": 11353 + }, + { + "epoch": 1.6730844793713162, + "grad_norm": 0.6384484171867371, + "learning_rate": 2.0660492492103107e-06, + "loss": 0.5577, + "step": 11354 + }, + { + "epoch": 1.6732318271119841, + "grad_norm": 0.6151971220970154, + "learning_rate": 2.0656674420108325e-06, + "loss": 0.5445, + "step": 11355 + }, + { + "epoch": 1.6733791748526523, + "grad_norm": 0.6080342531204224, + "learning_rate": 2.065285645256848e-06, + "loss": 0.5325, + "step": 11356 + }, + { + "epoch": 1.6735265225933202, + "grad_norm": 0.5983710289001465, + "learning_rate": 2.0649038589575404e-06, + "loss": 0.5287, + "step": 11357 + }, + { + "epoch": 1.6736738703339882, + "grad_norm": 0.6297622919082642, + "learning_rate": 2.064522083122091e-06, + "loss": 0.5724, + "step": 11358 + }, + { + "epoch": 1.6738212180746563, + "grad_norm": 0.576178789138794, + "learning_rate": 2.064140317759681e-06, + "loss": 0.5116, + "step": 11359 + }, + { + "epoch": 1.6739685658153243, + "grad_norm": 0.6113457679748535, + "learning_rate": 2.063758562879492e-06, + "loss": 0.5615, + "step": 11360 + }, + { + "epoch": 1.6741159135559922, + "grad_norm": 0.5889472365379333, + "learning_rate": 2.063376818490705e-06, + "loss": 0.5193, + "step": 11361 + }, + { + "epoch": 1.6742632612966601, + "grad_norm": 0.5980311632156372, + "learning_rate": 2.0629950846025004e-06, + "loss": 0.5238, + "step": 11362 + }, + { + "epoch": 1.674410609037328, + "grad_norm": 0.6312637329101562, + "learning_rate": 2.0626133612240593e-06, + "loss": 0.5196, + "step": 11363 + }, + { + "epoch": 1.674557956777996, + "grad_norm": 0.6169079542160034, + "learning_rate": 2.0622316483645617e-06, + "loss": 0.5478, + "step": 11364 + }, + { + "epoch": 1.674705304518664, + "grad_norm": 0.6200231313705444, + "learning_rate": 2.0618499460331876e-06, + "loss": 0.5348, + "step": 11365 + }, + { + "epoch": 1.6748526522593319, + "grad_norm": 0.60097736120224, + "learning_rate": 2.061468254239117e-06, + "loss": 0.5312, + "step": 11366 + }, + { + "epoch": 1.675, + "grad_norm": 0.5872446894645691, + "learning_rate": 2.0610865729915288e-06, + "loss": 0.4987, + "step": 11367 + }, + { + "epoch": 1.675147347740668, + "grad_norm": 0.5965269207954407, + "learning_rate": 2.060704902299603e-06, + "loss": 0.5217, + "step": 11368 + }, + { + "epoch": 1.675294695481336, + "grad_norm": 0.6399555802345276, + "learning_rate": 2.060323242172518e-06, + "loss": 0.5325, + "step": 11369 + }, + { + "epoch": 1.675442043222004, + "grad_norm": 0.5931639075279236, + "learning_rate": 2.0599415926194526e-06, + "loss": 0.5449, + "step": 11370 + }, + { + "epoch": 1.675589390962672, + "grad_norm": 0.601029098033905, + "learning_rate": 2.0595599536495862e-06, + "loss": 0.5148, + "step": 11371 + }, + { + "epoch": 1.67573673870334, + "grad_norm": 0.5713047981262207, + "learning_rate": 2.059178325272096e-06, + "loss": 0.5041, + "step": 11372 + }, + { + "epoch": 1.6758840864440079, + "grad_norm": 0.5958921909332275, + "learning_rate": 2.0587967074961602e-06, + "loss": 0.5436, + "step": 11373 + }, + { + "epoch": 1.6760314341846758, + "grad_norm": 0.6167491674423218, + "learning_rate": 2.0584151003309567e-06, + "loss": 0.515, + "step": 11374 + }, + { + "epoch": 1.6761787819253438, + "grad_norm": 0.5803003907203674, + "learning_rate": 2.058033503785663e-06, + "loss": 0.5299, + "step": 11375 + }, + { + "epoch": 1.6763261296660117, + "grad_norm": 0.6043460369110107, + "learning_rate": 2.057651917869456e-06, + "loss": 0.5267, + "step": 11376 + }, + { + "epoch": 1.6764734774066796, + "grad_norm": 0.6147666573524475, + "learning_rate": 2.057270342591513e-06, + "loss": 0.5221, + "step": 11377 + }, + { + "epoch": 1.6766208251473478, + "grad_norm": 0.5704576373100281, + "learning_rate": 2.056888777961011e-06, + "loss": 0.5097, + "step": 11378 + }, + { + "epoch": 1.6767681728880157, + "grad_norm": 0.6292908787727356, + "learning_rate": 2.0565072239871256e-06, + "loss": 0.5334, + "step": 11379 + }, + { + "epoch": 1.6769155206286837, + "grad_norm": 0.5949814915657043, + "learning_rate": 2.0561256806790338e-06, + "loss": 0.5761, + "step": 11380 + }, + { + "epoch": 1.6770628683693518, + "grad_norm": 0.6199483871459961, + "learning_rate": 2.055744148045911e-06, + "loss": 0.531, + "step": 11381 + }, + { + "epoch": 1.6772102161100197, + "grad_norm": 0.5993669033050537, + "learning_rate": 2.055362626096933e-06, + "loss": 0.5394, + "step": 11382 + }, + { + "epoch": 1.6773575638506877, + "grad_norm": 0.6014810800552368, + "learning_rate": 2.0549811148412758e-06, + "loss": 0.5202, + "step": 11383 + }, + { + "epoch": 1.6775049115913556, + "grad_norm": 0.6047062277793884, + "learning_rate": 2.0545996142881135e-06, + "loss": 0.543, + "step": 11384 + }, + { + "epoch": 1.6776522593320236, + "grad_norm": 0.6278411149978638, + "learning_rate": 2.054218124446622e-06, + "loss": 0.5052, + "step": 11385 + }, + { + "epoch": 1.6777996070726915, + "grad_norm": 0.5964990258216858, + "learning_rate": 2.053836645325975e-06, + "loss": 0.4931, + "step": 11386 + }, + { + "epoch": 1.6779469548133594, + "grad_norm": 0.5806694626808167, + "learning_rate": 2.053455176935348e-06, + "loss": 0.5233, + "step": 11387 + }, + { + "epoch": 1.6780943025540274, + "grad_norm": 0.6291136741638184, + "learning_rate": 2.053073719283914e-06, + "loss": 0.5286, + "step": 11388 + }, + { + "epoch": 1.6782416502946955, + "grad_norm": 0.6348987221717834, + "learning_rate": 2.052692272380848e-06, + "loss": 0.5594, + "step": 11389 + }, + { + "epoch": 1.6783889980353635, + "grad_norm": 0.6141166090965271, + "learning_rate": 2.052310836235321e-06, + "loss": 0.4624, + "step": 11390 + }, + { + "epoch": 1.6785363457760314, + "grad_norm": 0.9486217498779297, + "learning_rate": 2.0519294108565104e-06, + "loss": 0.5213, + "step": 11391 + }, + { + "epoch": 1.6786836935166995, + "grad_norm": 0.5623011589050293, + "learning_rate": 2.051547996253587e-06, + "loss": 0.5039, + "step": 11392 + }, + { + "epoch": 1.6788310412573675, + "grad_norm": 0.6261928081512451, + "learning_rate": 2.051166592435724e-06, + "loss": 0.5169, + "step": 11393 + }, + { + "epoch": 1.6789783889980354, + "grad_norm": 0.6308937668800354, + "learning_rate": 2.0507851994120935e-06, + "loss": 0.5009, + "step": 11394 + }, + { + "epoch": 1.6791257367387034, + "grad_norm": 0.6012265682220459, + "learning_rate": 2.0504038171918687e-06, + "loss": 0.5469, + "step": 11395 + }, + { + "epoch": 1.6792730844793713, + "grad_norm": 0.6278182864189148, + "learning_rate": 2.0500224457842215e-06, + "loss": 0.5487, + "step": 11396 + }, + { + "epoch": 1.6794204322200392, + "grad_norm": 0.6403038501739502, + "learning_rate": 2.0496410851983227e-06, + "loss": 0.5162, + "step": 11397 + }, + { + "epoch": 1.6795677799607072, + "grad_norm": 0.6234381198883057, + "learning_rate": 2.0492597354433453e-06, + "loss": 0.529, + "step": 11398 + }, + { + "epoch": 1.679715127701375, + "grad_norm": 0.5892391204833984, + "learning_rate": 2.0488783965284596e-06, + "loss": 0.5436, + "step": 11399 + }, + { + "epoch": 1.6798624754420433, + "grad_norm": 0.5725656747817993, + "learning_rate": 2.0484970684628373e-06, + "loss": 0.5533, + "step": 11400 + }, + { + "epoch": 1.6800098231827112, + "grad_norm": 0.6201172471046448, + "learning_rate": 2.048115751255648e-06, + "loss": 0.5243, + "step": 11401 + }, + { + "epoch": 1.6801571709233791, + "grad_norm": 0.6288437247276306, + "learning_rate": 2.047734444916063e-06, + "loss": 0.5384, + "step": 11402 + }, + { + "epoch": 1.6803045186640473, + "grad_norm": 0.5769781470298767, + "learning_rate": 2.0473531494532527e-06, + "loss": 0.5688, + "step": 11403 + }, + { + "epoch": 1.6804518664047152, + "grad_norm": 0.5974081158638, + "learning_rate": 2.046971864876387e-06, + "loss": 0.5622, + "step": 11404 + }, + { + "epoch": 1.6805992141453832, + "grad_norm": 0.59477698802948, + "learning_rate": 2.0465905911946353e-06, + "loss": 0.5084, + "step": 11405 + }, + { + "epoch": 1.680746561886051, + "grad_norm": 0.6143825650215149, + "learning_rate": 2.046209328417167e-06, + "loss": 0.5341, + "step": 11406 + }, + { + "epoch": 1.680893909626719, + "grad_norm": 0.6280226707458496, + "learning_rate": 2.045828076553152e-06, + "loss": 0.4923, + "step": 11407 + }, + { + "epoch": 1.681041257367387, + "grad_norm": 0.580858588218689, + "learning_rate": 2.0454468356117582e-06, + "loss": 0.5307, + "step": 11408 + }, + { + "epoch": 1.681188605108055, + "grad_norm": 0.601067304611206, + "learning_rate": 2.045065605602155e-06, + "loss": 0.5303, + "step": 11409 + }, + { + "epoch": 1.6813359528487228, + "grad_norm": 0.5963059663772583, + "learning_rate": 2.0446843865335105e-06, + "loss": 0.5495, + "step": 11410 + }, + { + "epoch": 1.681483300589391, + "grad_norm": 0.5791304111480713, + "learning_rate": 2.044303178414993e-06, + "loss": 0.5199, + "step": 11411 + }, + { + "epoch": 1.681630648330059, + "grad_norm": 0.5785623788833618, + "learning_rate": 2.0439219812557705e-06, + "loss": 0.52, + "step": 11412 + }, + { + "epoch": 1.6817779960707269, + "grad_norm": 0.6346462965011597, + "learning_rate": 2.0435407950650105e-06, + "loss": 0.5165, + "step": 11413 + }, + { + "epoch": 1.681925343811395, + "grad_norm": 0.5586227178573608, + "learning_rate": 2.04315961985188e-06, + "loss": 0.5288, + "step": 11414 + }, + { + "epoch": 1.682072691552063, + "grad_norm": 0.5838005542755127, + "learning_rate": 2.0427784556255466e-06, + "loss": 0.5491, + "step": 11415 + }, + { + "epoch": 1.682220039292731, + "grad_norm": 0.6084684729576111, + "learning_rate": 2.0423973023951767e-06, + "loss": 0.547, + "step": 11416 + }, + { + "epoch": 1.6823673870333988, + "grad_norm": 0.6198234558105469, + "learning_rate": 2.042016160169937e-06, + "loss": 0.5492, + "step": 11417 + }, + { + "epoch": 1.6825147347740668, + "grad_norm": 0.6017231941223145, + "learning_rate": 2.0416350289589943e-06, + "loss": 0.5103, + "step": 11418 + }, + { + "epoch": 1.6826620825147347, + "grad_norm": 0.6035711765289307, + "learning_rate": 2.041253908771514e-06, + "loss": 0.5062, + "step": 11419 + }, + { + "epoch": 1.6828094302554026, + "grad_norm": 0.6094516515731812, + "learning_rate": 2.0408727996166615e-06, + "loss": 0.5232, + "step": 11420 + }, + { + "epoch": 1.6829567779960706, + "grad_norm": 0.624376654624939, + "learning_rate": 2.0404917015036035e-06, + "loss": 0.5298, + "step": 11421 + }, + { + "epoch": 1.6831041257367387, + "grad_norm": 0.576448917388916, + "learning_rate": 2.0401106144415044e-06, + "loss": 0.5256, + "step": 11422 + }, + { + "epoch": 1.6832514734774067, + "grad_norm": 0.6106398701667786, + "learning_rate": 2.039729538439529e-06, + "loss": 0.5447, + "step": 11423 + }, + { + "epoch": 1.6833988212180746, + "grad_norm": 0.6228384971618652, + "learning_rate": 2.0393484735068426e-06, + "loss": 0.5277, + "step": 11424 + }, + { + "epoch": 1.6835461689587428, + "grad_norm": 0.6151278614997864, + "learning_rate": 2.038967419652609e-06, + "loss": 0.4992, + "step": 11425 + }, + { + "epoch": 1.6836935166994107, + "grad_norm": 0.5901390314102173, + "learning_rate": 2.038586376885993e-06, + "loss": 0.4972, + "step": 11426 + }, + { + "epoch": 1.6838408644400786, + "grad_norm": 0.619636595249176, + "learning_rate": 2.0382053452161578e-06, + "loss": 0.5315, + "step": 11427 + }, + { + "epoch": 1.6839882121807466, + "grad_norm": 0.6137980222702026, + "learning_rate": 2.037824324652268e-06, + "loss": 0.5266, + "step": 11428 + }, + { + "epoch": 1.6841355599214145, + "grad_norm": 0.6223263144493103, + "learning_rate": 2.037443315203486e-06, + "loss": 0.5013, + "step": 11429 + }, + { + "epoch": 1.6842829076620824, + "grad_norm": 0.6045231819152832, + "learning_rate": 2.037062316878976e-06, + "loss": 0.5522, + "step": 11430 + }, + { + "epoch": 1.6844302554027504, + "grad_norm": 0.6373542547225952, + "learning_rate": 2.0366813296878994e-06, + "loss": 0.5461, + "step": 11431 + }, + { + "epoch": 1.6845776031434183, + "grad_norm": 0.5921975374221802, + "learning_rate": 2.03630035363942e-06, + "loss": 0.5264, + "step": 11432 + }, + { + "epoch": 1.6847249508840865, + "grad_norm": 0.6039825677871704, + "learning_rate": 2.035919388742699e-06, + "loss": 0.5444, + "step": 11433 + }, + { + "epoch": 1.6848722986247544, + "grad_norm": 0.6322339773178101, + "learning_rate": 2.0355384350069e-06, + "loss": 0.5162, + "step": 11434 + }, + { + "epoch": 1.6850196463654226, + "grad_norm": 0.5867341160774231, + "learning_rate": 2.035157492441183e-06, + "loss": 0.5016, + "step": 11435 + }, + { + "epoch": 1.6851669941060905, + "grad_norm": 0.5870938897132874, + "learning_rate": 2.0347765610547107e-06, + "loss": 0.5345, + "step": 11436 + }, + { + "epoch": 1.6853143418467584, + "grad_norm": 0.577861487865448, + "learning_rate": 2.0343956408566436e-06, + "loss": 0.5504, + "step": 11437 + }, + { + "epoch": 1.6854616895874264, + "grad_norm": 0.5972655415534973, + "learning_rate": 2.0340147318561433e-06, + "loss": 0.5391, + "step": 11438 + }, + { + "epoch": 1.6856090373280943, + "grad_norm": 0.6126350164413452, + "learning_rate": 2.03363383406237e-06, + "loss": 0.4967, + "step": 11439 + }, + { + "epoch": 1.6857563850687622, + "grad_norm": 0.6184426546096802, + "learning_rate": 2.0332529474844844e-06, + "loss": 0.5418, + "step": 11440 + }, + { + "epoch": 1.6859037328094302, + "grad_norm": 0.5862855315208435, + "learning_rate": 2.0328720721316462e-06, + "loss": 0.5582, + "step": 11441 + }, + { + "epoch": 1.6860510805500981, + "grad_norm": 0.5836980938911438, + "learning_rate": 2.0324912080130154e-06, + "loss": 0.5336, + "step": 11442 + }, + { + "epoch": 1.686198428290766, + "grad_norm": 0.6296038031578064, + "learning_rate": 2.032110355137752e-06, + "loss": 0.5279, + "step": 11443 + }, + { + "epoch": 1.6863457760314342, + "grad_norm": 0.6116330027580261, + "learning_rate": 2.0317295135150157e-06, + "loss": 0.5676, + "step": 11444 + }, + { + "epoch": 1.6864931237721021, + "grad_norm": 0.6098064184188843, + "learning_rate": 2.0313486831539644e-06, + "loss": 0.518, + "step": 11445 + }, + { + "epoch": 1.6866404715127703, + "grad_norm": 0.5948503017425537, + "learning_rate": 2.0309678640637575e-06, + "loss": 0.5379, + "step": 11446 + }, + { + "epoch": 1.6867878192534382, + "grad_norm": 0.5925956964492798, + "learning_rate": 2.030587056253553e-06, + "loss": 0.5199, + "step": 11447 + }, + { + "epoch": 1.6869351669941062, + "grad_norm": 0.6365150213241577, + "learning_rate": 2.0302062597325107e-06, + "loss": 0.4695, + "step": 11448 + }, + { + "epoch": 1.687082514734774, + "grad_norm": 0.5882795453071594, + "learning_rate": 2.029825474509787e-06, + "loss": 0.5387, + "step": 11449 + }, + { + "epoch": 1.687229862475442, + "grad_norm": 0.6050406098365784, + "learning_rate": 2.02944470059454e-06, + "loss": 0.5121, + "step": 11450 + }, + { + "epoch": 1.68737721021611, + "grad_norm": 0.6101592779159546, + "learning_rate": 2.0290639379959275e-06, + "loss": 0.5419, + "step": 11451 + }, + { + "epoch": 1.687524557956778, + "grad_norm": 0.6314029693603516, + "learning_rate": 2.028683186723106e-06, + "loss": 0.5097, + "step": 11452 + }, + { + "epoch": 1.6876719056974459, + "grad_norm": 0.612734317779541, + "learning_rate": 2.028302446785233e-06, + "loss": 0.5203, + "step": 11453 + }, + { + "epoch": 1.6878192534381138, + "grad_norm": 0.5996101498603821, + "learning_rate": 2.027921718191465e-06, + "loss": 0.5312, + "step": 11454 + }, + { + "epoch": 1.687966601178782, + "grad_norm": 0.6045476198196411, + "learning_rate": 2.027541000950958e-06, + "loss": 0.5392, + "step": 11455 + }, + { + "epoch": 1.6881139489194499, + "grad_norm": 0.6372597217559814, + "learning_rate": 2.0271602950728685e-06, + "loss": 0.5368, + "step": 11456 + }, + { + "epoch": 1.688261296660118, + "grad_norm": 0.5776920914649963, + "learning_rate": 2.026779600566352e-06, + "loss": 0.5161, + "step": 11457 + }, + { + "epoch": 1.688408644400786, + "grad_norm": 0.5862222909927368, + "learning_rate": 2.0263989174405643e-06, + "loss": 0.5058, + "step": 11458 + }, + { + "epoch": 1.688555992141454, + "grad_norm": 0.5932839512825012, + "learning_rate": 2.0260182457046604e-06, + "loss": 0.5257, + "step": 11459 + }, + { + "epoch": 1.6887033398821218, + "grad_norm": 0.6036797165870667, + "learning_rate": 2.0256375853677957e-06, + "loss": 0.529, + "step": 11460 + }, + { + "epoch": 1.6888506876227898, + "grad_norm": 0.6071561574935913, + "learning_rate": 2.0252569364391243e-06, + "loss": 0.5215, + "step": 11461 + }, + { + "epoch": 1.6889980353634577, + "grad_norm": 0.5867946743965149, + "learning_rate": 2.024876298927801e-06, + "loss": 0.5017, + "step": 11462 + }, + { + "epoch": 1.6891453831041257, + "grad_norm": 0.6502769589424133, + "learning_rate": 2.0244956728429784e-06, + "loss": 0.5524, + "step": 11463 + }, + { + "epoch": 1.6892927308447936, + "grad_norm": 0.587607204914093, + "learning_rate": 2.0241150581938134e-06, + "loss": 0.5274, + "step": 11464 + }, + { + "epoch": 1.6894400785854615, + "grad_norm": 0.556632936000824, + "learning_rate": 2.0237344549894578e-06, + "loss": 0.5009, + "step": 11465 + }, + { + "epoch": 1.6895874263261297, + "grad_norm": 0.5774903893470764, + "learning_rate": 2.0233538632390657e-06, + "loss": 0.5287, + "step": 11466 + }, + { + "epoch": 1.6897347740667976, + "grad_norm": 0.5679711103439331, + "learning_rate": 2.022973282951789e-06, + "loss": 0.5382, + "step": 11467 + }, + { + "epoch": 1.6898821218074658, + "grad_norm": 0.6134912371635437, + "learning_rate": 2.0225927141367814e-06, + "loss": 0.493, + "step": 11468 + }, + { + "epoch": 1.6900294695481337, + "grad_norm": 0.584132194519043, + "learning_rate": 2.022212156803195e-06, + "loss": 0.5571, + "step": 11469 + }, + { + "epoch": 1.6901768172888016, + "grad_norm": 0.5973325371742249, + "learning_rate": 2.0218316109601825e-06, + "loss": 0.5437, + "step": 11470 + }, + { + "epoch": 1.6903241650294696, + "grad_norm": 0.5756487846374512, + "learning_rate": 2.021451076616895e-06, + "loss": 0.5173, + "step": 11471 + }, + { + "epoch": 1.6904715127701375, + "grad_norm": 0.5560109615325928, + "learning_rate": 2.021070553782485e-06, + "loss": 0.5324, + "step": 11472 + }, + { + "epoch": 1.6906188605108055, + "grad_norm": 0.6251965761184692, + "learning_rate": 2.020690042466103e-06, + "loss": 0.5447, + "step": 11473 + }, + { + "epoch": 1.6907662082514734, + "grad_norm": 0.6053615212440491, + "learning_rate": 2.020309542676901e-06, + "loss": 0.5011, + "step": 11474 + }, + { + "epoch": 1.6909135559921413, + "grad_norm": 0.6039064526557922, + "learning_rate": 2.01992905442403e-06, + "loss": 0.581, + "step": 11475 + }, + { + "epoch": 1.6910609037328095, + "grad_norm": 0.6007823944091797, + "learning_rate": 2.0195485777166392e-06, + "loss": 0.5468, + "step": 11476 + }, + { + "epoch": 1.6912082514734774, + "grad_norm": 0.5888518691062927, + "learning_rate": 2.01916811256388e-06, + "loss": 0.5424, + "step": 11477 + }, + { + "epoch": 1.6913555992141454, + "grad_norm": 0.5756382346153259, + "learning_rate": 2.0187876589749023e-06, + "loss": 0.5285, + "step": 11478 + }, + { + "epoch": 1.6915029469548135, + "grad_norm": 0.5863697528839111, + "learning_rate": 2.018407216958856e-06, + "loss": 0.5525, + "step": 11479 + }, + { + "epoch": 1.6916502946954814, + "grad_norm": 0.6007311344146729, + "learning_rate": 2.0180267865248898e-06, + "loss": 0.5243, + "step": 11480 + }, + { + "epoch": 1.6917976424361494, + "grad_norm": 0.5904764533042908, + "learning_rate": 2.0176463676821535e-06, + "loss": 0.5344, + "step": 11481 + }, + { + "epoch": 1.6919449901768173, + "grad_norm": 0.6224201321601868, + "learning_rate": 2.0172659604397955e-06, + "loss": 0.5662, + "step": 11482 + }, + { + "epoch": 1.6920923379174853, + "grad_norm": 0.5822588205337524, + "learning_rate": 2.016885564806965e-06, + "loss": 0.4939, + "step": 11483 + }, + { + "epoch": 1.6922396856581532, + "grad_norm": 0.6387345194816589, + "learning_rate": 2.0165051807928104e-06, + "loss": 0.5442, + "step": 11484 + }, + { + "epoch": 1.6923870333988211, + "grad_norm": 0.6070008873939514, + "learning_rate": 2.016124808406479e-06, + "loss": 0.5556, + "step": 11485 + }, + { + "epoch": 1.692534381139489, + "grad_norm": 0.5767913460731506, + "learning_rate": 2.0157444476571193e-06, + "loss": 0.5131, + "step": 11486 + }, + { + "epoch": 1.6926817288801572, + "grad_norm": 0.6065965294837952, + "learning_rate": 2.0153640985538784e-06, + "loss": 0.5183, + "step": 11487 + }, + { + "epoch": 1.6928290766208252, + "grad_norm": 0.6075121760368347, + "learning_rate": 2.0149837611059033e-06, + "loss": 0.5027, + "step": 11488 + }, + { + "epoch": 1.692976424361493, + "grad_norm": 0.6041368246078491, + "learning_rate": 2.014603435322342e-06, + "loss": 0.5504, + "step": 11489 + }, + { + "epoch": 1.6931237721021613, + "grad_norm": 0.6375484466552734, + "learning_rate": 2.0142231212123398e-06, + "loss": 0.5209, + "step": 11490 + }, + { + "epoch": 1.6932711198428292, + "grad_norm": 0.6067923307418823, + "learning_rate": 2.0138428187850435e-06, + "loss": 0.5212, + "step": 11491 + }, + { + "epoch": 1.6934184675834971, + "grad_norm": 0.6261146068572998, + "learning_rate": 2.0134625280496e-06, + "loss": 0.5243, + "step": 11492 + }, + { + "epoch": 1.693565815324165, + "grad_norm": 0.5734266638755798, + "learning_rate": 2.013082249015154e-06, + "loss": 0.5135, + "step": 11493 + }, + { + "epoch": 1.693713163064833, + "grad_norm": 0.6333805918693542, + "learning_rate": 2.012701981690852e-06, + "loss": 0.5081, + "step": 11494 + }, + { + "epoch": 1.693860510805501, + "grad_norm": 0.6097116470336914, + "learning_rate": 2.0123217260858383e-06, + "loss": 0.5184, + "step": 11495 + }, + { + "epoch": 1.6940078585461689, + "grad_norm": 0.6299377083778381, + "learning_rate": 2.0119414822092587e-06, + "loss": 0.5418, + "step": 11496 + }, + { + "epoch": 1.6941552062868368, + "grad_norm": 0.5982630848884583, + "learning_rate": 2.011561250070258e-06, + "loss": 0.5287, + "step": 11497 + }, + { + "epoch": 1.694302554027505, + "grad_norm": 0.6256794333457947, + "learning_rate": 2.0111810296779796e-06, + "loss": 0.5077, + "step": 11498 + }, + { + "epoch": 1.694449901768173, + "grad_norm": 0.5856954455375671, + "learning_rate": 2.010800821041568e-06, + "loss": 0.4857, + "step": 11499 + }, + { + "epoch": 1.6945972495088408, + "grad_norm": 0.5925942063331604, + "learning_rate": 2.010420624170166e-06, + "loss": 0.532, + "step": 11500 + }, + { + "epoch": 1.694744597249509, + "grad_norm": 0.6490039229393005, + "learning_rate": 2.01004043907292e-06, + "loss": 0.5357, + "step": 11501 + }, + { + "epoch": 1.694891944990177, + "grad_norm": 0.6061179041862488, + "learning_rate": 2.0096602657589713e-06, + "loss": 0.5441, + "step": 11502 + }, + { + "epoch": 1.6950392927308449, + "grad_norm": 0.6194128394126892, + "learning_rate": 2.0092801042374634e-06, + "loss": 0.5342, + "step": 11503 + }, + { + "epoch": 1.6951866404715128, + "grad_norm": 0.5811118483543396, + "learning_rate": 2.008899954517539e-06, + "loss": 0.521, + "step": 11504 + }, + { + "epoch": 1.6953339882121807, + "grad_norm": 0.6154339909553528, + "learning_rate": 2.0085198166083397e-06, + "loss": 0.5402, + "step": 11505 + }, + { + "epoch": 1.6954813359528487, + "grad_norm": 0.5582705140113831, + "learning_rate": 2.0081396905190085e-06, + "loss": 0.5291, + "step": 11506 + }, + { + "epoch": 1.6956286836935166, + "grad_norm": 0.5773485898971558, + "learning_rate": 2.0077595762586872e-06, + "loss": 0.5198, + "step": 11507 + }, + { + "epoch": 1.6957760314341845, + "grad_norm": 0.5915935039520264, + "learning_rate": 2.0073794738365175e-06, + "loss": 0.5487, + "step": 11508 + }, + { + "epoch": 1.6959233791748527, + "grad_norm": 0.5858554840087891, + "learning_rate": 2.0069993832616404e-06, + "loss": 0.5423, + "step": 11509 + }, + { + "epoch": 1.6960707269155206, + "grad_norm": 0.5970928072929382, + "learning_rate": 2.0066193045431964e-06, + "loss": 0.5047, + "step": 11510 + }, + { + "epoch": 1.6962180746561886, + "grad_norm": 0.5957722067832947, + "learning_rate": 2.006239237690327e-06, + "loss": 0.521, + "step": 11511 + }, + { + "epoch": 1.6963654223968567, + "grad_norm": 0.6166214346885681, + "learning_rate": 2.0058591827121723e-06, + "loss": 0.5272, + "step": 11512 + }, + { + "epoch": 1.6965127701375247, + "grad_norm": 0.6471192836761475, + "learning_rate": 2.0054791396178723e-06, + "loss": 0.5319, + "step": 11513 + }, + { + "epoch": 1.6966601178781926, + "grad_norm": 0.5614808797836304, + "learning_rate": 2.0050991084165676e-06, + "loss": 0.5412, + "step": 11514 + }, + { + "epoch": 1.6968074656188605, + "grad_norm": 0.6012758612632751, + "learning_rate": 2.0047190891173972e-06, + "loss": 0.5483, + "step": 11515 + }, + { + "epoch": 1.6969548133595285, + "grad_norm": 0.6027750968933105, + "learning_rate": 2.0043390817295e-06, + "loss": 0.5131, + "step": 11516 + }, + { + "epoch": 1.6971021611001964, + "grad_norm": 0.6093142032623291, + "learning_rate": 2.0039590862620155e-06, + "loss": 0.5668, + "step": 11517 + }, + { + "epoch": 1.6972495088408643, + "grad_norm": 0.6088322997093201, + "learning_rate": 2.003579102724082e-06, + "loss": 0.5617, + "step": 11518 + }, + { + "epoch": 1.6973968565815323, + "grad_norm": 0.5925126671791077, + "learning_rate": 2.0031991311248387e-06, + "loss": 0.5166, + "step": 11519 + }, + { + "epoch": 1.6975442043222004, + "grad_norm": 0.5825607180595398, + "learning_rate": 2.002819171473423e-06, + "loss": 0.544, + "step": 11520 + }, + { + "epoch": 1.6976915520628684, + "grad_norm": 0.5905587077140808, + "learning_rate": 2.002439223778973e-06, + "loss": 0.5222, + "step": 11521 + }, + { + "epoch": 1.6978388998035363, + "grad_norm": 0.584155797958374, + "learning_rate": 2.0020592880506263e-06, + "loss": 0.5476, + "step": 11522 + }, + { + "epoch": 1.6979862475442045, + "grad_norm": 0.596538245677948, + "learning_rate": 2.0016793642975207e-06, + "loss": 0.5241, + "step": 11523 + }, + { + "epoch": 1.6981335952848724, + "grad_norm": 0.5869702100753784, + "learning_rate": 2.001299452528792e-06, + "loss": 0.5627, + "step": 11524 + }, + { + "epoch": 1.6982809430255403, + "grad_norm": 0.6056994795799255, + "learning_rate": 2.000919552753578e-06, + "loss": 0.5231, + "step": 11525 + }, + { + "epoch": 1.6984282907662083, + "grad_norm": 0.6378287672996521, + "learning_rate": 2.0005396649810145e-06, + "loss": 0.5268, + "step": 11526 + }, + { + "epoch": 1.6985756385068762, + "grad_norm": 0.5992657542228699, + "learning_rate": 2.000159789220238e-06, + "loss": 0.5415, + "step": 11527 + }, + { + "epoch": 1.6987229862475441, + "grad_norm": 0.6096765995025635, + "learning_rate": 1.9997799254803835e-06, + "loss": 0.5438, + "step": 11528 + }, + { + "epoch": 1.698870333988212, + "grad_norm": 0.5907707810401917, + "learning_rate": 1.999400073770588e-06, + "loss": 0.5462, + "step": 11529 + }, + { + "epoch": 1.69901768172888, + "grad_norm": 0.5904582738876343, + "learning_rate": 1.9990202340999852e-06, + "loss": 0.5065, + "step": 11530 + }, + { + "epoch": 1.6991650294695482, + "grad_norm": 0.6242498159408569, + "learning_rate": 1.9986404064777114e-06, + "loss": 0.5154, + "step": 11531 + }, + { + "epoch": 1.699312377210216, + "grad_norm": 0.5703064203262329, + "learning_rate": 1.9982605909129003e-06, + "loss": 0.5175, + "step": 11532 + }, + { + "epoch": 1.699459724950884, + "grad_norm": 0.6169881820678711, + "learning_rate": 1.9978807874146864e-06, + "loss": 0.5194, + "step": 11533 + }, + { + "epoch": 1.6996070726915522, + "grad_norm": 0.5910190939903259, + "learning_rate": 1.997500995992205e-06, + "loss": 0.502, + "step": 11534 + }, + { + "epoch": 1.6997544204322201, + "grad_norm": 0.6355990767478943, + "learning_rate": 1.997121216654588e-06, + "loss": 0.5346, + "step": 11535 + }, + { + "epoch": 1.699901768172888, + "grad_norm": 0.6045966148376465, + "learning_rate": 1.99674144941097e-06, + "loss": 0.5499, + "step": 11536 + }, + { + "epoch": 1.700049115913556, + "grad_norm": 0.5686396360397339, + "learning_rate": 1.996361694270484e-06, + "loss": 0.5483, + "step": 11537 + }, + { + "epoch": 1.700196463654224, + "grad_norm": 0.5809792280197144, + "learning_rate": 1.9959819512422636e-06, + "loss": 0.5331, + "step": 11538 + }, + { + "epoch": 1.7003438113948919, + "grad_norm": 0.600383460521698, + "learning_rate": 1.9956022203354407e-06, + "loss": 0.5307, + "step": 11539 + }, + { + "epoch": 1.7004911591355598, + "grad_norm": 0.5941588878631592, + "learning_rate": 1.9952225015591474e-06, + "loss": 0.4924, + "step": 11540 + }, + { + "epoch": 1.7006385068762278, + "grad_norm": 0.5879323482513428, + "learning_rate": 1.9948427949225167e-06, + "loss": 0.5068, + "step": 11541 + }, + { + "epoch": 1.700785854616896, + "grad_norm": 0.6204105615615845, + "learning_rate": 1.9944631004346796e-06, + "loss": 0.5269, + "step": 11542 + }, + { + "epoch": 1.7009332023575638, + "grad_norm": 0.5921027660369873, + "learning_rate": 1.994083418104768e-06, + "loss": 0.5584, + "step": 11543 + }, + { + "epoch": 1.7010805500982318, + "grad_norm": 0.6246852278709412, + "learning_rate": 1.993703747941913e-06, + "loss": 0.5194, + "step": 11544 + }, + { + "epoch": 1.7012278978389, + "grad_norm": 0.6066559553146362, + "learning_rate": 1.9933240899552453e-06, + "loss": 0.5349, + "step": 11545 + }, + { + "epoch": 1.7013752455795679, + "grad_norm": 0.6173602938652039, + "learning_rate": 1.992944444153895e-06, + "loss": 0.5204, + "step": 11546 + }, + { + "epoch": 1.7015225933202358, + "grad_norm": 0.59098881483078, + "learning_rate": 1.9925648105469938e-06, + "loss": 0.5494, + "step": 11547 + }, + { + "epoch": 1.7016699410609037, + "grad_norm": 0.5920909643173218, + "learning_rate": 1.992185189143671e-06, + "loss": 0.553, + "step": 11548 + }, + { + "epoch": 1.7018172888015717, + "grad_norm": 0.6019975543022156, + "learning_rate": 1.9918055799530554e-06, + "loss": 0.5249, + "step": 11549 + }, + { + "epoch": 1.7019646365422396, + "grad_norm": 0.8997403383255005, + "learning_rate": 1.991425982984278e-06, + "loss": 0.4747, + "step": 11550 + }, + { + "epoch": 1.7021119842829076, + "grad_norm": 0.5913712382316589, + "learning_rate": 1.9910463982464667e-06, + "loss": 0.5216, + "step": 11551 + }, + { + "epoch": 1.7022593320235755, + "grad_norm": 0.6120492815971375, + "learning_rate": 1.990666825748751e-06, + "loss": 0.4935, + "step": 11552 + }, + { + "epoch": 1.7024066797642436, + "grad_norm": 0.5965938568115234, + "learning_rate": 1.990287265500259e-06, + "loss": 0.5321, + "step": 11553 + }, + { + "epoch": 1.7025540275049116, + "grad_norm": 0.5775196552276611, + "learning_rate": 1.98990771751012e-06, + "loss": 0.5502, + "step": 11554 + }, + { + "epoch": 1.7027013752455795, + "grad_norm": 0.6060889959335327, + "learning_rate": 1.98952818178746e-06, + "loss": 0.5233, + "step": 11555 + }, + { + "epoch": 1.7028487229862477, + "grad_norm": 0.5989891290664673, + "learning_rate": 1.9891486583414083e-06, + "loss": 0.5389, + "step": 11556 + }, + { + "epoch": 1.7029960707269156, + "grad_norm": 0.618607223033905, + "learning_rate": 1.988769147181092e-06, + "loss": 0.5078, + "step": 11557 + }, + { + "epoch": 1.7031434184675835, + "grad_norm": 0.5982061624526978, + "learning_rate": 1.988389648315638e-06, + "loss": 0.5098, + "step": 11558 + }, + { + "epoch": 1.7032907662082515, + "grad_norm": 0.6291569471359253, + "learning_rate": 1.9880101617541725e-06, + "loss": 0.5259, + "step": 11559 + }, + { + "epoch": 1.7034381139489194, + "grad_norm": 0.5983841419219971, + "learning_rate": 1.987630687505823e-06, + "loss": 0.531, + "step": 11560 + }, + { + "epoch": 1.7035854616895874, + "grad_norm": 0.6245340704917908, + "learning_rate": 1.987251225579715e-06, + "loss": 0.4977, + "step": 11561 + }, + { + "epoch": 1.7037328094302553, + "grad_norm": 0.5948473215103149, + "learning_rate": 1.986871775984974e-06, + "loss": 0.5183, + "step": 11562 + }, + { + "epoch": 1.7038801571709232, + "grad_norm": 0.617793083190918, + "learning_rate": 1.986492338730727e-06, + "loss": 0.5018, + "step": 11563 + }, + { + "epoch": 1.7040275049115914, + "grad_norm": 0.6060125827789307, + "learning_rate": 1.986112913826098e-06, + "loss": 0.5555, + "step": 11564 + }, + { + "epoch": 1.7041748526522593, + "grad_norm": 0.5957565307617188, + "learning_rate": 1.9857335012802127e-06, + "loss": 0.5733, + "step": 11565 + }, + { + "epoch": 1.7043222003929273, + "grad_norm": 0.5809439420700073, + "learning_rate": 1.985354101102195e-06, + "loss": 0.5138, + "step": 11566 + }, + { + "epoch": 1.7044695481335954, + "grad_norm": 0.5962969660758972, + "learning_rate": 1.98497471330117e-06, + "loss": 0.5207, + "step": 11567 + }, + { + "epoch": 1.7046168958742633, + "grad_norm": 0.6186302304267883, + "learning_rate": 1.984595337886261e-06, + "loss": 0.5479, + "step": 11568 + }, + { + "epoch": 1.7047642436149313, + "grad_norm": 0.595564603805542, + "learning_rate": 1.984215974866593e-06, + "loss": 0.5569, + "step": 11569 + }, + { + "epoch": 1.7049115913555992, + "grad_norm": 0.587648868560791, + "learning_rate": 1.9838366242512887e-06, + "loss": 0.5191, + "step": 11570 + }, + { + "epoch": 1.7050589390962672, + "grad_norm": 0.5832417011260986, + "learning_rate": 1.983457286049472e-06, + "loss": 0.5248, + "step": 11571 + }, + { + "epoch": 1.705206286836935, + "grad_norm": 0.6389435529708862, + "learning_rate": 1.9830779602702647e-06, + "loss": 0.5091, + "step": 11572 + }, + { + "epoch": 1.705353634577603, + "grad_norm": 0.5718219876289368, + "learning_rate": 1.98269864692279e-06, + "loss": 0.521, + "step": 11573 + }, + { + "epoch": 1.705500982318271, + "grad_norm": 0.5771855711936951, + "learning_rate": 1.9823193460161696e-06, + "loss": 0.5349, + "step": 11574 + }, + { + "epoch": 1.7056483300589391, + "grad_norm": 0.5927507281303406, + "learning_rate": 1.9819400575595266e-06, + "loss": 0.5268, + "step": 11575 + }, + { + "epoch": 1.705795677799607, + "grad_norm": 0.5938290357589722, + "learning_rate": 1.9815607815619824e-06, + "loss": 0.5392, + "step": 11576 + }, + { + "epoch": 1.7059430255402752, + "grad_norm": 0.5809314846992493, + "learning_rate": 1.981181518032658e-06, + "loss": 0.5168, + "step": 11577 + }, + { + "epoch": 1.7060903732809432, + "grad_norm": 0.5828458666801453, + "learning_rate": 1.9808022669806754e-06, + "loss": 0.5419, + "step": 11578 + }, + { + "epoch": 1.706237721021611, + "grad_norm": 0.658062219619751, + "learning_rate": 1.980423028415155e-06, + "loss": 0.5414, + "step": 11579 + }, + { + "epoch": 1.706385068762279, + "grad_norm": 0.5635340213775635, + "learning_rate": 1.9800438023452163e-06, + "loss": 0.4953, + "step": 11580 + }, + { + "epoch": 1.706532416502947, + "grad_norm": 0.5949739813804626, + "learning_rate": 1.9796645887799807e-06, + "loss": 0.5254, + "step": 11581 + }, + { + "epoch": 1.706679764243615, + "grad_norm": 0.6307817697525024, + "learning_rate": 1.9792853877285676e-06, + "loss": 0.5285, + "step": 11582 + }, + { + "epoch": 1.7068271119842828, + "grad_norm": 0.6604481339454651, + "learning_rate": 1.9789061992000967e-06, + "loss": 0.5075, + "step": 11583 + }, + { + "epoch": 1.7069744597249508, + "grad_norm": 0.5776048302650452, + "learning_rate": 1.978527023203687e-06, + "loss": 0.505, + "step": 11584 + }, + { + "epoch": 1.7071218074656187, + "grad_norm": 0.6152699589729309, + "learning_rate": 1.978147859748458e-06, + "loss": 0.5246, + "step": 11585 + }, + { + "epoch": 1.7072691552062869, + "grad_norm": 0.5944661498069763, + "learning_rate": 1.9777687088435283e-06, + "loss": 0.5238, + "step": 11586 + }, + { + "epoch": 1.7074165029469548, + "grad_norm": 0.5986544489860535, + "learning_rate": 1.977389570498016e-06, + "loss": 0.5117, + "step": 11587 + }, + { + "epoch": 1.707563850687623, + "grad_norm": 0.6016570329666138, + "learning_rate": 1.97701044472104e-06, + "loss": 0.4964, + "step": 11588 + }, + { + "epoch": 1.7077111984282909, + "grad_norm": 0.6003456115722656, + "learning_rate": 1.976631331521717e-06, + "loss": 0.5157, + "step": 11589 + }, + { + "epoch": 1.7078585461689588, + "grad_norm": 0.6338788866996765, + "learning_rate": 1.9762522309091647e-06, + "loss": 0.5293, + "step": 11590 + }, + { + "epoch": 1.7080058939096268, + "grad_norm": 0.626389741897583, + "learning_rate": 1.975873142892501e-06, + "loss": 0.5269, + "step": 11591 + }, + { + "epoch": 1.7081532416502947, + "grad_norm": 0.6001273393630981, + "learning_rate": 1.975494067480842e-06, + "loss": 0.5499, + "step": 11592 + }, + { + "epoch": 1.7083005893909626, + "grad_norm": 0.6125349402427673, + "learning_rate": 1.9751150046833055e-06, + "loss": 0.509, + "step": 11593 + }, + { + "epoch": 1.7084479371316306, + "grad_norm": 0.6289119720458984, + "learning_rate": 1.974735954509006e-06, + "loss": 0.5378, + "step": 11594 + }, + { + "epoch": 1.7085952848722985, + "grad_norm": 0.5945587158203125, + "learning_rate": 1.9743569169670614e-06, + "loss": 0.5427, + "step": 11595 + }, + { + "epoch": 1.7087426326129664, + "grad_norm": 0.6175783276557922, + "learning_rate": 1.9739778920665856e-06, + "loss": 0.5125, + "step": 11596 + }, + { + "epoch": 1.7088899803536346, + "grad_norm": 0.6013296246528625, + "learning_rate": 1.973598879816695e-06, + "loss": 0.5521, + "step": 11597 + }, + { + "epoch": 1.7090373280943025, + "grad_norm": 0.6159368753433228, + "learning_rate": 1.9732198802265044e-06, + "loss": 0.5567, + "step": 11598 + }, + { + "epoch": 1.7091846758349707, + "grad_norm": 0.5950815677642822, + "learning_rate": 1.9728408933051288e-06, + "loss": 0.5198, + "step": 11599 + }, + { + "epoch": 1.7093320235756386, + "grad_norm": 0.5944063663482666, + "learning_rate": 1.9724619190616824e-06, + "loss": 0.5079, + "step": 11600 + }, + { + "epoch": 1.7094793713163066, + "grad_norm": 0.592106282711029, + "learning_rate": 1.9720829575052793e-06, + "loss": 0.5026, + "step": 11601 + }, + { + "epoch": 1.7096267190569745, + "grad_norm": 0.5872070789337158, + "learning_rate": 1.9717040086450336e-06, + "loss": 0.5148, + "step": 11602 + }, + { + "epoch": 1.7097740667976424, + "grad_norm": 0.5799901485443115, + "learning_rate": 1.971325072490059e-06, + "loss": 0.4955, + "step": 11603 + }, + { + "epoch": 1.7099214145383104, + "grad_norm": 0.6351662874221802, + "learning_rate": 1.9709461490494676e-06, + "loss": 0.5076, + "step": 11604 + }, + { + "epoch": 1.7100687622789783, + "grad_norm": 0.5733173489570618, + "learning_rate": 1.970567238332374e-06, + "loss": 0.5133, + "step": 11605 + }, + { + "epoch": 1.7102161100196462, + "grad_norm": 0.6329832673072815, + "learning_rate": 1.9701883403478894e-06, + "loss": 0.536, + "step": 11606 + }, + { + "epoch": 1.7103634577603142, + "grad_norm": 0.5488272309303284, + "learning_rate": 1.9698094551051272e-06, + "loss": 0.5289, + "step": 11607 + }, + { + "epoch": 1.7105108055009823, + "grad_norm": 0.5841234922409058, + "learning_rate": 1.969430582613199e-06, + "loss": 0.5366, + "step": 11608 + }, + { + "epoch": 1.7106581532416503, + "grad_norm": 0.6059290170669556, + "learning_rate": 1.9690517228812157e-06, + "loss": 0.5367, + "step": 11609 + }, + { + "epoch": 1.7108055009823184, + "grad_norm": 0.6013659834861755, + "learning_rate": 1.96867287591829e-06, + "loss": 0.5161, + "step": 11610 + }, + { + "epoch": 1.7109528487229864, + "grad_norm": 0.5760628581047058, + "learning_rate": 1.9682940417335315e-06, + "loss": 0.5553, + "step": 11611 + }, + { + "epoch": 1.7111001964636543, + "grad_norm": 0.5867592096328735, + "learning_rate": 1.967915220336053e-06, + "loss": 0.5437, + "step": 11612 + }, + { + "epoch": 1.7112475442043222, + "grad_norm": 0.620838463306427, + "learning_rate": 1.967536411734964e-06, + "loss": 0.5148, + "step": 11613 + }, + { + "epoch": 1.7113948919449902, + "grad_norm": 0.6356079578399658, + "learning_rate": 1.9671576159393736e-06, + "loss": 0.5116, + "step": 11614 + }, + { + "epoch": 1.711542239685658, + "grad_norm": 0.6191105842590332, + "learning_rate": 1.9667788329583935e-06, + "loss": 0.5214, + "step": 11615 + }, + { + "epoch": 1.711689587426326, + "grad_norm": 0.6049318909645081, + "learning_rate": 1.966400062801132e-06, + "loss": 0.5149, + "step": 11616 + }, + { + "epoch": 1.711836935166994, + "grad_norm": 0.5410767793655396, + "learning_rate": 1.9660213054766987e-06, + "loss": 0.5029, + "step": 11617 + }, + { + "epoch": 1.7119842829076621, + "grad_norm": 0.5935559272766113, + "learning_rate": 1.9656425609942025e-06, + "loss": 0.5001, + "step": 11618 + }, + { + "epoch": 1.71213163064833, + "grad_norm": 0.6069359183311462, + "learning_rate": 1.9652638293627525e-06, + "loss": 0.5478, + "step": 11619 + }, + { + "epoch": 1.712278978388998, + "grad_norm": 0.6050851941108704, + "learning_rate": 1.9648851105914564e-06, + "loss": 0.5323, + "step": 11620 + }, + { + "epoch": 1.7124263261296662, + "grad_norm": 0.6038092374801636, + "learning_rate": 1.964506404689422e-06, + "loss": 0.5724, + "step": 11621 + }, + { + "epoch": 1.712573673870334, + "grad_norm": 0.6715244650840759, + "learning_rate": 1.964127711665758e-06, + "loss": 0.5406, + "step": 11622 + }, + { + "epoch": 1.712721021611002, + "grad_norm": 0.5973464846611023, + "learning_rate": 1.9637490315295705e-06, + "loss": 0.5293, + "step": 11623 + }, + { + "epoch": 1.71286836935167, + "grad_norm": 0.6204094886779785, + "learning_rate": 1.963370364289968e-06, + "loss": 0.5474, + "step": 11624 + }, + { + "epoch": 1.713015717092338, + "grad_norm": 0.5934365391731262, + "learning_rate": 1.9629917099560562e-06, + "loss": 0.5282, + "step": 11625 + }, + { + "epoch": 1.7131630648330058, + "grad_norm": 0.6338246464729309, + "learning_rate": 1.962613068536942e-06, + "loss": 0.5195, + "step": 11626 + }, + { + "epoch": 1.7133104125736738, + "grad_norm": 0.5724172592163086, + "learning_rate": 1.9622344400417312e-06, + "loss": 0.5285, + "step": 11627 + }, + { + "epoch": 1.7134577603143417, + "grad_norm": 0.6036698818206787, + "learning_rate": 1.9618558244795304e-06, + "loss": 0.5139, + "step": 11628 + }, + { + "epoch": 1.7136051080550099, + "grad_norm": 0.6144570708274841, + "learning_rate": 1.9614772218594446e-06, + "loss": 0.5192, + "step": 11629 + }, + { + "epoch": 1.7137524557956778, + "grad_norm": 0.5741648077964783, + "learning_rate": 1.9610986321905785e-06, + "loss": 0.5126, + "step": 11630 + }, + { + "epoch": 1.7138998035363457, + "grad_norm": 0.5871856808662415, + "learning_rate": 1.960720055482038e-06, + "loss": 0.5418, + "step": 11631 + }, + { + "epoch": 1.714047151277014, + "grad_norm": 0.6320500373840332, + "learning_rate": 1.960341491742927e-06, + "loss": 0.5164, + "step": 11632 + }, + { + "epoch": 1.7141944990176818, + "grad_norm": 0.5938225984573364, + "learning_rate": 1.95996294098235e-06, + "loss": 0.5396, + "step": 11633 + }, + { + "epoch": 1.7143418467583498, + "grad_norm": 0.6197379231452942, + "learning_rate": 1.959584403209411e-06, + "loss": 0.5477, + "step": 11634 + }, + { + "epoch": 1.7144891944990177, + "grad_norm": 0.6286264061927795, + "learning_rate": 1.9592058784332135e-06, + "loss": 0.5329, + "step": 11635 + }, + { + "epoch": 1.7146365422396856, + "grad_norm": 0.6023470163345337, + "learning_rate": 1.958827366662861e-06, + "loss": 0.5301, + "step": 11636 + }, + { + "epoch": 1.7147838899803536, + "grad_norm": 0.6100669503211975, + "learning_rate": 1.9584488679074566e-06, + "loss": 0.5813, + "step": 11637 + }, + { + "epoch": 1.7149312377210215, + "grad_norm": 0.6100407838821411, + "learning_rate": 1.958070382176103e-06, + "loss": 0.5249, + "step": 11638 + }, + { + "epoch": 1.7150785854616895, + "grad_norm": 0.6131170988082886, + "learning_rate": 1.9576919094779024e-06, + "loss": 0.532, + "step": 11639 + }, + { + "epoch": 1.7152259332023576, + "grad_norm": 0.6224942207336426, + "learning_rate": 1.9573134498219572e-06, + "loss": 0.5239, + "step": 11640 + }, + { + "epoch": 1.7153732809430255, + "grad_norm": 0.5932450890541077, + "learning_rate": 1.956935003217369e-06, + "loss": 0.5359, + "step": 11641 + }, + { + "epoch": 1.7155206286836935, + "grad_norm": 0.6280590891838074, + "learning_rate": 1.9565565696732387e-06, + "loss": 0.5311, + "step": 11642 + }, + { + "epoch": 1.7156679764243616, + "grad_norm": 0.606448233127594, + "learning_rate": 1.956178149198669e-06, + "loss": 0.5339, + "step": 11643 + }, + { + "epoch": 1.7158153241650296, + "grad_norm": 0.6143476963043213, + "learning_rate": 1.9557997418027587e-06, + "loss": 0.5368, + "step": 11644 + }, + { + "epoch": 1.7159626719056975, + "grad_norm": 0.5948390364646912, + "learning_rate": 1.95542134749461e-06, + "loss": 0.5073, + "step": 11645 + }, + { + "epoch": 1.7161100196463654, + "grad_norm": 0.6061253547668457, + "learning_rate": 1.9550429662833225e-06, + "loss": 0.5493, + "step": 11646 + }, + { + "epoch": 1.7162573673870334, + "grad_norm": 0.6173214912414551, + "learning_rate": 1.9546645981779957e-06, + "loss": 0.5382, + "step": 11647 + }, + { + "epoch": 1.7164047151277013, + "grad_norm": 0.6230306029319763, + "learning_rate": 1.954286243187729e-06, + "loss": 0.5177, + "step": 11648 + }, + { + "epoch": 1.7165520628683693, + "grad_norm": 0.5566965937614441, + "learning_rate": 1.953907901321623e-06, + "loss": 0.4982, + "step": 11649 + }, + { + "epoch": 1.7166994106090372, + "grad_norm": 0.5941075086593628, + "learning_rate": 1.9535295725887757e-06, + "loss": 0.5011, + "step": 11650 + }, + { + "epoch": 1.7168467583497053, + "grad_norm": 0.5872687697410583, + "learning_rate": 1.953151256998286e-06, + "loss": 0.4825, + "step": 11651 + }, + { + "epoch": 1.7169941060903733, + "grad_norm": 0.6773032546043396, + "learning_rate": 1.952772954559252e-06, + "loss": 0.507, + "step": 11652 + }, + { + "epoch": 1.7171414538310412, + "grad_norm": 0.5938284993171692, + "learning_rate": 1.9523946652807717e-06, + "loss": 0.5142, + "step": 11653 + }, + { + "epoch": 1.7172888015717094, + "grad_norm": 0.6214128136634827, + "learning_rate": 1.9520163891719428e-06, + "loss": 0.5408, + "step": 11654 + }, + { + "epoch": 1.7174361493123773, + "grad_norm": 0.6369233727455139, + "learning_rate": 1.9516381262418627e-06, + "loss": 0.5201, + "step": 11655 + }, + { + "epoch": 1.7175834970530452, + "grad_norm": 0.5729424953460693, + "learning_rate": 1.9512598764996287e-06, + "loss": 0.526, + "step": 11656 + }, + { + "epoch": 1.7177308447937132, + "grad_norm": 0.5949599742889404, + "learning_rate": 1.9508816399543374e-06, + "loss": 0.4947, + "step": 11657 + }, + { + "epoch": 1.7178781925343811, + "grad_norm": 0.6134472489356995, + "learning_rate": 1.9505034166150847e-06, + "loss": 0.496, + "step": 11658 + }, + { + "epoch": 1.718025540275049, + "grad_norm": 0.6225259900093079, + "learning_rate": 1.9501252064909678e-06, + "loss": 0.4697, + "step": 11659 + }, + { + "epoch": 1.718172888015717, + "grad_norm": 0.5844719409942627, + "learning_rate": 1.949747009591081e-06, + "loss": 0.552, + "step": 11660 + }, + { + "epoch": 1.718320235756385, + "grad_norm": 0.5864596962928772, + "learning_rate": 1.949368825924521e-06, + "loss": 0.5088, + "step": 11661 + }, + { + "epoch": 1.718467583497053, + "grad_norm": 0.6284313797950745, + "learning_rate": 1.948990655500382e-06, + "loss": 0.5297, + "step": 11662 + }, + { + "epoch": 1.718614931237721, + "grad_norm": 0.6261193752288818, + "learning_rate": 1.94861249832776e-06, + "loss": 0.4718, + "step": 11663 + }, + { + "epoch": 1.718762278978389, + "grad_norm": 0.6027799844741821, + "learning_rate": 1.9482343544157485e-06, + "loss": 0.5382, + "step": 11664 + }, + { + "epoch": 1.7189096267190571, + "grad_norm": 0.6258955597877502, + "learning_rate": 1.9478562237734416e-06, + "loss": 0.5364, + "step": 11665 + }, + { + "epoch": 1.719056974459725, + "grad_norm": 0.596182644367218, + "learning_rate": 1.947478106409934e-06, + "loss": 0.4986, + "step": 11666 + }, + { + "epoch": 1.719204322200393, + "grad_norm": 0.5952522158622742, + "learning_rate": 1.9471000023343186e-06, + "loss": 0.5526, + "step": 11667 + }, + { + "epoch": 1.719351669941061, + "grad_norm": 0.6053610444068909, + "learning_rate": 1.9467219115556887e-06, + "loss": 0.5299, + "step": 11668 + }, + { + "epoch": 1.7194990176817289, + "grad_norm": 0.591629683971405, + "learning_rate": 1.9463438340831374e-06, + "loss": 0.4666, + "step": 11669 + }, + { + "epoch": 1.7196463654223968, + "grad_norm": 0.6098299026489258, + "learning_rate": 1.9459657699257574e-06, + "loss": 0.4932, + "step": 11670 + }, + { + "epoch": 1.7197937131630647, + "grad_norm": 0.6016725301742554, + "learning_rate": 1.9455877190926405e-06, + "loss": 0.4913, + "step": 11671 + }, + { + "epoch": 1.7199410609037327, + "grad_norm": 0.5926467776298523, + "learning_rate": 1.9452096815928793e-06, + "loss": 0.547, + "step": 11672 + }, + { + "epoch": 1.7200884086444008, + "grad_norm": 0.5838582515716553, + "learning_rate": 1.9448316574355646e-06, + "loss": 0.5275, + "step": 11673 + }, + { + "epoch": 1.7202357563850688, + "grad_norm": 0.6311476826667786, + "learning_rate": 1.9444536466297884e-06, + "loss": 0.5538, + "step": 11674 + }, + { + "epoch": 1.7203831041257367, + "grad_norm": 0.6130570769309998, + "learning_rate": 1.944075649184641e-06, + "loss": 0.5249, + "step": 11675 + }, + { + "epoch": 1.7205304518664049, + "grad_norm": 0.604576051235199, + "learning_rate": 1.9436976651092143e-06, + "loss": 0.5187, + "step": 11676 + }, + { + "epoch": 1.7206777996070728, + "grad_norm": 0.600835919380188, + "learning_rate": 1.9433196944125975e-06, + "loss": 0.5303, + "step": 11677 + }, + { + "epoch": 1.7208251473477407, + "grad_norm": 0.6123670339584351, + "learning_rate": 1.9429417371038816e-06, + "loss": 0.5042, + "step": 11678 + }, + { + "epoch": 1.7209724950884087, + "grad_norm": 0.5956187844276428, + "learning_rate": 1.942563793192155e-06, + "loss": 0.5287, + "step": 11679 + }, + { + "epoch": 1.7211198428290766, + "grad_norm": 0.5818579792976379, + "learning_rate": 1.9421858626865075e-06, + "loss": 0.5314, + "step": 11680 + }, + { + "epoch": 1.7212671905697445, + "grad_norm": 0.611240029335022, + "learning_rate": 1.9418079455960284e-06, + "loss": 0.525, + "step": 11681 + }, + { + "epoch": 1.7214145383104125, + "grad_norm": 0.5888427495956421, + "learning_rate": 1.9414300419298065e-06, + "loss": 0.5164, + "step": 11682 + }, + { + "epoch": 1.7215618860510804, + "grad_norm": 0.6269477605819702, + "learning_rate": 1.94105215169693e-06, + "loss": 0.5441, + "step": 11683 + }, + { + "epoch": 1.7217092337917486, + "grad_norm": 0.6030125021934509, + "learning_rate": 1.940674274906486e-06, + "loss": 0.5669, + "step": 11684 + }, + { + "epoch": 1.7218565815324165, + "grad_norm": 0.5933009386062622, + "learning_rate": 1.9402964115675647e-06, + "loss": 0.5351, + "step": 11685 + }, + { + "epoch": 1.7220039292730844, + "grad_norm": 0.6110202670097351, + "learning_rate": 1.939918561689252e-06, + "loss": 0.5344, + "step": 11686 + }, + { + "epoch": 1.7221512770137526, + "grad_norm": 0.5873198509216309, + "learning_rate": 1.939540725280635e-06, + "loss": 0.4943, + "step": 11687 + }, + { + "epoch": 1.7222986247544205, + "grad_norm": 0.6498313546180725, + "learning_rate": 1.939162902350801e-06, + "loss": 0.5453, + "step": 11688 + }, + { + "epoch": 1.7224459724950885, + "grad_norm": 0.6104013323783875, + "learning_rate": 1.9387850929088364e-06, + "loss": 0.5266, + "step": 11689 + }, + { + "epoch": 1.7225933202357564, + "grad_norm": 0.618722140789032, + "learning_rate": 1.9384072969638267e-06, + "loss": 0.492, + "step": 11690 + }, + { + "epoch": 1.7227406679764243, + "grad_norm": 0.5967821478843689, + "learning_rate": 1.938029514524858e-06, + "loss": 0.5495, + "step": 11691 + }, + { + "epoch": 1.7228880157170923, + "grad_norm": 0.6056063771247864, + "learning_rate": 1.9376517456010163e-06, + "loss": 0.5474, + "step": 11692 + }, + { + "epoch": 1.7230353634577602, + "grad_norm": 0.6022253036499023, + "learning_rate": 1.937273990201386e-06, + "loss": 0.53, + "step": 11693 + }, + { + "epoch": 1.7231827111984281, + "grad_norm": 0.6104105114936829, + "learning_rate": 1.9368962483350528e-06, + "loss": 0.5366, + "step": 11694 + }, + { + "epoch": 1.7233300589390963, + "grad_norm": 0.6130340099334717, + "learning_rate": 1.9365185200111002e-06, + "loss": 0.5562, + "step": 11695 + }, + { + "epoch": 1.7234774066797642, + "grad_norm": 0.6113914847373962, + "learning_rate": 1.936140805238613e-06, + "loss": 0.5337, + "step": 11696 + }, + { + "epoch": 1.7236247544204322, + "grad_norm": 0.569728672504425, + "learning_rate": 1.9357631040266753e-06, + "loss": 0.5344, + "step": 11697 + }, + { + "epoch": 1.7237721021611003, + "grad_norm": 0.6258840560913086, + "learning_rate": 1.93538541638437e-06, + "loss": 0.525, + "step": 11698 + }, + { + "epoch": 1.7239194499017683, + "grad_norm": 0.589782178401947, + "learning_rate": 1.9350077423207807e-06, + "loss": 0.5664, + "step": 11699 + }, + { + "epoch": 1.7240667976424362, + "grad_norm": 0.6132495999336243, + "learning_rate": 1.93463008184499e-06, + "loss": 0.5601, + "step": 11700 + }, + { + "epoch": 1.7242141453831041, + "grad_norm": 0.5770863890647888, + "learning_rate": 1.934252434966081e-06, + "loss": 0.503, + "step": 11701 + }, + { + "epoch": 1.724361493123772, + "grad_norm": 0.6011015176773071, + "learning_rate": 1.9338748016931353e-06, + "loss": 0.541, + "step": 11702 + }, + { + "epoch": 1.72450884086444, + "grad_norm": 0.6137421727180481, + "learning_rate": 1.9334971820352352e-06, + "loss": 0.5208, + "step": 11703 + }, + { + "epoch": 1.724656188605108, + "grad_norm": 0.6061083674430847, + "learning_rate": 1.9331195760014624e-06, + "loss": 0.5306, + "step": 11704 + }, + { + "epoch": 1.7248035363457759, + "grad_norm": 0.5877822637557983, + "learning_rate": 1.9327419836008975e-06, + "loss": 0.544, + "step": 11705 + }, + { + "epoch": 1.724950884086444, + "grad_norm": 0.5951606035232544, + "learning_rate": 1.932364404842622e-06, + "loss": 0.5123, + "step": 11706 + }, + { + "epoch": 1.725098231827112, + "grad_norm": 0.5844014286994934, + "learning_rate": 1.9319868397357164e-06, + "loss": 0.5407, + "step": 11707 + }, + { + "epoch": 1.72524557956778, + "grad_norm": 0.571019172668457, + "learning_rate": 1.9316092882892606e-06, + "loss": 0.529, + "step": 11708 + }, + { + "epoch": 1.725392927308448, + "grad_norm": 0.594667375087738, + "learning_rate": 1.931231750512335e-06, + "loss": 0.4996, + "step": 11709 + }, + { + "epoch": 1.725540275049116, + "grad_norm": 0.5660551190376282, + "learning_rate": 1.9308542264140193e-06, + "loss": 0.5172, + "step": 11710 + }, + { + "epoch": 1.725687622789784, + "grad_norm": 0.6056662201881409, + "learning_rate": 1.930476716003392e-06, + "loss": 0.518, + "step": 11711 + }, + { + "epoch": 1.7258349705304519, + "grad_norm": 0.5965051054954529, + "learning_rate": 1.930099219289533e-06, + "loss": 0.5039, + "step": 11712 + }, + { + "epoch": 1.7259823182711198, + "grad_norm": 0.6041452884674072, + "learning_rate": 1.9297217362815203e-06, + "loss": 0.5092, + "step": 11713 + }, + { + "epoch": 1.7261296660117877, + "grad_norm": 0.5983904600143433, + "learning_rate": 1.9293442669884326e-06, + "loss": 0.5325, + "step": 11714 + }, + { + "epoch": 1.7262770137524557, + "grad_norm": 0.6314982175827026, + "learning_rate": 1.928966811419347e-06, + "loss": 0.551, + "step": 11715 + }, + { + "epoch": 1.7264243614931236, + "grad_norm": 0.6092755198478699, + "learning_rate": 1.9285893695833426e-06, + "loss": 0.5172, + "step": 11716 + }, + { + "epoch": 1.7265717092337918, + "grad_norm": 0.6238587498664856, + "learning_rate": 1.9282119414894953e-06, + "loss": 0.502, + "step": 11717 + }, + { + "epoch": 1.7267190569744597, + "grad_norm": 0.5809354782104492, + "learning_rate": 1.9278345271468827e-06, + "loss": 0.5102, + "step": 11718 + }, + { + "epoch": 1.7268664047151279, + "grad_norm": 0.6173313856124878, + "learning_rate": 1.9274571265645812e-06, + "loss": 0.5552, + "step": 11719 + }, + { + "epoch": 1.7270137524557958, + "grad_norm": 0.6019474864006042, + "learning_rate": 1.9270797397516677e-06, + "loss": 0.4927, + "step": 11720 + }, + { + "epoch": 1.7271611001964637, + "grad_norm": 0.5900370478630066, + "learning_rate": 1.9267023667172167e-06, + "loss": 0.4993, + "step": 11721 + }, + { + "epoch": 1.7273084479371317, + "grad_norm": 0.6250142455101013, + "learning_rate": 1.9263250074703056e-06, + "loss": 0.5524, + "step": 11722 + }, + { + "epoch": 1.7274557956777996, + "grad_norm": 0.5988089442253113, + "learning_rate": 1.9259476620200094e-06, + "loss": 0.5437, + "step": 11723 + }, + { + "epoch": 1.7276031434184675, + "grad_norm": 0.569915771484375, + "learning_rate": 1.9255703303754024e-06, + "loss": 0.5192, + "step": 11724 + }, + { + "epoch": 1.7277504911591355, + "grad_norm": 0.598561704158783, + "learning_rate": 1.9251930125455594e-06, + "loss": 0.5643, + "step": 11725 + }, + { + "epoch": 1.7278978388998034, + "grad_norm": 0.6121224761009216, + "learning_rate": 1.9248157085395545e-06, + "loss": 0.5103, + "step": 11726 + }, + { + "epoch": 1.7280451866404714, + "grad_norm": 0.5699798464775085, + "learning_rate": 1.924438418366463e-06, + "loss": 0.5594, + "step": 11727 + }, + { + "epoch": 1.7281925343811395, + "grad_norm": 0.6617475152015686, + "learning_rate": 1.924061142035357e-06, + "loss": 0.5367, + "step": 11728 + }, + { + "epoch": 1.7283398821218074, + "grad_norm": 0.5849500298500061, + "learning_rate": 1.9236838795553105e-06, + "loss": 0.5169, + "step": 11729 + }, + { + "epoch": 1.7284872298624756, + "grad_norm": 0.599980354309082, + "learning_rate": 1.9233066309353963e-06, + "loss": 0.5363, + "step": 11730 + }, + { + "epoch": 1.7286345776031435, + "grad_norm": 0.6202858686447144, + "learning_rate": 1.9229293961846872e-06, + "loss": 0.4841, + "step": 11731 + }, + { + "epoch": 1.7287819253438115, + "grad_norm": 0.5796306729316711, + "learning_rate": 1.922552175312255e-06, + "loss": 0.5181, + "step": 11732 + }, + { + "epoch": 1.7289292730844794, + "grad_norm": 0.628072202205658, + "learning_rate": 1.922174968327173e-06, + "loss": 0.5077, + "step": 11733 + }, + { + "epoch": 1.7290766208251473, + "grad_norm": 0.5835309028625488, + "learning_rate": 1.9217977752385114e-06, + "loss": 0.5094, + "step": 11734 + }, + { + "epoch": 1.7292239685658153, + "grad_norm": 0.5653502941131592, + "learning_rate": 1.9214205960553427e-06, + "loss": 0.4934, + "step": 11735 + }, + { + "epoch": 1.7293713163064832, + "grad_norm": 0.5902736186981201, + "learning_rate": 1.9210434307867366e-06, + "loss": 0.5118, + "step": 11736 + }, + { + "epoch": 1.7295186640471512, + "grad_norm": 0.6298245787620544, + "learning_rate": 1.920666279441765e-06, + "loss": 0.5124, + "step": 11737 + }, + { + "epoch": 1.729666011787819, + "grad_norm": 0.5806632041931152, + "learning_rate": 1.9202891420294973e-06, + "loss": 0.5205, + "step": 11738 + }, + { + "epoch": 1.7298133595284872, + "grad_norm": 0.6140338182449341, + "learning_rate": 1.919912018559004e-06, + "loss": 0.5242, + "step": 11739 + }, + { + "epoch": 1.7299607072691552, + "grad_norm": 0.6135437488555908, + "learning_rate": 1.9195349090393543e-06, + "loss": 0.5178, + "step": 11740 + }, + { + "epoch": 1.7301080550098233, + "grad_norm": 0.5909650921821594, + "learning_rate": 1.919157813479618e-06, + "loss": 0.5338, + "step": 11741 + }, + { + "epoch": 1.7302554027504913, + "grad_norm": 0.6031169891357422, + "learning_rate": 1.918780731888864e-06, + "loss": 0.525, + "step": 11742 + }, + { + "epoch": 1.7304027504911592, + "grad_norm": 0.5887567400932312, + "learning_rate": 1.9184036642761606e-06, + "loss": 0.518, + "step": 11743 + }, + { + "epoch": 1.7305500982318271, + "grad_norm": 0.6086535453796387, + "learning_rate": 1.9180266106505766e-06, + "loss": 0.5409, + "step": 11744 + }, + { + "epoch": 1.730697445972495, + "grad_norm": 0.6151487827301025, + "learning_rate": 1.9176495710211796e-06, + "loss": 0.4839, + "step": 11745 + }, + { + "epoch": 1.730844793713163, + "grad_norm": 0.6313906311988831, + "learning_rate": 1.917272545397037e-06, + "loss": 0.5135, + "step": 11746 + }, + { + "epoch": 1.730992141453831, + "grad_norm": 0.5947974920272827, + "learning_rate": 1.9168955337872163e-06, + "loss": 0.5215, + "step": 11747 + }, + { + "epoch": 1.731139489194499, + "grad_norm": 0.6335309743881226, + "learning_rate": 1.916518536200785e-06, + "loss": 0.5355, + "step": 11748 + }, + { + "epoch": 1.7312868369351668, + "grad_norm": 0.6255335211753845, + "learning_rate": 1.9161415526468085e-06, + "loss": 0.5334, + "step": 11749 + }, + { + "epoch": 1.731434184675835, + "grad_norm": 0.624281108379364, + "learning_rate": 1.9157645831343546e-06, + "loss": 0.4878, + "step": 11750 + }, + { + "epoch": 1.731581532416503, + "grad_norm": 0.6275539994239807, + "learning_rate": 1.915387627672488e-06, + "loss": 0.4983, + "step": 11751 + }, + { + "epoch": 1.731728880157171, + "grad_norm": 0.6146279573440552, + "learning_rate": 1.9150106862702745e-06, + "loss": 0.5299, + "step": 11752 + }, + { + "epoch": 1.731876227897839, + "grad_norm": 0.6138433814048767, + "learning_rate": 1.91463375893678e-06, + "loss": 0.5503, + "step": 11753 + }, + { + "epoch": 1.732023575638507, + "grad_norm": 0.667983889579773, + "learning_rate": 1.9142568456810688e-06, + "loss": 0.5458, + "step": 11754 + }, + { + "epoch": 1.7321709233791749, + "grad_norm": 0.6039285063743591, + "learning_rate": 1.913879946512206e-06, + "loss": 0.5298, + "step": 11755 + }, + { + "epoch": 1.7323182711198428, + "grad_norm": 0.5775296688079834, + "learning_rate": 1.9135030614392556e-06, + "loss": 0.5509, + "step": 11756 + }, + { + "epoch": 1.7324656188605108, + "grad_norm": 0.6062142252922058, + "learning_rate": 1.913126190471281e-06, + "loss": 0.5148, + "step": 11757 + }, + { + "epoch": 1.7326129666011787, + "grad_norm": 0.6127450466156006, + "learning_rate": 1.912749333617346e-06, + "loss": 0.5113, + "step": 11758 + }, + { + "epoch": 1.7327603143418466, + "grad_norm": 0.623894989490509, + "learning_rate": 1.9123724908865145e-06, + "loss": 0.5466, + "step": 11759 + }, + { + "epoch": 1.7329076620825148, + "grad_norm": 0.5897853374481201, + "learning_rate": 1.911995662287849e-06, + "loss": 0.4938, + "step": 11760 + }, + { + "epoch": 1.7330550098231827, + "grad_norm": 0.6118406653404236, + "learning_rate": 1.9116188478304124e-06, + "loss": 0.5234, + "step": 11761 + }, + { + "epoch": 1.7332023575638507, + "grad_norm": 0.5898472666740417, + "learning_rate": 1.9112420475232666e-06, + "loss": 0.5077, + "step": 11762 + }, + { + "epoch": 1.7333497053045188, + "grad_norm": 0.5981550812721252, + "learning_rate": 1.9108652613754735e-06, + "loss": 0.5165, + "step": 11763 + }, + { + "epoch": 1.7334970530451868, + "grad_norm": 0.5895688533782959, + "learning_rate": 1.9104884893960944e-06, + "loss": 0.5104, + "step": 11764 + }, + { + "epoch": 1.7336444007858547, + "grad_norm": 0.6630901098251343, + "learning_rate": 1.9101117315941903e-06, + "loss": 0.5228, + "step": 11765 + }, + { + "epoch": 1.7337917485265226, + "grad_norm": 0.642580509185791, + "learning_rate": 1.9097349879788225e-06, + "loss": 0.5293, + "step": 11766 + }, + { + "epoch": 1.7339390962671906, + "grad_norm": 0.6122806668281555, + "learning_rate": 1.9093582585590514e-06, + "loss": 0.5077, + "step": 11767 + }, + { + "epoch": 1.7340864440078585, + "grad_norm": 0.5895129442214966, + "learning_rate": 1.9089815433439372e-06, + "loss": 0.5223, + "step": 11768 + }, + { + "epoch": 1.7342337917485264, + "grad_norm": 0.6133357286453247, + "learning_rate": 1.9086048423425395e-06, + "loss": 0.5046, + "step": 11769 + }, + { + "epoch": 1.7343811394891944, + "grad_norm": 0.619113564491272, + "learning_rate": 1.908228155563918e-06, + "loss": 0.5, + "step": 11770 + }, + { + "epoch": 1.7345284872298625, + "grad_norm": 0.6039297580718994, + "learning_rate": 1.907851483017132e-06, + "loss": 0.5237, + "step": 11771 + }, + { + "epoch": 1.7346758349705305, + "grad_norm": 0.6581718921661377, + "learning_rate": 1.90747482471124e-06, + "loss": 0.5149, + "step": 11772 + }, + { + "epoch": 1.7348231827111984, + "grad_norm": 0.6062868237495422, + "learning_rate": 1.9070981806553008e-06, + "loss": 0.554, + "step": 11773 + }, + { + "epoch": 1.7349705304518666, + "grad_norm": 0.6006470322608948, + "learning_rate": 1.9067215508583719e-06, + "loss": 0.513, + "step": 11774 + }, + { + "epoch": 1.7351178781925345, + "grad_norm": 0.5803431272506714, + "learning_rate": 1.9063449353295116e-06, + "loss": 0.5161, + "step": 11775 + }, + { + "epoch": 1.7352652259332024, + "grad_norm": 0.5780698657035828, + "learning_rate": 1.905968334077777e-06, + "loss": 0.5061, + "step": 11776 + }, + { + "epoch": 1.7354125736738704, + "grad_norm": 0.5906593799591064, + "learning_rate": 1.9055917471122258e-06, + "loss": 0.5365, + "step": 11777 + }, + { + "epoch": 1.7355599214145383, + "grad_norm": 0.6087611317634583, + "learning_rate": 1.905215174441914e-06, + "loss": 0.4855, + "step": 11778 + }, + { + "epoch": 1.7357072691552062, + "grad_norm": 0.575620174407959, + "learning_rate": 1.904838616075898e-06, + "loss": 0.5093, + "step": 11779 + }, + { + "epoch": 1.7358546168958742, + "grad_norm": 0.6293839812278748, + "learning_rate": 1.9044620720232343e-06, + "loss": 0.5571, + "step": 11780 + }, + { + "epoch": 1.736001964636542, + "grad_norm": 0.5800663828849792, + "learning_rate": 1.9040855422929787e-06, + "loss": 0.5054, + "step": 11781 + }, + { + "epoch": 1.7361493123772103, + "grad_norm": 0.6113099455833435, + "learning_rate": 1.903709026894186e-06, + "loss": 0.5468, + "step": 11782 + }, + { + "epoch": 1.7362966601178782, + "grad_norm": 0.6015065312385559, + "learning_rate": 1.9033325258359118e-06, + "loss": 0.5134, + "step": 11783 + }, + { + "epoch": 1.7364440078585461, + "grad_norm": 0.6211109161376953, + "learning_rate": 1.9029560391272103e-06, + "loss": 0.5472, + "step": 11784 + }, + { + "epoch": 1.7365913555992143, + "grad_norm": 0.6811472177505493, + "learning_rate": 1.9025795667771362e-06, + "loss": 0.551, + "step": 11785 + }, + { + "epoch": 1.7367387033398822, + "grad_norm": 0.6123239994049072, + "learning_rate": 1.902203108794743e-06, + "loss": 0.5502, + "step": 11786 + }, + { + "epoch": 1.7368860510805502, + "grad_norm": 0.5850644707679749, + "learning_rate": 1.9018266651890849e-06, + "loss": 0.5316, + "step": 11787 + }, + { + "epoch": 1.737033398821218, + "grad_norm": 0.6076188087463379, + "learning_rate": 1.901450235969215e-06, + "loss": 0.5273, + "step": 11788 + }, + { + "epoch": 1.737180746561886, + "grad_norm": 0.6138581037521362, + "learning_rate": 1.9010738211441863e-06, + "loss": 0.5085, + "step": 11789 + }, + { + "epoch": 1.737328094302554, + "grad_norm": 0.6065112948417664, + "learning_rate": 1.9006974207230509e-06, + "loss": 0.5269, + "step": 11790 + }, + { + "epoch": 1.737475442043222, + "grad_norm": 0.6500422954559326, + "learning_rate": 1.9003210347148616e-06, + "loss": 0.558, + "step": 11791 + }, + { + "epoch": 1.7376227897838898, + "grad_norm": 0.6288553476333618, + "learning_rate": 1.8999446631286702e-06, + "loss": 0.5572, + "step": 11792 + }, + { + "epoch": 1.737770137524558, + "grad_norm": 0.5989505648612976, + "learning_rate": 1.8995683059735287e-06, + "loss": 0.5131, + "step": 11793 + }, + { + "epoch": 1.737917485265226, + "grad_norm": 0.5867815017700195, + "learning_rate": 1.8991919632584873e-06, + "loss": 0.5494, + "step": 11794 + }, + { + "epoch": 1.7380648330058939, + "grad_norm": 0.6076154112815857, + "learning_rate": 1.8988156349925963e-06, + "loss": 0.51, + "step": 11795 + }, + { + "epoch": 1.738212180746562, + "grad_norm": 0.5926553010940552, + "learning_rate": 1.8984393211849083e-06, + "loss": 0.5091, + "step": 11796 + }, + { + "epoch": 1.73835952848723, + "grad_norm": 0.6101515889167786, + "learning_rate": 1.8980630218444727e-06, + "loss": 0.5525, + "step": 11797 + }, + { + "epoch": 1.738506876227898, + "grad_norm": 0.6101085543632507, + "learning_rate": 1.897686736980339e-06, + "loss": 0.5261, + "step": 11798 + }, + { + "epoch": 1.7386542239685658, + "grad_norm": 0.6439685225486755, + "learning_rate": 1.8973104666015567e-06, + "loss": 0.5496, + "step": 11799 + }, + { + "epoch": 1.7388015717092338, + "grad_norm": 0.5869073867797852, + "learning_rate": 1.8969342107171752e-06, + "loss": 0.5302, + "step": 11800 + }, + { + "epoch": 1.7389489194499017, + "grad_norm": 0.6060370206832886, + "learning_rate": 1.8965579693362427e-06, + "loss": 0.5263, + "step": 11801 + }, + { + "epoch": 1.7390962671905696, + "grad_norm": 0.6421024203300476, + "learning_rate": 1.8961817424678082e-06, + "loss": 0.5329, + "step": 11802 + }, + { + "epoch": 1.7392436149312376, + "grad_norm": 0.6361680030822754, + "learning_rate": 1.8958055301209197e-06, + "loss": 0.4748, + "step": 11803 + }, + { + "epoch": 1.7393909626719057, + "grad_norm": 0.6478605270385742, + "learning_rate": 1.8954293323046246e-06, + "loss": 0.5311, + "step": 11804 + }, + { + "epoch": 1.7395383104125737, + "grad_norm": 0.6180056929588318, + "learning_rate": 1.8950531490279706e-06, + "loss": 0.5348, + "step": 11805 + }, + { + "epoch": 1.7396856581532416, + "grad_norm": 0.5737855434417725, + "learning_rate": 1.8946769803000049e-06, + "loss": 0.5365, + "step": 11806 + }, + { + "epoch": 1.7398330058939098, + "grad_norm": 0.6127997636795044, + "learning_rate": 1.8943008261297735e-06, + "loss": 0.5159, + "step": 11807 + }, + { + "epoch": 1.7399803536345777, + "grad_norm": 0.6682573556900024, + "learning_rate": 1.8939246865263234e-06, + "loss": 0.5182, + "step": 11808 + }, + { + "epoch": 1.7401277013752456, + "grad_norm": 0.5912705063819885, + "learning_rate": 1.8935485614987e-06, + "loss": 0.51, + "step": 11809 + }, + { + "epoch": 1.7402750491159136, + "grad_norm": 0.603872537612915, + "learning_rate": 1.8931724510559495e-06, + "loss": 0.5134, + "step": 11810 + }, + { + "epoch": 1.7404223968565815, + "grad_norm": 0.6501057147979736, + "learning_rate": 1.8927963552071177e-06, + "loss": 0.5431, + "step": 11811 + }, + { + "epoch": 1.7405697445972494, + "grad_norm": 0.5972366333007812, + "learning_rate": 1.892420273961248e-06, + "loss": 0.5043, + "step": 11812 + }, + { + "epoch": 1.7407170923379174, + "grad_norm": 0.6086369156837463, + "learning_rate": 1.8920442073273857e-06, + "loss": 0.4979, + "step": 11813 + }, + { + "epoch": 1.7408644400785853, + "grad_norm": 0.5922181606292725, + "learning_rate": 1.891668155314575e-06, + "loss": 0.5202, + "step": 11814 + }, + { + "epoch": 1.7410117878192535, + "grad_norm": 0.6442272663116455, + "learning_rate": 1.89129211793186e-06, + "loss": 0.489, + "step": 11815 + }, + { + "epoch": 1.7411591355599214, + "grad_norm": 0.5896434783935547, + "learning_rate": 1.8909160951882838e-06, + "loss": 0.5351, + "step": 11816 + }, + { + "epoch": 1.7413064833005893, + "grad_norm": 0.6043244004249573, + "learning_rate": 1.89054008709289e-06, + "loss": 0.4938, + "step": 11817 + }, + { + "epoch": 1.7414538310412575, + "grad_norm": 0.6158156394958496, + "learning_rate": 1.8901640936547213e-06, + "loss": 0.5426, + "step": 11818 + }, + { + "epoch": 1.7416011787819254, + "grad_norm": 0.5886554718017578, + "learning_rate": 1.88978811488282e-06, + "loss": 0.5024, + "step": 11819 + }, + { + "epoch": 1.7417485265225934, + "grad_norm": 0.6128722429275513, + "learning_rate": 1.8894121507862284e-06, + "loss": 0.5307, + "step": 11820 + }, + { + "epoch": 1.7418958742632613, + "grad_norm": 0.635574460029602, + "learning_rate": 1.8890362013739883e-06, + "loss": 0.5236, + "step": 11821 + }, + { + "epoch": 1.7420432220039292, + "grad_norm": 0.6347232460975647, + "learning_rate": 1.888660266655141e-06, + "loss": 0.4959, + "step": 11822 + }, + { + "epoch": 1.7421905697445972, + "grad_norm": 0.6110600233078003, + "learning_rate": 1.8882843466387274e-06, + "loss": 0.5369, + "step": 11823 + }, + { + "epoch": 1.7423379174852651, + "grad_norm": 0.612734317779541, + "learning_rate": 1.8879084413337883e-06, + "loss": 0.5499, + "step": 11824 + }, + { + "epoch": 1.742485265225933, + "grad_norm": 0.5751619338989258, + "learning_rate": 1.8875325507493642e-06, + "loss": 0.5164, + "step": 11825 + }, + { + "epoch": 1.7426326129666012, + "grad_norm": 0.638593316078186, + "learning_rate": 1.887156674894495e-06, + "loss": 0.5075, + "step": 11826 + }, + { + "epoch": 1.7427799607072691, + "grad_norm": 0.5969300866127014, + "learning_rate": 1.8867808137782205e-06, + "loss": 0.5664, + "step": 11827 + }, + { + "epoch": 1.742927308447937, + "grad_norm": 0.5946177244186401, + "learning_rate": 1.8864049674095796e-06, + "loss": 0.5494, + "step": 11828 + }, + { + "epoch": 1.7430746561886052, + "grad_norm": 0.590137779712677, + "learning_rate": 1.8860291357976113e-06, + "loss": 0.5327, + "step": 11829 + }, + { + "epoch": 1.7432220039292732, + "grad_norm": 0.6161376237869263, + "learning_rate": 1.8856533189513547e-06, + "loss": 0.5578, + "step": 11830 + }, + { + "epoch": 1.7433693516699411, + "grad_norm": 0.5768461227416992, + "learning_rate": 1.8852775168798474e-06, + "loss": 0.5116, + "step": 11831 + }, + { + "epoch": 1.743516699410609, + "grad_norm": 0.6127576231956482, + "learning_rate": 1.8849017295921268e-06, + "loss": 0.5231, + "step": 11832 + }, + { + "epoch": 1.743664047151277, + "grad_norm": 0.6108216643333435, + "learning_rate": 1.884525957097232e-06, + "loss": 0.5053, + "step": 11833 + }, + { + "epoch": 1.743811394891945, + "grad_norm": 0.6341115236282349, + "learning_rate": 1.8841501994041988e-06, + "loss": 0.479, + "step": 11834 + }, + { + "epoch": 1.7439587426326129, + "grad_norm": 0.6229897141456604, + "learning_rate": 1.8837744565220651e-06, + "loss": 0.5077, + "step": 11835 + }, + { + "epoch": 1.7441060903732808, + "grad_norm": 0.626654326915741, + "learning_rate": 1.8833987284598661e-06, + "loss": 0.4962, + "step": 11836 + }, + { + "epoch": 1.744253438113949, + "grad_norm": 0.6148939728736877, + "learning_rate": 1.8830230152266389e-06, + "loss": 0.5111, + "step": 11837 + }, + { + "epoch": 1.7444007858546169, + "grad_norm": 0.6041998267173767, + "learning_rate": 1.8826473168314186e-06, + "loss": 0.5272, + "step": 11838 + }, + { + "epoch": 1.7445481335952848, + "grad_norm": 0.6009404063224792, + "learning_rate": 1.882271633283241e-06, + "loss": 0.4731, + "step": 11839 + }, + { + "epoch": 1.744695481335953, + "grad_norm": 0.6357950568199158, + "learning_rate": 1.8818959645911402e-06, + "loss": 0.5265, + "step": 11840 + }, + { + "epoch": 1.744842829076621, + "grad_norm": 0.6528650522232056, + "learning_rate": 1.8815203107641521e-06, + "loss": 0.4967, + "step": 11841 + }, + { + "epoch": 1.7449901768172889, + "grad_norm": 0.553837776184082, + "learning_rate": 1.8811446718113102e-06, + "loss": 0.5346, + "step": 11842 + }, + { + "epoch": 1.7451375245579568, + "grad_norm": 0.6076595783233643, + "learning_rate": 1.880769047741649e-06, + "loss": 0.5353, + "step": 11843 + }, + { + "epoch": 1.7452848722986247, + "grad_norm": 0.5695890188217163, + "learning_rate": 1.8803934385642016e-06, + "loss": 0.5307, + "step": 11844 + }, + { + "epoch": 1.7454322200392927, + "grad_norm": 0.5887214541435242, + "learning_rate": 1.8800178442880013e-06, + "loss": 0.5279, + "step": 11845 + }, + { + "epoch": 1.7455795677799606, + "grad_norm": 0.6013043522834778, + "learning_rate": 1.8796422649220813e-06, + "loss": 0.5281, + "step": 11846 + }, + { + "epoch": 1.7457269155206285, + "grad_norm": 0.6048842072486877, + "learning_rate": 1.8792667004754735e-06, + "loss": 0.5477, + "step": 11847 + }, + { + "epoch": 1.7458742632612967, + "grad_norm": 0.5980496406555176, + "learning_rate": 1.8788911509572108e-06, + "loss": 0.5221, + "step": 11848 + }, + { + "epoch": 1.7460216110019646, + "grad_norm": 0.6186067461967468, + "learning_rate": 1.8785156163763245e-06, + "loss": 0.5107, + "step": 11849 + }, + { + "epoch": 1.7461689587426326, + "grad_norm": 0.5859857201576233, + "learning_rate": 1.8781400967418459e-06, + "loss": 0.5123, + "step": 11850 + }, + { + "epoch": 1.7463163064833007, + "grad_norm": 0.5953540205955505, + "learning_rate": 1.8777645920628066e-06, + "loss": 0.5197, + "step": 11851 + }, + { + "epoch": 1.7464636542239687, + "grad_norm": 0.6012839078903198, + "learning_rate": 1.877389102348237e-06, + "loss": 0.5166, + "step": 11852 + }, + { + "epoch": 1.7466110019646366, + "grad_norm": 0.5993348956108093, + "learning_rate": 1.8770136276071672e-06, + "loss": 0.4985, + "step": 11853 + }, + { + "epoch": 1.7467583497053045, + "grad_norm": 0.6160585284233093, + "learning_rate": 1.8766381678486275e-06, + "loss": 0.5114, + "step": 11854 + }, + { + "epoch": 1.7469056974459725, + "grad_norm": 0.6125109195709229, + "learning_rate": 1.8762627230816478e-06, + "loss": 0.5662, + "step": 11855 + }, + { + "epoch": 1.7470530451866404, + "grad_norm": 0.6028446555137634, + "learning_rate": 1.8758872933152566e-06, + "loss": 0.5203, + "step": 11856 + }, + { + "epoch": 1.7472003929273083, + "grad_norm": 0.625891387462616, + "learning_rate": 1.8755118785584835e-06, + "loss": 0.5208, + "step": 11857 + }, + { + "epoch": 1.7473477406679763, + "grad_norm": 0.6120128631591797, + "learning_rate": 1.875136478820357e-06, + "loss": 0.5057, + "step": 11858 + }, + { + "epoch": 1.7474950884086444, + "grad_norm": 0.6146994829177856, + "learning_rate": 1.8747610941099054e-06, + "loss": 0.5265, + "step": 11859 + }, + { + "epoch": 1.7476424361493124, + "grad_norm": 0.595011830329895, + "learning_rate": 1.8743857244361558e-06, + "loss": 0.5077, + "step": 11860 + }, + { + "epoch": 1.7477897838899805, + "grad_norm": 0.591149628162384, + "learning_rate": 1.8740103698081361e-06, + "loss": 0.5346, + "step": 11861 + }, + { + "epoch": 1.7479371316306485, + "grad_norm": 0.5876913666725159, + "learning_rate": 1.8736350302348733e-06, + "loss": 0.507, + "step": 11862 + }, + { + "epoch": 1.7480844793713164, + "grad_norm": 0.5856168270111084, + "learning_rate": 1.8732597057253943e-06, + "loss": 0.5412, + "step": 11863 + }, + { + "epoch": 1.7482318271119843, + "grad_norm": 0.5835593342781067, + "learning_rate": 1.8728843962887256e-06, + "loss": 0.5367, + "step": 11864 + }, + { + "epoch": 1.7483791748526523, + "grad_norm": 0.6289700865745544, + "learning_rate": 1.8725091019338925e-06, + "loss": 0.5342, + "step": 11865 + }, + { + "epoch": 1.7485265225933202, + "grad_norm": 0.5994970798492432, + "learning_rate": 1.8721338226699219e-06, + "loss": 0.5035, + "step": 11866 + }, + { + "epoch": 1.7486738703339881, + "grad_norm": 0.6315870881080627, + "learning_rate": 1.871758558505838e-06, + "loss": 0.516, + "step": 11867 + }, + { + "epoch": 1.748821218074656, + "grad_norm": 0.6009716987609863, + "learning_rate": 1.8713833094506661e-06, + "loss": 0.5169, + "step": 11868 + }, + { + "epoch": 1.748968565815324, + "grad_norm": 0.64836585521698, + "learning_rate": 1.8710080755134296e-06, + "loss": 0.5328, + "step": 11869 + }, + { + "epoch": 1.7491159135559922, + "grad_norm": 0.5863229632377625, + "learning_rate": 1.8706328567031552e-06, + "loss": 0.5191, + "step": 11870 + }, + { + "epoch": 1.74926326129666, + "grad_norm": 0.6604428291320801, + "learning_rate": 1.8702576530288651e-06, + "loss": 0.5109, + "step": 11871 + }, + { + "epoch": 1.7494106090373283, + "grad_norm": 0.5754243731498718, + "learning_rate": 1.8698824644995834e-06, + "loss": 0.515, + "step": 11872 + }, + { + "epoch": 1.7495579567779962, + "grad_norm": 0.5928562879562378, + "learning_rate": 1.8695072911243327e-06, + "loss": 0.5293, + "step": 11873 + }, + { + "epoch": 1.7497053045186641, + "grad_norm": 0.6106999516487122, + "learning_rate": 1.8691321329121358e-06, + "loss": 0.5411, + "step": 11874 + }, + { + "epoch": 1.749852652259332, + "grad_norm": 0.6198994517326355, + "learning_rate": 1.8687569898720153e-06, + "loss": 0.5045, + "step": 11875 + }, + { + "epoch": 1.75, + "grad_norm": 0.6234041452407837, + "learning_rate": 1.8683818620129935e-06, + "loss": 0.5079, + "step": 11876 + }, + { + "epoch": 1.750147347740668, + "grad_norm": 0.5825262665748596, + "learning_rate": 1.8680067493440919e-06, + "loss": 0.5307, + "step": 11877 + }, + { + "epoch": 1.7502946954813359, + "grad_norm": 0.6049823760986328, + "learning_rate": 1.867631651874331e-06, + "loss": 0.5114, + "step": 11878 + }, + { + "epoch": 1.7504420432220038, + "grad_norm": 0.630957841873169, + "learning_rate": 1.867256569612732e-06, + "loss": 0.5416, + "step": 11879 + }, + { + "epoch": 1.7505893909626717, + "grad_norm": 0.6113013029098511, + "learning_rate": 1.8668815025683162e-06, + "loss": 0.5124, + "step": 11880 + }, + { + "epoch": 1.75073673870334, + "grad_norm": 0.5796501636505127, + "learning_rate": 1.8665064507501033e-06, + "loss": 0.5165, + "step": 11881 + }, + { + "epoch": 1.7508840864440078, + "grad_norm": 0.6182447075843811, + "learning_rate": 1.8661314141671127e-06, + "loss": 0.5352, + "step": 11882 + }, + { + "epoch": 1.751031434184676, + "grad_norm": 0.6430680155754089, + "learning_rate": 1.8657563928283645e-06, + "loss": 0.5292, + "step": 11883 + }, + { + "epoch": 1.751178781925344, + "grad_norm": 0.6413750052452087, + "learning_rate": 1.8653813867428777e-06, + "loss": 0.559, + "step": 11884 + }, + { + "epoch": 1.7513261296660119, + "grad_norm": 0.5992459058761597, + "learning_rate": 1.865006395919671e-06, + "loss": 0.5237, + "step": 11885 + }, + { + "epoch": 1.7514734774066798, + "grad_norm": 0.6863816976547241, + "learning_rate": 1.8646314203677624e-06, + "loss": 0.5416, + "step": 11886 + }, + { + "epoch": 1.7516208251473477, + "grad_norm": 0.628714919090271, + "learning_rate": 1.8642564600961705e-06, + "loss": 0.5253, + "step": 11887 + }, + { + "epoch": 1.7517681728880157, + "grad_norm": 0.6064491271972656, + "learning_rate": 1.863881515113912e-06, + "loss": 0.5017, + "step": 11888 + }, + { + "epoch": 1.7519155206286836, + "grad_norm": 0.6170048117637634, + "learning_rate": 1.863506585430005e-06, + "loss": 0.5449, + "step": 11889 + }, + { + "epoch": 1.7520628683693515, + "grad_norm": 0.6230272650718689, + "learning_rate": 1.863131671053466e-06, + "loss": 0.5299, + "step": 11890 + }, + { + "epoch": 1.7522102161100195, + "grad_norm": 0.6434227824211121, + "learning_rate": 1.8627567719933115e-06, + "loss": 0.5101, + "step": 11891 + }, + { + "epoch": 1.7523575638506876, + "grad_norm": 0.6144903898239136, + "learning_rate": 1.8623818882585582e-06, + "loss": 0.5356, + "step": 11892 + }, + { + "epoch": 1.7525049115913556, + "grad_norm": 0.5967804193496704, + "learning_rate": 1.862007019858221e-06, + "loss": 0.5304, + "step": 11893 + }, + { + "epoch": 1.7526522593320237, + "grad_norm": 0.6127954721450806, + "learning_rate": 1.861632166801316e-06, + "loss": 0.515, + "step": 11894 + }, + { + "epoch": 1.7527996070726917, + "grad_norm": 0.6261035203933716, + "learning_rate": 1.8612573290968578e-06, + "loss": 0.5219, + "step": 11895 + }, + { + "epoch": 1.7529469548133596, + "grad_norm": 0.5783584713935852, + "learning_rate": 1.8608825067538611e-06, + "loss": 0.521, + "step": 11896 + }, + { + "epoch": 1.7530943025540275, + "grad_norm": 0.5882755517959595, + "learning_rate": 1.8605076997813406e-06, + "loss": 0.5316, + "step": 11897 + }, + { + "epoch": 1.7532416502946955, + "grad_norm": 0.6306664347648621, + "learning_rate": 1.8601329081883102e-06, + "loss": 0.5236, + "step": 11898 + }, + { + "epoch": 1.7533889980353634, + "grad_norm": 0.6001319885253906, + "learning_rate": 1.8597581319837826e-06, + "loss": 0.5113, + "step": 11899 + }, + { + "epoch": 1.7535363457760313, + "grad_norm": 0.6146848201751709, + "learning_rate": 1.8593833711767722e-06, + "loss": 0.5344, + "step": 11900 + }, + { + "epoch": 1.7536836935166993, + "grad_norm": 0.6628960371017456, + "learning_rate": 1.859008625776291e-06, + "loss": 0.511, + "step": 11901 + }, + { + "epoch": 1.7538310412573674, + "grad_norm": 0.5834515690803528, + "learning_rate": 1.8586338957913519e-06, + "loss": 0.482, + "step": 11902 + }, + { + "epoch": 1.7539783889980354, + "grad_norm": 0.5878354907035828, + "learning_rate": 1.8582591812309663e-06, + "loss": 0.5211, + "step": 11903 + }, + { + "epoch": 1.7541257367387033, + "grad_norm": 0.6050301194190979, + "learning_rate": 1.857884482104147e-06, + "loss": 0.5266, + "step": 11904 + }, + { + "epoch": 1.7542730844793715, + "grad_norm": 0.632175624370575, + "learning_rate": 1.8575097984199038e-06, + "loss": 0.5507, + "step": 11905 + }, + { + "epoch": 1.7544204322200394, + "grad_norm": 0.6143622398376465, + "learning_rate": 1.8571351301872493e-06, + "loss": 0.5109, + "step": 11906 + }, + { + "epoch": 1.7545677799607073, + "grad_norm": 0.6325197219848633, + "learning_rate": 1.8567604774151934e-06, + "loss": 0.5212, + "step": 11907 + }, + { + "epoch": 1.7547151277013753, + "grad_norm": 0.5826661586761475, + "learning_rate": 1.8563858401127462e-06, + "loss": 0.5043, + "step": 11908 + }, + { + "epoch": 1.7548624754420432, + "grad_norm": 0.5878058075904846, + "learning_rate": 1.8560112182889178e-06, + "loss": 0.5109, + "step": 11909 + }, + { + "epoch": 1.7550098231827111, + "grad_norm": 0.5932122468948364, + "learning_rate": 1.8556366119527175e-06, + "loss": 0.5291, + "step": 11910 + }, + { + "epoch": 1.755157170923379, + "grad_norm": 0.6205436587333679, + "learning_rate": 1.8552620211131545e-06, + "loss": 0.4947, + "step": 11911 + }, + { + "epoch": 1.755304518664047, + "grad_norm": 0.6103712916374207, + "learning_rate": 1.8548874457792376e-06, + "loss": 0.5329, + "step": 11912 + }, + { + "epoch": 1.7554518664047152, + "grad_norm": 0.5803231000900269, + "learning_rate": 1.8545128859599749e-06, + "loss": 0.535, + "step": 11913 + }, + { + "epoch": 1.7555992141453831, + "grad_norm": 0.5568118095397949, + "learning_rate": 1.8541383416643745e-06, + "loss": 0.5364, + "step": 11914 + }, + { + "epoch": 1.755746561886051, + "grad_norm": 0.6404611468315125, + "learning_rate": 1.853763812901444e-06, + "loss": 0.5212, + "step": 11915 + }, + { + "epoch": 1.7558939096267192, + "grad_norm": 0.6198306679725647, + "learning_rate": 1.8533892996801911e-06, + "loss": 0.5224, + "step": 11916 + }, + { + "epoch": 1.7560412573673871, + "grad_norm": 0.5672478079795837, + "learning_rate": 1.8530148020096217e-06, + "loss": 0.5126, + "step": 11917 + }, + { + "epoch": 1.756188605108055, + "grad_norm": 0.5858099460601807, + "learning_rate": 1.8526403198987433e-06, + "loss": 0.4972, + "step": 11918 + }, + { + "epoch": 1.756335952848723, + "grad_norm": 0.6092847585678101, + "learning_rate": 1.8522658533565612e-06, + "loss": 0.5276, + "step": 11919 + }, + { + "epoch": 1.756483300589391, + "grad_norm": 0.6268789768218994, + "learning_rate": 1.851891402392082e-06, + "loss": 0.4936, + "step": 11920 + }, + { + "epoch": 1.7566306483300589, + "grad_norm": 0.5804989337921143, + "learning_rate": 1.8515169670143102e-06, + "loss": 0.4945, + "step": 11921 + }, + { + "epoch": 1.7567779960707268, + "grad_norm": 0.6047488451004028, + "learning_rate": 1.8511425472322514e-06, + "loss": 0.5151, + "step": 11922 + }, + { + "epoch": 1.7569253438113948, + "grad_norm": 0.608254075050354, + "learning_rate": 1.8507681430549098e-06, + "loss": 0.537, + "step": 11923 + }, + { + "epoch": 1.757072691552063, + "grad_norm": 0.6243156790733337, + "learning_rate": 1.85039375449129e-06, + "loss": 0.5222, + "step": 11924 + }, + { + "epoch": 1.7572200392927309, + "grad_norm": 0.5965356826782227, + "learning_rate": 1.8500193815503958e-06, + "loss": 0.5441, + "step": 11925 + }, + { + "epoch": 1.7573673870333988, + "grad_norm": 0.6127040386199951, + "learning_rate": 1.849645024241231e-06, + "loss": 0.5149, + "step": 11926 + }, + { + "epoch": 1.757514734774067, + "grad_norm": 0.5857803225517273, + "learning_rate": 1.849270682572798e-06, + "loss": 0.5364, + "step": 11927 + }, + { + "epoch": 1.7576620825147349, + "grad_norm": 0.5977339148521423, + "learning_rate": 1.8488963565540996e-06, + "loss": 0.5156, + "step": 11928 + }, + { + "epoch": 1.7578094302554028, + "grad_norm": 0.5859400629997253, + "learning_rate": 1.8485220461941384e-06, + "loss": 0.5027, + "step": 11929 + }, + { + "epoch": 1.7579567779960708, + "grad_norm": 0.5817168354988098, + "learning_rate": 1.8481477515019166e-06, + "loss": 0.5021, + "step": 11930 + }, + { + "epoch": 1.7581041257367387, + "grad_norm": 0.7149852514266968, + "learning_rate": 1.8477734724864358e-06, + "loss": 0.526, + "step": 11931 + }, + { + "epoch": 1.7582514734774066, + "grad_norm": 0.5886353254318237, + "learning_rate": 1.8473992091566967e-06, + "loss": 0.5044, + "step": 11932 + }, + { + "epoch": 1.7583988212180746, + "grad_norm": 0.6128542423248291, + "learning_rate": 1.8470249615217006e-06, + "loss": 0.5211, + "step": 11933 + }, + { + "epoch": 1.7585461689587425, + "grad_norm": 0.5890260338783264, + "learning_rate": 1.8466507295904484e-06, + "loss": 0.5385, + "step": 11934 + }, + { + "epoch": 1.7586935166994107, + "grad_norm": 0.5946323275566101, + "learning_rate": 1.846276513371939e-06, + "loss": 0.5403, + "step": 11935 + }, + { + "epoch": 1.7588408644400786, + "grad_norm": 0.6083287000656128, + "learning_rate": 1.8459023128751736e-06, + "loss": 0.5017, + "step": 11936 + }, + { + "epoch": 1.7589882121807465, + "grad_norm": 0.6190624237060547, + "learning_rate": 1.8455281281091503e-06, + "loss": 0.5167, + "step": 11937 + }, + { + "epoch": 1.7591355599214147, + "grad_norm": 0.6031579375267029, + "learning_rate": 1.8451539590828685e-06, + "loss": 0.5191, + "step": 11938 + }, + { + "epoch": 1.7592829076620826, + "grad_norm": 0.6189119219779968, + "learning_rate": 1.8447798058053268e-06, + "loss": 0.5427, + "step": 11939 + }, + { + "epoch": 1.7594302554027506, + "grad_norm": 0.5752289891242981, + "learning_rate": 1.8444056682855239e-06, + "loss": 0.5531, + "step": 11940 + }, + { + "epoch": 1.7595776031434185, + "grad_norm": 0.6424001455307007, + "learning_rate": 1.844031546532457e-06, + "loss": 0.5372, + "step": 11941 + }, + { + "epoch": 1.7597249508840864, + "grad_norm": 0.6166041493415833, + "learning_rate": 1.8436574405551221e-06, + "loss": 0.5375, + "step": 11942 + }, + { + "epoch": 1.7598722986247544, + "grad_norm": 0.6006016135215759, + "learning_rate": 1.8432833503625203e-06, + "loss": 0.524, + "step": 11943 + }, + { + "epoch": 1.7600196463654223, + "grad_norm": 0.5880300402641296, + "learning_rate": 1.8429092759636453e-06, + "loss": 0.5483, + "step": 11944 + }, + { + "epoch": 1.7601669941060902, + "grad_norm": 0.5851756930351257, + "learning_rate": 1.8425352173674934e-06, + "loss": 0.5414, + "step": 11945 + }, + { + "epoch": 1.7603143418467584, + "grad_norm": 0.5973255634307861, + "learning_rate": 1.8421611745830618e-06, + "loss": 0.5077, + "step": 11946 + }, + { + "epoch": 1.7604616895874263, + "grad_norm": 0.6178747415542603, + "learning_rate": 1.841787147619345e-06, + "loss": 0.5337, + "step": 11947 + }, + { + "epoch": 1.7606090373280943, + "grad_norm": 0.5824301242828369, + "learning_rate": 1.8414131364853387e-06, + "loss": 0.4998, + "step": 11948 + }, + { + "epoch": 1.7607563850687624, + "grad_norm": 0.6121538281440735, + "learning_rate": 1.8410391411900374e-06, + "loss": 0.5384, + "step": 11949 + }, + { + "epoch": 1.7609037328094304, + "grad_norm": 0.5932035446166992, + "learning_rate": 1.840665161742436e-06, + "loss": 0.5115, + "step": 11950 + }, + { + "epoch": 1.7610510805500983, + "grad_norm": 0.6267505884170532, + "learning_rate": 1.840291198151528e-06, + "loss": 0.5228, + "step": 11951 + }, + { + "epoch": 1.7611984282907662, + "grad_norm": 0.5924410223960876, + "learning_rate": 1.8399172504263075e-06, + "loss": 0.5474, + "step": 11952 + }, + { + "epoch": 1.7613457760314342, + "grad_norm": 0.5999380946159363, + "learning_rate": 1.8395433185757674e-06, + "loss": 0.5357, + "step": 11953 + }, + { + "epoch": 1.761493123772102, + "grad_norm": 0.5869868993759155, + "learning_rate": 1.8391694026089007e-06, + "loss": 0.4928, + "step": 11954 + }, + { + "epoch": 1.76164047151277, + "grad_norm": 0.613437831401825, + "learning_rate": 1.8387955025346998e-06, + "loss": 0.5019, + "step": 11955 + }, + { + "epoch": 1.761787819253438, + "grad_norm": 0.6300199031829834, + "learning_rate": 1.8384216183621573e-06, + "loss": 0.5278, + "step": 11956 + }, + { + "epoch": 1.7619351669941061, + "grad_norm": 0.6225699782371521, + "learning_rate": 1.8380477501002642e-06, + "loss": 0.5313, + "step": 11957 + }, + { + "epoch": 1.762082514734774, + "grad_norm": 0.5865854620933533, + "learning_rate": 1.8376738977580122e-06, + "loss": 0.5079, + "step": 11958 + }, + { + "epoch": 1.762229862475442, + "grad_norm": 0.595840573310852, + "learning_rate": 1.8373000613443925e-06, + "loss": 0.5205, + "step": 11959 + }, + { + "epoch": 1.7623772102161102, + "grad_norm": 0.5600127577781677, + "learning_rate": 1.8369262408683958e-06, + "loss": 0.5139, + "step": 11960 + }, + { + "epoch": 1.762524557956778, + "grad_norm": 0.6250534653663635, + "learning_rate": 1.8365524363390113e-06, + "loss": 0.527, + "step": 11961 + }, + { + "epoch": 1.762671905697446, + "grad_norm": 0.5705733895301819, + "learning_rate": 1.83617864776523e-06, + "loss": 0.5268, + "step": 11962 + }, + { + "epoch": 1.762819253438114, + "grad_norm": 0.631588339805603, + "learning_rate": 1.835804875156041e-06, + "loss": 0.5287, + "step": 11963 + }, + { + "epoch": 1.762966601178782, + "grad_norm": 0.6005550622940063, + "learning_rate": 1.835431118520433e-06, + "loss": 0.5442, + "step": 11964 + }, + { + "epoch": 1.7631139489194498, + "grad_norm": 0.5915423035621643, + "learning_rate": 1.8350573778673946e-06, + "loss": 0.5372, + "step": 11965 + }, + { + "epoch": 1.7632612966601178, + "grad_norm": 0.632602870464325, + "learning_rate": 1.8346836532059149e-06, + "loss": 0.5432, + "step": 11966 + }, + { + "epoch": 1.7634086444007857, + "grad_norm": 0.6256498098373413, + "learning_rate": 1.834309944544981e-06, + "loss": 0.5332, + "step": 11967 + }, + { + "epoch": 1.7635559921414539, + "grad_norm": 0.5703648328781128, + "learning_rate": 1.833936251893581e-06, + "loss": 0.5182, + "step": 11968 + }, + { + "epoch": 1.7637033398821218, + "grad_norm": 0.5669651627540588, + "learning_rate": 1.8335625752607015e-06, + "loss": 0.5107, + "step": 11969 + }, + { + "epoch": 1.7638506876227897, + "grad_norm": 0.6195549368858337, + "learning_rate": 1.8331889146553294e-06, + "loss": 0.5377, + "step": 11970 + }, + { + "epoch": 1.763998035363458, + "grad_norm": 0.5822687745094299, + "learning_rate": 1.8328152700864513e-06, + "loss": 0.5225, + "step": 11971 + }, + { + "epoch": 1.7641453831041258, + "grad_norm": 0.5999653339385986, + "learning_rate": 1.832441641563053e-06, + "loss": 0.5332, + "step": 11972 + }, + { + "epoch": 1.7642927308447938, + "grad_norm": 0.5688645243644714, + "learning_rate": 1.8320680290941201e-06, + "loss": 0.5145, + "step": 11973 + }, + { + "epoch": 1.7644400785854617, + "grad_norm": 0.6032887697219849, + "learning_rate": 1.8316944326886383e-06, + "loss": 0.4747, + "step": 11974 + }, + { + "epoch": 1.7645874263261296, + "grad_norm": 0.6023910045623779, + "learning_rate": 1.8313208523555914e-06, + "loss": 0.5116, + "step": 11975 + }, + { + "epoch": 1.7647347740667976, + "grad_norm": 0.5903893709182739, + "learning_rate": 1.8309472881039647e-06, + "loss": 0.5073, + "step": 11976 + }, + { + "epoch": 1.7648821218074655, + "grad_norm": 0.6143282651901245, + "learning_rate": 1.8305737399427416e-06, + "loss": 0.5162, + "step": 11977 + }, + { + "epoch": 1.7650294695481334, + "grad_norm": 0.5815592408180237, + "learning_rate": 1.830200207880906e-06, + "loss": 0.5341, + "step": 11978 + }, + { + "epoch": 1.7651768172888016, + "grad_norm": 0.5962185859680176, + "learning_rate": 1.8298266919274405e-06, + "loss": 0.5366, + "step": 11979 + }, + { + "epoch": 1.7653241650294695, + "grad_norm": 0.5986121296882629, + "learning_rate": 1.8294531920913295e-06, + "loss": 0.5108, + "step": 11980 + }, + { + "epoch": 1.7654715127701375, + "grad_norm": 0.609014093875885, + "learning_rate": 1.8290797083815547e-06, + "loss": 0.5068, + "step": 11981 + }, + { + "epoch": 1.7656188605108056, + "grad_norm": 0.5798143744468689, + "learning_rate": 1.8287062408070985e-06, + "loss": 0.5127, + "step": 11982 + }, + { + "epoch": 1.7657662082514736, + "grad_norm": 0.6196554899215698, + "learning_rate": 1.8283327893769421e-06, + "loss": 0.5231, + "step": 11983 + }, + { + "epoch": 1.7659135559921415, + "grad_norm": 0.5831034183502197, + "learning_rate": 1.8279593541000673e-06, + "loss": 0.5012, + "step": 11984 + }, + { + "epoch": 1.7660609037328094, + "grad_norm": 0.583702564239502, + "learning_rate": 1.8275859349854547e-06, + "loss": 0.5369, + "step": 11985 + }, + { + "epoch": 1.7662082514734774, + "grad_norm": 0.5975921154022217, + "learning_rate": 1.827212532042085e-06, + "loss": 0.5332, + "step": 11986 + }, + { + "epoch": 1.7663555992141453, + "grad_norm": 0.5894949436187744, + "learning_rate": 1.8268391452789386e-06, + "loss": 0.5144, + "step": 11987 + }, + { + "epoch": 1.7665029469548132, + "grad_norm": 0.5912629961967468, + "learning_rate": 1.8264657747049946e-06, + "loss": 0.5264, + "step": 11988 + }, + { + "epoch": 1.7666502946954812, + "grad_norm": 0.5794445872306824, + "learning_rate": 1.826092420329233e-06, + "loss": 0.5579, + "step": 11989 + }, + { + "epoch": 1.7667976424361493, + "grad_norm": 0.5956168174743652, + "learning_rate": 1.8257190821606325e-06, + "loss": 0.4895, + "step": 11990 + }, + { + "epoch": 1.7669449901768173, + "grad_norm": 0.5917425155639648, + "learning_rate": 1.825345760208172e-06, + "loss": 0.4979, + "step": 11991 + }, + { + "epoch": 1.7670923379174852, + "grad_norm": 0.6053353548049927, + "learning_rate": 1.82497245448083e-06, + "loss": 0.5142, + "step": 11992 + }, + { + "epoch": 1.7672396856581534, + "grad_norm": 0.5907410979270935, + "learning_rate": 1.8245991649875832e-06, + "loss": 0.4973, + "step": 11993 + }, + { + "epoch": 1.7673870333988213, + "grad_norm": 0.6026906371116638, + "learning_rate": 1.82422589173741e-06, + "loss": 0.5238, + "step": 11994 + }, + { + "epoch": 1.7675343811394892, + "grad_norm": 0.6093196868896484, + "learning_rate": 1.8238526347392871e-06, + "loss": 0.5063, + "step": 11995 + }, + { + "epoch": 1.7676817288801572, + "grad_norm": 0.6244550347328186, + "learning_rate": 1.823479394002191e-06, + "loss": 0.5363, + "step": 11996 + }, + { + "epoch": 1.7678290766208251, + "grad_norm": 0.5944084525108337, + "learning_rate": 1.8231061695350982e-06, + "loss": 0.552, + "step": 11997 + }, + { + "epoch": 1.767976424361493, + "grad_norm": 0.608955442905426, + "learning_rate": 1.8227329613469847e-06, + "loss": 0.5433, + "step": 11998 + }, + { + "epoch": 1.768123772102161, + "grad_norm": 0.5819432735443115, + "learning_rate": 1.8223597694468253e-06, + "loss": 0.5675, + "step": 11999 + }, + { + "epoch": 1.768271119842829, + "grad_norm": 0.5815385580062866, + "learning_rate": 1.8219865938435964e-06, + "loss": 0.5175, + "step": 12000 + }, + { + "epoch": 1.768418467583497, + "grad_norm": 0.6149349808692932, + "learning_rate": 1.8216134345462713e-06, + "loss": 0.5462, + "step": 12001 + }, + { + "epoch": 1.768565815324165, + "grad_norm": 0.6183425188064575, + "learning_rate": 1.821240291563825e-06, + "loss": 0.5366, + "step": 12002 + }, + { + "epoch": 1.7687131630648332, + "grad_norm": 0.6055321097373962, + "learning_rate": 1.8208671649052318e-06, + "loss": 0.517, + "step": 12003 + }, + { + "epoch": 1.768860510805501, + "grad_norm": 0.6158315539360046, + "learning_rate": 1.8204940545794641e-06, + "loss": 0.4957, + "step": 12004 + }, + { + "epoch": 1.769007858546169, + "grad_norm": 0.5830575823783875, + "learning_rate": 1.820120960595496e-06, + "loss": 0.5045, + "step": 12005 + }, + { + "epoch": 1.769155206286837, + "grad_norm": 0.5634695887565613, + "learning_rate": 1.8197478829623e-06, + "loss": 0.5104, + "step": 12006 + }, + { + "epoch": 1.769302554027505, + "grad_norm": 0.6188579797744751, + "learning_rate": 1.8193748216888484e-06, + "loss": 0.5221, + "step": 12007 + }, + { + "epoch": 1.7694499017681729, + "grad_norm": 0.5975366830825806, + "learning_rate": 1.8190017767841128e-06, + "loss": 0.5256, + "step": 12008 + }, + { + "epoch": 1.7695972495088408, + "grad_norm": 0.610837996006012, + "learning_rate": 1.8186287482570653e-06, + "loss": 0.5465, + "step": 12009 + }, + { + "epoch": 1.7697445972495087, + "grad_norm": 0.5902300477027893, + "learning_rate": 1.8182557361166769e-06, + "loss": 0.5365, + "step": 12010 + }, + { + "epoch": 1.7698919449901767, + "grad_norm": 0.6016647815704346, + "learning_rate": 1.8178827403719178e-06, + "loss": 0.5157, + "step": 12011 + }, + { + "epoch": 1.7700392927308448, + "grad_norm": 0.6150363683700562, + "learning_rate": 1.8175097610317594e-06, + "loss": 0.5454, + "step": 12012 + }, + { + "epoch": 1.7701866404715128, + "grad_norm": 0.5577608346939087, + "learning_rate": 1.817136798105171e-06, + "loss": 0.5324, + "step": 12013 + }, + { + "epoch": 1.770333988212181, + "grad_norm": 0.6143082976341248, + "learning_rate": 1.8167638516011223e-06, + "loss": 0.5352, + "step": 12014 + }, + { + "epoch": 1.7704813359528488, + "grad_norm": 0.5824859142303467, + "learning_rate": 1.8163909215285825e-06, + "loss": 0.5482, + "step": 12015 + }, + { + "epoch": 1.7706286836935168, + "grad_norm": 0.6084660291671753, + "learning_rate": 1.81601800789652e-06, + "loss": 0.5466, + "step": 12016 + }, + { + "epoch": 1.7707760314341847, + "grad_norm": 0.6205120086669922, + "learning_rate": 1.815645110713904e-06, + "loss": 0.5084, + "step": 12017 + }, + { + "epoch": 1.7709233791748527, + "grad_norm": 0.5867165327072144, + "learning_rate": 1.8152722299897018e-06, + "loss": 0.5367, + "step": 12018 + }, + { + "epoch": 1.7710707269155206, + "grad_norm": 0.6314483880996704, + "learning_rate": 1.8148993657328818e-06, + "loss": 0.5234, + "step": 12019 + }, + { + "epoch": 1.7712180746561885, + "grad_norm": 0.6167195439338684, + "learning_rate": 1.8145265179524107e-06, + "loss": 0.496, + "step": 12020 + }, + { + "epoch": 1.7713654223968565, + "grad_norm": 0.5728719234466553, + "learning_rate": 1.814153686657255e-06, + "loss": 0.5373, + "step": 12021 + }, + { + "epoch": 1.7715127701375244, + "grad_norm": 0.5843507051467896, + "learning_rate": 1.8137808718563815e-06, + "loss": 0.5256, + "step": 12022 + }, + { + "epoch": 1.7716601178781926, + "grad_norm": 0.6248809099197388, + "learning_rate": 1.8134080735587563e-06, + "loss": 0.5474, + "step": 12023 + }, + { + "epoch": 1.7718074656188605, + "grad_norm": 0.5928716659545898, + "learning_rate": 1.8130352917733452e-06, + "loss": 0.5198, + "step": 12024 + }, + { + "epoch": 1.7719548133595286, + "grad_norm": 0.598904013633728, + "learning_rate": 1.8126625265091128e-06, + "loss": 0.5509, + "step": 12025 + }, + { + "epoch": 1.7721021611001966, + "grad_norm": 0.5951171517372131, + "learning_rate": 1.8122897777750242e-06, + "loss": 0.5273, + "step": 12026 + }, + { + "epoch": 1.7722495088408645, + "grad_norm": 0.6214570999145508, + "learning_rate": 1.8119170455800438e-06, + "loss": 0.5202, + "step": 12027 + }, + { + "epoch": 1.7723968565815325, + "grad_norm": 0.5707209706306458, + "learning_rate": 1.8115443299331359e-06, + "loss": 0.5264, + "step": 12028 + }, + { + "epoch": 1.7725442043222004, + "grad_norm": 0.6044912934303284, + "learning_rate": 1.8111716308432642e-06, + "loss": 0.5615, + "step": 12029 + }, + { + "epoch": 1.7726915520628683, + "grad_norm": 0.5944265723228455, + "learning_rate": 1.8107989483193914e-06, + "loss": 0.538, + "step": 12030 + }, + { + "epoch": 1.7728388998035363, + "grad_norm": 0.6029117107391357, + "learning_rate": 1.8104262823704807e-06, + "loss": 0.5302, + "step": 12031 + }, + { + "epoch": 1.7729862475442042, + "grad_norm": 0.6285424828529358, + "learning_rate": 1.8100536330054946e-06, + "loss": 0.5188, + "step": 12032 + }, + { + "epoch": 1.7731335952848721, + "grad_norm": 0.6088384389877319, + "learning_rate": 1.8096810002333948e-06, + "loss": 0.5114, + "step": 12033 + }, + { + "epoch": 1.7732809430255403, + "grad_norm": 0.6148783564567566, + "learning_rate": 1.809308384063143e-06, + "loss": 0.5282, + "step": 12034 + }, + { + "epoch": 1.7734282907662082, + "grad_norm": 0.5987503528594971, + "learning_rate": 1.8089357845037008e-06, + "loss": 0.5114, + "step": 12035 + }, + { + "epoch": 1.7735756385068764, + "grad_norm": 0.5997693538665771, + "learning_rate": 1.8085632015640292e-06, + "loss": 0.5267, + "step": 12036 + }, + { + "epoch": 1.7737229862475443, + "grad_norm": 0.6019460558891296, + "learning_rate": 1.808190635253088e-06, + "loss": 0.5256, + "step": 12037 + }, + { + "epoch": 1.7738703339882123, + "grad_norm": 0.6375225186347961, + "learning_rate": 1.8078180855798378e-06, + "loss": 0.5413, + "step": 12038 + }, + { + "epoch": 1.7740176817288802, + "grad_norm": 0.5745946168899536, + "learning_rate": 1.8074455525532376e-06, + "loss": 0.5062, + "step": 12039 + }, + { + "epoch": 1.7741650294695481, + "grad_norm": 0.6262592673301697, + "learning_rate": 1.8070730361822472e-06, + "loss": 0.5271, + "step": 12040 + }, + { + "epoch": 1.774312377210216, + "grad_norm": 0.6117158532142639, + "learning_rate": 1.8067005364758259e-06, + "loss": 0.5214, + "step": 12041 + }, + { + "epoch": 1.774459724950884, + "grad_norm": 0.6191735863685608, + "learning_rate": 1.8063280534429307e-06, + "loss": 0.5309, + "step": 12042 + }, + { + "epoch": 1.774607072691552, + "grad_norm": 0.5909665822982788, + "learning_rate": 1.8059555870925204e-06, + "loss": 0.4762, + "step": 12043 + }, + { + "epoch": 1.77475442043222, + "grad_norm": 0.5960121154785156, + "learning_rate": 1.805583137433553e-06, + "loss": 0.4943, + "step": 12044 + }, + { + "epoch": 1.774901768172888, + "grad_norm": 0.630036473274231, + "learning_rate": 1.8052107044749855e-06, + "loss": 0.5529, + "step": 12045 + }, + { + "epoch": 1.775049115913556, + "grad_norm": 0.6884161829948425, + "learning_rate": 1.8048382882257748e-06, + "loss": 0.4967, + "step": 12046 + }, + { + "epoch": 1.7751964636542241, + "grad_norm": 0.6111856698989868, + "learning_rate": 1.8044658886948768e-06, + "loss": 0.5053, + "step": 12047 + }, + { + "epoch": 1.775343811394892, + "grad_norm": 0.6040043830871582, + "learning_rate": 1.8040935058912483e-06, + "loss": 0.5143, + "step": 12048 + }, + { + "epoch": 1.77549115913556, + "grad_norm": 0.5986577272415161, + "learning_rate": 1.8037211398238445e-06, + "loss": 0.5369, + "step": 12049 + }, + { + "epoch": 1.775638506876228, + "grad_norm": 0.6302375197410583, + "learning_rate": 1.8033487905016207e-06, + "loss": 0.5052, + "step": 12050 + }, + { + "epoch": 1.7757858546168959, + "grad_norm": 0.6006875038146973, + "learning_rate": 1.8029764579335315e-06, + "loss": 0.5389, + "step": 12051 + }, + { + "epoch": 1.7759332023575638, + "grad_norm": 0.6296342611312866, + "learning_rate": 1.8026041421285316e-06, + "loss": 0.5331, + "step": 12052 + }, + { + "epoch": 1.7760805500982317, + "grad_norm": 0.5799417495727539, + "learning_rate": 1.8022318430955744e-06, + "loss": 0.5262, + "step": 12053 + }, + { + "epoch": 1.7762278978388997, + "grad_norm": 0.6089008450508118, + "learning_rate": 1.8018595608436146e-06, + "loss": 0.5367, + "step": 12054 + }, + { + "epoch": 1.7763752455795678, + "grad_norm": 0.6551491022109985, + "learning_rate": 1.8014872953816053e-06, + "loss": 0.5233, + "step": 12055 + }, + { + "epoch": 1.7765225933202358, + "grad_norm": 0.6015550494194031, + "learning_rate": 1.8011150467184988e-06, + "loss": 0.5323, + "step": 12056 + }, + { + "epoch": 1.7766699410609037, + "grad_norm": 0.6049721240997314, + "learning_rate": 1.8007428148632478e-06, + "loss": 0.5172, + "step": 12057 + }, + { + "epoch": 1.7768172888015719, + "grad_norm": 0.5908800363540649, + "learning_rate": 1.8003705998248042e-06, + "loss": 0.5443, + "step": 12058 + }, + { + "epoch": 1.7769646365422398, + "grad_norm": 0.6084566116333008, + "learning_rate": 1.799998401612119e-06, + "loss": 0.5148, + "step": 12059 + }, + { + "epoch": 1.7771119842829077, + "grad_norm": 0.6195356845855713, + "learning_rate": 1.7996262202341444e-06, + "loss": 0.5233, + "step": 12060 + }, + { + "epoch": 1.7772593320235757, + "grad_norm": 0.6758645176887512, + "learning_rate": 1.7992540556998307e-06, + "loss": 0.5277, + "step": 12061 + }, + { + "epoch": 1.7774066797642436, + "grad_norm": 0.6000088453292847, + "learning_rate": 1.798881908018128e-06, + "loss": 0.5458, + "step": 12062 + }, + { + "epoch": 1.7775540275049115, + "grad_norm": 0.5630125403404236, + "learning_rate": 1.7985097771979866e-06, + "loss": 0.553, + "step": 12063 + }, + { + "epoch": 1.7777013752455795, + "grad_norm": 0.6279202699661255, + "learning_rate": 1.7981376632483561e-06, + "loss": 0.4992, + "step": 12064 + }, + { + "epoch": 1.7778487229862474, + "grad_norm": 0.5903679132461548, + "learning_rate": 1.7977655661781857e-06, + "loss": 0.5034, + "step": 12065 + }, + { + "epoch": 1.7779960707269156, + "grad_norm": 0.5721664428710938, + "learning_rate": 1.7973934859964236e-06, + "loss": 0.5193, + "step": 12066 + }, + { + "epoch": 1.7781434184675835, + "grad_norm": 0.6145866513252258, + "learning_rate": 1.7970214227120192e-06, + "loss": 0.5443, + "step": 12067 + }, + { + "epoch": 1.7782907662082514, + "grad_norm": 0.6341841816902161, + "learning_rate": 1.7966493763339197e-06, + "loss": 0.5213, + "step": 12068 + }, + { + "epoch": 1.7784381139489196, + "grad_norm": 0.5831978917121887, + "learning_rate": 1.7962773468710726e-06, + "loss": 0.5337, + "step": 12069 + }, + { + "epoch": 1.7785854616895875, + "grad_norm": 0.5894931554794312, + "learning_rate": 1.7959053343324256e-06, + "loss": 0.5155, + "step": 12070 + }, + { + "epoch": 1.7787328094302555, + "grad_norm": 0.6313118934631348, + "learning_rate": 1.7955333387269248e-06, + "loss": 0.5329, + "step": 12071 + }, + { + "epoch": 1.7788801571709234, + "grad_norm": 0.6172979474067688, + "learning_rate": 1.7951613600635167e-06, + "loss": 0.5204, + "step": 12072 + }, + { + "epoch": 1.7790275049115913, + "grad_norm": 0.6142081618309021, + "learning_rate": 1.7947893983511477e-06, + "loss": 0.4652, + "step": 12073 + }, + { + "epoch": 1.7791748526522593, + "grad_norm": 0.6018050909042358, + "learning_rate": 1.7944174535987623e-06, + "loss": 0.5173, + "step": 12074 + }, + { + "epoch": 1.7793222003929272, + "grad_norm": 0.5883392691612244, + "learning_rate": 1.7940455258153067e-06, + "loss": 0.5244, + "step": 12075 + }, + { + "epoch": 1.7794695481335951, + "grad_norm": 0.5724472403526306, + "learning_rate": 1.793673615009725e-06, + "loss": 0.5181, + "step": 12076 + }, + { + "epoch": 1.7796168958742633, + "grad_norm": 0.6417863368988037, + "learning_rate": 1.7933017211909615e-06, + "loss": 0.5358, + "step": 12077 + }, + { + "epoch": 1.7797642436149312, + "grad_norm": 0.6671501994132996, + "learning_rate": 1.7929298443679605e-06, + "loss": 0.5509, + "step": 12078 + }, + { + "epoch": 1.7799115913555992, + "grad_norm": 0.6555772423744202, + "learning_rate": 1.7925579845496643e-06, + "loss": 0.5095, + "step": 12079 + }, + { + "epoch": 1.7800589390962673, + "grad_norm": 0.5795878767967224, + "learning_rate": 1.7921861417450176e-06, + "loss": 0.5133, + "step": 12080 + }, + { + "epoch": 1.7802062868369353, + "grad_norm": 0.5988327860832214, + "learning_rate": 1.7918143159629618e-06, + "loss": 0.5471, + "step": 12081 + }, + { + "epoch": 1.7803536345776032, + "grad_norm": 0.6072269678115845, + "learning_rate": 1.7914425072124397e-06, + "loss": 0.5216, + "step": 12082 + }, + { + "epoch": 1.7805009823182711, + "grad_norm": 0.6138641238212585, + "learning_rate": 1.7910707155023927e-06, + "loss": 0.5199, + "step": 12083 + }, + { + "epoch": 1.780648330058939, + "grad_norm": 0.583185076713562, + "learning_rate": 1.7906989408417625e-06, + "loss": 0.5295, + "step": 12084 + }, + { + "epoch": 1.780795677799607, + "grad_norm": 0.6194505095481873, + "learning_rate": 1.7903271832394904e-06, + "loss": 0.5149, + "step": 12085 + }, + { + "epoch": 1.780943025540275, + "grad_norm": 0.564257025718689, + "learning_rate": 1.7899554427045162e-06, + "loss": 0.4964, + "step": 12086 + }, + { + "epoch": 1.7810903732809429, + "grad_norm": 0.6136214733123779, + "learning_rate": 1.789583719245781e-06, + "loss": 0.517, + "step": 12087 + }, + { + "epoch": 1.781237721021611, + "grad_norm": 0.6453644633293152, + "learning_rate": 1.789212012872224e-06, + "loss": 0.5169, + "step": 12088 + }, + { + "epoch": 1.781385068762279, + "grad_norm": 0.5933575630187988, + "learning_rate": 1.7888403235927854e-06, + "loss": 0.5313, + "step": 12089 + }, + { + "epoch": 1.781532416502947, + "grad_norm": 0.5816054344177246, + "learning_rate": 1.7884686514164018e-06, + "loss": 0.5298, + "step": 12090 + }, + { + "epoch": 1.781679764243615, + "grad_norm": 0.6404828429222107, + "learning_rate": 1.7880969963520146e-06, + "loss": 0.5187, + "step": 12091 + }, + { + "epoch": 1.781827111984283, + "grad_norm": 0.5971904993057251, + "learning_rate": 1.7877253584085601e-06, + "loss": 0.5183, + "step": 12092 + }, + { + "epoch": 1.781974459724951, + "grad_norm": 0.5828503966331482, + "learning_rate": 1.7873537375949774e-06, + "loss": 0.5307, + "step": 12093 + }, + { + "epoch": 1.7821218074656189, + "grad_norm": 0.6106052994728088, + "learning_rate": 1.7869821339202025e-06, + "loss": 0.5122, + "step": 12094 + }, + { + "epoch": 1.7822691552062868, + "grad_norm": 0.6236417889595032, + "learning_rate": 1.7866105473931732e-06, + "loss": 0.5048, + "step": 12095 + }, + { + "epoch": 1.7824165029469548, + "grad_norm": 0.5888512134552002, + "learning_rate": 1.7862389780228257e-06, + "loss": 0.557, + "step": 12096 + }, + { + "epoch": 1.7825638506876227, + "grad_norm": 0.607877254486084, + "learning_rate": 1.7858674258180958e-06, + "loss": 0.5339, + "step": 12097 + }, + { + "epoch": 1.7827111984282906, + "grad_norm": 0.621333122253418, + "learning_rate": 1.7854958907879194e-06, + "loss": 0.5572, + "step": 12098 + }, + { + "epoch": 1.7828585461689588, + "grad_norm": 0.6523053050041199, + "learning_rate": 1.7851243729412319e-06, + "loss": 0.5362, + "step": 12099 + }, + { + "epoch": 1.7830058939096267, + "grad_norm": 0.6220071315765381, + "learning_rate": 1.7847528722869678e-06, + "loss": 0.5342, + "step": 12100 + }, + { + "epoch": 1.7831532416502947, + "grad_norm": 0.6248462200164795, + "learning_rate": 1.7843813888340617e-06, + "loss": 0.5211, + "step": 12101 + }, + { + "epoch": 1.7833005893909628, + "grad_norm": 0.5931789875030518, + "learning_rate": 1.7840099225914473e-06, + "loss": 0.5338, + "step": 12102 + }, + { + "epoch": 1.7834479371316307, + "grad_norm": 0.6272584199905396, + "learning_rate": 1.7836384735680585e-06, + "loss": 0.5292, + "step": 12103 + }, + { + "epoch": 1.7835952848722987, + "grad_norm": 0.5715369582176208, + "learning_rate": 1.7832670417728288e-06, + "loss": 0.5121, + "step": 12104 + }, + { + "epoch": 1.7837426326129666, + "grad_norm": 0.5984556078910828, + "learning_rate": 1.7828956272146903e-06, + "loss": 0.5233, + "step": 12105 + }, + { + "epoch": 1.7838899803536346, + "grad_norm": 0.5961918830871582, + "learning_rate": 1.7825242299025752e-06, + "loss": 0.4967, + "step": 12106 + }, + { + "epoch": 1.7840373280943025, + "grad_norm": 0.6021654605865479, + "learning_rate": 1.782152849845417e-06, + "loss": 0.5206, + "step": 12107 + }, + { + "epoch": 1.7841846758349704, + "grad_norm": 0.5876522660255432, + "learning_rate": 1.7817814870521456e-06, + "loss": 0.5145, + "step": 12108 + }, + { + "epoch": 1.7843320235756384, + "grad_norm": 0.6105210185050964, + "learning_rate": 1.781410141531692e-06, + "loss": 0.5229, + "step": 12109 + }, + { + "epoch": 1.7844793713163065, + "grad_norm": 0.5931021571159363, + "learning_rate": 1.781038813292988e-06, + "loss": 0.528, + "step": 12110 + }, + { + "epoch": 1.7846267190569745, + "grad_norm": 0.5781100988388062, + "learning_rate": 1.7806675023449629e-06, + "loss": 0.5321, + "step": 12111 + }, + { + "epoch": 1.7847740667976424, + "grad_norm": 0.5783883333206177, + "learning_rate": 1.7802962086965473e-06, + "loss": 0.5011, + "step": 12112 + }, + { + "epoch": 1.7849214145383105, + "grad_norm": 0.5964802503585815, + "learning_rate": 1.7799249323566702e-06, + "loss": 0.5346, + "step": 12113 + }, + { + "epoch": 1.7850687622789785, + "grad_norm": 0.6158037185668945, + "learning_rate": 1.7795536733342606e-06, + "loss": 0.5136, + "step": 12114 + }, + { + "epoch": 1.7852161100196464, + "grad_norm": 0.6060144305229187, + "learning_rate": 1.7791824316382475e-06, + "loss": 0.5265, + "step": 12115 + }, + { + "epoch": 1.7853634577603144, + "grad_norm": 0.6077468395233154, + "learning_rate": 1.7788112072775588e-06, + "loss": 0.5199, + "step": 12116 + }, + { + "epoch": 1.7855108055009823, + "grad_norm": 0.6055959463119507, + "learning_rate": 1.778440000261122e-06, + "loss": 0.4991, + "step": 12117 + }, + { + "epoch": 1.7856581532416502, + "grad_norm": 0.5932065844535828, + "learning_rate": 1.778068810597865e-06, + "loss": 0.5038, + "step": 12118 + }, + { + "epoch": 1.7858055009823182, + "grad_norm": 0.6636227369308472, + "learning_rate": 1.7776976382967146e-06, + "loss": 0.5183, + "step": 12119 + }, + { + "epoch": 1.785952848722986, + "grad_norm": 0.5807492733001709, + "learning_rate": 1.777326483366597e-06, + "loss": 0.5239, + "step": 12120 + }, + { + "epoch": 1.7861001964636543, + "grad_norm": 0.6080461144447327, + "learning_rate": 1.7769553458164388e-06, + "loss": 0.5371, + "step": 12121 + }, + { + "epoch": 1.7862475442043222, + "grad_norm": 0.6217292547225952, + "learning_rate": 1.7765842256551653e-06, + "loss": 0.5074, + "step": 12122 + }, + { + "epoch": 1.7863948919449901, + "grad_norm": 0.6036060452461243, + "learning_rate": 1.7762131228917019e-06, + "loss": 0.5392, + "step": 12123 + }, + { + "epoch": 1.7865422396856583, + "grad_norm": 0.6314602494239807, + "learning_rate": 1.7758420375349732e-06, + "loss": 0.542, + "step": 12124 + }, + { + "epoch": 1.7866895874263262, + "grad_norm": 0.5926256775856018, + "learning_rate": 1.7754709695939037e-06, + "loss": 0.5314, + "step": 12125 + }, + { + "epoch": 1.7868369351669942, + "grad_norm": 0.5863423943519592, + "learning_rate": 1.7750999190774177e-06, + "loss": 0.5141, + "step": 12126 + }, + { + "epoch": 1.786984282907662, + "grad_norm": 0.6327468752861023, + "learning_rate": 1.7747288859944385e-06, + "loss": 0.5005, + "step": 12127 + }, + { + "epoch": 1.78713163064833, + "grad_norm": 0.5989857912063599, + "learning_rate": 1.77435787035389e-06, + "loss": 0.5134, + "step": 12128 + }, + { + "epoch": 1.787278978388998, + "grad_norm": 0.5752421021461487, + "learning_rate": 1.7739868721646938e-06, + "loss": 0.5243, + "step": 12129 + }, + { + "epoch": 1.787426326129666, + "grad_norm": 0.588973879814148, + "learning_rate": 1.7736158914357732e-06, + "loss": 0.5214, + "step": 12130 + }, + { + "epoch": 1.7875736738703338, + "grad_norm": 0.5956103801727295, + "learning_rate": 1.7732449281760494e-06, + "loss": 0.5276, + "step": 12131 + }, + { + "epoch": 1.787721021611002, + "grad_norm": 0.6334179043769836, + "learning_rate": 1.7728739823944447e-06, + "loss": 0.5413, + "step": 12132 + }, + { + "epoch": 1.78786836935167, + "grad_norm": 0.5963438153266907, + "learning_rate": 1.7725030540998792e-06, + "loss": 0.5322, + "step": 12133 + }, + { + "epoch": 1.7880157170923379, + "grad_norm": 0.5901161432266235, + "learning_rate": 1.7721321433012741e-06, + "loss": 0.5149, + "step": 12134 + }, + { + "epoch": 1.788163064833006, + "grad_norm": 0.6119266152381897, + "learning_rate": 1.7717612500075498e-06, + "loss": 0.5483, + "step": 12135 + }, + { + "epoch": 1.788310412573674, + "grad_norm": 0.6438882350921631, + "learning_rate": 1.7713903742276256e-06, + "loss": 0.5463, + "step": 12136 + }, + { + "epoch": 1.788457760314342, + "grad_norm": 0.5846942663192749, + "learning_rate": 1.7710195159704213e-06, + "loss": 0.5179, + "step": 12137 + }, + { + "epoch": 1.7886051080550098, + "grad_norm": 0.6229512095451355, + "learning_rate": 1.770648675244856e-06, + "loss": 0.5313, + "step": 12138 + }, + { + "epoch": 1.7887524557956778, + "grad_norm": 0.6110730171203613, + "learning_rate": 1.7702778520598475e-06, + "loss": 0.5009, + "step": 12139 + }, + { + "epoch": 1.7888998035363457, + "grad_norm": 0.6311581134796143, + "learning_rate": 1.7699070464243145e-06, + "loss": 0.5664, + "step": 12140 + }, + { + "epoch": 1.7890471512770136, + "grad_norm": 0.6198157072067261, + "learning_rate": 1.7695362583471749e-06, + "loss": 0.5245, + "step": 12141 + }, + { + "epoch": 1.7891944990176816, + "grad_norm": 0.6079050302505493, + "learning_rate": 1.7691654878373454e-06, + "loss": 0.5302, + "step": 12142 + }, + { + "epoch": 1.7893418467583497, + "grad_norm": 0.6097778081893921, + "learning_rate": 1.7687947349037431e-06, + "loss": 0.4888, + "step": 12143 + }, + { + "epoch": 1.7894891944990177, + "grad_norm": 0.6073293685913086, + "learning_rate": 1.768423999555285e-06, + "loss": 0.5208, + "step": 12144 + }, + { + "epoch": 1.7896365422396858, + "grad_norm": 0.6134012937545776, + "learning_rate": 1.768053281800886e-06, + "loss": 0.5335, + "step": 12145 + }, + { + "epoch": 1.7897838899803538, + "grad_norm": 0.6372425556182861, + "learning_rate": 1.7676825816494626e-06, + "loss": 0.5128, + "step": 12146 + }, + { + "epoch": 1.7899312377210217, + "grad_norm": 0.5851307511329651, + "learning_rate": 1.7673118991099293e-06, + "loss": 0.5186, + "step": 12147 + }, + { + "epoch": 1.7900785854616896, + "grad_norm": 0.6038761138916016, + "learning_rate": 1.7669412341912012e-06, + "loss": 0.5367, + "step": 12148 + }, + { + "epoch": 1.7902259332023576, + "grad_norm": 0.6161606311798096, + "learning_rate": 1.7665705869021926e-06, + "loss": 0.5386, + "step": 12149 + }, + { + "epoch": 1.7903732809430255, + "grad_norm": 0.6024687886238098, + "learning_rate": 1.766199957251818e-06, + "loss": 0.5339, + "step": 12150 + }, + { + "epoch": 1.7905206286836934, + "grad_norm": 0.5994341373443604, + "learning_rate": 1.7658293452489895e-06, + "loss": 0.5559, + "step": 12151 + }, + { + "epoch": 1.7906679764243614, + "grad_norm": 0.6217582821846008, + "learning_rate": 1.7654587509026211e-06, + "loss": 0.5083, + "step": 12152 + }, + { + "epoch": 1.7908153241650293, + "grad_norm": 0.5906957983970642, + "learning_rate": 1.7650881742216252e-06, + "loss": 0.537, + "step": 12153 + }, + { + "epoch": 1.7909626719056975, + "grad_norm": 0.6190986037254333, + "learning_rate": 1.7647176152149141e-06, + "loss": 0.5054, + "step": 12154 + }, + { + "epoch": 1.7911100196463654, + "grad_norm": 0.5979694128036499, + "learning_rate": 1.7643470738913999e-06, + "loss": 0.5284, + "step": 12155 + }, + { + "epoch": 1.7912573673870336, + "grad_norm": 0.58730149269104, + "learning_rate": 1.7639765502599934e-06, + "loss": 0.5236, + "step": 12156 + }, + { + "epoch": 1.7914047151277015, + "grad_norm": 0.6013782024383545, + "learning_rate": 1.7636060443296056e-06, + "loss": 0.5099, + "step": 12157 + }, + { + "epoch": 1.7915520628683694, + "grad_norm": 0.5886592268943787, + "learning_rate": 1.763235556109147e-06, + "loss": 0.5391, + "step": 12158 + }, + { + "epoch": 1.7916994106090374, + "grad_norm": 0.5878515839576721, + "learning_rate": 1.7628650856075274e-06, + "loss": 0.507, + "step": 12159 + }, + { + "epoch": 1.7918467583497053, + "grad_norm": 0.5838161706924438, + "learning_rate": 1.7624946328336567e-06, + "loss": 0.4968, + "step": 12160 + }, + { + "epoch": 1.7919941060903732, + "grad_norm": 0.601475179195404, + "learning_rate": 1.7621241977964444e-06, + "loss": 0.527, + "step": 12161 + }, + { + "epoch": 1.7921414538310412, + "grad_norm": 0.6003263592720032, + "learning_rate": 1.761753780504799e-06, + "loss": 0.5372, + "step": 12162 + }, + { + "epoch": 1.7922888015717091, + "grad_norm": 0.5994994640350342, + "learning_rate": 1.7613833809676278e-06, + "loss": 0.5312, + "step": 12163 + }, + { + "epoch": 1.792436149312377, + "grad_norm": 0.648698091506958, + "learning_rate": 1.7610129991938414e-06, + "loss": 0.522, + "step": 12164 + }, + { + "epoch": 1.7925834970530452, + "grad_norm": 0.6009228825569153, + "learning_rate": 1.760642635192345e-06, + "loss": 0.4918, + "step": 12165 + }, + { + "epoch": 1.7927308447937131, + "grad_norm": 0.6219028234481812, + "learning_rate": 1.760272288972047e-06, + "loss": 0.5057, + "step": 12166 + }, + { + "epoch": 1.7928781925343813, + "grad_norm": 0.6114358305931091, + "learning_rate": 1.7599019605418533e-06, + "loss": 0.5167, + "step": 12167 + }, + { + "epoch": 1.7930255402750492, + "grad_norm": 0.6125554442405701, + "learning_rate": 1.75953164991067e-06, + "loss": 0.5228, + "step": 12168 + }, + { + "epoch": 1.7931728880157172, + "grad_norm": 0.6166630983352661, + "learning_rate": 1.7591613570874038e-06, + "loss": 0.5133, + "step": 12169 + }, + { + "epoch": 1.793320235756385, + "grad_norm": 0.5897046327590942, + "learning_rate": 1.7587910820809592e-06, + "loss": 0.5007, + "step": 12170 + }, + { + "epoch": 1.793467583497053, + "grad_norm": 0.6652669906616211, + "learning_rate": 1.7584208249002416e-06, + "loss": 0.5159, + "step": 12171 + }, + { + "epoch": 1.793614931237721, + "grad_norm": 0.5679954290390015, + "learning_rate": 1.7580505855541552e-06, + "loss": 0.5189, + "step": 12172 + }, + { + "epoch": 1.793762278978389, + "grad_norm": 0.6442963480949402, + "learning_rate": 1.7576803640516047e-06, + "loss": 0.5151, + "step": 12173 + }, + { + "epoch": 1.7939096267190568, + "grad_norm": 0.5737887024879456, + "learning_rate": 1.7573101604014927e-06, + "loss": 0.5014, + "step": 12174 + }, + { + "epoch": 1.7940569744597248, + "grad_norm": 0.6156220436096191, + "learning_rate": 1.7569399746127231e-06, + "loss": 0.5486, + "step": 12175 + }, + { + "epoch": 1.794204322200393, + "grad_norm": 0.6252838969230652, + "learning_rate": 1.7565698066941986e-06, + "loss": 0.5401, + "step": 12176 + }, + { + "epoch": 1.7943516699410609, + "grad_norm": 0.5995187759399414, + "learning_rate": 1.7561996566548217e-06, + "loss": 0.5465, + "step": 12177 + }, + { + "epoch": 1.794499017681729, + "grad_norm": 0.6177511215209961, + "learning_rate": 1.755829524503494e-06, + "loss": 0.5179, + "step": 12178 + }, + { + "epoch": 1.794646365422397, + "grad_norm": 0.604814887046814, + "learning_rate": 1.7554594102491172e-06, + "loss": 0.5624, + "step": 12179 + }, + { + "epoch": 1.794793713163065, + "grad_norm": 0.588760256767273, + "learning_rate": 1.7550893139005926e-06, + "loss": 0.507, + "step": 12180 + }, + { + "epoch": 1.7949410609037328, + "grad_norm": 0.635957658290863, + "learning_rate": 1.7547192354668203e-06, + "loss": 0.5284, + "step": 12181 + }, + { + "epoch": 1.7950884086444008, + "grad_norm": 0.6279348134994507, + "learning_rate": 1.754349174956701e-06, + "loss": 0.5361, + "step": 12182 + }, + { + "epoch": 1.7952357563850687, + "grad_norm": 0.6274941563606262, + "learning_rate": 1.7539791323791342e-06, + "loss": 0.5109, + "step": 12183 + }, + { + "epoch": 1.7953831041257367, + "grad_norm": 0.6080780029296875, + "learning_rate": 1.7536091077430196e-06, + "loss": 0.5032, + "step": 12184 + }, + { + "epoch": 1.7955304518664046, + "grad_norm": 0.614609956741333, + "learning_rate": 1.7532391010572555e-06, + "loss": 0.5517, + "step": 12185 + }, + { + "epoch": 1.7956777996070727, + "grad_norm": 0.5911155343055725, + "learning_rate": 1.752869112330741e-06, + "loss": 0.4663, + "step": 12186 + }, + { + "epoch": 1.7958251473477407, + "grad_norm": 0.6345371603965759, + "learning_rate": 1.7524991415723737e-06, + "loss": 0.524, + "step": 12187 + }, + { + "epoch": 1.7959724950884086, + "grad_norm": 0.6194059252738953, + "learning_rate": 1.7521291887910518e-06, + "loss": 0.5037, + "step": 12188 + }, + { + "epoch": 1.7961198428290768, + "grad_norm": 0.6328821182250977, + "learning_rate": 1.7517592539956718e-06, + "loss": 0.5337, + "step": 12189 + }, + { + "epoch": 1.7962671905697447, + "grad_norm": 0.6555737257003784, + "learning_rate": 1.7513893371951312e-06, + "loss": 0.4998, + "step": 12190 + }, + { + "epoch": 1.7964145383104126, + "grad_norm": 0.6290463209152222, + "learning_rate": 1.751019438398326e-06, + "loss": 0.4955, + "step": 12191 + }, + { + "epoch": 1.7965618860510806, + "grad_norm": 0.6361489295959473, + "learning_rate": 1.7506495576141513e-06, + "loss": 0.5669, + "step": 12192 + }, + { + "epoch": 1.7967092337917485, + "grad_norm": 0.6110626459121704, + "learning_rate": 1.7502796948515038e-06, + "loss": 0.5012, + "step": 12193 + }, + { + "epoch": 1.7968565815324165, + "grad_norm": 0.6006029844284058, + "learning_rate": 1.749909850119278e-06, + "loss": 0.5232, + "step": 12194 + }, + { + "epoch": 1.7970039292730844, + "grad_norm": 0.6144216656684875, + "learning_rate": 1.7495400234263682e-06, + "loss": 0.4852, + "step": 12195 + }, + { + "epoch": 1.7971512770137523, + "grad_norm": 0.5869200825691223, + "learning_rate": 1.7491702147816692e-06, + "loss": 0.5064, + "step": 12196 + }, + { + "epoch": 1.7972986247544205, + "grad_norm": 0.6154882907867432, + "learning_rate": 1.7488004241940742e-06, + "loss": 0.5342, + "step": 12197 + }, + { + "epoch": 1.7974459724950884, + "grad_norm": 0.6201682686805725, + "learning_rate": 1.7484306516724766e-06, + "loss": 0.5177, + "step": 12198 + }, + { + "epoch": 1.7975933202357564, + "grad_norm": 0.6189724802970886, + "learning_rate": 1.7480608972257696e-06, + "loss": 0.5304, + "step": 12199 + }, + { + "epoch": 1.7977406679764245, + "grad_norm": 0.6148282885551453, + "learning_rate": 1.7476911608628444e-06, + "loss": 0.5235, + "step": 12200 + }, + { + "epoch": 1.7978880157170924, + "grad_norm": 0.6277077794075012, + "learning_rate": 1.7473214425925947e-06, + "loss": 0.5093, + "step": 12201 + }, + { + "epoch": 1.7980353634577604, + "grad_norm": 0.6780112981796265, + "learning_rate": 1.7469517424239112e-06, + "loss": 0.5406, + "step": 12202 + }, + { + "epoch": 1.7981827111984283, + "grad_norm": 0.6243786811828613, + "learning_rate": 1.7465820603656852e-06, + "loss": 0.5354, + "step": 12203 + }, + { + "epoch": 1.7983300589390963, + "grad_norm": 0.556193470954895, + "learning_rate": 1.7462123964268072e-06, + "loss": 0.5405, + "step": 12204 + }, + { + "epoch": 1.7984774066797642, + "grad_norm": 0.6307245492935181, + "learning_rate": 1.7458427506161673e-06, + "loss": 0.5355, + "step": 12205 + }, + { + "epoch": 1.7986247544204321, + "grad_norm": 0.5775851011276245, + "learning_rate": 1.7454731229426557e-06, + "loss": 0.5435, + "step": 12206 + }, + { + "epoch": 1.7987721021611, + "grad_norm": 0.5627174973487854, + "learning_rate": 1.7451035134151617e-06, + "loss": 0.5099, + "step": 12207 + }, + { + "epoch": 1.7989194499017682, + "grad_norm": 0.5971312522888184, + "learning_rate": 1.744733922042574e-06, + "loss": 0.5335, + "step": 12208 + }, + { + "epoch": 1.7990667976424362, + "grad_norm": 0.6055852770805359, + "learning_rate": 1.7443643488337814e-06, + "loss": 0.5187, + "step": 12209 + }, + { + "epoch": 1.799214145383104, + "grad_norm": 0.6283954381942749, + "learning_rate": 1.743994793797672e-06, + "loss": 0.526, + "step": 12210 + }, + { + "epoch": 1.7993614931237722, + "grad_norm": 0.6457358002662659, + "learning_rate": 1.7436252569431327e-06, + "loss": 0.5236, + "step": 12211 + }, + { + "epoch": 1.7995088408644402, + "grad_norm": 0.6118271350860596, + "learning_rate": 1.7432557382790517e-06, + "loss": 0.5163, + "step": 12212 + }, + { + "epoch": 1.7996561886051081, + "grad_norm": 0.5762506723403931, + "learning_rate": 1.7428862378143146e-06, + "loss": 0.5288, + "step": 12213 + }, + { + "epoch": 1.799803536345776, + "grad_norm": 0.6557776927947998, + "learning_rate": 1.742516755557809e-06, + "loss": 0.5271, + "step": 12214 + }, + { + "epoch": 1.799950884086444, + "grad_norm": 0.5923138856887817, + "learning_rate": 1.74214729151842e-06, + "loss": 0.5489, + "step": 12215 + }, + { + "epoch": 1.800098231827112, + "grad_norm": 0.627033531665802, + "learning_rate": 1.741777845705033e-06, + "loss": 0.5159, + "step": 12216 + }, + { + "epoch": 1.8002455795677799, + "grad_norm": 0.5736974477767944, + "learning_rate": 1.7414084181265334e-06, + "loss": 0.504, + "step": 12217 + }, + { + "epoch": 1.8003929273084478, + "grad_norm": 0.5909762382507324, + "learning_rate": 1.7410390087918055e-06, + "loss": 0.5177, + "step": 12218 + }, + { + "epoch": 1.800540275049116, + "grad_norm": 0.633838951587677, + "learning_rate": 1.7406696177097333e-06, + "loss": 0.5049, + "step": 12219 + }, + { + "epoch": 1.800687622789784, + "grad_norm": 0.6174159049987793, + "learning_rate": 1.740300244889201e-06, + "loss": 0.5443, + "step": 12220 + }, + { + "epoch": 1.8008349705304518, + "grad_norm": 0.5845352411270142, + "learning_rate": 1.739930890339091e-06, + "loss": 0.4866, + "step": 12221 + }, + { + "epoch": 1.80098231827112, + "grad_norm": 0.6038165092468262, + "learning_rate": 1.7395615540682873e-06, + "loss": 0.4737, + "step": 12222 + }, + { + "epoch": 1.801129666011788, + "grad_norm": 0.5641252994537354, + "learning_rate": 1.739192236085671e-06, + "loss": 0.4964, + "step": 12223 + }, + { + "epoch": 1.8012770137524559, + "grad_norm": 0.591046154499054, + "learning_rate": 1.7388229364001248e-06, + "loss": 0.5253, + "step": 12224 + }, + { + "epoch": 1.8014243614931238, + "grad_norm": 0.5790848135948181, + "learning_rate": 1.73845365502053e-06, + "loss": 0.5342, + "step": 12225 + }, + { + "epoch": 1.8015717092337917, + "grad_norm": 0.6235995888710022, + "learning_rate": 1.7380843919557672e-06, + "loss": 0.5105, + "step": 12226 + }, + { + "epoch": 1.8017190569744597, + "grad_norm": 0.5719767212867737, + "learning_rate": 1.7377151472147177e-06, + "loss": 0.5283, + "step": 12227 + }, + { + "epoch": 1.8018664047151276, + "grad_norm": 0.6302123665809631, + "learning_rate": 1.7373459208062614e-06, + "loss": 0.5316, + "step": 12228 + }, + { + "epoch": 1.8020137524557955, + "grad_norm": 0.5962929129600525, + "learning_rate": 1.7369767127392777e-06, + "loss": 0.5292, + "step": 12229 + }, + { + "epoch": 1.8021611001964637, + "grad_norm": 0.6142902970314026, + "learning_rate": 1.7366075230226464e-06, + "loss": 0.5246, + "step": 12230 + }, + { + "epoch": 1.8023084479371316, + "grad_norm": 0.5890962481498718, + "learning_rate": 1.7362383516652464e-06, + "loss": 0.5581, + "step": 12231 + }, + { + "epoch": 1.8024557956777996, + "grad_norm": 0.6068778038024902, + "learning_rate": 1.7358691986759554e-06, + "loss": 0.5391, + "step": 12232 + }, + { + "epoch": 1.8026031434184677, + "grad_norm": 0.5965456366539001, + "learning_rate": 1.7355000640636522e-06, + "loss": 0.4991, + "step": 12233 + }, + { + "epoch": 1.8027504911591357, + "grad_norm": 0.6045018434524536, + "learning_rate": 1.7351309478372136e-06, + "loss": 0.49, + "step": 12234 + }, + { + "epoch": 1.8028978388998036, + "grad_norm": 0.6217299103736877, + "learning_rate": 1.734761850005517e-06, + "loss": 0.5393, + "step": 12235 + }, + { + "epoch": 1.8030451866404715, + "grad_norm": 0.6164529323577881, + "learning_rate": 1.7343927705774388e-06, + "loss": 0.5026, + "step": 12236 + }, + { + "epoch": 1.8031925343811395, + "grad_norm": 0.6149834990501404, + "learning_rate": 1.7340237095618545e-06, + "loss": 0.5338, + "step": 12237 + }, + { + "epoch": 1.8033398821218074, + "grad_norm": 0.6288071274757385, + "learning_rate": 1.7336546669676419e-06, + "loss": 0.4985, + "step": 12238 + }, + { + "epoch": 1.8034872298624753, + "grad_norm": 0.6385444402694702, + "learning_rate": 1.7332856428036748e-06, + "loss": 0.5216, + "step": 12239 + }, + { + "epoch": 1.8036345776031433, + "grad_norm": 0.5898364782333374, + "learning_rate": 1.7329166370788292e-06, + "loss": 0.5397, + "step": 12240 + }, + { + "epoch": 1.8037819253438114, + "grad_norm": 0.5945817828178406, + "learning_rate": 1.7325476498019777e-06, + "loss": 0.51, + "step": 12241 + }, + { + "epoch": 1.8039292730844794, + "grad_norm": 0.5577456951141357, + "learning_rate": 1.7321786809819956e-06, + "loss": 0.4787, + "step": 12242 + }, + { + "epoch": 1.8040766208251473, + "grad_norm": 0.6203539371490479, + "learning_rate": 1.7318097306277565e-06, + "loss": 0.5155, + "step": 12243 + }, + { + "epoch": 1.8042239685658155, + "grad_norm": 0.6055380702018738, + "learning_rate": 1.7314407987481324e-06, + "loss": 0.5247, + "step": 12244 + }, + { + "epoch": 1.8043713163064834, + "grad_norm": 0.5729547739028931, + "learning_rate": 1.731071885351997e-06, + "loss": 0.5177, + "step": 12245 + }, + { + "epoch": 1.8045186640471513, + "grad_norm": 0.6197087168693542, + "learning_rate": 1.730702990448222e-06, + "loss": 0.5514, + "step": 12246 + }, + { + "epoch": 1.8046660117878193, + "grad_norm": 0.60401451587677, + "learning_rate": 1.7303341140456793e-06, + "loss": 0.5372, + "step": 12247 + }, + { + "epoch": 1.8048133595284872, + "grad_norm": 0.5817292332649231, + "learning_rate": 1.7299652561532398e-06, + "loss": 0.5428, + "step": 12248 + }, + { + "epoch": 1.8049607072691551, + "grad_norm": 0.5739916563034058, + "learning_rate": 1.7295964167797752e-06, + "loss": 0.5175, + "step": 12249 + }, + { + "epoch": 1.805108055009823, + "grad_norm": 0.5906115770339966, + "learning_rate": 1.7292275959341553e-06, + "loss": 0.5161, + "step": 12250 + }, + { + "epoch": 1.805255402750491, + "grad_norm": 0.6421008110046387, + "learning_rate": 1.7288587936252505e-06, + "loss": 0.5183, + "step": 12251 + }, + { + "epoch": 1.8054027504911592, + "grad_norm": 0.6168648600578308, + "learning_rate": 1.7284900098619294e-06, + "loss": 0.5042, + "step": 12252 + }, + { + "epoch": 1.805550098231827, + "grad_norm": 0.6250393986701965, + "learning_rate": 1.728121244653062e-06, + "loss": 0.5514, + "step": 12253 + }, + { + "epoch": 1.805697445972495, + "grad_norm": 0.6311091780662537, + "learning_rate": 1.7277524980075166e-06, + "loss": 0.5283, + "step": 12254 + }, + { + "epoch": 1.8058447937131632, + "grad_norm": 0.620114803314209, + "learning_rate": 1.7273837699341616e-06, + "loss": 0.514, + "step": 12255 + }, + { + "epoch": 1.8059921414538311, + "grad_norm": 0.583740770816803, + "learning_rate": 1.7270150604418644e-06, + "loss": 0.5023, + "step": 12256 + }, + { + "epoch": 1.806139489194499, + "grad_norm": 0.5798120498657227, + "learning_rate": 1.7266463695394925e-06, + "loss": 0.5235, + "step": 12257 + }, + { + "epoch": 1.806286836935167, + "grad_norm": 0.619547963142395, + "learning_rate": 1.7262776972359124e-06, + "loss": 0.5601, + "step": 12258 + }, + { + "epoch": 1.806434184675835, + "grad_norm": 0.5854601263999939, + "learning_rate": 1.725909043539991e-06, + "loss": 0.5178, + "step": 12259 + }, + { + "epoch": 1.8065815324165029, + "grad_norm": 0.6128246784210205, + "learning_rate": 1.7255404084605942e-06, + "loss": 0.5224, + "step": 12260 + }, + { + "epoch": 1.8067288801571708, + "grad_norm": 0.6358801126480103, + "learning_rate": 1.7251717920065876e-06, + "loss": 0.524, + "step": 12261 + }, + { + "epoch": 1.8068762278978387, + "grad_norm": 0.6562744975090027, + "learning_rate": 1.7248031941868355e-06, + "loss": 0.5255, + "step": 12262 + }, + { + "epoch": 1.807023575638507, + "grad_norm": 0.5971754193305969, + "learning_rate": 1.7244346150102028e-06, + "loss": 0.5181, + "step": 12263 + }, + { + "epoch": 1.8071709233791748, + "grad_norm": 0.6112021207809448, + "learning_rate": 1.7240660544855543e-06, + "loss": 0.518, + "step": 12264 + }, + { + "epoch": 1.8073182711198428, + "grad_norm": 0.6322716474533081, + "learning_rate": 1.723697512621753e-06, + "loss": 0.5169, + "step": 12265 + }, + { + "epoch": 1.807465618860511, + "grad_norm": 0.5997450947761536, + "learning_rate": 1.7233289894276626e-06, + "loss": 0.531, + "step": 12266 + }, + { + "epoch": 1.8076129666011789, + "grad_norm": 0.6129190921783447, + "learning_rate": 1.7229604849121457e-06, + "loss": 0.5435, + "step": 12267 + }, + { + "epoch": 1.8077603143418468, + "grad_norm": 0.6292527318000793, + "learning_rate": 1.7225919990840645e-06, + "loss": 0.5114, + "step": 12268 + }, + { + "epoch": 1.8079076620825147, + "grad_norm": 0.5857449769973755, + "learning_rate": 1.7222235319522812e-06, + "loss": 0.56, + "step": 12269 + }, + { + "epoch": 1.8080550098231827, + "grad_norm": 0.6596090197563171, + "learning_rate": 1.721855083525657e-06, + "loss": 0.5363, + "step": 12270 + }, + { + "epoch": 1.8082023575638506, + "grad_norm": 0.6018069386482239, + "learning_rate": 1.7214866538130537e-06, + "loss": 0.5298, + "step": 12271 + }, + { + "epoch": 1.8083497053045186, + "grad_norm": 0.5930723547935486, + "learning_rate": 1.7211182428233309e-06, + "loss": 0.5116, + "step": 12272 + }, + { + "epoch": 1.8084970530451865, + "grad_norm": 0.615628719329834, + "learning_rate": 1.7207498505653488e-06, + "loss": 0.5205, + "step": 12273 + }, + { + "epoch": 1.8086444007858546, + "grad_norm": 0.5967796444892883, + "learning_rate": 1.7203814770479665e-06, + "loss": 0.5278, + "step": 12274 + }, + { + "epoch": 1.8087917485265226, + "grad_norm": 0.590684711933136, + "learning_rate": 1.7200131222800448e-06, + "loss": 0.5248, + "step": 12275 + }, + { + "epoch": 1.8089390962671905, + "grad_norm": 0.6179744005203247, + "learning_rate": 1.7196447862704418e-06, + "loss": 0.5427, + "step": 12276 + }, + { + "epoch": 1.8090864440078587, + "grad_norm": 0.5973530411720276, + "learning_rate": 1.7192764690280157e-06, + "loss": 0.5301, + "step": 12277 + }, + { + "epoch": 1.8092337917485266, + "grad_norm": 0.6259773373603821, + "learning_rate": 1.7189081705616243e-06, + "loss": 0.5233, + "step": 12278 + }, + { + "epoch": 1.8093811394891945, + "grad_norm": 0.6148660778999329, + "learning_rate": 1.7185398908801248e-06, + "loss": 0.5537, + "step": 12279 + }, + { + "epoch": 1.8095284872298625, + "grad_norm": 0.6658810973167419, + "learning_rate": 1.7181716299923751e-06, + "loss": 0.5025, + "step": 12280 + }, + { + "epoch": 1.8096758349705304, + "grad_norm": 0.6039487719535828, + "learning_rate": 1.7178033879072303e-06, + "loss": 0.5284, + "step": 12281 + }, + { + "epoch": 1.8098231827111984, + "grad_norm": 0.6647236347198486, + "learning_rate": 1.7174351646335473e-06, + "loss": 0.5598, + "step": 12282 + }, + { + "epoch": 1.8099705304518663, + "grad_norm": 0.6074221730232239, + "learning_rate": 1.7170669601801817e-06, + "loss": 0.5058, + "step": 12283 + }, + { + "epoch": 1.8101178781925342, + "grad_norm": 0.6203068494796753, + "learning_rate": 1.7166987745559884e-06, + "loss": 0.5333, + "step": 12284 + }, + { + "epoch": 1.8102652259332024, + "grad_norm": 0.6040428876876831, + "learning_rate": 1.7163306077698222e-06, + "loss": 0.5213, + "step": 12285 + }, + { + "epoch": 1.8104125736738703, + "grad_norm": 0.6055794358253479, + "learning_rate": 1.7159624598305375e-06, + "loss": 0.5333, + "step": 12286 + }, + { + "epoch": 1.8105599214145385, + "grad_norm": 0.5877382159233093, + "learning_rate": 1.7155943307469877e-06, + "loss": 0.4736, + "step": 12287 + }, + { + "epoch": 1.8107072691552064, + "grad_norm": 0.6014665961265564, + "learning_rate": 1.7152262205280267e-06, + "loss": 0.5368, + "step": 12288 + }, + { + "epoch": 1.8108546168958743, + "grad_norm": 0.6356736421585083, + "learning_rate": 1.7148581291825067e-06, + "loss": 0.5561, + "step": 12289 + }, + { + "epoch": 1.8110019646365423, + "grad_norm": 0.5917418599128723, + "learning_rate": 1.7144900567192807e-06, + "loss": 0.5329, + "step": 12290 + }, + { + "epoch": 1.8111493123772102, + "grad_norm": 0.634894609451294, + "learning_rate": 1.7141220031472e-06, + "loss": 0.5095, + "step": 12291 + }, + { + "epoch": 1.8112966601178782, + "grad_norm": 0.624688446521759, + "learning_rate": 1.7137539684751168e-06, + "loss": 0.5148, + "step": 12292 + }, + { + "epoch": 1.811444007858546, + "grad_norm": 0.5974530577659607, + "learning_rate": 1.7133859527118817e-06, + "loss": 0.5102, + "step": 12293 + }, + { + "epoch": 1.811591355599214, + "grad_norm": 0.6002781987190247, + "learning_rate": 1.7130179558663457e-06, + "loss": 0.5194, + "step": 12294 + }, + { + "epoch": 1.811738703339882, + "grad_norm": 0.6165129542350769, + "learning_rate": 1.7126499779473581e-06, + "loss": 0.51, + "step": 12295 + }, + { + "epoch": 1.8118860510805501, + "grad_norm": 0.611430287361145, + "learning_rate": 1.71228201896377e-06, + "loss": 0.5495, + "step": 12296 + }, + { + "epoch": 1.812033398821218, + "grad_norm": 0.6400500535964966, + "learning_rate": 1.7119140789244298e-06, + "loss": 0.4959, + "step": 12297 + }, + { + "epoch": 1.8121807465618862, + "grad_norm": 0.6547544598579407, + "learning_rate": 1.711546157838186e-06, + "loss": 0.5238, + "step": 12298 + }, + { + "epoch": 1.8123280943025541, + "grad_norm": 0.6088714003562927, + "learning_rate": 1.7111782557138876e-06, + "loss": 0.4973, + "step": 12299 + }, + { + "epoch": 1.812475442043222, + "grad_norm": 0.6123571395874023, + "learning_rate": 1.7108103725603819e-06, + "loss": 0.5388, + "step": 12300 + }, + { + "epoch": 1.81262278978389, + "grad_norm": 0.5883840918540955, + "learning_rate": 1.7104425083865168e-06, + "loss": 0.5244, + "step": 12301 + }, + { + "epoch": 1.812770137524558, + "grad_norm": 0.6118053197860718, + "learning_rate": 1.7100746632011388e-06, + "loss": 0.5333, + "step": 12302 + }, + { + "epoch": 1.812917485265226, + "grad_norm": 0.5801438689231873, + "learning_rate": 1.7097068370130947e-06, + "loss": 0.5665, + "step": 12303 + }, + { + "epoch": 1.8130648330058938, + "grad_norm": 0.5976722836494446, + "learning_rate": 1.709339029831231e-06, + "loss": 0.5135, + "step": 12304 + }, + { + "epoch": 1.8132121807465618, + "grad_norm": 0.5862783193588257, + "learning_rate": 1.7089712416643921e-06, + "loss": 0.5189, + "step": 12305 + }, + { + "epoch": 1.8133595284872297, + "grad_norm": 0.615498423576355, + "learning_rate": 1.7086034725214245e-06, + "loss": 0.5285, + "step": 12306 + }, + { + "epoch": 1.8135068762278979, + "grad_norm": 0.6041061282157898, + "learning_rate": 1.7082357224111718e-06, + "loss": 0.5424, + "step": 12307 + }, + { + "epoch": 1.8136542239685658, + "grad_norm": 0.5662017464637756, + "learning_rate": 1.7078679913424787e-06, + "loss": 0.5303, + "step": 12308 + }, + { + "epoch": 1.813801571709234, + "grad_norm": 0.5747575163841248, + "learning_rate": 1.7075002793241887e-06, + "loss": 0.5506, + "step": 12309 + }, + { + "epoch": 1.8139489194499019, + "grad_norm": 0.6591145396232605, + "learning_rate": 1.7071325863651455e-06, + "loss": 0.5128, + "step": 12310 + }, + { + "epoch": 1.8140962671905698, + "grad_norm": 0.601377010345459, + "learning_rate": 1.7067649124741912e-06, + "loss": 0.5276, + "step": 12311 + }, + { + "epoch": 1.8142436149312378, + "grad_norm": 0.6085675358772278, + "learning_rate": 1.706397257660169e-06, + "loss": 0.5047, + "step": 12312 + }, + { + "epoch": 1.8143909626719057, + "grad_norm": 0.6065175533294678, + "learning_rate": 1.7060296219319205e-06, + "loss": 0.5267, + "step": 12313 + }, + { + "epoch": 1.8145383104125736, + "grad_norm": 0.5913798809051514, + "learning_rate": 1.7056620052982875e-06, + "loss": 0.5459, + "step": 12314 + }, + { + "epoch": 1.8146856581532416, + "grad_norm": 0.5971483588218689, + "learning_rate": 1.7052944077681106e-06, + "loss": 0.4722, + "step": 12315 + }, + { + "epoch": 1.8148330058939095, + "grad_norm": 0.5979045033454895, + "learning_rate": 1.7049268293502305e-06, + "loss": 0.5277, + "step": 12316 + }, + { + "epoch": 1.8149803536345774, + "grad_norm": 0.6015703678131104, + "learning_rate": 1.7045592700534872e-06, + "loss": 0.5476, + "step": 12317 + }, + { + "epoch": 1.8151277013752456, + "grad_norm": 0.58671635389328, + "learning_rate": 1.7041917298867203e-06, + "loss": 0.4859, + "step": 12318 + }, + { + "epoch": 1.8152750491159135, + "grad_norm": 0.6427555680274963, + "learning_rate": 1.7038242088587693e-06, + "loss": 0.5004, + "step": 12319 + }, + { + "epoch": 1.8154223968565817, + "grad_norm": 0.5976908802986145, + "learning_rate": 1.7034567069784725e-06, + "loss": 0.5287, + "step": 12320 + }, + { + "epoch": 1.8155697445972496, + "grad_norm": 0.5854730010032654, + "learning_rate": 1.7030892242546682e-06, + "loss": 0.551, + "step": 12321 + }, + { + "epoch": 1.8157170923379176, + "grad_norm": 0.5769508481025696, + "learning_rate": 1.7027217606961941e-06, + "loss": 0.5084, + "step": 12322 + }, + { + "epoch": 1.8158644400785855, + "grad_norm": 0.6210262179374695, + "learning_rate": 1.7023543163118878e-06, + "loss": 0.524, + "step": 12323 + }, + { + "epoch": 1.8160117878192534, + "grad_norm": 0.6037257313728333, + "learning_rate": 1.7019868911105862e-06, + "loss": 0.5344, + "step": 12324 + }, + { + "epoch": 1.8161591355599214, + "grad_norm": 0.6185755133628845, + "learning_rate": 1.7016194851011258e-06, + "loss": 0.5385, + "step": 12325 + }, + { + "epoch": 1.8163064833005893, + "grad_norm": 0.6425879597663879, + "learning_rate": 1.701252098292342e-06, + "loss": 0.5261, + "step": 12326 + }, + { + "epoch": 1.8164538310412572, + "grad_norm": 0.6098206043243408, + "learning_rate": 1.7008847306930707e-06, + "loss": 0.5362, + "step": 12327 + }, + { + "epoch": 1.8166011787819254, + "grad_norm": 0.5997880101203918, + "learning_rate": 1.700517382312147e-06, + "loss": 0.4973, + "step": 12328 + }, + { + "epoch": 1.8167485265225933, + "grad_norm": 0.6029638648033142, + "learning_rate": 1.7001500531584047e-06, + "loss": 0.4936, + "step": 12329 + }, + { + "epoch": 1.8168958742632613, + "grad_norm": 0.6445863246917725, + "learning_rate": 1.699782743240679e-06, + "loss": 0.5043, + "step": 12330 + }, + { + "epoch": 1.8170432220039294, + "grad_norm": 0.5964537262916565, + "learning_rate": 1.699415452567803e-06, + "loss": 0.5579, + "step": 12331 + }, + { + "epoch": 1.8171905697445974, + "grad_norm": 0.6010265350341797, + "learning_rate": 1.6990481811486097e-06, + "loss": 0.5191, + "step": 12332 + }, + { + "epoch": 1.8173379174852653, + "grad_norm": 0.6046677827835083, + "learning_rate": 1.6986809289919321e-06, + "loss": 0.5109, + "step": 12333 + }, + { + "epoch": 1.8174852652259332, + "grad_norm": 0.599380612373352, + "learning_rate": 1.698313696106602e-06, + "loss": 0.5398, + "step": 12334 + }, + { + "epoch": 1.8176326129666012, + "grad_norm": 0.6113892197608948, + "learning_rate": 1.697946482501452e-06, + "loss": 0.5345, + "step": 12335 + }, + { + "epoch": 1.817779960707269, + "grad_norm": 0.5709198117256165, + "learning_rate": 1.6975792881853123e-06, + "loss": 0.554, + "step": 12336 + }, + { + "epoch": 1.817927308447937, + "grad_norm": 0.5847815871238708, + "learning_rate": 1.6972121131670152e-06, + "loss": 0.5331, + "step": 12337 + }, + { + "epoch": 1.818074656188605, + "grad_norm": 0.595064640045166, + "learning_rate": 1.6968449574553896e-06, + "loss": 0.5227, + "step": 12338 + }, + { + "epoch": 1.8182220039292731, + "grad_norm": 0.610200047492981, + "learning_rate": 1.6964778210592664e-06, + "loss": 0.5334, + "step": 12339 + }, + { + "epoch": 1.818369351669941, + "grad_norm": 0.6338502764701843, + "learning_rate": 1.6961107039874747e-06, + "loss": 0.517, + "step": 12340 + }, + { + "epoch": 1.818516699410609, + "grad_norm": 0.6077284216880798, + "learning_rate": 1.695743606248843e-06, + "loss": 0.5371, + "step": 12341 + }, + { + "epoch": 1.8186640471512772, + "grad_norm": 0.6419398188591003, + "learning_rate": 1.695376527852201e-06, + "loss": 0.5104, + "step": 12342 + }, + { + "epoch": 1.818811394891945, + "grad_norm": 0.5848707556724548, + "learning_rate": 1.6950094688063757e-06, + "loss": 0.5104, + "step": 12343 + }, + { + "epoch": 1.818958742632613, + "grad_norm": 0.6142268180847168, + "learning_rate": 1.6946424291201951e-06, + "loss": 0.5104, + "step": 12344 + }, + { + "epoch": 1.819106090373281, + "grad_norm": 0.6410918235778809, + "learning_rate": 1.6942754088024866e-06, + "loss": 0.5231, + "step": 12345 + }, + { + "epoch": 1.819253438113949, + "grad_norm": 0.5684906840324402, + "learning_rate": 1.6939084078620766e-06, + "loss": 0.5321, + "step": 12346 + }, + { + "epoch": 1.8194007858546168, + "grad_norm": 0.6250415444374084, + "learning_rate": 1.69354142630779e-06, + "loss": 0.513, + "step": 12347 + }, + { + "epoch": 1.8195481335952848, + "grad_norm": 0.6082693934440613, + "learning_rate": 1.693174464148455e-06, + "loss": 0.5249, + "step": 12348 + }, + { + "epoch": 1.8196954813359527, + "grad_norm": 0.5949401259422302, + "learning_rate": 1.6928075213928958e-06, + "loss": 0.5415, + "step": 12349 + }, + { + "epoch": 1.8198428290766209, + "grad_norm": 0.5933512449264526, + "learning_rate": 1.692440598049937e-06, + "loss": 0.5028, + "step": 12350 + }, + { + "epoch": 1.8199901768172888, + "grad_norm": 0.6712465882301331, + "learning_rate": 1.6920736941284034e-06, + "loss": 0.5359, + "step": 12351 + }, + { + "epoch": 1.8201375245579567, + "grad_norm": 0.6948124170303345, + "learning_rate": 1.6917068096371181e-06, + "loss": 0.527, + "step": 12352 + }, + { + "epoch": 1.820284872298625, + "grad_norm": 0.5798683762550354, + "learning_rate": 1.691339944584905e-06, + "loss": 0.5287, + "step": 12353 + }, + { + "epoch": 1.8204322200392928, + "grad_norm": 0.6085073947906494, + "learning_rate": 1.6909730989805872e-06, + "loss": 0.5599, + "step": 12354 + }, + { + "epoch": 1.8205795677799608, + "grad_norm": 0.6081967949867249, + "learning_rate": 1.6906062728329869e-06, + "loss": 0.5267, + "step": 12355 + }, + { + "epoch": 1.8207269155206287, + "grad_norm": 0.5920896530151367, + "learning_rate": 1.6902394661509262e-06, + "loss": 0.5209, + "step": 12356 + }, + { + "epoch": 1.8208742632612966, + "grad_norm": 0.5995556116104126, + "learning_rate": 1.6898726789432264e-06, + "loss": 0.5312, + "step": 12357 + }, + { + "epoch": 1.8210216110019646, + "grad_norm": 0.6442856788635254, + "learning_rate": 1.6895059112187084e-06, + "loss": 0.4964, + "step": 12358 + }, + { + "epoch": 1.8211689587426325, + "grad_norm": 0.6100735068321228, + "learning_rate": 1.689139162986193e-06, + "loss": 0.5417, + "step": 12359 + }, + { + "epoch": 1.8213163064833005, + "grad_norm": 0.6361826658248901, + "learning_rate": 1.6887724342545008e-06, + "loss": 0.5147, + "step": 12360 + }, + { + "epoch": 1.8214636542239686, + "grad_norm": 0.580018937587738, + "learning_rate": 1.688405725032451e-06, + "loss": 0.472, + "step": 12361 + }, + { + "epoch": 1.8216110019646365, + "grad_norm": 0.58710777759552, + "learning_rate": 1.6880390353288629e-06, + "loss": 0.5076, + "step": 12362 + }, + { + "epoch": 1.8217583497053045, + "grad_norm": 0.597529947757721, + "learning_rate": 1.687672365152555e-06, + "loss": 0.5274, + "step": 12363 + }, + { + "epoch": 1.8219056974459726, + "grad_norm": 0.639458179473877, + "learning_rate": 1.6873057145123456e-06, + "loss": 0.5349, + "step": 12364 + }, + { + "epoch": 1.8220530451866406, + "grad_norm": 0.6047441959381104, + "learning_rate": 1.6869390834170529e-06, + "loss": 0.5221, + "step": 12365 + }, + { + "epoch": 1.8222003929273085, + "grad_norm": 0.6351785659790039, + "learning_rate": 1.686572471875494e-06, + "loss": 0.4968, + "step": 12366 + }, + { + "epoch": 1.8223477406679764, + "grad_norm": 0.5766279101371765, + "learning_rate": 1.686205879896485e-06, + "loss": 0.5597, + "step": 12367 + }, + { + "epoch": 1.8224950884086444, + "grad_norm": 0.6164668798446655, + "learning_rate": 1.6858393074888435e-06, + "loss": 0.5111, + "step": 12368 + }, + { + "epoch": 1.8226424361493123, + "grad_norm": 0.6193920969963074, + "learning_rate": 1.6854727546613843e-06, + "loss": 0.5135, + "step": 12369 + }, + { + "epoch": 1.8227897838899803, + "grad_norm": 0.5928753614425659, + "learning_rate": 1.6851062214229235e-06, + "loss": 0.5345, + "step": 12370 + }, + { + "epoch": 1.8229371316306482, + "grad_norm": 0.6028345227241516, + "learning_rate": 1.6847397077822763e-06, + "loss": 0.535, + "step": 12371 + }, + { + "epoch": 1.8230844793713163, + "grad_norm": 0.5789644718170166, + "learning_rate": 1.6843732137482562e-06, + "loss": 0.5246, + "step": 12372 + }, + { + "epoch": 1.8232318271119843, + "grad_norm": 0.5833607316017151, + "learning_rate": 1.684006739329678e-06, + "loss": 0.5274, + "step": 12373 + }, + { + "epoch": 1.8233791748526522, + "grad_norm": 0.5971733927726746, + "learning_rate": 1.683640284535355e-06, + "loss": 0.5493, + "step": 12374 + }, + { + "epoch": 1.8235265225933204, + "grad_norm": 0.5965708494186401, + "learning_rate": 1.6832738493741003e-06, + "loss": 0.4971, + "step": 12375 + }, + { + "epoch": 1.8236738703339883, + "grad_norm": 0.6531571745872498, + "learning_rate": 1.6829074338547263e-06, + "loss": 0.5694, + "step": 12376 + }, + { + "epoch": 1.8238212180746562, + "grad_norm": 0.5901873111724854, + "learning_rate": 1.6825410379860451e-06, + "loss": 0.5416, + "step": 12377 + }, + { + "epoch": 1.8239685658153242, + "grad_norm": 0.6123639941215515, + "learning_rate": 1.6821746617768688e-06, + "loss": 0.5107, + "step": 12378 + }, + { + "epoch": 1.8241159135559921, + "grad_norm": 0.6058542132377625, + "learning_rate": 1.6818083052360085e-06, + "loss": 0.5475, + "step": 12379 + }, + { + "epoch": 1.82426326129666, + "grad_norm": 0.5817769169807434, + "learning_rate": 1.6814419683722743e-06, + "loss": 0.5498, + "step": 12380 + }, + { + "epoch": 1.824410609037328, + "grad_norm": 0.6914609670639038, + "learning_rate": 1.681075651194477e-06, + "loss": 0.5253, + "step": 12381 + }, + { + "epoch": 1.824557956777996, + "grad_norm": 0.5920212864875793, + "learning_rate": 1.6807093537114264e-06, + "loss": 0.5163, + "step": 12382 + }, + { + "epoch": 1.824705304518664, + "grad_norm": 0.5729153156280518, + "learning_rate": 1.6803430759319312e-06, + "loss": 0.5547, + "step": 12383 + }, + { + "epoch": 1.824852652259332, + "grad_norm": 0.7225809693336487, + "learning_rate": 1.6799768178648002e-06, + "loss": 0.5229, + "step": 12384 + }, + { + "epoch": 1.825, + "grad_norm": 0.5935959815979004, + "learning_rate": 1.6796105795188428e-06, + "loss": 0.5494, + "step": 12385 + }, + { + "epoch": 1.8251473477406681, + "grad_norm": 0.6433302760124207, + "learning_rate": 1.679244360902866e-06, + "loss": 0.5387, + "step": 12386 + }, + { + "epoch": 1.825294695481336, + "grad_norm": 0.6339324116706848, + "learning_rate": 1.6788781620256773e-06, + "loss": 0.5292, + "step": 12387 + }, + { + "epoch": 1.825442043222004, + "grad_norm": 0.6064818501472473, + "learning_rate": 1.678511982896084e-06, + "loss": 0.5138, + "step": 12388 + }, + { + "epoch": 1.825589390962672, + "grad_norm": 0.5712849497795105, + "learning_rate": 1.678145823522892e-06, + "loss": 0.5544, + "step": 12389 + }, + { + "epoch": 1.8257367387033399, + "grad_norm": 0.600052535533905, + "learning_rate": 1.6777796839149073e-06, + "loss": 0.5562, + "step": 12390 + }, + { + "epoch": 1.8258840864440078, + "grad_norm": 0.580197811126709, + "learning_rate": 1.6774135640809358e-06, + "loss": 0.4948, + "step": 12391 + }, + { + "epoch": 1.8260314341846757, + "grad_norm": 0.6348087191581726, + "learning_rate": 1.6770474640297821e-06, + "loss": 0.5244, + "step": 12392 + }, + { + "epoch": 1.8261787819253437, + "grad_norm": 0.6550151109695435, + "learning_rate": 1.6766813837702512e-06, + "loss": 0.5215, + "step": 12393 + }, + { + "epoch": 1.8263261296660118, + "grad_norm": 0.6126700639724731, + "learning_rate": 1.6763153233111465e-06, + "loss": 0.5179, + "step": 12394 + }, + { + "epoch": 1.8264734774066798, + "grad_norm": 0.5984671115875244, + "learning_rate": 1.6759492826612722e-06, + "loss": 0.5431, + "step": 12395 + }, + { + "epoch": 1.8266208251473477, + "grad_norm": 0.5686889886856079, + "learning_rate": 1.675583261829431e-06, + "loss": 0.5438, + "step": 12396 + }, + { + "epoch": 1.8267681728880159, + "grad_norm": 0.5995474457740784, + "learning_rate": 1.6752172608244254e-06, + "loss": 0.5241, + "step": 12397 + }, + { + "epoch": 1.8269155206286838, + "grad_norm": 0.6046947836875916, + "learning_rate": 1.6748512796550585e-06, + "loss": 0.5226, + "step": 12398 + }, + { + "epoch": 1.8270628683693517, + "grad_norm": 0.6266152858734131, + "learning_rate": 1.6744853183301308e-06, + "loss": 0.5058, + "step": 12399 + }, + { + "epoch": 1.8272102161100197, + "grad_norm": 0.6052895784378052, + "learning_rate": 1.6741193768584441e-06, + "loss": 0.5239, + "step": 12400 + }, + { + "epoch": 1.8273575638506876, + "grad_norm": 0.6018230319023132, + "learning_rate": 1.6737534552487992e-06, + "loss": 0.5252, + "step": 12401 + }, + { + "epoch": 1.8275049115913555, + "grad_norm": 0.6027414202690125, + "learning_rate": 1.6733875535099962e-06, + "loss": 0.533, + "step": 12402 + }, + { + "epoch": 1.8276522593320235, + "grad_norm": 0.6035842299461365, + "learning_rate": 1.6730216716508352e-06, + "loss": 0.5065, + "step": 12403 + }, + { + "epoch": 1.8277996070726914, + "grad_norm": 0.6054717302322388, + "learning_rate": 1.6726558096801143e-06, + "loss": 0.5423, + "step": 12404 + }, + { + "epoch": 1.8279469548133596, + "grad_norm": 0.6041622161865234, + "learning_rate": 1.6722899676066333e-06, + "loss": 0.5176, + "step": 12405 + }, + { + "epoch": 1.8280943025540275, + "grad_norm": 0.5995705723762512, + "learning_rate": 1.6719241454391907e-06, + "loss": 0.5637, + "step": 12406 + }, + { + "epoch": 1.8282416502946954, + "grad_norm": 0.6086515188217163, + "learning_rate": 1.671558343186584e-06, + "loss": 0.557, + "step": 12407 + }, + { + "epoch": 1.8283889980353636, + "grad_norm": 0.5998965501785278, + "learning_rate": 1.6711925608576102e-06, + "loss": 0.5278, + "step": 12408 + }, + { + "epoch": 1.8285363457760315, + "grad_norm": 0.5878695249557495, + "learning_rate": 1.6708267984610669e-06, + "loss": 0.5146, + "step": 12409 + }, + { + "epoch": 1.8286836935166995, + "grad_norm": 0.6047269105911255, + "learning_rate": 1.67046105600575e-06, + "loss": 0.55, + "step": 12410 + }, + { + "epoch": 1.8288310412573674, + "grad_norm": 0.6043235659599304, + "learning_rate": 1.6700953335004557e-06, + "loss": 0.5321, + "step": 12411 + }, + { + "epoch": 1.8289783889980353, + "grad_norm": 0.5946025848388672, + "learning_rate": 1.6697296309539795e-06, + "loss": 0.5271, + "step": 12412 + }, + { + "epoch": 1.8291257367387033, + "grad_norm": 0.5761821866035461, + "learning_rate": 1.669363948375116e-06, + "loss": 0.5253, + "step": 12413 + }, + { + "epoch": 1.8292730844793712, + "grad_norm": 0.624296247959137, + "learning_rate": 1.66899828577266e-06, + "loss": 0.5442, + "step": 12414 + }, + { + "epoch": 1.8294204322200391, + "grad_norm": 0.6354081630706787, + "learning_rate": 1.6686326431554057e-06, + "loss": 0.5359, + "step": 12415 + }, + { + "epoch": 1.8295677799607073, + "grad_norm": 0.6066690683364868, + "learning_rate": 1.668267020532146e-06, + "loss": 0.5462, + "step": 12416 + }, + { + "epoch": 1.8297151277013752, + "grad_norm": 0.6221599578857422, + "learning_rate": 1.6679014179116748e-06, + "loss": 0.5233, + "step": 12417 + }, + { + "epoch": 1.8298624754420432, + "grad_norm": 0.6396351456642151, + "learning_rate": 1.6675358353027838e-06, + "loss": 0.5172, + "step": 12418 + }, + { + "epoch": 1.8300098231827113, + "grad_norm": 0.6022396087646484, + "learning_rate": 1.6671702727142659e-06, + "loss": 0.5389, + "step": 12419 + }, + { + "epoch": 1.8301571709233793, + "grad_norm": 0.6322325468063354, + "learning_rate": 1.6668047301549122e-06, + "loss": 0.5428, + "step": 12420 + }, + { + "epoch": 1.8303045186640472, + "grad_norm": 0.6010305881500244, + "learning_rate": 1.6664392076335134e-06, + "loss": 0.5263, + "step": 12421 + }, + { + "epoch": 1.8304518664047151, + "grad_norm": 0.5967128276824951, + "learning_rate": 1.666073705158861e-06, + "loss": 0.545, + "step": 12422 + }, + { + "epoch": 1.830599214145383, + "grad_norm": 0.5871379375457764, + "learning_rate": 1.665708222739745e-06, + "loss": 0.5547, + "step": 12423 + }, + { + "epoch": 1.830746561886051, + "grad_norm": 0.6021212935447693, + "learning_rate": 1.6653427603849552e-06, + "loss": 0.5077, + "step": 12424 + }, + { + "epoch": 1.830893909626719, + "grad_norm": 0.607499361038208, + "learning_rate": 1.6649773181032802e-06, + "loss": 0.5501, + "step": 12425 + }, + { + "epoch": 1.8310412573673869, + "grad_norm": 0.6608202457427979, + "learning_rate": 1.664611895903509e-06, + "loss": 0.5186, + "step": 12426 + }, + { + "epoch": 1.831188605108055, + "grad_norm": 0.6209824681282043, + "learning_rate": 1.6642464937944302e-06, + "loss": 0.5365, + "step": 12427 + }, + { + "epoch": 1.831335952848723, + "grad_norm": 0.6185650825500488, + "learning_rate": 1.6638811117848307e-06, + "loss": 0.5259, + "step": 12428 + }, + { + "epoch": 1.8314833005893911, + "grad_norm": 0.6098282337188721, + "learning_rate": 1.663515749883499e-06, + "loss": 0.5243, + "step": 12429 + }, + { + "epoch": 1.831630648330059, + "grad_norm": 0.6545236110687256, + "learning_rate": 1.663150408099221e-06, + "loss": 0.5285, + "step": 12430 + }, + { + "epoch": 1.831777996070727, + "grad_norm": 0.6087938547134399, + "learning_rate": 1.662785086440783e-06, + "loss": 0.5431, + "step": 12431 + }, + { + "epoch": 1.831925343811395, + "grad_norm": 0.6245424151420593, + "learning_rate": 1.662419784916971e-06, + "loss": 0.5601, + "step": 12432 + }, + { + "epoch": 1.8320726915520629, + "grad_norm": 0.6279801726341248, + "learning_rate": 1.6620545035365703e-06, + "loss": 0.5299, + "step": 12433 + }, + { + "epoch": 1.8322200392927308, + "grad_norm": 0.596102774143219, + "learning_rate": 1.6616892423083658e-06, + "loss": 0.5349, + "step": 12434 + }, + { + "epoch": 1.8323673870333987, + "grad_norm": 0.5869517922401428, + "learning_rate": 1.6613240012411418e-06, + "loss": 0.5453, + "step": 12435 + }, + { + "epoch": 1.8325147347740667, + "grad_norm": 0.6057943105697632, + "learning_rate": 1.6609587803436822e-06, + "loss": 0.5419, + "step": 12436 + }, + { + "epoch": 1.8326620825147346, + "grad_norm": 0.608203113079071, + "learning_rate": 1.6605935796247704e-06, + "loss": 0.5052, + "step": 12437 + }, + { + "epoch": 1.8328094302554028, + "grad_norm": 0.6122738122940063, + "learning_rate": 1.6602283990931895e-06, + "loss": 0.4819, + "step": 12438 + }, + { + "epoch": 1.8329567779960707, + "grad_norm": 0.5964191555976868, + "learning_rate": 1.6598632387577216e-06, + "loss": 0.5265, + "step": 12439 + }, + { + "epoch": 1.8331041257367389, + "grad_norm": 0.6157891750335693, + "learning_rate": 1.6594980986271485e-06, + "loss": 0.5203, + "step": 12440 + }, + { + "epoch": 1.8332514734774068, + "grad_norm": 0.6251574158668518, + "learning_rate": 1.6591329787102523e-06, + "loss": 0.5025, + "step": 12441 + }, + { + "epoch": 1.8333988212180747, + "grad_norm": 0.5771304965019226, + "learning_rate": 1.6587678790158134e-06, + "loss": 0.5051, + "step": 12442 + }, + { + "epoch": 1.8335461689587427, + "grad_norm": 0.6076191067695618, + "learning_rate": 1.6584027995526125e-06, + "loss": 0.5243, + "step": 12443 + }, + { + "epoch": 1.8336935166994106, + "grad_norm": 0.5712296962738037, + "learning_rate": 1.6580377403294295e-06, + "loss": 0.4959, + "step": 12444 + }, + { + "epoch": 1.8338408644400785, + "grad_norm": 0.636947751045227, + "learning_rate": 1.657672701355044e-06, + "loss": 0.5271, + "step": 12445 + }, + { + "epoch": 1.8339882121807465, + "grad_norm": 0.5780942440032959, + "learning_rate": 1.657307682638235e-06, + "loss": 0.5428, + "step": 12446 + }, + { + "epoch": 1.8341355599214144, + "grad_norm": 0.606618344783783, + "learning_rate": 1.6569426841877806e-06, + "loss": 0.5217, + "step": 12447 + }, + { + "epoch": 1.8342829076620824, + "grad_norm": 0.6146298050880432, + "learning_rate": 1.6565777060124596e-06, + "loss": 0.5205, + "step": 12448 + }, + { + "epoch": 1.8344302554027505, + "grad_norm": 0.5680444836616516, + "learning_rate": 1.6562127481210495e-06, + "loss": 0.5035, + "step": 12449 + }, + { + "epoch": 1.8345776031434184, + "grad_norm": 0.6124752759933472, + "learning_rate": 1.6558478105223264e-06, + "loss": 0.5011, + "step": 12450 + }, + { + "epoch": 1.8347249508840866, + "grad_norm": 0.5952718257904053, + "learning_rate": 1.6554828932250676e-06, + "loss": 0.5137, + "step": 12451 + }, + { + "epoch": 1.8348722986247545, + "grad_norm": 0.5786422491073608, + "learning_rate": 1.6551179962380498e-06, + "loss": 0.4853, + "step": 12452 + }, + { + "epoch": 1.8350196463654225, + "grad_norm": 0.6481749415397644, + "learning_rate": 1.6547531195700476e-06, + "loss": 0.5309, + "step": 12453 + }, + { + "epoch": 1.8351669941060904, + "grad_norm": 0.646102249622345, + "learning_rate": 1.6543882632298363e-06, + "loss": 0.5278, + "step": 12454 + }, + { + "epoch": 1.8353143418467583, + "grad_norm": 0.5887139439582825, + "learning_rate": 1.6540234272261908e-06, + "loss": 0.5317, + "step": 12455 + }, + { + "epoch": 1.8354616895874263, + "grad_norm": 0.6158859729766846, + "learning_rate": 1.6536586115678846e-06, + "loss": 0.542, + "step": 12456 + }, + { + "epoch": 1.8356090373280942, + "grad_norm": 0.609656572341919, + "learning_rate": 1.6532938162636919e-06, + "loss": 0.5443, + "step": 12457 + }, + { + "epoch": 1.8357563850687622, + "grad_norm": 0.5962823033332825, + "learning_rate": 1.6529290413223853e-06, + "loss": 0.4953, + "step": 12458 + }, + { + "epoch": 1.83590373280943, + "grad_norm": 0.6456615328788757, + "learning_rate": 1.6525642867527386e-06, + "loss": 0.5443, + "step": 12459 + }, + { + "epoch": 1.8360510805500982, + "grad_norm": 0.6179705858230591, + "learning_rate": 1.652199552563523e-06, + "loss": 0.5611, + "step": 12460 + }, + { + "epoch": 1.8361984282907662, + "grad_norm": 0.5916454195976257, + "learning_rate": 1.6518348387635114e-06, + "loss": 0.5311, + "step": 12461 + }, + { + "epoch": 1.8363457760314343, + "grad_norm": 0.6071233749389648, + "learning_rate": 1.6514701453614734e-06, + "loss": 0.5399, + "step": 12462 + }, + { + "epoch": 1.8364931237721023, + "grad_norm": 0.58741295337677, + "learning_rate": 1.6511054723661807e-06, + "loss": 0.5003, + "step": 12463 + }, + { + "epoch": 1.8366404715127702, + "grad_norm": 0.6177278757095337, + "learning_rate": 1.6507408197864033e-06, + "loss": 0.4861, + "step": 12464 + }, + { + "epoch": 1.8367878192534381, + "grad_norm": 0.6362592577934265, + "learning_rate": 1.6503761876309104e-06, + "loss": 0.5187, + "step": 12465 + }, + { + "epoch": 1.836935166994106, + "grad_norm": 0.6160345077514648, + "learning_rate": 1.6500115759084723e-06, + "loss": 0.5272, + "step": 12466 + }, + { + "epoch": 1.837082514734774, + "grad_norm": 0.5926811695098877, + "learning_rate": 1.6496469846278568e-06, + "loss": 0.5221, + "step": 12467 + }, + { + "epoch": 1.837229862475442, + "grad_norm": 0.6162476539611816, + "learning_rate": 1.6492824137978325e-06, + "loss": 0.5383, + "step": 12468 + }, + { + "epoch": 1.83737721021611, + "grad_norm": 0.5907217264175415, + "learning_rate": 1.648917863427167e-06, + "loss": 0.5204, + "step": 12469 + }, + { + "epoch": 1.837524557956778, + "grad_norm": 0.5780962705612183, + "learning_rate": 1.6485533335246285e-06, + "loss": 0.5316, + "step": 12470 + }, + { + "epoch": 1.837671905697446, + "grad_norm": 0.6105409860610962, + "learning_rate": 1.6481888240989824e-06, + "loss": 0.5022, + "step": 12471 + }, + { + "epoch": 1.837819253438114, + "grad_norm": 0.5918161869049072, + "learning_rate": 1.6478243351589957e-06, + "loss": 0.502, + "step": 12472 + }, + { + "epoch": 1.837966601178782, + "grad_norm": 0.6021809577941895, + "learning_rate": 1.6474598667134336e-06, + "loss": 0.5387, + "step": 12473 + }, + { + "epoch": 1.83811394891945, + "grad_norm": 0.6421228051185608, + "learning_rate": 1.6470954187710621e-06, + "loss": 0.5199, + "step": 12474 + }, + { + "epoch": 1.838261296660118, + "grad_norm": 0.6070942282676697, + "learning_rate": 1.6467309913406459e-06, + "loss": 0.5322, + "step": 12475 + }, + { + "epoch": 1.8384086444007859, + "grad_norm": 0.6122100949287415, + "learning_rate": 1.646366584430949e-06, + "loss": 0.5326, + "step": 12476 + }, + { + "epoch": 1.8385559921414538, + "grad_norm": 0.5972127914428711, + "learning_rate": 1.646002198050735e-06, + "loss": 0.5523, + "step": 12477 + }, + { + "epoch": 1.8387033398821218, + "grad_norm": 0.59320068359375, + "learning_rate": 1.6456378322087682e-06, + "loss": 0.5365, + "step": 12478 + }, + { + "epoch": 1.8388506876227897, + "grad_norm": 0.6171185374259949, + "learning_rate": 1.6452734869138106e-06, + "loss": 0.5028, + "step": 12479 + }, + { + "epoch": 1.8389980353634576, + "grad_norm": 0.643592894077301, + "learning_rate": 1.6449091621746246e-06, + "loss": 0.5226, + "step": 12480 + }, + { + "epoch": 1.8391453831041258, + "grad_norm": 0.624270498752594, + "learning_rate": 1.6445448579999722e-06, + "loss": 0.5197, + "step": 12481 + }, + { + "epoch": 1.8392927308447937, + "grad_norm": 0.5704781413078308, + "learning_rate": 1.644180574398615e-06, + "loss": 0.5268, + "step": 12482 + }, + { + "epoch": 1.8394400785854617, + "grad_norm": 0.638228714466095, + "learning_rate": 1.6438163113793134e-06, + "loss": 0.5502, + "step": 12483 + }, + { + "epoch": 1.8395874263261298, + "grad_norm": 0.5833841562271118, + "learning_rate": 1.643452068950828e-06, + "loss": 0.5247, + "step": 12484 + }, + { + "epoch": 1.8397347740667978, + "grad_norm": 0.6331367492675781, + "learning_rate": 1.6430878471219186e-06, + "loss": 0.5252, + "step": 12485 + }, + { + "epoch": 1.8398821218074657, + "grad_norm": 0.60352623462677, + "learning_rate": 1.6427236459013446e-06, + "loss": 0.545, + "step": 12486 + }, + { + "epoch": 1.8400294695481336, + "grad_norm": 0.6254677772521973, + "learning_rate": 1.6423594652978648e-06, + "loss": 0.5274, + "step": 12487 + }, + { + "epoch": 1.8401768172888016, + "grad_norm": 0.5819662809371948, + "learning_rate": 1.6419953053202372e-06, + "loss": 0.5387, + "step": 12488 + }, + { + "epoch": 1.8403241650294695, + "grad_norm": 0.6199820041656494, + "learning_rate": 1.6416311659772206e-06, + "loss": 0.5334, + "step": 12489 + }, + { + "epoch": 1.8404715127701374, + "grad_norm": 0.5874654054641724, + "learning_rate": 1.6412670472775715e-06, + "loss": 0.516, + "step": 12490 + }, + { + "epoch": 1.8406188605108054, + "grad_norm": 0.5885591506958008, + "learning_rate": 1.6409029492300474e-06, + "loss": 0.5349, + "step": 12491 + }, + { + "epoch": 1.8407662082514735, + "grad_norm": 0.6221877932548523, + "learning_rate": 1.6405388718434046e-06, + "loss": 0.4871, + "step": 12492 + }, + { + "epoch": 1.8409135559921415, + "grad_norm": 0.6380377411842346, + "learning_rate": 1.6401748151263985e-06, + "loss": 0.5502, + "step": 12493 + }, + { + "epoch": 1.8410609037328094, + "grad_norm": 0.6351560950279236, + "learning_rate": 1.6398107790877848e-06, + "loss": 0.5393, + "step": 12494 + }, + { + "epoch": 1.8412082514734776, + "grad_norm": 0.610151469707489, + "learning_rate": 1.639446763736318e-06, + "loss": 0.5065, + "step": 12495 + }, + { + "epoch": 1.8413555992141455, + "grad_norm": 0.5754992961883545, + "learning_rate": 1.6390827690807531e-06, + "loss": 0.4849, + "step": 12496 + }, + { + "epoch": 1.8415029469548134, + "grad_norm": 0.646041989326477, + "learning_rate": 1.6387187951298444e-06, + "loss": 0.5176, + "step": 12497 + }, + { + "epoch": 1.8416502946954814, + "grad_norm": 0.6131318211555481, + "learning_rate": 1.6383548418923446e-06, + "loss": 0.5154, + "step": 12498 + }, + { + "epoch": 1.8417976424361493, + "grad_norm": 0.6228312253952026, + "learning_rate": 1.6379909093770063e-06, + "loss": 0.5225, + "step": 12499 + }, + { + "epoch": 1.8419449901768172, + "grad_norm": 0.6160492897033691, + "learning_rate": 1.6376269975925823e-06, + "loss": 0.542, + "step": 12500 + }, + { + "epoch": 1.8420923379174852, + "grad_norm": 0.5921025276184082, + "learning_rate": 1.637263106547825e-06, + "loss": 0.5058, + "step": 12501 + }, + { + "epoch": 1.842239685658153, + "grad_norm": 0.6710477471351624, + "learning_rate": 1.636899236251485e-06, + "loss": 0.5258, + "step": 12502 + }, + { + "epoch": 1.8423870333988213, + "grad_norm": 0.6262224316596985, + "learning_rate": 1.6365353867123138e-06, + "loss": 0.5269, + "step": 12503 + }, + { + "epoch": 1.8425343811394892, + "grad_norm": 0.6298155188560486, + "learning_rate": 1.6361715579390613e-06, + "loss": 0.5352, + "step": 12504 + }, + { + "epoch": 1.8426817288801571, + "grad_norm": 0.5662040710449219, + "learning_rate": 1.635807749940478e-06, + "loss": 0.5105, + "step": 12505 + }, + { + "epoch": 1.8428290766208253, + "grad_norm": 0.5991016030311584, + "learning_rate": 1.6354439627253127e-06, + "loss": 0.5466, + "step": 12506 + }, + { + "epoch": 1.8429764243614932, + "grad_norm": 0.6196655035018921, + "learning_rate": 1.6350801963023147e-06, + "loss": 0.5289, + "step": 12507 + }, + { + "epoch": 1.8431237721021612, + "grad_norm": 0.6110659241676331, + "learning_rate": 1.6347164506802324e-06, + "loss": 0.5001, + "step": 12508 + }, + { + "epoch": 1.843271119842829, + "grad_norm": 0.6374261379241943, + "learning_rate": 1.6343527258678135e-06, + "loss": 0.5227, + "step": 12509 + }, + { + "epoch": 1.843418467583497, + "grad_norm": 0.6000247597694397, + "learning_rate": 1.6339890218738056e-06, + "loss": 0.5446, + "step": 12510 + }, + { + "epoch": 1.843565815324165, + "grad_norm": 0.6053310632705688, + "learning_rate": 1.6336253387069554e-06, + "loss": 0.4929, + "step": 12511 + }, + { + "epoch": 1.843713163064833, + "grad_norm": 0.6054990887641907, + "learning_rate": 1.6332616763760096e-06, + "loss": 0.5045, + "step": 12512 + }, + { + "epoch": 1.8438605108055008, + "grad_norm": 0.5880151987075806, + "learning_rate": 1.6328980348897138e-06, + "loss": 0.4928, + "step": 12513 + }, + { + "epoch": 1.844007858546169, + "grad_norm": 0.6194759607315063, + "learning_rate": 1.6325344142568136e-06, + "loss": 0.538, + "step": 12514 + }, + { + "epoch": 1.844155206286837, + "grad_norm": 0.6047922372817993, + "learning_rate": 1.632170814486054e-06, + "loss": 0.5434, + "step": 12515 + }, + { + "epoch": 1.8443025540275049, + "grad_norm": 0.601594865322113, + "learning_rate": 1.631807235586179e-06, + "loss": 0.545, + "step": 12516 + }, + { + "epoch": 1.844449901768173, + "grad_norm": 0.6111121773719788, + "learning_rate": 1.631443677565933e-06, + "loss": 0.4989, + "step": 12517 + }, + { + "epoch": 1.844597249508841, + "grad_norm": 0.617434024810791, + "learning_rate": 1.6310801404340598e-06, + "loss": 0.5275, + "step": 12518 + }, + { + "epoch": 1.844744597249509, + "grad_norm": 0.5847355723381042, + "learning_rate": 1.6307166241993012e-06, + "loss": 0.5085, + "step": 12519 + }, + { + "epoch": 1.8448919449901768, + "grad_norm": 0.6304795742034912, + "learning_rate": 1.6303531288704e-06, + "loss": 0.5464, + "step": 12520 + }, + { + "epoch": 1.8450392927308448, + "grad_norm": 0.6047099828720093, + "learning_rate": 1.6299896544560978e-06, + "loss": 0.5053, + "step": 12521 + }, + { + "epoch": 1.8451866404715127, + "grad_norm": 0.6086491942405701, + "learning_rate": 1.6296262009651367e-06, + "loss": 0.5364, + "step": 12522 + }, + { + "epoch": 1.8453339882121806, + "grad_norm": 0.5955548286437988, + "learning_rate": 1.6292627684062574e-06, + "loss": 0.5332, + "step": 12523 + }, + { + "epoch": 1.8454813359528486, + "grad_norm": 0.5894961357116699, + "learning_rate": 1.6288993567881995e-06, + "loss": 0.5265, + "step": 12524 + }, + { + "epoch": 1.8456286836935167, + "grad_norm": 0.6379593014717102, + "learning_rate": 1.6285359661197042e-06, + "loss": 0.5465, + "step": 12525 + }, + { + "epoch": 1.8457760314341847, + "grad_norm": 0.6010504961013794, + "learning_rate": 1.6281725964095096e-06, + "loss": 0.5448, + "step": 12526 + }, + { + "epoch": 1.8459233791748526, + "grad_norm": 0.5761468410491943, + "learning_rate": 1.6278092476663554e-06, + "loss": 0.5096, + "step": 12527 + }, + { + "epoch": 1.8460707269155208, + "grad_norm": 0.6188875436782837, + "learning_rate": 1.6274459198989797e-06, + "loss": 0.5344, + "step": 12528 + }, + { + "epoch": 1.8462180746561887, + "grad_norm": 0.6405048966407776, + "learning_rate": 1.6270826131161206e-06, + "loss": 0.5431, + "step": 12529 + }, + { + "epoch": 1.8463654223968566, + "grad_norm": 0.5849891901016235, + "learning_rate": 1.6267193273265146e-06, + "loss": 0.5402, + "step": 12530 + }, + { + "epoch": 1.8465127701375246, + "grad_norm": 0.6172814965248108, + "learning_rate": 1.6263560625388996e-06, + "loss": 0.5328, + "step": 12531 + }, + { + "epoch": 1.8466601178781925, + "grad_norm": 0.6096716523170471, + "learning_rate": 1.6259928187620105e-06, + "loss": 0.5339, + "step": 12532 + }, + { + "epoch": 1.8468074656188604, + "grad_norm": 0.6244282722473145, + "learning_rate": 1.6256295960045854e-06, + "loss": 0.4985, + "step": 12533 + }, + { + "epoch": 1.8469548133595284, + "grad_norm": 0.5848279595375061, + "learning_rate": 1.6252663942753583e-06, + "loss": 0.53, + "step": 12534 + }, + { + "epoch": 1.8471021611001963, + "grad_norm": 0.6053786873817444, + "learning_rate": 1.6249032135830638e-06, + "loss": 0.509, + "step": 12535 + }, + { + "epoch": 1.8472495088408645, + "grad_norm": 0.6239436268806458, + "learning_rate": 1.6245400539364375e-06, + "loss": 0.5377, + "step": 12536 + }, + { + "epoch": 1.8473968565815324, + "grad_norm": 0.583246111869812, + "learning_rate": 1.6241769153442116e-06, + "loss": 0.5115, + "step": 12537 + }, + { + "epoch": 1.8475442043222003, + "grad_norm": 0.6134377121925354, + "learning_rate": 1.6238137978151203e-06, + "loss": 0.5055, + "step": 12538 + }, + { + "epoch": 1.8476915520628685, + "grad_norm": 0.6027123928070068, + "learning_rate": 1.623450701357896e-06, + "loss": 0.5087, + "step": 12539 + }, + { + "epoch": 1.8478388998035364, + "grad_norm": 0.5929616093635559, + "learning_rate": 1.623087625981271e-06, + "loss": 0.4972, + "step": 12540 + }, + { + "epoch": 1.8479862475442044, + "grad_norm": 0.6949145197868347, + "learning_rate": 1.6227245716939777e-06, + "loss": 0.4823, + "step": 12541 + }, + { + "epoch": 1.8481335952848723, + "grad_norm": 0.6121867895126343, + "learning_rate": 1.6223615385047467e-06, + "loss": 0.5311, + "step": 12542 + }, + { + "epoch": 1.8482809430255402, + "grad_norm": 0.5628690123558044, + "learning_rate": 1.6219985264223093e-06, + "loss": 0.5414, + "step": 12543 + }, + { + "epoch": 1.8484282907662082, + "grad_norm": 0.6530505418777466, + "learning_rate": 1.6216355354553953e-06, + "loss": 0.5222, + "step": 12544 + }, + { + "epoch": 1.8485756385068761, + "grad_norm": 0.6274197697639465, + "learning_rate": 1.621272565612735e-06, + "loss": 0.5188, + "step": 12545 + }, + { + "epoch": 1.848722986247544, + "grad_norm": 0.6098949909210205, + "learning_rate": 1.620909616903057e-06, + "loss": 0.5367, + "step": 12546 + }, + { + "epoch": 1.8488703339882122, + "grad_norm": 0.5947245359420776, + "learning_rate": 1.6205466893350905e-06, + "loss": 0.548, + "step": 12547 + }, + { + "epoch": 1.8490176817288801, + "grad_norm": 0.6127727031707764, + "learning_rate": 1.6201837829175637e-06, + "loss": 0.5583, + "step": 12548 + }, + { + "epoch": 1.849165029469548, + "grad_norm": 0.594135046005249, + "learning_rate": 1.6198208976592043e-06, + "loss": 0.546, + "step": 12549 + }, + { + "epoch": 1.8493123772102162, + "grad_norm": 0.599881112575531, + "learning_rate": 1.6194580335687392e-06, + "loss": 0.5307, + "step": 12550 + }, + { + "epoch": 1.8494597249508842, + "grad_norm": 0.5951188206672668, + "learning_rate": 1.6190951906548958e-06, + "loss": 0.5108, + "step": 12551 + }, + { + "epoch": 1.8496070726915521, + "grad_norm": 0.6047112941741943, + "learning_rate": 1.6187323689263995e-06, + "loss": 0.5054, + "step": 12552 + }, + { + "epoch": 1.84975442043222, + "grad_norm": 0.6009184122085571, + "learning_rate": 1.6183695683919765e-06, + "loss": 0.5386, + "step": 12553 + }, + { + "epoch": 1.849901768172888, + "grad_norm": 0.5630689263343811, + "learning_rate": 1.6180067890603521e-06, + "loss": 0.5289, + "step": 12554 + }, + { + "epoch": 1.850049115913556, + "grad_norm": 0.6174377799034119, + "learning_rate": 1.6176440309402509e-06, + "loss": 0.5266, + "step": 12555 + }, + { + "epoch": 1.8501964636542239, + "grad_norm": 0.6418784856796265, + "learning_rate": 1.6172812940403966e-06, + "loss": 0.5248, + "step": 12556 + }, + { + "epoch": 1.8503438113948918, + "grad_norm": 0.6356958150863647, + "learning_rate": 1.616918578369513e-06, + "loss": 0.5295, + "step": 12557 + }, + { + "epoch": 1.85049115913556, + "grad_norm": 0.6235473155975342, + "learning_rate": 1.6165558839363234e-06, + "loss": 0.5413, + "step": 12558 + }, + { + "epoch": 1.8506385068762279, + "grad_norm": 0.6383559107780457, + "learning_rate": 1.6161932107495507e-06, + "loss": 0.5178, + "step": 12559 + }, + { + "epoch": 1.8507858546168958, + "grad_norm": 0.5823670625686646, + "learning_rate": 1.6158305588179166e-06, + "loss": 0.506, + "step": 12560 + }, + { + "epoch": 1.850933202357564, + "grad_norm": 0.6581199169158936, + "learning_rate": 1.6154679281501429e-06, + "loss": 0.5156, + "step": 12561 + }, + { + "epoch": 1.851080550098232, + "grad_norm": 0.6131667494773865, + "learning_rate": 1.615105318754951e-06, + "loss": 0.4826, + "step": 12562 + }, + { + "epoch": 1.8512278978388998, + "grad_norm": 0.5908950567245483, + "learning_rate": 1.6147427306410608e-06, + "loss": 0.5141, + "step": 12563 + }, + { + "epoch": 1.8513752455795678, + "grad_norm": 0.6176474690437317, + "learning_rate": 1.6143801638171925e-06, + "loss": 0.5481, + "step": 12564 + }, + { + "epoch": 1.8515225933202357, + "grad_norm": 0.575637936592102, + "learning_rate": 1.6140176182920662e-06, + "loss": 0.5442, + "step": 12565 + }, + { + "epoch": 1.8516699410609037, + "grad_norm": 0.6052481532096863, + "learning_rate": 1.6136550940744007e-06, + "loss": 0.5164, + "step": 12566 + }, + { + "epoch": 1.8518172888015716, + "grad_norm": 0.6292054057121277, + "learning_rate": 1.6132925911729148e-06, + "loss": 0.5402, + "step": 12567 + }, + { + "epoch": 1.8519646365422395, + "grad_norm": 0.5767233967781067, + "learning_rate": 1.6129301095963246e-06, + "loss": 0.4915, + "step": 12568 + }, + { + "epoch": 1.8521119842829077, + "grad_norm": 0.6043287515640259, + "learning_rate": 1.61256764935335e-06, + "loss": 0.5347, + "step": 12569 + }, + { + "epoch": 1.8522593320235756, + "grad_norm": 0.5880776047706604, + "learning_rate": 1.6122052104527076e-06, + "loss": 0.5088, + "step": 12570 + }, + { + "epoch": 1.8524066797642438, + "grad_norm": 0.5706408023834229, + "learning_rate": 1.6118427929031133e-06, + "loss": 0.5352, + "step": 12571 + }, + { + "epoch": 1.8525540275049117, + "grad_norm": 0.631572961807251, + "learning_rate": 1.6114803967132837e-06, + "loss": 0.5565, + "step": 12572 + }, + { + "epoch": 1.8527013752455797, + "grad_norm": 0.583160936832428, + "learning_rate": 1.611118021891933e-06, + "loss": 0.5357, + "step": 12573 + }, + { + "epoch": 1.8528487229862476, + "grad_norm": 0.6017404794692993, + "learning_rate": 1.6107556684477776e-06, + "loss": 0.5072, + "step": 12574 + }, + { + "epoch": 1.8529960707269155, + "grad_norm": 0.638901948928833, + "learning_rate": 1.6103933363895312e-06, + "loss": 0.5356, + "step": 12575 + }, + { + "epoch": 1.8531434184675835, + "grad_norm": 0.5943235754966736, + "learning_rate": 1.6100310257259077e-06, + "loss": 0.5206, + "step": 12576 + }, + { + "epoch": 1.8532907662082514, + "grad_norm": 0.5855939984321594, + "learning_rate": 1.6096687364656205e-06, + "loss": 0.5509, + "step": 12577 + }, + { + "epoch": 1.8534381139489193, + "grad_norm": 0.6054356098175049, + "learning_rate": 1.6093064686173828e-06, + "loss": 0.5249, + "step": 12578 + }, + { + "epoch": 1.8535854616895873, + "grad_norm": 0.5934239029884338, + "learning_rate": 1.6089442221899068e-06, + "loss": 0.5226, + "step": 12579 + }, + { + "epoch": 1.8537328094302554, + "grad_norm": 0.6302759647369385, + "learning_rate": 1.6085819971919042e-06, + "loss": 0.533, + "step": 12580 + }, + { + "epoch": 1.8538801571709234, + "grad_norm": 0.6192997694015503, + "learning_rate": 1.6082197936320865e-06, + "loss": 0.5232, + "step": 12581 + }, + { + "epoch": 1.8540275049115915, + "grad_norm": 0.607856035232544, + "learning_rate": 1.6078576115191647e-06, + "loss": 0.5339, + "step": 12582 + }, + { + "epoch": 1.8541748526522595, + "grad_norm": 0.6135491132736206, + "learning_rate": 1.6074954508618485e-06, + "loss": 0.5153, + "step": 12583 + }, + { + "epoch": 1.8543222003929274, + "grad_norm": 0.6037787795066833, + "learning_rate": 1.6071333116688484e-06, + "loss": 0.5152, + "step": 12584 + }, + { + "epoch": 1.8544695481335953, + "grad_norm": 0.5740422010421753, + "learning_rate": 1.6067711939488736e-06, + "loss": 0.4872, + "step": 12585 + }, + { + "epoch": 1.8546168958742633, + "grad_norm": 0.603039026260376, + "learning_rate": 1.6064090977106323e-06, + "loss": 0.5027, + "step": 12586 + }, + { + "epoch": 1.8547642436149312, + "grad_norm": 0.5713183283805847, + "learning_rate": 1.6060470229628334e-06, + "loss": 0.5512, + "step": 12587 + }, + { + "epoch": 1.8549115913555991, + "grad_norm": 0.611957848072052, + "learning_rate": 1.6056849697141838e-06, + "loss": 0.4769, + "step": 12588 + }, + { + "epoch": 1.855058939096267, + "grad_norm": 0.5975543260574341, + "learning_rate": 1.6053229379733914e-06, + "loss": 0.4997, + "step": 12589 + }, + { + "epoch": 1.855206286836935, + "grad_norm": 0.61817866563797, + "learning_rate": 1.6049609277491629e-06, + "loss": 0.5225, + "step": 12590 + }, + { + "epoch": 1.8553536345776032, + "grad_norm": 0.6040887236595154, + "learning_rate": 1.604598939050204e-06, + "loss": 0.4839, + "step": 12591 + }, + { + "epoch": 1.855500982318271, + "grad_norm": 0.6233900189399719, + "learning_rate": 1.6042369718852208e-06, + "loss": 0.5375, + "step": 12592 + }, + { + "epoch": 1.8556483300589393, + "grad_norm": 0.6120287179946899, + "learning_rate": 1.6038750262629182e-06, + "loss": 0.5049, + "step": 12593 + }, + { + "epoch": 1.8557956777996072, + "grad_norm": 0.6175855994224548, + "learning_rate": 1.6035131021920008e-06, + "loss": 0.5501, + "step": 12594 + }, + { + "epoch": 1.8559430255402751, + "grad_norm": 0.6209318041801453, + "learning_rate": 1.6031511996811734e-06, + "loss": 0.5315, + "step": 12595 + }, + { + "epoch": 1.856090373280943, + "grad_norm": 0.5887359380722046, + "learning_rate": 1.6027893187391382e-06, + "loss": 0.5507, + "step": 12596 + }, + { + "epoch": 1.856237721021611, + "grad_norm": 0.6133720874786377, + "learning_rate": 1.6024274593745998e-06, + "loss": 0.5242, + "step": 12597 + }, + { + "epoch": 1.856385068762279, + "grad_norm": 0.6383337378501892, + "learning_rate": 1.6020656215962593e-06, + "loss": 0.5111, + "step": 12598 + }, + { + "epoch": 1.8565324165029469, + "grad_norm": 0.5831623673439026, + "learning_rate": 1.6017038054128197e-06, + "loss": 0.5607, + "step": 12599 + }, + { + "epoch": 1.8566797642436148, + "grad_norm": 0.5707332491874695, + "learning_rate": 1.6013420108329825e-06, + "loss": 0.5097, + "step": 12600 + }, + { + "epoch": 1.856827111984283, + "grad_norm": 0.608397901058197, + "learning_rate": 1.6009802378654483e-06, + "loss": 0.5318, + "step": 12601 + }, + { + "epoch": 1.856974459724951, + "grad_norm": 0.6131672263145447, + "learning_rate": 1.600618486518918e-06, + "loss": 0.517, + "step": 12602 + }, + { + "epoch": 1.8571218074656188, + "grad_norm": 0.6171614527702332, + "learning_rate": 1.6002567568020906e-06, + "loss": 0.519, + "step": 12603 + }, + { + "epoch": 1.857269155206287, + "grad_norm": 0.6210247278213501, + "learning_rate": 1.599895048723667e-06, + "loss": 0.5297, + "step": 12604 + }, + { + "epoch": 1.857416502946955, + "grad_norm": 0.5848509669303894, + "learning_rate": 1.5995333622923443e-06, + "loss": 0.5163, + "step": 12605 + }, + { + "epoch": 1.8575638506876229, + "grad_norm": 0.5915268063545227, + "learning_rate": 1.5991716975168222e-06, + "loss": 0.5173, + "step": 12606 + }, + { + "epoch": 1.8577111984282908, + "grad_norm": 0.6326485276222229, + "learning_rate": 1.5988100544057988e-06, + "loss": 0.4857, + "step": 12607 + }, + { + "epoch": 1.8578585461689587, + "grad_norm": 0.609363853931427, + "learning_rate": 1.5984484329679706e-06, + "loss": 0.5178, + "step": 12608 + }, + { + "epoch": 1.8580058939096267, + "grad_norm": 0.6096388101577759, + "learning_rate": 1.5980868332120348e-06, + "loss": 0.5162, + "step": 12609 + }, + { + "epoch": 1.8581532416502946, + "grad_norm": 0.6092121005058289, + "learning_rate": 1.5977252551466877e-06, + "loss": 0.54, + "step": 12610 + }, + { + "epoch": 1.8583005893909625, + "grad_norm": 0.6401953101158142, + "learning_rate": 1.5973636987806245e-06, + "loss": 0.5113, + "step": 12611 + }, + { + "epoch": 1.8584479371316307, + "grad_norm": 0.6117885708808899, + "learning_rate": 1.5970021641225417e-06, + "loss": 0.5366, + "step": 12612 + }, + { + "epoch": 1.8585952848722986, + "grad_norm": 0.5649957656860352, + "learning_rate": 1.5966406511811328e-06, + "loss": 0.5285, + "step": 12613 + }, + { + "epoch": 1.8587426326129666, + "grad_norm": 0.6022456884384155, + "learning_rate": 1.5962791599650928e-06, + "loss": 0.5358, + "step": 12614 + }, + { + "epoch": 1.8588899803536347, + "grad_norm": 0.5820597410202026, + "learning_rate": 1.5959176904831152e-06, + "loss": 0.5346, + "step": 12615 + }, + { + "epoch": 1.8590373280943027, + "grad_norm": 0.5872514247894287, + "learning_rate": 1.595556242743893e-06, + "loss": 0.5402, + "step": 12616 + }, + { + "epoch": 1.8591846758349706, + "grad_norm": 0.6007794141769409, + "learning_rate": 1.595194816756119e-06, + "loss": 0.533, + "step": 12617 + }, + { + "epoch": 1.8593320235756385, + "grad_norm": 0.5988203287124634, + "learning_rate": 1.5948334125284853e-06, + "loss": 0.5059, + "step": 12618 + }, + { + "epoch": 1.8594793713163065, + "grad_norm": 0.6045300960540771, + "learning_rate": 1.5944720300696834e-06, + "loss": 0.5062, + "step": 12619 + }, + { + "epoch": 1.8596267190569744, + "grad_norm": 0.5975401997566223, + "learning_rate": 1.5941106693884046e-06, + "loss": 0.5362, + "step": 12620 + }, + { + "epoch": 1.8597740667976423, + "grad_norm": 0.6305041909217834, + "learning_rate": 1.593749330493339e-06, + "loss": 0.5287, + "step": 12621 + }, + { + "epoch": 1.8599214145383103, + "grad_norm": 0.6182711720466614, + "learning_rate": 1.5933880133931773e-06, + "loss": 0.519, + "step": 12622 + }, + { + "epoch": 1.8600687622789784, + "grad_norm": 0.6022855043411255, + "learning_rate": 1.5930267180966083e-06, + "loss": 0.4986, + "step": 12623 + }, + { + "epoch": 1.8602161100196464, + "grad_norm": 0.6188307404518127, + "learning_rate": 1.5926654446123216e-06, + "loss": 0.5461, + "step": 12624 + }, + { + "epoch": 1.8603634577603143, + "grad_norm": 0.630310595035553, + "learning_rate": 1.5923041929490053e-06, + "loss": 0.493, + "step": 12625 + }, + { + "epoch": 1.8605108055009825, + "grad_norm": 0.6159061193466187, + "learning_rate": 1.5919429631153473e-06, + "loss": 0.5248, + "step": 12626 + }, + { + "epoch": 1.8606581532416504, + "grad_norm": 0.6112442016601562, + "learning_rate": 1.5915817551200353e-06, + "loss": 0.5289, + "step": 12627 + }, + { + "epoch": 1.8608055009823183, + "grad_norm": 0.6336503028869629, + "learning_rate": 1.591220568971756e-06, + "loss": 0.5373, + "step": 12628 + }, + { + "epoch": 1.8609528487229863, + "grad_norm": 0.6174224019050598, + "learning_rate": 1.590859404679196e-06, + "loss": 0.5279, + "step": 12629 + }, + { + "epoch": 1.8611001964636542, + "grad_norm": 0.6075657606124878, + "learning_rate": 1.5904982622510406e-06, + "loss": 0.5355, + "step": 12630 + }, + { + "epoch": 1.8612475442043221, + "grad_norm": 0.6164861917495728, + "learning_rate": 1.5901371416959754e-06, + "loss": 0.5205, + "step": 12631 + }, + { + "epoch": 1.86139489194499, + "grad_norm": 0.6156929731369019, + "learning_rate": 1.5897760430226853e-06, + "loss": 0.5411, + "step": 12632 + }, + { + "epoch": 1.861542239685658, + "grad_norm": 0.6208190321922302, + "learning_rate": 1.589414966239855e-06, + "loss": 0.5433, + "step": 12633 + }, + { + "epoch": 1.8616895874263262, + "grad_norm": 0.6238278150558472, + "learning_rate": 1.5890539113561671e-06, + "loss": 0.5387, + "step": 12634 + }, + { + "epoch": 1.8618369351669941, + "grad_norm": 0.5981313586235046, + "learning_rate": 1.5886928783803052e-06, + "loss": 0.4993, + "step": 12635 + }, + { + "epoch": 1.861984282907662, + "grad_norm": 0.5957437753677368, + "learning_rate": 1.5883318673209524e-06, + "loss": 0.5319, + "step": 12636 + }, + { + "epoch": 1.8621316306483302, + "grad_norm": 0.596971869468689, + "learning_rate": 1.5879708781867904e-06, + "loss": 0.5107, + "step": 12637 + }, + { + "epoch": 1.8622789783889981, + "grad_norm": 0.6270797252655029, + "learning_rate": 1.5876099109865012e-06, + "loss": 0.5374, + "step": 12638 + }, + { + "epoch": 1.862426326129666, + "grad_norm": 0.5755958557128906, + "learning_rate": 1.5872489657287654e-06, + "loss": 0.5129, + "step": 12639 + }, + { + "epoch": 1.862573673870334, + "grad_norm": 0.6195076704025269, + "learning_rate": 1.5868880424222644e-06, + "loss": 0.5294, + "step": 12640 + }, + { + "epoch": 1.862721021611002, + "grad_norm": 0.617451012134552, + "learning_rate": 1.5865271410756772e-06, + "loss": 0.509, + "step": 12641 + }, + { + "epoch": 1.8628683693516699, + "grad_norm": 0.65822434425354, + "learning_rate": 1.586166261697683e-06, + "loss": 0.49, + "step": 12642 + }, + { + "epoch": 1.8630157170923378, + "grad_norm": 0.6116169691085815, + "learning_rate": 1.5858054042969626e-06, + "loss": 0.5351, + "step": 12643 + }, + { + "epoch": 1.8631630648330058, + "grad_norm": 0.5871865153312683, + "learning_rate": 1.5854445688821935e-06, + "loss": 0.4836, + "step": 12644 + }, + { + "epoch": 1.863310412573674, + "grad_norm": 0.5951717495918274, + "learning_rate": 1.5850837554620534e-06, + "loss": 0.5293, + "step": 12645 + }, + { + "epoch": 1.8634577603143418, + "grad_norm": 0.6413256525993347, + "learning_rate": 1.58472296404522e-06, + "loss": 0.5211, + "step": 12646 + }, + { + "epoch": 1.8636051080550098, + "grad_norm": 0.6008620858192444, + "learning_rate": 1.5843621946403697e-06, + "loss": 0.5273, + "step": 12647 + }, + { + "epoch": 1.863752455795678, + "grad_norm": 0.5899954438209534, + "learning_rate": 1.5840014472561797e-06, + "loss": 0.5256, + "step": 12648 + }, + { + "epoch": 1.8638998035363459, + "grad_norm": 0.6030955910682678, + "learning_rate": 1.583640721901325e-06, + "loss": 0.5099, + "step": 12649 + }, + { + "epoch": 1.8640471512770138, + "grad_norm": 0.5798968076705933, + "learning_rate": 1.583280018584481e-06, + "loss": 0.518, + "step": 12650 + }, + { + "epoch": 1.8641944990176817, + "grad_norm": 0.6180931925773621, + "learning_rate": 1.5829193373143234e-06, + "loss": 0.5559, + "step": 12651 + }, + { + "epoch": 1.8643418467583497, + "grad_norm": 0.6303184628486633, + "learning_rate": 1.582558678099525e-06, + "loss": 0.5357, + "step": 12652 + }, + { + "epoch": 1.8644891944990176, + "grad_norm": 0.6112546920776367, + "learning_rate": 1.5821980409487596e-06, + "loss": 0.5278, + "step": 12653 + }, + { + "epoch": 1.8646365422396856, + "grad_norm": 0.6346851587295532, + "learning_rate": 1.5818374258707013e-06, + "loss": 0.5229, + "step": 12654 + }, + { + "epoch": 1.8647838899803535, + "grad_norm": 0.5917268395423889, + "learning_rate": 1.5814768328740226e-06, + "loss": 0.4859, + "step": 12655 + }, + { + "epoch": 1.8649312377210217, + "grad_norm": 0.5879557132720947, + "learning_rate": 1.5811162619673948e-06, + "loss": 0.4972, + "step": 12656 + }, + { + "epoch": 1.8650785854616896, + "grad_norm": 0.6070362329483032, + "learning_rate": 1.58075571315949e-06, + "loss": 0.5099, + "step": 12657 + }, + { + "epoch": 1.8652259332023575, + "grad_norm": 0.5990157127380371, + "learning_rate": 1.580395186458979e-06, + "loss": 0.535, + "step": 12658 + }, + { + "epoch": 1.8653732809430257, + "grad_norm": 0.6337330341339111, + "learning_rate": 1.5800346818745328e-06, + "loss": 0.4794, + "step": 12659 + }, + { + "epoch": 1.8655206286836936, + "grad_norm": 0.6061440110206604, + "learning_rate": 1.5796741994148206e-06, + "loss": 0.5015, + "step": 12660 + }, + { + "epoch": 1.8656679764243616, + "grad_norm": 0.5752519369125366, + "learning_rate": 1.5793137390885126e-06, + "loss": 0.5264, + "step": 12661 + }, + { + "epoch": 1.8658153241650295, + "grad_norm": 0.6087402105331421, + "learning_rate": 1.578953300904277e-06, + "loss": 0.533, + "step": 12662 + }, + { + "epoch": 1.8659626719056974, + "grad_norm": 0.5982412099838257, + "learning_rate": 1.5785928848707827e-06, + "loss": 0.5147, + "step": 12663 + }, + { + "epoch": 1.8661100196463654, + "grad_norm": 0.5962754487991333, + "learning_rate": 1.5782324909966972e-06, + "loss": 0.5134, + "step": 12664 + }, + { + "epoch": 1.8662573673870333, + "grad_norm": 0.6043440699577332, + "learning_rate": 1.5778721192906885e-06, + "loss": 0.5373, + "step": 12665 + }, + { + "epoch": 1.8664047151277012, + "grad_norm": 0.586394727230072, + "learning_rate": 1.5775117697614222e-06, + "loss": 0.5208, + "step": 12666 + }, + { + "epoch": 1.8665520628683694, + "grad_norm": 0.6015907526016235, + "learning_rate": 1.5771514424175654e-06, + "loss": 0.5434, + "step": 12667 + }, + { + "epoch": 1.8666994106090373, + "grad_norm": 0.5921532511711121, + "learning_rate": 1.5767911372677836e-06, + "loss": 0.5257, + "step": 12668 + }, + { + "epoch": 1.8668467583497053, + "grad_norm": 0.5828734040260315, + "learning_rate": 1.576430854320742e-06, + "loss": 0.5147, + "step": 12669 + }, + { + "epoch": 1.8669941060903734, + "grad_norm": 0.5655114054679871, + "learning_rate": 1.5760705935851051e-06, + "loss": 0.5082, + "step": 12670 + }, + { + "epoch": 1.8671414538310414, + "grad_norm": 0.5868111252784729, + "learning_rate": 1.5757103550695372e-06, + "loss": 0.4935, + "step": 12671 + }, + { + "epoch": 1.8672888015717093, + "grad_norm": 0.5839481353759766, + "learning_rate": 1.5753501387827014e-06, + "loss": 0.4725, + "step": 12672 + }, + { + "epoch": 1.8674361493123772, + "grad_norm": 0.5878472924232483, + "learning_rate": 1.574989944733261e-06, + "loss": 0.521, + "step": 12673 + }, + { + "epoch": 1.8675834970530452, + "grad_norm": 0.604155957698822, + "learning_rate": 1.5746297729298793e-06, + "loss": 0.5149, + "step": 12674 + }, + { + "epoch": 1.867730844793713, + "grad_norm": 0.6027399897575378, + "learning_rate": 1.574269623381217e-06, + "loss": 0.5253, + "step": 12675 + }, + { + "epoch": 1.867878192534381, + "grad_norm": 0.6220179796218872, + "learning_rate": 1.573909496095936e-06, + "loss": 0.5265, + "step": 12676 + }, + { + "epoch": 1.868025540275049, + "grad_norm": 0.5972262024879456, + "learning_rate": 1.5735493910826977e-06, + "loss": 0.5241, + "step": 12677 + }, + { + "epoch": 1.8681728880157171, + "grad_norm": 0.6408780217170715, + "learning_rate": 1.5731893083501615e-06, + "loss": 0.5089, + "step": 12678 + }, + { + "epoch": 1.868320235756385, + "grad_norm": 0.5908819437026978, + "learning_rate": 1.5728292479069873e-06, + "loss": 0.5211, + "step": 12679 + }, + { + "epoch": 1.868467583497053, + "grad_norm": 0.6544514298439026, + "learning_rate": 1.5724692097618358e-06, + "loss": 0.5705, + "step": 12680 + }, + { + "epoch": 1.8686149312377212, + "grad_norm": 0.5729785561561584, + "learning_rate": 1.5721091939233643e-06, + "loss": 0.5226, + "step": 12681 + }, + { + "epoch": 1.868762278978389, + "grad_norm": 0.5892890095710754, + "learning_rate": 1.5717492004002316e-06, + "loss": 0.5196, + "step": 12682 + }, + { + "epoch": 1.868909626719057, + "grad_norm": 0.588030219078064, + "learning_rate": 1.5713892292010953e-06, + "loss": 0.5246, + "step": 12683 + }, + { + "epoch": 1.869056974459725, + "grad_norm": 0.6235259175300598, + "learning_rate": 1.5710292803346125e-06, + "loss": 0.5127, + "step": 12684 + }, + { + "epoch": 1.869204322200393, + "grad_norm": 0.620481014251709, + "learning_rate": 1.5706693538094397e-06, + "loss": 0.5172, + "step": 12685 + }, + { + "epoch": 1.8693516699410608, + "grad_norm": 0.6113089919090271, + "learning_rate": 1.570309449634233e-06, + "loss": 0.5374, + "step": 12686 + }, + { + "epoch": 1.8694990176817288, + "grad_norm": 0.5942187309265137, + "learning_rate": 1.5699495678176482e-06, + "loss": 0.5001, + "step": 12687 + }, + { + "epoch": 1.8696463654223967, + "grad_norm": 0.5844235420227051, + "learning_rate": 1.56958970836834e-06, + "loss": 0.5144, + "step": 12688 + }, + { + "epoch": 1.8697937131630649, + "grad_norm": 0.5998966097831726, + "learning_rate": 1.5692298712949631e-06, + "loss": 0.5105, + "step": 12689 + }, + { + "epoch": 1.8699410609037328, + "grad_norm": 0.623511016368866, + "learning_rate": 1.568870056606171e-06, + "loss": 0.5079, + "step": 12690 + }, + { + "epoch": 1.8700884086444007, + "grad_norm": 0.6186590790748596, + "learning_rate": 1.5685102643106172e-06, + "loss": 0.5054, + "step": 12691 + }, + { + "epoch": 1.870235756385069, + "grad_norm": 0.6971758008003235, + "learning_rate": 1.5681504944169548e-06, + "loss": 0.5212, + "step": 12692 + }, + { + "epoch": 1.8703831041257368, + "grad_norm": 0.627117395401001, + "learning_rate": 1.567790746933836e-06, + "loss": 0.5109, + "step": 12693 + }, + { + "epoch": 1.8705304518664048, + "grad_norm": 0.5832144618034363, + "learning_rate": 1.5674310218699124e-06, + "loss": 0.5065, + "step": 12694 + }, + { + "epoch": 1.8706777996070727, + "grad_norm": 0.6387969851493835, + "learning_rate": 1.5670713192338356e-06, + "loss": 0.526, + "step": 12695 + }, + { + "epoch": 1.8708251473477406, + "grad_norm": 0.607430636882782, + "learning_rate": 1.5667116390342556e-06, + "loss": 0.5158, + "step": 12696 + }, + { + "epoch": 1.8709724950884086, + "grad_norm": 0.628290057182312, + "learning_rate": 1.566351981279823e-06, + "loss": 0.5395, + "step": 12697 + }, + { + "epoch": 1.8711198428290765, + "grad_norm": 0.6145370602607727, + "learning_rate": 1.5659923459791877e-06, + "loss": 0.5503, + "step": 12698 + }, + { + "epoch": 1.8712671905697444, + "grad_norm": 0.5767096877098083, + "learning_rate": 1.565632733140998e-06, + "loss": 0.4903, + "step": 12699 + }, + { + "epoch": 1.8714145383104126, + "grad_norm": 0.591731071472168, + "learning_rate": 1.5652731427739038e-06, + "loss": 0.4672, + "step": 12700 + }, + { + "epoch": 1.8715618860510805, + "grad_norm": 0.6189079880714417, + "learning_rate": 1.5649135748865512e-06, + "loss": 0.5187, + "step": 12701 + }, + { + "epoch": 1.8717092337917485, + "grad_norm": 0.6009140610694885, + "learning_rate": 1.5645540294875886e-06, + "loss": 0.5229, + "step": 12702 + }, + { + "epoch": 1.8718565815324166, + "grad_norm": 0.6267814040184021, + "learning_rate": 1.564194506585663e-06, + "loss": 0.5117, + "step": 12703 + }, + { + "epoch": 1.8720039292730846, + "grad_norm": 0.6443687677383423, + "learning_rate": 1.5638350061894204e-06, + "loss": 0.5207, + "step": 12704 + }, + { + "epoch": 1.8721512770137525, + "grad_norm": 0.5839438438415527, + "learning_rate": 1.5634755283075067e-06, + "loss": 0.5345, + "step": 12705 + }, + { + "epoch": 1.8722986247544204, + "grad_norm": 0.6552605032920837, + "learning_rate": 1.5631160729485676e-06, + "loss": 0.5342, + "step": 12706 + }, + { + "epoch": 1.8724459724950884, + "grad_norm": 0.6183806657791138, + "learning_rate": 1.5627566401212474e-06, + "loss": 0.5139, + "step": 12707 + }, + { + "epoch": 1.8725933202357563, + "grad_norm": 0.5745800733566284, + "learning_rate": 1.5623972298341905e-06, + "loss": 0.5234, + "step": 12708 + }, + { + "epoch": 1.8727406679764242, + "grad_norm": 0.6091349720954895, + "learning_rate": 1.5620378420960403e-06, + "loss": 0.5306, + "step": 12709 + }, + { + "epoch": 1.8728880157170922, + "grad_norm": 0.6547766327857971, + "learning_rate": 1.56167847691544e-06, + "loss": 0.519, + "step": 12710 + }, + { + "epoch": 1.8730353634577603, + "grad_norm": 0.633060097694397, + "learning_rate": 1.5613191343010325e-06, + "loss": 0.5211, + "step": 12711 + }, + { + "epoch": 1.8731827111984283, + "grad_norm": 0.611480712890625, + "learning_rate": 1.5609598142614593e-06, + "loss": 0.4878, + "step": 12712 + }, + { + "epoch": 1.8733300589390964, + "grad_norm": 0.6006019115447998, + "learning_rate": 1.5606005168053622e-06, + "loss": 0.5192, + "step": 12713 + }, + { + "epoch": 1.8734774066797644, + "grad_norm": 0.6100708842277527, + "learning_rate": 1.5602412419413821e-06, + "loss": 0.5597, + "step": 12714 + }, + { + "epoch": 1.8736247544204323, + "grad_norm": 0.6187954545021057, + "learning_rate": 1.5598819896781594e-06, + "loss": 0.5542, + "step": 12715 + }, + { + "epoch": 1.8737721021611002, + "grad_norm": 0.599299967288971, + "learning_rate": 1.559522760024333e-06, + "loss": 0.5202, + "step": 12716 + }, + { + "epoch": 1.8739194499017682, + "grad_norm": 0.5825784206390381, + "learning_rate": 1.559163552988544e-06, + "loss": 0.556, + "step": 12717 + }, + { + "epoch": 1.874066797642436, + "grad_norm": 0.6240317821502686, + "learning_rate": 1.5588043685794306e-06, + "loss": 0.5701, + "step": 12718 + }, + { + "epoch": 1.874214145383104, + "grad_norm": 0.6187127828598022, + "learning_rate": 1.5584452068056305e-06, + "loss": 0.5396, + "step": 12719 + }, + { + "epoch": 1.874361493123772, + "grad_norm": 0.647819459438324, + "learning_rate": 1.5580860676757814e-06, + "loss": 0.5468, + "step": 12720 + }, + { + "epoch": 1.87450884086444, + "grad_norm": 0.6360974907875061, + "learning_rate": 1.5577269511985206e-06, + "loss": 0.4955, + "step": 12721 + }, + { + "epoch": 1.874656188605108, + "grad_norm": 0.6497788429260254, + "learning_rate": 1.5573678573824846e-06, + "loss": 0.5093, + "step": 12722 + }, + { + "epoch": 1.874803536345776, + "grad_norm": 0.6556471586227417, + "learning_rate": 1.5570087862363095e-06, + "loss": 0.5496, + "step": 12723 + }, + { + "epoch": 1.8749508840864442, + "grad_norm": 0.6070156097412109, + "learning_rate": 1.5566497377686307e-06, + "loss": 0.5499, + "step": 12724 + }, + { + "epoch": 1.875098231827112, + "grad_norm": 0.5766957402229309, + "learning_rate": 1.5562907119880837e-06, + "loss": 0.4947, + "step": 12725 + }, + { + "epoch": 1.87524557956778, + "grad_norm": 0.5935595035552979, + "learning_rate": 1.5559317089033022e-06, + "loss": 0.5322, + "step": 12726 + }, + { + "epoch": 1.875392927308448, + "grad_norm": 0.6341683864593506, + "learning_rate": 1.5555727285229202e-06, + "loss": 0.5341, + "step": 12727 + }, + { + "epoch": 1.875540275049116, + "grad_norm": 0.5984130501747131, + "learning_rate": 1.5552137708555712e-06, + "loss": 0.5521, + "step": 12728 + }, + { + "epoch": 1.8756876227897838, + "grad_norm": 0.6261610984802246, + "learning_rate": 1.554854835909888e-06, + "loss": 0.5347, + "step": 12729 + }, + { + "epoch": 1.8758349705304518, + "grad_norm": 0.5936816930770874, + "learning_rate": 1.5544959236945023e-06, + "loss": 0.5165, + "step": 12730 + }, + { + "epoch": 1.8759823182711197, + "grad_norm": 0.6199691891670227, + "learning_rate": 1.5541370342180468e-06, + "loss": 0.5431, + "step": 12731 + }, + { + "epoch": 1.8761296660117877, + "grad_norm": 0.5675486922264099, + "learning_rate": 1.5537781674891513e-06, + "loss": 0.5117, + "step": 12732 + }, + { + "epoch": 1.8762770137524558, + "grad_norm": 0.6141977310180664, + "learning_rate": 1.5534193235164473e-06, + "loss": 0.5349, + "step": 12733 + }, + { + "epoch": 1.8764243614931237, + "grad_norm": 0.6158269047737122, + "learning_rate": 1.5530605023085649e-06, + "loss": 0.4992, + "step": 12734 + }, + { + "epoch": 1.876571709233792, + "grad_norm": 0.5785543918609619, + "learning_rate": 1.5527017038741332e-06, + "loss": 0.4669, + "step": 12735 + }, + { + "epoch": 1.8767190569744598, + "grad_norm": 0.6009633541107178, + "learning_rate": 1.5523429282217812e-06, + "loss": 0.5356, + "step": 12736 + }, + { + "epoch": 1.8768664047151278, + "grad_norm": 0.612277626991272, + "learning_rate": 1.551984175360137e-06, + "loss": 0.5152, + "step": 12737 + }, + { + "epoch": 1.8770137524557957, + "grad_norm": 0.6122591495513916, + "learning_rate": 1.551625445297829e-06, + "loss": 0.526, + "step": 12738 + }, + { + "epoch": 1.8771611001964637, + "grad_norm": 0.6084426641464233, + "learning_rate": 1.551266738043484e-06, + "loss": 0.5245, + "step": 12739 + }, + { + "epoch": 1.8773084479371316, + "grad_norm": 0.5833567976951599, + "learning_rate": 1.550908053605729e-06, + "loss": 0.4904, + "step": 12740 + }, + { + "epoch": 1.8774557956777995, + "grad_norm": 0.5799320936203003, + "learning_rate": 1.5505493919931905e-06, + "loss": 0.522, + "step": 12741 + }, + { + "epoch": 1.8776031434184675, + "grad_norm": 0.6370176076889038, + "learning_rate": 1.5501907532144933e-06, + "loss": 0.5179, + "step": 12742 + }, + { + "epoch": 1.8777504911591356, + "grad_norm": 0.6109105348587036, + "learning_rate": 1.5498321372782634e-06, + "loss": 0.5174, + "step": 12743 + }, + { + "epoch": 1.8778978388998036, + "grad_norm": 0.6154400110244751, + "learning_rate": 1.549473544193125e-06, + "loss": 0.5371, + "step": 12744 + }, + { + "epoch": 1.8780451866404715, + "grad_norm": 0.5721460580825806, + "learning_rate": 1.549114973967702e-06, + "loss": 0.4937, + "step": 12745 + }, + { + "epoch": 1.8781925343811396, + "grad_norm": 0.6439931988716125, + "learning_rate": 1.5487564266106176e-06, + "loss": 0.5273, + "step": 12746 + }, + { + "epoch": 1.8783398821218076, + "grad_norm": 0.6164969205856323, + "learning_rate": 1.548397902130495e-06, + "loss": 0.5225, + "step": 12747 + }, + { + "epoch": 1.8784872298624755, + "grad_norm": 0.6360358595848083, + "learning_rate": 1.5480394005359572e-06, + "loss": 0.5253, + "step": 12748 + }, + { + "epoch": 1.8786345776031435, + "grad_norm": 0.6036432981491089, + "learning_rate": 1.5476809218356248e-06, + "loss": 0.5006, + "step": 12749 + }, + { + "epoch": 1.8787819253438114, + "grad_norm": 0.6186674237251282, + "learning_rate": 1.5473224660381194e-06, + "loss": 0.5254, + "step": 12750 + }, + { + "epoch": 1.8789292730844793, + "grad_norm": 0.6018390655517578, + "learning_rate": 1.546964033152062e-06, + "loss": 0.5107, + "step": 12751 + }, + { + "epoch": 1.8790766208251473, + "grad_norm": 0.6066272258758545, + "learning_rate": 1.5466056231860727e-06, + "loss": 0.5496, + "step": 12752 + }, + { + "epoch": 1.8792239685658152, + "grad_norm": 0.6366710066795349, + "learning_rate": 1.5462472361487698e-06, + "loss": 0.5351, + "step": 12753 + }, + { + "epoch": 1.8793713163064834, + "grad_norm": 0.5902858376502991, + "learning_rate": 1.5458888720487747e-06, + "loss": 0.5119, + "step": 12754 + }, + { + "epoch": 1.8795186640471513, + "grad_norm": 0.6331484913825989, + "learning_rate": 1.5455305308947045e-06, + "loss": 0.5367, + "step": 12755 + }, + { + "epoch": 1.8796660117878192, + "grad_norm": 0.5802460312843323, + "learning_rate": 1.5451722126951776e-06, + "loss": 0.5311, + "step": 12756 + }, + { + "epoch": 1.8798133595284874, + "grad_norm": 0.6154000163078308, + "learning_rate": 1.544813917458811e-06, + "loss": 0.532, + "step": 12757 + }, + { + "epoch": 1.8799607072691553, + "grad_norm": 0.5901840925216675, + "learning_rate": 1.5444556451942216e-06, + "loss": 0.5331, + "step": 12758 + }, + { + "epoch": 1.8801080550098233, + "grad_norm": 0.6083796620368958, + "learning_rate": 1.544097395910026e-06, + "loss": 0.5138, + "step": 12759 + }, + { + "epoch": 1.8802554027504912, + "grad_norm": 0.6602314114570618, + "learning_rate": 1.5437391696148397e-06, + "loss": 0.509, + "step": 12760 + }, + { + "epoch": 1.8804027504911591, + "grad_norm": 0.6696207523345947, + "learning_rate": 1.5433809663172777e-06, + "loss": 0.5657, + "step": 12761 + }, + { + "epoch": 1.880550098231827, + "grad_norm": 0.6000041365623474, + "learning_rate": 1.5430227860259547e-06, + "loss": 0.5262, + "step": 12762 + }, + { + "epoch": 1.880697445972495, + "grad_norm": 0.6223084926605225, + "learning_rate": 1.5426646287494851e-06, + "loss": 0.5227, + "step": 12763 + }, + { + "epoch": 1.880844793713163, + "grad_norm": 0.6473931670188904, + "learning_rate": 1.5423064944964819e-06, + "loss": 0.5321, + "step": 12764 + }, + { + "epoch": 1.880992141453831, + "grad_norm": 0.6213973164558411, + "learning_rate": 1.5419483832755583e-06, + "loss": 0.5275, + "step": 12765 + }, + { + "epoch": 1.881139489194499, + "grad_norm": 0.6305712461471558, + "learning_rate": 1.5415902950953277e-06, + "loss": 0.5621, + "step": 12766 + }, + { + "epoch": 1.881286836935167, + "grad_norm": 0.6223016977310181, + "learning_rate": 1.5412322299644e-06, + "loss": 0.5495, + "step": 12767 + }, + { + "epoch": 1.8814341846758351, + "grad_norm": 0.5731018781661987, + "learning_rate": 1.5408741878913874e-06, + "loss": 0.4914, + "step": 12768 + }, + { + "epoch": 1.881581532416503, + "grad_norm": 0.5777367949485779, + "learning_rate": 1.540516168884901e-06, + "loss": 0.4863, + "step": 12769 + }, + { + "epoch": 1.881728880157171, + "grad_norm": 0.6074481010437012, + "learning_rate": 1.540158172953551e-06, + "loss": 0.5402, + "step": 12770 + }, + { + "epoch": 1.881876227897839, + "grad_norm": 0.609599232673645, + "learning_rate": 1.5398002001059462e-06, + "loss": 0.5214, + "step": 12771 + }, + { + "epoch": 1.8820235756385069, + "grad_norm": 0.6549091339111328, + "learning_rate": 1.5394422503506962e-06, + "loss": 0.5352, + "step": 12772 + }, + { + "epoch": 1.8821709233791748, + "grad_norm": 0.5940406918525696, + "learning_rate": 1.5390843236964097e-06, + "loss": 0.5388, + "step": 12773 + }, + { + "epoch": 1.8823182711198427, + "grad_norm": 0.6377359628677368, + "learning_rate": 1.538726420151695e-06, + "loss": 0.5318, + "step": 12774 + }, + { + "epoch": 1.8824656188605107, + "grad_norm": 0.6177948713302612, + "learning_rate": 1.5383685397251585e-06, + "loss": 0.4747, + "step": 12775 + }, + { + "epoch": 1.8826129666011788, + "grad_norm": 0.5789219737052917, + "learning_rate": 1.5380106824254076e-06, + "loss": 0.5449, + "step": 12776 + }, + { + "epoch": 1.8827603143418468, + "grad_norm": 0.5934888124465942, + "learning_rate": 1.5376528482610487e-06, + "loss": 0.5067, + "step": 12777 + }, + { + "epoch": 1.8829076620825147, + "grad_norm": 0.6289137005805969, + "learning_rate": 1.5372950372406876e-06, + "loss": 0.5456, + "step": 12778 + }, + { + "epoch": 1.8830550098231829, + "grad_norm": 0.5863431096076965, + "learning_rate": 1.5369372493729289e-06, + "loss": 0.5462, + "step": 12779 + }, + { + "epoch": 1.8832023575638508, + "grad_norm": 0.587670624256134, + "learning_rate": 1.5365794846663781e-06, + "loss": 0.5188, + "step": 12780 + }, + { + "epoch": 1.8833497053045187, + "grad_norm": 0.6187390685081482, + "learning_rate": 1.5362217431296386e-06, + "loss": 0.526, + "step": 12781 + }, + { + "epoch": 1.8834970530451867, + "grad_norm": 0.6107187867164612, + "learning_rate": 1.535864024771314e-06, + "loss": 0.5157, + "step": 12782 + }, + { + "epoch": 1.8836444007858546, + "grad_norm": 0.6065537929534912, + "learning_rate": 1.5355063296000078e-06, + "loss": 0.5434, + "step": 12783 + }, + { + "epoch": 1.8837917485265225, + "grad_norm": 0.6189431548118591, + "learning_rate": 1.5351486576243219e-06, + "loss": 0.4937, + "step": 12784 + }, + { + "epoch": 1.8839390962671905, + "grad_norm": 0.6343380212783813, + "learning_rate": 1.5347910088528579e-06, + "loss": 0.5212, + "step": 12785 + }, + { + "epoch": 1.8840864440078584, + "grad_norm": 0.5960689187049866, + "learning_rate": 1.5344333832942177e-06, + "loss": 0.5183, + "step": 12786 + }, + { + "epoch": 1.8842337917485266, + "grad_norm": 0.6096001863479614, + "learning_rate": 1.5340757809570016e-06, + "loss": 0.5273, + "step": 12787 + }, + { + "epoch": 1.8843811394891945, + "grad_norm": 0.5597547888755798, + "learning_rate": 1.5337182018498098e-06, + "loss": 0.5197, + "step": 12788 + }, + { + "epoch": 1.8845284872298624, + "grad_norm": 0.6028164029121399, + "learning_rate": 1.533360645981242e-06, + "loss": 0.5541, + "step": 12789 + }, + { + "epoch": 1.8846758349705306, + "grad_norm": 0.6022403240203857, + "learning_rate": 1.5330031133598975e-06, + "loss": 0.5575, + "step": 12790 + }, + { + "epoch": 1.8848231827111985, + "grad_norm": 0.6127487421035767, + "learning_rate": 1.5326456039943744e-06, + "loss": 0.5296, + "step": 12791 + }, + { + "epoch": 1.8849705304518665, + "grad_norm": 0.6614026427268982, + "learning_rate": 1.5322881178932711e-06, + "loss": 0.5332, + "step": 12792 + }, + { + "epoch": 1.8851178781925344, + "grad_norm": 0.6354072093963623, + "learning_rate": 1.5319306550651847e-06, + "loss": 0.532, + "step": 12793 + }, + { + "epoch": 1.8852652259332023, + "grad_norm": 0.6164027452468872, + "learning_rate": 1.5315732155187118e-06, + "loss": 0.5263, + "step": 12794 + }, + { + "epoch": 1.8854125736738703, + "grad_norm": 0.5935357213020325, + "learning_rate": 1.5312157992624488e-06, + "loss": 0.534, + "step": 12795 + }, + { + "epoch": 1.8855599214145382, + "grad_norm": 0.6383805274963379, + "learning_rate": 1.5308584063049917e-06, + "loss": 0.533, + "step": 12796 + }, + { + "epoch": 1.8857072691552061, + "grad_norm": 0.6125941872596741, + "learning_rate": 1.530501036654935e-06, + "loss": 0.5101, + "step": 12797 + }, + { + "epoch": 1.8858546168958743, + "grad_norm": 0.5539658665657043, + "learning_rate": 1.5301436903208741e-06, + "loss": 0.4947, + "step": 12798 + }, + { + "epoch": 1.8860019646365422, + "grad_norm": 0.6394781470298767, + "learning_rate": 1.529786367311402e-06, + "loss": 0.5307, + "step": 12799 + }, + { + "epoch": 1.8861493123772102, + "grad_norm": 0.6039510369300842, + "learning_rate": 1.5294290676351134e-06, + "loss": 0.5239, + "step": 12800 + }, + { + "epoch": 1.8862966601178783, + "grad_norm": 0.6623058915138245, + "learning_rate": 1.5290717913006e-06, + "loss": 0.4997, + "step": 12801 + }, + { + "epoch": 1.8864440078585463, + "grad_norm": 0.6305545568466187, + "learning_rate": 1.5287145383164548e-06, + "loss": 0.5518, + "step": 12802 + }, + { + "epoch": 1.8865913555992142, + "grad_norm": 0.6060226559638977, + "learning_rate": 1.5283573086912695e-06, + "loss": 0.5378, + "step": 12803 + }, + { + "epoch": 1.8867387033398821, + "grad_norm": 0.6260468363761902, + "learning_rate": 1.5280001024336349e-06, + "loss": 0.5164, + "step": 12804 + }, + { + "epoch": 1.88688605108055, + "grad_norm": 0.6434730291366577, + "learning_rate": 1.5276429195521422e-06, + "loss": 0.5233, + "step": 12805 + }, + { + "epoch": 1.887033398821218, + "grad_norm": 0.6084304451942444, + "learning_rate": 1.5272857600553811e-06, + "loss": 0.51, + "step": 12806 + }, + { + "epoch": 1.887180746561886, + "grad_norm": 0.6299970149993896, + "learning_rate": 1.5269286239519413e-06, + "loss": 0.537, + "step": 12807 + }, + { + "epoch": 1.8873280943025539, + "grad_norm": 0.6119669079780579, + "learning_rate": 1.5265715112504115e-06, + "loss": 0.5356, + "step": 12808 + }, + { + "epoch": 1.887475442043222, + "grad_norm": 0.6052073836326599, + "learning_rate": 1.5262144219593804e-06, + "loss": 0.4797, + "step": 12809 + }, + { + "epoch": 1.88762278978389, + "grad_norm": 0.604308545589447, + "learning_rate": 1.525857356087436e-06, + "loss": 0.5098, + "step": 12810 + }, + { + "epoch": 1.887770137524558, + "grad_norm": 0.6404770016670227, + "learning_rate": 1.525500313643165e-06, + "loss": 0.5362, + "step": 12811 + }, + { + "epoch": 1.887917485265226, + "grad_norm": 0.6130903959274292, + "learning_rate": 1.5251432946351546e-06, + "loss": 0.5269, + "step": 12812 + }, + { + "epoch": 1.888064833005894, + "grad_norm": 0.6040036082267761, + "learning_rate": 1.5247862990719903e-06, + "loss": 0.5477, + "step": 12813 + }, + { + "epoch": 1.888212180746562, + "grad_norm": 0.6435888409614563, + "learning_rate": 1.5244293269622585e-06, + "loss": 0.5154, + "step": 12814 + }, + { + "epoch": 1.8883595284872299, + "grad_norm": 0.5718441009521484, + "learning_rate": 1.5240723783145445e-06, + "loss": 0.5318, + "step": 12815 + }, + { + "epoch": 1.8885068762278978, + "grad_norm": 0.5927877426147461, + "learning_rate": 1.523715453137431e-06, + "loss": 0.5142, + "step": 12816 + }, + { + "epoch": 1.8886542239685657, + "grad_norm": 0.5800014138221741, + "learning_rate": 1.5233585514395034e-06, + "loss": 0.5067, + "step": 12817 + }, + { + "epoch": 1.8888015717092337, + "grad_norm": 0.6122204065322876, + "learning_rate": 1.523001673229344e-06, + "loss": 0.5883, + "step": 12818 + }, + { + "epoch": 1.8889489194499016, + "grad_norm": 0.6267737150192261, + "learning_rate": 1.5226448185155369e-06, + "loss": 0.538, + "step": 12819 + }, + { + "epoch": 1.8890962671905698, + "grad_norm": 0.6421999931335449, + "learning_rate": 1.522287987306663e-06, + "loss": 0.5377, + "step": 12820 + }, + { + "epoch": 1.8892436149312377, + "grad_norm": 0.5821121335029602, + "learning_rate": 1.5219311796113046e-06, + "loss": 0.4864, + "step": 12821 + }, + { + "epoch": 1.8893909626719056, + "grad_norm": 0.6600903868675232, + "learning_rate": 1.5215743954380424e-06, + "loss": 0.5264, + "step": 12822 + }, + { + "epoch": 1.8895383104125738, + "grad_norm": 0.6142587661743164, + "learning_rate": 1.521217634795457e-06, + "loss": 0.5199, + "step": 12823 + }, + { + "epoch": 1.8896856581532417, + "grad_norm": 0.6391143202781677, + "learning_rate": 1.5208608976921289e-06, + "loss": 0.5335, + "step": 12824 + }, + { + "epoch": 1.8898330058939097, + "grad_norm": 0.628871738910675, + "learning_rate": 1.5205041841366364e-06, + "loss": 0.5564, + "step": 12825 + }, + { + "epoch": 1.8899803536345776, + "grad_norm": 0.6024682521820068, + "learning_rate": 1.5201474941375583e-06, + "loss": 0.5307, + "step": 12826 + }, + { + "epoch": 1.8901277013752456, + "grad_norm": 0.5840508937835693, + "learning_rate": 1.519790827703474e-06, + "loss": 0.5429, + "step": 12827 + }, + { + "epoch": 1.8902750491159135, + "grad_norm": 0.5803223848342896, + "learning_rate": 1.5194341848429608e-06, + "loss": 0.5173, + "step": 12828 + }, + { + "epoch": 1.8904223968565814, + "grad_norm": 0.5805843472480774, + "learning_rate": 1.5190775655645956e-06, + "loss": 0.5088, + "step": 12829 + }, + { + "epoch": 1.8905697445972494, + "grad_norm": 0.6230022311210632, + "learning_rate": 1.5187209698769547e-06, + "loss": 0.527, + "step": 12830 + }, + { + "epoch": 1.8907170923379175, + "grad_norm": 0.6156206130981445, + "learning_rate": 1.5183643977886142e-06, + "loss": 0.5475, + "step": 12831 + }, + { + "epoch": 1.8908644400785855, + "grad_norm": 0.6281238198280334, + "learning_rate": 1.5180078493081498e-06, + "loss": 0.5251, + "step": 12832 + }, + { + "epoch": 1.8910117878192534, + "grad_norm": 0.5979703068733215, + "learning_rate": 1.5176513244441366e-06, + "loss": 0.5196, + "step": 12833 + }, + { + "epoch": 1.8911591355599215, + "grad_norm": 0.6343428492546082, + "learning_rate": 1.5172948232051474e-06, + "loss": 0.5344, + "step": 12834 + }, + { + "epoch": 1.8913064833005895, + "grad_norm": 0.6214479207992554, + "learning_rate": 1.5169383455997568e-06, + "loss": 0.5263, + "step": 12835 + }, + { + "epoch": 1.8914538310412574, + "grad_norm": 0.6182910203933716, + "learning_rate": 1.5165818916365381e-06, + "loss": 0.5089, + "step": 12836 + }, + { + "epoch": 1.8916011787819254, + "grad_norm": 0.5978158712387085, + "learning_rate": 1.5162254613240637e-06, + "loss": 0.5108, + "step": 12837 + }, + { + "epoch": 1.8917485265225933, + "grad_norm": 0.6312450766563416, + "learning_rate": 1.5158690546709053e-06, + "loss": 0.5272, + "step": 12838 + }, + { + "epoch": 1.8918958742632612, + "grad_norm": 0.6157163381576538, + "learning_rate": 1.5155126716856349e-06, + "loss": 0.5027, + "step": 12839 + }, + { + "epoch": 1.8920432220039292, + "grad_norm": 0.6305433511734009, + "learning_rate": 1.5151563123768226e-06, + "loss": 0.5175, + "step": 12840 + }, + { + "epoch": 1.892190569744597, + "grad_norm": 0.6141781806945801, + "learning_rate": 1.5147999767530392e-06, + "loss": 0.523, + "step": 12841 + }, + { + "epoch": 1.8923379174852653, + "grad_norm": 0.6428095698356628, + "learning_rate": 1.5144436648228544e-06, + "loss": 0.5283, + "step": 12842 + }, + { + "epoch": 1.8924852652259332, + "grad_norm": 0.6193917989730835, + "learning_rate": 1.5140873765948371e-06, + "loss": 0.5274, + "step": 12843 + }, + { + "epoch": 1.8926326129666011, + "grad_norm": 0.5882181525230408, + "learning_rate": 1.513731112077556e-06, + "loss": 0.529, + "step": 12844 + }, + { + "epoch": 1.8927799607072693, + "grad_norm": 0.6433449387550354, + "learning_rate": 1.5133748712795793e-06, + "loss": 0.5212, + "step": 12845 + }, + { + "epoch": 1.8929273084479372, + "grad_norm": 0.6007508635520935, + "learning_rate": 1.5130186542094739e-06, + "loss": 0.5192, + "step": 12846 + }, + { + "epoch": 1.8930746561886052, + "grad_norm": 0.6188454031944275, + "learning_rate": 1.512662460875807e-06, + "loss": 0.5255, + "step": 12847 + }, + { + "epoch": 1.893222003929273, + "grad_norm": 0.6201295852661133, + "learning_rate": 1.5123062912871448e-06, + "loss": 0.5193, + "step": 12848 + }, + { + "epoch": 1.893369351669941, + "grad_norm": 0.625614583492279, + "learning_rate": 1.511950145452053e-06, + "loss": 0.5026, + "step": 12849 + }, + { + "epoch": 1.893516699410609, + "grad_norm": 0.6196681261062622, + "learning_rate": 1.511594023379097e-06, + "loss": 0.5027, + "step": 12850 + }, + { + "epoch": 1.893664047151277, + "grad_norm": 0.611014723777771, + "learning_rate": 1.511237925076841e-06, + "loss": 0.5621, + "step": 12851 + }, + { + "epoch": 1.8938113948919448, + "grad_norm": 0.6252511739730835, + "learning_rate": 1.5108818505538492e-06, + "loss": 0.5329, + "step": 12852 + }, + { + "epoch": 1.893958742632613, + "grad_norm": 0.6149322986602783, + "learning_rate": 1.5105257998186847e-06, + "loss": 0.549, + "step": 12853 + }, + { + "epoch": 1.894106090373281, + "grad_norm": 0.6263691186904907, + "learning_rate": 1.5101697728799107e-06, + "loss": 0.5363, + "step": 12854 + }, + { + "epoch": 1.894253438113949, + "grad_norm": 0.6097862720489502, + "learning_rate": 1.5098137697460897e-06, + "loss": 0.5207, + "step": 12855 + }, + { + "epoch": 1.894400785854617, + "grad_norm": 0.6133122444152832, + "learning_rate": 1.5094577904257828e-06, + "loss": 0.4841, + "step": 12856 + }, + { + "epoch": 1.894548133595285, + "grad_norm": 0.6133166551589966, + "learning_rate": 1.5091018349275519e-06, + "loss": 0.4846, + "step": 12857 + }, + { + "epoch": 1.894695481335953, + "grad_norm": 0.6222600340843201, + "learning_rate": 1.5087459032599566e-06, + "loss": 0.5441, + "step": 12858 + }, + { + "epoch": 1.8948428290766208, + "grad_norm": 0.6848324537277222, + "learning_rate": 1.5083899954315578e-06, + "loss": 0.5292, + "step": 12859 + }, + { + "epoch": 1.8949901768172888, + "grad_norm": 0.5740510821342468, + "learning_rate": 1.5080341114509141e-06, + "loss": 0.4928, + "step": 12860 + }, + { + "epoch": 1.8951375245579567, + "grad_norm": 0.5910704731941223, + "learning_rate": 1.5076782513265854e-06, + "loss": 0.5405, + "step": 12861 + }, + { + "epoch": 1.8952848722986246, + "grad_norm": 0.634044885635376, + "learning_rate": 1.5073224150671287e-06, + "loss": 0.5274, + "step": 12862 + }, + { + "epoch": 1.8954322200392926, + "grad_norm": 0.6272875666618347, + "learning_rate": 1.5069666026811018e-06, + "loss": 0.5348, + "step": 12863 + }, + { + "epoch": 1.8955795677799607, + "grad_norm": 0.6150694489479065, + "learning_rate": 1.5066108141770633e-06, + "loss": 0.5316, + "step": 12864 + }, + { + "epoch": 1.8957269155206287, + "grad_norm": 0.5760290026664734, + "learning_rate": 1.5062550495635687e-06, + "loss": 0.5101, + "step": 12865 + }, + { + "epoch": 1.8958742632612968, + "grad_norm": 0.6008244752883911, + "learning_rate": 1.505899308849174e-06, + "loss": 0.5073, + "step": 12866 + }, + { + "epoch": 1.8960216110019648, + "grad_norm": 0.5945150852203369, + "learning_rate": 1.5055435920424344e-06, + "loss": 0.5262, + "step": 12867 + }, + { + "epoch": 1.8961689587426327, + "grad_norm": 0.6204687356948853, + "learning_rate": 1.5051878991519052e-06, + "loss": 0.5437, + "step": 12868 + }, + { + "epoch": 1.8963163064833006, + "grad_norm": 0.6145604252815247, + "learning_rate": 1.5048322301861404e-06, + "loss": 0.5615, + "step": 12869 + }, + { + "epoch": 1.8964636542239686, + "grad_norm": 0.6193618774414062, + "learning_rate": 1.5044765851536935e-06, + "loss": 0.5111, + "step": 12870 + }, + { + "epoch": 1.8966110019646365, + "grad_norm": 0.6070166230201721, + "learning_rate": 1.5041209640631183e-06, + "loss": 0.5175, + "step": 12871 + }, + { + "epoch": 1.8967583497053044, + "grad_norm": 0.600406289100647, + "learning_rate": 1.5037653669229665e-06, + "loss": 0.5017, + "step": 12872 + }, + { + "epoch": 1.8969056974459724, + "grad_norm": 0.6102761626243591, + "learning_rate": 1.50340979374179e-06, + "loss": 0.5105, + "step": 12873 + }, + { + "epoch": 1.8970530451866403, + "grad_norm": 0.6238556504249573, + "learning_rate": 1.5030542445281412e-06, + "loss": 0.5034, + "step": 12874 + }, + { + "epoch": 1.8972003929273085, + "grad_norm": 0.6132614016532898, + "learning_rate": 1.5026987192905701e-06, + "loss": 0.5406, + "step": 12875 + }, + { + "epoch": 1.8973477406679764, + "grad_norm": 0.6337457895278931, + "learning_rate": 1.5023432180376268e-06, + "loss": 0.5396, + "step": 12876 + }, + { + "epoch": 1.8974950884086446, + "grad_norm": 0.5834625959396362, + "learning_rate": 1.5019877407778612e-06, + "loss": 0.5107, + "step": 12877 + }, + { + "epoch": 1.8976424361493125, + "grad_norm": 0.8562626838684082, + "learning_rate": 1.5016322875198225e-06, + "loss": 0.5276, + "step": 12878 + }, + { + "epoch": 1.8977897838899804, + "grad_norm": 0.6157400608062744, + "learning_rate": 1.5012768582720588e-06, + "loss": 0.5498, + "step": 12879 + }, + { + "epoch": 1.8979371316306484, + "grad_norm": 0.6185735464096069, + "learning_rate": 1.5009214530431185e-06, + "loss": 0.5643, + "step": 12880 + }, + { + "epoch": 1.8980844793713163, + "grad_norm": 0.5923054218292236, + "learning_rate": 1.500566071841549e-06, + "loss": 0.5453, + "step": 12881 + }, + { + "epoch": 1.8982318271119842, + "grad_norm": 0.5868661999702454, + "learning_rate": 1.5002107146758962e-06, + "loss": 0.524, + "step": 12882 + }, + { + "epoch": 1.8983791748526522, + "grad_norm": 0.6098695397377014, + "learning_rate": 1.4998553815547067e-06, + "loss": 0.5411, + "step": 12883 + }, + { + "epoch": 1.89852652259332, + "grad_norm": 0.6003113389015198, + "learning_rate": 1.4995000724865267e-06, + "loss": 0.5184, + "step": 12884 + }, + { + "epoch": 1.8986738703339883, + "grad_norm": 0.6230581998825073, + "learning_rate": 1.4991447874799e-06, + "loss": 0.5663, + "step": 12885 + }, + { + "epoch": 1.8988212180746562, + "grad_norm": 0.6292345523834229, + "learning_rate": 1.4987895265433721e-06, + "loss": 0.5184, + "step": 12886 + }, + { + "epoch": 1.8989685658153241, + "grad_norm": 0.5634700059890747, + "learning_rate": 1.4984342896854865e-06, + "loss": 0.5416, + "step": 12887 + }, + { + "epoch": 1.8991159135559923, + "grad_norm": 0.6363692283630371, + "learning_rate": 1.4980790769147863e-06, + "loss": 0.4915, + "step": 12888 + }, + { + "epoch": 1.8992632612966602, + "grad_norm": 0.600461483001709, + "learning_rate": 1.4977238882398142e-06, + "loss": 0.4729, + "step": 12889 + }, + { + "epoch": 1.8994106090373282, + "grad_norm": 0.5882978439331055, + "learning_rate": 1.497368723669113e-06, + "loss": 0.5158, + "step": 12890 + }, + { + "epoch": 1.899557956777996, + "grad_norm": 0.6134586334228516, + "learning_rate": 1.4970135832112234e-06, + "loss": 0.5329, + "step": 12891 + }, + { + "epoch": 1.899705304518664, + "grad_norm": 0.610605001449585, + "learning_rate": 1.4966584668746868e-06, + "loss": 0.5424, + "step": 12892 + }, + { + "epoch": 1.899852652259332, + "grad_norm": 0.6060076355934143, + "learning_rate": 1.4963033746680434e-06, + "loss": 0.5172, + "step": 12893 + }, + { + "epoch": 1.9, + "grad_norm": 0.5811653137207031, + "learning_rate": 1.4959483065998332e-06, + "loss": 0.5345, + "step": 12894 + }, + { + "epoch": 1.9001473477406678, + "grad_norm": 0.6174764037132263, + "learning_rate": 1.4955932626785952e-06, + "loss": 0.5545, + "step": 12895 + }, + { + "epoch": 1.900294695481336, + "grad_norm": 0.5727139711380005, + "learning_rate": 1.495238242912868e-06, + "loss": 0.4911, + "step": 12896 + }, + { + "epoch": 1.900442043222004, + "grad_norm": 0.6058086156845093, + "learning_rate": 1.4948832473111901e-06, + "loss": 0.5422, + "step": 12897 + }, + { + "epoch": 1.9005893909626719, + "grad_norm": 0.5838003754615784, + "learning_rate": 1.4945282758820984e-06, + "loss": 0.521, + "step": 12898 + }, + { + "epoch": 1.90073673870334, + "grad_norm": 0.616416335105896, + "learning_rate": 1.4941733286341304e-06, + "loss": 0.5391, + "step": 12899 + }, + { + "epoch": 1.900884086444008, + "grad_norm": 0.6164499521255493, + "learning_rate": 1.4938184055758212e-06, + "loss": 0.5068, + "step": 12900 + }, + { + "epoch": 1.901031434184676, + "grad_norm": 0.6062665581703186, + "learning_rate": 1.4934635067157083e-06, + "loss": 0.5331, + "step": 12901 + }, + { + "epoch": 1.9011787819253438, + "grad_norm": 0.5989317893981934, + "learning_rate": 1.4931086320623257e-06, + "loss": 0.5064, + "step": 12902 + }, + { + "epoch": 1.9013261296660118, + "grad_norm": 0.6124863028526306, + "learning_rate": 1.4927537816242083e-06, + "loss": 0.5356, + "step": 12903 + }, + { + "epoch": 1.9014734774066797, + "grad_norm": 0.61485356092453, + "learning_rate": 1.49239895540989e-06, + "loss": 0.5031, + "step": 12904 + }, + { + "epoch": 1.9016208251473476, + "grad_norm": 0.5993887782096863, + "learning_rate": 1.492044153427904e-06, + "loss": 0.5051, + "step": 12905 + }, + { + "epoch": 1.9017681728880156, + "grad_norm": 0.6003915071487427, + "learning_rate": 1.491689375686784e-06, + "loss": 0.5262, + "step": 12906 + }, + { + "epoch": 1.9019155206286837, + "grad_norm": 0.5706369876861572, + "learning_rate": 1.491334622195061e-06, + "loss": 0.5215, + "step": 12907 + }, + { + "epoch": 1.9020628683693517, + "grad_norm": 0.6213664412498474, + "learning_rate": 1.4909798929612673e-06, + "loss": 0.5246, + "step": 12908 + }, + { + "epoch": 1.9022102161100196, + "grad_norm": 0.5912420749664307, + "learning_rate": 1.4906251879939343e-06, + "loss": 0.5266, + "step": 12909 + }, + { + "epoch": 1.9023575638506878, + "grad_norm": 0.6556861996650696, + "learning_rate": 1.4902705073015916e-06, + "loss": 0.5069, + "step": 12910 + }, + { + "epoch": 1.9025049115913557, + "grad_norm": 0.6028760671615601, + "learning_rate": 1.48991585089277e-06, + "loss": 0.5658, + "step": 12911 + }, + { + "epoch": 1.9026522593320236, + "grad_norm": 0.5778571367263794, + "learning_rate": 1.4895612187759983e-06, + "loss": 0.5045, + "step": 12912 + }, + { + "epoch": 1.9027996070726916, + "grad_norm": 0.6129018664360046, + "learning_rate": 1.4892066109598051e-06, + "loss": 0.5388, + "step": 12913 + }, + { + "epoch": 1.9029469548133595, + "grad_norm": 0.5955844521522522, + "learning_rate": 1.4888520274527194e-06, + "loss": 0.5482, + "step": 12914 + }, + { + "epoch": 1.9030943025540275, + "grad_norm": 0.6125480532646179, + "learning_rate": 1.4884974682632677e-06, + "loss": 0.5414, + "step": 12915 + }, + { + "epoch": 1.9032416502946954, + "grad_norm": 0.6064927577972412, + "learning_rate": 1.4881429333999778e-06, + "loss": 0.5458, + "step": 12916 + }, + { + "epoch": 1.9033889980353633, + "grad_norm": 0.596318244934082, + "learning_rate": 1.4877884228713757e-06, + "loss": 0.521, + "step": 12917 + }, + { + "epoch": 1.9035363457760315, + "grad_norm": 0.5730981826782227, + "learning_rate": 1.487433936685987e-06, + "loss": 0.5285, + "step": 12918 + }, + { + "epoch": 1.9036836935166994, + "grad_norm": 0.613100528717041, + "learning_rate": 1.4870794748523373e-06, + "loss": 0.5141, + "step": 12919 + }, + { + "epoch": 1.9038310412573674, + "grad_norm": 0.5939289331436157, + "learning_rate": 1.4867250373789513e-06, + "loss": 0.5438, + "step": 12920 + }, + { + "epoch": 1.9039783889980355, + "grad_norm": 0.5781876444816589, + "learning_rate": 1.4863706242743527e-06, + "loss": 0.5029, + "step": 12921 + }, + { + "epoch": 1.9041257367387034, + "grad_norm": 0.6026217341423035, + "learning_rate": 1.4860162355470653e-06, + "loss": 0.5364, + "step": 12922 + }, + { + "epoch": 1.9042730844793714, + "grad_norm": 0.6209302544593811, + "learning_rate": 1.485661871205612e-06, + "loss": 0.5045, + "step": 12923 + }, + { + "epoch": 1.9044204322200393, + "grad_norm": 0.6104133725166321, + "learning_rate": 1.4853075312585148e-06, + "loss": 0.4892, + "step": 12924 + }, + { + "epoch": 1.9045677799607073, + "grad_norm": 0.5828790068626404, + "learning_rate": 1.4849532157142955e-06, + "loss": 0.5347, + "step": 12925 + }, + { + "epoch": 1.9047151277013752, + "grad_norm": 0.647014319896698, + "learning_rate": 1.484598924581475e-06, + "loss": 0.5267, + "step": 12926 + }, + { + "epoch": 1.9048624754420431, + "grad_norm": 0.6078020930290222, + "learning_rate": 1.4842446578685743e-06, + "loss": 0.5074, + "step": 12927 + }, + { + "epoch": 1.905009823182711, + "grad_norm": 0.6029396653175354, + "learning_rate": 1.4838904155841132e-06, + "loss": 0.4818, + "step": 12928 + }, + { + "epoch": 1.9051571709233792, + "grad_norm": 0.6135491728782654, + "learning_rate": 1.4835361977366114e-06, + "loss": 0.5098, + "step": 12929 + }, + { + "epoch": 1.9053045186640472, + "grad_norm": 0.6237752437591553, + "learning_rate": 1.4831820043345873e-06, + "loss": 0.5025, + "step": 12930 + }, + { + "epoch": 1.905451866404715, + "grad_norm": 0.5814804434776306, + "learning_rate": 1.4828278353865588e-06, + "loss": 0.5041, + "step": 12931 + }, + { + "epoch": 1.9055992141453832, + "grad_norm": 0.5711829662322998, + "learning_rate": 1.482473690901044e-06, + "loss": 0.4829, + "step": 12932 + }, + { + "epoch": 1.9057465618860512, + "grad_norm": 0.6032673120498657, + "learning_rate": 1.4821195708865593e-06, + "loss": 0.5044, + "step": 12933 + }, + { + "epoch": 1.9058939096267191, + "grad_norm": 0.6017781496047974, + "learning_rate": 1.4817654753516213e-06, + "loss": 0.5143, + "step": 12934 + }, + { + "epoch": 1.906041257367387, + "grad_norm": 0.6325311660766602, + "learning_rate": 1.4814114043047468e-06, + "loss": 0.5344, + "step": 12935 + }, + { + "epoch": 1.906188605108055, + "grad_norm": 0.5918756723403931, + "learning_rate": 1.4810573577544496e-06, + "loss": 0.5256, + "step": 12936 + }, + { + "epoch": 1.906335952848723, + "grad_norm": 0.6193375587463379, + "learning_rate": 1.4807033357092444e-06, + "loss": 0.5371, + "step": 12937 + }, + { + "epoch": 1.9064833005893909, + "grad_norm": 0.6124848127365112, + "learning_rate": 1.4803493381776468e-06, + "loss": 0.5223, + "step": 12938 + }, + { + "epoch": 1.9066306483300588, + "grad_norm": 0.6466457843780518, + "learning_rate": 1.4799953651681691e-06, + "loss": 0.5748, + "step": 12939 + }, + { + "epoch": 1.906777996070727, + "grad_norm": 0.6070420742034912, + "learning_rate": 1.4796414166893246e-06, + "loss": 0.5119, + "step": 12940 + }, + { + "epoch": 1.906925343811395, + "grad_norm": 0.5903626084327698, + "learning_rate": 1.479287492749626e-06, + "loss": 0.5001, + "step": 12941 + }, + { + "epoch": 1.9070726915520628, + "grad_norm": 0.5772653222084045, + "learning_rate": 1.4789335933575838e-06, + "loss": 0.5325, + "step": 12942 + }, + { + "epoch": 1.907220039292731, + "grad_norm": 0.5906093716621399, + "learning_rate": 1.4785797185217099e-06, + "loss": 0.502, + "step": 12943 + }, + { + "epoch": 1.907367387033399, + "grad_norm": 0.6167232394218445, + "learning_rate": 1.4782258682505146e-06, + "loss": 0.5103, + "step": 12944 + }, + { + "epoch": 1.9075147347740669, + "grad_norm": 0.5763007402420044, + "learning_rate": 1.4778720425525085e-06, + "loss": 0.4779, + "step": 12945 + }, + { + "epoch": 1.9076620825147348, + "grad_norm": 0.6213579177856445, + "learning_rate": 1.4775182414362e-06, + "loss": 0.4924, + "step": 12946 + }, + { + "epoch": 1.9078094302554027, + "grad_norm": 0.5820359587669373, + "learning_rate": 1.477164464910098e-06, + "loss": 0.5266, + "step": 12947 + }, + { + "epoch": 1.9079567779960707, + "grad_norm": 0.6367177963256836, + "learning_rate": 1.4768107129827114e-06, + "loss": 0.5353, + "step": 12948 + }, + { + "epoch": 1.9081041257367386, + "grad_norm": 0.6268755197525024, + "learning_rate": 1.4764569856625472e-06, + "loss": 0.558, + "step": 12949 + }, + { + "epoch": 1.9082514734774065, + "grad_norm": 0.6239703893661499, + "learning_rate": 1.4761032829581118e-06, + "loss": 0.5177, + "step": 12950 + }, + { + "epoch": 1.9083988212180747, + "grad_norm": 0.604188859462738, + "learning_rate": 1.4757496048779127e-06, + "loss": 0.5232, + "step": 12951 + }, + { + "epoch": 1.9085461689587426, + "grad_norm": 0.596611738204956, + "learning_rate": 1.475395951430455e-06, + "loss": 0.526, + "step": 12952 + }, + { + "epoch": 1.9086935166994106, + "grad_norm": 0.5796675086021423, + "learning_rate": 1.4750423226242445e-06, + "loss": 0.5298, + "step": 12953 + }, + { + "epoch": 1.9088408644400787, + "grad_norm": 0.6129311323165894, + "learning_rate": 1.474688718467785e-06, + "loss": 0.5261, + "step": 12954 + }, + { + "epoch": 1.9089882121807467, + "grad_norm": 0.6465165019035339, + "learning_rate": 1.4743351389695808e-06, + "loss": 0.471, + "step": 12955 + }, + { + "epoch": 1.9091355599214146, + "grad_norm": 0.5966616272926331, + "learning_rate": 1.4739815841381355e-06, + "loss": 0.5289, + "step": 12956 + }, + { + "epoch": 1.9092829076620825, + "grad_norm": 0.6005702614784241, + "learning_rate": 1.473628053981952e-06, + "loss": 0.5182, + "step": 12957 + }, + { + "epoch": 1.9094302554027505, + "grad_norm": 0.6141689419746399, + "learning_rate": 1.4732745485095324e-06, + "loss": 0.517, + "step": 12958 + }, + { + "epoch": 1.9095776031434184, + "grad_norm": 0.608485996723175, + "learning_rate": 1.4729210677293784e-06, + "loss": 0.4967, + "step": 12959 + }, + { + "epoch": 1.9097249508840863, + "grad_norm": 0.5900144577026367, + "learning_rate": 1.4725676116499909e-06, + "loss": 0.5329, + "step": 12960 + }, + { + "epoch": 1.9098722986247543, + "grad_norm": 0.5823460221290588, + "learning_rate": 1.4722141802798705e-06, + "loss": 0.5185, + "step": 12961 + }, + { + "epoch": 1.9100196463654224, + "grad_norm": 0.5954617857933044, + "learning_rate": 1.471860773627517e-06, + "loss": 0.5168, + "step": 12962 + }, + { + "epoch": 1.9101669941060904, + "grad_norm": 0.5845192074775696, + "learning_rate": 1.4715073917014295e-06, + "loss": 0.5178, + "step": 12963 + }, + { + "epoch": 1.9103143418467583, + "grad_norm": 0.6392862796783447, + "learning_rate": 1.4711540345101073e-06, + "loss": 0.5214, + "step": 12964 + }, + { + "epoch": 1.9104616895874265, + "grad_norm": 0.5953618288040161, + "learning_rate": 1.4708007020620475e-06, + "loss": 0.524, + "step": 12965 + }, + { + "epoch": 1.9106090373280944, + "grad_norm": 0.5929054021835327, + "learning_rate": 1.4704473943657483e-06, + "loss": 0.5333, + "step": 12966 + }, + { + "epoch": 1.9107563850687623, + "grad_norm": 0.6136192083358765, + "learning_rate": 1.4700941114297062e-06, + "loss": 0.5366, + "step": 12967 + }, + { + "epoch": 1.9109037328094303, + "grad_norm": 0.5905357599258423, + "learning_rate": 1.469740853262418e-06, + "loss": 0.5284, + "step": 12968 + }, + { + "epoch": 1.9110510805500982, + "grad_norm": 0.6358411908149719, + "learning_rate": 1.4693876198723789e-06, + "loss": 0.5298, + "step": 12969 + }, + { + "epoch": 1.9111984282907661, + "grad_norm": 0.5859122276306152, + "learning_rate": 1.469034411268084e-06, + "loss": 0.544, + "step": 12970 + }, + { + "epoch": 1.911345776031434, + "grad_norm": 0.6121860146522522, + "learning_rate": 1.4686812274580282e-06, + "loss": 0.5017, + "step": 12971 + }, + { + "epoch": 1.911493123772102, + "grad_norm": 0.6104930639266968, + "learning_rate": 1.4683280684507051e-06, + "loss": 0.5198, + "step": 12972 + }, + { + "epoch": 1.9116404715127702, + "grad_norm": 0.6057182550430298, + "learning_rate": 1.4679749342546078e-06, + "loss": 0.5237, + "step": 12973 + }, + { + "epoch": 1.911787819253438, + "grad_norm": 0.6150299906730652, + "learning_rate": 1.4676218248782293e-06, + "loss": 0.5369, + "step": 12974 + }, + { + "epoch": 1.911935166994106, + "grad_norm": 0.6098352670669556, + "learning_rate": 1.4672687403300617e-06, + "loss": 0.5348, + "step": 12975 + }, + { + "epoch": 1.9120825147347742, + "grad_norm": 0.6266268491744995, + "learning_rate": 1.4669156806185969e-06, + "loss": 0.5315, + "step": 12976 + }, + { + "epoch": 1.9122298624754421, + "grad_norm": 0.6031677722930908, + "learning_rate": 1.4665626457523251e-06, + "loss": 0.5184, + "step": 12977 + }, + { + "epoch": 1.91237721021611, + "grad_norm": 0.6185811161994934, + "learning_rate": 1.4662096357397371e-06, + "loss": 0.5354, + "step": 12978 + }, + { + "epoch": 1.912524557956778, + "grad_norm": 0.6154536008834839, + "learning_rate": 1.4658566505893226e-06, + "loss": 0.5548, + "step": 12979 + }, + { + "epoch": 1.912671905697446, + "grad_norm": 0.6084572672843933, + "learning_rate": 1.4655036903095705e-06, + "loss": 0.5115, + "step": 12980 + }, + { + "epoch": 1.9128192534381139, + "grad_norm": 0.600322425365448, + "learning_rate": 1.4651507549089693e-06, + "loss": 0.4819, + "step": 12981 + }, + { + "epoch": 1.9129666011787818, + "grad_norm": 0.6126863360404968, + "learning_rate": 1.4647978443960075e-06, + "loss": 0.5425, + "step": 12982 + }, + { + "epoch": 1.9131139489194497, + "grad_norm": 0.6310728192329407, + "learning_rate": 1.4644449587791716e-06, + "loss": 0.4994, + "step": 12983 + }, + { + "epoch": 1.913261296660118, + "grad_norm": 0.6242693066596985, + "learning_rate": 1.4640920980669487e-06, + "loss": 0.5085, + "step": 12984 + }, + { + "epoch": 1.9134086444007858, + "grad_norm": 0.5573834180831909, + "learning_rate": 1.4637392622678254e-06, + "loss": 0.5178, + "step": 12985 + }, + { + "epoch": 1.9135559921414538, + "grad_norm": 0.6088093519210815, + "learning_rate": 1.4633864513902864e-06, + "loss": 0.5027, + "step": 12986 + }, + { + "epoch": 1.913703339882122, + "grad_norm": 0.6682909727096558, + "learning_rate": 1.4630336654428172e-06, + "loss": 0.5119, + "step": 12987 + }, + { + "epoch": 1.9138506876227899, + "grad_norm": 0.6533017158508301, + "learning_rate": 1.4626809044339021e-06, + "loss": 0.507, + "step": 12988 + }, + { + "epoch": 1.9139980353634578, + "grad_norm": 0.5754474401473999, + "learning_rate": 1.4623281683720248e-06, + "loss": 0.54, + "step": 12989 + }, + { + "epoch": 1.9141453831041257, + "grad_norm": 0.6127025485038757, + "learning_rate": 1.461975457265668e-06, + "loss": 0.5296, + "step": 12990 + }, + { + "epoch": 1.9142927308447937, + "grad_norm": 0.6058083772659302, + "learning_rate": 1.4616227711233147e-06, + "loss": 0.52, + "step": 12991 + }, + { + "epoch": 1.9144400785854616, + "grad_norm": 0.5577771067619324, + "learning_rate": 1.4612701099534465e-06, + "loss": 0.5031, + "step": 12992 + }, + { + "epoch": 1.9145874263261295, + "grad_norm": 0.6266824007034302, + "learning_rate": 1.4609174737645453e-06, + "loss": 0.5398, + "step": 12993 + }, + { + "epoch": 1.9147347740667975, + "grad_norm": 0.6167150139808655, + "learning_rate": 1.4605648625650915e-06, + "loss": 0.5155, + "step": 12994 + }, + { + "epoch": 1.9148821218074656, + "grad_norm": 0.6126610636711121, + "learning_rate": 1.460212276363565e-06, + "loss": 0.5145, + "step": 12995 + }, + { + "epoch": 1.9150294695481336, + "grad_norm": 0.6205779314041138, + "learning_rate": 1.4598597151684463e-06, + "loss": 0.5268, + "step": 12996 + }, + { + "epoch": 1.9151768172888017, + "grad_norm": 0.5941891670227051, + "learning_rate": 1.4595071789882132e-06, + "loss": 0.4944, + "step": 12997 + }, + { + "epoch": 1.9153241650294697, + "grad_norm": 0.6937230229377747, + "learning_rate": 1.4591546678313435e-06, + "loss": 0.4949, + "step": 12998 + }, + { + "epoch": 1.9154715127701376, + "grad_norm": 0.6409006118774414, + "learning_rate": 1.4588021817063171e-06, + "loss": 0.5124, + "step": 12999 + }, + { + "epoch": 1.9156188605108055, + "grad_norm": 0.6287356019020081, + "learning_rate": 1.458449720621609e-06, + "loss": 0.5317, + "step": 13000 + }, + { + "epoch": 1.9157662082514735, + "grad_norm": 0.632697582244873, + "learning_rate": 1.4580972845856972e-06, + "loss": 0.5131, + "step": 13001 + }, + { + "epoch": 1.9159135559921414, + "grad_norm": 0.6167992353439331, + "learning_rate": 1.4577448736070565e-06, + "loss": 0.5302, + "step": 13002 + }, + { + "epoch": 1.9160609037328094, + "grad_norm": 0.6028645038604736, + "learning_rate": 1.4573924876941636e-06, + "loss": 0.5359, + "step": 13003 + }, + { + "epoch": 1.9162082514734773, + "grad_norm": 0.5809696316719055, + "learning_rate": 1.4570401268554913e-06, + "loss": 0.5433, + "step": 13004 + }, + { + "epoch": 1.9163555992141452, + "grad_norm": 0.5930546522140503, + "learning_rate": 1.4566877910995158e-06, + "loss": 0.5255, + "step": 13005 + }, + { + "epoch": 1.9165029469548134, + "grad_norm": 0.6352711319923401, + "learning_rate": 1.4563354804347084e-06, + "loss": 0.5466, + "step": 13006 + }, + { + "epoch": 1.9166502946954813, + "grad_norm": 0.6288111209869385, + "learning_rate": 1.4559831948695445e-06, + "loss": 0.5661, + "step": 13007 + }, + { + "epoch": 1.9167976424361495, + "grad_norm": 0.5920986533164978, + "learning_rate": 1.4556309344124937e-06, + "loss": 0.5241, + "step": 13008 + }, + { + "epoch": 1.9169449901768174, + "grad_norm": 0.6180787086486816, + "learning_rate": 1.4552786990720302e-06, + "loss": 0.5081, + "step": 13009 + }, + { + "epoch": 1.9170923379174853, + "grad_norm": 0.6235425472259521, + "learning_rate": 1.4549264888566233e-06, + "loss": 0.5331, + "step": 13010 + }, + { + "epoch": 1.9172396856581533, + "grad_norm": 0.6203895211219788, + "learning_rate": 1.454574303774744e-06, + "loss": 0.5002, + "step": 13011 + }, + { + "epoch": 1.9173870333988212, + "grad_norm": 0.6309157609939575, + "learning_rate": 1.4542221438348631e-06, + "loss": 0.5061, + "step": 13012 + }, + { + "epoch": 1.9175343811394892, + "grad_norm": 0.5950527191162109, + "learning_rate": 1.4538700090454483e-06, + "loss": 0.5127, + "step": 13013 + }, + { + "epoch": 1.917681728880157, + "grad_norm": 0.5581483840942383, + "learning_rate": 1.4535178994149705e-06, + "loss": 0.5322, + "step": 13014 + }, + { + "epoch": 1.917829076620825, + "grad_norm": 0.5749949216842651, + "learning_rate": 1.4531658149518952e-06, + "loss": 0.516, + "step": 13015 + }, + { + "epoch": 1.917976424361493, + "grad_norm": 0.6003997921943665, + "learning_rate": 1.4528137556646914e-06, + "loss": 0.5234, + "step": 13016 + }, + { + "epoch": 1.9181237721021611, + "grad_norm": 0.5709144473075867, + "learning_rate": 1.4524617215618253e-06, + "loss": 0.5467, + "step": 13017 + }, + { + "epoch": 1.918271119842829, + "grad_norm": 0.6981947422027588, + "learning_rate": 1.4521097126517644e-06, + "loss": 0.5322, + "step": 13018 + }, + { + "epoch": 1.9184184675834972, + "grad_norm": 0.5972132682800293, + "learning_rate": 1.4517577289429723e-06, + "loss": 0.4974, + "step": 13019 + }, + { + "epoch": 1.9185658153241651, + "grad_norm": 0.5900067687034607, + "learning_rate": 1.4514057704439162e-06, + "loss": 0.5067, + "step": 13020 + }, + { + "epoch": 1.918713163064833, + "grad_norm": 0.6083065867424011, + "learning_rate": 1.4510538371630584e-06, + "loss": 0.5266, + "step": 13021 + }, + { + "epoch": 1.918860510805501, + "grad_norm": 0.5903234481811523, + "learning_rate": 1.450701929108865e-06, + "loss": 0.5215, + "step": 13022 + }, + { + "epoch": 1.919007858546169, + "grad_norm": 0.6025954484939575, + "learning_rate": 1.450350046289797e-06, + "loss": 0.5227, + "step": 13023 + }, + { + "epoch": 1.919155206286837, + "grad_norm": 0.62913578748703, + "learning_rate": 1.449998188714319e-06, + "loss": 0.5294, + "step": 13024 + }, + { + "epoch": 1.9193025540275048, + "grad_norm": 0.5886679291725159, + "learning_rate": 1.4496463563908914e-06, + "loss": 0.5082, + "step": 13025 + }, + { + "epoch": 1.9194499017681728, + "grad_norm": 0.621342122554779, + "learning_rate": 1.449294549327977e-06, + "loss": 0.5393, + "step": 13026 + }, + { + "epoch": 1.919597249508841, + "grad_norm": 0.640922486782074, + "learning_rate": 1.4489427675340348e-06, + "loss": 0.5494, + "step": 13027 + }, + { + "epoch": 1.9197445972495089, + "grad_norm": 0.6390912532806396, + "learning_rate": 1.4485910110175271e-06, + "loss": 0.5337, + "step": 13028 + }, + { + "epoch": 1.9198919449901768, + "grad_norm": 0.6208292841911316, + "learning_rate": 1.4482392797869127e-06, + "loss": 0.5491, + "step": 13029 + }, + { + "epoch": 1.920039292730845, + "grad_norm": 0.6045359373092651, + "learning_rate": 1.447887573850649e-06, + "loss": 0.5251, + "step": 13030 + }, + { + "epoch": 1.9201866404715129, + "grad_norm": 0.6215729117393494, + "learning_rate": 1.4475358932171968e-06, + "loss": 0.513, + "step": 13031 + }, + { + "epoch": 1.9203339882121808, + "grad_norm": 0.5957313179969788, + "learning_rate": 1.4471842378950118e-06, + "loss": 0.5076, + "step": 13032 + }, + { + "epoch": 1.9204813359528488, + "grad_norm": 0.6297581195831299, + "learning_rate": 1.446832607892553e-06, + "loss": 0.5267, + "step": 13033 + }, + { + "epoch": 1.9206286836935167, + "grad_norm": 0.6153076887130737, + "learning_rate": 1.446481003218275e-06, + "loss": 0.5755, + "step": 13034 + }, + { + "epoch": 1.9207760314341846, + "grad_norm": 0.5920471549034119, + "learning_rate": 1.4461294238806355e-06, + "loss": 0.4933, + "step": 13035 + }, + { + "epoch": 1.9209233791748526, + "grad_norm": 0.639594316482544, + "learning_rate": 1.4457778698880887e-06, + "loss": 0.5156, + "step": 13036 + }, + { + "epoch": 1.9210707269155205, + "grad_norm": 0.5949198007583618, + "learning_rate": 1.44542634124909e-06, + "loss": 0.5313, + "step": 13037 + }, + { + "epoch": 1.9212180746561887, + "grad_norm": 0.6270139813423157, + "learning_rate": 1.4450748379720925e-06, + "loss": 0.5347, + "step": 13038 + }, + { + "epoch": 1.9213654223968566, + "grad_norm": 0.6044071912765503, + "learning_rate": 1.4447233600655514e-06, + "loss": 0.522, + "step": 13039 + }, + { + "epoch": 1.9215127701375245, + "grad_norm": 0.6127368807792664, + "learning_rate": 1.4443719075379175e-06, + "loss": 0.5083, + "step": 13040 + }, + { + "epoch": 1.9216601178781927, + "grad_norm": 0.631316065788269, + "learning_rate": 1.4440204803976452e-06, + "loss": 0.533, + "step": 13041 + }, + { + "epoch": 1.9218074656188606, + "grad_norm": 0.6216622591018677, + "learning_rate": 1.4436690786531836e-06, + "loss": 0.5202, + "step": 13042 + }, + { + "epoch": 1.9219548133595286, + "grad_norm": 0.5971319079399109, + "learning_rate": 1.4433177023129868e-06, + "loss": 0.5169, + "step": 13043 + }, + { + "epoch": 1.9221021611001965, + "grad_norm": 0.596314013004303, + "learning_rate": 1.4429663513855025e-06, + "loss": 0.511, + "step": 13044 + }, + { + "epoch": 1.9222495088408644, + "grad_norm": 0.6189841032028198, + "learning_rate": 1.4426150258791824e-06, + "loss": 0.522, + "step": 13045 + }, + { + "epoch": 1.9223968565815324, + "grad_norm": 0.6092084646224976, + "learning_rate": 1.442263725802474e-06, + "loss": 0.5423, + "step": 13046 + }, + { + "epoch": 1.9225442043222003, + "grad_norm": 0.6016453504562378, + "learning_rate": 1.4419124511638273e-06, + "loss": 0.5123, + "step": 13047 + }, + { + "epoch": 1.9226915520628682, + "grad_norm": 0.601157009601593, + "learning_rate": 1.4415612019716904e-06, + "loss": 0.5087, + "step": 13048 + }, + { + "epoch": 1.9228388998035364, + "grad_norm": 0.5937336683273315, + "learning_rate": 1.4412099782345096e-06, + "loss": 0.5101, + "step": 13049 + }, + { + "epoch": 1.9229862475442043, + "grad_norm": 0.6155300140380859, + "learning_rate": 1.4408587799607326e-06, + "loss": 0.5142, + "step": 13050 + }, + { + "epoch": 1.9231335952848723, + "grad_norm": 0.6000258922576904, + "learning_rate": 1.4405076071588047e-06, + "loss": 0.5139, + "step": 13051 + }, + { + "epoch": 1.9232809430255404, + "grad_norm": 0.6067041754722595, + "learning_rate": 1.4401564598371728e-06, + "loss": 0.5289, + "step": 13052 + }, + { + "epoch": 1.9234282907662084, + "grad_norm": 0.5974053740501404, + "learning_rate": 1.43980533800428e-06, + "loss": 0.5444, + "step": 13053 + }, + { + "epoch": 1.9235756385068763, + "grad_norm": 0.608201265335083, + "learning_rate": 1.4394542416685725e-06, + "loss": 0.4873, + "step": 13054 + }, + { + "epoch": 1.9237229862475442, + "grad_norm": 0.6318696141242981, + "learning_rate": 1.4391031708384917e-06, + "loss": 0.5154, + "step": 13055 + }, + { + "epoch": 1.9238703339882122, + "grad_norm": 0.6323356628417969, + "learning_rate": 1.4387521255224833e-06, + "loss": 0.5243, + "step": 13056 + }, + { + "epoch": 1.92401768172888, + "grad_norm": 0.6194377541542053, + "learning_rate": 1.4384011057289878e-06, + "loss": 0.5086, + "step": 13057 + }, + { + "epoch": 1.924165029469548, + "grad_norm": 0.6124513745307922, + "learning_rate": 1.4380501114664486e-06, + "loss": 0.5542, + "step": 13058 + }, + { + "epoch": 1.924312377210216, + "grad_norm": 0.6106680035591125, + "learning_rate": 1.4376991427433052e-06, + "loss": 0.5352, + "step": 13059 + }, + { + "epoch": 1.9244597249508841, + "grad_norm": 0.5972914099693298, + "learning_rate": 1.437348199568e-06, + "loss": 0.5223, + "step": 13060 + }, + { + "epoch": 1.924607072691552, + "grad_norm": 0.6263208389282227, + "learning_rate": 1.4369972819489714e-06, + "loss": 0.542, + "step": 13061 + }, + { + "epoch": 1.92475442043222, + "grad_norm": 0.6240456700325012, + "learning_rate": 1.4366463898946603e-06, + "loss": 0.4941, + "step": 13062 + }, + { + "epoch": 1.9249017681728882, + "grad_norm": 0.6119546294212341, + "learning_rate": 1.436295523413504e-06, + "loss": 0.5284, + "step": 13063 + }, + { + "epoch": 1.925049115913556, + "grad_norm": 0.6038997173309326, + "learning_rate": 1.4359446825139425e-06, + "loss": 0.547, + "step": 13064 + }, + { + "epoch": 1.925196463654224, + "grad_norm": 0.6295552253723145, + "learning_rate": 1.4355938672044112e-06, + "loss": 0.4994, + "step": 13065 + }, + { + "epoch": 1.925343811394892, + "grad_norm": 0.6189745664596558, + "learning_rate": 1.4352430774933494e-06, + "loss": 0.5354, + "step": 13066 + }, + { + "epoch": 1.92549115913556, + "grad_norm": 0.6181761622428894, + "learning_rate": 1.4348923133891907e-06, + "loss": 0.5285, + "step": 13067 + }, + { + "epoch": 1.9256385068762278, + "grad_norm": 0.5952091813087463, + "learning_rate": 1.4345415749003738e-06, + "loss": 0.5411, + "step": 13068 + }, + { + "epoch": 1.9257858546168958, + "grad_norm": 0.5855717658996582, + "learning_rate": 1.4341908620353309e-06, + "loss": 0.5225, + "step": 13069 + }, + { + "epoch": 1.9259332023575637, + "grad_norm": 0.6029735207557678, + "learning_rate": 1.4338401748024991e-06, + "loss": 0.5308, + "step": 13070 + }, + { + "epoch": 1.9260805500982319, + "grad_norm": 0.5976906418800354, + "learning_rate": 1.43348951321031e-06, + "loss": 0.5125, + "step": 13071 + }, + { + "epoch": 1.9262278978388998, + "grad_norm": 0.5913706421852112, + "learning_rate": 1.4331388772671989e-06, + "loss": 0.5364, + "step": 13072 + }, + { + "epoch": 1.9263752455795677, + "grad_norm": 0.6355464458465576, + "learning_rate": 1.4327882669815963e-06, + "loss": 0.5043, + "step": 13073 + }, + { + "epoch": 1.926522593320236, + "grad_norm": 0.5969071984291077, + "learning_rate": 1.4324376823619363e-06, + "loss": 0.5447, + "step": 13074 + }, + { + "epoch": 1.9266699410609038, + "grad_norm": 0.6174291372299194, + "learning_rate": 1.4320871234166483e-06, + "loss": 0.5618, + "step": 13075 + }, + { + "epoch": 1.9268172888015718, + "grad_norm": 0.638817310333252, + "learning_rate": 1.4317365901541654e-06, + "loss": 0.4842, + "step": 13076 + }, + { + "epoch": 1.9269646365422397, + "grad_norm": 0.5917872786521912, + "learning_rate": 1.431386082582915e-06, + "loss": 0.5125, + "step": 13077 + }, + { + "epoch": 1.9271119842829076, + "grad_norm": 0.6045477390289307, + "learning_rate": 1.4310356007113297e-06, + "loss": 0.547, + "step": 13078 + }, + { + "epoch": 1.9272593320235756, + "grad_norm": 0.6292953491210938, + "learning_rate": 1.4306851445478364e-06, + "loss": 0.5225, + "step": 13079 + }, + { + "epoch": 1.9274066797642435, + "grad_norm": 0.6418991684913635, + "learning_rate": 1.430334714100863e-06, + "loss": 0.5137, + "step": 13080 + }, + { + "epoch": 1.9275540275049114, + "grad_norm": 0.6174084544181824, + "learning_rate": 1.429984309378839e-06, + "loss": 0.529, + "step": 13081 + }, + { + "epoch": 1.9277013752455796, + "grad_norm": 0.5980799794197083, + "learning_rate": 1.4296339303901896e-06, + "loss": 0.5176, + "step": 13082 + }, + { + "epoch": 1.9278487229862475, + "grad_norm": 0.6500878930091858, + "learning_rate": 1.4292835771433428e-06, + "loss": 0.5202, + "step": 13083 + }, + { + "epoch": 1.9279960707269155, + "grad_norm": 0.5999394059181213, + "learning_rate": 1.428933249646722e-06, + "loss": 0.5189, + "step": 13084 + }, + { + "epoch": 1.9281434184675836, + "grad_norm": 0.6051875948905945, + "learning_rate": 1.4285829479087563e-06, + "loss": 0.5088, + "step": 13085 + }, + { + "epoch": 1.9282907662082516, + "grad_norm": 0.6155783534049988, + "learning_rate": 1.428232671937867e-06, + "loss": 0.5005, + "step": 13086 + }, + { + "epoch": 1.9284381139489195, + "grad_norm": 0.6152646541595459, + "learning_rate": 1.4278824217424803e-06, + "loss": 0.5116, + "step": 13087 + }, + { + "epoch": 1.9285854616895874, + "grad_norm": 0.6045884490013123, + "learning_rate": 1.4275321973310174e-06, + "loss": 0.5476, + "step": 13088 + }, + { + "epoch": 1.9287328094302554, + "grad_norm": 0.6035907864570618, + "learning_rate": 1.4271819987119034e-06, + "loss": 0.5197, + "step": 13089 + }, + { + "epoch": 1.9288801571709233, + "grad_norm": 0.6141873598098755, + "learning_rate": 1.4268318258935577e-06, + "loss": 0.5322, + "step": 13090 + }, + { + "epoch": 1.9290275049115913, + "grad_norm": 0.5937678813934326, + "learning_rate": 1.4264816788844047e-06, + "loss": 0.5535, + "step": 13091 + }, + { + "epoch": 1.9291748526522592, + "grad_norm": 0.6058858633041382, + "learning_rate": 1.4261315576928626e-06, + "loss": 0.533, + "step": 13092 + }, + { + "epoch": 1.9293222003929273, + "grad_norm": 0.5966548919677734, + "learning_rate": 1.425781462327354e-06, + "loss": 0.4838, + "step": 13093 + }, + { + "epoch": 1.9294695481335953, + "grad_norm": 0.6056492328643799, + "learning_rate": 1.4254313927962962e-06, + "loss": 0.547, + "step": 13094 + }, + { + "epoch": 1.9296168958742632, + "grad_norm": 0.6142704486846924, + "learning_rate": 1.4250813491081105e-06, + "loss": 0.5495, + "step": 13095 + }, + { + "epoch": 1.9297642436149314, + "grad_norm": 0.6146789193153381, + "learning_rate": 1.4247313312712142e-06, + "loss": 0.5118, + "step": 13096 + }, + { + "epoch": 1.9299115913555993, + "grad_norm": 0.6093620657920837, + "learning_rate": 1.424381339294024e-06, + "loss": 0.5414, + "step": 13097 + }, + { + "epoch": 1.9300589390962672, + "grad_norm": 0.587471067905426, + "learning_rate": 1.4240313731849592e-06, + "loss": 0.4841, + "step": 13098 + }, + { + "epoch": 1.9302062868369352, + "grad_norm": 0.6168700456619263, + "learning_rate": 1.423681432952434e-06, + "loss": 0.549, + "step": 13099 + }, + { + "epoch": 1.9303536345776031, + "grad_norm": 0.5819888114929199, + "learning_rate": 1.4233315186048668e-06, + "loss": 0.5414, + "step": 13100 + }, + { + "epoch": 1.930500982318271, + "grad_norm": 0.6325669884681702, + "learning_rate": 1.4229816301506705e-06, + "loss": 0.5318, + "step": 13101 + }, + { + "epoch": 1.930648330058939, + "grad_norm": 0.5866665244102478, + "learning_rate": 1.4226317675982616e-06, + "loss": 0.5122, + "step": 13102 + }, + { + "epoch": 1.930795677799607, + "grad_norm": 0.6102436780929565, + "learning_rate": 1.4222819309560527e-06, + "loss": 0.5369, + "step": 13103 + }, + { + "epoch": 1.930943025540275, + "grad_norm": 0.5912744998931885, + "learning_rate": 1.4219321202324588e-06, + "loss": 0.5063, + "step": 13104 + }, + { + "epoch": 1.931090373280943, + "grad_norm": 0.6315524578094482, + "learning_rate": 1.4215823354358904e-06, + "loss": 0.5246, + "step": 13105 + }, + { + "epoch": 1.931237721021611, + "grad_norm": 0.6290096044540405, + "learning_rate": 1.4212325765747625e-06, + "loss": 0.5221, + "step": 13106 + }, + { + "epoch": 1.9313850687622791, + "grad_norm": 0.6059592366218567, + "learning_rate": 1.420882843657484e-06, + "loss": 0.5463, + "step": 13107 + }, + { + "epoch": 1.931532416502947, + "grad_norm": 0.6258959770202637, + "learning_rate": 1.420533136692468e-06, + "loss": 0.506, + "step": 13108 + }, + { + "epoch": 1.931679764243615, + "grad_norm": 0.6052356958389282, + "learning_rate": 1.4201834556881226e-06, + "loss": 0.5284, + "step": 13109 + }, + { + "epoch": 1.931827111984283, + "grad_norm": 0.5814704298973083, + "learning_rate": 1.4198338006528595e-06, + "loss": 0.5228, + "step": 13110 + }, + { + "epoch": 1.9319744597249509, + "grad_norm": 0.5787243843078613, + "learning_rate": 1.4194841715950864e-06, + "loss": 0.5384, + "step": 13111 + }, + { + "epoch": 1.9321218074656188, + "grad_norm": 0.6470999717712402, + "learning_rate": 1.4191345685232128e-06, + "loss": 0.4912, + "step": 13112 + }, + { + "epoch": 1.9322691552062867, + "grad_norm": 0.596328854560852, + "learning_rate": 1.4187849914456447e-06, + "loss": 0.533, + "step": 13113 + }, + { + "epoch": 1.9324165029469547, + "grad_norm": 0.6123766303062439, + "learning_rate": 1.418435440370792e-06, + "loss": 0.5349, + "step": 13114 + }, + { + "epoch": 1.9325638506876228, + "grad_norm": 0.593027651309967, + "learning_rate": 1.4180859153070584e-06, + "loss": 0.5438, + "step": 13115 + }, + { + "epoch": 1.9327111984282908, + "grad_norm": 0.5819000005722046, + "learning_rate": 1.417736416262852e-06, + "loss": 0.53, + "step": 13116 + }, + { + "epoch": 1.9328585461689587, + "grad_norm": 0.6248828768730164, + "learning_rate": 1.417386943246576e-06, + "loss": 0.5077, + "step": 13117 + }, + { + "epoch": 1.9330058939096268, + "grad_norm": 0.6518598198890686, + "learning_rate": 1.4170374962666377e-06, + "loss": 0.5011, + "step": 13118 + }, + { + "epoch": 1.9331532416502948, + "grad_norm": 0.6400059461593628, + "learning_rate": 1.4166880753314382e-06, + "loss": 0.5064, + "step": 13119 + }, + { + "epoch": 1.9333005893909627, + "grad_norm": 0.6052497029304504, + "learning_rate": 1.416338680449384e-06, + "loss": 0.5393, + "step": 13120 + }, + { + "epoch": 1.9334479371316307, + "grad_norm": 0.6023979187011719, + "learning_rate": 1.4159893116288747e-06, + "loss": 0.5112, + "step": 13121 + }, + { + "epoch": 1.9335952848722986, + "grad_norm": 0.6091774702072144, + "learning_rate": 1.4156399688783143e-06, + "loss": 0.5551, + "step": 13122 + }, + { + "epoch": 1.9337426326129665, + "grad_norm": 0.5991960763931274, + "learning_rate": 1.415290652206105e-06, + "loss": 0.476, + "step": 13123 + }, + { + "epoch": 1.9338899803536345, + "grad_norm": 0.6389259099960327, + "learning_rate": 1.4149413616206458e-06, + "loss": 0.5264, + "step": 13124 + }, + { + "epoch": 1.9340373280943024, + "grad_norm": 0.5730992555618286, + "learning_rate": 1.4145920971303389e-06, + "loss": 0.5039, + "step": 13125 + }, + { + "epoch": 1.9341846758349706, + "grad_norm": 0.5947561860084534, + "learning_rate": 1.4142428587435818e-06, + "loss": 0.5145, + "step": 13126 + }, + { + "epoch": 1.9343320235756385, + "grad_norm": 0.602365255355835, + "learning_rate": 1.4138936464687757e-06, + "loss": 0.5047, + "step": 13127 + }, + { + "epoch": 1.9344793713163064, + "grad_norm": 0.5912635326385498, + "learning_rate": 1.4135444603143172e-06, + "loss": 0.4978, + "step": 13128 + }, + { + "epoch": 1.9346267190569746, + "grad_norm": 0.5949813723564148, + "learning_rate": 1.413195300288606e-06, + "loss": 0.4966, + "step": 13129 + }, + { + "epoch": 1.9347740667976425, + "grad_norm": 0.5937680602073669, + "learning_rate": 1.4128461664000367e-06, + "loss": 0.5208, + "step": 13130 + }, + { + "epoch": 1.9349214145383105, + "grad_norm": 0.6008622646331787, + "learning_rate": 1.4124970586570087e-06, + "loss": 0.5208, + "step": 13131 + }, + { + "epoch": 1.9350687622789784, + "grad_norm": 0.5759239792823792, + "learning_rate": 1.4121479770679148e-06, + "loss": 0.537, + "step": 13132 + }, + { + "epoch": 1.9352161100196463, + "grad_norm": 0.5954644680023193, + "learning_rate": 1.4117989216411529e-06, + "loss": 0.5465, + "step": 13133 + }, + { + "epoch": 1.9353634577603143, + "grad_norm": 0.6029619574546814, + "learning_rate": 1.411449892385116e-06, + "loss": 0.5268, + "step": 13134 + }, + { + "epoch": 1.9355108055009822, + "grad_norm": 0.6213268041610718, + "learning_rate": 1.411100889308199e-06, + "loss": 0.5362, + "step": 13135 + }, + { + "epoch": 1.9356581532416501, + "grad_norm": 0.5957341194152832, + "learning_rate": 1.410751912418794e-06, + "loss": 0.5351, + "step": 13136 + }, + { + "epoch": 1.9358055009823183, + "grad_norm": 0.6060255765914917, + "learning_rate": 1.4104029617252962e-06, + "loss": 0.5446, + "step": 13137 + }, + { + "epoch": 1.9359528487229862, + "grad_norm": 0.6089731454849243, + "learning_rate": 1.4100540372360946e-06, + "loss": 0.5153, + "step": 13138 + }, + { + "epoch": 1.9361001964636544, + "grad_norm": 0.6358145475387573, + "learning_rate": 1.4097051389595833e-06, + "loss": 0.5007, + "step": 13139 + }, + { + "epoch": 1.9362475442043223, + "grad_norm": 0.6074784398078918, + "learning_rate": 1.409356266904151e-06, + "loss": 0.4988, + "step": 13140 + }, + { + "epoch": 1.9363948919449903, + "grad_norm": 0.5910152792930603, + "learning_rate": 1.4090074210781901e-06, + "loss": 0.5144, + "step": 13141 + }, + { + "epoch": 1.9365422396856582, + "grad_norm": 0.5967753529548645, + "learning_rate": 1.4086586014900878e-06, + "loss": 0.5123, + "step": 13142 + }, + { + "epoch": 1.9366895874263261, + "grad_norm": 0.6009259819984436, + "learning_rate": 1.4083098081482356e-06, + "loss": 0.5357, + "step": 13143 + }, + { + "epoch": 1.936836935166994, + "grad_norm": 0.6095307469367981, + "learning_rate": 1.40796104106102e-06, + "loss": 0.5154, + "step": 13144 + }, + { + "epoch": 1.936984282907662, + "grad_norm": 0.6002750396728516, + "learning_rate": 1.4076123002368285e-06, + "loss": 0.5171, + "step": 13145 + }, + { + "epoch": 1.93713163064833, + "grad_norm": 0.6230196356773376, + "learning_rate": 1.4072635856840496e-06, + "loss": 0.5502, + "step": 13146 + }, + { + "epoch": 1.9372789783889979, + "grad_norm": 0.6126433610916138, + "learning_rate": 1.406914897411068e-06, + "loss": 0.5498, + "step": 13147 + }, + { + "epoch": 1.937426326129666, + "grad_norm": 0.6150339245796204, + "learning_rate": 1.4065662354262715e-06, + "loss": 0.5353, + "step": 13148 + }, + { + "epoch": 1.937573673870334, + "grad_norm": 0.625499427318573, + "learning_rate": 1.4062175997380435e-06, + "loss": 0.5158, + "step": 13149 + }, + { + "epoch": 1.9377210216110021, + "grad_norm": 0.6079448461532593, + "learning_rate": 1.4058689903547697e-06, + "loss": 0.5058, + "step": 13150 + }, + { + "epoch": 1.93786836935167, + "grad_norm": 0.5971341729164124, + "learning_rate": 1.4055204072848328e-06, + "loss": 0.5381, + "step": 13151 + }, + { + "epoch": 1.938015717092338, + "grad_norm": 0.6397987008094788, + "learning_rate": 1.4051718505366177e-06, + "loss": 0.4886, + "step": 13152 + }, + { + "epoch": 1.938163064833006, + "grad_norm": 0.5943425893783569, + "learning_rate": 1.4048233201185052e-06, + "loss": 0.5124, + "step": 13153 + }, + { + "epoch": 1.9383104125736739, + "grad_norm": 0.6384581923484802, + "learning_rate": 1.4044748160388788e-06, + "loss": 0.534, + "step": 13154 + }, + { + "epoch": 1.9384577603143418, + "grad_norm": 0.6070263385772705, + "learning_rate": 1.4041263383061188e-06, + "loss": 0.5193, + "step": 13155 + }, + { + "epoch": 1.9386051080550097, + "grad_norm": 0.6556513905525208, + "learning_rate": 1.4037778869286067e-06, + "loss": 0.5019, + "step": 13156 + }, + { + "epoch": 1.9387524557956777, + "grad_norm": 0.610125720500946, + "learning_rate": 1.4034294619147217e-06, + "loss": 0.4953, + "step": 13157 + }, + { + "epoch": 1.9388998035363456, + "grad_norm": 0.59829181432724, + "learning_rate": 1.4030810632728437e-06, + "loss": 0.5255, + "step": 13158 + }, + { + "epoch": 1.9390471512770138, + "grad_norm": 0.6164745688438416, + "learning_rate": 1.4027326910113518e-06, + "loss": 0.5242, + "step": 13159 + }, + { + "epoch": 1.9391944990176817, + "grad_norm": 0.6132985949516296, + "learning_rate": 1.402384345138625e-06, + "loss": 0.5216, + "step": 13160 + }, + { + "epoch": 1.9393418467583499, + "grad_norm": 0.6394188404083252, + "learning_rate": 1.4020360256630386e-06, + "loss": 0.5229, + "step": 13161 + }, + { + "epoch": 1.9394891944990178, + "grad_norm": 0.593514621257782, + "learning_rate": 1.401687732592972e-06, + "loss": 0.5506, + "step": 13162 + }, + { + "epoch": 1.9396365422396857, + "grad_norm": 0.5949491858482361, + "learning_rate": 1.4013394659368002e-06, + "loss": 0.5232, + "step": 13163 + }, + { + "epoch": 1.9397838899803537, + "grad_norm": 0.5996668338775635, + "learning_rate": 1.4009912257028983e-06, + "loss": 0.5368, + "step": 13164 + }, + { + "epoch": 1.9399312377210216, + "grad_norm": 0.6318284273147583, + "learning_rate": 1.4006430118996424e-06, + "loss": 0.5311, + "step": 13165 + }, + { + "epoch": 1.9400785854616895, + "grad_norm": 0.632170557975769, + "learning_rate": 1.400294824535406e-06, + "loss": 0.4934, + "step": 13166 + }, + { + "epoch": 1.9402259332023575, + "grad_norm": 0.5705246925354004, + "learning_rate": 1.3999466636185637e-06, + "loss": 0.5252, + "step": 13167 + }, + { + "epoch": 1.9403732809430254, + "grad_norm": 0.6519119739532471, + "learning_rate": 1.3995985291574875e-06, + "loss": 0.5112, + "step": 13168 + }, + { + "epoch": 1.9405206286836936, + "grad_norm": 0.5697336196899414, + "learning_rate": 1.3992504211605518e-06, + "loss": 0.5137, + "step": 13169 + }, + { + "epoch": 1.9406679764243615, + "grad_norm": 0.6073163151741028, + "learning_rate": 1.3989023396361257e-06, + "loss": 0.538, + "step": 13170 + }, + { + "epoch": 1.9408153241650294, + "grad_norm": 0.5858981609344482, + "learning_rate": 1.3985542845925832e-06, + "loss": 0.4834, + "step": 13171 + }, + { + "epoch": 1.9409626719056976, + "grad_norm": 0.6096786260604858, + "learning_rate": 1.3982062560382925e-06, + "loss": 0.5312, + "step": 13172 + }, + { + "epoch": 1.9411100196463655, + "grad_norm": 0.5997249484062195, + "learning_rate": 1.3978582539816255e-06, + "loss": 0.5306, + "step": 13173 + }, + { + "epoch": 1.9412573673870335, + "grad_norm": 0.625883936882019, + "learning_rate": 1.3975102784309496e-06, + "loss": 0.5339, + "step": 13174 + }, + { + "epoch": 1.9414047151277014, + "grad_norm": 0.6155754923820496, + "learning_rate": 1.3971623293946354e-06, + "loss": 0.5262, + "step": 13175 + }, + { + "epoch": 1.9415520628683693, + "grad_norm": 0.6219296455383301, + "learning_rate": 1.3968144068810488e-06, + "loss": 0.5256, + "step": 13176 + }, + { + "epoch": 1.9416994106090373, + "grad_norm": 0.6345530152320862, + "learning_rate": 1.3964665108985596e-06, + "loss": 0.5426, + "step": 13177 + }, + { + "epoch": 1.9418467583497052, + "grad_norm": 0.5801154375076294, + "learning_rate": 1.3961186414555316e-06, + "loss": 0.5166, + "step": 13178 + }, + { + "epoch": 1.9419941060903732, + "grad_norm": 0.6254790425300598, + "learning_rate": 1.395770798560334e-06, + "loss": 0.5421, + "step": 13179 + }, + { + "epoch": 1.9421414538310413, + "grad_norm": 0.6720700263977051, + "learning_rate": 1.39542298222133e-06, + "loss": 0.491, + "step": 13180 + }, + { + "epoch": 1.9422888015717092, + "grad_norm": 0.6165552139282227, + "learning_rate": 1.395075192446886e-06, + "loss": 0.5269, + "step": 13181 + }, + { + "epoch": 1.9424361493123772, + "grad_norm": 0.5921722054481506, + "learning_rate": 1.394727429245364e-06, + "loss": 0.5263, + "step": 13182 + }, + { + "epoch": 1.9425834970530453, + "grad_norm": 0.6177211999893188, + "learning_rate": 1.3943796926251302e-06, + "loss": 0.5126, + "step": 13183 + }, + { + "epoch": 1.9427308447937133, + "grad_norm": 0.5917698740959167, + "learning_rate": 1.3940319825945452e-06, + "loss": 0.5391, + "step": 13184 + }, + { + "epoch": 1.9428781925343812, + "grad_norm": 0.621071994304657, + "learning_rate": 1.3936842991619737e-06, + "loss": 0.5276, + "step": 13185 + }, + { + "epoch": 1.9430255402750491, + "grad_norm": 0.5794036984443665, + "learning_rate": 1.3933366423357745e-06, + "loss": 0.5377, + "step": 13186 + }, + { + "epoch": 1.943172888015717, + "grad_norm": 0.6383150815963745, + "learning_rate": 1.3929890121243108e-06, + "loss": 0.5196, + "step": 13187 + }, + { + "epoch": 1.943320235756385, + "grad_norm": 0.6182950139045715, + "learning_rate": 1.3926414085359413e-06, + "loss": 0.4888, + "step": 13188 + }, + { + "epoch": 1.943467583497053, + "grad_norm": 0.6196316480636597, + "learning_rate": 1.392293831579028e-06, + "loss": 0.5205, + "step": 13189 + }, + { + "epoch": 1.9436149312377209, + "grad_norm": 0.6117976903915405, + "learning_rate": 1.391946281261927e-06, + "loss": 0.5232, + "step": 13190 + }, + { + "epoch": 1.943762278978389, + "grad_norm": 0.5838075876235962, + "learning_rate": 1.3915987575929993e-06, + "loss": 0.5603, + "step": 13191 + }, + { + "epoch": 1.943909626719057, + "grad_norm": 0.6053594350814819, + "learning_rate": 1.3912512605806006e-06, + "loss": 0.5347, + "step": 13192 + }, + { + "epoch": 1.944056974459725, + "grad_norm": 0.636357843875885, + "learning_rate": 1.3909037902330902e-06, + "loss": 0.5363, + "step": 13193 + }, + { + "epoch": 1.944204322200393, + "grad_norm": 0.6008695363998413, + "learning_rate": 1.3905563465588233e-06, + "loss": 0.4989, + "step": 13194 + }, + { + "epoch": 1.944351669941061, + "grad_norm": 0.6188035607337952, + "learning_rate": 1.3902089295661542e-06, + "loss": 0.5141, + "step": 13195 + }, + { + "epoch": 1.944499017681729, + "grad_norm": 0.6662555932998657, + "learning_rate": 1.3898615392634412e-06, + "loss": 0.5246, + "step": 13196 + }, + { + "epoch": 1.9446463654223969, + "grad_norm": 0.5844250917434692, + "learning_rate": 1.3895141756590368e-06, + "loss": 0.5542, + "step": 13197 + }, + { + "epoch": 1.9447937131630648, + "grad_norm": 0.5582199692726135, + "learning_rate": 1.3891668387612969e-06, + "loss": 0.5046, + "step": 13198 + }, + { + "epoch": 1.9449410609037328, + "grad_norm": 0.6120741367340088, + "learning_rate": 1.3888195285785725e-06, + "loss": 0.5132, + "step": 13199 + }, + { + "epoch": 1.9450884086444007, + "grad_norm": 0.6233428716659546, + "learning_rate": 1.388472245119218e-06, + "loss": 0.5273, + "step": 13200 + }, + { + "epoch": 1.9452357563850686, + "grad_norm": 0.6379364728927612, + "learning_rate": 1.3881249883915837e-06, + "loss": 0.474, + "step": 13201 + }, + { + "epoch": 1.9453831041257368, + "grad_norm": 0.6117990612983704, + "learning_rate": 1.3877777584040231e-06, + "loss": 0.5187, + "step": 13202 + }, + { + "epoch": 1.9455304518664047, + "grad_norm": 0.6249734163284302, + "learning_rate": 1.3874305551648845e-06, + "loss": 0.5229, + "step": 13203 + }, + { + "epoch": 1.9456777996070727, + "grad_norm": 0.5839127898216248, + "learning_rate": 1.3870833786825206e-06, + "loss": 0.534, + "step": 13204 + }, + { + "epoch": 1.9458251473477408, + "grad_norm": 0.5893152952194214, + "learning_rate": 1.386736228965278e-06, + "loss": 0.4718, + "step": 13205 + }, + { + "epoch": 1.9459724950884087, + "grad_norm": 0.6067469716072083, + "learning_rate": 1.3863891060215081e-06, + "loss": 0.5548, + "step": 13206 + }, + { + "epoch": 1.9461198428290767, + "grad_norm": 0.6023824214935303, + "learning_rate": 1.386042009859557e-06, + "loss": 0.497, + "step": 13207 + }, + { + "epoch": 1.9462671905697446, + "grad_norm": 0.5588300824165344, + "learning_rate": 1.385694940487774e-06, + "loss": 0.5111, + "step": 13208 + }, + { + "epoch": 1.9464145383104126, + "grad_norm": 0.617807149887085, + "learning_rate": 1.385347897914504e-06, + "loss": 0.5014, + "step": 13209 + }, + { + "epoch": 1.9465618860510805, + "grad_norm": 0.6102244257926941, + "learning_rate": 1.3850008821480953e-06, + "loss": 0.4965, + "step": 13210 + }, + { + "epoch": 1.9467092337917484, + "grad_norm": 0.5772694945335388, + "learning_rate": 1.3846538931968926e-06, + "loss": 0.5273, + "step": 13211 + }, + { + "epoch": 1.9468565815324164, + "grad_norm": 0.5683314800262451, + "learning_rate": 1.384306931069239e-06, + "loss": 0.5227, + "step": 13212 + }, + { + "epoch": 1.9470039292730845, + "grad_norm": 0.6473140120506287, + "learning_rate": 1.3839599957734824e-06, + "loss": 0.5133, + "step": 13213 + }, + { + "epoch": 1.9471512770137525, + "grad_norm": 0.6198661923408508, + "learning_rate": 1.3836130873179625e-06, + "loss": 0.5066, + "step": 13214 + }, + { + "epoch": 1.9472986247544204, + "grad_norm": 0.6041414737701416, + "learning_rate": 1.3832662057110258e-06, + "loss": 0.544, + "step": 13215 + }, + { + "epoch": 1.9474459724950886, + "grad_norm": 0.6149082183837891, + "learning_rate": 1.382919350961012e-06, + "loss": 0.496, + "step": 13216 + }, + { + "epoch": 1.9475933202357565, + "grad_norm": 0.6058870553970337, + "learning_rate": 1.3825725230762648e-06, + "loss": 0.5371, + "step": 13217 + }, + { + "epoch": 1.9477406679764244, + "grad_norm": 0.6066380143165588, + "learning_rate": 1.3822257220651233e-06, + "loss": 0.5187, + "step": 13218 + }, + { + "epoch": 1.9478880157170924, + "grad_norm": 0.6154372096061707, + "learning_rate": 1.3818789479359301e-06, + "loss": 0.5046, + "step": 13219 + }, + { + "epoch": 1.9480353634577603, + "grad_norm": 0.5821316838264465, + "learning_rate": 1.3815322006970225e-06, + "loss": 0.5094, + "step": 13220 + }, + { + "epoch": 1.9481827111984282, + "grad_norm": 0.5670087933540344, + "learning_rate": 1.381185480356742e-06, + "loss": 0.5338, + "step": 13221 + }, + { + "epoch": 1.9483300589390962, + "grad_norm": 0.5980899333953857, + "learning_rate": 1.380838786923425e-06, + "loss": 0.53, + "step": 13222 + }, + { + "epoch": 1.948477406679764, + "grad_norm": 0.6642991304397583, + "learning_rate": 1.380492120405411e-06, + "loss": 0.5348, + "step": 13223 + }, + { + "epoch": 1.9486247544204323, + "grad_norm": 0.5884350538253784, + "learning_rate": 1.3801454808110357e-06, + "loss": 0.5154, + "step": 13224 + }, + { + "epoch": 1.9487721021611002, + "grad_norm": 0.5983878970146179, + "learning_rate": 1.3797988681486374e-06, + "loss": 0.4904, + "step": 13225 + }, + { + "epoch": 1.9489194499017681, + "grad_norm": 0.6208047270774841, + "learning_rate": 1.3794522824265497e-06, + "loss": 0.5434, + "step": 13226 + }, + { + "epoch": 1.9490667976424363, + "grad_norm": 0.5990192294120789, + "learning_rate": 1.37910572365311e-06, + "loss": 0.5616, + "step": 13227 + }, + { + "epoch": 1.9492141453831042, + "grad_norm": 0.6040388345718384, + "learning_rate": 1.378759191836651e-06, + "loss": 0.5154, + "step": 13228 + }, + { + "epoch": 1.9493614931237722, + "grad_norm": 0.628852128982544, + "learning_rate": 1.3784126869855085e-06, + "loss": 0.5207, + "step": 13229 + }, + { + "epoch": 1.94950884086444, + "grad_norm": 0.620682418346405, + "learning_rate": 1.3780662091080138e-06, + "loss": 0.5182, + "step": 13230 + }, + { + "epoch": 1.949656188605108, + "grad_norm": 0.6000327467918396, + "learning_rate": 1.3777197582125007e-06, + "loss": 0.5263, + "step": 13231 + }, + { + "epoch": 1.949803536345776, + "grad_norm": 0.6379454731941223, + "learning_rate": 1.377373334307302e-06, + "loss": 0.5128, + "step": 13232 + }, + { + "epoch": 1.949950884086444, + "grad_norm": 0.5781944394111633, + "learning_rate": 1.3770269374007473e-06, + "loss": 0.5028, + "step": 13233 + }, + { + "epoch": 1.9500982318271118, + "grad_norm": 0.6158100366592407, + "learning_rate": 1.3766805675011684e-06, + "loss": 0.5194, + "step": 13234 + }, + { + "epoch": 1.95024557956778, + "grad_norm": 0.624638020992279, + "learning_rate": 1.3763342246168944e-06, + "loss": 0.5277, + "step": 13235 + }, + { + "epoch": 1.950392927308448, + "grad_norm": 0.5934603810310364, + "learning_rate": 1.3759879087562564e-06, + "loss": 0.5276, + "step": 13236 + }, + { + "epoch": 1.9505402750491159, + "grad_norm": 0.6146101355552673, + "learning_rate": 1.3756416199275808e-06, + "loss": 0.5117, + "step": 13237 + }, + { + "epoch": 1.950687622789784, + "grad_norm": 0.5867331624031067, + "learning_rate": 1.375295358139198e-06, + "loss": 0.5131, + "step": 13238 + }, + { + "epoch": 1.950834970530452, + "grad_norm": 0.5896748304367065, + "learning_rate": 1.3749491233994334e-06, + "loss": 0.5159, + "step": 13239 + }, + { + "epoch": 1.95098231827112, + "grad_norm": 0.6094586849212646, + "learning_rate": 1.374602915716615e-06, + "loss": 0.5104, + "step": 13240 + }, + { + "epoch": 1.9511296660117878, + "grad_norm": 0.5680878758430481, + "learning_rate": 1.3742567350990685e-06, + "loss": 0.5023, + "step": 13241 + }, + { + "epoch": 1.9512770137524558, + "grad_norm": 0.6670346260070801, + "learning_rate": 1.3739105815551202e-06, + "loss": 0.4844, + "step": 13242 + }, + { + "epoch": 1.9514243614931237, + "grad_norm": 0.6127878427505493, + "learning_rate": 1.3735644550930931e-06, + "loss": 0.5069, + "step": 13243 + }, + { + "epoch": 1.9515717092337916, + "grad_norm": 0.6049591302871704, + "learning_rate": 1.3732183557213138e-06, + "loss": 0.5198, + "step": 13244 + }, + { + "epoch": 1.9517190569744596, + "grad_norm": 0.6104769110679626, + "learning_rate": 1.3728722834481035e-06, + "loss": 0.4901, + "step": 13245 + }, + { + "epoch": 1.9518664047151277, + "grad_norm": 0.6085119247436523, + "learning_rate": 1.372526238281787e-06, + "loss": 0.5352, + "step": 13246 + }, + { + "epoch": 1.9520137524557957, + "grad_norm": 0.5872098207473755, + "learning_rate": 1.3721802202306848e-06, + "loss": 0.4907, + "step": 13247 + }, + { + "epoch": 1.9521611001964636, + "grad_norm": 0.5908783078193665, + "learning_rate": 1.37183422930312e-06, + "loss": 0.5354, + "step": 13248 + }, + { + "epoch": 1.9523084479371318, + "grad_norm": 0.616010844707489, + "learning_rate": 1.3714882655074121e-06, + "loss": 0.5185, + "step": 13249 + }, + { + "epoch": 1.9524557956777997, + "grad_norm": 0.6029312014579773, + "learning_rate": 1.3711423288518832e-06, + "loss": 0.5374, + "step": 13250 + }, + { + "epoch": 1.9526031434184676, + "grad_norm": 0.6026750206947327, + "learning_rate": 1.3707964193448505e-06, + "loss": 0.4985, + "step": 13251 + }, + { + "epoch": 1.9527504911591356, + "grad_norm": 0.6120471358299255, + "learning_rate": 1.3704505369946356e-06, + "loss": 0.4761, + "step": 13252 + }, + { + "epoch": 1.9528978388998035, + "grad_norm": 0.5837117433547974, + "learning_rate": 1.3701046818095542e-06, + "loss": 0.4824, + "step": 13253 + }, + { + "epoch": 1.9530451866404714, + "grad_norm": 0.612709641456604, + "learning_rate": 1.3697588537979268e-06, + "loss": 0.4931, + "step": 13254 + }, + { + "epoch": 1.9531925343811394, + "grad_norm": 0.6086157560348511, + "learning_rate": 1.369413052968067e-06, + "loss": 0.506, + "step": 13255 + }, + { + "epoch": 1.9533398821218073, + "grad_norm": 0.6202586889266968, + "learning_rate": 1.3690672793282947e-06, + "loss": 0.5257, + "step": 13256 + }, + { + "epoch": 1.9534872298624755, + "grad_norm": 0.5902076363563538, + "learning_rate": 1.3687215328869228e-06, + "loss": 0.5425, + "step": 13257 + }, + { + "epoch": 1.9536345776031434, + "grad_norm": 0.5693423748016357, + "learning_rate": 1.3683758136522682e-06, + "loss": 0.5196, + "step": 13258 + }, + { + "epoch": 1.9537819253438113, + "grad_norm": 0.583856463432312, + "learning_rate": 1.3680301216326446e-06, + "loss": 0.5304, + "step": 13259 + }, + { + "epoch": 1.9539292730844795, + "grad_norm": 0.6084807515144348, + "learning_rate": 1.3676844568363646e-06, + "loss": 0.5177, + "step": 13260 + }, + { + "epoch": 1.9540766208251474, + "grad_norm": 0.6369236707687378, + "learning_rate": 1.3673388192717435e-06, + "loss": 0.5109, + "step": 13261 + }, + { + "epoch": 1.9542239685658154, + "grad_norm": 0.5957422256469727, + "learning_rate": 1.3669932089470914e-06, + "loss": 0.5089, + "step": 13262 + }, + { + "epoch": 1.9543713163064833, + "grad_norm": 0.6018698811531067, + "learning_rate": 1.3666476258707224e-06, + "loss": 0.5195, + "step": 13263 + }, + { + "epoch": 1.9545186640471512, + "grad_norm": 0.6096877455711365, + "learning_rate": 1.366302070050945e-06, + "loss": 0.5194, + "step": 13264 + }, + { + "epoch": 1.9546660117878192, + "grad_norm": 0.6176289916038513, + "learning_rate": 1.3659565414960724e-06, + "loss": 0.5116, + "step": 13265 + }, + { + "epoch": 1.9548133595284871, + "grad_norm": 0.6679618954658508, + "learning_rate": 1.3656110402144121e-06, + "loss": 0.5383, + "step": 13266 + }, + { + "epoch": 1.954960707269155, + "grad_norm": 0.6047735810279846, + "learning_rate": 1.3652655662142753e-06, + "loss": 0.508, + "step": 13267 + }, + { + "epoch": 1.9551080550098232, + "grad_norm": 0.6103686690330505, + "learning_rate": 1.3649201195039671e-06, + "loss": 0.4994, + "step": 13268 + }, + { + "epoch": 1.9552554027504911, + "grad_norm": 0.5784331560134888, + "learning_rate": 1.3645747000917997e-06, + "loss": 0.5082, + "step": 13269 + }, + { + "epoch": 1.955402750491159, + "grad_norm": 0.6254082322120667, + "learning_rate": 1.3642293079860775e-06, + "loss": 0.5244, + "step": 13270 + }, + { + "epoch": 1.9555500982318272, + "grad_norm": 0.611113429069519, + "learning_rate": 1.3638839431951084e-06, + "loss": 0.5038, + "step": 13271 + }, + { + "epoch": 1.9556974459724952, + "grad_norm": 0.5750036239624023, + "learning_rate": 1.3635386057271972e-06, + "loss": 0.5323, + "step": 13272 + }, + { + "epoch": 1.955844793713163, + "grad_norm": 0.5836424827575684, + "learning_rate": 1.3631932955906497e-06, + "loss": 0.5038, + "step": 13273 + }, + { + "epoch": 1.955992141453831, + "grad_norm": 0.6166847348213196, + "learning_rate": 1.3628480127937698e-06, + "loss": 0.5434, + "step": 13274 + }, + { + "epoch": 1.956139489194499, + "grad_norm": 0.6200598478317261, + "learning_rate": 1.3625027573448623e-06, + "loss": 0.493, + "step": 13275 + }, + { + "epoch": 1.956286836935167, + "grad_norm": 0.5659326314926147, + "learning_rate": 1.3621575292522293e-06, + "loss": 0.5047, + "step": 13276 + }, + { + "epoch": 1.9564341846758349, + "grad_norm": 0.5790144205093384, + "learning_rate": 1.3618123285241752e-06, + "loss": 0.5056, + "step": 13277 + }, + { + "epoch": 1.9565815324165028, + "grad_norm": 0.6412802934646606, + "learning_rate": 1.3614671551690006e-06, + "loss": 0.5169, + "step": 13278 + }, + { + "epoch": 1.956728880157171, + "grad_norm": 0.618565559387207, + "learning_rate": 1.3611220091950061e-06, + "loss": 0.5485, + "step": 13279 + }, + { + "epoch": 1.9568762278978389, + "grad_norm": 0.6000627279281616, + "learning_rate": 1.3607768906104941e-06, + "loss": 0.5212, + "step": 13280 + }, + { + "epoch": 1.957023575638507, + "grad_norm": 0.552536129951477, + "learning_rate": 1.360431799423762e-06, + "loss": 0.5033, + "step": 13281 + }, + { + "epoch": 1.957170923379175, + "grad_norm": 0.5980793237686157, + "learning_rate": 1.3600867356431123e-06, + "loss": 0.5029, + "step": 13282 + }, + { + "epoch": 1.957318271119843, + "grad_norm": 0.5965638756752014, + "learning_rate": 1.3597416992768403e-06, + "loss": 0.5014, + "step": 13283 + }, + { + "epoch": 1.9574656188605108, + "grad_norm": 0.6204590201377869, + "learning_rate": 1.3593966903332472e-06, + "loss": 0.5307, + "step": 13284 + }, + { + "epoch": 1.9576129666011788, + "grad_norm": 0.5969483256340027, + "learning_rate": 1.3590517088206273e-06, + "loss": 0.55, + "step": 13285 + }, + { + "epoch": 1.9577603143418467, + "grad_norm": 0.5976953506469727, + "learning_rate": 1.3587067547472802e-06, + "loss": 0.5182, + "step": 13286 + }, + { + "epoch": 1.9579076620825147, + "grad_norm": 0.5866329669952393, + "learning_rate": 1.3583618281214988e-06, + "loss": 0.5045, + "step": 13287 + }, + { + "epoch": 1.9580550098231826, + "grad_norm": 0.6362663507461548, + "learning_rate": 1.3580169289515813e-06, + "loss": 0.5322, + "step": 13288 + }, + { + "epoch": 1.9582023575638505, + "grad_norm": 0.588513970375061, + "learning_rate": 1.3576720572458199e-06, + "loss": 0.5018, + "step": 13289 + }, + { + "epoch": 1.9583497053045187, + "grad_norm": 0.581517219543457, + "learning_rate": 1.357327213012511e-06, + "loss": 0.5238, + "step": 13290 + }, + { + "epoch": 1.9584970530451866, + "grad_norm": 0.6159449815750122, + "learning_rate": 1.3569823962599455e-06, + "loss": 0.5032, + "step": 13291 + }, + { + "epoch": 1.9586444007858548, + "grad_norm": 0.5982994437217712, + "learning_rate": 1.3566376069964183e-06, + "loss": 0.486, + "step": 13292 + }, + { + "epoch": 1.9587917485265227, + "grad_norm": 0.6005967259407043, + "learning_rate": 1.3562928452302191e-06, + "loss": 0.5075, + "step": 13293 + }, + { + "epoch": 1.9589390962671906, + "grad_norm": 0.6285942792892456, + "learning_rate": 1.3559481109696414e-06, + "loss": 0.5306, + "step": 13294 + }, + { + "epoch": 1.9590864440078586, + "grad_norm": 0.5879655480384827, + "learning_rate": 1.3556034042229745e-06, + "loss": 0.5285, + "step": 13295 + }, + { + "epoch": 1.9592337917485265, + "grad_norm": 0.6235517263412476, + "learning_rate": 1.35525872499851e-06, + "loss": 0.5147, + "step": 13296 + }, + { + "epoch": 1.9593811394891945, + "grad_norm": 0.5745067596435547, + "learning_rate": 1.354914073304535e-06, + "loss": 0.5054, + "step": 13297 + }, + { + "epoch": 1.9595284872298624, + "grad_norm": 0.581826388835907, + "learning_rate": 1.3545694491493405e-06, + "loss": 0.5442, + "step": 13298 + }, + { + "epoch": 1.9596758349705303, + "grad_norm": 0.679551362991333, + "learning_rate": 1.3542248525412128e-06, + "loss": 0.5087, + "step": 13299 + }, + { + "epoch": 1.9598231827111983, + "grad_norm": 0.5961521863937378, + "learning_rate": 1.3538802834884407e-06, + "loss": 0.5355, + "step": 13300 + }, + { + "epoch": 1.9599705304518664, + "grad_norm": 0.6236521005630493, + "learning_rate": 1.3535357419993096e-06, + "loss": 0.5155, + "step": 13301 + }, + { + "epoch": 1.9601178781925344, + "grad_norm": 0.6299113035202026, + "learning_rate": 1.3531912280821067e-06, + "loss": 0.5247, + "step": 13302 + }, + { + "epoch": 1.9602652259332025, + "grad_norm": 0.5939375758171082, + "learning_rate": 1.3528467417451159e-06, + "loss": 0.4895, + "step": 13303 + }, + { + "epoch": 1.9604125736738705, + "grad_norm": 0.6153790354728699, + "learning_rate": 1.3525022829966238e-06, + "loss": 0.5331, + "step": 13304 + }, + { + "epoch": 1.9605599214145384, + "grad_norm": 0.6279494166374207, + "learning_rate": 1.352157851844913e-06, + "loss": 0.5264, + "step": 13305 + }, + { + "epoch": 1.9607072691552063, + "grad_norm": 0.6181237697601318, + "learning_rate": 1.3518134482982676e-06, + "loss": 0.545, + "step": 13306 + }, + { + "epoch": 1.9608546168958743, + "grad_norm": 0.6178293228149414, + "learning_rate": 1.351469072364971e-06, + "loss": 0.4914, + "step": 13307 + }, + { + "epoch": 1.9610019646365422, + "grad_norm": 0.6071099638938904, + "learning_rate": 1.3511247240533031e-06, + "loss": 0.5296, + "step": 13308 + }, + { + "epoch": 1.9611493123772101, + "grad_norm": 0.5841922163963318, + "learning_rate": 1.3507804033715483e-06, + "loss": 0.5176, + "step": 13309 + }, + { + "epoch": 1.961296660117878, + "grad_norm": 0.6222324371337891, + "learning_rate": 1.3504361103279849e-06, + "loss": 0.5258, + "step": 13310 + }, + { + "epoch": 1.9614440078585462, + "grad_norm": 0.602834939956665, + "learning_rate": 1.3500918449308941e-06, + "loss": 0.4833, + "step": 13311 + }, + { + "epoch": 1.9615913555992142, + "grad_norm": 0.6048809885978699, + "learning_rate": 1.3497476071885548e-06, + "loss": 0.5523, + "step": 13312 + }, + { + "epoch": 1.961738703339882, + "grad_norm": 0.5909993648529053, + "learning_rate": 1.3494033971092468e-06, + "loss": 0.5114, + "step": 13313 + }, + { + "epoch": 1.9618860510805503, + "grad_norm": 0.5829167366027832, + "learning_rate": 1.3490592147012465e-06, + "loss": 0.5216, + "step": 13314 + }, + { + "epoch": 1.9620333988212182, + "grad_norm": 0.594325065612793, + "learning_rate": 1.3487150599728332e-06, + "loss": 0.5196, + "step": 13315 + }, + { + "epoch": 1.9621807465618861, + "grad_norm": 0.6067898869514465, + "learning_rate": 1.348370932932282e-06, + "loss": 0.4985, + "step": 13316 + }, + { + "epoch": 1.962328094302554, + "grad_norm": 0.5971085429191589, + "learning_rate": 1.3480268335878702e-06, + "loss": 0.5088, + "step": 13317 + }, + { + "epoch": 1.962475442043222, + "grad_norm": 0.6222856640815735, + "learning_rate": 1.347682761947872e-06, + "loss": 0.4948, + "step": 13318 + }, + { + "epoch": 1.96262278978389, + "grad_norm": 0.6214535236358643, + "learning_rate": 1.347338718020564e-06, + "loss": 0.5372, + "step": 13319 + }, + { + "epoch": 1.9627701375245579, + "grad_norm": 0.5992721915245056, + "learning_rate": 1.3469947018142182e-06, + "loss": 0.5268, + "step": 13320 + }, + { + "epoch": 1.9629174852652258, + "grad_norm": 0.6543217301368713, + "learning_rate": 1.3466507133371098e-06, + "loss": 0.5304, + "step": 13321 + }, + { + "epoch": 1.963064833005894, + "grad_norm": 0.6056069135665894, + "learning_rate": 1.3463067525975098e-06, + "loss": 0.5418, + "step": 13322 + }, + { + "epoch": 1.963212180746562, + "grad_norm": 0.6097806692123413, + "learning_rate": 1.3459628196036925e-06, + "loss": 0.5322, + "step": 13323 + }, + { + "epoch": 1.9633595284872298, + "grad_norm": 0.6263090968132019, + "learning_rate": 1.345618914363927e-06, + "loss": 0.5063, + "step": 13324 + }, + { + "epoch": 1.963506876227898, + "grad_norm": 0.6231069564819336, + "learning_rate": 1.3452750368864862e-06, + "loss": 0.5332, + "step": 13325 + }, + { + "epoch": 1.963654223968566, + "grad_norm": 0.6275864243507385, + "learning_rate": 1.3449311871796391e-06, + "loss": 0.5303, + "step": 13326 + }, + { + "epoch": 1.9638015717092339, + "grad_norm": 0.605265736579895, + "learning_rate": 1.3445873652516544e-06, + "loss": 0.5257, + "step": 13327 + }, + { + "epoch": 1.9639489194499018, + "grad_norm": 0.5878580808639526, + "learning_rate": 1.3442435711108022e-06, + "loss": 0.5238, + "step": 13328 + }, + { + "epoch": 1.9640962671905697, + "grad_norm": 0.5957210063934326, + "learning_rate": 1.3438998047653496e-06, + "loss": 0.5154, + "step": 13329 + }, + { + "epoch": 1.9642436149312377, + "grad_norm": 0.5658184289932251, + "learning_rate": 1.3435560662235653e-06, + "loss": 0.4979, + "step": 13330 + }, + { + "epoch": 1.9643909626719056, + "grad_norm": 0.6606426239013672, + "learning_rate": 1.3432123554937138e-06, + "loss": 0.4903, + "step": 13331 + }, + { + "epoch": 1.9645383104125735, + "grad_norm": 0.5757264494895935, + "learning_rate": 1.342868672584064e-06, + "loss": 0.5558, + "step": 13332 + }, + { + "epoch": 1.9646856581532417, + "grad_norm": 0.6069794297218323, + "learning_rate": 1.3425250175028787e-06, + "loss": 0.5378, + "step": 13333 + }, + { + "epoch": 1.9648330058939096, + "grad_norm": 0.6457976698875427, + "learning_rate": 1.342181390258425e-06, + "loss": 0.528, + "step": 13334 + }, + { + "epoch": 1.9649803536345776, + "grad_norm": 0.5731423497200012, + "learning_rate": 1.3418377908589648e-06, + "loss": 0.4879, + "step": 13335 + }, + { + "epoch": 1.9651277013752457, + "grad_norm": 0.5935949683189392, + "learning_rate": 1.3414942193127634e-06, + "loss": 0.4854, + "step": 13336 + }, + { + "epoch": 1.9652750491159137, + "grad_norm": 0.6034498810768127, + "learning_rate": 1.3411506756280816e-06, + "loss": 0.5294, + "step": 13337 + }, + { + "epoch": 1.9654223968565816, + "grad_norm": 0.5949940085411072, + "learning_rate": 1.3408071598131834e-06, + "loss": 0.5141, + "step": 13338 + }, + { + "epoch": 1.9655697445972495, + "grad_norm": 0.6341071128845215, + "learning_rate": 1.3404636718763286e-06, + "loss": 0.5024, + "step": 13339 + }, + { + "epoch": 1.9657170923379175, + "grad_norm": 0.6205683350563049, + "learning_rate": 1.3401202118257794e-06, + "loss": 0.5604, + "step": 13340 + }, + { + "epoch": 1.9658644400785854, + "grad_norm": 0.6112958788871765, + "learning_rate": 1.3397767796697945e-06, + "loss": 0.5207, + "step": 13341 + }, + { + "epoch": 1.9660117878192533, + "grad_norm": 0.6172552108764648, + "learning_rate": 1.3394333754166336e-06, + "loss": 0.5286, + "step": 13342 + }, + { + "epoch": 1.9661591355599213, + "grad_norm": 0.6068810820579529, + "learning_rate": 1.3390899990745565e-06, + "loss": 0.5061, + "step": 13343 + }, + { + "epoch": 1.9663064833005894, + "grad_norm": 0.6283009052276611, + "learning_rate": 1.3387466506518198e-06, + "loss": 0.5485, + "step": 13344 + }, + { + "epoch": 1.9664538310412574, + "grad_norm": 0.5859224796295166, + "learning_rate": 1.3384033301566824e-06, + "loss": 0.515, + "step": 13345 + }, + { + "epoch": 1.9666011787819253, + "grad_norm": 0.6204249858856201, + "learning_rate": 1.338060037597399e-06, + "loss": 0.5095, + "step": 13346 + }, + { + "epoch": 1.9667485265225935, + "grad_norm": 0.5851289629936218, + "learning_rate": 1.3377167729822276e-06, + "loss": 0.5346, + "step": 13347 + }, + { + "epoch": 1.9668958742632614, + "grad_norm": 0.5987462997436523, + "learning_rate": 1.3373735363194219e-06, + "loss": 0.5257, + "step": 13348 + }, + { + "epoch": 1.9670432220039293, + "grad_norm": 0.5874655842781067, + "learning_rate": 1.337030327617238e-06, + "loss": 0.519, + "step": 13349 + }, + { + "epoch": 1.9671905697445973, + "grad_norm": 0.6284695267677307, + "learning_rate": 1.3366871468839287e-06, + "loss": 0.5246, + "step": 13350 + }, + { + "epoch": 1.9673379174852652, + "grad_norm": 0.6410111784934998, + "learning_rate": 1.3363439941277484e-06, + "loss": 0.4911, + "step": 13351 + }, + { + "epoch": 1.9674852652259331, + "grad_norm": 0.6982108950614929, + "learning_rate": 1.3360008693569487e-06, + "loss": 0.5071, + "step": 13352 + }, + { + "epoch": 1.967632612966601, + "grad_norm": 0.6144706010818481, + "learning_rate": 1.335657772579783e-06, + "loss": 0.5256, + "step": 13353 + }, + { + "epoch": 1.967779960707269, + "grad_norm": 0.5860385298728943, + "learning_rate": 1.3353147038045005e-06, + "loss": 0.5222, + "step": 13354 + }, + { + "epoch": 1.9679273084479372, + "grad_norm": 0.6057323217391968, + "learning_rate": 1.3349716630393544e-06, + "loss": 0.5159, + "step": 13355 + }, + { + "epoch": 1.968074656188605, + "grad_norm": 0.5962213277816772, + "learning_rate": 1.3346286502925923e-06, + "loss": 0.5423, + "step": 13356 + }, + { + "epoch": 1.968222003929273, + "grad_norm": 0.6236079335212708, + "learning_rate": 1.3342856655724656e-06, + "loss": 0.5355, + "step": 13357 + }, + { + "epoch": 1.9683693516699412, + "grad_norm": 0.6194347143173218, + "learning_rate": 1.3339427088872208e-06, + "loss": 0.5238, + "step": 13358 + }, + { + "epoch": 1.9685166994106091, + "grad_norm": 0.6752691268920898, + "learning_rate": 1.3335997802451081e-06, + "loss": 0.546, + "step": 13359 + }, + { + "epoch": 1.968664047151277, + "grad_norm": 0.5769377946853638, + "learning_rate": 1.3332568796543727e-06, + "loss": 0.5063, + "step": 13360 + }, + { + "epoch": 1.968811394891945, + "grad_norm": 0.6007162928581238, + "learning_rate": 1.3329140071232626e-06, + "loss": 0.5211, + "step": 13361 + }, + { + "epoch": 1.968958742632613, + "grad_norm": 0.6608765721321106, + "learning_rate": 1.3325711626600226e-06, + "loss": 0.513, + "step": 13362 + }, + { + "epoch": 1.9691060903732809, + "grad_norm": 0.6232946515083313, + "learning_rate": 1.3322283462728997e-06, + "loss": 0.529, + "step": 13363 + }, + { + "epoch": 1.9692534381139488, + "grad_norm": 0.6254937052726746, + "learning_rate": 1.3318855579701362e-06, + "loss": 0.5357, + "step": 13364 + }, + { + "epoch": 1.9694007858546168, + "grad_norm": 0.6143460869789124, + "learning_rate": 1.3315427977599783e-06, + "loss": 0.5517, + "step": 13365 + }, + { + "epoch": 1.969548133595285, + "grad_norm": 0.644669234752655, + "learning_rate": 1.3312000656506668e-06, + "loss": 0.5105, + "step": 13366 + }, + { + "epoch": 1.9696954813359528, + "grad_norm": 0.6192106604576111, + "learning_rate": 1.3308573616504466e-06, + "loss": 0.5307, + "step": 13367 + }, + { + "epoch": 1.9698428290766208, + "grad_norm": 0.6088021993637085, + "learning_rate": 1.3305146857675573e-06, + "loss": 0.5219, + "step": 13368 + }, + { + "epoch": 1.969990176817289, + "grad_norm": 0.6013450026512146, + "learning_rate": 1.3301720380102425e-06, + "loss": 0.5119, + "step": 13369 + }, + { + "epoch": 1.9701375245579569, + "grad_norm": 0.5958139300346375, + "learning_rate": 1.3298294183867406e-06, + "loss": 0.5098, + "step": 13370 + }, + { + "epoch": 1.9702848722986248, + "grad_norm": 0.588196337223053, + "learning_rate": 1.3294868269052931e-06, + "loss": 0.4933, + "step": 13371 + }, + { + "epoch": 1.9704322200392927, + "grad_norm": 0.6080529093742371, + "learning_rate": 1.3291442635741378e-06, + "loss": 0.5247, + "step": 13372 + }, + { + "epoch": 1.9705795677799607, + "grad_norm": 0.6147506237030029, + "learning_rate": 1.3288017284015146e-06, + "loss": 0.5372, + "step": 13373 + }, + { + "epoch": 1.9707269155206286, + "grad_norm": 0.6242337822914124, + "learning_rate": 1.3284592213956604e-06, + "loss": 0.5376, + "step": 13374 + }, + { + "epoch": 1.9708742632612966, + "grad_norm": 0.6163382530212402, + "learning_rate": 1.3281167425648115e-06, + "loss": 0.5158, + "step": 13375 + }, + { + "epoch": 1.9710216110019645, + "grad_norm": 0.5914702415466309, + "learning_rate": 1.3277742919172063e-06, + "loss": 0.5288, + "step": 13376 + }, + { + "epoch": 1.9711689587426326, + "grad_norm": 0.5891701579093933, + "learning_rate": 1.3274318694610787e-06, + "loss": 0.524, + "step": 13377 + }, + { + "epoch": 1.9713163064833006, + "grad_norm": 0.5829471349716187, + "learning_rate": 1.3270894752046654e-06, + "loss": 0.5485, + "step": 13378 + }, + { + "epoch": 1.9714636542239685, + "grad_norm": 0.625963568687439, + "learning_rate": 1.3267471091561984e-06, + "loss": 0.5124, + "step": 13379 + }, + { + "epoch": 1.9716110019646367, + "grad_norm": 0.6041393280029297, + "learning_rate": 1.3264047713239147e-06, + "loss": 0.5189, + "step": 13380 + }, + { + "epoch": 1.9717583497053046, + "grad_norm": 0.6362062692642212, + "learning_rate": 1.3260624617160452e-06, + "loss": 0.4821, + "step": 13381 + }, + { + "epoch": 1.9719056974459725, + "grad_norm": 0.6026150584220886, + "learning_rate": 1.3257201803408238e-06, + "loss": 0.5529, + "step": 13382 + }, + { + "epoch": 1.9720530451866405, + "grad_norm": 0.604938268661499, + "learning_rate": 1.3253779272064803e-06, + "loss": 0.5428, + "step": 13383 + }, + { + "epoch": 1.9722003929273084, + "grad_norm": 0.653922975063324, + "learning_rate": 1.3250357023212476e-06, + "loss": 0.5618, + "step": 13384 + }, + { + "epoch": 1.9723477406679764, + "grad_norm": 0.6426324248313904, + "learning_rate": 1.3246935056933543e-06, + "loss": 0.4999, + "step": 13385 + }, + { + "epoch": 1.9724950884086443, + "grad_norm": 0.6487461924552917, + "learning_rate": 1.3243513373310319e-06, + "loss": 0.5295, + "step": 13386 + }, + { + "epoch": 1.9726424361493122, + "grad_norm": 0.5976526737213135, + "learning_rate": 1.3240091972425074e-06, + "loss": 0.5317, + "step": 13387 + }, + { + "epoch": 1.9727897838899804, + "grad_norm": 0.604058027267456, + "learning_rate": 1.3236670854360111e-06, + "loss": 0.524, + "step": 13388 + }, + { + "epoch": 1.9729371316306483, + "grad_norm": 0.612758219242096, + "learning_rate": 1.3233250019197686e-06, + "loss": 0.5593, + "step": 13389 + }, + { + "epoch": 1.9730844793713163, + "grad_norm": 0.5935797095298767, + "learning_rate": 1.322982946702009e-06, + "loss": 0.5181, + "step": 13390 + }, + { + "epoch": 1.9732318271119844, + "grad_norm": 0.6022918820381165, + "learning_rate": 1.3226409197909568e-06, + "loss": 0.5532, + "step": 13391 + }, + { + "epoch": 1.9733791748526524, + "grad_norm": 0.5808506608009338, + "learning_rate": 1.322298921194839e-06, + "loss": 0.5194, + "step": 13392 + }, + { + "epoch": 1.9735265225933203, + "grad_norm": 0.6395818591117859, + "learning_rate": 1.3219569509218794e-06, + "loss": 0.5321, + "step": 13393 + }, + { + "epoch": 1.9736738703339882, + "grad_norm": 0.5984014272689819, + "learning_rate": 1.3216150089803021e-06, + "loss": 0.5213, + "step": 13394 + }, + { + "epoch": 1.9738212180746562, + "grad_norm": 0.5955034494400024, + "learning_rate": 1.321273095378332e-06, + "loss": 0.5049, + "step": 13395 + }, + { + "epoch": 1.973968565815324, + "grad_norm": 0.6307682394981384, + "learning_rate": 1.32093121012419e-06, + "loss": 0.5393, + "step": 13396 + }, + { + "epoch": 1.974115913555992, + "grad_norm": 0.6029784083366394, + "learning_rate": 1.3205893532261005e-06, + "loss": 0.5059, + "step": 13397 + }, + { + "epoch": 1.97426326129666, + "grad_norm": 0.6213921308517456, + "learning_rate": 1.3202475246922827e-06, + "loss": 0.5275, + "step": 13398 + }, + { + "epoch": 1.9744106090373281, + "grad_norm": 0.5824012160301208, + "learning_rate": 1.3199057245309597e-06, + "loss": 0.539, + "step": 13399 + }, + { + "epoch": 1.974557956777996, + "grad_norm": 0.5843321681022644, + "learning_rate": 1.3195639527503496e-06, + "loss": 0.5424, + "step": 13400 + }, + { + "epoch": 1.974705304518664, + "grad_norm": 0.6444269418716431, + "learning_rate": 1.3192222093586738e-06, + "loss": 0.5223, + "step": 13401 + }, + { + "epoch": 1.9748526522593322, + "grad_norm": 0.6285486817359924, + "learning_rate": 1.3188804943641487e-06, + "loss": 0.5291, + "step": 13402 + }, + { + "epoch": 1.975, + "grad_norm": 0.6164600253105164, + "learning_rate": 1.318538807774995e-06, + "loss": 0.5479, + "step": 13403 + }, + { + "epoch": 1.975147347740668, + "grad_norm": 0.5983991622924805, + "learning_rate": 1.3181971495994277e-06, + "loss": 0.5027, + "step": 13404 + }, + { + "epoch": 1.975294695481336, + "grad_norm": 0.6574848890304565, + "learning_rate": 1.317855519845666e-06, + "loss": 0.527, + "step": 13405 + }, + { + "epoch": 1.975442043222004, + "grad_norm": 0.5834578275680542, + "learning_rate": 1.3175139185219232e-06, + "loss": 0.5217, + "step": 13406 + }, + { + "epoch": 1.9755893909626718, + "grad_norm": 0.640695333480835, + "learning_rate": 1.3171723456364174e-06, + "loss": 0.5315, + "step": 13407 + }, + { + "epoch": 1.9757367387033398, + "grad_norm": 0.6144391298294067, + "learning_rate": 1.3168308011973606e-06, + "loss": 0.5319, + "step": 13408 + }, + { + "epoch": 1.9758840864440077, + "grad_norm": 0.5978763103485107, + "learning_rate": 1.3164892852129693e-06, + "loss": 0.5374, + "step": 13409 + }, + { + "epoch": 1.9760314341846759, + "grad_norm": 0.6351933479309082, + "learning_rate": 1.3161477976914544e-06, + "loss": 0.5169, + "step": 13410 + }, + { + "epoch": 1.9761787819253438, + "grad_norm": 0.589931309223175, + "learning_rate": 1.3158063386410308e-06, + "loss": 0.5208, + "step": 13411 + }, + { + "epoch": 1.9763261296660117, + "grad_norm": 0.6038304567337036, + "learning_rate": 1.3154649080699083e-06, + "loss": 0.5, + "step": 13412 + }, + { + "epoch": 1.97647347740668, + "grad_norm": 0.6363475918769836, + "learning_rate": 1.3151235059863004e-06, + "loss": 0.5025, + "step": 13413 + }, + { + "epoch": 1.9766208251473478, + "grad_norm": 0.5594498515129089, + "learning_rate": 1.3147821323984154e-06, + "loss": 0.5004, + "step": 13414 + }, + { + "epoch": 1.9767681728880158, + "grad_norm": 0.5767444372177124, + "learning_rate": 1.314440787314465e-06, + "loss": 0.5224, + "step": 13415 + }, + { + "epoch": 1.9769155206286837, + "grad_norm": 0.5823526978492737, + "learning_rate": 1.3140994707426565e-06, + "loss": 0.5168, + "step": 13416 + }, + { + "epoch": 1.9770628683693516, + "grad_norm": 0.6050450801849365, + "learning_rate": 1.3137581826912e-06, + "loss": 0.5056, + "step": 13417 + }, + { + "epoch": 1.9772102161100196, + "grad_norm": 0.6275569796562195, + "learning_rate": 1.3134169231683034e-06, + "loss": 0.5317, + "step": 13418 + }, + { + "epoch": 1.9773575638506875, + "grad_norm": 0.6536324620246887, + "learning_rate": 1.3130756921821725e-06, + "loss": 0.5442, + "step": 13419 + }, + { + "epoch": 1.9775049115913554, + "grad_norm": 0.6322187185287476, + "learning_rate": 1.3127344897410151e-06, + "loss": 0.5494, + "step": 13420 + }, + { + "epoch": 1.9776522593320236, + "grad_norm": 0.621344268321991, + "learning_rate": 1.3123933158530356e-06, + "loss": 0.5289, + "step": 13421 + }, + { + "epoch": 1.9777996070726915, + "grad_norm": 0.6109063029289246, + "learning_rate": 1.3120521705264412e-06, + "loss": 0.5153, + "step": 13422 + }, + { + "epoch": 1.9779469548133597, + "grad_norm": 0.6167712211608887, + "learning_rate": 1.3117110537694333e-06, + "loss": 0.5074, + "step": 13423 + }, + { + "epoch": 1.9780943025540276, + "grad_norm": 0.5881867408752441, + "learning_rate": 1.3113699655902185e-06, + "loss": 0.519, + "step": 13424 + }, + { + "epoch": 1.9782416502946956, + "grad_norm": 0.608514666557312, + "learning_rate": 1.3110289059969978e-06, + "loss": 0.5088, + "step": 13425 + }, + { + "epoch": 1.9783889980353635, + "grad_norm": 0.63304203748703, + "learning_rate": 1.3106878749979745e-06, + "loss": 0.51, + "step": 13426 + }, + { + "epoch": 1.9785363457760314, + "grad_norm": 0.5914740562438965, + "learning_rate": 1.3103468726013497e-06, + "loss": 0.5129, + "step": 13427 + }, + { + "epoch": 1.9786836935166994, + "grad_norm": 0.5708059668540955, + "learning_rate": 1.3100058988153253e-06, + "loss": 0.5191, + "step": 13428 + }, + { + "epoch": 1.9788310412573673, + "grad_norm": 0.5969638824462891, + "learning_rate": 1.3096649536481002e-06, + "loss": 0.4968, + "step": 13429 + }, + { + "epoch": 1.9789783889980352, + "grad_norm": 0.623920202255249, + "learning_rate": 1.3093240371078753e-06, + "loss": 0.5416, + "step": 13430 + }, + { + "epoch": 1.9791257367387032, + "grad_norm": 0.5709536671638489, + "learning_rate": 1.3089831492028481e-06, + "loss": 0.5092, + "step": 13431 + }, + { + "epoch": 1.9792730844793713, + "grad_norm": 0.6075456142425537, + "learning_rate": 1.3086422899412182e-06, + "loss": 0.5557, + "step": 13432 + }, + { + "epoch": 1.9794204322200393, + "grad_norm": 0.6093226671218872, + "learning_rate": 1.3083014593311818e-06, + "loss": 0.5593, + "step": 13433 + }, + { + "epoch": 1.9795677799607074, + "grad_norm": 0.630101203918457, + "learning_rate": 1.3079606573809373e-06, + "loss": 0.5151, + "step": 13434 + }, + { + "epoch": 1.9797151277013754, + "grad_norm": 0.6462607979774475, + "learning_rate": 1.3076198840986786e-06, + "loss": 0.557, + "step": 13435 + }, + { + "epoch": 1.9798624754420433, + "grad_norm": 0.5855913758277893, + "learning_rate": 1.3072791394926039e-06, + "loss": 0.5253, + "step": 13436 + }, + { + "epoch": 1.9800098231827112, + "grad_norm": 0.6189942955970764, + "learning_rate": 1.3069384235709054e-06, + "loss": 0.5406, + "step": 13437 + }, + { + "epoch": 1.9801571709233792, + "grad_norm": 0.5930195450782776, + "learning_rate": 1.306597736341779e-06, + "loss": 0.5368, + "step": 13438 + }, + { + "epoch": 1.980304518664047, + "grad_norm": 0.6226943135261536, + "learning_rate": 1.3062570778134165e-06, + "loss": 0.5321, + "step": 13439 + }, + { + "epoch": 1.980451866404715, + "grad_norm": 0.6073375344276428, + "learning_rate": 1.305916447994012e-06, + "loss": 0.51, + "step": 13440 + }, + { + "epoch": 1.980599214145383, + "grad_norm": 0.6057597398757935, + "learning_rate": 1.3055758468917572e-06, + "loss": 0.5308, + "step": 13441 + }, + { + "epoch": 1.980746561886051, + "grad_norm": 0.612257182598114, + "learning_rate": 1.305235274514842e-06, + "loss": 0.5368, + "step": 13442 + }, + { + "epoch": 1.980893909626719, + "grad_norm": 0.5914848446846008, + "learning_rate": 1.3048947308714588e-06, + "loss": 0.5366, + "step": 13443 + }, + { + "epoch": 1.981041257367387, + "grad_norm": 0.626958429813385, + "learning_rate": 1.3045542159697964e-06, + "loss": 0.5421, + "step": 13444 + }, + { + "epoch": 1.9811886051080552, + "grad_norm": 0.619823694229126, + "learning_rate": 1.3042137298180448e-06, + "loss": 0.5089, + "step": 13445 + }, + { + "epoch": 1.981335952848723, + "grad_norm": 0.5868056416511536, + "learning_rate": 1.3038732724243914e-06, + "loss": 0.51, + "step": 13446 + }, + { + "epoch": 1.981483300589391, + "grad_norm": 0.5919167995452881, + "learning_rate": 1.3035328437970257e-06, + "loss": 0.5293, + "step": 13447 + }, + { + "epoch": 1.981630648330059, + "grad_norm": 0.6226991415023804, + "learning_rate": 1.3031924439441333e-06, + "loss": 0.4867, + "step": 13448 + }, + { + "epoch": 1.981777996070727, + "grad_norm": 0.6295313835144043, + "learning_rate": 1.302852072873902e-06, + "loss": 0.5383, + "step": 13449 + }, + { + "epoch": 1.9819253438113948, + "grad_norm": 0.6152560114860535, + "learning_rate": 1.3025117305945157e-06, + "loss": 0.5283, + "step": 13450 + }, + { + "epoch": 1.9820726915520628, + "grad_norm": 0.6165643334388733, + "learning_rate": 1.3021714171141616e-06, + "loss": 0.5511, + "step": 13451 + }, + { + "epoch": 1.9822200392927307, + "grad_norm": 0.6467542052268982, + "learning_rate": 1.3018311324410215e-06, + "loss": 0.5225, + "step": 13452 + }, + { + "epoch": 1.9823673870333989, + "grad_norm": 0.5974464416503906, + "learning_rate": 1.3014908765832822e-06, + "loss": 0.5144, + "step": 13453 + }, + { + "epoch": 1.9825147347740668, + "grad_norm": 0.6160319447517395, + "learning_rate": 1.3011506495491246e-06, + "loss": 0.5265, + "step": 13454 + }, + { + "epoch": 1.9826620825147347, + "grad_norm": 0.6145029067993164, + "learning_rate": 1.3008104513467323e-06, + "loss": 0.5012, + "step": 13455 + }, + { + "epoch": 1.982809430255403, + "grad_norm": 0.6023223996162415, + "learning_rate": 1.3004702819842855e-06, + "loss": 0.5302, + "step": 13456 + }, + { + "epoch": 1.9829567779960708, + "grad_norm": 0.5897045135498047, + "learning_rate": 1.3001301414699665e-06, + "loss": 0.5148, + "step": 13457 + }, + { + "epoch": 1.9831041257367388, + "grad_norm": 0.5814288258552551, + "learning_rate": 1.2997900298119547e-06, + "loss": 0.5326, + "step": 13458 + }, + { + "epoch": 1.9832514734774067, + "grad_norm": 0.5978745222091675, + "learning_rate": 1.2994499470184291e-06, + "loss": 0.5145, + "step": 13459 + }, + { + "epoch": 1.9833988212180746, + "grad_norm": 0.5964848399162292, + "learning_rate": 1.2991098930975703e-06, + "loss": 0.5074, + "step": 13460 + }, + { + "epoch": 1.9835461689587426, + "grad_norm": 0.6369531750679016, + "learning_rate": 1.298769868057554e-06, + "loss": 0.5207, + "step": 13461 + }, + { + "epoch": 1.9836935166994105, + "grad_norm": 0.6406348347663879, + "learning_rate": 1.2984298719065603e-06, + "loss": 0.5027, + "step": 13462 + }, + { + "epoch": 1.9838408644400785, + "grad_norm": 0.5895318984985352, + "learning_rate": 1.298089904652763e-06, + "loss": 0.53, + "step": 13463 + }, + { + "epoch": 1.9839882121807466, + "grad_norm": 0.6225086450576782, + "learning_rate": 1.2977499663043413e-06, + "loss": 0.5301, + "step": 13464 + }, + { + "epoch": 1.9841355599214145, + "grad_norm": 0.6191727519035339, + "learning_rate": 1.2974100568694678e-06, + "loss": 0.517, + "step": 13465 + }, + { + "epoch": 1.9842829076620825, + "grad_norm": 0.6323673129081726, + "learning_rate": 1.2970701763563195e-06, + "loss": 0.5193, + "step": 13466 + }, + { + "epoch": 1.9844302554027506, + "grad_norm": 0.5992730259895325, + "learning_rate": 1.2967303247730684e-06, + "loss": 0.5244, + "step": 13467 + }, + { + "epoch": 1.9845776031434186, + "grad_norm": 0.6224736571311951, + "learning_rate": 1.2963905021278891e-06, + "loss": 0.5159, + "step": 13468 + }, + { + "epoch": 1.9847249508840865, + "grad_norm": 0.5805699825286865, + "learning_rate": 1.296050708428953e-06, + "loss": 0.5267, + "step": 13469 + }, + { + "epoch": 1.9848722986247544, + "grad_norm": 0.5980829000473022, + "learning_rate": 1.2957109436844339e-06, + "loss": 0.5293, + "step": 13470 + }, + { + "epoch": 1.9850196463654224, + "grad_norm": 0.6191127896308899, + "learning_rate": 1.2953712079025004e-06, + "loss": 0.5399, + "step": 13471 + }, + { + "epoch": 1.9851669941060903, + "grad_norm": 0.6078231930732727, + "learning_rate": 1.2950315010913255e-06, + "loss": 0.536, + "step": 13472 + }, + { + "epoch": 1.9853143418467583, + "grad_norm": 0.609655499458313, + "learning_rate": 1.2946918232590768e-06, + "loss": 0.5061, + "step": 13473 + }, + { + "epoch": 1.9854616895874262, + "grad_norm": 0.6074957847595215, + "learning_rate": 1.2943521744139253e-06, + "loss": 0.5434, + "step": 13474 + }, + { + "epoch": 1.9856090373280944, + "grad_norm": 0.6041358113288879, + "learning_rate": 1.2940125545640378e-06, + "loss": 0.5046, + "step": 13475 + }, + { + "epoch": 1.9857563850687623, + "grad_norm": 0.6452686190605164, + "learning_rate": 1.2936729637175837e-06, + "loss": 0.5078, + "step": 13476 + }, + { + "epoch": 1.9859037328094302, + "grad_norm": 0.6456257700920105, + "learning_rate": 1.2933334018827279e-06, + "loss": 0.5418, + "step": 13477 + }, + { + "epoch": 1.9860510805500984, + "grad_norm": 0.6211262941360474, + "learning_rate": 1.2929938690676386e-06, + "loss": 0.5274, + "step": 13478 + }, + { + "epoch": 1.9861984282907663, + "grad_norm": 0.5881133079528809, + "learning_rate": 1.2926543652804802e-06, + "loss": 0.5408, + "step": 13479 + }, + { + "epoch": 1.9863457760314343, + "grad_norm": 0.5877825617790222, + "learning_rate": 1.2923148905294186e-06, + "loss": 0.5315, + "step": 13480 + }, + { + "epoch": 1.9864931237721022, + "grad_norm": 0.610397219657898, + "learning_rate": 1.2919754448226163e-06, + "loss": 0.5148, + "step": 13481 + }, + { + "epoch": 1.9866404715127701, + "grad_norm": 0.6804978251457214, + "learning_rate": 1.2916360281682395e-06, + "loss": 0.5323, + "step": 13482 + }, + { + "epoch": 1.986787819253438, + "grad_norm": 0.6307302117347717, + "learning_rate": 1.2912966405744481e-06, + "loss": 0.5395, + "step": 13483 + }, + { + "epoch": 1.986935166994106, + "grad_norm": 0.6184972524642944, + "learning_rate": 1.2909572820494064e-06, + "loss": 0.4798, + "step": 13484 + }, + { + "epoch": 1.987082514734774, + "grad_norm": 0.6175137758255005, + "learning_rate": 1.2906179526012745e-06, + "loss": 0.5164, + "step": 13485 + }, + { + "epoch": 1.987229862475442, + "grad_norm": 0.6019551157951355, + "learning_rate": 1.2902786522382143e-06, + "loss": 0.5282, + "step": 13486 + }, + { + "epoch": 1.98737721021611, + "grad_norm": 0.6257526278495789, + "learning_rate": 1.289939380968384e-06, + "loss": 0.522, + "step": 13487 + }, + { + "epoch": 1.987524557956778, + "grad_norm": 0.5713227987289429, + "learning_rate": 1.289600138799945e-06, + "loss": 0.5246, + "step": 13488 + }, + { + "epoch": 1.9876719056974461, + "grad_norm": 0.6242373585700989, + "learning_rate": 1.2892609257410544e-06, + "loss": 0.5267, + "step": 13489 + }, + { + "epoch": 1.987819253438114, + "grad_norm": 0.61590576171875, + "learning_rate": 1.2889217417998706e-06, + "loss": 0.5203, + "step": 13490 + }, + { + "epoch": 1.987966601178782, + "grad_norm": 0.662054717540741, + "learning_rate": 1.2885825869845515e-06, + "loss": 0.5286, + "step": 13491 + }, + { + "epoch": 1.98811394891945, + "grad_norm": 0.6050826907157898, + "learning_rate": 1.2882434613032524e-06, + "loss": 0.5296, + "step": 13492 + }, + { + "epoch": 1.9882612966601179, + "grad_norm": 0.6076838374137878, + "learning_rate": 1.2879043647641304e-06, + "loss": 0.5311, + "step": 13493 + }, + { + "epoch": 1.9884086444007858, + "grad_norm": 0.6153640151023865, + "learning_rate": 1.287565297375339e-06, + "loss": 0.5278, + "step": 13494 + }, + { + "epoch": 1.9885559921414537, + "grad_norm": 0.608771026134491, + "learning_rate": 1.2872262591450347e-06, + "loss": 0.5197, + "step": 13495 + }, + { + "epoch": 1.9887033398821217, + "grad_norm": 0.608420729637146, + "learning_rate": 1.2868872500813688e-06, + "loss": 0.5312, + "step": 13496 + }, + { + "epoch": 1.9888506876227898, + "grad_norm": 0.5958409905433655, + "learning_rate": 1.2865482701924964e-06, + "loss": 0.5175, + "step": 13497 + }, + { + "epoch": 1.9889980353634578, + "grad_norm": 0.5914264917373657, + "learning_rate": 1.2862093194865682e-06, + "loss": 0.4977, + "step": 13498 + }, + { + "epoch": 1.9891453831041257, + "grad_norm": 0.617560863494873, + "learning_rate": 1.2858703979717377e-06, + "loss": 0.5222, + "step": 13499 + }, + { + "epoch": 1.9892927308447939, + "grad_norm": 0.5819882154464722, + "learning_rate": 1.2855315056561535e-06, + "loss": 0.5027, + "step": 13500 + }, + { + "epoch": 1.9894400785854618, + "grad_norm": 0.6224554181098938, + "learning_rate": 1.285192642547968e-06, + "loss": 0.5163, + "step": 13501 + }, + { + "epoch": 1.9895874263261297, + "grad_norm": 0.610167384147644, + "learning_rate": 1.2848538086553285e-06, + "loss": 0.5222, + "step": 13502 + }, + { + "epoch": 1.9897347740667977, + "grad_norm": 0.57759690284729, + "learning_rate": 1.2845150039863862e-06, + "loss": 0.4844, + "step": 13503 + }, + { + "epoch": 1.9898821218074656, + "grad_norm": 0.6108860969543457, + "learning_rate": 1.284176228549287e-06, + "loss": 0.5502, + "step": 13504 + }, + { + "epoch": 1.9900294695481335, + "grad_norm": 0.6367982625961304, + "learning_rate": 1.2838374823521801e-06, + "loss": 0.5199, + "step": 13505 + }, + { + "epoch": 1.9901768172888015, + "grad_norm": 0.5992765426635742, + "learning_rate": 1.2834987654032105e-06, + "loss": 0.5239, + "step": 13506 + }, + { + "epoch": 1.9903241650294694, + "grad_norm": 0.5852530598640442, + "learning_rate": 1.2831600777105257e-06, + "loss": 0.5462, + "step": 13507 + }, + { + "epoch": 1.9904715127701376, + "grad_norm": 0.6180011034011841, + "learning_rate": 1.2828214192822705e-06, + "loss": 0.5088, + "step": 13508 + }, + { + "epoch": 1.9906188605108055, + "grad_norm": 0.5961679220199585, + "learning_rate": 1.2824827901265883e-06, + "loss": 0.5462, + "step": 13509 + }, + { + "epoch": 1.9907662082514734, + "grad_norm": 0.610153079032898, + "learning_rate": 1.2821441902516249e-06, + "loss": 0.5411, + "step": 13510 + }, + { + "epoch": 1.9909135559921416, + "grad_norm": 0.6068251132965088, + "learning_rate": 1.2818056196655215e-06, + "loss": 0.4827, + "step": 13511 + }, + { + "epoch": 1.9910609037328095, + "grad_norm": 0.5821979641914368, + "learning_rate": 1.2814670783764226e-06, + "loss": 0.501, + "step": 13512 + }, + { + "epoch": 1.9912082514734775, + "grad_norm": 0.606584370136261, + "learning_rate": 1.281128566392468e-06, + "loss": 0.5194, + "step": 13513 + }, + { + "epoch": 1.9913555992141454, + "grad_norm": 0.6045545935630798, + "learning_rate": 1.2807900837218006e-06, + "loss": 0.5128, + "step": 13514 + }, + { + "epoch": 1.9915029469548133, + "grad_norm": 0.6017376780509949, + "learning_rate": 1.280451630372559e-06, + "loss": 0.5096, + "step": 13515 + }, + { + "epoch": 1.9916502946954813, + "grad_norm": 0.646090567111969, + "learning_rate": 1.2801132063528842e-06, + "loss": 0.5217, + "step": 13516 + }, + { + "epoch": 1.9917976424361492, + "grad_norm": 0.6191940307617188, + "learning_rate": 1.279774811670914e-06, + "loss": 0.5159, + "step": 13517 + }, + { + "epoch": 1.9919449901768171, + "grad_norm": 0.596461832523346, + "learning_rate": 1.2794364463347886e-06, + "loss": 0.5004, + "step": 13518 + }, + { + "epoch": 1.9920923379174853, + "grad_norm": 0.6163018345832825, + "learning_rate": 1.2790981103526428e-06, + "loss": 0.5176, + "step": 13519 + }, + { + "epoch": 1.9922396856581532, + "grad_norm": 0.646528422832489, + "learning_rate": 1.2787598037326157e-06, + "loss": 0.5363, + "step": 13520 + }, + { + "epoch": 1.9923870333988212, + "grad_norm": 0.5938848257064819, + "learning_rate": 1.2784215264828417e-06, + "loss": 0.5081, + "step": 13521 + }, + { + "epoch": 1.9925343811394893, + "grad_norm": 0.5916367173194885, + "learning_rate": 1.278083278611458e-06, + "loss": 0.5461, + "step": 13522 + }, + { + "epoch": 1.9926817288801573, + "grad_norm": 0.6005079746246338, + "learning_rate": 1.2777450601265972e-06, + "loss": 0.531, + "step": 13523 + }, + { + "epoch": 1.9928290766208252, + "grad_norm": 0.6289000511169434, + "learning_rate": 1.2774068710363956e-06, + "loss": 0.5293, + "step": 13524 + }, + { + "epoch": 1.9929764243614931, + "grad_norm": 0.6323119401931763, + "learning_rate": 1.2770687113489843e-06, + "loss": 0.5191, + "step": 13525 + }, + { + "epoch": 1.993123772102161, + "grad_norm": 0.5962536931037903, + "learning_rate": 1.276730581072497e-06, + "loss": 0.5269, + "step": 13526 + }, + { + "epoch": 1.993271119842829, + "grad_norm": 0.6175171732902527, + "learning_rate": 1.2763924802150663e-06, + "loss": 0.5274, + "step": 13527 + }, + { + "epoch": 1.993418467583497, + "grad_norm": 0.6342068910598755, + "learning_rate": 1.2760544087848217e-06, + "loss": 0.5478, + "step": 13528 + }, + { + "epoch": 1.9935658153241649, + "grad_norm": 0.5968220233917236, + "learning_rate": 1.2757163667898953e-06, + "loss": 0.5193, + "step": 13529 + }, + { + "epoch": 1.993713163064833, + "grad_norm": 0.7174525856971741, + "learning_rate": 1.2753783542384151e-06, + "loss": 0.5446, + "step": 13530 + }, + { + "epoch": 1.993860510805501, + "grad_norm": 0.573078453540802, + "learning_rate": 1.2750403711385122e-06, + "loss": 0.5019, + "step": 13531 + }, + { + "epoch": 1.994007858546169, + "grad_norm": 0.6707305908203125, + "learning_rate": 1.2747024174983131e-06, + "loss": 0.5353, + "step": 13532 + }, + { + "epoch": 1.994155206286837, + "grad_norm": 0.5875083804130554, + "learning_rate": 1.2743644933259468e-06, + "loss": 0.5164, + "step": 13533 + }, + { + "epoch": 1.994302554027505, + "grad_norm": 0.6596897840499878, + "learning_rate": 1.2740265986295385e-06, + "loss": 0.5031, + "step": 13534 + }, + { + "epoch": 1.994449901768173, + "grad_norm": 0.6080136895179749, + "learning_rate": 1.2736887334172165e-06, + "loss": 0.4958, + "step": 13535 + }, + { + "epoch": 1.9945972495088409, + "grad_norm": 0.590434730052948, + "learning_rate": 1.2733508976971048e-06, + "loss": 0.4782, + "step": 13536 + }, + { + "epoch": 1.9947445972495088, + "grad_norm": 0.5921505093574524, + "learning_rate": 1.2730130914773292e-06, + "loss": 0.5233, + "step": 13537 + }, + { + "epoch": 1.9948919449901767, + "grad_norm": 0.5949186086654663, + "learning_rate": 1.2726753147660123e-06, + "loss": 0.5399, + "step": 13538 + }, + { + "epoch": 1.9950392927308447, + "grad_norm": 0.6162256598472595, + "learning_rate": 1.2723375675712796e-06, + "loss": 0.5295, + "step": 13539 + }, + { + "epoch": 1.9951866404715126, + "grad_norm": 0.6471084356307983, + "learning_rate": 1.2719998499012516e-06, + "loss": 0.5178, + "step": 13540 + }, + { + "epoch": 1.9953339882121808, + "grad_norm": 0.640831470489502, + "learning_rate": 1.2716621617640518e-06, + "loss": 0.5437, + "step": 13541 + }, + { + "epoch": 1.9954813359528487, + "grad_norm": 0.6296052932739258, + "learning_rate": 1.2713245031678e-06, + "loss": 0.5179, + "step": 13542 + }, + { + "epoch": 1.9956286836935166, + "grad_norm": 0.6058283448219299, + "learning_rate": 1.2709868741206192e-06, + "loss": 0.4843, + "step": 13543 + }, + { + "epoch": 1.9957760314341848, + "grad_norm": 0.6068002581596375, + "learning_rate": 1.2706492746306262e-06, + "loss": 0.5115, + "step": 13544 + }, + { + "epoch": 1.9959233791748527, + "grad_norm": 0.5943723917007446, + "learning_rate": 1.2703117047059427e-06, + "loss": 0.5319, + "step": 13545 + }, + { + "epoch": 1.9960707269155207, + "grad_norm": 0.6065002083778381, + "learning_rate": 1.2699741643546849e-06, + "loss": 0.5182, + "step": 13546 + }, + { + "epoch": 1.9962180746561886, + "grad_norm": 0.6628167033195496, + "learning_rate": 1.2696366535849721e-06, + "loss": 0.4887, + "step": 13547 + }, + { + "epoch": 1.9963654223968565, + "grad_norm": 0.6146310567855835, + "learning_rate": 1.2692991724049203e-06, + "loss": 0.5404, + "step": 13548 + }, + { + "epoch": 1.9965127701375245, + "grad_norm": 0.5697465538978577, + "learning_rate": 1.2689617208226468e-06, + "loss": 0.4671, + "step": 13549 + }, + { + "epoch": 1.9966601178781924, + "grad_norm": 0.6189188361167908, + "learning_rate": 1.2686242988462658e-06, + "loss": 0.5264, + "step": 13550 + }, + { + "epoch": 1.9968074656188604, + "grad_norm": 0.5973168611526489, + "learning_rate": 1.2682869064838938e-06, + "loss": 0.5202, + "step": 13551 + }, + { + "epoch": 1.9969548133595285, + "grad_norm": 0.6200776696205139, + "learning_rate": 1.2679495437436428e-06, + "loss": 0.5305, + "step": 13552 + }, + { + "epoch": 1.9971021611001964, + "grad_norm": 0.6355925798416138, + "learning_rate": 1.2676122106336287e-06, + "loss": 0.5332, + "step": 13553 + }, + { + "epoch": 1.9972495088408644, + "grad_norm": 0.607285737991333, + "learning_rate": 1.2672749071619617e-06, + "loss": 0.5145, + "step": 13554 + }, + { + "epoch": 1.9973968565815325, + "grad_norm": 0.5990614891052246, + "learning_rate": 1.2669376333367559e-06, + "loss": 0.5316, + "step": 13555 + }, + { + "epoch": 1.9975442043222005, + "grad_norm": 0.6069377064704895, + "learning_rate": 1.2666003891661217e-06, + "loss": 0.5251, + "step": 13556 + }, + { + "epoch": 1.9976915520628684, + "grad_norm": 0.6267927885055542, + "learning_rate": 1.2662631746581688e-06, + "loss": 0.5174, + "step": 13557 + }, + { + "epoch": 1.9978388998035363, + "grad_norm": 0.5619716644287109, + "learning_rate": 1.2659259898210089e-06, + "loss": 0.5436, + "step": 13558 + }, + { + "epoch": 1.9979862475442043, + "grad_norm": 0.6296464204788208, + "learning_rate": 1.2655888346627492e-06, + "loss": 0.5407, + "step": 13559 + }, + { + "epoch": 1.9981335952848722, + "grad_norm": 0.6475867033004761, + "learning_rate": 1.2652517091914998e-06, + "loss": 0.5407, + "step": 13560 + }, + { + "epoch": 1.9982809430255402, + "grad_norm": 0.6222131848335266, + "learning_rate": 1.2649146134153667e-06, + "loss": 0.4926, + "step": 13561 + }, + { + "epoch": 1.998428290766208, + "grad_norm": 0.6167193055152893, + "learning_rate": 1.264577547342459e-06, + "loss": 0.4986, + "step": 13562 + }, + { + "epoch": 1.9985756385068763, + "grad_norm": 0.6240342259407043, + "learning_rate": 1.2642405109808797e-06, + "loss": 0.5444, + "step": 13563 + }, + { + "epoch": 1.9987229862475442, + "grad_norm": 0.600028395652771, + "learning_rate": 1.2639035043387387e-06, + "loss": 0.5172, + "step": 13564 + }, + { + "epoch": 1.9988703339882123, + "grad_norm": 0.6570491194725037, + "learning_rate": 1.2635665274241376e-06, + "loss": 0.508, + "step": 13565 + }, + { + "epoch": 1.9990176817288803, + "grad_norm": 0.600551426410675, + "learning_rate": 1.2632295802451828e-06, + "loss": 0.5178, + "step": 13566 + }, + { + "epoch": 1.9991650294695482, + "grad_norm": 0.5935050845146179, + "learning_rate": 1.2628926628099753e-06, + "loss": 0.552, + "step": 13567 + }, + { + "epoch": 1.9993123772102162, + "grad_norm": 0.616021454334259, + "learning_rate": 1.26255577512662e-06, + "loss": 0.4991, + "step": 13568 + }, + { + "epoch": 1.999459724950884, + "grad_norm": 0.5976921916007996, + "learning_rate": 1.2622189172032168e-06, + "loss": 0.5208, + "step": 13569 + }, + { + "epoch": 1.999607072691552, + "grad_norm": 0.6000241041183472, + "learning_rate": 1.2618820890478689e-06, + "loss": 0.5504, + "step": 13570 + }, + { + "epoch": 1.99975442043222, + "grad_norm": 0.6293209791183472, + "learning_rate": 1.2615452906686754e-06, + "loss": 0.5334, + "step": 13571 + }, + { + "epoch": 1.999901768172888, + "grad_norm": 0.6006196737289429, + "learning_rate": 1.2612085220737377e-06, + "loss": 0.5121, + "step": 13572 + } + ], + "logging_steps": 1, + "max_steps": 20358, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 6786, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 6.856842412132663e+19, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +}