| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.0, |
| "eval_steps": 0, |
| "global_step": 560, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0017857142857142857, |
| "grad_norm": 0.6997888088226318, |
| "learning_rate": 9.982142857142858e-06, |
| "loss": 1.9503, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.0035714285714285713, |
| "grad_norm": 0.7093144059181213, |
| "learning_rate": 9.964285714285714e-06, |
| "loss": 2.0277, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.005357142857142857, |
| "grad_norm": 0.6343945264816284, |
| "learning_rate": 9.946428571428572e-06, |
| "loss": 1.9098, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.007142857142857143, |
| "grad_norm": 0.6176633834838867, |
| "learning_rate": 9.92857142857143e-06, |
| "loss": 1.8188, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.008928571428571428, |
| "grad_norm": 0.6151384115219116, |
| "learning_rate": 9.910714285714288e-06, |
| "loss": 1.9782, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.010714285714285714, |
| "grad_norm": 0.6134297251701355, |
| "learning_rate": 9.892857142857143e-06, |
| "loss": 1.83, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.0125, |
| "grad_norm": 0.6008761525154114, |
| "learning_rate": 9.875000000000001e-06, |
| "loss": 1.8764, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.014285714285714285, |
| "grad_norm": 0.5730451941490173, |
| "learning_rate": 9.857142857142859e-06, |
| "loss": 1.8342, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.01607142857142857, |
| "grad_norm": 0.5792098641395569, |
| "learning_rate": 9.839285714285715e-06, |
| "loss": 1.8666, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.017857142857142856, |
| "grad_norm": 0.5382242202758789, |
| "learning_rate": 9.821428571428573e-06, |
| "loss": 1.9054, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.019642857142857142, |
| "grad_norm": 0.46677500009536743, |
| "learning_rate": 9.803571428571428e-06, |
| "loss": 1.768, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.02142857142857143, |
| "grad_norm": 0.47485437989234924, |
| "learning_rate": 9.785714285714286e-06, |
| "loss": 1.8044, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.023214285714285715, |
| "grad_norm": 0.48358583450317383, |
| "learning_rate": 9.767857142857144e-06, |
| "loss": 1.7768, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.025, |
| "grad_norm": 0.46866756677627563, |
| "learning_rate": 9.75e-06, |
| "loss": 1.8323, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.026785714285714284, |
| "grad_norm": 0.4622134864330292, |
| "learning_rate": 9.732142857142858e-06, |
| "loss": 1.8337, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.02857142857142857, |
| "grad_norm": 0.43944844603538513, |
| "learning_rate": 9.714285714285715e-06, |
| "loss": 1.7608, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.030357142857142857, |
| "grad_norm": 0.4220949113368988, |
| "learning_rate": 9.696428571428573e-06, |
| "loss": 1.6767, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.03214285714285714, |
| "grad_norm": 0.41527998447418213, |
| "learning_rate": 9.678571428571429e-06, |
| "loss": 1.7412, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.033928571428571426, |
| "grad_norm": 0.43707507848739624, |
| "learning_rate": 9.660714285714287e-06, |
| "loss": 1.7807, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.03571428571428571, |
| "grad_norm": 0.41880887746810913, |
| "learning_rate": 9.642857142857144e-06, |
| "loss": 1.7464, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.0375, |
| "grad_norm": 0.4193197190761566, |
| "learning_rate": 9.625e-06, |
| "loss": 1.7347, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.039285714285714285, |
| "grad_norm": 0.383999228477478, |
| "learning_rate": 9.607142857142858e-06, |
| "loss": 1.6653, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.04107142857142857, |
| "grad_norm": 0.3941427171230316, |
| "learning_rate": 9.589285714285716e-06, |
| "loss": 1.7175, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.04285714285714286, |
| "grad_norm": 0.386326402425766, |
| "learning_rate": 9.571428571428573e-06, |
| "loss": 1.6642, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.044642857142857144, |
| "grad_norm": 0.3823203146457672, |
| "learning_rate": 9.55357142857143e-06, |
| "loss": 1.7253, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.04642857142857143, |
| "grad_norm": 0.36001327633857727, |
| "learning_rate": 9.535714285714287e-06, |
| "loss": 1.7415, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.048214285714285716, |
| "grad_norm": 0.36957600712776184, |
| "learning_rate": 9.517857142857143e-06, |
| "loss": 1.7296, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.05, |
| "grad_norm": 0.37834134697914124, |
| "learning_rate": 9.5e-06, |
| "loss": 1.6701, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.05178571428571429, |
| "grad_norm": 0.343870609998703, |
| "learning_rate": 9.482142857142858e-06, |
| "loss": 1.7299, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.05357142857142857, |
| "grad_norm": 0.3333272933959961, |
| "learning_rate": 9.464285714285714e-06, |
| "loss": 1.6733, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.055357142857142855, |
| "grad_norm": 0.3580067455768585, |
| "learning_rate": 9.446428571428572e-06, |
| "loss": 1.7065, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.05714285714285714, |
| "grad_norm": 0.3354710638523102, |
| "learning_rate": 9.42857142857143e-06, |
| "loss": 1.6561, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.05892857142857143, |
| "grad_norm": 0.4108162820339203, |
| "learning_rate": 9.410714285714286e-06, |
| "loss": 1.5889, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.060714285714285714, |
| "grad_norm": 0.34176838397979736, |
| "learning_rate": 9.392857142857143e-06, |
| "loss": 1.6161, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.0625, |
| "grad_norm": 0.3366299271583557, |
| "learning_rate": 9.375000000000001e-06, |
| "loss": 1.599, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.06428571428571428, |
| "grad_norm": 0.3173324763774872, |
| "learning_rate": 9.357142857142859e-06, |
| "loss": 1.6042, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.06607142857142857, |
| "grad_norm": 0.3377523124217987, |
| "learning_rate": 9.339285714285715e-06, |
| "loss": 1.6056, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.06785714285714285, |
| "grad_norm": 0.31548359990119934, |
| "learning_rate": 9.321428571428572e-06, |
| "loss": 1.6573, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.06964285714285715, |
| "grad_norm": 0.32269155979156494, |
| "learning_rate": 9.30357142857143e-06, |
| "loss": 1.5897, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.07142857142857142, |
| "grad_norm": 0.30696460604667664, |
| "learning_rate": 9.285714285714288e-06, |
| "loss": 1.6129, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.07321428571428572, |
| "grad_norm": 0.31791722774505615, |
| "learning_rate": 9.267857142857144e-06, |
| "loss": 1.6427, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.075, |
| "grad_norm": 0.31335708498954773, |
| "learning_rate": 9.250000000000001e-06, |
| "loss": 1.5697, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.07678571428571429, |
| "grad_norm": 0.31561946868896484, |
| "learning_rate": 9.232142857142859e-06, |
| "loss": 1.653, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.07857142857142857, |
| "grad_norm": 0.3354925215244293, |
| "learning_rate": 9.214285714285715e-06, |
| "loss": 1.5793, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.08035714285714286, |
| "grad_norm": 0.3125816583633423, |
| "learning_rate": 9.196428571428571e-06, |
| "loss": 1.5243, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.08214285714285714, |
| "grad_norm": 0.3044743537902832, |
| "learning_rate": 9.178571428571429e-06, |
| "loss": 1.5449, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.08392857142857142, |
| "grad_norm": 0.3276882469654083, |
| "learning_rate": 9.160714285714286e-06, |
| "loss": 1.65, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.08571428571428572, |
| "grad_norm": 0.34233033657073975, |
| "learning_rate": 9.142857142857144e-06, |
| "loss": 1.5939, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.0875, |
| "grad_norm": 0.36175018548965454, |
| "learning_rate": 9.125e-06, |
| "loss": 1.5721, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.08928571428571429, |
| "grad_norm": 0.3349234461784363, |
| "learning_rate": 9.107142857142858e-06, |
| "loss": 1.615, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.09107142857142857, |
| "grad_norm": 0.3084949553012848, |
| "learning_rate": 9.089285714285715e-06, |
| "loss": 1.5363, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.09285714285714286, |
| "grad_norm": 0.30576226115226746, |
| "learning_rate": 9.071428571428573e-06, |
| "loss": 1.521, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.09464285714285714, |
| "grad_norm": 0.3125852346420288, |
| "learning_rate": 9.053571428571429e-06, |
| "loss": 1.5768, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.09642857142857143, |
| "grad_norm": 0.3131454586982727, |
| "learning_rate": 9.035714285714287e-06, |
| "loss": 1.5326, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.09821428571428571, |
| "grad_norm": 0.3140488862991333, |
| "learning_rate": 9.017857142857144e-06, |
| "loss": 1.6095, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.1, |
| "grad_norm": 0.3363245129585266, |
| "learning_rate": 9e-06, |
| "loss": 1.59, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.10178571428571428, |
| "grad_norm": 0.3293783664703369, |
| "learning_rate": 8.982142857142858e-06, |
| "loss": 1.5722, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.10357142857142858, |
| "grad_norm": 0.31743383407592773, |
| "learning_rate": 8.964285714285716e-06, |
| "loss": 1.4729, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.10535714285714286, |
| "grad_norm": 0.3278064727783203, |
| "learning_rate": 8.946428571428573e-06, |
| "loss": 1.494, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.10714285714285714, |
| "grad_norm": 0.3063907027244568, |
| "learning_rate": 8.92857142857143e-06, |
| "loss": 1.5308, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.10892857142857143, |
| "grad_norm": 0.32603612542152405, |
| "learning_rate": 8.910714285714287e-06, |
| "loss": 1.505, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.11071428571428571, |
| "grad_norm": 0.32034823298454285, |
| "learning_rate": 8.892857142857143e-06, |
| "loss": 1.5049, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.1125, |
| "grad_norm": 0.2999460697174072, |
| "learning_rate": 8.875e-06, |
| "loss": 1.4992, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.11428571428571428, |
| "grad_norm": 0.3157401382923126, |
| "learning_rate": 8.857142857142858e-06, |
| "loss": 1.5424, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.11607142857142858, |
| "grad_norm": 0.46912819147109985, |
| "learning_rate": 8.839285714285714e-06, |
| "loss": 1.542, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.11785714285714285, |
| "grad_norm": 0.31604939699172974, |
| "learning_rate": 8.821428571428572e-06, |
| "loss": 1.5034, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.11964285714285715, |
| "grad_norm": 0.3068097233772278, |
| "learning_rate": 8.80357142857143e-06, |
| "loss": 1.5091, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.12142857142857143, |
| "grad_norm": 0.30518803000450134, |
| "learning_rate": 8.785714285714286e-06, |
| "loss": 1.4497, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.12321428571428572, |
| "grad_norm": 0.3579995632171631, |
| "learning_rate": 8.767857142857143e-06, |
| "loss": 1.5062, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.125, |
| "grad_norm": 0.3688044250011444, |
| "learning_rate": 8.750000000000001e-06, |
| "loss": 1.5727, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.12678571428571428, |
| "grad_norm": 0.32263967394828796, |
| "learning_rate": 8.732142857142859e-06, |
| "loss": 1.4353, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.12857142857142856, |
| "grad_norm": 0.30005359649658203, |
| "learning_rate": 8.714285714285715e-06, |
| "loss": 1.468, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.13035714285714287, |
| "grad_norm": 0.33392152190208435, |
| "learning_rate": 8.696428571428572e-06, |
| "loss": 1.5394, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.13214285714285715, |
| "grad_norm": 0.36938440799713135, |
| "learning_rate": 8.67857142857143e-06, |
| "loss": 1.5137, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.13392857142857142, |
| "grad_norm": 0.33022594451904297, |
| "learning_rate": 8.660714285714286e-06, |
| "loss": 1.4917, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.1357142857142857, |
| "grad_norm": 0.35589250922203064, |
| "learning_rate": 8.642857142857144e-06, |
| "loss": 1.4468, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.1375, |
| "grad_norm": 0.3233088552951813, |
| "learning_rate": 8.625000000000001e-06, |
| "loss": 1.4459, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.1392857142857143, |
| "grad_norm": 0.3153448700904846, |
| "learning_rate": 8.607142857142859e-06, |
| "loss": 1.444, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.14107142857142857, |
| "grad_norm": 0.3484407365322113, |
| "learning_rate": 8.589285714285715e-06, |
| "loss": 1.4911, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.14285714285714285, |
| "grad_norm": 0.47459274530410767, |
| "learning_rate": 8.571428571428571e-06, |
| "loss": 1.5459, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.14464285714285716, |
| "grad_norm": 0.31747081875801086, |
| "learning_rate": 8.553571428571429e-06, |
| "loss": 1.4274, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.14642857142857144, |
| "grad_norm": 0.34533751010894775, |
| "learning_rate": 8.535714285714286e-06, |
| "loss": 1.5468, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.14821428571428572, |
| "grad_norm": 0.44818830490112305, |
| "learning_rate": 8.517857142857144e-06, |
| "loss": 1.5172, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.15, |
| "grad_norm": 0.3334464728832245, |
| "learning_rate": 8.5e-06, |
| "loss": 1.4703, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.15178571428571427, |
| "grad_norm": 0.33317744731903076, |
| "learning_rate": 8.482142857142858e-06, |
| "loss": 1.4714, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.15357142857142858, |
| "grad_norm": 0.3387729227542877, |
| "learning_rate": 8.464285714285715e-06, |
| "loss": 1.4725, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.15535714285714286, |
| "grad_norm": 0.3361343443393707, |
| "learning_rate": 8.446428571428571e-06, |
| "loss": 1.4619, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.15714285714285714, |
| "grad_norm": 0.3541395664215088, |
| "learning_rate": 8.428571428571429e-06, |
| "loss": 1.5199, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.15892857142857142, |
| "grad_norm": 0.33765190839767456, |
| "learning_rate": 8.410714285714287e-06, |
| "loss": 1.4983, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.16071428571428573, |
| "grad_norm": 0.452921599149704, |
| "learning_rate": 8.392857142857144e-06, |
| "loss": 1.4619, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.1625, |
| "grad_norm": 0.3351086974143982, |
| "learning_rate": 8.375e-06, |
| "loss": 1.3916, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.16428571428571428, |
| "grad_norm": 0.3732268810272217, |
| "learning_rate": 8.357142857142858e-06, |
| "loss": 1.484, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.16607142857142856, |
| "grad_norm": 0.36562690138816833, |
| "learning_rate": 8.339285714285716e-06, |
| "loss": 1.4536, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.16785714285714284, |
| "grad_norm": 0.32931259274482727, |
| "learning_rate": 8.321428571428573e-06, |
| "loss": 1.4649, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.16964285714285715, |
| "grad_norm": 0.5226176381111145, |
| "learning_rate": 8.30357142857143e-06, |
| "loss": 1.4584, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.17142857142857143, |
| "grad_norm": 0.5753301978111267, |
| "learning_rate": 8.285714285714287e-06, |
| "loss": 1.5382, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.1732142857142857, |
| "grad_norm": 0.34495556354522705, |
| "learning_rate": 8.267857142857143e-06, |
| "loss": 1.4719, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.175, |
| "grad_norm": 0.35743340849876404, |
| "learning_rate": 8.25e-06, |
| "loss": 1.4244, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.1767857142857143, |
| "grad_norm": 0.3325170576572418, |
| "learning_rate": 8.232142857142857e-06, |
| "loss": 1.4362, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.17857142857142858, |
| "grad_norm": 0.37087783217430115, |
| "learning_rate": 8.214285714285714e-06, |
| "loss": 1.4888, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.18035714285714285, |
| "grad_norm": 0.34744587540626526, |
| "learning_rate": 8.196428571428572e-06, |
| "loss": 1.4836, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.18214285714285713, |
| "grad_norm": 0.3370732367038727, |
| "learning_rate": 8.17857142857143e-06, |
| "loss": 1.4962, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.18392857142857144, |
| "grad_norm": 0.36961331963539124, |
| "learning_rate": 8.160714285714286e-06, |
| "loss": 1.4483, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.18571428571428572, |
| "grad_norm": 0.3708970844745636, |
| "learning_rate": 8.142857142857143e-06, |
| "loss": 1.4767, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.1875, |
| "grad_norm": 0.34523463249206543, |
| "learning_rate": 8.125000000000001e-06, |
| "loss": 1.4583, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.18928571428571428, |
| "grad_norm": 0.34832295775413513, |
| "learning_rate": 8.107142857142859e-06, |
| "loss": 1.4046, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.19107142857142856, |
| "grad_norm": 0.3623919188976288, |
| "learning_rate": 8.089285714285715e-06, |
| "loss": 1.3721, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.19285714285714287, |
| "grad_norm": 0.39359399676322937, |
| "learning_rate": 8.071428571428572e-06, |
| "loss": 1.5102, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.19464285714285715, |
| "grad_norm": 0.4397876262664795, |
| "learning_rate": 8.05357142857143e-06, |
| "loss": 1.4742, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.19642857142857142, |
| "grad_norm": 0.3398594856262207, |
| "learning_rate": 8.035714285714286e-06, |
| "loss": 1.4093, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.1982142857142857, |
| "grad_norm": 0.44007447361946106, |
| "learning_rate": 8.017857142857144e-06, |
| "loss": 1.4157, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.2, |
| "grad_norm": 0.384075790643692, |
| "learning_rate": 8.000000000000001e-06, |
| "loss": 1.4729, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.2017857142857143, |
| "grad_norm": 0.460844486951828, |
| "learning_rate": 7.982142857142859e-06, |
| "loss": 1.4098, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.20357142857142857, |
| "grad_norm": 0.3587513267993927, |
| "learning_rate": 7.964285714285715e-06, |
| "loss": 1.4758, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.20535714285714285, |
| "grad_norm": 0.4182850420475006, |
| "learning_rate": 7.946428571428571e-06, |
| "loss": 1.3661, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.20714285714285716, |
| "grad_norm": 0.41321325302124023, |
| "learning_rate": 7.928571428571429e-06, |
| "loss": 1.4011, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.20892857142857144, |
| "grad_norm": 0.3603422939777374, |
| "learning_rate": 7.910714285714286e-06, |
| "loss": 1.412, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.21071428571428572, |
| "grad_norm": 0.3570718467235565, |
| "learning_rate": 7.892857142857144e-06, |
| "loss": 1.4377, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.2125, |
| "grad_norm": 0.358900785446167, |
| "learning_rate": 7.875e-06, |
| "loss": 1.3817, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.21428571428571427, |
| "grad_norm": 0.3678073585033417, |
| "learning_rate": 7.857142857142858e-06, |
| "loss": 1.4669, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.21607142857142858, |
| "grad_norm": 0.35429590940475464, |
| "learning_rate": 7.839285714285715e-06, |
| "loss": 1.4117, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.21785714285714286, |
| "grad_norm": 0.3580191433429718, |
| "learning_rate": 7.821428571428571e-06, |
| "loss": 1.4172, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.21964285714285714, |
| "grad_norm": 0.35501304268836975, |
| "learning_rate": 7.803571428571429e-06, |
| "loss": 1.4412, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.22142857142857142, |
| "grad_norm": 0.35224324464797974, |
| "learning_rate": 7.785714285714287e-06, |
| "loss": 1.4106, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.22321428571428573, |
| "grad_norm": 0.3726520836353302, |
| "learning_rate": 7.767857142857144e-06, |
| "loss": 1.3818, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.225, |
| "grad_norm": 0.3572198748588562, |
| "learning_rate": 7.75e-06, |
| "loss": 1.3844, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.22678571428571428, |
| "grad_norm": 0.3563896715641022, |
| "learning_rate": 7.732142857142858e-06, |
| "loss": 1.4052, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.22857142857142856, |
| "grad_norm": 0.37256497144699097, |
| "learning_rate": 7.714285714285716e-06, |
| "loss": 1.4813, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.23035714285714284, |
| "grad_norm": 0.3678889870643616, |
| "learning_rate": 7.696428571428572e-06, |
| "loss": 1.424, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.23214285714285715, |
| "grad_norm": 0.37930241227149963, |
| "learning_rate": 7.67857142857143e-06, |
| "loss": 1.4352, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.23392857142857143, |
| "grad_norm": 0.34848180413246155, |
| "learning_rate": 7.660714285714287e-06, |
| "loss": 1.3944, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.2357142857142857, |
| "grad_norm": 0.4487292468547821, |
| "learning_rate": 7.642857142857143e-06, |
| "loss": 1.5334, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.2375, |
| "grad_norm": 0.4080350399017334, |
| "learning_rate": 7.625e-06, |
| "loss": 1.3737, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.2392857142857143, |
| "grad_norm": 0.3762721121311188, |
| "learning_rate": 7.6071428571428575e-06, |
| "loss": 1.4288, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.24107142857142858, |
| "grad_norm": 0.38287535309791565, |
| "learning_rate": 7.589285714285714e-06, |
| "loss": 1.4398, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.24285714285714285, |
| "grad_norm": 0.37439846992492676, |
| "learning_rate": 7.571428571428572e-06, |
| "loss": 1.4314, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.24464285714285713, |
| "grad_norm": 0.3716781735420227, |
| "learning_rate": 7.553571428571429e-06, |
| "loss": 1.4043, |
| "step": 137 |
| }, |
| { |
| "epoch": 0.24642857142857144, |
| "grad_norm": 0.36782070994377136, |
| "learning_rate": 7.5357142857142865e-06, |
| "loss": 1.4057, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.24821428571428572, |
| "grad_norm": 0.36489415168762207, |
| "learning_rate": 7.517857142857143e-06, |
| "loss": 1.3609, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.25, |
| "grad_norm": 0.3928413391113281, |
| "learning_rate": 7.500000000000001e-06, |
| "loss": 1.427, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.2517857142857143, |
| "grad_norm": 0.36631447076797485, |
| "learning_rate": 7.482142857142858e-06, |
| "loss": 1.3939, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.25357142857142856, |
| "grad_norm": 0.3827175498008728, |
| "learning_rate": 7.464285714285715e-06, |
| "loss": 1.4078, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.25535714285714284, |
| "grad_norm": 0.37270644307136536, |
| "learning_rate": 7.446428571428572e-06, |
| "loss": 1.4459, |
| "step": 143 |
| }, |
| { |
| "epoch": 0.2571428571428571, |
| "grad_norm": 0.3645498752593994, |
| "learning_rate": 7.428571428571429e-06, |
| "loss": 1.4207, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.25892857142857145, |
| "grad_norm": 0.3855854868888855, |
| "learning_rate": 7.410714285714287e-06, |
| "loss": 1.4153, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.26071428571428573, |
| "grad_norm": 0.40187859535217285, |
| "learning_rate": 7.392857142857144e-06, |
| "loss": 1.3937, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.2625, |
| "grad_norm": 0.39412420988082886, |
| "learning_rate": 7.375000000000001e-06, |
| "loss": 1.3786, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.2642857142857143, |
| "grad_norm": 0.3723837733268738, |
| "learning_rate": 7.357142857142858e-06, |
| "loss": 1.3455, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.26607142857142857, |
| "grad_norm": 0.36095982789993286, |
| "learning_rate": 7.339285714285714e-06, |
| "loss": 1.3997, |
| "step": 149 |
| }, |
| { |
| "epoch": 0.26785714285714285, |
| "grad_norm": 0.4379737079143524, |
| "learning_rate": 7.321428571428572e-06, |
| "loss": 1.4803, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.26964285714285713, |
| "grad_norm": 0.36974653601646423, |
| "learning_rate": 7.303571428571429e-06, |
| "loss": 1.4116, |
| "step": 151 |
| }, |
| { |
| "epoch": 0.2714285714285714, |
| "grad_norm": 0.3808487057685852, |
| "learning_rate": 7.285714285714286e-06, |
| "loss": 1.3931, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.2732142857142857, |
| "grad_norm": 0.38126876950263977, |
| "learning_rate": 7.267857142857143e-06, |
| "loss": 1.3887, |
| "step": 153 |
| }, |
| { |
| "epoch": 0.275, |
| "grad_norm": 0.39696696400642395, |
| "learning_rate": 7.25e-06, |
| "loss": 1.4065, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.2767857142857143, |
| "grad_norm": 0.3827652633190155, |
| "learning_rate": 7.232142857142858e-06, |
| "loss": 1.3768, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.2785714285714286, |
| "grad_norm": 0.37400951981544495, |
| "learning_rate": 7.2142857142857145e-06, |
| "loss": 1.3904, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.28035714285714286, |
| "grad_norm": 0.3787144720554352, |
| "learning_rate": 7.196428571428572e-06, |
| "loss": 1.4174, |
| "step": 157 |
| }, |
| { |
| "epoch": 0.28214285714285714, |
| "grad_norm": 0.378603994846344, |
| "learning_rate": 7.178571428571429e-06, |
| "loss": 1.348, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.2839285714285714, |
| "grad_norm": 0.39289960265159607, |
| "learning_rate": 7.160714285714287e-06, |
| "loss": 1.3872, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.2857142857142857, |
| "grad_norm": 0.38201239705085754, |
| "learning_rate": 7.1428571428571436e-06, |
| "loss": 1.3866, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.2875, |
| "grad_norm": 0.37428876757621765, |
| "learning_rate": 7.125e-06, |
| "loss": 1.3853, |
| "step": 161 |
| }, |
| { |
| "epoch": 0.2892857142857143, |
| "grad_norm": 0.3859560489654541, |
| "learning_rate": 7.107142857142858e-06, |
| "loss": 1.3971, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.2910714285714286, |
| "grad_norm": 0.4165991246700287, |
| "learning_rate": 7.089285714285715e-06, |
| "loss": 1.4174, |
| "step": 163 |
| }, |
| { |
| "epoch": 0.29285714285714287, |
| "grad_norm": 0.43760836124420166, |
| "learning_rate": 7.0714285714285726e-06, |
| "loss": 1.3777, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.29464285714285715, |
| "grad_norm": 0.37146738171577454, |
| "learning_rate": 7.053571428571429e-06, |
| "loss": 1.3949, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.29642857142857143, |
| "grad_norm": 0.3897382915019989, |
| "learning_rate": 7.035714285714287e-06, |
| "loss": 1.3523, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.2982142857142857, |
| "grad_norm": 0.4206221401691437, |
| "learning_rate": 7.017857142857143e-06, |
| "loss": 1.364, |
| "step": 167 |
| }, |
| { |
| "epoch": 0.3, |
| "grad_norm": 0.3997578024864197, |
| "learning_rate": 7e-06, |
| "loss": 1.3687, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.30178571428571427, |
| "grad_norm": 0.3917233943939209, |
| "learning_rate": 6.9821428571428576e-06, |
| "loss": 1.3873, |
| "step": 169 |
| }, |
| { |
| "epoch": 0.30357142857142855, |
| "grad_norm": 0.4036829471588135, |
| "learning_rate": 6.964285714285714e-06, |
| "loss": 1.3728, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.3053571428571429, |
| "grad_norm": 0.41000431776046753, |
| "learning_rate": 6.946428571428572e-06, |
| "loss": 1.298, |
| "step": 171 |
| }, |
| { |
| "epoch": 0.30714285714285716, |
| "grad_norm": 0.40925708413124084, |
| "learning_rate": 6.928571428571429e-06, |
| "loss": 1.2966, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.30892857142857144, |
| "grad_norm": 0.43786993622779846, |
| "learning_rate": 6.910714285714286e-06, |
| "loss": 1.4269, |
| "step": 173 |
| }, |
| { |
| "epoch": 0.3107142857142857, |
| "grad_norm": 0.41700488328933716, |
| "learning_rate": 6.892857142857143e-06, |
| "loss": 1.3893, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.3125, |
| "grad_norm": 0.5371460318565369, |
| "learning_rate": 6.875e-06, |
| "loss": 1.3981, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.3142857142857143, |
| "grad_norm": 0.39283287525177, |
| "learning_rate": 6.857142857142858e-06, |
| "loss": 1.3413, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.31607142857142856, |
| "grad_norm": 0.450718492269516, |
| "learning_rate": 6.839285714285715e-06, |
| "loss": 1.3852, |
| "step": 177 |
| }, |
| { |
| "epoch": 0.31785714285714284, |
| "grad_norm": 0.3878072500228882, |
| "learning_rate": 6.8214285714285724e-06, |
| "loss": 1.347, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.3196428571428571, |
| "grad_norm": 0.3963475525379181, |
| "learning_rate": 6.803571428571429e-06, |
| "loss": 1.3509, |
| "step": 179 |
| }, |
| { |
| "epoch": 0.32142857142857145, |
| "grad_norm": 0.3902972638607025, |
| "learning_rate": 6.785714285714287e-06, |
| "loss": 1.4188, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.32321428571428573, |
| "grad_norm": 0.4161182641983032, |
| "learning_rate": 6.767857142857144e-06, |
| "loss": 1.3397, |
| "step": 181 |
| }, |
| { |
| "epoch": 0.325, |
| "grad_norm": 0.4012393355369568, |
| "learning_rate": 6.750000000000001e-06, |
| "loss": 1.4165, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.3267857142857143, |
| "grad_norm": 0.42954060435295105, |
| "learning_rate": 6.732142857142858e-06, |
| "loss": 1.3531, |
| "step": 183 |
| }, |
| { |
| "epoch": 0.32857142857142857, |
| "grad_norm": 0.3946102261543274, |
| "learning_rate": 6.714285714285714e-06, |
| "loss": 1.425, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.33035714285714285, |
| "grad_norm": 0.4067486524581909, |
| "learning_rate": 6.696428571428571e-06, |
| "loss": 1.3773, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.33214285714285713, |
| "grad_norm": 0.4191884696483612, |
| "learning_rate": 6.678571428571429e-06, |
| "loss": 1.2963, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.3339285714285714, |
| "grad_norm": 0.3840485215187073, |
| "learning_rate": 6.660714285714286e-06, |
| "loss": 1.3377, |
| "step": 187 |
| }, |
| { |
| "epoch": 0.3357142857142857, |
| "grad_norm": 0.4106978476047516, |
| "learning_rate": 6.642857142857143e-06, |
| "loss": 1.3604, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.3375, |
| "grad_norm": 0.4163394570350647, |
| "learning_rate": 6.625e-06, |
| "loss": 1.3599, |
| "step": 189 |
| }, |
| { |
| "epoch": 0.3392857142857143, |
| "grad_norm": 0.3918200433254242, |
| "learning_rate": 6.607142857142858e-06, |
| "loss": 1.3012, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.3410714285714286, |
| "grad_norm": 0.4022958278656006, |
| "learning_rate": 6.589285714285715e-06, |
| "loss": 1.3567, |
| "step": 191 |
| }, |
| { |
| "epoch": 0.34285714285714286, |
| "grad_norm": 0.40892308950424194, |
| "learning_rate": 6.571428571428572e-06, |
| "loss": 1.3227, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.34464285714285714, |
| "grad_norm": 0.4009507894515991, |
| "learning_rate": 6.553571428571429e-06, |
| "loss": 1.3213, |
| "step": 193 |
| }, |
| { |
| "epoch": 0.3464285714285714, |
| "grad_norm": 0.4105026125907898, |
| "learning_rate": 6.535714285714286e-06, |
| "loss": 1.3223, |
| "step": 194 |
| }, |
| { |
| "epoch": 0.3482142857142857, |
| "grad_norm": 0.4591858983039856, |
| "learning_rate": 6.517857142857144e-06, |
| "loss": 1.3754, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.35, |
| "grad_norm": 0.4026089906692505, |
| "learning_rate": 6.5000000000000004e-06, |
| "loss": 1.3453, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.3517857142857143, |
| "grad_norm": 0.4099741280078888, |
| "learning_rate": 6.482142857142858e-06, |
| "loss": 1.3592, |
| "step": 197 |
| }, |
| { |
| "epoch": 0.3535714285714286, |
| "grad_norm": 0.4273921549320221, |
| "learning_rate": 6.464285714285715e-06, |
| "loss": 1.3119, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.35535714285714287, |
| "grad_norm": 0.415178120136261, |
| "learning_rate": 6.446428571428573e-06, |
| "loss": 1.3219, |
| "step": 199 |
| }, |
| { |
| "epoch": 0.35714285714285715, |
| "grad_norm": 0.4362969398498535, |
| "learning_rate": 6.4285714285714295e-06, |
| "loss": 1.3299, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.35892857142857143, |
| "grad_norm": 0.4466477036476135, |
| "learning_rate": 6.410714285714287e-06, |
| "loss": 1.3376, |
| "step": 201 |
| }, |
| { |
| "epoch": 0.3607142857142857, |
| "grad_norm": 0.44762319326400757, |
| "learning_rate": 6.392857142857143e-06, |
| "loss": 1.3716, |
| "step": 202 |
| }, |
| { |
| "epoch": 0.3625, |
| "grad_norm": 0.42173707485198975, |
| "learning_rate": 6.375e-06, |
| "loss": 1.3652, |
| "step": 203 |
| }, |
| { |
| "epoch": 0.36428571428571427, |
| "grad_norm": 0.5742844343185425, |
| "learning_rate": 6.357142857142858e-06, |
| "loss": 1.3848, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.36607142857142855, |
| "grad_norm": 0.437112957239151, |
| "learning_rate": 6.3392857142857145e-06, |
| "loss": 1.3121, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.3678571428571429, |
| "grad_norm": 0.4026128351688385, |
| "learning_rate": 6.321428571428571e-06, |
| "loss": 1.3321, |
| "step": 206 |
| }, |
| { |
| "epoch": 0.36964285714285716, |
| "grad_norm": 0.4601041376590729, |
| "learning_rate": 6.303571428571429e-06, |
| "loss": 1.3313, |
| "step": 207 |
| }, |
| { |
| "epoch": 0.37142857142857144, |
| "grad_norm": 0.422014057636261, |
| "learning_rate": 6.285714285714286e-06, |
| "loss": 1.3062, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.3732142857142857, |
| "grad_norm": 0.4235393702983856, |
| "learning_rate": 6.2678571428571435e-06, |
| "loss": 1.3259, |
| "step": 209 |
| }, |
| { |
| "epoch": 0.375, |
| "grad_norm": 0.3982096016407013, |
| "learning_rate": 6.25e-06, |
| "loss": 1.3089, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.3767857142857143, |
| "grad_norm": 0.44556036591529846, |
| "learning_rate": 6.232142857142858e-06, |
| "loss": 1.3831, |
| "step": 211 |
| }, |
| { |
| "epoch": 0.37857142857142856, |
| "grad_norm": 0.4298821687698364, |
| "learning_rate": 6.214285714285715e-06, |
| "loss": 1.3776, |
| "step": 212 |
| }, |
| { |
| "epoch": 0.38035714285714284, |
| "grad_norm": 0.43510201573371887, |
| "learning_rate": 6.1964285714285725e-06, |
| "loss": 1.389, |
| "step": 213 |
| }, |
| { |
| "epoch": 0.3821428571428571, |
| "grad_norm": 0.455490380525589, |
| "learning_rate": 6.178571428571429e-06, |
| "loss": 1.4085, |
| "step": 214 |
| }, |
| { |
| "epoch": 0.38392857142857145, |
| "grad_norm": 0.5122373700141907, |
| "learning_rate": 6.160714285714286e-06, |
| "loss": 1.3985, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.38571428571428573, |
| "grad_norm": 0.4266716241836548, |
| "learning_rate": 6.142857142857144e-06, |
| "loss": 1.3463, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.3875, |
| "grad_norm": 0.4357399344444275, |
| "learning_rate": 6.125000000000001e-06, |
| "loss": 1.3664, |
| "step": 217 |
| }, |
| { |
| "epoch": 0.3892857142857143, |
| "grad_norm": 0.5229921340942383, |
| "learning_rate": 6.107142857142858e-06, |
| "loss": 1.2992, |
| "step": 218 |
| }, |
| { |
| "epoch": 0.39107142857142857, |
| "grad_norm": 0.4309137761592865, |
| "learning_rate": 6.089285714285714e-06, |
| "loss": 1.3435, |
| "step": 219 |
| }, |
| { |
| "epoch": 0.39285714285714285, |
| "grad_norm": 0.45066171884536743, |
| "learning_rate": 6.071428571428571e-06, |
| "loss": 1.3517, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.39464285714285713, |
| "grad_norm": 0.4304474890232086, |
| "learning_rate": 6.053571428571429e-06, |
| "loss": 1.2906, |
| "step": 221 |
| }, |
| { |
| "epoch": 0.3964285714285714, |
| "grad_norm": 0.43573635816574097, |
| "learning_rate": 6.035714285714286e-06, |
| "loss": 1.2647, |
| "step": 222 |
| }, |
| { |
| "epoch": 0.3982142857142857, |
| "grad_norm": 0.5297744274139404, |
| "learning_rate": 6.017857142857143e-06, |
| "loss": 1.3169, |
| "step": 223 |
| }, |
| { |
| "epoch": 0.4, |
| "grad_norm": 0.5126820802688599, |
| "learning_rate": 6e-06, |
| "loss": 1.2604, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.4017857142857143, |
| "grad_norm": 0.44824084639549255, |
| "learning_rate": 5.982142857142858e-06, |
| "loss": 1.3573, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.4035714285714286, |
| "grad_norm": 0.6343708634376526, |
| "learning_rate": 5.964285714285715e-06, |
| "loss": 1.447, |
| "step": 226 |
| }, |
| { |
| "epoch": 0.40535714285714286, |
| "grad_norm": 0.4232017993927002, |
| "learning_rate": 5.9464285714285715e-06, |
| "loss": 1.3303, |
| "step": 227 |
| }, |
| { |
| "epoch": 0.40714285714285714, |
| "grad_norm": 0.44813570380210876, |
| "learning_rate": 5.928571428571429e-06, |
| "loss": 1.3647, |
| "step": 228 |
| }, |
| { |
| "epoch": 0.4089285714285714, |
| "grad_norm": 0.46437007188796997, |
| "learning_rate": 5.910714285714286e-06, |
| "loss": 1.3534, |
| "step": 229 |
| }, |
| { |
| "epoch": 0.4107142857142857, |
| "grad_norm": 0.4851243793964386, |
| "learning_rate": 5.892857142857144e-06, |
| "loss": 1.376, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.4125, |
| "grad_norm": 0.43191760778427124, |
| "learning_rate": 5.8750000000000005e-06, |
| "loss": 1.3695, |
| "step": 231 |
| }, |
| { |
| "epoch": 0.4142857142857143, |
| "grad_norm": 0.4866816997528076, |
| "learning_rate": 5.857142857142858e-06, |
| "loss": 1.3624, |
| "step": 232 |
| }, |
| { |
| "epoch": 0.4160714285714286, |
| "grad_norm": 0.4390086531639099, |
| "learning_rate": 5.839285714285715e-06, |
| "loss": 1.3607, |
| "step": 233 |
| }, |
| { |
| "epoch": 0.41785714285714287, |
| "grad_norm": 0.6692396998405457, |
| "learning_rate": 5.821428571428573e-06, |
| "loss": 1.3467, |
| "step": 234 |
| }, |
| { |
| "epoch": 0.41964285714285715, |
| "grad_norm": 0.4408722221851349, |
| "learning_rate": 5.8035714285714295e-06, |
| "loss": 1.3205, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.42142857142857143, |
| "grad_norm": 0.5886948704719543, |
| "learning_rate": 5.785714285714286e-06, |
| "loss": 1.3403, |
| "step": 236 |
| }, |
| { |
| "epoch": 0.4232142857142857, |
| "grad_norm": 0.6418564319610596, |
| "learning_rate": 5.767857142857143e-06, |
| "loss": 1.4085, |
| "step": 237 |
| }, |
| { |
| "epoch": 0.425, |
| "grad_norm": 0.46830517053604126, |
| "learning_rate": 5.75e-06, |
| "loss": 1.3981, |
| "step": 238 |
| }, |
| { |
| "epoch": 0.42678571428571427, |
| "grad_norm": 0.4939388036727905, |
| "learning_rate": 5.732142857142857e-06, |
| "loss": 1.3607, |
| "step": 239 |
| }, |
| { |
| "epoch": 0.42857142857142855, |
| "grad_norm": 0.4522291123867035, |
| "learning_rate": 5.7142857142857145e-06, |
| "loss": 1.331, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.4303571428571429, |
| "grad_norm": 0.45634329319000244, |
| "learning_rate": 5.696428571428571e-06, |
| "loss": 1.3513, |
| "step": 241 |
| }, |
| { |
| "epoch": 0.43214285714285716, |
| "grad_norm": 0.4688902795314789, |
| "learning_rate": 5.678571428571429e-06, |
| "loss": 1.314, |
| "step": 242 |
| }, |
| { |
| "epoch": 0.43392857142857144, |
| "grad_norm": 0.6173244714736938, |
| "learning_rate": 5.660714285714286e-06, |
| "loss": 1.3453, |
| "step": 243 |
| }, |
| { |
| "epoch": 0.4357142857142857, |
| "grad_norm": 0.4678085446357727, |
| "learning_rate": 5.6428571428571435e-06, |
| "loss": 1.3813, |
| "step": 244 |
| }, |
| { |
| "epoch": 0.4375, |
| "grad_norm": 0.4518805742263794, |
| "learning_rate": 5.625e-06, |
| "loss": 1.3408, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.4392857142857143, |
| "grad_norm": 0.44260984659194946, |
| "learning_rate": 5.607142857142858e-06, |
| "loss": 1.3764, |
| "step": 246 |
| }, |
| { |
| "epoch": 0.44107142857142856, |
| "grad_norm": 0.4549272060394287, |
| "learning_rate": 5.589285714285715e-06, |
| "loss": 1.3144, |
| "step": 247 |
| }, |
| { |
| "epoch": 0.44285714285714284, |
| "grad_norm": 0.4923824071884155, |
| "learning_rate": 5.571428571428572e-06, |
| "loss": 1.3239, |
| "step": 248 |
| }, |
| { |
| "epoch": 0.4446428571428571, |
| "grad_norm": 0.45576098561286926, |
| "learning_rate": 5.553571428571429e-06, |
| "loss": 1.3326, |
| "step": 249 |
| }, |
| { |
| "epoch": 0.44642857142857145, |
| "grad_norm": 0.463734894990921, |
| "learning_rate": 5.535714285714286e-06, |
| "loss": 1.4091, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.44821428571428573, |
| "grad_norm": 0.6547235250473022, |
| "learning_rate": 5.517857142857144e-06, |
| "loss": 1.3331, |
| "step": 251 |
| }, |
| { |
| "epoch": 0.45, |
| "grad_norm": 0.47086387872695923, |
| "learning_rate": 5.500000000000001e-06, |
| "loss": 1.2845, |
| "step": 252 |
| }, |
| { |
| "epoch": 0.4517857142857143, |
| "grad_norm": 0.490377813577652, |
| "learning_rate": 5.482142857142858e-06, |
| "loss": 1.3323, |
| "step": 253 |
| }, |
| { |
| "epoch": 0.45357142857142857, |
| "grad_norm": 0.44158506393432617, |
| "learning_rate": 5.464285714285714e-06, |
| "loss": 1.3184, |
| "step": 254 |
| }, |
| { |
| "epoch": 0.45535714285714285, |
| "grad_norm": 0.4678877592086792, |
| "learning_rate": 5.446428571428571e-06, |
| "loss": 1.3521, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.45714285714285713, |
| "grad_norm": 0.437651664018631, |
| "learning_rate": 5.428571428571429e-06, |
| "loss": 1.2956, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.4589285714285714, |
| "grad_norm": 0.46878582239151, |
| "learning_rate": 5.410714285714286e-06, |
| "loss": 1.2902, |
| "step": 257 |
| }, |
| { |
| "epoch": 0.4607142857142857, |
| "grad_norm": 0.46873459219932556, |
| "learning_rate": 5.392857142857143e-06, |
| "loss": 1.344, |
| "step": 258 |
| }, |
| { |
| "epoch": 0.4625, |
| "grad_norm": 0.4619480073451996, |
| "learning_rate": 5.375e-06, |
| "loss": 1.3006, |
| "step": 259 |
| }, |
| { |
| "epoch": 0.4642857142857143, |
| "grad_norm": 0.4431358277797699, |
| "learning_rate": 5.357142857142857e-06, |
| "loss": 1.3011, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.4660714285714286, |
| "grad_norm": 0.4527128338813782, |
| "learning_rate": 5.339285714285715e-06, |
| "loss": 1.3425, |
| "step": 261 |
| }, |
| { |
| "epoch": 0.46785714285714286, |
| "grad_norm": 0.44812703132629395, |
| "learning_rate": 5.3214285714285715e-06, |
| "loss": 1.3324, |
| "step": 262 |
| }, |
| { |
| "epoch": 0.46964285714285714, |
| "grad_norm": 0.4387049973011017, |
| "learning_rate": 5.303571428571429e-06, |
| "loss": 1.2346, |
| "step": 263 |
| }, |
| { |
| "epoch": 0.4714285714285714, |
| "grad_norm": 0.49831196665763855, |
| "learning_rate": 5.285714285714286e-06, |
| "loss": 1.3146, |
| "step": 264 |
| }, |
| { |
| "epoch": 0.4732142857142857, |
| "grad_norm": 0.6227532029151917, |
| "learning_rate": 5.267857142857144e-06, |
| "loss": 1.3182, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.475, |
| "grad_norm": 0.7037027478218079, |
| "learning_rate": 5.2500000000000006e-06, |
| "loss": 1.4061, |
| "step": 266 |
| }, |
| { |
| "epoch": 0.4767857142857143, |
| "grad_norm": 0.4282563328742981, |
| "learning_rate": 5.232142857142858e-06, |
| "loss": 1.3054, |
| "step": 267 |
| }, |
| { |
| "epoch": 0.4785714285714286, |
| "grad_norm": 0.46296414732933044, |
| "learning_rate": 5.214285714285715e-06, |
| "loss": 1.3316, |
| "step": 268 |
| }, |
| { |
| "epoch": 0.48035714285714287, |
| "grad_norm": 0.44861307740211487, |
| "learning_rate": 5.196428571428572e-06, |
| "loss": 1.3611, |
| "step": 269 |
| }, |
| { |
| "epoch": 0.48214285714285715, |
| "grad_norm": 0.6239879727363586, |
| "learning_rate": 5.1785714285714296e-06, |
| "loss": 1.3288, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.48392857142857143, |
| "grad_norm": 0.4510335624217987, |
| "learning_rate": 5.160714285714286e-06, |
| "loss": 1.2894, |
| "step": 271 |
| }, |
| { |
| "epoch": 0.4857142857142857, |
| "grad_norm": 0.44531846046447754, |
| "learning_rate": 5.142857142857142e-06, |
| "loss": 1.3373, |
| "step": 272 |
| }, |
| { |
| "epoch": 0.4875, |
| "grad_norm": 0.527055025100708, |
| "learning_rate": 5.125e-06, |
| "loss": 1.2901, |
| "step": 273 |
| }, |
| { |
| "epoch": 0.48928571428571427, |
| "grad_norm": 0.4528893530368805, |
| "learning_rate": 5.107142857142857e-06, |
| "loss": 1.2937, |
| "step": 274 |
| }, |
| { |
| "epoch": 0.49107142857142855, |
| "grad_norm": 0.4583114981651306, |
| "learning_rate": 5.0892857142857146e-06, |
| "loss": 1.303, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.4928571428571429, |
| "grad_norm": 0.43386757373809814, |
| "learning_rate": 5.071428571428571e-06, |
| "loss": 1.2684, |
| "step": 276 |
| }, |
| { |
| "epoch": 0.49464285714285716, |
| "grad_norm": 0.4504014253616333, |
| "learning_rate": 5.053571428571429e-06, |
| "loss": 1.3475, |
| "step": 277 |
| }, |
| { |
| "epoch": 0.49642857142857144, |
| "grad_norm": 0.5378859639167786, |
| "learning_rate": 5.035714285714286e-06, |
| "loss": 1.325, |
| "step": 278 |
| }, |
| { |
| "epoch": 0.4982142857142857, |
| "grad_norm": 0.45138758420944214, |
| "learning_rate": 5.017857142857144e-06, |
| "loss": 1.3187, |
| "step": 279 |
| }, |
| { |
| "epoch": 0.5, |
| "grad_norm": 0.446903258562088, |
| "learning_rate": 5e-06, |
| "loss": 1.2773, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.5017857142857143, |
| "grad_norm": 0.5092656016349792, |
| "learning_rate": 4.982142857142857e-06, |
| "loss": 1.3701, |
| "step": 281 |
| }, |
| { |
| "epoch": 0.5035714285714286, |
| "grad_norm": 0.4944141209125519, |
| "learning_rate": 4.964285714285715e-06, |
| "loss": 1.3226, |
| "step": 282 |
| }, |
| { |
| "epoch": 0.5053571428571428, |
| "grad_norm": 0.4606040418148041, |
| "learning_rate": 4.946428571428572e-06, |
| "loss": 1.2847, |
| "step": 283 |
| }, |
| { |
| "epoch": 0.5071428571428571, |
| "grad_norm": 0.4338245689868927, |
| "learning_rate": 4.928571428571429e-06, |
| "loss": 1.3507, |
| "step": 284 |
| }, |
| { |
| "epoch": 0.5089285714285714, |
| "grad_norm": 0.469093918800354, |
| "learning_rate": 4.910714285714286e-06, |
| "loss": 1.2832, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.5107142857142857, |
| "grad_norm": 0.47597774863243103, |
| "learning_rate": 4.892857142857143e-06, |
| "loss": 1.3471, |
| "step": 286 |
| }, |
| { |
| "epoch": 0.5125, |
| "grad_norm": 0.4709608554840088, |
| "learning_rate": 4.875e-06, |
| "loss": 1.2719, |
| "step": 287 |
| }, |
| { |
| "epoch": 0.5142857142857142, |
| "grad_norm": 0.47710120677948, |
| "learning_rate": 4.857142857142858e-06, |
| "loss": 1.328, |
| "step": 288 |
| }, |
| { |
| "epoch": 0.5160714285714286, |
| "grad_norm": 0.4538082480430603, |
| "learning_rate": 4.839285714285714e-06, |
| "loss": 1.2903, |
| "step": 289 |
| }, |
| { |
| "epoch": 0.5178571428571429, |
| "grad_norm": 0.45876508951187134, |
| "learning_rate": 4.821428571428572e-06, |
| "loss": 1.3186, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.5196428571428572, |
| "grad_norm": 0.5364006757736206, |
| "learning_rate": 4.803571428571429e-06, |
| "loss": 1.2773, |
| "step": 291 |
| }, |
| { |
| "epoch": 0.5214285714285715, |
| "grad_norm": 0.5031774640083313, |
| "learning_rate": 4.785714285714287e-06, |
| "loss": 1.3626, |
| "step": 292 |
| }, |
| { |
| "epoch": 0.5232142857142857, |
| "grad_norm": 0.46341967582702637, |
| "learning_rate": 4.7678571428571434e-06, |
| "loss": 1.2932, |
| "step": 293 |
| }, |
| { |
| "epoch": 0.525, |
| "grad_norm": 0.47424939274787903, |
| "learning_rate": 4.75e-06, |
| "loss": 1.3062, |
| "step": 294 |
| }, |
| { |
| "epoch": 0.5267857142857143, |
| "grad_norm": 0.4689320921897888, |
| "learning_rate": 4.732142857142857e-06, |
| "loss": 1.3843, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.5285714285714286, |
| "grad_norm": 0.490421861410141, |
| "learning_rate": 4.714285714285715e-06, |
| "loss": 1.3136, |
| "step": 296 |
| }, |
| { |
| "epoch": 0.5303571428571429, |
| "grad_norm": 0.44690409302711487, |
| "learning_rate": 4.696428571428572e-06, |
| "loss": 1.288, |
| "step": 297 |
| }, |
| { |
| "epoch": 0.5321428571428571, |
| "grad_norm": 0.45565712451934814, |
| "learning_rate": 4.678571428571429e-06, |
| "loss": 1.3222, |
| "step": 298 |
| }, |
| { |
| "epoch": 0.5339285714285714, |
| "grad_norm": 0.4677983820438385, |
| "learning_rate": 4.660714285714286e-06, |
| "loss": 1.3776, |
| "step": 299 |
| }, |
| { |
| "epoch": 0.5357142857142857, |
| "grad_norm": 0.4681625962257385, |
| "learning_rate": 4.642857142857144e-06, |
| "loss": 1.3122, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.5375, |
| "grad_norm": 0.46499693393707275, |
| "learning_rate": 4.625000000000001e-06, |
| "loss": 1.2907, |
| "step": 301 |
| }, |
| { |
| "epoch": 0.5392857142857143, |
| "grad_norm": 0.4690093398094177, |
| "learning_rate": 4.6071428571428574e-06, |
| "loss": 1.2977, |
| "step": 302 |
| }, |
| { |
| "epoch": 0.5410714285714285, |
| "grad_norm": 0.4936232566833496, |
| "learning_rate": 4.589285714285714e-06, |
| "loss": 1.348, |
| "step": 303 |
| }, |
| { |
| "epoch": 0.5428571428571428, |
| "grad_norm": 0.5024741888046265, |
| "learning_rate": 4.571428571428572e-06, |
| "loss": 1.3295, |
| "step": 304 |
| }, |
| { |
| "epoch": 0.5446428571428571, |
| "grad_norm": 0.48183590173721313, |
| "learning_rate": 4.553571428571429e-06, |
| "loss": 1.2871, |
| "step": 305 |
| }, |
| { |
| "epoch": 0.5464285714285714, |
| "grad_norm": 0.5088504552841187, |
| "learning_rate": 4.5357142857142865e-06, |
| "loss": 1.3618, |
| "step": 306 |
| }, |
| { |
| "epoch": 0.5482142857142858, |
| "grad_norm": 0.46338167786598206, |
| "learning_rate": 4.517857142857143e-06, |
| "loss": 1.2916, |
| "step": 307 |
| }, |
| { |
| "epoch": 0.55, |
| "grad_norm": 0.5637802481651306, |
| "learning_rate": 4.5e-06, |
| "loss": 1.2854, |
| "step": 308 |
| }, |
| { |
| "epoch": 0.5517857142857143, |
| "grad_norm": 0.4742050766944885, |
| "learning_rate": 4.482142857142858e-06, |
| "loss": 1.2854, |
| "step": 309 |
| }, |
| { |
| "epoch": 0.5535714285714286, |
| "grad_norm": 0.4686720669269562, |
| "learning_rate": 4.464285714285715e-06, |
| "loss": 1.3777, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.5553571428571429, |
| "grad_norm": 0.466203510761261, |
| "learning_rate": 4.4464285714285715e-06, |
| "loss": 1.3441, |
| "step": 311 |
| }, |
| { |
| "epoch": 0.5571428571428572, |
| "grad_norm": 0.4458286464214325, |
| "learning_rate": 4.428571428571429e-06, |
| "loss": 1.2773, |
| "step": 312 |
| }, |
| { |
| "epoch": 0.5589285714285714, |
| "grad_norm": 0.4533957839012146, |
| "learning_rate": 4.410714285714286e-06, |
| "loss": 1.3226, |
| "step": 313 |
| }, |
| { |
| "epoch": 0.5607142857142857, |
| "grad_norm": 0.44287845492362976, |
| "learning_rate": 4.392857142857143e-06, |
| "loss": 1.2981, |
| "step": 314 |
| }, |
| { |
| "epoch": 0.5625, |
| "grad_norm": 0.4448222219944, |
| "learning_rate": 4.3750000000000005e-06, |
| "loss": 1.3291, |
| "step": 315 |
| }, |
| { |
| "epoch": 0.5642857142857143, |
| "grad_norm": 0.473034530878067, |
| "learning_rate": 4.357142857142857e-06, |
| "loss": 1.3117, |
| "step": 316 |
| }, |
| { |
| "epoch": 0.5660714285714286, |
| "grad_norm": 0.46073392033576965, |
| "learning_rate": 4.339285714285715e-06, |
| "loss": 1.3144, |
| "step": 317 |
| }, |
| { |
| "epoch": 0.5678571428571428, |
| "grad_norm": 0.47019240260124207, |
| "learning_rate": 4.321428571428572e-06, |
| "loss": 1.3596, |
| "step": 318 |
| }, |
| { |
| "epoch": 0.5696428571428571, |
| "grad_norm": 0.6573873162269592, |
| "learning_rate": 4.3035714285714295e-06, |
| "loss": 1.3042, |
| "step": 319 |
| }, |
| { |
| "epoch": 0.5714285714285714, |
| "grad_norm": 0.47485339641571045, |
| "learning_rate": 4.2857142857142855e-06, |
| "loss": 1.3376, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.5732142857142857, |
| "grad_norm": 0.4572908878326416, |
| "learning_rate": 4.267857142857143e-06, |
| "loss": 1.3182, |
| "step": 321 |
| }, |
| { |
| "epoch": 0.575, |
| "grad_norm": 0.5253431797027588, |
| "learning_rate": 4.25e-06, |
| "loss": 1.2737, |
| "step": 322 |
| }, |
| { |
| "epoch": 0.5767857142857142, |
| "grad_norm": 0.5506420135498047, |
| "learning_rate": 4.232142857142858e-06, |
| "loss": 1.3182, |
| "step": 323 |
| }, |
| { |
| "epoch": 0.5785714285714286, |
| "grad_norm": 0.46640023589134216, |
| "learning_rate": 4.2142857142857145e-06, |
| "loss": 1.2722, |
| "step": 324 |
| }, |
| { |
| "epoch": 0.5803571428571429, |
| "grad_norm": 0.5084778666496277, |
| "learning_rate": 4.196428571428572e-06, |
| "loss": 1.2764, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.5821428571428572, |
| "grad_norm": 0.47945091128349304, |
| "learning_rate": 4.178571428571429e-06, |
| "loss": 1.2511, |
| "step": 326 |
| }, |
| { |
| "epoch": 0.5839285714285715, |
| "grad_norm": 0.4847634732723236, |
| "learning_rate": 4.160714285714287e-06, |
| "loss": 1.3431, |
| "step": 327 |
| }, |
| { |
| "epoch": 0.5857142857142857, |
| "grad_norm": 0.46884429454803467, |
| "learning_rate": 4.1428571428571435e-06, |
| "loss": 1.2875, |
| "step": 328 |
| }, |
| { |
| "epoch": 0.5875, |
| "grad_norm": 0.47149261832237244, |
| "learning_rate": 4.125e-06, |
| "loss": 1.318, |
| "step": 329 |
| }, |
| { |
| "epoch": 0.5892857142857143, |
| "grad_norm": 0.47463729977607727, |
| "learning_rate": 4.107142857142857e-06, |
| "loss": 1.3138, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.5910714285714286, |
| "grad_norm": 0.5062056183815002, |
| "learning_rate": 4.089285714285715e-06, |
| "loss": 1.2764, |
| "step": 331 |
| }, |
| { |
| "epoch": 0.5928571428571429, |
| "grad_norm": 0.45992037653923035, |
| "learning_rate": 4.071428571428572e-06, |
| "loss": 1.2663, |
| "step": 332 |
| }, |
| { |
| "epoch": 0.5946428571428571, |
| "grad_norm": 0.47592759132385254, |
| "learning_rate": 4.053571428571429e-06, |
| "loss": 1.3085, |
| "step": 333 |
| }, |
| { |
| "epoch": 0.5964285714285714, |
| "grad_norm": 0.545661211013794, |
| "learning_rate": 4.035714285714286e-06, |
| "loss": 1.2963, |
| "step": 334 |
| }, |
| { |
| "epoch": 0.5982142857142857, |
| "grad_norm": 0.48694783449172974, |
| "learning_rate": 4.017857142857143e-06, |
| "loss": 1.3279, |
| "step": 335 |
| }, |
| { |
| "epoch": 0.6, |
| "grad_norm": 0.4582449197769165, |
| "learning_rate": 4.000000000000001e-06, |
| "loss": 1.2888, |
| "step": 336 |
| }, |
| { |
| "epoch": 0.6017857142857143, |
| "grad_norm": 0.5217434167861938, |
| "learning_rate": 3.9821428571428575e-06, |
| "loss": 1.2232, |
| "step": 337 |
| }, |
| { |
| "epoch": 0.6035714285714285, |
| "grad_norm": 0.48110780119895935, |
| "learning_rate": 3.964285714285714e-06, |
| "loss": 1.2587, |
| "step": 338 |
| }, |
| { |
| "epoch": 0.6053571428571428, |
| "grad_norm": 0.5427893400192261, |
| "learning_rate": 3.946428571428572e-06, |
| "loss": 1.3105, |
| "step": 339 |
| }, |
| { |
| "epoch": 0.6071428571428571, |
| "grad_norm": 0.48423877358436584, |
| "learning_rate": 3.928571428571429e-06, |
| "loss": 1.2831, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.6089285714285714, |
| "grad_norm": 0.47350701689720154, |
| "learning_rate": 3.910714285714286e-06, |
| "loss": 1.2791, |
| "step": 341 |
| }, |
| { |
| "epoch": 0.6107142857142858, |
| "grad_norm": 0.6432921886444092, |
| "learning_rate": 3.892857142857143e-06, |
| "loss": 1.3398, |
| "step": 342 |
| }, |
| { |
| "epoch": 0.6125, |
| "grad_norm": 0.47377124428749084, |
| "learning_rate": 3.875e-06, |
| "loss": 1.2665, |
| "step": 343 |
| }, |
| { |
| "epoch": 0.6142857142857143, |
| "grad_norm": 0.4444401264190674, |
| "learning_rate": 3.857142857142858e-06, |
| "loss": 1.2557, |
| "step": 344 |
| }, |
| { |
| "epoch": 0.6160714285714286, |
| "grad_norm": 0.4990551769733429, |
| "learning_rate": 3.839285714285715e-06, |
| "loss": 1.3146, |
| "step": 345 |
| }, |
| { |
| "epoch": 0.6178571428571429, |
| "grad_norm": 0.4426332414150238, |
| "learning_rate": 3.8214285714285715e-06, |
| "loss": 1.2646, |
| "step": 346 |
| }, |
| { |
| "epoch": 0.6196428571428572, |
| "grad_norm": 0.46762406826019287, |
| "learning_rate": 3.8035714285714288e-06, |
| "loss": 1.2472, |
| "step": 347 |
| }, |
| { |
| "epoch": 0.6214285714285714, |
| "grad_norm": 0.4878827929496765, |
| "learning_rate": 3.785714285714286e-06, |
| "loss": 1.3176, |
| "step": 348 |
| }, |
| { |
| "epoch": 0.6232142857142857, |
| "grad_norm": 0.4643489718437195, |
| "learning_rate": 3.7678571428571433e-06, |
| "loss": 1.2463, |
| "step": 349 |
| }, |
| { |
| "epoch": 0.625, |
| "grad_norm": 0.5744695067405701, |
| "learning_rate": 3.7500000000000005e-06, |
| "loss": 1.3041, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.6267857142857143, |
| "grad_norm": 0.5968716144561768, |
| "learning_rate": 3.7321428571428573e-06, |
| "loss": 1.4068, |
| "step": 351 |
| }, |
| { |
| "epoch": 0.6285714285714286, |
| "grad_norm": 0.454670786857605, |
| "learning_rate": 3.7142857142857146e-06, |
| "loss": 1.2938, |
| "step": 352 |
| }, |
| { |
| "epoch": 0.6303571428571428, |
| "grad_norm": 0.4747679829597473, |
| "learning_rate": 3.696428571428572e-06, |
| "loss": 1.335, |
| "step": 353 |
| }, |
| { |
| "epoch": 0.6321428571428571, |
| "grad_norm": 0.47802817821502686, |
| "learning_rate": 3.678571428571429e-06, |
| "loss": 1.2879, |
| "step": 354 |
| }, |
| { |
| "epoch": 0.6339285714285714, |
| "grad_norm": 0.4985521733760834, |
| "learning_rate": 3.660714285714286e-06, |
| "loss": 1.3693, |
| "step": 355 |
| }, |
| { |
| "epoch": 0.6357142857142857, |
| "grad_norm": 0.4986526072025299, |
| "learning_rate": 3.642857142857143e-06, |
| "loss": 1.3127, |
| "step": 356 |
| }, |
| { |
| "epoch": 0.6375, |
| "grad_norm": 0.5105973482131958, |
| "learning_rate": 3.625e-06, |
| "loss": 1.286, |
| "step": 357 |
| }, |
| { |
| "epoch": 0.6392857142857142, |
| "grad_norm": 0.4753775894641876, |
| "learning_rate": 3.6071428571428573e-06, |
| "loss": 1.2683, |
| "step": 358 |
| }, |
| { |
| "epoch": 0.6410714285714286, |
| "grad_norm": 0.4934110641479492, |
| "learning_rate": 3.5892857142857145e-06, |
| "loss": 1.2676, |
| "step": 359 |
| }, |
| { |
| "epoch": 0.6428571428571429, |
| "grad_norm": 0.5534637570381165, |
| "learning_rate": 3.5714285714285718e-06, |
| "loss": 1.3717, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.6446428571428572, |
| "grad_norm": 0.49249303340911865, |
| "learning_rate": 3.553571428571429e-06, |
| "loss": 1.2703, |
| "step": 361 |
| }, |
| { |
| "epoch": 0.6464285714285715, |
| "grad_norm": 0.4779037833213806, |
| "learning_rate": 3.5357142857142863e-06, |
| "loss": 1.3419, |
| "step": 362 |
| }, |
| { |
| "epoch": 0.6482142857142857, |
| "grad_norm": 0.48425963521003723, |
| "learning_rate": 3.5178571428571435e-06, |
| "loss": 1.3201, |
| "step": 363 |
| }, |
| { |
| "epoch": 0.65, |
| "grad_norm": 0.497473806142807, |
| "learning_rate": 3.5e-06, |
| "loss": 1.25, |
| "step": 364 |
| }, |
| { |
| "epoch": 0.6517857142857143, |
| "grad_norm": 0.49431318044662476, |
| "learning_rate": 3.482142857142857e-06, |
| "loss": 1.3033, |
| "step": 365 |
| }, |
| { |
| "epoch": 0.6535714285714286, |
| "grad_norm": 0.657665491104126, |
| "learning_rate": 3.4642857142857145e-06, |
| "loss": 1.3362, |
| "step": 366 |
| }, |
| { |
| "epoch": 0.6553571428571429, |
| "grad_norm": 0.5138763189315796, |
| "learning_rate": 3.4464285714285717e-06, |
| "loss": 1.33, |
| "step": 367 |
| }, |
| { |
| "epoch": 0.6571428571428571, |
| "grad_norm": 0.5025115609169006, |
| "learning_rate": 3.428571428571429e-06, |
| "loss": 1.2995, |
| "step": 368 |
| }, |
| { |
| "epoch": 0.6589285714285714, |
| "grad_norm": 0.48064032196998596, |
| "learning_rate": 3.4107142857142862e-06, |
| "loss": 1.3438, |
| "step": 369 |
| }, |
| { |
| "epoch": 0.6607142857142857, |
| "grad_norm": 0.5210350751876831, |
| "learning_rate": 3.3928571428571435e-06, |
| "loss": 1.3845, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.6625, |
| "grad_norm": 0.4877013862133026, |
| "learning_rate": 3.3750000000000003e-06, |
| "loss": 1.3046, |
| "step": 371 |
| }, |
| { |
| "epoch": 0.6642857142857143, |
| "grad_norm": 0.4923580288887024, |
| "learning_rate": 3.357142857142857e-06, |
| "loss": 1.2809, |
| "step": 372 |
| }, |
| { |
| "epoch": 0.6660714285714285, |
| "grad_norm": 0.48784345388412476, |
| "learning_rate": 3.3392857142857144e-06, |
| "loss": 1.2812, |
| "step": 373 |
| }, |
| { |
| "epoch": 0.6678571428571428, |
| "grad_norm": 0.5311793684959412, |
| "learning_rate": 3.3214285714285716e-06, |
| "loss": 1.4334, |
| "step": 374 |
| }, |
| { |
| "epoch": 0.6696428571428571, |
| "grad_norm": 0.4956177771091461, |
| "learning_rate": 3.303571428571429e-06, |
| "loss": 1.2847, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.6714285714285714, |
| "grad_norm": 0.4931349456310272, |
| "learning_rate": 3.285714285714286e-06, |
| "loss": 1.3192, |
| "step": 376 |
| }, |
| { |
| "epoch": 0.6732142857142858, |
| "grad_norm": 0.5116465091705322, |
| "learning_rate": 3.267857142857143e-06, |
| "loss": 1.3024, |
| "step": 377 |
| }, |
| { |
| "epoch": 0.675, |
| "grad_norm": 0.499210387468338, |
| "learning_rate": 3.2500000000000002e-06, |
| "loss": 1.2661, |
| "step": 378 |
| }, |
| { |
| "epoch": 0.6767857142857143, |
| "grad_norm": 0.5030224919319153, |
| "learning_rate": 3.2321428571428575e-06, |
| "loss": 1.2374, |
| "step": 379 |
| }, |
| { |
| "epoch": 0.6785714285714286, |
| "grad_norm": 0.503461480140686, |
| "learning_rate": 3.2142857142857147e-06, |
| "loss": 1.2834, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.6803571428571429, |
| "grad_norm": 0.46770402789115906, |
| "learning_rate": 3.1964285714285716e-06, |
| "loss": 1.2843, |
| "step": 381 |
| }, |
| { |
| "epoch": 0.6821428571428572, |
| "grad_norm": 0.48501157760620117, |
| "learning_rate": 3.178571428571429e-06, |
| "loss": 1.2583, |
| "step": 382 |
| }, |
| { |
| "epoch": 0.6839285714285714, |
| "grad_norm": 0.4787866473197937, |
| "learning_rate": 3.1607142857142856e-06, |
| "loss": 1.3136, |
| "step": 383 |
| }, |
| { |
| "epoch": 0.6857142857142857, |
| "grad_norm": 0.5361959338188171, |
| "learning_rate": 3.142857142857143e-06, |
| "loss": 1.3137, |
| "step": 384 |
| }, |
| { |
| "epoch": 0.6875, |
| "grad_norm": 0.48236384987831116, |
| "learning_rate": 3.125e-06, |
| "loss": 1.2333, |
| "step": 385 |
| }, |
| { |
| "epoch": 0.6892857142857143, |
| "grad_norm": 0.4979291558265686, |
| "learning_rate": 3.1071428571428574e-06, |
| "loss": 1.2305, |
| "step": 386 |
| }, |
| { |
| "epoch": 0.6910714285714286, |
| "grad_norm": 0.4781268537044525, |
| "learning_rate": 3.0892857142857147e-06, |
| "loss": 1.2512, |
| "step": 387 |
| }, |
| { |
| "epoch": 0.6928571428571428, |
| "grad_norm": 0.539099931716919, |
| "learning_rate": 3.071428571428572e-06, |
| "loss": 1.2722, |
| "step": 388 |
| }, |
| { |
| "epoch": 0.6946428571428571, |
| "grad_norm": 0.47137516736984253, |
| "learning_rate": 3.053571428571429e-06, |
| "loss": 1.267, |
| "step": 389 |
| }, |
| { |
| "epoch": 0.6964285714285714, |
| "grad_norm": 0.4990849494934082, |
| "learning_rate": 3.0357142857142856e-06, |
| "loss": 1.2527, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.6982142857142857, |
| "grad_norm": 0.4742002487182617, |
| "learning_rate": 3.017857142857143e-06, |
| "loss": 1.252, |
| "step": 391 |
| }, |
| { |
| "epoch": 0.7, |
| "grad_norm": 0.4833225607872009, |
| "learning_rate": 3e-06, |
| "loss": 1.2527, |
| "step": 392 |
| }, |
| { |
| "epoch": 0.7017857142857142, |
| "grad_norm": 0.5953601002693176, |
| "learning_rate": 2.9821428571428573e-06, |
| "loss": 1.2919, |
| "step": 393 |
| }, |
| { |
| "epoch": 0.7035714285714286, |
| "grad_norm": 0.4824086129665375, |
| "learning_rate": 2.9642857142857146e-06, |
| "loss": 1.2951, |
| "step": 394 |
| }, |
| { |
| "epoch": 0.7053571428571429, |
| "grad_norm": 0.524111270904541, |
| "learning_rate": 2.946428571428572e-06, |
| "loss": 1.2351, |
| "step": 395 |
| }, |
| { |
| "epoch": 0.7071428571428572, |
| "grad_norm": 0.5176703333854675, |
| "learning_rate": 2.928571428571429e-06, |
| "loss": 1.346, |
| "step": 396 |
| }, |
| { |
| "epoch": 0.7089285714285715, |
| "grad_norm": 0.5603062510490417, |
| "learning_rate": 2.9107142857142863e-06, |
| "loss": 1.368, |
| "step": 397 |
| }, |
| { |
| "epoch": 0.7107142857142857, |
| "grad_norm": 0.510238766670227, |
| "learning_rate": 2.892857142857143e-06, |
| "loss": 1.2433, |
| "step": 398 |
| }, |
| { |
| "epoch": 0.7125, |
| "grad_norm": 0.5014546513557434, |
| "learning_rate": 2.875e-06, |
| "loss": 1.3554, |
| "step": 399 |
| }, |
| { |
| "epoch": 0.7142857142857143, |
| "grad_norm": 0.6319053173065186, |
| "learning_rate": 2.8571428571428573e-06, |
| "loss": 1.308, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.7160714285714286, |
| "grad_norm": 0.5223023891448975, |
| "learning_rate": 2.8392857142857145e-06, |
| "loss": 1.3143, |
| "step": 401 |
| }, |
| { |
| "epoch": 0.7178571428571429, |
| "grad_norm": 0.48164916038513184, |
| "learning_rate": 2.8214285714285718e-06, |
| "loss": 1.296, |
| "step": 402 |
| }, |
| { |
| "epoch": 0.7196428571428571, |
| "grad_norm": 0.5192731618881226, |
| "learning_rate": 2.803571428571429e-06, |
| "loss": 1.2594, |
| "step": 403 |
| }, |
| { |
| "epoch": 0.7214285714285714, |
| "grad_norm": 0.5068328380584717, |
| "learning_rate": 2.785714285714286e-06, |
| "loss": 1.3369, |
| "step": 404 |
| }, |
| { |
| "epoch": 0.7232142857142857, |
| "grad_norm": 0.6028531193733215, |
| "learning_rate": 2.767857142857143e-06, |
| "loss": 1.2731, |
| "step": 405 |
| }, |
| { |
| "epoch": 0.725, |
| "grad_norm": 0.48241686820983887, |
| "learning_rate": 2.7500000000000004e-06, |
| "loss": 1.2601, |
| "step": 406 |
| }, |
| { |
| "epoch": 0.7267857142857143, |
| "grad_norm": 0.4899542033672333, |
| "learning_rate": 2.732142857142857e-06, |
| "loss": 1.2853, |
| "step": 407 |
| }, |
| { |
| "epoch": 0.7285714285714285, |
| "grad_norm": 0.49059104919433594, |
| "learning_rate": 2.7142857142857144e-06, |
| "loss": 1.2885, |
| "step": 408 |
| }, |
| { |
| "epoch": 0.7303571428571428, |
| "grad_norm": 0.49142804741859436, |
| "learning_rate": 2.6964285714285717e-06, |
| "loss": 1.2797, |
| "step": 409 |
| }, |
| { |
| "epoch": 0.7321428571428571, |
| "grad_norm": 0.4777282178401947, |
| "learning_rate": 2.6785714285714285e-06, |
| "loss": 1.297, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.7339285714285714, |
| "grad_norm": 0.5065960884094238, |
| "learning_rate": 2.6607142857142858e-06, |
| "loss": 1.284, |
| "step": 411 |
| }, |
| { |
| "epoch": 0.7357142857142858, |
| "grad_norm": 0.5673279166221619, |
| "learning_rate": 2.642857142857143e-06, |
| "loss": 1.2991, |
| "step": 412 |
| }, |
| { |
| "epoch": 0.7375, |
| "grad_norm": 0.5462106466293335, |
| "learning_rate": 2.6250000000000003e-06, |
| "loss": 1.3213, |
| "step": 413 |
| }, |
| { |
| "epoch": 0.7392857142857143, |
| "grad_norm": 0.5614867210388184, |
| "learning_rate": 2.6071428571428575e-06, |
| "loss": 1.2939, |
| "step": 414 |
| }, |
| { |
| "epoch": 0.7410714285714286, |
| "grad_norm": 0.478646844625473, |
| "learning_rate": 2.5892857142857148e-06, |
| "loss": 1.2683, |
| "step": 415 |
| }, |
| { |
| "epoch": 0.7428571428571429, |
| "grad_norm": 0.5402962565422058, |
| "learning_rate": 2.571428571428571e-06, |
| "loss": 1.2643, |
| "step": 416 |
| }, |
| { |
| "epoch": 0.7446428571428572, |
| "grad_norm": 0.4907895028591156, |
| "learning_rate": 2.5535714285714284e-06, |
| "loss": 1.2604, |
| "step": 417 |
| }, |
| { |
| "epoch": 0.7464285714285714, |
| "grad_norm": 0.4979659914970398, |
| "learning_rate": 2.5357142857142857e-06, |
| "loss": 1.265, |
| "step": 418 |
| }, |
| { |
| "epoch": 0.7482142857142857, |
| "grad_norm": 0.4915474057197571, |
| "learning_rate": 2.517857142857143e-06, |
| "loss": 1.2653, |
| "step": 419 |
| }, |
| { |
| "epoch": 0.75, |
| "grad_norm": 0.4773080050945282, |
| "learning_rate": 2.5e-06, |
| "loss": 1.2286, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.7517857142857143, |
| "grad_norm": 0.5044196844100952, |
| "learning_rate": 2.4821428571428575e-06, |
| "loss": 1.3119, |
| "step": 421 |
| }, |
| { |
| "epoch": 0.7535714285714286, |
| "grad_norm": 0.49431711435317993, |
| "learning_rate": 2.4642857142857147e-06, |
| "loss": 1.2951, |
| "step": 422 |
| }, |
| { |
| "epoch": 0.7553571428571428, |
| "grad_norm": 0.49043866991996765, |
| "learning_rate": 2.4464285714285715e-06, |
| "loss": 1.277, |
| "step": 423 |
| }, |
| { |
| "epoch": 0.7571428571428571, |
| "grad_norm": 0.6170372366905212, |
| "learning_rate": 2.428571428571429e-06, |
| "loss": 1.3304, |
| "step": 424 |
| }, |
| { |
| "epoch": 0.7589285714285714, |
| "grad_norm": 0.4647572338581085, |
| "learning_rate": 2.410714285714286e-06, |
| "loss": 1.24, |
| "step": 425 |
| }, |
| { |
| "epoch": 0.7607142857142857, |
| "grad_norm": 0.49019157886505127, |
| "learning_rate": 2.3928571428571433e-06, |
| "loss": 1.2307, |
| "step": 426 |
| }, |
| { |
| "epoch": 0.7625, |
| "grad_norm": 0.6097828149795532, |
| "learning_rate": 2.375e-06, |
| "loss": 1.2541, |
| "step": 427 |
| }, |
| { |
| "epoch": 0.7642857142857142, |
| "grad_norm": 0.5185028314590454, |
| "learning_rate": 2.3571428571428574e-06, |
| "loss": 1.372, |
| "step": 428 |
| }, |
| { |
| "epoch": 0.7660714285714286, |
| "grad_norm": 0.49547451734542847, |
| "learning_rate": 2.3392857142857146e-06, |
| "loss": 1.2792, |
| "step": 429 |
| }, |
| { |
| "epoch": 0.7678571428571429, |
| "grad_norm": 0.5313171744346619, |
| "learning_rate": 2.321428571428572e-06, |
| "loss": 1.2909, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.7696428571428572, |
| "grad_norm": 0.7000820636749268, |
| "learning_rate": 2.3035714285714287e-06, |
| "loss": 1.2131, |
| "step": 431 |
| }, |
| { |
| "epoch": 0.7714285714285715, |
| "grad_norm": 0.49376264214515686, |
| "learning_rate": 2.285714285714286e-06, |
| "loss": 1.2634, |
| "step": 432 |
| }, |
| { |
| "epoch": 0.7732142857142857, |
| "grad_norm": 0.5121849179267883, |
| "learning_rate": 2.2678571428571432e-06, |
| "loss": 1.3224, |
| "step": 433 |
| }, |
| { |
| "epoch": 0.775, |
| "grad_norm": 0.6177911162376404, |
| "learning_rate": 2.25e-06, |
| "loss": 1.3438, |
| "step": 434 |
| }, |
| { |
| "epoch": 0.7767857142857143, |
| "grad_norm": 0.682819128036499, |
| "learning_rate": 2.2321428571428573e-06, |
| "loss": 1.3062, |
| "step": 435 |
| }, |
| { |
| "epoch": 0.7785714285714286, |
| "grad_norm": 0.5072125792503357, |
| "learning_rate": 2.2142857142857146e-06, |
| "loss": 1.2587, |
| "step": 436 |
| }, |
| { |
| "epoch": 0.7803571428571429, |
| "grad_norm": 0.4885023832321167, |
| "learning_rate": 2.1964285714285714e-06, |
| "loss": 1.2627, |
| "step": 437 |
| }, |
| { |
| "epoch": 0.7821428571428571, |
| "grad_norm": 0.49310681223869324, |
| "learning_rate": 2.1785714285714286e-06, |
| "loss": 1.3108, |
| "step": 438 |
| }, |
| { |
| "epoch": 0.7839285714285714, |
| "grad_norm": 0.6132137775421143, |
| "learning_rate": 2.160714285714286e-06, |
| "loss": 1.2016, |
| "step": 439 |
| }, |
| { |
| "epoch": 0.7857142857142857, |
| "grad_norm": 0.5641778707504272, |
| "learning_rate": 2.1428571428571427e-06, |
| "loss": 1.3018, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.7875, |
| "grad_norm": 0.5050976872444153, |
| "learning_rate": 2.125e-06, |
| "loss": 1.3549, |
| "step": 441 |
| }, |
| { |
| "epoch": 0.7892857142857143, |
| "grad_norm": 0.5003477334976196, |
| "learning_rate": 2.1071428571428572e-06, |
| "loss": 1.3315, |
| "step": 442 |
| }, |
| { |
| "epoch": 0.7910714285714285, |
| "grad_norm": 0.4969649314880371, |
| "learning_rate": 2.0892857142857145e-06, |
| "loss": 1.3, |
| "step": 443 |
| }, |
| { |
| "epoch": 0.7928571428571428, |
| "grad_norm": 0.5957094430923462, |
| "learning_rate": 2.0714285714285717e-06, |
| "loss": 1.3144, |
| "step": 444 |
| }, |
| { |
| "epoch": 0.7946428571428571, |
| "grad_norm": 0.5155512690544128, |
| "learning_rate": 2.0535714285714286e-06, |
| "loss": 1.3148, |
| "step": 445 |
| }, |
| { |
| "epoch": 0.7964285714285714, |
| "grad_norm": 0.4873362183570862, |
| "learning_rate": 2.035714285714286e-06, |
| "loss": 1.2745, |
| "step": 446 |
| }, |
| { |
| "epoch": 0.7982142857142858, |
| "grad_norm": 0.4859369695186615, |
| "learning_rate": 2.017857142857143e-06, |
| "loss": 1.2988, |
| "step": 447 |
| }, |
| { |
| "epoch": 0.8, |
| "grad_norm": 0.4897172152996063, |
| "learning_rate": 2.0000000000000003e-06, |
| "loss": 1.2945, |
| "step": 448 |
| }, |
| { |
| "epoch": 0.8017857142857143, |
| "grad_norm": 0.5550750494003296, |
| "learning_rate": 1.982142857142857e-06, |
| "loss": 1.2986, |
| "step": 449 |
| }, |
| { |
| "epoch": 0.8035714285714286, |
| "grad_norm": 0.48994505405426025, |
| "learning_rate": 1.9642857142857144e-06, |
| "loss": 1.3026, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.8053571428571429, |
| "grad_norm": 0.49379977583885193, |
| "learning_rate": 1.9464285714285717e-06, |
| "loss": 1.3036, |
| "step": 451 |
| }, |
| { |
| "epoch": 0.8071428571428572, |
| "grad_norm": 0.5711806416511536, |
| "learning_rate": 1.928571428571429e-06, |
| "loss": 1.4071, |
| "step": 452 |
| }, |
| { |
| "epoch": 0.8089285714285714, |
| "grad_norm": 0.5102342963218689, |
| "learning_rate": 1.9107142857142858e-06, |
| "loss": 1.2941, |
| "step": 453 |
| }, |
| { |
| "epoch": 0.8107142857142857, |
| "grad_norm": 0.49400219321250916, |
| "learning_rate": 1.892857142857143e-06, |
| "loss": 1.2947, |
| "step": 454 |
| }, |
| { |
| "epoch": 0.8125, |
| "grad_norm": 0.5019742250442505, |
| "learning_rate": 1.8750000000000003e-06, |
| "loss": 1.2813, |
| "step": 455 |
| }, |
| { |
| "epoch": 0.8142857142857143, |
| "grad_norm": 0.4831081032752991, |
| "learning_rate": 1.8571428571428573e-06, |
| "loss": 1.2455, |
| "step": 456 |
| }, |
| { |
| "epoch": 0.8160714285714286, |
| "grad_norm": 0.6246116161346436, |
| "learning_rate": 1.8392857142857146e-06, |
| "loss": 1.2438, |
| "step": 457 |
| }, |
| { |
| "epoch": 0.8178571428571428, |
| "grad_norm": 0.48425114154815674, |
| "learning_rate": 1.8214285714285716e-06, |
| "loss": 1.2531, |
| "step": 458 |
| }, |
| { |
| "epoch": 0.8196428571428571, |
| "grad_norm": 0.5049098134040833, |
| "learning_rate": 1.8035714285714286e-06, |
| "loss": 1.2603, |
| "step": 459 |
| }, |
| { |
| "epoch": 0.8214285714285714, |
| "grad_norm": 0.5139657258987427, |
| "learning_rate": 1.7857142857142859e-06, |
| "loss": 1.2987, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.8232142857142857, |
| "grad_norm": 0.4795459508895874, |
| "learning_rate": 1.7678571428571431e-06, |
| "loss": 1.2956, |
| "step": 461 |
| }, |
| { |
| "epoch": 0.825, |
| "grad_norm": 0.490296870470047, |
| "learning_rate": 1.75e-06, |
| "loss": 1.2379, |
| "step": 462 |
| }, |
| { |
| "epoch": 0.8267857142857142, |
| "grad_norm": 0.4932839572429657, |
| "learning_rate": 1.7321428571428572e-06, |
| "loss": 1.3099, |
| "step": 463 |
| }, |
| { |
| "epoch": 0.8285714285714286, |
| "grad_norm": 0.5037016272544861, |
| "learning_rate": 1.7142857142857145e-06, |
| "loss": 1.3468, |
| "step": 464 |
| }, |
| { |
| "epoch": 0.8303571428571429, |
| "grad_norm": 0.49343612790107727, |
| "learning_rate": 1.6964285714285717e-06, |
| "loss": 1.3217, |
| "step": 465 |
| }, |
| { |
| "epoch": 0.8321428571428572, |
| "grad_norm": 0.49327361583709717, |
| "learning_rate": 1.6785714285714286e-06, |
| "loss": 1.3203, |
| "step": 466 |
| }, |
| { |
| "epoch": 0.8339285714285715, |
| "grad_norm": 0.48709049820899963, |
| "learning_rate": 1.6607142857142858e-06, |
| "loss": 1.2245, |
| "step": 467 |
| }, |
| { |
| "epoch": 0.8357142857142857, |
| "grad_norm": 0.5107455253601074, |
| "learning_rate": 1.642857142857143e-06, |
| "loss": 1.2494, |
| "step": 468 |
| }, |
| { |
| "epoch": 0.8375, |
| "grad_norm": 0.5042998194694519, |
| "learning_rate": 1.6250000000000001e-06, |
| "loss": 1.2904, |
| "step": 469 |
| }, |
| { |
| "epoch": 0.8392857142857143, |
| "grad_norm": 0.4961761236190796, |
| "learning_rate": 1.6071428571428574e-06, |
| "loss": 1.2893, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.8410714285714286, |
| "grad_norm": 0.4918581545352936, |
| "learning_rate": 1.5892857142857144e-06, |
| "loss": 1.2581, |
| "step": 471 |
| }, |
| { |
| "epoch": 0.8428571428571429, |
| "grad_norm": 0.4863058030605316, |
| "learning_rate": 1.5714285714285714e-06, |
| "loss": 1.3444, |
| "step": 472 |
| }, |
| { |
| "epoch": 0.8446428571428571, |
| "grad_norm": 0.47693178057670593, |
| "learning_rate": 1.5535714285714287e-06, |
| "loss": 1.2956, |
| "step": 473 |
| }, |
| { |
| "epoch": 0.8464285714285714, |
| "grad_norm": 0.5165431499481201, |
| "learning_rate": 1.535714285714286e-06, |
| "loss": 1.3898, |
| "step": 474 |
| }, |
| { |
| "epoch": 0.8482142857142857, |
| "grad_norm": 0.8129104375839233, |
| "learning_rate": 1.5178571428571428e-06, |
| "loss": 1.2759, |
| "step": 475 |
| }, |
| { |
| "epoch": 0.85, |
| "grad_norm": 0.5097749829292297, |
| "learning_rate": 1.5e-06, |
| "loss": 1.2506, |
| "step": 476 |
| }, |
| { |
| "epoch": 0.8517857142857143, |
| "grad_norm": 0.6216138601303101, |
| "learning_rate": 1.4821428571428573e-06, |
| "loss": 1.2784, |
| "step": 477 |
| }, |
| { |
| "epoch": 0.8535714285714285, |
| "grad_norm": 0.5131860971450806, |
| "learning_rate": 1.4642857142857145e-06, |
| "loss": 1.3087, |
| "step": 478 |
| }, |
| { |
| "epoch": 0.8553571428571428, |
| "grad_norm": 0.532474160194397, |
| "learning_rate": 1.4464285714285716e-06, |
| "loss": 1.3609, |
| "step": 479 |
| }, |
| { |
| "epoch": 0.8571428571428571, |
| "grad_norm": 0.4782165586948395, |
| "learning_rate": 1.4285714285714286e-06, |
| "loss": 1.2722, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.8589285714285714, |
| "grad_norm": 0.4915912449359894, |
| "learning_rate": 1.4107142857142859e-06, |
| "loss": 1.3194, |
| "step": 481 |
| }, |
| { |
| "epoch": 0.8607142857142858, |
| "grad_norm": 0.4973057806491852, |
| "learning_rate": 1.392857142857143e-06, |
| "loss": 1.2925, |
| "step": 482 |
| }, |
| { |
| "epoch": 0.8625, |
| "grad_norm": 0.4953904151916504, |
| "learning_rate": 1.3750000000000002e-06, |
| "loss": 1.2731, |
| "step": 483 |
| }, |
| { |
| "epoch": 0.8642857142857143, |
| "grad_norm": 0.48121222853660583, |
| "learning_rate": 1.3571428571428572e-06, |
| "loss": 1.2951, |
| "step": 484 |
| }, |
| { |
| "epoch": 0.8660714285714286, |
| "grad_norm": 0.497459352016449, |
| "learning_rate": 1.3392857142857143e-06, |
| "loss": 1.2501, |
| "step": 485 |
| }, |
| { |
| "epoch": 0.8678571428571429, |
| "grad_norm": 0.49168872833251953, |
| "learning_rate": 1.3214285714285715e-06, |
| "loss": 1.2746, |
| "step": 486 |
| }, |
| { |
| "epoch": 0.8696428571428572, |
| "grad_norm": 0.6732675433158875, |
| "learning_rate": 1.3035714285714288e-06, |
| "loss": 1.2376, |
| "step": 487 |
| }, |
| { |
| "epoch": 0.8714285714285714, |
| "grad_norm": 0.49309980869293213, |
| "learning_rate": 1.2857142857142856e-06, |
| "loss": 1.2153, |
| "step": 488 |
| }, |
| { |
| "epoch": 0.8732142857142857, |
| "grad_norm": 0.5174947381019592, |
| "learning_rate": 1.2678571428571428e-06, |
| "loss": 1.2484, |
| "step": 489 |
| }, |
| { |
| "epoch": 0.875, |
| "grad_norm": 0.4835157096385956, |
| "learning_rate": 1.25e-06, |
| "loss": 1.2991, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.8767857142857143, |
| "grad_norm": 0.4993467330932617, |
| "learning_rate": 1.2321428571428574e-06, |
| "loss": 1.3207, |
| "step": 491 |
| }, |
| { |
| "epoch": 0.8785714285714286, |
| "grad_norm": 0.6864446401596069, |
| "learning_rate": 1.2142857142857144e-06, |
| "loss": 1.2658, |
| "step": 492 |
| }, |
| { |
| "epoch": 0.8803571428571428, |
| "grad_norm": 0.6041408181190491, |
| "learning_rate": 1.1964285714285717e-06, |
| "loss": 1.2204, |
| "step": 493 |
| }, |
| { |
| "epoch": 0.8821428571428571, |
| "grad_norm": 0.49740070104599, |
| "learning_rate": 1.1785714285714287e-06, |
| "loss": 1.2915, |
| "step": 494 |
| }, |
| { |
| "epoch": 0.8839285714285714, |
| "grad_norm": 0.504573404788971, |
| "learning_rate": 1.160714285714286e-06, |
| "loss": 1.2821, |
| "step": 495 |
| }, |
| { |
| "epoch": 0.8857142857142857, |
| "grad_norm": 0.5079774260520935, |
| "learning_rate": 1.142857142857143e-06, |
| "loss": 1.3058, |
| "step": 496 |
| }, |
| { |
| "epoch": 0.8875, |
| "grad_norm": 0.512030303478241, |
| "learning_rate": 1.125e-06, |
| "loss": 1.2856, |
| "step": 497 |
| }, |
| { |
| "epoch": 0.8892857142857142, |
| "grad_norm": 0.4908679723739624, |
| "learning_rate": 1.1071428571428573e-06, |
| "loss": 1.2765, |
| "step": 498 |
| }, |
| { |
| "epoch": 0.8910714285714286, |
| "grad_norm": 0.4868737757205963, |
| "learning_rate": 1.0892857142857143e-06, |
| "loss": 1.2691, |
| "step": 499 |
| }, |
| { |
| "epoch": 0.8928571428571429, |
| "grad_norm": 0.504216194152832, |
| "learning_rate": 1.0714285714285714e-06, |
| "loss": 1.2786, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.8946428571428572, |
| "grad_norm": 0.48668280243873596, |
| "learning_rate": 1.0535714285714286e-06, |
| "loss": 1.2701, |
| "step": 501 |
| }, |
| { |
| "epoch": 0.8964285714285715, |
| "grad_norm": 0.5180802345275879, |
| "learning_rate": 1.0357142857142859e-06, |
| "loss": 1.3073, |
| "step": 502 |
| }, |
| { |
| "epoch": 0.8982142857142857, |
| "grad_norm": 0.48071718215942383, |
| "learning_rate": 1.017857142857143e-06, |
| "loss": 1.2082, |
| "step": 503 |
| }, |
| { |
| "epoch": 0.9, |
| "grad_norm": 0.5288301706314087, |
| "learning_rate": 1.0000000000000002e-06, |
| "loss": 1.326, |
| "step": 504 |
| }, |
| { |
| "epoch": 0.9017857142857143, |
| "grad_norm": 0.5516347289085388, |
| "learning_rate": 9.821428571428572e-07, |
| "loss": 1.2103, |
| "step": 505 |
| }, |
| { |
| "epoch": 0.9035714285714286, |
| "grad_norm": 0.4964546859264374, |
| "learning_rate": 9.642857142857145e-07, |
| "loss": 1.2661, |
| "step": 506 |
| }, |
| { |
| "epoch": 0.9053571428571429, |
| "grad_norm": 0.5081775784492493, |
| "learning_rate": 9.464285714285715e-07, |
| "loss": 1.2868, |
| "step": 507 |
| }, |
| { |
| "epoch": 0.9071428571428571, |
| "grad_norm": 0.5029599070549011, |
| "learning_rate": 9.285714285714287e-07, |
| "loss": 1.291, |
| "step": 508 |
| }, |
| { |
| "epoch": 0.9089285714285714, |
| "grad_norm": 0.5202727913856506, |
| "learning_rate": 9.107142857142858e-07, |
| "loss": 1.2899, |
| "step": 509 |
| }, |
| { |
| "epoch": 0.9107142857142857, |
| "grad_norm": 0.48697784543037415, |
| "learning_rate": 8.928571428571429e-07, |
| "loss": 1.2969, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.9125, |
| "grad_norm": 0.4840863049030304, |
| "learning_rate": 8.75e-07, |
| "loss": 1.2627, |
| "step": 511 |
| }, |
| { |
| "epoch": 0.9142857142857143, |
| "grad_norm": 0.5034891963005066, |
| "learning_rate": 8.571428571428572e-07, |
| "loss": 1.3292, |
| "step": 512 |
| }, |
| { |
| "epoch": 0.9160714285714285, |
| "grad_norm": 0.5149267315864563, |
| "learning_rate": 8.392857142857143e-07, |
| "loss": 1.2654, |
| "step": 513 |
| }, |
| { |
| "epoch": 0.9178571428571428, |
| "grad_norm": 0.7686546444892883, |
| "learning_rate": 8.214285714285715e-07, |
| "loss": 1.27, |
| "step": 514 |
| }, |
| { |
| "epoch": 0.9196428571428571, |
| "grad_norm": 0.5021243691444397, |
| "learning_rate": 8.035714285714287e-07, |
| "loss": 1.3903, |
| "step": 515 |
| }, |
| { |
| "epoch": 0.9214285714285714, |
| "grad_norm": 0.5143928527832031, |
| "learning_rate": 7.857142857142857e-07, |
| "loss": 1.3113, |
| "step": 516 |
| }, |
| { |
| "epoch": 0.9232142857142858, |
| "grad_norm": 0.48257723450660706, |
| "learning_rate": 7.67857142857143e-07, |
| "loss": 1.2867, |
| "step": 517 |
| }, |
| { |
| "epoch": 0.925, |
| "grad_norm": 0.4944348633289337, |
| "learning_rate": 7.5e-07, |
| "loss": 1.2695, |
| "step": 518 |
| }, |
| { |
| "epoch": 0.9267857142857143, |
| "grad_norm": 0.6723678708076477, |
| "learning_rate": 7.321428571428573e-07, |
| "loss": 1.2364, |
| "step": 519 |
| }, |
| { |
| "epoch": 0.9285714285714286, |
| "grad_norm": 0.5064557194709778, |
| "learning_rate": 7.142857142857143e-07, |
| "loss": 1.2422, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.9303571428571429, |
| "grad_norm": 0.5481106042861938, |
| "learning_rate": 6.964285714285715e-07, |
| "loss": 1.2502, |
| "step": 521 |
| }, |
| { |
| "epoch": 0.9321428571428572, |
| "grad_norm": 0.5038375854492188, |
| "learning_rate": 6.785714285714286e-07, |
| "loss": 1.3219, |
| "step": 522 |
| }, |
| { |
| "epoch": 0.9339285714285714, |
| "grad_norm": 0.5069401264190674, |
| "learning_rate": 6.607142857142858e-07, |
| "loss": 1.2895, |
| "step": 523 |
| }, |
| { |
| "epoch": 0.9357142857142857, |
| "grad_norm": 0.5010185241699219, |
| "learning_rate": 6.428571428571428e-07, |
| "loss": 1.2923, |
| "step": 524 |
| }, |
| { |
| "epoch": 0.9375, |
| "grad_norm": 0.4841311573982239, |
| "learning_rate": 6.25e-07, |
| "loss": 1.2651, |
| "step": 525 |
| }, |
| { |
| "epoch": 0.9392857142857143, |
| "grad_norm": 0.5106439590454102, |
| "learning_rate": 6.071428571428572e-07, |
| "loss": 1.2708, |
| "step": 526 |
| }, |
| { |
| "epoch": 0.9410714285714286, |
| "grad_norm": 0.4902538061141968, |
| "learning_rate": 5.892857142857143e-07, |
| "loss": 1.2703, |
| "step": 527 |
| }, |
| { |
| "epoch": 0.9428571428571428, |
| "grad_norm": 0.48699891567230225, |
| "learning_rate": 5.714285714285715e-07, |
| "loss": 1.2679, |
| "step": 528 |
| }, |
| { |
| "epoch": 0.9446428571428571, |
| "grad_norm": 0.5491330623626709, |
| "learning_rate": 5.535714285714286e-07, |
| "loss": 1.2921, |
| "step": 529 |
| }, |
| { |
| "epoch": 0.9464285714285714, |
| "grad_norm": 0.5123488903045654, |
| "learning_rate": 5.357142857142857e-07, |
| "loss": 1.2446, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.9482142857142857, |
| "grad_norm": 0.6574044823646545, |
| "learning_rate": 5.178571428571429e-07, |
| "loss": 1.3066, |
| "step": 531 |
| }, |
| { |
| "epoch": 0.95, |
| "grad_norm": 0.5071901082992554, |
| "learning_rate": 5.000000000000001e-07, |
| "loss": 1.2726, |
| "step": 532 |
| }, |
| { |
| "epoch": 0.9517857142857142, |
| "grad_norm": 0.495935320854187, |
| "learning_rate": 4.821428571428572e-07, |
| "loss": 1.2758, |
| "step": 533 |
| }, |
| { |
| "epoch": 0.9535714285714286, |
| "grad_norm": 0.5281900763511658, |
| "learning_rate": 4.642857142857143e-07, |
| "loss": 1.2688, |
| "step": 534 |
| }, |
| { |
| "epoch": 0.9553571428571429, |
| "grad_norm": 0.49741920828819275, |
| "learning_rate": 4.4642857142857147e-07, |
| "loss": 1.2416, |
| "step": 535 |
| }, |
| { |
| "epoch": 0.9571428571428572, |
| "grad_norm": 0.5077670812606812, |
| "learning_rate": 4.285714285714286e-07, |
| "loss": 1.3029, |
| "step": 536 |
| }, |
| { |
| "epoch": 0.9589285714285715, |
| "grad_norm": 0.46955814957618713, |
| "learning_rate": 4.1071428571428577e-07, |
| "loss": 1.262, |
| "step": 537 |
| }, |
| { |
| "epoch": 0.9607142857142857, |
| "grad_norm": 0.4829843044281006, |
| "learning_rate": 3.9285714285714286e-07, |
| "loss": 1.2283, |
| "step": 538 |
| }, |
| { |
| "epoch": 0.9625, |
| "grad_norm": 0.47563934326171875, |
| "learning_rate": 3.75e-07, |
| "loss": 1.2962, |
| "step": 539 |
| }, |
| { |
| "epoch": 0.9642857142857143, |
| "grad_norm": 0.6257872581481934, |
| "learning_rate": 3.5714285714285716e-07, |
| "loss": 1.1969, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.9660714285714286, |
| "grad_norm": 0.5398846864700317, |
| "learning_rate": 3.392857142857143e-07, |
| "loss": 1.2731, |
| "step": 541 |
| }, |
| { |
| "epoch": 0.9678571428571429, |
| "grad_norm": 0.48545607924461365, |
| "learning_rate": 3.214285714285714e-07, |
| "loss": 1.2559, |
| "step": 542 |
| }, |
| { |
| "epoch": 0.9696428571428571, |
| "grad_norm": 0.504273533821106, |
| "learning_rate": 3.035714285714286e-07, |
| "loss": 1.2457, |
| "step": 543 |
| }, |
| { |
| "epoch": 0.9714285714285714, |
| "grad_norm": 0.4949125051498413, |
| "learning_rate": 2.8571428571428575e-07, |
| "loss": 1.296, |
| "step": 544 |
| }, |
| { |
| "epoch": 0.9732142857142857, |
| "grad_norm": 0.5022711753845215, |
| "learning_rate": 2.6785714285714284e-07, |
| "loss": 1.2173, |
| "step": 545 |
| }, |
| { |
| "epoch": 0.975, |
| "grad_norm": 0.4856776297092438, |
| "learning_rate": 2.5000000000000004e-07, |
| "loss": 1.3013, |
| "step": 546 |
| }, |
| { |
| "epoch": 0.9767857142857143, |
| "grad_norm": 0.508557915687561, |
| "learning_rate": 2.3214285714285716e-07, |
| "loss": 1.2674, |
| "step": 547 |
| }, |
| { |
| "epoch": 0.9785714285714285, |
| "grad_norm": 0.5153559446334839, |
| "learning_rate": 2.142857142857143e-07, |
| "loss": 1.2569, |
| "step": 548 |
| }, |
| { |
| "epoch": 0.9803571428571428, |
| "grad_norm": 0.49251508712768555, |
| "learning_rate": 1.9642857142857143e-07, |
| "loss": 1.2879, |
| "step": 549 |
| }, |
| { |
| "epoch": 0.9821428571428571, |
| "grad_norm": 0.49773454666137695, |
| "learning_rate": 1.7857142857142858e-07, |
| "loss": 1.2204, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.9839285714285714, |
| "grad_norm": 0.5022481083869934, |
| "learning_rate": 1.607142857142857e-07, |
| "loss": 1.2859, |
| "step": 551 |
| }, |
| { |
| "epoch": 0.9857142857142858, |
| "grad_norm": 0.5000441670417786, |
| "learning_rate": 1.4285714285714287e-07, |
| "loss": 1.2217, |
| "step": 552 |
| }, |
| { |
| "epoch": 0.9875, |
| "grad_norm": 0.48934027552604675, |
| "learning_rate": 1.2500000000000002e-07, |
| "loss": 1.2892, |
| "step": 553 |
| }, |
| { |
| "epoch": 0.9892857142857143, |
| "grad_norm": 0.5030660033226013, |
| "learning_rate": 1.0714285714285716e-07, |
| "loss": 1.2903, |
| "step": 554 |
| }, |
| { |
| "epoch": 0.9910714285714286, |
| "grad_norm": 0.4894131124019623, |
| "learning_rate": 8.928571428571429e-08, |
| "loss": 1.3078, |
| "step": 555 |
| }, |
| { |
| "epoch": 0.9928571428571429, |
| "grad_norm": 0.6754637956619263, |
| "learning_rate": 7.142857142857144e-08, |
| "loss": 1.2988, |
| "step": 556 |
| }, |
| { |
| "epoch": 0.9946428571428572, |
| "grad_norm": 0.5316541194915771, |
| "learning_rate": 5.357142857142858e-08, |
| "loss": 1.2646, |
| "step": 557 |
| }, |
| { |
| "epoch": 0.9964285714285714, |
| "grad_norm": 0.6239261627197266, |
| "learning_rate": 3.571428571428572e-08, |
| "loss": 1.3093, |
| "step": 558 |
| }, |
| { |
| "epoch": 0.9982142857142857, |
| "grad_norm": 0.490326851606369, |
| "learning_rate": 1.785714285714286e-08, |
| "loss": 1.3136, |
| "step": 559 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 0.49080222845077515, |
| "learning_rate": 0.0, |
| "loss": 1.2867, |
| "step": 560 |
| } |
| ], |
| "logging_steps": 1.0, |
| "max_steps": 560, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 0, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.959895737659556e+17, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|