{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 61.53846153846154,
  "eval_steps": 5,
  "global_step": 800,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.07692307692307693,
      "grad_norm": 10603.1904296875,
      "learning_rate": 0.0,
      "loss": 22.6733,
      "step": 1
    },
    {
      "epoch": 0.15384615384615385,
      "grad_norm": 18773.23046875,
      "learning_rate": 4e-08,
      "loss": 20.714,
      "step": 2
    },
    {
      "epoch": 0.23076923076923078,
      "grad_norm": 4270.68408203125,
      "learning_rate": 8e-08,
      "loss": 22.6768,
      "step": 3
    },
    {
      "epoch": 0.3076923076923077,
      "grad_norm": 8179.71484375,
      "learning_rate": 1.2000000000000002e-07,
      "loss": 25.0131,
      "step": 4
    },
    {
      "epoch": 0.38461538461538464,
      "grad_norm": 16160.1201171875,
      "learning_rate": 1.6e-07,
      "loss": 22.2821,
      "step": 5
    },
    {
      "epoch": 0.46153846153846156,
      "grad_norm": 8064.51904296875,
      "learning_rate": 2.0000000000000002e-07,
      "loss": 21.2213,
      "step": 6
    },
    {
      "epoch": 0.5384615384615384,
      "grad_norm": 12106.345703125,
      "learning_rate": 2.4000000000000003e-07,
      "loss": 24.638,
      "step": 7
    },
    {
      "epoch": 0.6153846153846154,
      "grad_norm": 8413.4814453125,
      "learning_rate": 2.8e-07,
      "loss": 20.6285,
      "step": 8
    },
    {
      "epoch": 0.6923076923076923,
      "grad_norm": 15460.4384765625,
      "learning_rate": 3.2e-07,
      "loss": 21.5866,
      "step": 9
    },
    {
      "epoch": 0.7692307692307693,
      "grad_norm": 4440.8818359375,
      "learning_rate": 3.6e-07,
      "loss": 22.0973,
      "step": 10
    },
    {
      "epoch": 0.8461538461538461,
      "grad_norm": 4846.01171875,
      "learning_rate": 4.0000000000000003e-07,
      "loss": 26.5506,
      "step": 11
    },
    {
      "epoch": 0.9230769230769231,
      "grad_norm": 6784.392578125,
      "learning_rate": 4.4e-07,
      "loss": 26.4308,
      "step": 12
    },
    {
      "epoch": 1.0,
      "grad_norm": 10356.7490234375,
      "learning_rate": 4.800000000000001e-07,
      "loss": 21.2232,
      "step": 13
    },
    {
      "epoch": 1.0769230769230769,
      "grad_norm": 12669.5966796875,
      "learning_rate": 5.2e-07,
      "loss": 19.8431,
      "step": 14
    },
    {
      "epoch": 1.1538461538461537,
      "grad_norm": 7368.5390625,
      "learning_rate": 5.6e-07,
      "loss": 19.2139,
      "step": 15
    },
    {
      "epoch": 1.2307692307692308,
      "grad_norm": 4959.1923828125,
      "learning_rate": 6.000000000000001e-07,
      "loss": 18.3705,
      "step": 16
    },
    {
      "epoch": 1.3076923076923077,
      "grad_norm": 5191.330078125,
      "learning_rate": 6.4e-07,
      "loss": 22.0389,
      "step": 17
    },
    {
      "epoch": 1.3846153846153846,
      "grad_norm": 10824.8740234375,
      "learning_rate": 6.800000000000001e-07,
      "loss": 20.2947,
      "step": 18
    },
    {
      "epoch": 1.4615384615384617,
      "grad_norm": 5129.83056640625,
      "learning_rate": 7.2e-07,
      "loss": 21.5472,
      "step": 19
    },
    {
      "epoch": 1.5384615384615383,
      "grad_norm": 7372.88818359375,
      "learning_rate": 7.6e-07,
      "loss": 19.5856,
      "step": 20
    },
    {
      "epoch": 1.6153846153846154,
      "grad_norm": 4771.4990234375,
      "learning_rate": 8.000000000000001e-07,
      "loss": 18.078,
      "step": 21
    },
    {
      "epoch": 1.6923076923076923,
      "grad_norm": 8714.8642578125,
      "learning_rate": 8.400000000000001e-07,
      "loss": 20.8261,
      "step": 22
    },
    {
      "epoch": 1.7692307692307692,
      "grad_norm": 10882.0322265625,
      "learning_rate": 8.8e-07,
      "loss": 17.8416,
      "step": 23
    },
    {
      "epoch": 1.8461538461538463,
      "grad_norm": 3724.619873046875,
      "learning_rate": 9.200000000000001e-07,
      "loss": 18.9121,
      "step": 24
    },
    {
      "epoch": 1.9230769230769231,
      "grad_norm": 8529.5771484375,
      "learning_rate": 9.600000000000001e-07,
      "loss": 24.5815,
      "step": 25
    },
    {
      "epoch": 2.0,
      "grad_norm": 8510.6318359375,
      "learning_rate": 1.0000000000000002e-06,
      "loss": 20.9304,
      "step": 26
    },
    {
      "epoch": 2.076923076923077,
      "grad_norm": 9672.4150390625,
      "learning_rate": 1.04e-06,
      "loss": 18.5716,
      "step": 27
    },
    {
      "epoch": 2.1538461538461537,
      "grad_norm": 7587.6533203125,
      "learning_rate": 1.08e-06,
      "loss": 18.0011,
      "step": 28
    },
    {
      "epoch": 2.230769230769231,
      "grad_norm": 33927.44140625,
      "learning_rate": 1.12e-06,
      "loss": 17.8299,
      "step": 29
    },
    {
      "epoch": 2.3076923076923075,
      "grad_norm": 5066.283203125,
      "learning_rate": 1.1600000000000001e-06,
      "loss": 15.1792,
      "step": 30
    },
    {
      "epoch": 2.3846153846153846,
      "grad_norm": 11348.0380859375,
      "learning_rate": 1.2000000000000002e-06,
      "loss": 17.1275,
      "step": 31
    },
    {
      "epoch": 2.4615384615384617,
      "grad_norm": 44569.71484375,
      "learning_rate": 1.2400000000000002e-06,
      "loss": 17.5373,
      "step": 32
    },
    {
      "epoch": 2.5384615384615383,
      "grad_norm": 11042.56640625,
      "learning_rate": 1.28e-06,
      "loss": 14.4275,
      "step": 33
    },
    {
      "epoch": 2.6153846153846154,
      "grad_norm": 14324.48046875,
      "learning_rate": 1.32e-06,
      "loss": 16.0846,
      "step": 34
    },
    {
      "epoch": 2.6923076923076925,
      "grad_norm": 6262.25732421875,
      "learning_rate": 1.3600000000000001e-06,
      "loss": 12.7508,
      "step": 35
    },
    {
      "epoch": 2.769230769230769,
      "grad_norm": 4430.26611328125,
      "learning_rate": 1.4000000000000001e-06,
      "loss": 13.6969,
      "step": 36
    },
    {
      "epoch": 2.8461538461538463,
      "grad_norm": 6138.24267578125,
      "learning_rate": 1.44e-06,
      "loss": 12.5667,
      "step": 37
    },
    {
      "epoch": 2.9230769230769234,
      "grad_norm": 3587.69482421875,
      "learning_rate": 1.48e-06,
      "loss": 13.052,
      "step": 38
    },
    {
      "epoch": 3.0,
      "grad_norm": 11029.201171875,
      "learning_rate": 1.52e-06,
      "loss": 14.1599,
      "step": 39
    },
    {
      "epoch": 3.076923076923077,
      "grad_norm": 4541.20166015625,
      "learning_rate": 1.56e-06,
      "loss": 14.2273,
      "step": 40
    },
    {
      "epoch": 3.1538461538461537,
      "grad_norm": 6240.2138671875,
      "learning_rate": 1.6000000000000001e-06,
      "loss": 11.7308,
      "step": 41
    },
    {
      "epoch": 3.230769230769231,
      "grad_norm": 11963.646484375,
      "learning_rate": 1.6400000000000002e-06,
      "loss": 12.2135,
      "step": 42
    },
    {
      "epoch": 3.3076923076923075,
      "grad_norm": 7477.02392578125,
      "learning_rate": 1.6800000000000002e-06,
      "loss": 7.8914,
      "step": 43
    },
    {
      "epoch": 3.3846153846153846,
      "grad_norm": 4601.59130859375,
      "learning_rate": 1.72e-06,
      "loss": 10.5209,
      "step": 44
    },
    {
      "epoch": 3.4615384615384617,
      "grad_norm": 12468.453125,
      "learning_rate": 1.76e-06,
      "loss": 9.8911,
      "step": 45
    },
    {
      "epoch": 3.5384615384615383,
      "grad_norm": 4691.3603515625,
      "learning_rate": 1.8000000000000001e-06,
      "loss": 8.5108,
      "step": 46
    },
    {
      "epoch": 3.6153846153846154,
      "grad_norm": 14303.9404296875,
      "learning_rate": 1.8400000000000002e-06,
      "loss": 9.6945,
      "step": 47
    },
    {
      "epoch": 3.6923076923076925,
      "grad_norm": 2076.6015625,
      "learning_rate": 1.8800000000000002e-06,
      "loss": 5.6841,
      "step": 48
    },
    {
      "epoch": 3.769230769230769,
      "grad_norm": 2748.860107421875,
      "learning_rate": 1.9200000000000003e-06,
      "loss": 5.7316,
      "step": 49
    },
    {
      "epoch": 3.8461538461538463,
      "grad_norm": 4779.833984375,
      "learning_rate": 1.9600000000000003e-06,
      "loss": 6.1169,
      "step": 50
    },
    {
      "epoch": 3.9230769230769234,
      "grad_norm": 6074.677734375,
      "learning_rate": 2.0000000000000003e-06,
      "loss": 6.5592,
      "step": 51
    },
    {
      "epoch": 4.0,
      "grad_norm": 7416.8369140625,
      "learning_rate": 2.04e-06,
      "loss": 7.5151,
      "step": 52
    },
    {
      "epoch": 4.076923076923077,
      "grad_norm": 4175.63232421875,
      "learning_rate": 2.08e-06,
      "loss": 6.613,
      "step": 53
    },
    {
      "epoch": 4.153846153846154,
      "grad_norm": 2247.82177734375,
      "learning_rate": 2.12e-06,
      "loss": 5.7516,
      "step": 54
    },
    {
      "epoch": 4.230769230769231,
      "grad_norm": 6121.06298828125,
      "learning_rate": 2.16e-06,
      "loss": 6.1682,
      "step": 55
    },
    {
      "epoch": 4.3076923076923075,
      "grad_norm": 4141.83349609375,
      "learning_rate": 2.2e-06,
      "loss": 6.0549,
      "step": 56
    },
    {
      "epoch": 4.384615384615385,
      "grad_norm": 1865.611572265625,
      "learning_rate": 2.24e-06,
      "loss": 5.7047,
      "step": 57
    },
    {
      "epoch": 4.461538461538462,
      "grad_norm": 3789.943115234375,
      "learning_rate": 2.28e-06,
      "loss": 6.7775,
      "step": 58
    },
    {
      "epoch": 4.538461538461538,
      "grad_norm": 10279.765625,
      "learning_rate": 2.3200000000000002e-06,
      "loss": 6.5486,
      "step": 59
    },
    {
      "epoch": 4.615384615384615,
      "grad_norm": 4512.77392578125,
      "learning_rate": 2.3600000000000003e-06,
      "loss": 5.9955,
      "step": 60
    },
    {
      "epoch": 4.6923076923076925,
      "grad_norm": 9854.623046875,
      "learning_rate": 2.4000000000000003e-06,
      "loss": 5.008,
      "step": 61
    },
    {
      "epoch": 4.769230769230769,
      "grad_norm": 1842.1689453125,
      "learning_rate": 2.4400000000000004e-06,
      "loss": 5.6766,
      "step": 62
    },
    {
      "epoch": 4.846153846153846,
      "grad_norm": 8768.17578125,
      "learning_rate": 2.4800000000000004e-06,
      "loss": 4.1796,
      "step": 63
    },
    {
      "epoch": 4.923076923076923,
      "grad_norm": 1296.5732421875,
      "learning_rate": 2.52e-06,
      "loss": 4.6935,
      "step": 64
    },
    {
      "epoch": 5.0,
      "grad_norm": 3000.628662109375,
      "learning_rate": 2.56e-06,
      "loss": 3.4662,
      "step": 65
    },
    {
      "epoch": 5.076923076923077,
      "grad_norm": 2375.695556640625,
      "learning_rate": 2.6e-06,
      "loss": 5.7503,
      "step": 66
    },
    {
      "epoch": 5.153846153846154,
      "grad_norm": 1394.402587890625,
      "learning_rate": 2.64e-06,
      "loss": 5.1836,
      "step": 67
    },
    {
      "epoch": 5.230769230769231,
      "grad_norm": 5750.8896484375,
      "learning_rate": 2.68e-06,
      "loss": 4.6198,
      "step": 68
    },
    {
      "epoch": 5.3076923076923075,
      "grad_norm": 2214.36572265625,
      "learning_rate": 2.7200000000000002e-06,
      "loss": 4.5119,
      "step": 69
    },
    {
      "epoch": 5.384615384615385,
      "grad_norm": 4186.42919921875,
      "learning_rate": 2.7600000000000003e-06,
      "loss": 4.4198,
      "step": 70
    },
    {
      "epoch": 5.461538461538462,
      "grad_norm": 1318.0018310546875,
      "learning_rate": 2.8000000000000003e-06,
      "loss": 3.2784,
      "step": 71
    },
    {
      "epoch": 5.538461538461538,
      "grad_norm": 1700.1236572265625,
      "learning_rate": 2.84e-06,
      "loss": 3.6871,
      "step": 72
    },
    {
      "epoch": 5.615384615384615,
      "grad_norm": 3383.92626953125,
      "learning_rate": 2.88e-06,
      "loss": 4.5666,
      "step": 73
    },
    {
      "epoch": 5.6923076923076925,
      "grad_norm": 1545.00439453125,
      "learning_rate": 2.92e-06,
      "loss": 3.5176,
      "step": 74
    },
    {
      "epoch": 5.769230769230769,
      "grad_norm": 2377.838623046875,
      "learning_rate": 2.96e-06,
      "loss": 3.8324,
      "step": 75
    },
    {
      "epoch": 5.846153846153846,
      "grad_norm": 884.7638549804688,
      "learning_rate": 3e-06,
      "loss": 2.3883,
      "step": 76
    },
    {
      "epoch": 5.923076923076923,
      "grad_norm": 1984.365234375,
      "learning_rate": 3.04e-06,
      "loss": 3.7156,
      "step": 77
    },
    {
      "epoch": 6.0,
      "grad_norm": 3334.85205078125,
      "learning_rate": 3.08e-06,
      "loss": 3.0619,
      "step": 78
    },
    {
      "epoch": 6.076923076923077,
      "grad_norm": 2713.583740234375,
      "learning_rate": 3.12e-06,
      "loss": 4.0911,
      "step": 79
    },
    {
      "epoch": 6.153846153846154,
      "grad_norm": 831.1427612304688,
      "learning_rate": 3.1600000000000002e-06,
      "loss": 4.4941,
      "step": 80
    },
    {
      "epoch": 6.230769230769231,
      "grad_norm": 1827.432861328125,
      "learning_rate": 3.2000000000000003e-06,
      "loss": 3.5104,
      "step": 81
    },
    {
      "epoch": 6.3076923076923075,
      "grad_norm": 1751.157958984375,
      "learning_rate": 3.2400000000000003e-06,
      "loss": 3.3955,
      "step": 82
    },
    {
      "epoch": 6.384615384615385,
      "grad_norm": 1335.197265625,
      "learning_rate": 3.2800000000000004e-06,
      "loss": 3.0868,
      "step": 83
    },
    {
      "epoch": 6.461538461538462,
      "grad_norm": 2164.307373046875,
      "learning_rate": 3.3200000000000004e-06,
      "loss": 3.5908,
      "step": 84
    },
    {
      "epoch": 6.538461538461538,
      "grad_norm": 3304.489990234375,
      "learning_rate": 3.3600000000000004e-06,
      "loss": 3.2302,
      "step": 85
    },
    {
      "epoch": 6.615384615384615,
      "grad_norm": 3139.857421875,
      "learning_rate": 3.4000000000000005e-06,
      "loss": 3.3594,
      "step": 86
    },
    {
      "epoch": 6.6923076923076925,
      "grad_norm": 1009.633544921875,
      "learning_rate": 3.44e-06,
      "loss": 2.9373,
      "step": 87
    },
    {
      "epoch": 6.769230769230769,
      "grad_norm": 1588.922119140625,
      "learning_rate": 3.48e-06,
      "loss": 3.0434,
      "step": 88
    },
    {
      "epoch": 6.846153846153846,
      "grad_norm": 2253.773681640625,
      "learning_rate": 3.52e-06,
      "loss": 2.5412,
      "step": 89
    },
    {
      "epoch": 6.923076923076923,
      "grad_norm": 1642.713134765625,
      "learning_rate": 3.5600000000000002e-06,
      "loss": 2.2512,
      "step": 90
    },
    {
      "epoch": 7.0,
      "grad_norm": 1364.428955078125,
      "learning_rate": 3.6000000000000003e-06,
      "loss": 3.0326,
      "step": 91
    },
    {
      "epoch": 7.076923076923077,
      "grad_norm": 11717.6826171875,
      "learning_rate": 3.6400000000000003e-06,
      "loss": 3.6506,
      "step": 92
    },
    {
      "epoch": 7.153846153846154,
      "grad_norm": 1019.0966186523438,
      "learning_rate": 3.6800000000000003e-06,
      "loss": 2.8424,
      "step": 93
    },
    {
      "epoch": 7.230769230769231,
      "grad_norm": 508.2272644042969,
      "learning_rate": 3.7200000000000004e-06,
      "loss": 2.4272,
      "step": 94
    },
    {
      "epoch": 7.3076923076923075,
      "grad_norm": 6681.02880859375,
      "learning_rate": 3.7600000000000004e-06,
      "loss": 2.5006,
      "step": 95
    },
    {
      "epoch": 7.384615384615385,
      "grad_norm": 327.763916015625,
      "learning_rate": 3.8000000000000005e-06,
      "loss": 1.7235,
      "step": 96
    },
    {
      "epoch": 7.461538461538462,
      "grad_norm": 717.2875366210938,
      "learning_rate": 3.8400000000000005e-06,
      "loss": 1.3483,
      "step": 97
    },
    {
      "epoch": 7.538461538461538,
      "grad_norm": 14631.060546875,
      "learning_rate": 3.88e-06,
      "loss": 1.7917,
      "step": 98
    },
    {
      "epoch": 7.615384615384615,
      "grad_norm": 449.271484375,
      "learning_rate": 3.920000000000001e-06,
      "loss": 1.3824,
      "step": 99
    },
    {
      "epoch": 7.6923076923076925,
      "grad_norm": 1226.365966796875,
      "learning_rate": 3.96e-06,
      "loss": 1.6257,
      "step": 100
    },
    {
      "epoch": 7.769230769230769,
      "grad_norm": 254.60325622558594,
      "learning_rate": 4.000000000000001e-06,
      "loss": 1.9142,
      "step": 101
    },
    {
      "epoch": 7.846153846153846,
      "grad_norm": 558.2205200195312,
      "learning_rate": 4.04e-06,
      "loss": 1.1077,
      "step": 102
    },
    {
      "epoch": 7.923076923076923,
      "grad_norm": 188.0357666015625,
      "learning_rate": 4.08e-06,
      "loss": 0.9966,
      "step": 103
    },
    {
      "epoch": 8.0,
      "grad_norm": 145.9339599609375,
      "learning_rate": 4.12e-06,
      "loss": 1.1691,
      "step": 104
    },
    {
      "epoch": 8.076923076923077,
      "grad_norm": 122.64077758789062,
      "learning_rate": 4.16e-06,
      "loss": 1.0857,
      "step": 105
    },
    {
      "epoch": 8.153846153846153,
      "grad_norm": 416.3562927246094,
      "learning_rate": 4.2000000000000004e-06,
      "loss": 1.3436,
      "step": 106
    },
    {
      "epoch": 8.23076923076923,
      "grad_norm": 313.8348388671875,
      "learning_rate": 4.24e-06,
      "loss": 1.5924,
      "step": 107
    },
    {
      "epoch": 8.307692307692308,
      "grad_norm": 174.41867065429688,
      "learning_rate": 4.2800000000000005e-06,
      "loss": 1.2701,
      "step": 108
    },
    {
      "epoch": 8.384615384615385,
      "grad_norm": 135.88780212402344,
      "learning_rate": 4.32e-06,
      "loss": 0.9964,
      "step": 109
    },
    {
      "epoch": 8.461538461538462,
      "grad_norm": 119.21726989746094,
      "learning_rate": 4.360000000000001e-06,
      "loss": 0.9765,
      "step": 110
    },
    {
      "epoch": 8.538461538461538,
      "grad_norm": 124.00638580322266,
      "learning_rate": 4.4e-06,
      "loss": 1.0436,
      "step": 111
    },
    {
      "epoch": 8.615384615384615,
      "grad_norm": 87.69738006591797,
      "learning_rate": 4.440000000000001e-06,
      "loss": 0.9144,
      "step": 112
    },
    {
      "epoch": 8.692307692307692,
      "grad_norm": 98.52690124511719,
      "learning_rate": 4.48e-06,
      "loss": 0.8034,
      "step": 113
    },
    {
      "epoch": 8.76923076923077,
      "grad_norm": 67.8703842163086,
      "learning_rate": 4.520000000000001e-06,
      "loss": 0.7949,
      "step": 114
    },
    {
      "epoch": 8.846153846153847,
      "grad_norm": 129.94183349609375,
      "learning_rate": 4.56e-06,
      "loss": 1.1878,
      "step": 115
    },
    {
      "epoch": 8.923076923076923,
      "grad_norm": 185.4768829345703,
      "learning_rate": 4.600000000000001e-06,
      "loss": 1.015,
      "step": 116
    },
    {
      "epoch": 9.0,
      "grad_norm": 122.55597686767578,
      "learning_rate": 4.6400000000000005e-06,
      "loss": 0.9446,
      "step": 117
    },
    {
      "epoch": 9.076923076923077,
      "grad_norm": 86.92323303222656,
      "learning_rate": 4.680000000000001e-06,
      "loss": 0.833,
      "step": 118
    },
    {
      "epoch": 9.153846153846153,
      "grad_norm": 109.41946411132812,
      "learning_rate": 4.7200000000000005e-06,
      "loss": 0.9403,
      "step": 119
    },
    {
      "epoch": 9.23076923076923,
      "grad_norm": 79.83373260498047,
      "learning_rate": 4.76e-06,
      "loss": 0.6093,
      "step": 120
    },
    {
      "epoch": 9.307692307692308,
      "grad_norm": 102.91453552246094,
      "learning_rate": 4.800000000000001e-06,
      "loss": 0.9903,
      "step": 121
    },
    {
      "epoch": 9.384615384615385,
      "grad_norm": 329.02606201171875,
      "learning_rate": 4.84e-06,
      "loss": 1.2028,
      "step": 122
    },
    {
      "epoch": 9.461538461538462,
      "grad_norm": 160.85386657714844,
      "learning_rate": 4.880000000000001e-06,
      "loss": 1.4724,
      "step": 123
    },
    {
      "epoch": 9.538461538461538,
      "grad_norm": 229.27630615234375,
      "learning_rate": 4.92e-06,
      "loss": 0.9034,
      "step": 124
    },
    {
      "epoch": 9.615384615384615,
      "grad_norm": 91.00493621826172,
      "learning_rate": 4.960000000000001e-06,
      "loss": 1.0996,
      "step": 125
    },
    {
      "epoch": 9.692307692307692,
      "grad_norm": 60.38620376586914,
      "learning_rate": 5e-06,
      "loss": 0.7422,
      "step": 126
    },
    {
      "epoch": 9.76923076923077,
      "grad_norm": 52.122467041015625,
      "learning_rate": 5.04e-06,
      "loss": 0.8313,
      "step": 127
    },
    {
      "epoch": 9.846153846153847,
      "grad_norm": 36.39122772216797,
      "learning_rate": 5.0800000000000005e-06,
      "loss": 0.8565,
      "step": 128
    },
    {
      "epoch": 9.923076923076923,
      "grad_norm": 46.33543014526367,
      "learning_rate": 5.12e-06,
      "loss": 0.8425,
      "step": 129
    },
    {
      "epoch": 10.0,
      "grad_norm": 85.27603149414062,
      "learning_rate": 5.1600000000000006e-06,
      "loss": 0.6042,
      "step": 130
    },
    {
      "epoch": 10.076923076923077,
      "grad_norm": 108.0735855102539,
      "learning_rate": 5.2e-06,
      "loss": 0.8783,
      "step": 131
    },
    {
      "epoch": 10.153846153846153,
      "grad_norm": 153.18704223632812,
      "learning_rate": 5.240000000000001e-06,
      "loss": 1.1126,
      "step": 132
    },
    {
      "epoch": 10.23076923076923,
      "grad_norm": 93.4756851196289,
      "learning_rate": 5.28e-06,
      "loss": 0.8742,
      "step": 133
    },
    {
      "epoch": 10.307692307692308,
      "grad_norm": 33.396385192871094,
      "learning_rate": 5.320000000000001e-06,
      "loss": 0.7842,
      "step": 134
    },
    {
      "epoch": 10.384615384615385,
      "grad_norm": 64.87910461425781,
      "learning_rate": 5.36e-06,
      "loss": 0.783,
      "step": 135
    },
    {
      "epoch": 10.461538461538462,
      "grad_norm": 92.65341186523438,
      "learning_rate": 5.400000000000001e-06,
      "loss": 0.7836,
      "step": 136
    },
    {
      "epoch": 10.538461538461538,
      "grad_norm": 145.02798461914062,
      "learning_rate": 5.4400000000000004e-06,
      "loss": 0.9292,
      "step": 137
    },
    {
      "epoch": 10.615384615384615,
      "grad_norm": 70.17644500732422,
      "learning_rate": 5.480000000000001e-06,
      "loss": 0.7544,
      "step": 138
    },
    {
      "epoch": 10.692307692307692,
      "grad_norm": 41.04573059082031,
      "learning_rate": 5.5200000000000005e-06,
      "loss": 0.7911,
      "step": 139
    },
    {
      "epoch": 10.76923076923077,
      "grad_norm": 109.60137176513672,
      "learning_rate": 5.560000000000001e-06,
      "loss": 0.9269,
      "step": 140
    },
    {
      "epoch": 10.846153846153847,
      "grad_norm": 129.09300231933594,
      "learning_rate": 5.600000000000001e-06,
      "loss": 0.9927,
      "step": 141
    },
    {
      "epoch": 10.923076923076923,
      "grad_norm": 140.2090301513672,
      "learning_rate": 5.64e-06,
      "loss": 0.9177,
      "step": 142
    },
    {
      "epoch": 11.0,
      "grad_norm": 136.94422912597656,
      "learning_rate": 5.68e-06,
      "loss": 0.8187,
      "step": 143
    },
    {
      "epoch": 11.076923076923077,
      "grad_norm": 85.431396484375,
      "learning_rate": 5.72e-06,
      "loss": 0.7158,
      "step": 144
    },
    {
      "epoch": 11.153846153846153,
      "grad_norm": 38.3684196472168,
      "learning_rate": 5.76e-06,
      "loss": 0.7148,
      "step": 145
    },
    {
      "epoch": 11.23076923076923,
      "grad_norm": 49.99292755126953,
      "learning_rate": 5.8e-06,
      "loss": 0.761,
      "step": 146
    },
    {
      "epoch": 11.307692307692308,
      "grad_norm": 64.29403686523438,
      "learning_rate": 5.84e-06,
      "loss": 0.8409,
      "step": 147
    },
    {
      "epoch": 11.384615384615385,
      "grad_norm": 105.57362365722656,
      "learning_rate": 5.8800000000000005e-06,
      "loss": 0.7556,
      "step": 148
    },
    {
      "epoch": 11.461538461538462,
      "grad_norm": 93.06744384765625,
      "learning_rate": 5.92e-06,
      "loss": 0.8834,
      "step": 149
    },
    {
      "epoch": 11.538461538461538,
      "grad_norm": 30.977516174316406,
      "learning_rate": 5.9600000000000005e-06,
      "loss": 0.7184,
      "step": 150
    },
    {
      "epoch": 11.615384615384615,
      "grad_norm": 58.94570541381836,
      "learning_rate": 6e-06,
      "loss": 0.643,
      "step": 151
    },
    {
      "epoch": 11.692307692307692,
      "grad_norm": 102.8298110961914,
      "learning_rate": 6.040000000000001e-06,
      "loss": 0.8481,
      "step": 152
    },
    {
      "epoch": 11.76923076923077,
      "grad_norm": 105.51367950439453,
      "learning_rate": 6.08e-06,
      "loss": 0.8117,
      "step": 153
    },
    {
      "epoch": 11.846153846153847,
      "grad_norm": 73.83160400390625,
      "learning_rate": 6.120000000000001e-06,
      "loss": 0.7865,
      "step": 154
    },
    {
      "epoch": 11.923076923076923,
      "grad_norm": 83.67406463623047,
      "learning_rate": 6.16e-06,
      "loss": 0.6597,
      "step": 155
    },
    {
      "epoch": 12.0,
      "grad_norm": 37.48268127441406,
      "learning_rate": 6.200000000000001e-06,
      "loss": 0.5755,
      "step": 156
    },
    {
      "epoch": 12.076923076923077,
      "grad_norm": 42.852882385253906,
      "learning_rate": 6.24e-06,
      "loss": 0.5435,
      "step": 157
    },
    {
      "epoch": 12.153846153846153,
      "grad_norm": 68.47445678710938,
      "learning_rate": 6.280000000000001e-06,
      "loss": 0.9999,
      "step": 158
    },
    {
      "epoch": 12.23076923076923,
      "grad_norm": 90.29669189453125,
      "learning_rate": 6.3200000000000005e-06,
      "loss": 1.3117,
      "step": 159
    },
    {
      "epoch": 12.307692307692308,
      "grad_norm": 88.94297790527344,
      "learning_rate": 6.360000000000001e-06,
      "loss": 1.1126,
      "step": 160
    },
    {
      "epoch": 12.384615384615385,
      "grad_norm": 34.859493255615234,
      "learning_rate": 6.4000000000000006e-06,
      "loss": 0.6848,
      "step": 161
    },
    {
      "epoch": 12.461538461538462,
      "grad_norm": 39.13771057128906,
      "learning_rate": 6.440000000000001e-06,
      "loss": 0.7297,
      "step": 162
    },
    {
      "epoch": 12.538461538461538,
      "grad_norm": 65.69542694091797,
      "learning_rate": 6.480000000000001e-06,
      "loss": 0.8216,
      "step": 163
    },
    {
      "epoch": 12.615384615384615,
      "grad_norm": 51.67008972167969,
      "learning_rate": 6.520000000000001e-06,
      "loss": 0.725,
      "step": 164
    },
    {
      "epoch": 12.692307692307692,
      "grad_norm": 23.950634002685547,
      "learning_rate": 6.560000000000001e-06,
      "loss": 0.6542,
      "step": 165
    },
    {
      "epoch": 12.76923076923077,
      "grad_norm": 62.85305404663086,
      "learning_rate": 6.600000000000001e-06,
      "loss": 1.0018,
      "step": 166
    },
    {
      "epoch": 12.846153846153847,
      "grad_norm": 71.49420928955078,
      "learning_rate": 6.640000000000001e-06,
      "loss": 0.8524,
      "step": 167
    },
    {
      "epoch": 12.923076923076923,
      "grad_norm": 65.00899505615234,
      "learning_rate": 6.680000000000001e-06,
      "loss": 0.8182,
      "step": 168
    },
    {
      "epoch": 13.0,
      "grad_norm": 62.54741668701172,
      "learning_rate": 6.720000000000001e-06,
      "loss": 0.6744,
      "step": 169
    },
    {
      "epoch": 13.076923076923077,
      "grad_norm": 53.9980354309082,
      "learning_rate": 6.760000000000001e-06,
      "loss": 0.7276,
      "step": 170
    },
    {
      "epoch": 13.153846153846153,
      "grad_norm": 49.69089126586914,
      "learning_rate": 6.800000000000001e-06,
      "loss": 0.7577,
      "step": 171
    },
    {
      "epoch": 13.23076923076923,
      "grad_norm": 56.17091751098633,
      "learning_rate": 6.8400000000000014e-06,
      "loss": 0.6323,
      "step": 172
    },
    {
      "epoch": 13.307692307692308,
      "grad_norm": 131.43931579589844,
      "learning_rate": 6.88e-06,
      "loss": 1.235,
      "step": 173
    },
    {
      "epoch": 13.384615384615385,
      "grad_norm": 75.32357788085938,
      "learning_rate": 6.92e-06,
      "loss": 0.6758,
      "step": 174
    },
    {
      "epoch": 13.461538461538462,
      "grad_norm": 69.21751403808594,
      "learning_rate": 6.96e-06,
      "loss": 0.7003,
      "step": 175
    },
    {
      "epoch": 13.538461538461538,
      "grad_norm": 129.3466339111328,
      "learning_rate": 7e-06,
      "loss": 0.7214,
      "step": 176
    },
    {
      "epoch": 13.615384615384615,
      "grad_norm": 44.15930938720703,
      "learning_rate": 7.04e-06,
      "loss": 0.526,
      "step": 177
    },
    {
      "epoch": 13.692307692307692,
      "grad_norm": 53.00956344604492,
      "learning_rate": 7.08e-06,
      "loss": 0.5758,
      "step": 178
    },
    {
      "epoch": 13.76923076923077,
      "grad_norm": 68.80349731445312,
      "learning_rate": 7.1200000000000004e-06,
      "loss": 1.0676,
      "step": 179
    },
    {
      "epoch": 13.846153846153847,
      "grad_norm": 178.73776245117188,
      "learning_rate": 7.16e-06,
      "loss": 0.7152,
      "step": 180
    },
    {
      "epoch": 13.923076923076923,
      "grad_norm": 113.57772064208984,
      "learning_rate": 7.2000000000000005e-06,
      "loss": 0.9495,
      "step": 181
    },
    {
      "epoch": 14.0,
      "grad_norm": 93.48717498779297,
      "learning_rate": 7.24e-06,
      "loss": 0.7061,
      "step": 182
    },
    {
      "epoch": 14.076923076923077,
      "grad_norm": 82.60102081298828,
      "learning_rate": 7.280000000000001e-06,
      "loss": 1.2033,
      "step": 183
    },
    {
      "epoch": 14.153846153846153,
      "grad_norm": 66.38690948486328,
      "learning_rate": 7.32e-06,
      "loss": 0.9306,
      "step": 184
    },
    {
      "epoch": 14.23076923076923,
      "grad_norm": 62.00544357299805,
      "learning_rate": 7.360000000000001e-06,
      "loss": 0.6083,
      "step": 185
    },
    {
      "epoch": 14.307692307692308,
      "grad_norm": 42.88933563232422,
      "learning_rate": 7.4e-06,
      "loss": 0.9682,
      "step": 186
    },
    {
      "epoch": 14.384615384615385,
      "grad_norm": 35.30773162841797,
      "learning_rate": 7.440000000000001e-06,
      "loss": 0.9414,
      "step": 187
    },
    {
      "epoch": 14.461538461538462,
      "grad_norm": 58.071876525878906,
      "learning_rate": 7.48e-06,
      "loss": 0.8669,
      "step": 188
    },
    {
      "epoch": 14.538461538461538,
      "grad_norm": 94.3634033203125,
      "learning_rate": 7.520000000000001e-06,
      "loss": 0.9253,
      "step": 189
    },
    {
      "epoch": 14.615384615384615,
      "grad_norm": 70.96503448486328,
      "learning_rate": 7.5600000000000005e-06,
      "loss": 0.9492,
      "step": 190
    },
    {
      "epoch": 14.692307692307692,
      "grad_norm": 53.165340423583984,
      "learning_rate": 7.600000000000001e-06,
      "loss": 0.7418,
      "step": 191
    },
    {
      "epoch": 14.76923076923077,
      "grad_norm": 29.172006607055664,
      "learning_rate": 7.640000000000001e-06,
      "loss": 0.6271,
      "step": 192
    },
    {
      "epoch": 14.846153846153847,
      "grad_norm": 115.76858520507812,
      "learning_rate": 7.680000000000001e-06,
      "loss": 1.0605,
      "step": 193
    },
    {
      "epoch": 14.923076923076923,
      "grad_norm": 61.08775329589844,
      "learning_rate": 7.72e-06,
      "loss": 0.8579,
      "step": 194
    },
    {
      "epoch": 15.0,
      "grad_norm": 65.41018676757812,
      "learning_rate": 7.76e-06,
      "loss": 0.694,
      "step": 195
    },
    {
      "epoch": 15.076923076923077,
      "grad_norm": 67.5228271484375,
      "learning_rate": 7.800000000000002e-06,
      "loss": 0.695,
      "step": 196
    },
    {
      "epoch": 15.153846153846153,
      "grad_norm": 30.65340805053711,
      "learning_rate": 7.840000000000001e-06,
      "loss": 0.6205,
      "step": 197
    },
    {
      "epoch": 15.23076923076923,
      "grad_norm": 83.59008026123047,
      "learning_rate": 7.88e-06,
      "loss": 0.6932,
      "step": 198
    },
    {
      "epoch": 15.307692307692308,
      "grad_norm": 48.95726013183594,
      "learning_rate": 7.92e-06,
      "loss": 0.7861,
      "step": 199
    },
    {
      "epoch": 15.384615384615385,
      "grad_norm": 65.74507904052734,
      "learning_rate": 7.960000000000002e-06,
      "loss": 0.7961,
      "step": 200
    },
    {
      "epoch": 15.461538461538462,
      "grad_norm": 58.61296081542969,
      "learning_rate": 8.000000000000001e-06,
      "loss": 0.7204,
      "step": 201
    },
    {
      "epoch": 15.538461538461538,
      "grad_norm": 79.05587005615234,
      "learning_rate": 8.040000000000001e-06,
      "loss": 0.8421,
      "step": 202
    },
    {
      "epoch": 15.615384615384615,
      "grad_norm": 44.74805450439453,
      "learning_rate": 8.08e-06,
      "loss": 0.6835,
      "step": 203
    },
    {
      "epoch": 15.692307692307692,
      "grad_norm": 86.16783142089844,
      "learning_rate": 8.120000000000002e-06,
      "loss": 0.8516,
      "step": 204
    },
    {
      "epoch": 15.76923076923077,
      "grad_norm": 78.34517669677734,
      "learning_rate": 8.16e-06,
      "loss": 0.5696,
      "step": 205
    },
    {
      "epoch": 15.846153846153847,
      "grad_norm": 60.46382141113281,
      "learning_rate": 8.2e-06,
      "loss": 0.7431,
      "step": 206
    },
    {
      "epoch": 15.923076923076923,
      "grad_norm": 75.38282012939453,
      "learning_rate": 8.24e-06,
      "loss": 0.7579,
      "step": 207
    },
    {
      "epoch": 16.0,
      "grad_norm": 61.66571807861328,
      "learning_rate": 8.28e-06,
      "loss": 0.5579,
      "step": 208
    },
    {
      "epoch": 16.076923076923077,
      "grad_norm": 176.31370544433594,
      "learning_rate": 8.32e-06,
      "loss": 1.0035,
      "step": 209
    },
    {
      "epoch": 16.153846153846153,
      "grad_norm": 217.41488647460938,
      "learning_rate": 8.36e-06,
      "loss": 1.1147,
      "step": 210
    },
    {
      "epoch": 16.23076923076923,
      "grad_norm": 45.08114242553711,
      "learning_rate": 8.400000000000001e-06,
      "loss": 0.7391,
      "step": 211
    },
    {
      "epoch": 16.307692307692307,
      "grad_norm": 56.17387771606445,
      "learning_rate": 8.44e-06,
      "loss": 0.7647,
      "step": 212
    },
    {
      "epoch": 16.384615384615383,
      "grad_norm": 51.677215576171875,
      "learning_rate": 8.48e-06,
      "loss": 0.6528,
      "step": 213
    },
    {
      "epoch": 16.46153846153846,
      "grad_norm": 49.8151969909668,
      "learning_rate": 8.52e-06,
      "loss": 0.8012,
      "step": 214
    },
    {
      "epoch": 16.53846153846154,
      "grad_norm": 83.117431640625,
      "learning_rate": 8.560000000000001e-06,
      "loss": 0.7326,
      "step": 215
    },
    {
      "epoch": 16.615384615384617,
      "grad_norm": 31.789459228515625,
      "learning_rate": 8.6e-06,
      "loss": 0.7549,
      "step": 216
    },
    {
      "epoch": 16.692307692307693,
      "grad_norm": 80.32394409179688,
      "learning_rate": 8.64e-06,
      "loss": 0.6813,
      "step": 217
    },
    {
      "epoch": 16.76923076923077,
      "grad_norm": 76.61673736572266,
      "learning_rate": 8.68e-06,
      "loss": 0.6526,
      "step": 218
    },
    {
      "epoch": 16.846153846153847,
      "grad_norm": 46.9598503112793,
      "learning_rate": 8.720000000000001e-06,
      "loss": 0.501,
      "step": 219
    },
    {
      "epoch": 16.923076923076923,
      "grad_norm": 73.53797912597656,
      "learning_rate": 8.76e-06,
      "loss": 0.9022,
      "step": 220
    },
    {
      "epoch": 17.0,
      "grad_norm": 58.83550262451172,
      "learning_rate": 8.8e-06,
      "loss": 0.7628,
      "step": 221
    },
    {
      "epoch": 17.076923076923077,
      "grad_norm": 90.0180435180664,
      "learning_rate": 8.84e-06,
      "loss": 1.0188,
      "step": 222
    },
    {
      "epoch": 17.153846153846153,
      "grad_norm": 48.37046813964844,
      "learning_rate": 8.880000000000001e-06,
      "loss": 0.7707,
      "step": 223
    },
    {
      "epoch": 17.23076923076923,
      "grad_norm": 52.4448356628418,
      "learning_rate": 8.920000000000001e-06,
      "loss": 0.7103,
      "step": 224
    },
    {
      "epoch": 17.307692307692307,
      "grad_norm": 36.983585357666016,
      "learning_rate": 8.96e-06,
      "loss": 0.7305,
      "step": 225
    },
    {
      "epoch": 17.384615384615383,
      "grad_norm": 16.054697036743164,
      "learning_rate": 9e-06,
      "loss": 0.6611,
      "step": 226
    },
    {
      "epoch": 17.46153846153846,
      "grad_norm": 45.36906814575195,
      "learning_rate": 9.040000000000002e-06,
      "loss": 0.6746,
      "step": 227
    },
    {
      "epoch": 17.53846153846154,
      "grad_norm": 40.45295715332031,
      "learning_rate": 9.080000000000001e-06,
      "loss": 0.7042,
      "step": 228
    },
    {
      "epoch": 17.615384615384617,
      "grad_norm": 56.89924240112305,
      "learning_rate": 9.12e-06,
      "loss": 0.5384,
      "step": 229
    },
    {
      "epoch": 17.692307692307693,
      "grad_norm": 56.17110061645508,
      "learning_rate": 9.16e-06,
      "loss": 0.9373,
      "step": 230
    },
    {
      "epoch": 17.76923076923077,
      "grad_norm": 61.18904113769531,
      "learning_rate": 9.200000000000002e-06,
      "loss": 0.6669,
      "step": 231
    },
    {
      "epoch": 17.846153846153847,
      "grad_norm": 42.18205261230469,
      "learning_rate": 9.240000000000001e-06,
      "loss": 0.6855,
      "step": 232
    },
    {
      "epoch": 17.923076923076923,
      "grad_norm": 73.21139526367188,
      "learning_rate": 9.280000000000001e-06,
      "loss": 0.8783,
      "step": 233
    },
    {
      "epoch": 18.0,
      "grad_norm": 60.66477584838867,
      "learning_rate": 9.32e-06,
      "loss": 0.751,
      "step": 234
    },
    {
      "epoch": 18.076923076923077,
      "grad_norm": 42.04085159301758,
      "learning_rate": 9.360000000000002e-06,
      "loss": 0.6296,
      "step": 235
    },
    {
      "epoch": 18.153846153846153,
      "grad_norm": 68.64347076416016,
      "learning_rate": 9.4e-06,
      "loss": 0.7494,
      "step": 236
    },
    {
      "epoch": 18.23076923076923,
      "grad_norm": 72.14678192138672,
      "learning_rate": 9.440000000000001e-06,
      "loss": 0.6706,
      "step": 237
    },
    {
      "epoch": 18.307692307692307,
      "grad_norm": 90.8731918334961,
      "learning_rate": 9.48e-06,
      "loss": 0.6978,
      "step": 238
    },
    {
      "epoch": 18.384615384615383,
      "grad_norm": 85.11774444580078,
      "learning_rate": 9.52e-06,
      "loss": 0.7161,
      "step": 239
    },
    {
      "epoch": 18.46153846153846,
      "grad_norm": 30.654109954833984,
      "learning_rate": 9.56e-06,
      "loss": 0.7263,
      "step": 240
    },
    {
      "epoch": 18.53846153846154,
      "grad_norm": 29.126256942749023,
      "learning_rate": 9.600000000000001e-06,
      "loss": 0.4471,
      "step": 241
    },
    {
      "epoch": 18.615384615384617,
      "grad_norm": 39.70819854736328,
      "learning_rate": 9.640000000000001e-06,
      "loss": 0.5729,
      "step": 242
    },
    {
      "epoch": 18.692307692307693,
      "grad_norm": 43.10990524291992,
      "learning_rate": 9.68e-06,
      "loss": 0.6048,
      "step": 243
    },
    {
      "epoch": 18.76923076923077,
      "grad_norm": 57.911685943603516,
      "learning_rate": 9.72e-06,
      "loss": 0.6993,
      "step": 244
    },
    {
      "epoch": 18.846153846153847,
      "grad_norm": 23.198652267456055,
      "learning_rate": 9.760000000000001e-06,
      "loss": 0.5685,
      "step": 245
    },
    {
      "epoch": 18.923076923076923,
      "grad_norm": 45.444454193115234,
      "learning_rate": 9.800000000000001e-06,
      "loss": 0.678,
      "step": 246
    },
    {
      "epoch": 19.0,
      "grad_norm": 26.42611312866211,
      "learning_rate": 9.84e-06,
      "loss": 0.5872,
      "step": 247
    },
    {
      "epoch": 19.076923076923077,
      "grad_norm": 50.92378234863281,
      "learning_rate": 9.88e-06,
      "loss": 0.4833,
      "step": 248
    },
    {
      "epoch": 19.153846153846153,
      "grad_norm": 46.32240676879883,
      "learning_rate": 9.920000000000002e-06,
      "loss": 0.5696,
      "step": 249
    },
    {
      "epoch": 19.23076923076923,
      "grad_norm": 30.97173500061035,
      "learning_rate": 9.960000000000001e-06,
      "loss": 0.3094,
      "step": 250
    },
    {
      "epoch": 19.307692307692307,
      "grad_norm": 99.83480834960938,
      "learning_rate": 1e-05,
      "loss": 1.3693,
      "step": 251
    },
    {
      "epoch": 19.384615384615383,
      "grad_norm": 41.79911422729492,
      "learning_rate": 1.004e-05,
      "loss": 0.7863,
      "step": 252
    },
    {
      "epoch": 19.46153846153846,
      "grad_norm": 40.042179107666016,
      "learning_rate": 1.008e-05,
      "loss": 0.4821,
      "step": 253
    },
    {
      "epoch": 19.53846153846154,
      "grad_norm": 36.340492248535156,
      "learning_rate": 1.0120000000000001e-05,
      "loss": 0.6628,
      "step": 254
    },
    {
      "epoch": 19.615384615384617,
      "grad_norm": 37.4381217956543,
      "learning_rate": 1.0160000000000001e-05,
      "loss": 0.5221,
      "step": 255
    },
    {
      "epoch": 19.692307692307693,
      "grad_norm": 78.24024963378906,
      "learning_rate": 1.02e-05,
      "loss": 0.6961,
      "step": 256
    },
    {
      "epoch": 19.76923076923077,
      "grad_norm": 61.44567108154297,
      "learning_rate": 1.024e-05,
      "loss": 0.6614,
      "step": 257
    },
    {
      "epoch": 19.846153846153847,
      "grad_norm": 69.41854095458984,
      "learning_rate": 1.0280000000000002e-05,
      "loss": 1.0344,
      "step": 258
    },
    {
      "epoch": 19.923076923076923,
      "grad_norm": 54.26890182495117,
      "learning_rate": 1.0320000000000001e-05,
      "loss": 0.6941,
      "step": 259
    },
    {
      "epoch": 20.0,
      "grad_norm": 119.05464935302734,
      "learning_rate": 1.036e-05,
      "loss": 0.5696,
      "step": 260
    },
    {
      "epoch": 20.076923076923077,
      "grad_norm": 43.01278305053711,
      "learning_rate": 1.04e-05,
      "loss": 0.6823,
      "step": 261
    },
    {
      "epoch": 20.153846153846153,
      "grad_norm": 46.86002731323242,
      "learning_rate": 1.0440000000000002e-05,
      "loss": 0.6659,
      "step": 262
    },
    {
      "epoch": 20.23076923076923,
      "grad_norm": 38.46581268310547,
      "learning_rate": 1.0480000000000001e-05,
      "loss": 0.4717,
      "step": 263
    },
    {
      "epoch": 20.307692307692307,
      "grad_norm": 185.26800537109375,
      "learning_rate": 1.0520000000000001e-05,
      "loss": 0.7102,
      "step": 264
    },
    {
      "epoch": 20.384615384615383,
      "grad_norm": 74.64105224609375,
      "learning_rate": 1.056e-05,
      "loss": 0.3521,
      "step": 265
    },
    {
      "epoch": 20.46153846153846,
      "grad_norm": 60.956748962402344,
      "learning_rate": 1.0600000000000002e-05,
      "loss": 0.6713,
      "step": 266
    },
    {
      "epoch": 20.53846153846154,
      "grad_norm": 43.28743362426758,
      "learning_rate": 1.0640000000000001e-05,
      "loss": 0.7422,
      "step": 267
    },
    {
      "epoch": 20.615384615384617,
      "grad_norm": 56.09255599975586,
      "learning_rate": 1.0680000000000001e-05,
      "loss": 0.5013,
      "step": 268
    },
    {
      "epoch": 20.692307692307693,
      "grad_norm": 70.18143463134766,
      "learning_rate": 1.072e-05,
      "loss": 0.5307,
      "step": 269
    },
    {
      "epoch": 20.76923076923077,
      "grad_norm": 41.185638427734375,
      "learning_rate": 1.0760000000000002e-05,
      "loss": 0.8098,
      "step": 270
    },
    {
      "epoch": 20.846153846153847,
      "grad_norm": 19.666332244873047,
      "learning_rate": 1.0800000000000002e-05,
      "loss": 0.4061,
      "step": 271
    },
    {
      "epoch": 20.923076923076923,
      "grad_norm": 52.35540771484375,
      "learning_rate": 1.0840000000000001e-05,
      "loss": 0.2785,
      "step": 272
    },
    {
      "epoch": 21.0,
      "grad_norm": 64.35240936279297,
      "learning_rate": 1.0880000000000001e-05,
      "loss": 0.4654,
      "step": 273
    },
    {
      "epoch": 21.076923076923077,
      "grad_norm": 39.35055923461914,
      "learning_rate": 1.0920000000000002e-05,
      "loss": 0.4549,
      "step": 274
    },
    {
      "epoch": 21.153846153846153,
      "grad_norm": 46.019657135009766,
      "learning_rate": 1.0960000000000002e-05,
      "loss": 0.4013,
      "step": 275
    },
    {
      "epoch": 21.23076923076923,
      "grad_norm": 34.99978256225586,
      "learning_rate": 1.1000000000000001e-05,
      "loss": 0.5063,
      "step": 276
    },
    {
      "epoch": 21.307692307692307,
      "grad_norm": 274.4259033203125,
      "learning_rate": 1.1040000000000001e-05,
      "loss": 0.8115,
      "step": 277
    },
    {
      "epoch": 21.384615384615383,
      "grad_norm": 58.749969482421875,
      "learning_rate": 1.1080000000000002e-05,
      "loss": 1.0227,
      "step": 278
    },
    {
      "epoch": 21.46153846153846,
      "grad_norm": 68.37541961669922,
      "learning_rate": 1.1120000000000002e-05,
      "loss": 0.462,
      "step": 279
    },
    {
      "epoch": 21.53846153846154,
      "grad_norm": 30.333087921142578,
      "learning_rate": 1.1160000000000002e-05,
      "loss": 0.4942,
      "step": 280
    },
    {
      "epoch": 21.615384615384617,
      "grad_norm": 25.71001434326172,
      "learning_rate": 1.1200000000000001e-05,
      "loss": 0.2491,
      "step": 281
    },
    {
      "epoch": 21.692307692307693,
      "grad_norm": 76.39506530761719,
      "learning_rate": 1.1240000000000002e-05,
      "loss": 1.0506,
      "step": 282
    },
    {
      "epoch": 21.76923076923077,
      "grad_norm": 55.327606201171875,
      "learning_rate": 1.128e-05,
      "loss": 0.6434,
      "step": 283
    },
    {
      "epoch": 21.846153846153847,
      "grad_norm": 56.551788330078125,
      "learning_rate": 1.132e-05,
      "loss": 0.8624,
      "step": 284
    },
    {
      "epoch": 21.923076923076923,
      "grad_norm": 49.042423248291016,
      "learning_rate": 1.136e-05,
      "loss": 0.5041,
      "step": 285
    },
    {
      "epoch": 22.0,
      "grad_norm": 58.502357482910156,
      "learning_rate": 1.14e-05,
      "loss": 0.4436,
      "step": 286
    },
    {
      "epoch": 22.076923076923077,
      "grad_norm": 62.351776123046875,
      "learning_rate": 1.144e-05,
      "loss": 0.7322,
      "step": 287
    },
    {
      "epoch": 22.153846153846153,
      "grad_norm": 98.29476928710938,
      "learning_rate": 1.148e-05,
      "loss": 0.9344,
      "step": 288
    },
    {
      "epoch": 22.23076923076923,
      "grad_norm": 51.842838287353516,
      "learning_rate": 1.152e-05,
      "loss": 0.6241,
      "step": 289
    },
    {
      "epoch": 22.307692307692307,
      "grad_norm": 44.713233947753906,
      "learning_rate": 1.156e-05,
      "loss": 0.418,
      "step": 290
    },
    {
      "epoch": 22.384615384615383,
      "grad_norm": 40.88868713378906,
      "learning_rate": 1.16e-05,
      "loss": 0.3467,
      "step": 291
    },
    {
      "epoch": 22.46153846153846,
      "grad_norm": 71.91492462158203,
      "learning_rate": 1.164e-05,
      "loss": 0.6216,
      "step": 292
    },
    {
      "epoch": 22.53846153846154,
      "grad_norm": 69.22135925292969,
      "learning_rate": 1.168e-05,
      "loss": 0.3065,
      "step": 293
    },
    {
      "epoch": 22.615384615384617,
      "grad_norm": 50.13063430786133,
      "learning_rate": 1.172e-05,
      "loss": 0.2739,
      "step": 294
    },
    {
      "epoch": 22.692307692307693,
      "grad_norm": 43.53316879272461,
      "learning_rate": 1.1760000000000001e-05,
      "loss": 0.676,
      "step": 295
    },
    {
      "epoch": 22.76923076923077,
      "grad_norm": 72.19952392578125,
      "learning_rate": 1.18e-05,
      "loss": 0.5518,
      "step": 296
    },
    {
      "epoch": 22.846153846153847,
      "grad_norm": 99.4913330078125,
      "learning_rate": 1.184e-05,
      "loss": 0.4786,
      "step": 297
    },
    {
      "epoch": 22.923076923076923,
      "grad_norm": 130.802978515625,
      "learning_rate": 1.188e-05,
      "loss": 0.3785,
      "step": 298
    },
    {
      "epoch": 23.0,
      "grad_norm": 62.112648010253906,
      "learning_rate": 1.1920000000000001e-05,
      "loss": 0.3357,
      "step": 299
    },
    {
      "epoch": 23.076923076923077,
      "grad_norm": 27.632856369018555,
      "learning_rate": 1.196e-05,
      "loss": 0.498,
      "step": 300
    },
    {
      "epoch": 23.153846153846153,
      "grad_norm": 73.1949691772461,
      "learning_rate": 1.2e-05,
      "loss": 0.4796,
      "step": 301
    },
    {
      "epoch": 23.23076923076923,
      "grad_norm": 101.7704086303711,
      "learning_rate": 1.204e-05,
      "loss": 0.4953,
      "step": 302
    },
    {
      "epoch": 23.307692307692307,
      "grad_norm": 101.37821197509766,
      "learning_rate": 1.2080000000000001e-05,
      "loss": 0.594,
      "step": 303
    },
    {
      "epoch": 23.384615384615383,
      "grad_norm": 80.45525360107422,
      "learning_rate": 1.2120000000000001e-05,
      "loss": 0.827,
      "step": 304
    },
    {
      "epoch": 23.46153846153846,
      "grad_norm": 34.36630630493164,
      "learning_rate": 1.216e-05,
      "loss": 0.5362,
      "step": 305
    },
    {
      "epoch": 23.53846153846154,
      "grad_norm": 41.87324905395508,
      "learning_rate": 1.22e-05,
      "loss": 0.3961,
      "step": 306
    },
    {
      "epoch": 23.615384615384617,
      "grad_norm": 97.23712158203125,
      "learning_rate": 1.2240000000000001e-05,
      "loss": 0.5439,
      "step": 307
    },
    {
      "epoch": 23.692307692307693,
      "grad_norm": 125.3348388671875,
      "learning_rate": 1.2280000000000001e-05,
      "loss": 0.7085,
      "step": 308
    },
    {
      "epoch": 23.76923076923077,
      "grad_norm": 66.71159362792969,
      "learning_rate": 1.232e-05,
      "loss": 0.3981,
      "step": 309
    },
    {
      "epoch": 23.846153846153847,
      "grad_norm": 45.817039489746094,
      "learning_rate": 1.236e-05,
      "loss": 0.4767,
      "step": 310
    },
    {
      "epoch": 23.923076923076923,
      "grad_norm": 60.648868560791016,
      "learning_rate": 1.2400000000000002e-05,
      "loss": 0.1732,
      "step": 311
    },
    {
      "epoch": 24.0,
      "grad_norm": 93.42353820800781,
      "learning_rate": 1.2440000000000001e-05,
      "loss": 0.7817,
      "step": 312
    },
    {
      "epoch": 24.076923076923077,
      "grad_norm": 98.92288970947266,
      "learning_rate": 1.248e-05,
      "loss": 0.7387,
      "step": 313
    },
    {
      "epoch": 24.153846153846153,
      "grad_norm": 79.44998168945312,
      "learning_rate": 1.252e-05,
      "loss": 0.8928,
      "step": 314
    },
    {
      "epoch": 24.23076923076923,
      "grad_norm": 39.60928726196289,
      "learning_rate": 1.2560000000000002e-05,
      "loss": 0.4137,
      "step": 315
    },
    {
      "epoch": 24.307692307692307,
      "grad_norm": 60.29780960083008,
      "learning_rate": 1.2600000000000001e-05,
      "loss": 0.4034,
      "step": 316
    },
    {
      "epoch": 24.384615384615383,
      "grad_norm": 49.01469039916992,
      "learning_rate": 1.2640000000000001e-05,
      "loss": 0.64,
      "step": 317
    },
    {
      "epoch": 24.46153846153846,
      "grad_norm": 77.11458587646484,
      "learning_rate": 1.268e-05,
      "loss": 0.5456,
      "step": 318
    },
    {
      "epoch": 24.53846153846154,
      "grad_norm": 76.5569839477539,
      "learning_rate": 1.2720000000000002e-05,
      "loss": 0.8046,
      "step": 319
    },
    {
      "epoch": 24.615384615384617,
      "grad_norm": 36.19839096069336,
      "learning_rate": 1.2760000000000001e-05,
      "loss": 0.5598,
      "step": 320
    },
    {
      "epoch": 24.692307692307693,
      "grad_norm": 48.85591506958008,
      "learning_rate": 1.2800000000000001e-05,
      "loss": 0.6344,
      "step": 321
    },
    {
      "epoch": 24.76923076923077,
      "grad_norm": 41.91597366333008,
      "learning_rate": 1.284e-05,
      "loss": 0.5952,
      "step": 322
    },
    {
      "epoch": 24.846153846153847,
      "grad_norm": 23.071367263793945,
      "learning_rate": 1.2880000000000002e-05,
      "loss": 0.5319,
      "step": 323
    },
    {
      "epoch": 24.923076923076923,
      "grad_norm": 24.764896392822266,
      "learning_rate": 1.2920000000000002e-05,
      "loss": 0.3762,
      "step": 324
    },
    {
      "epoch": 25.0,
      "grad_norm": 57.47997283935547,
      "learning_rate": 1.2960000000000001e-05,
      "loss": 1.0161,
      "step": 325
    },
    {
      "epoch": 25.076923076923077,
      "grad_norm": 89.52471923828125,
      "learning_rate": 1.3000000000000001e-05,
      "loss": 0.61,
      "step": 326
    },
    {
      "epoch": 25.153846153846153,
      "grad_norm": 70.85286712646484,
      "learning_rate": 1.3040000000000002e-05,
      "loss": 0.6296,
      "step": 327
    },
    {
      "epoch": 25.23076923076923,
      "grad_norm": 29.83013343811035,
      "learning_rate": 1.3080000000000002e-05,
      "loss": 0.4853,
      "step": 328
    },
    {
      "epoch": 25.307692307692307,
      "grad_norm": 42.2051887512207,
      "learning_rate": 1.3120000000000001e-05,
      "loss": 0.3599,
      "step": 329
    },
    {
      "epoch": 25.384615384615383,
      "grad_norm": 22.57636260986328,
      "learning_rate": 1.3160000000000001e-05,
      "loss": 0.1958,
      "step": 330
    },
    {
      "epoch": 25.46153846153846,
      "grad_norm": 31.4184513092041,
      "learning_rate": 1.3200000000000002e-05,
      "loss": 0.4977,
      "step": 331
    },
    {
      "epoch": 25.53846153846154,
      "grad_norm": 65.11288452148438,
      "learning_rate": 1.3240000000000002e-05,
      "loss": 0.8784,
      "step": 332
    },
    {
      "epoch": 25.615384615384617,
      "grad_norm": 63.44681930541992,
      "learning_rate": 1.3280000000000002e-05,
      "loss": 0.8258,
      "step": 333
    },
    {
      "epoch": 25.692307692307693,
      "grad_norm": 61.35824203491211,
      "learning_rate": 1.3320000000000001e-05,
      "loss": 0.6076,
      "step": 334
    },
    {
      "epoch": 25.76923076923077,
      "grad_norm": 42.7951774597168,
      "learning_rate": 1.3360000000000003e-05,
      "loss": 0.64,
      "step": 335
    },
    {
      "epoch": 25.846153846153847,
      "grad_norm": 53.61860656738281,
      "learning_rate": 1.3400000000000002e-05,
      "loss": 0.3146,
      "step": 336
    },
    {
      "epoch": 25.923076923076923,
      "grad_norm": 120.34032440185547,
      "learning_rate": 1.3440000000000002e-05,
      "loss": 1.4029,
      "step": 337
    },
    {
      "epoch": 26.0,
      "grad_norm": 54.58380126953125,
      "learning_rate": 1.3480000000000001e-05,
      "loss": 0.4976,
      "step": 338
    },
    {
      "epoch": 26.076923076923077,
      "grad_norm": 35.85188293457031,
      "learning_rate": 1.3520000000000003e-05,
      "loss": 0.2843,
      "step": 339
    },
    {
      "epoch": 26.153846153846153,
      "grad_norm": 43.697750091552734,
      "learning_rate": 1.3560000000000002e-05,
      "loss": 0.3621,
      "step": 340
    },
    {
      "epoch": 26.23076923076923,
      "grad_norm": 76.05105590820312,
      "learning_rate": 1.3600000000000002e-05,
      "loss": 0.4223,
      "step": 341
    },
    {
      "epoch": 26.307692307692307,
      "grad_norm": 118.16177368164062,
      "learning_rate": 1.3640000000000002e-05,
      "loss": 0.8052,
      "step": 342
    },
    {
      "epoch": 26.384615384615383,
      "grad_norm": 113.29070281982422,
      "learning_rate": 1.3680000000000003e-05,
      "loss": 0.8527,
      "step": 343
    },
    {
      "epoch": 26.46153846153846,
      "grad_norm": 65.74857330322266,
      "learning_rate": 1.3720000000000002e-05,
      "loss": 0.4324,
      "step": 344
    },
    {
      "epoch": 26.53846153846154,
      "grad_norm": 76.03609466552734,
      "learning_rate": 1.376e-05,
      "loss": 0.6704,
      "step": 345
    },
    {
      "epoch": 26.615384615384617,
      "grad_norm": 84.36861419677734,
      "learning_rate": 1.38e-05,
      "loss": 0.6508,
      "step": 346
    },
    {
      "epoch": 26.692307692307693,
      "grad_norm": 73.54359436035156,
      "learning_rate": 1.384e-05,
      "loss": 0.9164,
      "step": 347
    },
    {
      "epoch": 26.76923076923077,
      "grad_norm": 80.98385620117188,
      "learning_rate": 1.3880000000000001e-05,
      "loss": 0.6096,
      "step": 348
    },
    {
      "epoch": 26.846153846153847,
      "grad_norm": 54.7966194152832,
      "learning_rate": 1.392e-05,
      "loss": 0.6865,
      "step": 349
    },
    {
      "epoch": 26.923076923076923,
      "grad_norm": 36.034706115722656,
      "learning_rate": 1.396e-05,
      "loss": 0.5209,
      "step": 350
    },
    {
      "epoch": 27.0,
      "grad_norm": 58.19102478027344,
      "learning_rate": 1.4e-05,
      "loss": 0.3163,
      "step": 351
    },
    {
      "epoch": 27.076923076923077,
      "grad_norm": 22.627710342407227,
      "learning_rate": 1.4040000000000001e-05,
      "loss": 0.3443,
      "step": 352
    },
    {
      "epoch": 27.153846153846153,
      "grad_norm": 37.32178497314453,
      "learning_rate": 1.408e-05,
      "loss": 1.0384,
      "step": 353
    },
    {
      "epoch": 27.23076923076923,
      "grad_norm": 38.546485900878906,
      "learning_rate": 1.412e-05,
      "loss": 0.3171,
      "step": 354
    },
    {
      "epoch": 27.307692307692307,
      "grad_norm": 22.176666259765625,
      "learning_rate": 1.416e-05,
      "loss": 0.5926,
      "step": 355
    },
    {
      "epoch": 27.384615384615383,
      "grad_norm": 23.193613052368164,
      "learning_rate": 1.4200000000000001e-05,
      "loss": 0.5167,
      "step": 356
    },
    {
      "epoch": 27.46153846153846,
      "grad_norm": 23.118364334106445,
      "learning_rate": 1.4240000000000001e-05,
      "loss": 0.2792,
      "step": 357
    },
    {
      "epoch": 27.53846153846154,
      "grad_norm": 14.209169387817383,
      "learning_rate": 1.428e-05,
      "loss": 0.4094,
      "step": 358
    },
    {
      "epoch": 27.615384615384617,
      "grad_norm": 43.388553619384766,
      "learning_rate": 1.432e-05,
      "loss": 0.1616,
      "step": 359
    },
    {
      "epoch": 27.692307692307693,
      "grad_norm": 37.662174224853516,
      "learning_rate": 1.4360000000000001e-05,
      "loss": 0.5995,
      "step": 360
    },
    {
      "epoch": 27.76923076923077,
      "grad_norm": 53.796566009521484,
      "learning_rate": 1.4400000000000001e-05,
      "loss": 0.7795,
      "step": 361
    },
    {
      "epoch": 27.846153846153847,
      "grad_norm": 25.604795455932617,
      "learning_rate": 1.444e-05,
      "loss": 0.2185,
      "step": 362
    },
    {
      "epoch": 27.923076923076923,
      "grad_norm": 27.013303756713867,
      "learning_rate": 1.448e-05,
      "loss": 0.1652,
      "step": 363
    },
    {
      "epoch": 28.0,
      "grad_norm": 44.7171516418457,
      "learning_rate": 1.4520000000000002e-05,
      "loss": 0.987,
      "step": 364
    },
    {
      "epoch": 28.076923076923077,
      "grad_norm": 41.05904006958008,
      "learning_rate": 1.4560000000000001e-05,
      "loss": 0.4601,
      "step": 365
    },
    {
      "epoch": 28.153846153846153,
      "grad_norm": 45.71525955200195,
      "learning_rate": 1.46e-05,
      "loss": 0.3749,
      "step": 366
    },
    {
      "epoch": 28.23076923076923,
      "grad_norm": 24.003860473632812,
      "learning_rate": 1.464e-05,
      "loss": 0.4918,
      "step": 367
    },
    {
      "epoch": 28.307692307692307,
      "grad_norm": 55.62363815307617,
      "learning_rate": 1.4680000000000002e-05,
      "loss": 0.7905,
      "step": 368
    },
    {
      "epoch": 28.384615384615383,
      "grad_norm": 40.681175231933594,
      "learning_rate": 1.4720000000000001e-05,
      "loss": 0.4372,
      "step": 369
    },
    {
      "epoch": 28.46153846153846,
      "grad_norm": 23.041379928588867,
      "learning_rate": 1.4760000000000001e-05,
      "loss": 0.3825,
      "step": 370
    },
    {
      "epoch": 28.53846153846154,
      "grad_norm": 48.101505279541016,
      "learning_rate": 1.48e-05,
      "loss": 0.2319,
      "step": 371
    },
    {
      "epoch": 28.615384615384617,
      "grad_norm": 36.70085525512695,
      "learning_rate": 1.4840000000000002e-05,
      "loss": 0.5991,
      "step": 372
    },
    {
      "epoch": 28.692307692307693,
      "grad_norm": 37.7666130065918,
      "learning_rate": 1.4880000000000002e-05,
      "loss": 0.2135,
      "step": 373
    },
    {
      "epoch": 28.76923076923077,
      "grad_norm": 25.588153839111328,
      "learning_rate": 1.4920000000000001e-05,
      "loss": 0.1974,
      "step": 374
    },
    {
      "epoch": 28.846153846153847,
      "grad_norm": 11.466187477111816,
      "learning_rate": 1.496e-05,
      "loss": 0.1845,
      "step": 375
    },
    {
      "epoch": 28.923076923076923,
      "grad_norm": 41.00111770629883,
      "learning_rate": 1.5000000000000002e-05,
      "loss": 0.3138,
      "step": 376
    },
    {
      "epoch": 29.0,
      "grad_norm": 50.57583999633789,
      "learning_rate": 1.5040000000000002e-05,
      "loss": 0.4909,
      "step": 377
    },
    {
      "epoch": 29.076923076923077,
      "grad_norm": 45.61771774291992,
      "learning_rate": 1.5080000000000001e-05,
      "loss": 0.8464,
      "step": 378
    },
    {
      "epoch": 29.153846153846153,
      "grad_norm": 40.37639617919922,
      "learning_rate": 1.5120000000000001e-05,
      "loss": 0.3515,
      "step": 379
    },
    {
      "epoch": 29.23076923076923,
      "grad_norm": 67.5796890258789,
      "learning_rate": 1.516e-05,
      "loss": 0.6139,
      "step": 380
    },
    {
      "epoch": 29.307692307692307,
      "grad_norm": 51.77175521850586,
      "learning_rate": 1.5200000000000002e-05,
      "loss": 0.1876,
      "step": 381
    },
    {
      "epoch": 29.384615384615383,
      "grad_norm": 48.26830291748047,
      "learning_rate": 1.5240000000000001e-05,
      "loss": 0.3167,
      "step": 382
    },
    {
      "epoch": 29.46153846153846,
      "grad_norm": 47.83180618286133,
      "learning_rate": 1.5280000000000003e-05,
      "loss": 0.4492,
      "step": 383
    },
    {
      "epoch": 29.53846153846154,
      "grad_norm": 73.85650634765625,
      "learning_rate": 1.5320000000000002e-05,
      "loss": 0.783,
      "step": 384
    },
    {
      "epoch": 29.615384615384617,
      "grad_norm": 73.28657531738281,
      "learning_rate": 1.5360000000000002e-05,
      "loss": 0.9757,
      "step": 385
    },
    {
      "epoch": 29.692307692307693,
      "grad_norm": 32.33823776245117,
      "learning_rate": 1.54e-05,
      "loss": 0.6493,
      "step": 386
    },
    {
      "epoch": 29.76923076923077,
      "grad_norm": 40.99183654785156,
      "learning_rate": 1.544e-05,
      "loss": 0.407,
      "step": 387
    },
    {
      "epoch": 29.846153846153847,
      "grad_norm": 19.310026168823242,
      "learning_rate": 1.548e-05,
      "loss": 0.1387,
      "step": 388
    },
    {
      "epoch": 29.923076923076923,
      "grad_norm": 86.82865142822266,
      "learning_rate": 1.552e-05,
      "loss": 0.9859,
      "step": 389
    },
    {
      "epoch": 30.0,
      "grad_norm": 66.46072387695312,
      "learning_rate": 1.556e-05,
| "loss": 0.5921, |
| "step": 390 |
| }, |
| { |
| "epoch": 30.076923076923077, |
| "grad_norm": 47.81773376464844, |
| "learning_rate": 1.5600000000000003e-05, |
| "loss": 0.3752, |
| "step": 391 |
| }, |
| { |
| "epoch": 30.153846153846153, |
| "grad_norm": 76.41130828857422, |
| "learning_rate": 1.5640000000000003e-05, |
| "loss": 0.4966, |
| "step": 392 |
| }, |
| { |
| "epoch": 30.23076923076923, |
| "grad_norm": 67.10013580322266, |
| "learning_rate": 1.5680000000000002e-05, |
| "loss": 0.3609, |
| "step": 393 |
| }, |
| { |
| "epoch": 30.307692307692307, |
| "grad_norm": 38.465450286865234, |
| "learning_rate": 1.5720000000000002e-05, |
| "loss": 0.6302, |
| "step": 394 |
| }, |
| { |
| "epoch": 30.384615384615383, |
| "grad_norm": 37.98847579956055, |
| "learning_rate": 1.576e-05, |
| "loss": 0.3228, |
| "step": 395 |
| }, |
| { |
| "epoch": 30.46153846153846, |
| "grad_norm": 83.65345001220703, |
| "learning_rate": 1.58e-05, |
| "loss": 0.6936, |
| "step": 396 |
| }, |
| { |
| "epoch": 30.53846153846154, |
| "grad_norm": 81.71379089355469, |
| "learning_rate": 1.584e-05, |
| "loss": 0.5248, |
| "step": 397 |
| }, |
| { |
| "epoch": 30.615384615384617, |
| "grad_norm": 46.50620651245117, |
| "learning_rate": 1.588e-05, |
| "loss": 0.6357, |
| "step": 398 |
| }, |
| { |
| "epoch": 30.692307692307693, |
| "grad_norm": 68.04540252685547, |
| "learning_rate": 1.5920000000000003e-05, |
| "loss": 0.429, |
| "step": 399 |
| }, |
| { |
| "epoch": 30.76923076923077, |
| "grad_norm": 55.85519790649414, |
| "learning_rate": 1.5960000000000003e-05, |
| "loss": 0.4192, |
| "step": 400 |
| }, |
| { |
| "epoch": 30.846153846153847, |
| "grad_norm": 71.962890625, |
| "learning_rate": 1.6000000000000003e-05, |
| "loss": 0.4128, |
| "step": 401 |
| }, |
| { |
| "epoch": 30.923076923076923, |
| "grad_norm": 77.69264221191406, |
| "learning_rate": 1.6040000000000002e-05, |
| "loss": 1.0509, |
| "step": 402 |
| }, |
| { |
| "epoch": 31.0, |
| "grad_norm": 36.69390106201172, |
| "learning_rate": 1.6080000000000002e-05, |
| "loss": 0.6102, |
| "step": 403 |
| }, |
| { |
| "epoch": 31.076923076923077, |
| "grad_norm": 59.204612731933594, |
| "learning_rate": 1.612e-05, |
| "loss": 0.1778, |
| "step": 404 |
| }, |
| { |
| "epoch": 31.153846153846153, |
| "grad_norm": 65.2522964477539, |
| "learning_rate": 1.616e-05, |
| "loss": 0.6148, |
| "step": 405 |
| }, |
| { |
| "epoch": 31.23076923076923, |
| "grad_norm": 14.981352806091309, |
| "learning_rate": 1.62e-05, |
| "loss": 0.1487, |
| "step": 406 |
| }, |
| { |
| "epoch": 31.307692307692307, |
| "grad_norm": 47.70904541015625, |
| "learning_rate": 1.6240000000000004e-05, |
| "loss": 0.5439, |
| "step": 407 |
| }, |
| { |
| "epoch": 31.384615384615383, |
| "grad_norm": 77.24309539794922, |
| "learning_rate": 1.628e-05, |
| "loss": 0.8684, |
| "step": 408 |
| }, |
| { |
| "epoch": 31.46153846153846, |
| "grad_norm": 73.8941879272461, |
| "learning_rate": 1.632e-05, |
| "loss": 0.4744, |
| "step": 409 |
| }, |
| { |
| "epoch": 31.53846153846154, |
| "grad_norm": 59.932586669921875, |
| "learning_rate": 1.636e-05, |
| "loss": 0.8794, |
| "step": 410 |
| }, |
| { |
| "epoch": 31.615384615384617, |
| "grad_norm": 21.144742965698242, |
| "learning_rate": 1.64e-05, |
| "loss": 0.2288, |
| "step": 411 |
| }, |
| { |
| "epoch": 31.692307692307693, |
| "grad_norm": 45.16164016723633, |
| "learning_rate": 1.6440000000000002e-05, |
| "loss": 0.3846, |
| "step": 412 |
| }, |
| { |
| "epoch": 31.76923076923077, |
| "grad_norm": 63.43020248413086, |
| "learning_rate": 1.648e-05, |
| "loss": 0.607, |
| "step": 413 |
| }, |
| { |
| "epoch": 31.846153846153847, |
| "grad_norm": 74.06291961669922, |
| "learning_rate": 1.652e-05, |
| "loss": 0.6584, |
| "step": 414 |
| }, |
| { |
| "epoch": 31.923076923076923, |
| "grad_norm": 76.64544677734375, |
| "learning_rate": 1.656e-05, |
| "loss": 0.9753, |
| "step": 415 |
| }, |
| { |
| "epoch": 32.0, |
| "grad_norm": 66.81476593017578, |
| "learning_rate": 1.66e-05, |
| "loss": 0.3477, |
| "step": 416 |
| }, |
| { |
| "epoch": 32.07692307692308, |
| "grad_norm": 58.571075439453125, |
| "learning_rate": 1.664e-05, |
| "loss": 0.3969, |
| "step": 417 |
| }, |
| { |
| "epoch": 32.15384615384615, |
| "grad_norm": 50.05775451660156, |
| "learning_rate": 1.668e-05, |
| "loss": 0.5592, |
| "step": 418 |
| }, |
| { |
| "epoch": 32.23076923076923, |
| "grad_norm": 69.51533508300781, |
| "learning_rate": 1.672e-05, |
| "loss": 0.3837, |
| "step": 419 |
| }, |
| { |
| "epoch": 32.30769230769231, |
| "grad_norm": 64.64315032958984, |
| "learning_rate": 1.6760000000000002e-05, |
| "loss": 0.7959, |
| "step": 420 |
| }, |
| { |
| "epoch": 32.38461538461539, |
| "grad_norm": 51.027652740478516, |
| "learning_rate": 1.6800000000000002e-05, |
| "loss": 0.8768, |
| "step": 421 |
| }, |
| { |
| "epoch": 32.46153846153846, |
| "grad_norm": 35.286190032958984, |
| "learning_rate": 1.684e-05, |
| "loss": 0.6673, |
| "step": 422 |
| }, |
| { |
| "epoch": 32.53846153846154, |
| "grad_norm": 38.671775817871094, |
| "learning_rate": 1.688e-05, |
| "loss": 0.6072, |
| "step": 423 |
| }, |
| { |
| "epoch": 32.61538461538461, |
| "grad_norm": 63.433650970458984, |
| "learning_rate": 1.692e-05, |
| "loss": 1.3278, |
| "step": 424 |
| }, |
| { |
| "epoch": 32.69230769230769, |
| "grad_norm": 68.43083190917969, |
| "learning_rate": 1.696e-05, |
| "loss": 0.3996, |
| "step": 425 |
| }, |
| { |
| "epoch": 32.76923076923077, |
| "grad_norm": 34.67466354370117, |
| "learning_rate": 1.7e-05, |
| "loss": 0.2814, |
| "step": 426 |
| }, |
| { |
| "epoch": 32.84615384615385, |
| "grad_norm": 16.92761993408203, |
| "learning_rate": 1.704e-05, |
| "loss": 0.1775, |
| "step": 427 |
| }, |
| { |
| "epoch": 32.92307692307692, |
| "grad_norm": 44.911170959472656, |
| "learning_rate": 1.7080000000000002e-05, |
| "loss": 0.7652, |
| "step": 428 |
| }, |
| { |
| "epoch": 33.0, |
| "grad_norm": 33.2980842590332, |
| "learning_rate": 1.7120000000000002e-05, |
| "loss": 0.3709, |
| "step": 429 |
| }, |
| { |
| "epoch": 33.07692307692308, |
| "grad_norm": 43.332359313964844, |
| "learning_rate": 1.7160000000000002e-05, |
| "loss": 0.8261, |
| "step": 430 |
| }, |
| { |
| "epoch": 33.15384615384615, |
| "grad_norm": 47.77151107788086, |
| "learning_rate": 1.72e-05, |
| "loss": 0.4492, |
| "step": 431 |
| }, |
| { |
| "epoch": 33.23076923076923, |
| "grad_norm": 77.03539276123047, |
| "learning_rate": 1.724e-05, |
| "loss": 0.9605, |
| "step": 432 |
| }, |
| { |
| "epoch": 33.30769230769231, |
| "grad_norm": 36.878074645996094, |
| "learning_rate": 1.728e-05, |
| "loss": 0.5138, |
| "step": 433 |
| }, |
| { |
| "epoch": 33.38461538461539, |
| "grad_norm": 78.08937072753906, |
| "learning_rate": 1.732e-05, |
| "loss": 0.7583, |
| "step": 434 |
| }, |
| { |
| "epoch": 33.46153846153846, |
| "grad_norm": 18.130016326904297, |
| "learning_rate": 1.736e-05, |
| "loss": 0.5514, |
| "step": 435 |
| }, |
| { |
| "epoch": 33.53846153846154, |
| "grad_norm": 53.54596710205078, |
| "learning_rate": 1.7400000000000003e-05, |
| "loss": 0.588, |
| "step": 436 |
| }, |
| { |
| "epoch": 33.61538461538461, |
| "grad_norm": 46.70728302001953, |
| "learning_rate": 1.7440000000000002e-05, |
| "loss": 0.3447, |
| "step": 437 |
| }, |
| { |
| "epoch": 33.69230769230769, |
| "grad_norm": 20.540084838867188, |
| "learning_rate": 1.7480000000000002e-05, |
| "loss": 0.1428, |
| "step": 438 |
| }, |
| { |
| "epoch": 33.76923076923077, |
| "grad_norm": 48.03786849975586, |
| "learning_rate": 1.752e-05, |
| "loss": 0.413, |
| "step": 439 |
| }, |
| { |
| "epoch": 33.84615384615385, |
| "grad_norm": 75.21665954589844, |
| "learning_rate": 1.756e-05, |
| "loss": 0.2307, |
| "step": 440 |
| }, |
| { |
| "epoch": 33.92307692307692, |
| "grad_norm": 38.96320343017578, |
| "learning_rate": 1.76e-05, |
| "loss": 0.5157, |
| "step": 441 |
| }, |
| { |
| "epoch": 34.0, |
| "grad_norm": 62.66585922241211, |
| "learning_rate": 1.764e-05, |
| "loss": 0.7783, |
| "step": 442 |
| }, |
| { |
| "epoch": 34.07692307692308, |
| "grad_norm": 68.1346206665039, |
| "learning_rate": 1.768e-05, |
| "loss": 0.7453, |
| "step": 443 |
| }, |
| { |
| "epoch": 34.15384615384615, |
| "grad_norm": 64.73094177246094, |
| "learning_rate": 1.7720000000000003e-05, |
| "loss": 0.4063, |
| "step": 444 |
| }, |
| { |
| "epoch": 34.23076923076923, |
| "grad_norm": 45.85354995727539, |
| "learning_rate": 1.7760000000000003e-05, |
| "loss": 0.6128, |
| "step": 445 |
| }, |
| { |
| "epoch": 34.30769230769231, |
| "grad_norm": 60.594276428222656, |
| "learning_rate": 1.7800000000000002e-05, |
| "loss": 0.4022, |
| "step": 446 |
| }, |
| { |
| "epoch": 34.38461538461539, |
| "grad_norm": 65.88479614257812, |
| "learning_rate": 1.7840000000000002e-05, |
| "loss": 0.5737, |
| "step": 447 |
| }, |
| { |
| "epoch": 34.46153846153846, |
| "grad_norm": 47.70244216918945, |
| "learning_rate": 1.788e-05, |
| "loss": 0.3676, |
| "step": 448 |
| }, |
| { |
| "epoch": 34.53846153846154, |
| "grad_norm": 34.25014114379883, |
| "learning_rate": 1.792e-05, |
| "loss": 0.5729, |
| "step": 449 |
| }, |
| { |
| "epoch": 34.61538461538461, |
| "grad_norm": 59.29137420654297, |
| "learning_rate": 1.796e-05, |
| "loss": 0.3314, |
| "step": 450 |
| }, |
| { |
| "epoch": 34.69230769230769, |
| "grad_norm": 59.163780212402344, |
| "learning_rate": 1.8e-05, |
| "loss": 0.3097, |
| "step": 451 |
| }, |
| { |
| "epoch": 34.76923076923077, |
| "grad_norm": 46.27375793457031, |
| "learning_rate": 1.8040000000000003e-05, |
| "loss": 0.5513, |
| "step": 452 |
| }, |
| { |
| "epoch": 34.84615384615385, |
| "grad_norm": 39.06897735595703, |
| "learning_rate": 1.8080000000000003e-05, |
| "loss": 0.4424, |
| "step": 453 |
| }, |
| { |
| "epoch": 34.92307692307692, |
| "grad_norm": 48.06359100341797, |
| "learning_rate": 1.8120000000000003e-05, |
| "loss": 0.4674, |
| "step": 454 |
| }, |
| { |
| "epoch": 35.0, |
| "grad_norm": 72.48213195800781, |
| "learning_rate": 1.8160000000000002e-05, |
| "loss": 0.6007, |
| "step": 455 |
| }, |
| { |
| "epoch": 35.07692307692308, |
| "grad_norm": 44.720558166503906, |
| "learning_rate": 1.8200000000000002e-05, |
| "loss": 0.1797, |
| "step": 456 |
| }, |
| { |
| "epoch": 35.15384615384615, |
| "grad_norm": 14.797567367553711, |
| "learning_rate": 1.824e-05, |
| "loss": 0.1474, |
| "step": 457 |
| }, |
| { |
| "epoch": 35.23076923076923, |
| "grad_norm": 30.398681640625, |
| "learning_rate": 1.828e-05, |
| "loss": 0.4062, |
| "step": 458 |
| }, |
| { |
| "epoch": 35.30769230769231, |
| "grad_norm": 44.051231384277344, |
| "learning_rate": 1.832e-05, |
| "loss": 0.6224, |
| "step": 459 |
| }, |
| { |
| "epoch": 35.38461538461539, |
| "grad_norm": 24.96405601501465, |
| "learning_rate": 1.8360000000000004e-05, |
| "loss": 0.6572, |
| "step": 460 |
| }, |
| { |
| "epoch": 35.46153846153846, |
| "grad_norm": 51.63269805908203, |
| "learning_rate": 1.8400000000000003e-05, |
| "loss": 0.8461, |
| "step": 461 |
| }, |
| { |
| "epoch": 35.53846153846154, |
| "grad_norm": 49.89200973510742, |
| "learning_rate": 1.8440000000000003e-05, |
| "loss": 0.7514, |
| "step": 462 |
| }, |
| { |
| "epoch": 35.61538461538461, |
| "grad_norm": 41.851051330566406, |
| "learning_rate": 1.8480000000000003e-05, |
| "loss": 0.544, |
| "step": 463 |
| }, |
| { |
| "epoch": 35.69230769230769, |
| "grad_norm": 22.07673454284668, |
| "learning_rate": 1.8520000000000002e-05, |
| "loss": 0.6125, |
| "step": 464 |
| }, |
| { |
| "epoch": 35.76923076923077, |
| "grad_norm": 93.32474517822266, |
| "learning_rate": 1.8560000000000002e-05, |
| "loss": 0.5891, |
| "step": 465 |
| }, |
| { |
| "epoch": 35.84615384615385, |
| "grad_norm": 95.31482696533203, |
| "learning_rate": 1.86e-05, |
| "loss": 0.8448, |
| "step": 466 |
| }, |
| { |
| "epoch": 35.92307692307692, |
| "grad_norm": 121.12299346923828, |
| "learning_rate": 1.864e-05, |
| "loss": 1.0558, |
| "step": 467 |
| }, |
| { |
| "epoch": 36.0, |
| "grad_norm": 64.19267272949219, |
| "learning_rate": 1.8680000000000004e-05, |
| "loss": 0.4891, |
| "step": 468 |
| }, |
| { |
| "epoch": 36.07692307692308, |
| "grad_norm": 50.5765495300293, |
| "learning_rate": 1.8720000000000004e-05, |
| "loss": 0.5591, |
| "step": 469 |
| }, |
| { |
| "epoch": 36.15384615384615, |
| "grad_norm": 37.23386764526367, |
| "learning_rate": 1.876e-05, |
| "loss": 0.4727, |
| "step": 470 |
| }, |
| { |
| "epoch": 36.23076923076923, |
| "grad_norm": 44.87166213989258, |
| "learning_rate": 1.88e-05, |
| "loss": 0.4375, |
| "step": 471 |
| }, |
| { |
| "epoch": 36.30769230769231, |
| "grad_norm": 31.29073715209961, |
| "learning_rate": 1.884e-05, |
| "loss": 0.5385, |
| "step": 472 |
| }, |
| { |
| "epoch": 36.38461538461539, |
| "grad_norm": 32.528541564941406, |
| "learning_rate": 1.8880000000000002e-05, |
| "loss": 0.6831, |
| "step": 473 |
| }, |
| { |
| "epoch": 36.46153846153846, |
| "grad_norm": 43.69649887084961, |
| "learning_rate": 1.8920000000000002e-05, |
| "loss": 0.4986, |
| "step": 474 |
| }, |
| { |
| "epoch": 36.53846153846154, |
| "grad_norm": 22.515159606933594, |
| "learning_rate": 1.896e-05, |
| "loss": 0.298, |
| "step": 475 |
| }, |
| { |
| "epoch": 36.61538461538461, |
| "grad_norm": 59.822330474853516, |
| "learning_rate": 1.9e-05, |
| "loss": 0.2583, |
| "step": 476 |
| }, |
| { |
| "epoch": 36.69230769230769, |
| "grad_norm": 65.19972229003906, |
| "learning_rate": 1.904e-05, |
| "loss": 0.5001, |
| "step": 477 |
| }, |
| { |
| "epoch": 36.76923076923077, |
| "grad_norm": 50.5339469909668, |
| "learning_rate": 1.908e-05, |
| "loss": 0.5146, |
| "step": 478 |
| }, |
| { |
| "epoch": 36.84615384615385, |
| "grad_norm": 46.214683532714844, |
| "learning_rate": 1.912e-05, |
| "loss": 0.4223, |
| "step": 479 |
| }, |
| { |
| "epoch": 36.92307692307692, |
| "grad_norm": 38.803321838378906, |
| "learning_rate": 1.916e-05, |
| "loss": 0.7122, |
| "step": 480 |
| }, |
| { |
| "epoch": 37.0, |
| "grad_norm": 27.04830551147461, |
| "learning_rate": 1.9200000000000003e-05, |
| "loss": 0.1822, |
| "step": 481 |
| }, |
| { |
| "epoch": 37.07692307692308, |
| "grad_norm": 21.252769470214844, |
| "learning_rate": 1.9240000000000002e-05, |
| "loss": 0.2326, |
| "step": 482 |
| }, |
| { |
| "epoch": 37.15384615384615, |
| "grad_norm": 34.464412689208984, |
| "learning_rate": 1.9280000000000002e-05, |
| "loss": 0.2239, |
| "step": 483 |
| }, |
| { |
| "epoch": 37.23076923076923, |
| "grad_norm": 41.89882278442383, |
| "learning_rate": 1.932e-05, |
| "loss": 0.3468, |
| "step": 484 |
| }, |
| { |
| "epoch": 37.30769230769231, |
| "grad_norm": 25.541357040405273, |
| "learning_rate": 1.936e-05, |
| "loss": 0.405, |
| "step": 485 |
| }, |
| { |
| "epoch": 37.38461538461539, |
| "grad_norm": 48.3160400390625, |
| "learning_rate": 1.94e-05, |
| "loss": 0.2665, |
| "step": 486 |
| }, |
| { |
| "epoch": 37.46153846153846, |
| "grad_norm": 26.55426597595215, |
| "learning_rate": 1.944e-05, |
| "loss": 0.1677, |
| "step": 487 |
| }, |
| { |
| "epoch": 37.53846153846154, |
| "grad_norm": 71.87734985351562, |
| "learning_rate": 1.948e-05, |
| "loss": 1.3239, |
| "step": 488 |
| }, |
| { |
| "epoch": 37.61538461538461, |
| "grad_norm": 53.14076232910156, |
| "learning_rate": 1.9520000000000003e-05, |
| "loss": 0.5742, |
| "step": 489 |
| }, |
| { |
| "epoch": 37.69230769230769, |
| "grad_norm": 55.616424560546875, |
| "learning_rate": 1.9560000000000002e-05, |
| "loss": 0.5266, |
| "step": 490 |
| }, |
| { |
| "epoch": 37.76923076923077, |
| "grad_norm": 32.749019622802734, |
| "learning_rate": 1.9600000000000002e-05, |
| "loss": 0.2234, |
| "step": 491 |
| }, |
| { |
| "epoch": 37.84615384615385, |
| "grad_norm": 30.874309539794922, |
| "learning_rate": 1.9640000000000002e-05, |
| "loss": 0.2585, |
| "step": 492 |
| }, |
| { |
| "epoch": 37.92307692307692, |
| "grad_norm": 17.95946502685547, |
| "learning_rate": 1.968e-05, |
| "loss": 0.1004, |
| "step": 493 |
| }, |
| { |
| "epoch": 38.0, |
| "grad_norm": 76.27622985839844, |
| "learning_rate": 1.972e-05, |
| "loss": 0.4079, |
| "step": 494 |
| }, |
| { |
| "epoch": 38.07692307692308, |
| "grad_norm": 79.43333435058594, |
| "learning_rate": 1.976e-05, |
| "loss": 0.8453, |
| "step": 495 |
| }, |
| { |
| "epoch": 38.15384615384615, |
| "grad_norm": 60.751834869384766, |
| "learning_rate": 1.98e-05, |
| "loss": 0.9003, |
| "step": 496 |
| }, |
| { |
| "epoch": 38.23076923076923, |
| "grad_norm": 49.671142578125, |
| "learning_rate": 1.9840000000000003e-05, |
| "loss": 0.172, |
| "step": 497 |
| }, |
| { |
| "epoch": 38.30769230769231, |
| "grad_norm": 32.0305061340332, |
| "learning_rate": 1.9880000000000003e-05, |
| "loss": 0.3262, |
| "step": 498 |
| }, |
| { |
| "epoch": 38.38461538461539, |
| "grad_norm": 46.92594909667969, |
| "learning_rate": 1.9920000000000002e-05, |
| "loss": 0.6245, |
| "step": 499 |
| }, |
| { |
| "epoch": 38.46153846153846, |
| "grad_norm": 43.71823501586914, |
| "learning_rate": 1.9960000000000002e-05, |
| "loss": 0.4124, |
| "step": 500 |
| }, |
| { |
| "epoch": 38.53846153846154, |
| "grad_norm": 19.13474464416504, |
| "learning_rate": 2e-05, |
| "loss": 0.2149, |
| "step": 501 |
| }, |
| { |
| "epoch": 38.61538461538461, |
| "grad_norm": 38.319427490234375, |
| "learning_rate": 1.9999997563060744e-05, |
| "loss": 0.235, |
| "step": 502 |
| }, |
| { |
| "epoch": 38.69230769230769, |
| "grad_norm": 24.14805793762207, |
| "learning_rate": 1.9999990252244153e-05, |
| "loss": 0.2052, |
| "step": 503 |
| }, |
| { |
| "epoch": 38.76923076923077, |
| "grad_norm": 22.26850128173828, |
| "learning_rate": 1.9999978067553796e-05, |
| "loss": 0.1105, |
| "step": 504 |
| }, |
| { |
| "epoch": 38.84615384615385, |
| "grad_norm": 43.19673156738281, |
| "learning_rate": 1.9999961008995607e-05, |
| "loss": 0.7993, |
| "step": 505 |
| }, |
| { |
| "epoch": 38.92307692307692, |
| "grad_norm": 36.335838317871094, |
| "learning_rate": 1.9999939076577906e-05, |
| "loss": 0.1197, |
| "step": 506 |
| }, |
| { |
| "epoch": 39.0, |
| "grad_norm": 63.675071716308594, |
| "learning_rate": 1.9999912270311376e-05, |
| "loss": 0.767, |
| "step": 507 |
| }, |
| { |
| "epoch": 39.07692307692308, |
| "grad_norm": 24.863452911376953, |
| "learning_rate": 1.999988059020909e-05, |
| "loss": 0.3484, |
| "step": 508 |
| }, |
| { |
| "epoch": 39.15384615384615, |
| "grad_norm": 23.017812728881836, |
| "learning_rate": 1.9999844036286483e-05, |
| "loss": 0.4688, |
| "step": 509 |
| }, |
| { |
| "epoch": 39.23076923076923, |
| "grad_norm": 60.56824493408203, |
| "learning_rate": 1.999980260856137e-05, |
| "loss": 0.471, |
| "step": 510 |
| }, |
| { |
| "epoch": 39.30769230769231, |
| "grad_norm": 45.144256591796875, |
| "learning_rate": 1.9999756307053947e-05, |
| "loss": 0.5551, |
| "step": 511 |
| }, |
| { |
| "epoch": 39.38461538461539, |
| "grad_norm": 18.20176887512207, |
| "learning_rate": 1.999970513178678e-05, |
| "loss": 0.6448, |
| "step": 512 |
| }, |
| { |
| "epoch": 39.46153846153846, |
| "grad_norm": 25.116472244262695, |
| "learning_rate": 1.9999649082784807e-05, |
| "loss": 0.3543, |
| "step": 513 |
| }, |
| { |
| "epoch": 39.53846153846154, |
| "grad_norm": 56.92240524291992, |
| "learning_rate": 1.999958816007535e-05, |
| "loss": 0.5884, |
| "step": 514 |
| }, |
| { |
| "epoch": 39.61538461538461, |
| "grad_norm": 36.347930908203125, |
| "learning_rate": 1.99995223636881e-05, |
| "loss": 0.8939, |
| "step": 515 |
| }, |
| { |
| "epoch": 39.69230769230769, |
| "grad_norm": 38.58857345581055, |
| "learning_rate": 1.9999451693655125e-05, |
| "loss": 0.513, |
| "step": 516 |
| }, |
| { |
| "epoch": 39.76923076923077, |
| "grad_norm": 26.801912307739258, |
| "learning_rate": 1.9999376150010868e-05, |
| "loss": 0.1728, |
| "step": 517 |
| }, |
| { |
| "epoch": 39.84615384615385, |
| "grad_norm": 48.60188674926758, |
| "learning_rate": 1.9999295732792146e-05, |
| "loss": 0.524, |
| "step": 518 |
| }, |
| { |
| "epoch": 39.92307692307692, |
| "grad_norm": 27.04090690612793, |
| "learning_rate": 1.9999210442038164e-05, |
| "loss": 0.2746, |
| "step": 519 |
| }, |
| { |
| "epoch": 40.0, |
| "grad_norm": 67.3294906616211, |
| "learning_rate": 1.9999120277790477e-05, |
| "loss": 0.767, |
| "step": 520 |
| }, |
| { |
| "epoch": 40.07692307692308, |
| "grad_norm": 69.61468505859375, |
| "learning_rate": 1.9999025240093045e-05, |
| "loss": 0.5924, |
| "step": 521 |
| }, |
| { |
| "epoch": 40.15384615384615, |
| "grad_norm": 28.077123641967773, |
| "learning_rate": 1.9998925328992175e-05, |
| "loss": 0.254, |
| "step": 522 |
| }, |
| { |
| "epoch": 40.23076923076923, |
| "grad_norm": 108.91661071777344, |
| "learning_rate": 1.999882054453657e-05, |
| "loss": 0.6933, |
| "step": 523 |
| }, |
| { |
| "epoch": 40.30769230769231, |
| "grad_norm": 42.598304748535156, |
| "learning_rate": 1.9998710886777298e-05, |
| "loss": 0.4899, |
| "step": 524 |
| }, |
| { |
| "epoch": 40.38461538461539, |
| "grad_norm": 34.36406326293945, |
| "learning_rate": 1.9998596355767805e-05, |
| "loss": 0.5635, |
| "step": 525 |
| }, |
| { |
| "epoch": 40.46153846153846, |
| "grad_norm": 48.5196647644043, |
| "learning_rate": 1.9998476951563914e-05, |
| "loss": 0.3365, |
| "step": 526 |
| }, |
| { |
| "epoch": 40.53846153846154, |
| "grad_norm": 56.3973274230957, |
| "learning_rate": 1.9998352674223816e-05, |
| "loss": 0.8872, |
| "step": 527 |
| }, |
| { |
| "epoch": 40.61538461538461, |
| "grad_norm": 56.69743347167969, |
| "learning_rate": 1.9998223523808092e-05, |
| "loss": 0.5956, |
| "step": 528 |
| }, |
| { |
| "epoch": 40.69230769230769, |
| "grad_norm": 80.67139434814453, |
| "learning_rate": 1.999808950037968e-05, |
| "loss": 0.2614, |
| "step": 529 |
| }, |
| { |
| "epoch": 40.76923076923077, |
| "grad_norm": 52.51334762573242, |
| "learning_rate": 1.99979506040039e-05, |
| "loss": 0.5652, |
| "step": 530 |
| }, |
| { |
| "epoch": 40.84615384615385, |
| "grad_norm": 77.20426940917969, |
| "learning_rate": 1.9997806834748455e-05, |
| "loss": 0.3495, |
| "step": 531 |
| }, |
| { |
| "epoch": 40.92307692307692, |
| "grad_norm": 46.386573791503906, |
| "learning_rate": 1.9997658192683412e-05, |
| "loss": 0.4954, |
| "step": 532 |
| }, |
| { |
| "epoch": 41.0, |
| "grad_norm": 93.00538635253906, |
| "learning_rate": 1.9997504677881224e-05, |
| "loss": 0.3318, |
| "step": 533 |
| }, |
| { |
| "epoch": 41.07692307692308, |
| "grad_norm": 56.72392654418945, |
| "learning_rate": 1.9997346290416703e-05, |
| "loss": 0.2394, |
| "step": 534 |
| }, |
| { |
| "epoch": 41.15384615384615, |
| "grad_norm": 62.16911697387695, |
| "learning_rate": 1.999718303036705e-05, |
| "loss": 0.3713, |
| "step": 535 |
| }, |
| { |
| "epoch": 41.23076923076923, |
| "grad_norm": 65.15827178955078, |
| "learning_rate": 1.9997014897811834e-05, |
| "loss": 0.3201, |
| "step": 536 |
| }, |
| { |
| "epoch": 41.30769230769231, |
| "grad_norm": 100.19380187988281, |
| "learning_rate": 1.9996841892833e-05, |
| "loss": 0.8308, |
| "step": 537 |
| }, |
| { |
| "epoch": 41.38461538461539, |
| "grad_norm": 99.45419311523438, |
| "learning_rate": 1.999666401551487e-05, |
| "loss": 1.2223, |
| "step": 538 |
| }, |
| { |
| "epoch": 41.46153846153846, |
| "grad_norm": 54.75605392456055, |
| "learning_rate": 1.9996481265944146e-05, |
| "loss": 0.3295, |
| "step": 539 |
| }, |
| { |
| "epoch": 41.53846153846154, |
| "grad_norm": 72.30227661132812, |
| "learning_rate": 1.9996293644209886e-05, |
| "loss": 0.8631, |
| "step": 540 |
| }, |
| { |
| "epoch": 41.61538461538461, |
| "grad_norm": 90.02909851074219, |
| "learning_rate": 1.9996101150403543e-05, |
| "loss": 0.6333, |
| "step": 541 |
| }, |
| { |
| "epoch": 41.69230769230769, |
| "grad_norm": 91.50467681884766, |
| "learning_rate": 1.9995903784618936e-05, |
| "loss": 0.6422, |
| "step": 542 |
| }, |
| { |
| "epoch": 41.76923076923077, |
| "grad_norm": 53.51863479614258, |
| "learning_rate": 1.9995701546952252e-05, |
| "loss": 0.1777, |
| "step": 543 |
| }, |
| { |
| "epoch": 41.84615384615385, |
| "grad_norm": 46.842586517333984, |
| "learning_rate": 1.9995494437502064e-05, |
| "loss": 0.4172, |
| "step": 544 |
| }, |
| { |
| "epoch": 41.92307692307692, |
| "grad_norm": 76.82200622558594, |
| "learning_rate": 1.9995282456369313e-05, |
| "loss": 0.2708, |
| "step": 545 |
| }, |
| { |
| "epoch": 42.0, |
| "grad_norm": 50.79060363769531, |
| "learning_rate": 1.9995065603657317e-05, |
| "loss": 0.4857, |
| "step": 546 |
| }, |
| { |
| "epoch": 42.07692307692308, |
| "grad_norm": 64.46525573730469, |
| "learning_rate": 1.999484387947177e-05, |
| "loss": 0.5747, |
| "step": 547 |
| }, |
| { |
| "epoch": 42.15384615384615, |
| "grad_norm": 15.964370727539062, |
| "learning_rate": 1.999461728392073e-05, |
| "loss": 0.2815, |
| "step": 548 |
| }, |
| { |
| "epoch": 42.23076923076923, |
| "grad_norm": 28.7789306640625, |
| "learning_rate": 1.9994385817114644e-05, |
| "loss": 0.1986, |
| "step": 549 |
| }, |
| { |
| "epoch": 42.30769230769231, |
| "grad_norm": 43.08668518066406, |
| "learning_rate": 1.9994149479166324e-05, |
| "loss": 0.5432, |
| "step": 550 |
| }, |
| { |
| "epoch": 42.38461538461539, |
| "grad_norm": 14.126999855041504, |
| "learning_rate": 1.999390827019096e-05, |
| "loss": 0.1372, |
| "step": 551 |
| }, |
| { |
| "epoch": 42.46153846153846, |
| "grad_norm": 21.118301391601562, |
| "learning_rate": 1.999366219030611e-05, |
| "loss": 0.1582, |
| "step": 552 |
| }, |
| { |
| "epoch": 42.53846153846154, |
| "grad_norm": 39.95669174194336, |
| "learning_rate": 1.9993411239631713e-05, |
| "loss": 0.344, |
| "step": 553 |
| }, |
| { |
| "epoch": 42.61538461538461, |
| "grad_norm": 28.600141525268555, |
| "learning_rate": 1.999315541829008e-05, |
| "loss": 0.4129, |
| "step": 554 |
| }, |
| { |
| "epoch": 42.69230769230769, |
| "grad_norm": 47.09449005126953, |
| "learning_rate": 1.9992894726405894e-05, |
| "loss": 0.092, |
| "step": 555 |
| }, |
| { |
| "epoch": 42.76923076923077, |
| "grad_norm": 16.952911376953125, |
| "learning_rate": 1.999262916410621e-05, |
| "loss": 0.1394, |
| "step": 556 |
| }, |
| { |
| "epoch": 42.84615384615385, |
| "grad_norm": 32.12388610839844, |
| "learning_rate": 1.999235873152047e-05, |
| "loss": 0.219, |
| "step": 557 |
| }, |
| { |
| "epoch": 42.92307692307692, |
| "grad_norm": 44.476688385009766, |
| "learning_rate": 1.999208342878047e-05, |
| "loss": 0.2061, |
| "step": 558 |
| }, |
| { |
| "epoch": 43.0, |
| "grad_norm": 33.65949630737305, |
| "learning_rate": 1.9991803256020393e-05, |
| "loss": 0.0972, |
| "step": 559 |
| }, |
| { |
| "epoch": 43.07692307692308, |
| "grad_norm": 17.978668212890625, |
| "learning_rate": 1.9991518213376787e-05, |
| "loss": 0.1124, |
| "step": 560 |
| }, |
| { |
| "epoch": 43.15384615384615, |
| "grad_norm": 56.47621154785156, |
| "learning_rate": 1.9991228300988586e-05, |
| "loss": 0.604, |
| "step": 561 |
| }, |
| { |
| "epoch": 43.23076923076923, |
| "grad_norm": 45.38515853881836, |
| "learning_rate": 1.9990933518997086e-05, |
| "loss": 0.6914, |
| "step": 562 |
| }, |
| { |
| "epoch": 43.30769230769231, |
| "grad_norm": 40.5052604675293, |
| "learning_rate": 1.9990633867545956e-05, |
| "loss": 0.5163, |
| "step": 563 |
| }, |
| { |
| "epoch": 43.38461538461539, |
| "grad_norm": 30.98360252380371, |
| "learning_rate": 1.999032934678125e-05, |
| "loss": 0.4277, |
| "step": 564 |
| }, |
| { |
| "epoch": 43.46153846153846, |
| "grad_norm": 26.534019470214844, |
| "learning_rate": 1.9990019956851384e-05, |
| "loss": 0.218, |
| "step": 565 |
| }, |
| { |
| "epoch": 43.53846153846154, |
| "grad_norm": 27.41169548034668, |
| "learning_rate": 1.998970569790715e-05, |
| "loss": 0.1273, |
| "step": 566 |
| }, |
| { |
| "epoch": 43.61538461538461, |
| "grad_norm": 22.965587615966797, |
| "learning_rate": 1.9989386570101716e-05, |
| "loss": 0.1367, |
| "step": 567 |
| }, |
| { |
| "epoch": 43.69230769230769, |
| "grad_norm": 74.6824951171875, |
| "learning_rate": 1.9989062573590618e-05, |
| "loss": 1.1331, |
| "step": 568 |
| }, |
| { |
| "epoch": 43.76923076923077, |
| "grad_norm": 65.099609375, |
| "learning_rate": 1.9988733708531772e-05, |
| "loss": 1.4358, |
| "step": 569 |
| }, |
| { |
| "epoch": 43.84615384615385, |
| "grad_norm": 39.96792984008789, |
| "learning_rate": 1.998839997508546e-05, |
| "loss": 0.2261, |
| "step": 570 |
| }, |
| { |
| "epoch": 43.92307692307692, |
| "grad_norm": 23.558868408203125, |
| "learning_rate": 1.9988061373414342e-05, |
| "loss": 0.2481, |
| "step": 571 |
| }, |
| { |
| "epoch": 44.0, |
| "grad_norm": 95.01315307617188, |
| "learning_rate": 1.9987717903683447e-05, |
| "loss": 1.1462, |
| "step": 572 |
| }, |
| { |
| "epoch": 44.07692307692308, |
| "grad_norm": 27.77621841430664, |
| "learning_rate": 1.998736956606018e-05, |
| "loss": 0.5905, |
| "step": 573 |
| }, |
| { |
| "epoch": 44.15384615384615, |
| "grad_norm": 40.2691535949707, |
| "learning_rate": 1.9987016360714307e-05, |
| "loss": 0.4781, |
| "step": 574 |
| }, |
| { |
| "epoch": 44.23076923076923, |
| "grad_norm": 36.15031433105469, |
| "learning_rate": 1.998665828781799e-05, |
| "loss": 0.4033, |
| "step": 575 |
| }, |
| { |
| "epoch": 44.30769230769231, |
| "grad_norm": 46.30068588256836, |
| "learning_rate": 1.9986295347545738e-05, |
| "loss": 0.5782, |
| "step": 576 |
| }, |
| { |
| "epoch": 44.38461538461539, |
| "grad_norm": 22.156299591064453, |
| "learning_rate": 1.9985927540074453e-05, |
| "loss": 0.2182, |
| "step": 577 |
| }, |
| { |
| "epoch": 44.46153846153846, |
| "grad_norm": 51.11417007446289, |
| "learning_rate": 1.9985554865583394e-05, |
| "loss": 0.5061, |
| "step": 578 |
| }, |
| { |
| "epoch": 44.53846153846154, |
| "grad_norm": 32.809696197509766, |
| "learning_rate": 1.99851773242542e-05, |
| "loss": 0.265, |
| "step": 579 |
| }, |
| { |
| "epoch": 44.61538461538461, |
| "grad_norm": 46.30125045776367, |
| "learning_rate": 1.9984794916270876e-05, |
| "loss": 0.6807, |
| "step": 580 |
| }, |
| { |
| "epoch": 44.69230769230769, |
| "grad_norm": 12.768202781677246, |
| "learning_rate": 1.9984407641819812e-05, |
| "loss": 0.1261, |
| "step": 581 |
| }, |
| { |
| "epoch": 44.76923076923077, |
| "grad_norm": 48.871124267578125, |
| "learning_rate": 1.998401550108975e-05, |
| "loss": 0.7503, |
| "step": 582 |
| }, |
| { |
| "epoch": 44.84615384615385, |
| "grad_norm": 22.152219772338867, |
| "learning_rate": 1.9983618494271825e-05, |
| "loss": 0.2886, |
| "step": 583 |
| }, |
| { |
| "epoch": 44.92307692307692, |
| "grad_norm": 24.310588836669922, |
| "learning_rate": 1.9983216621559525e-05, |
| "loss": 0.0716, |
| "step": 584 |
| }, |
| { |
| "epoch": 45.0, |
| "grad_norm": 11.027695655822754, |
| "learning_rate": 1.998280988314872e-05, |
| "loss": 0.1276, |
| "step": 585 |
| }, |
| { |
| "epoch": 45.07692307692308, |
| "grad_norm": 31.255794525146484, |
| "learning_rate": 1.9982398279237657e-05, |
| "loss": 0.1271, |
| "step": 586 |
| }, |
| { |
| "epoch": 45.15384615384615, |
| "grad_norm": 31.5863037109375, |
| "learning_rate": 1.9981981810026932e-05, |
| "loss": 0.2312, |
| "step": 587 |
| }, |
| { |
| "epoch": 45.23076923076923, |
| "grad_norm": 47.123294830322266, |
| "learning_rate": 1.998156047571954e-05, |
| "loss": 0.5386, |
| "step": 588 |
| }, |
| { |
| "epoch": 45.30769230769231, |
| "grad_norm": 19.838153839111328, |
| "learning_rate": 1.9981134276520828e-05, |
| "loss": 0.1585, |
| "step": 589 |
| }, |
| { |
| "epoch": 45.38461538461539, |
| "grad_norm": 52.45973205566406, |
| "learning_rate": 1.9980703212638522e-05, |
| "loss": 0.963, |
| "step": 590 |
| }, |
| { |
| "epoch": 45.46153846153846, |
| "grad_norm": 93.08457946777344, |
| "learning_rate": 1.9980267284282718e-05, |
| "loss": 1.381, |
| "step": 591 |
| }, |
| { |
| "epoch": 45.53846153846154, |
| "grad_norm": 45.8686637878418, |
| "learning_rate": 1.997982649166588e-05, |
| "loss": 0.1468, |
| "step": 592 |
| }, |
| { |
| "epoch": 45.61538461538461, |
| "grad_norm": 45.240047454833984, |
| "learning_rate": 1.9979380835002846e-05, |
| "loss": 0.3763, |
| "step": 593 |
| }, |
| { |
| "epoch": 45.69230769230769, |
| "grad_norm": 36.02961730957031, |
| "learning_rate": 1.9978930314510826e-05, |
| "loss": 0.1194, |
| "step": 594 |
| }, |
| { |
| "epoch": 45.76923076923077, |
| "grad_norm": 37.365089416503906, |
| "learning_rate": 1.9978474930409396e-05, |
| "loss": 0.2036, |
| "step": 595 |
| }, |
| { |
| "epoch": 45.84615384615385, |
| "grad_norm": 38.51081085205078, |
| "learning_rate": 1.9978014682920503e-05, |
| "loss": 0.3827, |
| "step": 596 |
| }, |
| { |
| "epoch": 45.92307692307692, |
| "grad_norm": 24.498191833496094, |
| "learning_rate": 1.997754957226847e-05, |
| "loss": 0.118, |
| "step": 597 |
| }, |
| { |
| "epoch": 46.0, |
| "grad_norm": 55.18220901489258, |
| "learning_rate": 1.9977079598679978e-05, |
| "loss": 0.23, |
| "step": 598 |
| }, |
| { |
| "epoch": 46.07692307692308, |
| "grad_norm": 31.934246063232422, |
| "learning_rate": 1.99766047623841e-05, |
| "loss": 0.2494, |
| "step": 599 |
| }, |
| { |
| "epoch": 46.15384615384615, |
| "grad_norm": 38.73695373535156, |
| "learning_rate": 1.9976125063612254e-05, |
| "loss": 0.4175, |
| "step": 600 |
| }, |
| { |
| "epoch": 46.23076923076923, |
| "grad_norm": 40.32987976074219, |
| "learning_rate": 1.9975640502598243e-05, |
| "loss": 0.3902, |
| "step": 601 |
| }, |
| { |
| "epoch": 46.30769230769231, |
| "grad_norm": 50.24580001831055, |
| "learning_rate": 1.9975151079578238e-05, |
| "loss": 0.4656, |
| "step": 602 |
| }, |
| { |
| "epoch": 46.38461538461539, |
| "grad_norm": 66.17344665527344, |
| "learning_rate": 1.9974656794790777e-05, |
| "loss": 1.1274, |
| "step": 603 |
| }, |
| { |
| "epoch": 46.46153846153846, |
| "grad_norm": 60.75761032104492, |
| "learning_rate": 1.9974157648476768e-05, |
| "loss": 0.4145, |
| "step": 604 |
| }, |
| { |
| "epoch": 46.53846153846154, |
| "grad_norm": 44.54948043823242, |
| "learning_rate": 1.9973653640879486e-05, |
| "loss": 0.5102, |
| "step": 605 |
| }, |
| { |
| "epoch": 46.61538461538461, |
| "grad_norm": 58.077396392822266, |
| "learning_rate": 1.997314477224458e-05, |
| "loss": 0.8864, |
| "step": 606 |
| }, |
| { |
| "epoch": 46.69230769230769, |
| "grad_norm": 69.51765441894531, |
| "learning_rate": 1.997263104282007e-05, |
| "loss": 0.7171, |
| "step": 607 |
| }, |
| { |
| "epoch": 46.76923076923077, |
| "grad_norm": 38.186458587646484, |
| "learning_rate": 1.997211245285634e-05, |
| "loss": 0.4604, |
| "step": 608 |
| }, |
| { |
| "epoch": 46.84615384615385, |
| "grad_norm": 39.436607360839844, |
| "learning_rate": 1.997158900260614e-05, |
| "loss": 0.3123, |
| "step": 609 |
| }, |
| { |
| "epoch": 46.92307692307692, |
| "grad_norm": 15.021434783935547, |
| "learning_rate": 1.99710606923246e-05, |
| "loss": 0.1642, |
| "step": 610 |
| }, |
| { |
| "epoch": 47.0, |
| "grad_norm": 14.399913787841797, |
| "learning_rate": 1.9970527522269204e-05, |
| "loss": 0.105, |
| "step": 611 |
| }, |
| { |
| "epoch": 47.07692307692308, |
| "grad_norm": 44.68442153930664, |
| "learning_rate": 1.996998949269982e-05, |
| "loss": 0.3639, |
| "step": 612 |
| }, |
| { |
| "epoch": 47.15384615384615, |
| "grad_norm": 41.75777816772461, |
| "learning_rate": 1.9969446603878673e-05, |
| "loss": 0.3786, |
| "step": 613 |
| }, |
| { |
| "epoch": 47.23076923076923, |
| "grad_norm": 55.93494415283203, |
| "learning_rate": 1.996889885607036e-05, |
| "loss": 0.5464, |
| "step": 614 |
| }, |
| { |
| "epoch": 47.30769230769231, |
| "grad_norm": 27.718795776367188, |
| "learning_rate": 1.9968346249541848e-05, |
| "loss": 0.4085, |
| "step": 615 |
| }, |
| { |
| "epoch": 47.38461538461539, |
| "grad_norm": 26.89528465270996, |
| "learning_rate": 1.9967788784562474e-05, |
| "loss": 0.2946, |
| "step": 616 |
| }, |
| { |
| "epoch": 47.46153846153846, |
| "grad_norm": 19.02779197692871, |
| "learning_rate": 1.9967226461403934e-05, |
| "loss": 0.2457, |
| "step": 617 |
| }, |
| { |
| "epoch": 47.53846153846154, |
| "grad_norm": 28.85311508178711, |
| "learning_rate": 1.99666592803403e-05, |
| "loss": 0.5738, |
| "step": 618 |
| }, |
| { |
| "epoch": 47.61538461538461, |
| "grad_norm": 57.92436599731445, |
| "learning_rate": 1.996608724164801e-05, |
| "loss": 0.2921, |
| "step": 619 |
| }, |
| { |
| "epoch": 47.69230769230769, |
| "grad_norm": 56.59422302246094, |
| "learning_rate": 1.9965510345605866e-05, |
| "loss": 0.6956, |
| "step": 620 |
| }, |
| { |
| "epoch": 47.76923076923077, |
| "grad_norm": 20.687774658203125, |
| "learning_rate": 1.9964928592495046e-05, |
| "loss": 0.233, |
| "step": 621 |
| }, |
| { |
| "epoch": 47.84615384615385, |
| "grad_norm": 29.573144912719727, |
| "learning_rate": 1.996434198259908e-05, |
| "loss": 0.3683, |
| "step": 622 |
| }, |
| { |
| "epoch": 47.92307692307692, |
| "grad_norm": 21.21002769470215, |
| "learning_rate": 1.9963750516203887e-05, |
| "loss": 0.1318, |
| "step": 623 |
| }, |
| { |
| "epoch": 48.0, |
| "grad_norm": 39.380615234375, |
| "learning_rate": 1.9963154193597728e-05, |
| "loss": 0.6031, |
| "step": 624 |
| }, |
| { |
| "epoch": 48.07692307692308, |
| "grad_norm": 39.11266326904297, |
| "learning_rate": 1.996255301507125e-05, |
| "loss": 0.1495, |
| "step": 625 |
| }, |
| { |
| "epoch": 48.15384615384615, |
| "grad_norm": 46.29478454589844, |
| "learning_rate": 1.9961946980917457e-05, |
| "loss": 0.5444, |
| "step": 626 |
| }, |
| { |
| "epoch": 48.23076923076923, |
| "grad_norm": 31.465709686279297, |
| "learning_rate": 1.9961336091431728e-05, |
| "loss": 0.4781, |
| "step": 627 |
| }, |
| { |
| "epoch": 48.30769230769231, |
| "grad_norm": 37.89440155029297, |
| "learning_rate": 1.9960720346911798e-05, |
| "loss": 0.3573, |
| "step": 628 |
| }, |
| { |
| "epoch": 48.38461538461539, |
| "grad_norm": 39.69857406616211, |
| "learning_rate": 1.9960099747657774e-05, |
| "loss": 0.3921, |
| "step": 629 |
| }, |
| { |
| "epoch": 48.46153846153846, |
| "grad_norm": 55.70697021484375, |
| "learning_rate": 1.995947429397213e-05, |
| "loss": 0.5863, |
| "step": 630 |
| }, |
| { |
| "epoch": 48.53846153846154, |
| "grad_norm": 52.685943603515625, |
| "learning_rate": 1.9958843986159705e-05, |
| "loss": 0.5162, |
| "step": 631 |
| }, |
| { |
| "epoch": 48.61538461538461, |
| "grad_norm": 33.439208984375, |
| "learning_rate": 1.9958208824527702e-05, |
| "loss": 0.2662, |
| "step": 632 |
| }, |
| { |
| "epoch": 48.69230769230769, |
| "grad_norm": 34.79633331298828, |
| "learning_rate": 1.9957568809385693e-05, |
| "loss": 0.2713, |
| "step": 633 |
| }, |
| { |
| "epoch": 48.76923076923077, |
| "grad_norm": 62.909305572509766, |
| "learning_rate": 1.9956923941045613e-05, |
| "loss": 0.385, |
| "step": 634 |
| }, |
| { |
| "epoch": 48.84615384615385, |
| "grad_norm": 79.76982116699219, |
| "learning_rate": 1.995627421982176e-05, |
| "loss": 0.7246, |
| "step": 635 |
| }, |
| { |
| "epoch": 48.92307692307692, |
| "grad_norm": 51.8908805847168, |
| "learning_rate": 1.99556196460308e-05, |
| "loss": 0.4349, |
| "step": 636 |
| }, |
| { |
| "epoch": 49.0, |
| "grad_norm": 33.157596588134766, |
| "learning_rate": 1.995496021999177e-05, |
| "loss": 0.1781, |
| "step": 637 |
| }, |
| { |
| "epoch": 49.07692307692308, |
| "grad_norm": 23.671682357788086, |
| "learning_rate": 1.9954295942026065e-05, |
| "loss": 0.1832, |
| "step": 638 |
| }, |
| { |
| "epoch": 49.15384615384615, |
| "grad_norm": 37.103172302246094, |
| "learning_rate": 1.995362681245744e-05, |
| "loss": 0.1814, |
| "step": 639 |
| }, |
| { |
| "epoch": 49.23076923076923, |
| "grad_norm": 42.37955856323242, |
| "learning_rate": 1.9952952831612027e-05, |
| "loss": 0.7221, |
| "step": 640 |
| }, |
| { |
| "epoch": 49.30769230769231, |
| "grad_norm": 29.361839294433594, |
| "learning_rate": 1.9952273999818312e-05, |
| "loss": 0.3707, |
| "step": 641 |
| }, |
| { |
| "epoch": 49.38461538461539, |
| "grad_norm": 13.604857444763184, |
| "learning_rate": 1.9951590317407152e-05, |
| "loss": 0.2947, |
| "step": 642 |
| }, |
| { |
| "epoch": 49.46153846153846, |
| "grad_norm": 52.00338363647461, |
| "learning_rate": 1.9950901784711765e-05, |
| "loss": 0.4188, |
| "step": 643 |
| }, |
| { |
| "epoch": 49.53846153846154, |
| "grad_norm": 56.11465835571289, |
| "learning_rate": 1.9950208402067735e-05, |
| "loss": 0.3924, |
| "step": 644 |
| }, |
| { |
| "epoch": 49.61538461538461, |
| "grad_norm": 58.536102294921875, |
| "learning_rate": 1.9949510169813006e-05, |
| "loss": 0.4006, |
| "step": 645 |
| }, |
| { |
| "epoch": 49.69230769230769, |
| "grad_norm": 31.55925178527832, |
| "learning_rate": 1.9948807088287884e-05, |
| "loss": 0.0908, |
| "step": 646 |
| }, |
| { |
| "epoch": 49.76923076923077, |
| "grad_norm": 38.99277877807617, |
| "learning_rate": 1.994809915783505e-05, |
| "loss": 0.4612, |
| "step": 647 |
| }, |
| { |
| "epoch": 49.84615384615385, |
| "grad_norm": 62.03502655029297, |
| "learning_rate": 1.9947386378799534e-05, |
| "loss": 0.6084, |
| "step": 648 |
| }, |
| { |
| "epoch": 49.92307692307692, |
| "grad_norm": 70.17023468017578, |
| "learning_rate": 1.9946668751528745e-05, |
| "loss": 0.3815, |
| "step": 649 |
| }, |
| { |
| "epoch": 50.0, |
| "grad_norm": 70.30763244628906, |
| "learning_rate": 1.9945946276372435e-05, |
| "loss": 0.2997, |
| "step": 650 |
| }, |
| { |
| "epoch": 50.07692307692308, |
| "grad_norm": 35.853515625, |
| "learning_rate": 1.9945218953682736e-05, |
| "loss": 0.5588, |
| "step": 651 |
| }, |
| { |
| "epoch": 50.15384615384615, |
| "grad_norm": 33.02523422241211, |
| "learning_rate": 1.9944486783814135e-05, |
| "loss": 0.5543, |
| "step": 652 |
| }, |
| { |
| "epoch": 50.23076923076923, |
| "grad_norm": 48.58433532714844, |
| "learning_rate": 1.994374976712348e-05, |
| "loss": 0.5824, |
| "step": 653 |
| }, |
| { |
| "epoch": 50.30769230769231, |
| "grad_norm": 49.95491027832031, |
| "learning_rate": 1.994300790396999e-05, |
| "loss": 0.438, |
| "step": 654 |
| }, |
| { |
| "epoch": 50.38461538461539, |
| "grad_norm": 44.924007415771484, |
| "learning_rate": 1.9942261194715236e-05, |
| "loss": 0.4423, |
| "step": 655 |
| }, |
| { |
| "epoch": 50.46153846153846, |
| "grad_norm": 12.01087474822998, |
| "learning_rate": 1.9941509639723155e-05, |
| "loss": 0.2123, |
| "step": 656 |
| }, |
| { |
| "epoch": 50.53846153846154, |
| "grad_norm": 23.593204498291016, |
| "learning_rate": 1.9940753239360047e-05, |
| "loss": 0.2493, |
| "step": 657 |
| }, |
| { |
| "epoch": 50.61538461538461, |
| "grad_norm": 38.11962890625, |
| "learning_rate": 1.993999199399457e-05, |
| "loss": 0.3431, |
| "step": 658 |
| }, |
| { |
| "epoch": 50.69230769230769, |
| "grad_norm": 13.917471885681152, |
| "learning_rate": 1.9939225903997748e-05, |
| "loss": 0.0597, |
| "step": 659 |
| }, |
| { |
| "epoch": 50.76923076923077, |
| "grad_norm": 29.287017822265625, |
| "learning_rate": 1.993845496974297e-05, |
| "loss": 0.307, |
| "step": 660 |
| }, |
| { |
| "epoch": 50.84615384615385, |
| "grad_norm": 12.36963176727295, |
| "learning_rate": 1.9937679191605964e-05, |
| "loss": 0.2267, |
| "step": 661 |
| }, |
| { |
| "epoch": 50.92307692307692, |
| "grad_norm": 19.681686401367188, |
| "learning_rate": 1.993689856996485e-05, |
| "loss": 0.0911, |
| "step": 662 |
| }, |
| { |
| "epoch": 51.0, |
| "grad_norm": 17.56113052368164, |
| "learning_rate": 1.9936113105200085e-05, |
| "loss": 0.276, |
| "step": 663 |
| }, |
| { |
| "epoch": 51.07692307692308, |
| "grad_norm": 36.885414123535156, |
| "learning_rate": 1.99353227976945e-05, |
| "loss": 0.2167, |
| "step": 664 |
| }, |
| { |
| "epoch": 51.15384615384615, |
| "grad_norm": 36.57621383666992, |
| "learning_rate": 1.9934527647833276e-05, |
| "loss": 0.2526, |
| "step": 665 |
| }, |
| { |
| "epoch": 51.23076923076923, |
| "grad_norm": 31.334314346313477, |
| "learning_rate": 1.9933727656003964e-05, |
| "loss": 0.2672, |
| "step": 666 |
| }, |
| { |
| "epoch": 51.30769230769231, |
| "grad_norm": 22.630327224731445, |
| "learning_rate": 1.993292282259647e-05, |
| "loss": 0.091, |
| "step": 667 |
| }, |
| { |
| "epoch": 51.38461538461539, |
| "grad_norm": 48.03598403930664, |
| "learning_rate": 1.9932113148003057e-05, |
| "loss": 0.3044, |
| "step": 668 |
| }, |
| { |
| "epoch": 51.46153846153846, |
| "grad_norm": 16.784225463867188, |
| "learning_rate": 1.9931298632618355e-05, |
| "loss": 0.2004, |
| "step": 669 |
| }, |
| { |
| "epoch": 51.53846153846154, |
| "grad_norm": 32.46980285644531, |
| "learning_rate": 1.9930479276839347e-05, |
| "loss": 0.2895, |
| "step": 670 |
| }, |
| { |
| "epoch": 51.61538461538461, |
| "grad_norm": 40.98526382446289, |
| "learning_rate": 1.992965508106537e-05, |
| "loss": 0.1088, |
| "step": 671 |
| }, |
| { |
| "epoch": 51.69230769230769, |
| "grad_norm": 43.09739685058594, |
| "learning_rate": 1.9928826045698138e-05, |
| "loss": 0.5275, |
| "step": 672 |
| }, |
| { |
| "epoch": 51.76923076923077, |
| "grad_norm": 22.318811416625977, |
| "learning_rate": 1.9927992171141707e-05, |
| "loss": 0.2584, |
| "step": 673 |
| }, |
| { |
| "epoch": 51.84615384615385, |
| "grad_norm": 49.650753021240234, |
| "learning_rate": 1.99271534578025e-05, |
| "loss": 0.4818, |
| "step": 674 |
| }, |
| { |
| "epoch": 51.92307692307692, |
| "grad_norm": 53.377262115478516, |
| "learning_rate": 1.992630990608929e-05, |
| "loss": 0.7079, |
| "step": 675 |
| }, |
| { |
| "epoch": 52.0, |
| "grad_norm": 58.257423400878906, |
| "learning_rate": 1.9925461516413224e-05, |
| "loss": 0.1021, |
| "step": 676 |
| }, |
| { |
| "epoch": 52.07692307692308, |
| "grad_norm": 69.56619262695312, |
| "learning_rate": 1.9924608289187786e-05, |
| "loss": 0.5832, |
| "step": 677 |
| }, |
| { |
| "epoch": 52.15384615384615, |
| "grad_norm": 62.240211486816406, |
| "learning_rate": 1.9923750224828833e-05, |
| "loss": 0.7071, |
| "step": 678 |
| }, |
| { |
| "epoch": 52.23076923076923, |
| "grad_norm": 21.16132164001465, |
| "learning_rate": 1.992288732375458e-05, |
| "loss": 0.2678, |
| "step": 679 |
| }, |
| { |
| "epoch": 52.30769230769231, |
| "grad_norm": 24.814916610717773, |
| "learning_rate": 1.9922019586385587e-05, |
| "loss": 0.1287, |
| "step": 680 |
| }, |
| { |
| "epoch": 52.38461538461539, |
| "grad_norm": 35.99689865112305, |
| "learning_rate": 1.9921147013144782e-05, |
| "loss": 0.2424, |
| "step": 681 |
| }, |
| { |
| "epoch": 52.46153846153846, |
| "grad_norm": 40.905635833740234, |
| "learning_rate": 1.9920269604457444e-05, |
| "loss": 0.4172, |
| "step": 682 |
| }, |
| { |
| "epoch": 52.53846153846154, |
| "grad_norm": 17.19913101196289, |
| "learning_rate": 1.9919387360751216e-05, |
| "loss": 0.1629, |
| "step": 683 |
| }, |
| { |
| "epoch": 52.61538461538461, |
| "grad_norm": 20.01600456237793, |
| "learning_rate": 1.991850028245609e-05, |
| "loss": 0.1851, |
| "step": 684 |
| }, |
| { |
| "epoch": 52.69230769230769, |
| "grad_norm": 49.92776107788086, |
| "learning_rate": 1.9917608370004417e-05, |
| "loss": 0.4779, |
| "step": 685 |
| }, |
| { |
| "epoch": 52.76923076923077, |
| "grad_norm": 104.74503326416016, |
| "learning_rate": 1.9916711623830904e-05, |
| "loss": 1.3957, |
| "step": 686 |
| }, |
| { |
| "epoch": 52.84615384615385, |
| "grad_norm": 42.07792663574219, |
| "learning_rate": 1.9915810044372618e-05, |
| "loss": 0.6729, |
| "step": 687 |
| }, |
| { |
| "epoch": 52.92307692307692, |
| "grad_norm": 35.505592346191406, |
| "learning_rate": 1.9914903632068975e-05, |
| "loss": 0.3948, |
| "step": 688 |
| }, |
| { |
| "epoch": 53.0, |
| "grad_norm": 44.838714599609375, |
| "learning_rate": 1.9913992387361747e-05, |
| "loss": 1.169, |
| "step": 689 |
| }, |
| { |
| "epoch": 53.07692307692308, |
| "grad_norm": 34.66373062133789, |
| "learning_rate": 1.9913076310695068e-05, |
| "loss": 0.2086, |
| "step": 690 |
| }, |
| { |
| "epoch": 53.15384615384615, |
| "grad_norm": 20.64740562438965, |
| "learning_rate": 1.991215540251542e-05, |
| "loss": 0.1257, |
| "step": 691 |
| }, |
| { |
| "epoch": 53.23076923076923, |
| "grad_norm": 19.88491439819336, |
| "learning_rate": 1.991122966327164e-05, |
| "loss": 0.1137, |
| "step": 692 |
| }, |
| { |
| "epoch": 53.30769230769231, |
| "grad_norm": 29.428546905517578, |
| "learning_rate": 1.991029909341493e-05, |
| "loss": 0.3089, |
| "step": 693 |
| }, |
| { |
| "epoch": 53.38461538461539, |
| "grad_norm": 59.91632843017578, |
| "learning_rate": 1.9909363693398828e-05, |
| "loss": 0.5156, |
| "step": 694 |
| }, |
| { |
| "epoch": 53.46153846153846, |
| "grad_norm": 58.243263244628906, |
| "learning_rate": 1.9908423463679246e-05, |
| "loss": 0.1847, |
| "step": 695 |
| }, |
| { |
| "epoch": 53.53846153846154, |
| "grad_norm": 29.226238250732422, |
| "learning_rate": 1.9907478404714438e-05, |
| "loss": 0.2015, |
| "step": 696 |
| }, |
| { |
| "epoch": 53.61538461538461, |
| "grad_norm": 22.936357498168945, |
| "learning_rate": 1.990652851696501e-05, |
| "loss": 0.2968, |
| "step": 697 |
| }, |
| { |
| "epoch": 53.69230769230769, |
| "grad_norm": 22.54434585571289, |
| "learning_rate": 1.990557380089393e-05, |
| "loss": 0.1628, |
| "step": 698 |
| }, |
| { |
| "epoch": 53.76923076923077, |
| "grad_norm": 42.838775634765625, |
| "learning_rate": 1.9904614256966514e-05, |
| "loss": 0.4323, |
| "step": 699 |
| }, |
| { |
| "epoch": 53.84615384615385, |
| "grad_norm": 33.568359375, |
| "learning_rate": 1.990364988565043e-05, |
| "loss": 0.4084, |
| "step": 700 |
| }, |
| { |
| "epoch": 53.92307692307692, |
| "grad_norm": 13.65829849243164, |
| "learning_rate": 1.9902680687415704e-05, |
| "loss": 0.2112, |
| "step": 701 |
| }, |
| { |
| "epoch": 54.0, |
| "grad_norm": 36.00635528564453, |
| "learning_rate": 1.990170666273471e-05, |
| "loss": 0.2038, |
| "step": 702 |
| }, |
| { |
| "epoch": 54.07692307692308, |
| "grad_norm": 40.733489990234375, |
| "learning_rate": 1.9900727812082177e-05, |
| "loss": 0.1079, |
| "step": 703 |
| }, |
| { |
| "epoch": 54.15384615384615, |
| "grad_norm": 18.362619400024414, |
| "learning_rate": 1.989974413593518e-05, |
| "loss": 0.2279, |
| "step": 704 |
| }, |
| { |
| "epoch": 54.23076923076923, |
| "grad_norm": 18.40960121154785, |
| "learning_rate": 1.989875563477316e-05, |
| "loss": 0.1829, |
| "step": 705 |
| }, |
| { |
| "epoch": 54.30769230769231, |
| "grad_norm": 25.479307174682617, |
| "learning_rate": 1.989776230907789e-05, |
| "loss": 0.2494, |
| "step": 706 |
| }, |
| { |
| "epoch": 54.38461538461539, |
| "grad_norm": 25.522314071655273, |
| "learning_rate": 1.989676415933351e-05, |
| "loss": 0.5584, |
| "step": 707 |
| }, |
| { |
| "epoch": 54.46153846153846, |
| "grad_norm": 52.02535629272461, |
| "learning_rate": 1.989576118602651e-05, |
| "loss": 0.4463, |
| "step": 708 |
| }, |
| { |
| "epoch": 54.53846153846154, |
| "grad_norm": 8.517179489135742, |
| "learning_rate": 1.9894753389645723e-05, |
| "loss": 0.0818, |
| "step": 709 |
| }, |
| { |
| "epoch": 54.61538461538461, |
| "grad_norm": 21.887935638427734, |
| "learning_rate": 1.9893740770682334e-05, |
| "loss": 0.4517, |
| "step": 710 |
| }, |
| { |
| "epoch": 54.69230769230769, |
| "grad_norm": 26.428762435913086, |
| "learning_rate": 1.9892723329629885e-05, |
| "loss": 0.3262, |
| "step": 711 |
| }, |
| { |
| "epoch": 54.76923076923077, |
| "grad_norm": 28.858240127563477, |
| "learning_rate": 1.9891701066984264e-05, |
| "loss": 0.5892, |
| "step": 712 |
| }, |
| { |
| "epoch": 54.84615384615385, |
| "grad_norm": 27.605409622192383, |
| "learning_rate": 1.9890673983243708e-05, |
| "loss": 0.2364, |
| "step": 713 |
| }, |
| { |
| "epoch": 54.92307692307692, |
| "grad_norm": 15.756985664367676, |
| "learning_rate": 1.9889642078908805e-05, |
| "loss": 0.1361, |
| "step": 714 |
| }, |
| { |
| "epoch": 55.0, |
| "grad_norm": 44.47917175292969, |
| "learning_rate": 1.9888605354482494e-05, |
| "loss": 0.5785, |
| "step": 715 |
| }, |
| { |
| "epoch": 55.07692307692308, |
| "grad_norm": 18.722244262695312, |
| "learning_rate": 1.988756381047006e-05, |
| "loss": 0.1005, |
| "step": 716 |
| }, |
| { |
| "epoch": 55.15384615384615, |
| "grad_norm": 26.933124542236328, |
| "learning_rate": 1.988651744737914e-05, |
| "loss": 0.5561, |
| "step": 717 |
| }, |
| { |
| "epoch": 55.23076923076923, |
| "grad_norm": 32.02790451049805, |
| "learning_rate": 1.9885466265719723e-05, |
| "loss": 0.0889, |
| "step": 718 |
| }, |
| { |
| "epoch": 55.30769230769231, |
| "grad_norm": 27.264633178710938, |
| "learning_rate": 1.9884410266004134e-05, |
| "loss": 0.2253, |
| "step": 719 |
| }, |
| { |
| "epoch": 55.38461538461539, |
| "grad_norm": 30.937807083129883, |
| "learning_rate": 1.988334944874706e-05, |
| "loss": 0.3582, |
| "step": 720 |
| }, |
| { |
| "epoch": 55.46153846153846, |
| "grad_norm": 46.29901885986328, |
| "learning_rate": 1.988228381446553e-05, |
| "loss": 0.4241, |
| "step": 721 |
| }, |
| { |
| "epoch": 55.53846153846154, |
| "grad_norm": 42.519954681396484, |
| "learning_rate": 1.988121336367892e-05, |
| "loss": 0.8594, |
| "step": 722 |
| }, |
| { |
| "epoch": 55.61538461538461, |
| "grad_norm": 39.3632926940918, |
| "learning_rate": 1.9880138096908955e-05, |
| "loss": 0.1339, |
| "step": 723 |
| }, |
| { |
| "epoch": 55.69230769230769, |
| "grad_norm": 32.2740364074707, |
| "learning_rate": 1.9879058014679704e-05, |
| "loss": 0.1419, |
| "step": 724 |
| }, |
| { |
| "epoch": 55.76923076923077, |
| "grad_norm": 28.521841049194336, |
| "learning_rate": 1.987797311751759e-05, |
| "loss": 0.2118, |
| "step": 725 |
| }, |
| { |
| "epoch": 55.84615384615385, |
| "grad_norm": 28.847856521606445, |
| "learning_rate": 1.9876883405951378e-05, |
| "loss": 0.2045, |
| "step": 726 |
| }, |
| { |
| "epoch": 55.92307692307692, |
| "grad_norm": 26.72178077697754, |
| "learning_rate": 1.9875788880512183e-05, |
| "loss": 0.7234, |
| "step": 727 |
| }, |
| { |
| "epoch": 56.0, |
| "grad_norm": 24.55845069885254, |
| "learning_rate": 1.9874689541733455e-05, |
| "loss": 0.078, |
| "step": 728 |
| }, |
| { |
| "epoch": 56.07692307692308, |
| "grad_norm": 30.381683349609375, |
| "learning_rate": 1.9873585390151003e-05, |
| "loss": 0.2247, |
| "step": 729 |
| }, |
| { |
| "epoch": 56.15384615384615, |
| "grad_norm": 18.63511085510254, |
| "learning_rate": 1.9872476426302983e-05, |
| "loss": 0.2439, |
| "step": 730 |
| }, |
| { |
| "epoch": 56.23076923076923, |
| "grad_norm": 18.45151710510254, |
| "learning_rate": 1.987136265072988e-05, |
| "loss": 0.0866, |
| "step": 731 |
| }, |
| { |
| "epoch": 56.30769230769231, |
| "grad_norm": 29.958969116210938, |
| "learning_rate": 1.987024406397454e-05, |
| "loss": 0.3542, |
| "step": 732 |
| }, |
| { |
| "epoch": 56.38461538461539, |
| "grad_norm": 19.41910171508789, |
| "learning_rate": 1.9869120666582153e-05, |
| "loss": 0.1824, |
| "step": 733 |
| }, |
| { |
| "epoch": 56.46153846153846, |
| "grad_norm": 26.677873611450195, |
| "learning_rate": 1.986799245910024e-05, |
| "loss": 0.3551, |
| "step": 734 |
| }, |
| { |
| "epoch": 56.53846153846154, |
| "grad_norm": 28.94716453552246, |
| "learning_rate": 1.986685944207868e-05, |
| "loss": 0.4222, |
| "step": 735 |
| }, |
| { |
| "epoch": 56.61538461538461, |
| "grad_norm": 18.966806411743164, |
| "learning_rate": 1.9865721616069695e-05, |
| "loss": 0.0927, |
| "step": 736 |
| }, |
| { |
| "epoch": 56.69230769230769, |
| "grad_norm": 11.261028289794922, |
| "learning_rate": 1.9864578981627844e-05, |
| "loss": 0.0623, |
| "step": 737 |
| }, |
| { |
| "epoch": 56.76923076923077, |
| "grad_norm": 17.40890884399414, |
| "learning_rate": 1.9863431539310033e-05, |
| "loss": 0.2936, |
| "step": 738 |
| }, |
| { |
| "epoch": 56.84615384615385, |
| "grad_norm": 24.21697998046875, |
| "learning_rate": 1.986227928967551e-05, |
| "loss": 0.0637, |
| "step": 739 |
| }, |
| { |
| "epoch": 56.92307692307692, |
| "grad_norm": 31.43907356262207, |
| "learning_rate": 1.9861122233285873e-05, |
| "loss": 0.5369, |
| "step": 740 |
| }, |
| { |
| "epoch": 57.0, |
| "grad_norm": 50.822444915771484, |
| "learning_rate": 1.985996037070505e-05, |
| "loss": 0.3608, |
| "step": 741 |
| }, |
| { |
| "epoch": 57.07692307692308, |
| "grad_norm": 15.900673866271973, |
| "learning_rate": 1.9858793702499322e-05, |
| "loss": 0.1084, |
| "step": 742 |
| }, |
| { |
| "epoch": 57.15384615384615, |
| "grad_norm": 44.476287841796875, |
| "learning_rate": 1.9857622229237315e-05, |
| "loss": 0.4102, |
| "step": 743 |
| }, |
| { |
| "epoch": 57.23076923076923, |
| "grad_norm": 65.13040161132812, |
| "learning_rate": 1.9856445951489984e-05, |
| "loss": 0.4725, |
| "step": 744 |
| }, |
| { |
| "epoch": 57.30769230769231, |
| "grad_norm": 24.75728416442871, |
| "learning_rate": 1.985526486983063e-05, |
| "loss": 0.267, |
| "step": 745 |
| }, |
| { |
| "epoch": 57.38461538461539, |
| "grad_norm": 41.35836410522461, |
| "learning_rate": 1.9854078984834904e-05, |
| "loss": 0.6484, |
| "step": 746 |
| }, |
| { |
| "epoch": 57.46153846153846, |
| "grad_norm": 45.00423049926758, |
| "learning_rate": 1.985288829708079e-05, |
| "loss": 0.9051, |
| "step": 747 |
| }, |
| { |
| "epoch": 57.53846153846154, |
| "grad_norm": 28.75640869140625, |
| "learning_rate": 1.9851692807148612e-05, |
| "loss": 0.2823, |
| "step": 748 |
| }, |
| { |
| "epoch": 57.61538461538461, |
| "grad_norm": 25.26997184753418, |
| "learning_rate": 1.9850492515621038e-05, |
| "loss": 0.3037, |
| "step": 749 |
| }, |
| { |
| "epoch": 57.69230769230769, |
| "grad_norm": 16.20356559753418, |
| "learning_rate": 1.984928742308308e-05, |
| "loss": 0.1665, |
| "step": 750 |
| }, |
| { |
| "epoch": 57.76923076923077, |
| "grad_norm": 22.45698356628418, |
| "learning_rate": 1.9848077530122083e-05, |
| "loss": 0.4112, |
| "step": 751 |
| }, |
| { |
| "epoch": 57.84615384615385, |
| "grad_norm": 27.950660705566406, |
| "learning_rate": 1.9846862837327733e-05, |
| "loss": 0.294, |
| "step": 752 |
| }, |
| { |
| "epoch": 57.92307692307692, |
| "grad_norm": 19.849308013916016, |
| "learning_rate": 1.9845643345292055e-05, |
| "loss": 0.0727, |
| "step": 753 |
| }, |
| { |
| "epoch": 58.0, |
| "grad_norm": 50.8568229675293, |
| "learning_rate": 1.9844419054609418e-05, |
| "loss": 0.3167, |
| "step": 754 |
| }, |
| { |
| "epoch": 58.07692307692308, |
| "grad_norm": 49.81483459472656, |
| "learning_rate": 1.9843189965876525e-05, |
| "loss": 0.541, |
| "step": 755 |
| }, |
| { |
| "epoch": 58.15384615384615, |
| "grad_norm": 49.06199645996094, |
| "learning_rate": 1.984195607969242e-05, |
| "loss": 0.6278, |
| "step": 756 |
| }, |
| { |
| "epoch": 58.23076923076923, |
| "grad_norm": 29.350656509399414, |
| "learning_rate": 1.9840717396658483e-05, |
| "loss": 0.3204, |
| "step": 757 |
| }, |
| { |
| "epoch": 58.30769230769231, |
| "grad_norm": 20.372554779052734, |
| "learning_rate": 1.9839473917378432e-05, |
| "loss": 0.0781, |
| "step": 758 |
| }, |
| { |
| "epoch": 58.38461538461539, |
| "grad_norm": 22.690336227416992, |
| "learning_rate": 1.983822564245833e-05, |
| "loss": 0.1219, |
| "step": 759 |
| }, |
| { |
| "epoch": 58.46153846153846, |
| "grad_norm": 24.395050048828125, |
| "learning_rate": 1.9836972572506557e-05, |
| "loss": 0.3527, |
| "step": 760 |
| }, |
| { |
| "epoch": 58.53846153846154, |
| "grad_norm": 30.973020553588867, |
| "learning_rate": 1.983571470813386e-05, |
| "loss": 0.39, |
| "step": 761 |
| }, |
| { |
| "epoch": 58.61538461538461, |
| "grad_norm": 28.124399185180664, |
| "learning_rate": 1.98344520499533e-05, |
| "loss": 0.4424, |
| "step": 762 |
| }, |
| { |
| "epoch": 58.69230769230769, |
| "grad_norm": 31.887248992919922, |
| "learning_rate": 1.983318459858028e-05, |
| "loss": 0.0827, |
| "step": 763 |
| }, |
| { |
| "epoch": 58.76923076923077, |
| "grad_norm": 35.62253189086914, |
| "learning_rate": 1.9831912354632537e-05, |
| "loss": 0.6014, |
| "step": 764 |
| }, |
| { |
| "epoch": 58.84615384615385, |
| "grad_norm": 35.35845947265625, |
| "learning_rate": 1.9830635318730155e-05, |
| "loss": 0.1419, |
| "step": 765 |
| }, |
| { |
| "epoch": 58.92307692307692, |
| "grad_norm": 12.43506908416748, |
| "learning_rate": 1.9829353491495545e-05, |
| "loss": 0.1451, |
| "step": 766 |
| }, |
| { |
| "epoch": 59.0, |
| "grad_norm": 17.245344161987305, |
| "learning_rate": 1.982806687355345e-05, |
| "loss": 0.1339, |
| "step": 767 |
| }, |
| { |
| "epoch": 59.07692307692308, |
| "grad_norm": 31.799854278564453, |
| "learning_rate": 1.982677546553095e-05, |
| "loss": 0.2812, |
| "step": 768 |
| }, |
| { |
| "epoch": 59.15384615384615, |
| "grad_norm": 19.206119537353516, |
| "learning_rate": 1.982547926805747e-05, |
| "loss": 0.0983, |
| "step": 769 |
| }, |
| { |
| "epoch": 59.23076923076923, |
| "grad_norm": 33.53507614135742, |
| "learning_rate": 1.9824178281764753e-05, |
| "loss": 0.1039, |
| "step": 770 |
| }, |
| { |
| "epoch": 59.30769230769231, |
| "grad_norm": 13.916312217712402, |
| "learning_rate": 1.982287250728689e-05, |
| "loss": 0.095, |
| "step": 771 |
| }, |
| { |
| "epoch": 59.38461538461539, |
| "grad_norm": 24.899681091308594, |
| "learning_rate": 1.9821561945260292e-05, |
| "loss": 0.2406, |
| "step": 772 |
| }, |
| { |
| "epoch": 59.46153846153846, |
| "grad_norm": 17.658226013183594, |
| "learning_rate": 1.982024659632372e-05, |
| "loss": 0.1433, |
| "step": 773 |
| }, |
| { |
| "epoch": 59.53846153846154, |
| "grad_norm": 38.41850280761719, |
| "learning_rate": 1.9818926461118254e-05, |
| "loss": 0.1838, |
| "step": 774 |
| }, |
| { |
| "epoch": 59.61538461538461, |
| "grad_norm": 27.547054290771484, |
| "learning_rate": 1.981760154028731e-05, |
| "loss": 0.7646, |
| "step": 775 |
| }, |
| { |
| "epoch": 59.69230769230769, |
| "grad_norm": 48.75922775268555, |
| "learning_rate": 1.9816271834476642e-05, |
| "loss": 0.8896, |
| "step": 776 |
| }, |
| { |
| "epoch": 59.76923076923077, |
| "grad_norm": 25.430660247802734, |
| "learning_rate": 1.981493734433433e-05, |
| "loss": 0.4132, |
| "step": 777 |
| }, |
| { |
| "epoch": 59.84615384615385, |
| "grad_norm": 29.855377197265625, |
| "learning_rate": 1.981359807051079e-05, |
| "loss": 0.0553, |
| "step": 778 |
| }, |
| { |
| "epoch": 59.92307692307692, |
| "grad_norm": 29.918643951416016, |
| "learning_rate": 1.981225401365877e-05, |
| "loss": 0.2585, |
| "step": 779 |
| }, |
| { |
| "epoch": 60.0, |
| "grad_norm": 36.88877487182617, |
| "learning_rate": 1.981090517443334e-05, |
| "loss": 0.0971, |
| "step": 780 |
| }, |
| { |
| "epoch": 60.07692307692308, |
| "grad_norm": 48.528507232666016, |
| "learning_rate": 1.9809551553491918e-05, |
| "loss": 0.9207, |
| "step": 781 |
| }, |
| { |
| "epoch": 60.15384615384615, |
| "grad_norm": 37.711578369140625, |
| "learning_rate": 1.9808193151494233e-05, |
| "loss": 0.9668, |
| "step": 782 |
| }, |
| { |
| "epoch": 60.23076923076923, |
| "grad_norm": 22.573158264160156, |
| "learning_rate": 1.9806829969102356e-05, |
| "loss": 0.3043, |
| "step": 783 |
| }, |
| { |
| "epoch": 60.30769230769231, |
| "grad_norm": 24.48107147216797, |
| "learning_rate": 1.9805462006980688e-05, |
| "loss": 0.2461, |
| "step": 784 |
| }, |
| { |
| "epoch": 60.38461538461539, |
| "grad_norm": 17.666908264160156, |
| "learning_rate": 1.980408926579596e-05, |
| "loss": 0.0987, |
| "step": 785 |
| }, |
| { |
| "epoch": 60.46153846153846, |
| "grad_norm": 23.56198501586914, |
| "learning_rate": 1.9802711746217222e-05, |
| "loss": 0.1972, |
| "step": 786 |
| }, |
| { |
| "epoch": 60.53846153846154, |
| "grad_norm": 20.815799713134766, |
| "learning_rate": 1.9801329448915863e-05, |
| "loss": 0.3076, |
| "step": 787 |
| }, |
| { |
| "epoch": 60.61538461538461, |
| "grad_norm": 31.176063537597656, |
| "learning_rate": 1.9799942374565597e-05, |
| "loss": 0.3889, |
| "step": 788 |
| }, |
| { |
| "epoch": 60.69230769230769, |
| "grad_norm": 13.005535125732422, |
| "learning_rate": 1.979855052384247e-05, |
| "loss": 0.0651, |
| "step": 789 |
| }, |
| { |
| "epoch": 60.76923076923077, |
| "grad_norm": 18.74942970275879, |
| "learning_rate": 1.9797153897424854e-05, |
| "loss": 0.0623, |
| "step": 790 |
| }, |
| { |
| "epoch": 60.84615384615385, |
| "grad_norm": 20.31600570678711, |
| "learning_rate": 1.979575249599344e-05, |
| "loss": 0.1789, |
| "step": 791 |
| }, |
| { |
| "epoch": 60.92307692307692, |
| "grad_norm": 38.58935546875, |
| "learning_rate": 1.9794346320231265e-05, |
| "loss": 0.4844, |
| "step": 792 |
| }, |
| { |
| "epoch": 61.0, |
| "grad_norm": 30.701807022094727, |
| "learning_rate": 1.9792935370823676e-05, |
| "loss": 0.2106, |
| "step": 793 |
| }, |
| { |
| "epoch": 61.07692307692308, |
| "grad_norm": 16.613651275634766, |
| "learning_rate": 1.9791519648458352e-05, |
| "loss": 0.0731, |
| "step": 794 |
| }, |
| { |
| "epoch": 61.15384615384615, |
| "grad_norm": 31.29366111755371, |
| "learning_rate": 1.97900991538253e-05, |
| "loss": 0.3114, |
| "step": 795 |
| }, |
| { |
| "epoch": 61.23076923076923, |
| "grad_norm": 34.595489501953125, |
| "learning_rate": 1.9788673887616852e-05, |
| "loss": 0.4594, |
| "step": 796 |
| }, |
| { |
| "epoch": 61.30769230769231, |
| "grad_norm": 25.498994827270508, |
| "learning_rate": 1.9787243850527663e-05, |
| "loss": 0.2845, |
| "step": 797 |
| }, |
| { |
| "epoch": 61.38461538461539, |
| "grad_norm": 39.75912857055664, |
| "learning_rate": 1.978580904325472e-05, |
| "loss": 0.2296, |
| "step": 798 |
| }, |
| { |
| "epoch": 61.46153846153846, |
| "grad_norm": 46.827213287353516, |
| "learning_rate": 1.9784369466497333e-05, |
| "loss": 0.1907, |
| "step": 799 |
| }, |
| { |
| "epoch": 61.53846153846154, |
| "grad_norm": 35.235965728759766, |
| "learning_rate": 1.9782925120957123e-05, |
| "loss": 0.1975, |
| "step": 800 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 5000, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 385, |
| "save_steps": 200, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": true, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 8, |
| "trial_name": null, |
| "trial_params": null |
| } |
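The log above is the tail of what appears to be a Hugging Face `transformers` `trainer_state.json` (steps 705-800 of a 5000-step run, logging every step). As a minimal sketch of how such a file can be inspected, assuming the JSON is saved locally under the standard name `trainer_state.json` (a hypothetical path, not confirmed by the source), the snippet below loads `log_history` and summarizes the loss curve:

```python
import json
import statistics

# Minimal sketch: load a trainer_state.json and summarize the logged
# training curve. The filename is an assumption; adjust to your run's
# checkpoint directory if needed.
with open("trainer_state.json") as f:
    state = json.load(f)

history = state["log_history"]  # one dict per logged step
losses = [h["loss"] for h in history if "loss" in h]
steps = [h["step"] for h in history if "loss" in h]

print(f"steps logged: {len(steps)} (last: {steps[-1]} of {state['max_steps']})")
print(f"first loss: {losses[0]:.4f}  last loss: {losses[-1]:.4f}")
print(f"mean of final 50 losses: {statistics.mean(losses[-50:]):.4f}")
```

Averaging a trailing window, as above, gives a steadier read than any single step: in this window the per-step loss swings between roughly 0.06 and 0.97 and `grad_norm` between roughly 8 and 65, so individual log entries are noisy.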