| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 5.976642335766424, |
| "eval_steps": 500, |
| "global_step": 1368, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.004379562043795621, |
| "grad_norm": 34.64235305786133, |
| "learning_rate": 5.0000000000000004e-08, |
| "loss": 2.6583, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.008759124087591242, |
| "grad_norm": 33.89678192138672, |
| "learning_rate": 1.0000000000000001e-07, |
| "loss": 2.5074, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.013138686131386862, |
| "grad_norm": 35.2148551940918, |
| "learning_rate": 1.5000000000000002e-07, |
| "loss": 2.7094, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.017518248175182483, |
| "grad_norm": 35.11457061767578, |
| "learning_rate": 2.0000000000000002e-07, |
| "loss": 2.7266, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.021897810218978103, |
| "grad_norm": 35.70753479003906, |
| "learning_rate": 2.5000000000000004e-07, |
| "loss": 2.7442, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.026277372262773723, |
| "grad_norm": 34.34943771362305, |
| "learning_rate": 3.0000000000000004e-07, |
| "loss": 2.5578, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.030656934306569343, |
| "grad_norm": 34.31540298461914, |
| "learning_rate": 3.5000000000000004e-07, |
| "loss": 2.5893, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.035036496350364967, |
| "grad_norm": 32.545223236083984, |
| "learning_rate": 4.0000000000000003e-07, |
| "loss": 2.5039, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.03941605839416058, |
| "grad_norm": 35.70431137084961, |
| "learning_rate": 4.5000000000000003e-07, |
| "loss": 2.6719, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.043795620437956206, |
| "grad_norm": 34.14265441894531, |
| "learning_rate": 5.000000000000001e-07, |
| "loss": 2.5764, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.04817518248175182, |
| "grad_norm": 32.08097839355469, |
| "learning_rate": 5.5e-07, |
| "loss": 2.4564, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.052554744525547446, |
| "grad_norm": 32.66060256958008, |
| "learning_rate": 6.000000000000001e-07, |
| "loss": 2.458, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.05693430656934306, |
| "grad_norm": 33.21636962890625, |
| "learning_rate": 6.5e-07, |
| "loss": 2.4835, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.061313868613138686, |
| "grad_norm": 33.92257308959961, |
| "learning_rate": 7.000000000000001e-07, |
| "loss": 2.4288, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.06569343065693431, |
| "grad_norm": 32.19805145263672, |
| "learning_rate": 7.5e-07, |
| "loss": 2.2411, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.07007299270072993, |
| "grad_norm": 32.355220794677734, |
| "learning_rate": 8.000000000000001e-07, |
| "loss": 2.1597, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.07445255474452554, |
| "grad_norm": 33.08480453491211, |
| "learning_rate": 8.500000000000001e-07, |
| "loss": 2.1377, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.07883211678832117, |
| "grad_norm": 33.459957122802734, |
| "learning_rate": 9.000000000000001e-07, |
| "loss": 2.0306, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.08321167883211679, |
| "grad_norm": 32.897315979003906, |
| "learning_rate": 9.500000000000001e-07, |
| "loss": 1.8697, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.08759124087591241, |
| "grad_norm": 33.81785202026367, |
| "learning_rate": 1.0000000000000002e-06, |
| "loss": 1.8147, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.09197080291970802, |
| "grad_norm": 32.52595520019531, |
| "learning_rate": 1.0500000000000001e-06, |
| "loss": 1.6526, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.09635036496350365, |
| "grad_norm": 34.09442138671875, |
| "learning_rate": 1.1e-06, |
| "loss": 1.6127, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.10072992700729927, |
| "grad_norm": 30.89822769165039, |
| "learning_rate": 1.1500000000000002e-06, |
| "loss": 1.3872, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.10510948905109489, |
| "grad_norm": 29.566524505615234, |
| "learning_rate": 1.2000000000000002e-06, |
| "loss": 1.2755, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.10948905109489052, |
| "grad_norm": 28.26628875732422, |
| "learning_rate": 1.25e-06, |
| "loss": 1.1409, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.11386861313868613, |
| "grad_norm": 30.7103328704834, |
| "learning_rate": 1.3e-06, |
| "loss": 0.966, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.11824817518248175, |
| "grad_norm": 28.975385665893555, |
| "learning_rate": 1.3500000000000002e-06, |
| "loss": 0.7579, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.12262773722627737, |
| "grad_norm": 26.821529388427734, |
| "learning_rate": 1.4000000000000001e-06, |
| "loss": 0.6013, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.12700729927007298, |
| "grad_norm": 23.804439544677734, |
| "learning_rate": 1.45e-06, |
| "loss": 0.4978, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.13138686131386862, |
| "grad_norm": 21.404451370239258, |
| "learning_rate": 1.5e-06, |
| "loss": 0.3926, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.13576642335766423, |
| "grad_norm": 17.63161849975586, |
| "learning_rate": 1.5500000000000002e-06, |
| "loss": 0.2568, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.14014598540145987, |
| "grad_norm": 10.998854637145996, |
| "learning_rate": 1.6000000000000001e-06, |
| "loss": 0.2373, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.14452554744525548, |
| "grad_norm": 6.9544997215271, |
| "learning_rate": 1.6500000000000003e-06, |
| "loss": 0.1689, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.14890510948905109, |
| "grad_norm": 5.1013102531433105, |
| "learning_rate": 1.7000000000000002e-06, |
| "loss": 0.1471, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.15328467153284672, |
| "grad_norm": 4.501709461212158, |
| "learning_rate": 1.75e-06, |
| "loss": 0.132, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.15766423357664233, |
| "grad_norm": 3.198529005050659, |
| "learning_rate": 1.8000000000000001e-06, |
| "loss": 0.1065, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.16204379562043797, |
| "grad_norm": 3.2325005531311035, |
| "learning_rate": 1.85e-06, |
| "loss": 0.0907, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.16642335766423358, |
| "grad_norm": 1.5125375986099243, |
| "learning_rate": 1.9000000000000002e-06, |
| "loss": 0.0782, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.1708029197080292, |
| "grad_norm": 1.9160635471343994, |
| "learning_rate": 1.9500000000000004e-06, |
| "loss": 0.0852, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.17518248175182483, |
| "grad_norm": 1.6062333583831787, |
| "learning_rate": 2.0000000000000003e-06, |
| "loss": 0.074, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.17956204379562044, |
| "grad_norm": 1.5675855875015259, |
| "learning_rate": 2.05e-06, |
| "loss": 0.0704, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.18394160583941604, |
| "grad_norm": 1.440182089805603, |
| "learning_rate": 2.1000000000000002e-06, |
| "loss": 0.0674, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.18832116788321168, |
| "grad_norm": 1.1466726064682007, |
| "learning_rate": 2.15e-06, |
| "loss": 0.0702, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.1927007299270073, |
| "grad_norm": 1.2195515632629395, |
| "learning_rate": 2.2e-06, |
| "loss": 0.0723, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.19708029197080293, |
| "grad_norm": 1.743561029434204, |
| "learning_rate": 2.25e-06, |
| "loss": 0.0875, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.20145985401459854, |
| "grad_norm": 0.9764343500137329, |
| "learning_rate": 2.3000000000000004e-06, |
| "loss": 0.062, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.20583941605839415, |
| "grad_norm": 0.8891277313232422, |
| "learning_rate": 2.35e-06, |
| "loss": 0.0576, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.21021897810218979, |
| "grad_norm": 0.9648666977882385, |
| "learning_rate": 2.4000000000000003e-06, |
| "loss": 0.0656, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.2145985401459854, |
| "grad_norm": 0.784566342830658, |
| "learning_rate": 2.4500000000000003e-06, |
| "loss": 0.0548, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.21897810218978103, |
| "grad_norm": 0.9402966499328613, |
| "learning_rate": 2.5e-06, |
| "loss": 0.0626, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.22335766423357664, |
| "grad_norm": 1.3284685611724854, |
| "learning_rate": 2.55e-06, |
| "loss": 0.0632, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.22773722627737225, |
| "grad_norm": 1.0913968086242676, |
| "learning_rate": 2.6e-06, |
| "loss": 0.0675, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.2321167883211679, |
| "grad_norm": 1.1069140434265137, |
| "learning_rate": 2.6500000000000005e-06, |
| "loss": 0.0541, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.2364963503649635, |
| "grad_norm": 0.8529757857322693, |
| "learning_rate": 2.7000000000000004e-06, |
| "loss": 0.0657, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.24087591240875914, |
| "grad_norm": 0.7182446718215942, |
| "learning_rate": 2.7500000000000004e-06, |
| "loss": 0.0607, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.24525547445255474, |
| "grad_norm": 1.0538653135299683, |
| "learning_rate": 2.8000000000000003e-06, |
| "loss": 0.0556, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.24963503649635035, |
| "grad_norm": 1.2083594799041748, |
| "learning_rate": 2.85e-06, |
| "loss": 0.0532, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.25401459854014596, |
| "grad_norm": 0.8183572888374329, |
| "learning_rate": 2.9e-06, |
| "loss": 0.0529, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.2583941605839416, |
| "grad_norm": 0.9014842510223389, |
| "learning_rate": 2.95e-06, |
| "loss": 0.0601, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.26277372262773724, |
| "grad_norm": 0.9017247557640076, |
| "learning_rate": 3e-06, |
| "loss": 0.0584, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.2671532846715328, |
| "grad_norm": 1.1078683137893677, |
| "learning_rate": 3.05e-06, |
| "loss": 0.0635, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.27153284671532846, |
| "grad_norm": 1.174526572227478, |
| "learning_rate": 3.1000000000000004e-06, |
| "loss": 0.0523, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.2759124087591241, |
| "grad_norm": 0.9296770095825195, |
| "learning_rate": 3.1500000000000003e-06, |
| "loss": 0.0588, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.28029197080291973, |
| "grad_norm": 0.8549372553825378, |
| "learning_rate": 3.2000000000000003e-06, |
| "loss": 0.0639, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.2846715328467153, |
| "grad_norm": 0.8956279158592224, |
| "learning_rate": 3.2500000000000002e-06, |
| "loss": 0.059, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.28905109489051095, |
| "grad_norm": 0.7937710285186768, |
| "learning_rate": 3.3000000000000006e-06, |
| "loss": 0.0579, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.2934306569343066, |
| "grad_norm": 0.7786620855331421, |
| "learning_rate": 3.3500000000000005e-06, |
| "loss": 0.0586, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.29781021897810217, |
| "grad_norm": 0.7562637329101562, |
| "learning_rate": 3.4000000000000005e-06, |
| "loss": 0.046, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.3021897810218978, |
| "grad_norm": 0.8958250880241394, |
| "learning_rate": 3.45e-06, |
| "loss": 0.0566, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.30656934306569344, |
| "grad_norm": 0.9434528946876526, |
| "learning_rate": 3.5e-06, |
| "loss": 0.0548, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.310948905109489, |
| "grad_norm": 1.0564453601837158, |
| "learning_rate": 3.5500000000000003e-06, |
| "loss": 0.0529, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.31532846715328466, |
| "grad_norm": 0.896443247795105, |
| "learning_rate": 3.6000000000000003e-06, |
| "loss": 0.0517, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.3197080291970803, |
| "grad_norm": 1.1364223957061768, |
| "learning_rate": 3.65e-06, |
| "loss": 0.0489, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.32408759124087594, |
| "grad_norm": 1.1319010257720947, |
| "learning_rate": 3.7e-06, |
| "loss": 0.0548, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.3284671532846715, |
| "grad_norm": 0.9694503545761108, |
| "learning_rate": 3.7500000000000005e-06, |
| "loss": 0.0525, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.33284671532846716, |
| "grad_norm": 0.8128111958503723, |
| "learning_rate": 3.8000000000000005e-06, |
| "loss": 0.0566, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.3372262773722628, |
| "grad_norm": 0.9068273901939392, |
| "learning_rate": 3.85e-06, |
| "loss": 0.0475, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.3416058394160584, |
| "grad_norm": 0.9689438343048096, |
| "learning_rate": 3.900000000000001e-06, |
| "loss": 0.048, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.345985401459854, |
| "grad_norm": 0.940131664276123, |
| "learning_rate": 3.95e-06, |
| "loss": 0.0567, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.35036496350364965, |
| "grad_norm": 0.8836082220077515, |
| "learning_rate": 4.000000000000001e-06, |
| "loss": 0.0542, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.35474452554744523, |
| "grad_norm": 0.9325949549674988, |
| "learning_rate": 4.05e-06, |
| "loss": 0.0551, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.35912408759124087, |
| "grad_norm": 0.8954764008522034, |
| "learning_rate": 4.1e-06, |
| "loss": 0.0517, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.3635036496350365, |
| "grad_norm": 0.6444959044456482, |
| "learning_rate": 4.15e-06, |
| "loss": 0.0434, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.3678832116788321, |
| "grad_norm": 0.9097581505775452, |
| "learning_rate": 4.2000000000000004e-06, |
| "loss": 0.0471, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.3722627737226277, |
| "grad_norm": 0.849006712436676, |
| "learning_rate": 4.25e-06, |
| "loss": 0.0529, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.37664233576642336, |
| "grad_norm": 0.8611392378807068, |
| "learning_rate": 4.3e-06, |
| "loss": 0.0513, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.381021897810219, |
| "grad_norm": 0.7885357737541199, |
| "learning_rate": 4.350000000000001e-06, |
| "loss": 0.0523, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.3854014598540146, |
| "grad_norm": 0.7642116546630859, |
| "learning_rate": 4.4e-06, |
| "loss": 0.0407, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.3897810218978102, |
| "grad_norm": 0.8920945525169373, |
| "learning_rate": 4.450000000000001e-06, |
| "loss": 0.0485, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.39416058394160586, |
| "grad_norm": 0.9801046848297119, |
| "learning_rate": 4.5e-06, |
| "loss": 0.0404, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.39854014598540144, |
| "grad_norm": 1.0874953269958496, |
| "learning_rate": 4.5500000000000005e-06, |
| "loss": 0.0588, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.4029197080291971, |
| "grad_norm": 0.9019029140472412, |
| "learning_rate": 4.600000000000001e-06, |
| "loss": 0.0466, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.4072992700729927, |
| "grad_norm": 0.7258988618850708, |
| "learning_rate": 4.65e-06, |
| "loss": 0.0493, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.4116788321167883, |
| "grad_norm": 1.103407859802246, |
| "learning_rate": 4.7e-06, |
| "loss": 0.0495, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.41605839416058393, |
| "grad_norm": 0.751805305480957, |
| "learning_rate": 4.75e-06, |
| "loss": 0.0484, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.42043795620437957, |
| "grad_norm": 0.7717764973640442, |
| "learning_rate": 4.800000000000001e-06, |
| "loss": 0.0447, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.4248175182481752, |
| "grad_norm": 0.7147190570831299, |
| "learning_rate": 4.85e-06, |
| "loss": 0.0523, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.4291970802919708, |
| "grad_norm": 0.9990110993385315, |
| "learning_rate": 4.9000000000000005e-06, |
| "loss": 0.0454, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.4335766423357664, |
| "grad_norm": 0.7766187191009521, |
| "learning_rate": 4.95e-06, |
| "loss": 0.0472, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.43795620437956206, |
| "grad_norm": 0.7124347686767578, |
| "learning_rate": 5e-06, |
| "loss": 0.0473, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.44233576642335765, |
| "grad_norm": 0.9340270757675171, |
| "learning_rate": 4.99999232689698e-06, |
| "loss": 0.0499, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.4467153284671533, |
| "grad_norm": 0.7429985404014587, |
| "learning_rate": 4.999969307635021e-06, |
| "loss": 0.042, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.4510948905109489, |
| "grad_norm": 0.9131317138671875, |
| "learning_rate": 4.999930942355425e-06, |
| "loss": 0.0519, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.4554744525547445, |
| "grad_norm": 0.9970843195915222, |
| "learning_rate": 4.999877231293698e-06, |
| "loss": 0.0428, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.45985401459854014, |
| "grad_norm": 0.7625145316123962, |
| "learning_rate": 4.999808174779543e-06, |
| "loss": 0.0442, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.4642335766423358, |
| "grad_norm": 0.6059474945068359, |
| "learning_rate": 4.999723773236865e-06, |
| "loss": 0.0456, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.4686131386861314, |
| "grad_norm": 0.6798833608627319, |
| "learning_rate": 4.999624027183758e-06, |
| "loss": 0.0408, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.472992700729927, |
| "grad_norm": 1.0250803232192993, |
| "learning_rate": 4.999508937232514e-06, |
| "loss": 0.0471, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.47737226277372263, |
| "grad_norm": 0.8457198739051819, |
| "learning_rate": 4.999378504089609e-06, |
| "loss": 0.0425, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.48175182481751827, |
| "grad_norm": 0.9417868852615356, |
| "learning_rate": 4.999232728555705e-06, |
| "loss": 0.0388, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.48613138686131385, |
| "grad_norm": 0.8558921813964844, |
| "learning_rate": 4.999071611525643e-06, |
| "loss": 0.0423, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.4905109489051095, |
| "grad_norm": 0.7070104479789734, |
| "learning_rate": 4.998895153988437e-06, |
| "loss": 0.0354, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.4948905109489051, |
| "grad_norm": 0.8162719011306763, |
| "learning_rate": 4.998703357027268e-06, |
| "loss": 0.0465, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.4992700729927007, |
| "grad_norm": 0.9140358567237854, |
| "learning_rate": 4.998496221819479e-06, |
| "loss": 0.0457, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.5036496350364964, |
| "grad_norm": 0.6447531580924988, |
| "learning_rate": 4.998273749636564e-06, |
| "loss": 0.039, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.5080291970802919, |
| "grad_norm": 0.9157156944274902, |
| "learning_rate": 4.998035941844167e-06, |
| "loss": 0.0469, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.5124087591240876, |
| "grad_norm": 0.7706230878829956, |
| "learning_rate": 4.997782799902065e-06, |
| "loss": 0.0325, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.5167883211678832, |
| "grad_norm": 0.9391443729400635, |
| "learning_rate": 4.997514325364168e-06, |
| "loss": 0.0397, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.5211678832116788, |
| "grad_norm": 1.0085054636001587, |
| "learning_rate": 4.997230519878499e-06, |
| "loss": 0.0403, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.5255474452554745, |
| "grad_norm": 1.8318824768066406, |
| "learning_rate": 4.996931385187195e-06, |
| "loss": 0.0463, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.5299270072992701, |
| "grad_norm": 1.0216630697250366, |
| "learning_rate": 4.9966169231264885e-06, |
| "loss": 0.0406, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.5343065693430656, |
| "grad_norm": 1.4819082021713257, |
| "learning_rate": 4.9962871356267e-06, |
| "loss": 0.0485, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.5386861313868613, |
| "grad_norm": 0.9435060024261475, |
| "learning_rate": 4.995942024712222e-06, |
| "loss": 0.04, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.5430656934306569, |
| "grad_norm": 0.7887905240058899, |
| "learning_rate": 4.995581592501514e-06, |
| "loss": 0.0397, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.5474452554744526, |
| "grad_norm": 0.8321148753166199, |
| "learning_rate": 4.995205841207082e-06, |
| "loss": 0.0413, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.5518248175182482, |
| "grad_norm": 1.0303553342819214, |
| "learning_rate": 4.99481477313547e-06, |
| "loss": 0.0422, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.5562043795620438, |
| "grad_norm": 0.7056427001953125, |
| "learning_rate": 4.994408390687241e-06, |
| "loss": 0.0362, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.5605839416058395, |
| "grad_norm": 0.9762740135192871, |
| "learning_rate": 4.993986696356966e-06, |
| "loss": 0.0385, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.564963503649635, |
| "grad_norm": 0.9447624683380127, |
| "learning_rate": 4.9935496927332095e-06, |
| "loss": 0.0402, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.5693430656934306, |
| "grad_norm": 0.6106760501861572, |
| "learning_rate": 4.993097382498511e-06, |
| "loss": 0.0319, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.5737226277372263, |
| "grad_norm": 1.0554594993591309, |
| "learning_rate": 4.992629768429367e-06, |
| "loss": 0.0437, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.5781021897810219, |
| "grad_norm": 1.066218376159668, |
| "learning_rate": 4.992146853396219e-06, |
| "loss": 0.0382, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.5824817518248175, |
| "grad_norm": 0.7517623901367188, |
| "learning_rate": 4.991648640363434e-06, |
| "loss": 0.0317, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.5868613138686132, |
| "grad_norm": 0.8136976957321167, |
| "learning_rate": 4.991135132389282e-06, |
| "loss": 0.0339, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.5912408759124088, |
| "grad_norm": 0.9254240989685059, |
| "learning_rate": 4.990606332625923e-06, |
| "loss": 0.0413, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.5956204379562043, |
| "grad_norm": 0.6778447031974792, |
| "learning_rate": 4.990062244319387e-06, |
| "loss": 0.0377, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.6, |
| "grad_norm": 1.1036059856414795, |
| "learning_rate": 4.989502870809547e-06, |
| "loss": 0.0376, |
| "step": 137 |
| }, |
| { |
| "epoch": 0.6043795620437956, |
| "grad_norm": 0.8054158091545105, |
| "learning_rate": 4.988928215530111e-06, |
| "loss": 0.0367, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.6087591240875913, |
| "grad_norm": 0.9227175116539001, |
| "learning_rate": 4.988338282008588e-06, |
| "loss": 0.0374, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.6131386861313869, |
| "grad_norm": 0.8502228260040283, |
| "learning_rate": 4.9877330738662755e-06, |
| "loss": 0.0384, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.6175182481751825, |
| "grad_norm": 0.684752881526947, |
| "learning_rate": 4.987112594818232e-06, |
| "loss": 0.0366, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.621897810218978, |
| "grad_norm": 0.7456391453742981, |
| "learning_rate": 4.9864768486732585e-06, |
| "loss": 0.037, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.6262773722627737, |
| "grad_norm": 0.6797431111335754, |
| "learning_rate": 4.985825839333872e-06, |
| "loss": 0.0325, |
| "step": 143 |
| }, |
| { |
| "epoch": 0.6306569343065693, |
| "grad_norm": 0.8098205924034119, |
| "learning_rate": 4.985159570796279e-06, |
| "loss": 0.0343, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.635036496350365, |
| "grad_norm": 0.8089592456817627, |
| "learning_rate": 4.984478047150361e-06, |
| "loss": 0.026, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.6394160583941606, |
| "grad_norm": 0.9282512664794922, |
| "learning_rate": 4.983781272579637e-06, |
| "loss": 0.0334, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.6437956204379562, |
| "grad_norm": 0.802608072757721, |
| "learning_rate": 4.9830692513612445e-06, |
| "loss": 0.0259, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.6481751824817519, |
| "grad_norm": 1.3046361207962036, |
| "learning_rate": 4.982341987865914e-06, |
| "loss": 0.045, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.6525547445255474, |
| "grad_norm": 1.0812411308288574, |
| "learning_rate": 4.9815994865579405e-06, |
| "loss": 0.0329, |
| "step": 149 |
| }, |
| { |
| "epoch": 0.656934306569343, |
| "grad_norm": 0.7856137156486511, |
| "learning_rate": 4.980841751995155e-06, |
| "loss": 0.0341, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.6613138686131387, |
| "grad_norm": 1.0517083406448364, |
| "learning_rate": 4.980068788828897e-06, |
| "loss": 0.0299, |
| "step": 151 |
| }, |
| { |
| "epoch": 0.6656934306569343, |
| "grad_norm": 0.6148231029510498, |
| "learning_rate": 4.979280601803988e-06, |
| "loss": 0.0304, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.67007299270073, |
| "grad_norm": 0.7572031021118164, |
| "learning_rate": 4.9784771957586995e-06, |
| "loss": 0.0309, |
| "step": 153 |
| }, |
| { |
| "epoch": 0.6744525547445256, |
| "grad_norm": 2.0948777198791504, |
| "learning_rate": 4.977658575624727e-06, |
| "loss": 0.0307, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.6788321167883211, |
| "grad_norm": 0.624940037727356, |
| "learning_rate": 4.976824746427153e-06, |
| "loss": 0.03, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.6832116788321168, |
| "grad_norm": 0.8346346616744995, |
| "learning_rate": 4.975975713284426e-06, |
| "loss": 0.036, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.6875912408759124, |
| "grad_norm": 0.742098867893219, |
| "learning_rate": 4.975111481408319e-06, |
| "loss": 0.0325, |
| "step": 157 |
| }, |
| { |
| "epoch": 0.691970802919708, |
| "grad_norm": 0.8000304102897644, |
| "learning_rate": 4.9742320561039055e-06, |
| "loss": 0.0332, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.6963503649635037, |
| "grad_norm": 1.063854694366455, |
| "learning_rate": 4.973337442769523e-06, |
| "loss": 0.0366, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.7007299270072993, |
| "grad_norm": 0.965560257434845, |
| "learning_rate": 4.972427646896738e-06, |
| "loss": 0.0331, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.7051094890510949, |
| "grad_norm": 1.5070244073867798, |
| "learning_rate": 4.971502674070317e-06, |
| "loss": 0.0446, |
| "step": 161 |
| }, |
| { |
| "epoch": 0.7094890510948905, |
| "grad_norm": 0.8810545206069946, |
| "learning_rate": 4.970562529968189e-06, |
| "loss": 0.0299, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.7138686131386861, |
| "grad_norm": 0.7683446407318115, |
| "learning_rate": 4.969607220361414e-06, |
| "loss": 0.0244, |
| "step": 163 |
| }, |
| { |
| "epoch": 0.7182481751824817, |
| "grad_norm": 0.7444891929626465, |
| "learning_rate": 4.968636751114141e-06, |
| "loss": 0.0338, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.7226277372262774, |
| "grad_norm": 0.7077688574790955, |
| "learning_rate": 4.96765112818358e-06, |
| "loss": 0.0285, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.727007299270073, |
| "grad_norm": 0.5648500919342041, |
| "learning_rate": 4.9666503576199574e-06, |
| "loss": 0.026, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.7313868613138687, |
| "grad_norm": 0.763556718826294, |
| "learning_rate": 4.965634445566489e-06, |
| "loss": 0.0299, |
| "step": 167 |
| }, |
| { |
| "epoch": 0.7357664233576642, |
| "grad_norm": 0.6892725825309753, |
| "learning_rate": 4.9646033982593315e-06, |
| "loss": 0.023, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.7401459854014598, |
| "grad_norm": 1.0332573652267456, |
| "learning_rate": 4.963557222027551e-06, |
| "loss": 0.0313, |
| "step": 169 |
| }, |
| { |
| "epoch": 0.7445255474452555, |
| "grad_norm": 1.214428424835205, |
| "learning_rate": 4.962495923293081e-06, |
| "loss": 0.027, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.7489051094890511, |
| "grad_norm": 0.9823130965232849, |
| "learning_rate": 4.961419508570686e-06, |
| "loss": 0.0231, |
| "step": 171 |
| }, |
| { |
| "epoch": 0.7532846715328467, |
| "grad_norm": 1.2535115480422974, |
| "learning_rate": 4.960327984467919e-06, |
| "loss": 0.0326, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.7576642335766424, |
| "grad_norm": 0.9383441209793091, |
| "learning_rate": 4.959221357685081e-06, |
| "loss": 0.0286, |
| "step": 173 |
| }, |
| { |
| "epoch": 0.762043795620438, |
| "grad_norm": 1.0426976680755615, |
| "learning_rate": 4.958099635015182e-06, |
| "loss": 0.0298, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.7664233576642335, |
| "grad_norm": 0.9159742593765259, |
| "learning_rate": 4.956962823343895e-06, |
| "loss": 0.025, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.7708029197080292, |
| "grad_norm": 0.8746912479400635, |
| "learning_rate": 4.95581092964952e-06, |
| "loss": 0.0299, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.7751824817518248, |
| "grad_norm": 0.9875199198722839, |
| "learning_rate": 4.954643961002936e-06, |
| "loss": 0.0309, |
| "step": 177 |
| }, |
| { |
| "epoch": 0.7795620437956204, |
| "grad_norm": 0.7389516234397888, |
| "learning_rate": 4.953461924567559e-06, |
| "loss": 0.0291, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.7839416058394161, |
| "grad_norm": 0.790238082408905, |
| "learning_rate": 4.952264827599299e-06, |
| "loss": 0.0236, |
| "step": 179 |
| }, |
| { |
| "epoch": 0.7883211678832117, |
| "grad_norm": 0.6766819953918457, |
| "learning_rate": 4.951052677446515e-06, |
| "loss": 0.0238, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.7927007299270074, |
| "grad_norm": 0.8832846283912659, |
| "learning_rate": 4.94982548154997e-06, |
| "loss": 0.0259, |
| "step": 181 |
| }, |
| { |
| "epoch": 0.7970802919708029, |
| "grad_norm": 0.7298055291175842, |
| "learning_rate": 4.948583247442783e-06, |
| "loss": 0.023, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.8014598540145985, |
| "grad_norm": 0.911920428276062, |
| "learning_rate": 4.947325982750387e-06, |
| "loss": 0.0272, |
| "step": 183 |
| }, |
| { |
| "epoch": 0.8058394160583942, |
| "grad_norm": 0.9145316481590271, |
| "learning_rate": 4.946053695190479e-06, |
| "loss": 0.0248, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.8102189781021898, |
| "grad_norm": 0.8759565353393555, |
| "learning_rate": 4.9447663925729735e-06, |
| "loss": 0.0263, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.8145985401459854, |
| "grad_norm": 1.1927592754364014, |
| "learning_rate": 4.943464082799956e-06, |
| "loss": 0.0305, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.8189781021897811, |
| "grad_norm": 0.752566933631897, |
| "learning_rate": 4.942146773865631e-06, |
| "loss": 0.0247, |
| "step": 187 |
| }, |
| { |
| "epoch": 0.8233576642335766, |
| "grad_norm": 1.1121447086334229, |
| "learning_rate": 4.940814473856278e-06, |
| "loss": 0.0293, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.8277372262773722, |
| "grad_norm": 1.0319955348968506, |
| "learning_rate": 4.939467190950195e-06, |
| "loss": 0.0247, |
| "step": 189 |
| }, |
| { |
| "epoch": 0.8321167883211679, |
| "grad_norm": 0.7960589528083801, |
| "learning_rate": 4.938104933417655e-06, |
| "loss": 0.0232, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.8364963503649635, |
| "grad_norm": 0.593197226524353, |
| "learning_rate": 4.936727709620853e-06, |
| "loss": 0.0232, |
| "step": 191 |
| }, |
| { |
| "epoch": 0.8408759124087591, |
| "grad_norm": 0.6710584759712219, |
| "learning_rate": 4.9353355280138525e-06, |
| "loss": 0.0278, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.8452554744525548, |
| "grad_norm": 0.7627159357070923, |
| "learning_rate": 4.933928397142535e-06, |
| "loss": 0.0291, |
| "step": 193 |
| }, |
| { |
| "epoch": 0.8496350364963504, |
| "grad_norm": 0.4998359680175781, |
| "learning_rate": 4.93250632564455e-06, |
| "loss": 0.018, |
| "step": 194 |
| }, |
| { |
| "epoch": 0.8540145985401459, |
| "grad_norm": 0.8028760552406311, |
| "learning_rate": 4.931069322249258e-06, |
| "loss": 0.0193, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.8583941605839416, |
| "grad_norm": 0.6061640977859497, |
| "learning_rate": 4.929617395777678e-06, |
| "loss": 0.0142, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.8627737226277372, |
| "grad_norm": 0.5901748538017273, |
| "learning_rate": 4.928150555142436e-06, |
| "loss": 0.0177, |
| "step": 197 |
| }, |
| { |
| "epoch": 0.8671532846715329, |
| "grad_norm": 0.7800254225730896, |
| "learning_rate": 4.926668809347707e-06, |
| "loss": 0.0264, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.8715328467153285, |
| "grad_norm": 0.9308339357376099, |
| "learning_rate": 4.925172167489162e-06, |
| "loss": 0.0247, |
| "step": 199 |
| }, |
| { |
| "epoch": 0.8759124087591241, |
| "grad_norm": 0.9651213884353638, |
| "learning_rate": 4.923660638753911e-06, |
| "loss": 0.0216, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.8802919708029197, |
| "grad_norm": 1.1258251667022705, |
| "learning_rate": 4.9221342324204455e-06, |
| "loss": 0.0249, |
| "step": 201 |
| }, |
| { |
| "epoch": 0.8846715328467153, |
| "grad_norm": 1.0175387859344482, |
| "learning_rate": 4.9205929578585845e-06, |
| "loss": 0.0201, |
| "step": 202 |
| }, |
| { |
| "epoch": 0.8890510948905109, |
| "grad_norm": 1.5190610885620117, |
| "learning_rate": 4.9190368245294155e-06, |
| "loss": 0.0319, |
| "step": 203 |
| }, |
| { |
| "epoch": 0.8934306569343066, |
| "grad_norm": 0.9947767853736877, |
| "learning_rate": 4.917465841985234e-06, |
| "loss": 0.0228, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.8978102189781022, |
| "grad_norm": 0.6416967511177063, |
| "learning_rate": 4.91588001986949e-06, |
| "loss": 0.0198, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.9021897810218978, |
| "grad_norm": 0.6980161666870117, |
| "learning_rate": 4.914279367916724e-06, |
| "loss": 0.0172, |
| "step": 206 |
| }, |
| { |
| "epoch": 0.9065693430656935, |
| "grad_norm": 0.5301483869552612, |
| "learning_rate": 4.912663895952511e-06, |
| "loss": 0.0208, |
| "step": 207 |
| }, |
| { |
| "epoch": 0.910948905109489, |
| "grad_norm": 0.6047857999801636, |
| "learning_rate": 4.911033613893397e-06, |
| "loss": 0.0227, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.9153284671532846, |
| "grad_norm": 0.6069537997245789, |
| "learning_rate": 4.909388531746837e-06, |
| "loss": 0.0195, |
| "step": 209 |
| }, |
| { |
| "epoch": 0.9197080291970803, |
| "grad_norm": 0.6859843730926514, |
| "learning_rate": 4.907728659611143e-06, |
| "loss": 0.0244, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.9240875912408759, |
| "grad_norm": 0.6074005365371704, |
| "learning_rate": 4.906054007675408e-06, |
| "loss": 0.0195, |
| "step": 211 |
| }, |
| { |
| "epoch": 0.9284671532846716, |
| "grad_norm": 1.1983692646026611, |
| "learning_rate": 4.9043645862194545e-06, |
| "loss": 0.023, |
| "step": 212 |
| }, |
| { |
| "epoch": 0.9328467153284672, |
| "grad_norm": 0.8806214928627014, |
| "learning_rate": 4.902660405613767e-06, |
| "loss": 0.0243, |
| "step": 213 |
| }, |
| { |
| "epoch": 0.9372262773722628, |
| "grad_norm": 0.6523962616920471, |
| "learning_rate": 4.900941476319426e-06, |
| "loss": 0.016, |
| "step": 214 |
| }, |
| { |
| "epoch": 0.9416058394160584, |
| "grad_norm": 0.5673899054527283, |
| "learning_rate": 4.899207808888051e-06, |
| "loss": 0.0158, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.945985401459854, |
| "grad_norm": 0.9643133282661438, |
| "learning_rate": 4.897459413961729e-06, |
| "loss": 0.0194, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.9503649635036496, |
| "grad_norm": 0.6007612347602844, |
| "learning_rate": 4.8956963022729495e-06, |
| "loss": 0.0187, |
| "step": 217 |
| }, |
| { |
| "epoch": 0.9547445255474453, |
| "grad_norm": 0.968173623085022, |
| "learning_rate": 4.893918484644545e-06, |
| "loss": 0.0223, |
| "step": 218 |
| }, |
| { |
| "epoch": 0.9591240875912409, |
| "grad_norm": 0.6649457216262817, |
| "learning_rate": 4.892125971989616e-06, |
| "loss": 0.0205, |
| "step": 219 |
| }, |
| { |
| "epoch": 0.9635036496350365, |
| "grad_norm": 0.48259082436561584, |
| "learning_rate": 4.890318775311471e-06, |
| "loss": 0.0121, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.9678832116788321, |
| "grad_norm": 0.8284991383552551, |
| "learning_rate": 4.888496905703554e-06, |
| "loss": 0.0176, |
| "step": 221 |
| }, |
| { |
| "epoch": 0.9722627737226277, |
| "grad_norm": 0.5141683220863342, |
| "learning_rate": 4.8866603743493805e-06, |
| "loss": 0.0154, |
| "step": 222 |
| }, |
| { |
| "epoch": 0.9766423357664233, |
| "grad_norm": 1.0223891735076904, |
| "learning_rate": 4.884809192522466e-06, |
| "loss": 0.0111, |
| "step": 223 |
| }, |
| { |
| "epoch": 0.981021897810219, |
| "grad_norm": 0.844782292842865, |
| "learning_rate": 4.882943371586256e-06, |
| "loss": 0.016, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.9854014598540146, |
| "grad_norm": 0.6978311538696289, |
| "learning_rate": 4.881062922994061e-06, |
| "loss": 0.0129, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.9897810218978103, |
| "grad_norm": 0.8764100074768066, |
| "learning_rate": 4.879167858288982e-06, |
| "loss": 0.0213, |
| "step": 226 |
| }, |
| { |
| "epoch": 0.9941605839416059, |
| "grad_norm": 1.0449023246765137, |
| "learning_rate": 4.877258189103839e-06, |
| "loss": 0.015, |
| "step": 227 |
| }, |
| { |
| "epoch": 0.9985401459854014, |
| "grad_norm": 0.7534664869308472, |
| "learning_rate": 4.875333927161104e-06, |
| "loss": 0.0144, |
| "step": 228 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 0.7534664869308472, |
| "learning_rate": 4.8733950842728236e-06, |
| "loss": 0.0186, |
| "step": 229 |
| }, |
| { |
| "epoch": 1.0043795620437956, |
| "grad_norm": 1.4982736110687256, |
| "learning_rate": 4.871441672340551e-06, |
| "loss": 0.0126, |
| "step": 230 |
| }, |
| { |
| "epoch": 1.0087591240875913, |
| "grad_norm": 1.206292986869812, |
| "learning_rate": 4.869473703355273e-06, |
| "loss": 0.0165, |
| "step": 231 |
| }, |
| { |
| "epoch": 1.013138686131387, |
| "grad_norm": 0.4586186408996582, |
| "learning_rate": 4.867491189397331e-06, |
| "loss": 0.0089, |
| "step": 232 |
| }, |
| { |
| "epoch": 1.0175182481751825, |
| "grad_norm": 0.5647240281105042, |
| "learning_rate": 4.8654941426363525e-06, |
| "loss": 0.0122, |
| "step": 233 |
| }, |
| { |
| "epoch": 1.0218978102189782, |
| "grad_norm": 0.6478530764579773, |
| "learning_rate": 4.863482575331173e-06, |
| "loss": 0.012, |
| "step": 234 |
| }, |
| { |
| "epoch": 1.0262773722627738, |
| "grad_norm": 0.48696213960647583, |
| "learning_rate": 4.861456499829764e-06, |
| "loss": 0.0092, |
| "step": 235 |
| }, |
| { |
| "epoch": 1.0306569343065692, |
| "grad_norm": 0.6736640334129333, |
| "learning_rate": 4.859415928569154e-06, |
| "loss": 0.0149, |
| "step": 236 |
| }, |
| { |
| "epoch": 1.0350364963503649, |
| "grad_norm": 0.6518754363059998, |
| "learning_rate": 4.857360874075355e-06, |
| "loss": 0.0085, |
| "step": 237 |
| }, |
| { |
| "epoch": 1.0394160583941605, |
| "grad_norm": 0.5145443677902222, |
| "learning_rate": 4.855291348963281e-06, |
| "loss": 0.0102, |
| "step": 238 |
| }, |
| { |
| "epoch": 1.0437956204379562, |
| "grad_norm": 0.5647151470184326, |
| "learning_rate": 4.853207365936676e-06, |
| "loss": 0.0065, |
| "step": 239 |
| }, |
| { |
| "epoch": 1.0481751824817518, |
| "grad_norm": 0.46668219566345215, |
| "learning_rate": 4.8511089377880334e-06, |
| "loss": 0.0081, |
| "step": 240 |
| }, |
| { |
| "epoch": 1.0525547445255474, |
| "grad_norm": 0.9103809595108032, |
| "learning_rate": 4.848996077398518e-06, |
| "loss": 0.0107, |
| "step": 241 |
| }, |
| { |
| "epoch": 1.056934306569343, |
| "grad_norm": 0.5947101712226868, |
| "learning_rate": 4.8468687977378855e-06, |
| "loss": 0.0095, |
| "step": 242 |
| }, |
| { |
| "epoch": 1.0613138686131387, |
| "grad_norm": 0.7154219150543213, |
| "learning_rate": 4.844727111864405e-06, |
| "loss": 0.0097, |
| "step": 243 |
| }, |
| { |
| "epoch": 1.0656934306569343, |
| "grad_norm": 0.9023681282997131, |
| "learning_rate": 4.842571032924778e-06, |
| "loss": 0.0105, |
| "step": 244 |
| }, |
| { |
| "epoch": 1.07007299270073, |
| "grad_norm": 0.6020027995109558, |
| "learning_rate": 4.840400574154056e-06, |
| "loss": 0.0065, |
| "step": 245 |
| }, |
| { |
| "epoch": 1.0744525547445256, |
| "grad_norm": 0.7602945566177368, |
| "learning_rate": 4.838215748875562e-06, |
| "loss": 0.0121, |
| "step": 246 |
| }, |
| { |
| "epoch": 1.0788321167883212, |
| "grad_norm": 0.8768120408058167, |
| "learning_rate": 4.83601657050081e-06, |
| "loss": 0.0146, |
| "step": 247 |
| }, |
| { |
| "epoch": 1.0832116788321169, |
| "grad_norm": 0.7482877373695374, |
| "learning_rate": 4.833803052529414e-06, |
| "loss": 0.0076, |
| "step": 248 |
| }, |
| { |
| "epoch": 1.0875912408759123, |
| "grad_norm": 0.4619101881980896, |
| "learning_rate": 4.831575208549018e-06, |
| "loss": 0.0114, |
| "step": 249 |
| }, |
| { |
| "epoch": 1.091970802919708, |
| "grad_norm": 0.7442188262939453, |
| "learning_rate": 4.829333052235202e-06, |
| "loss": 0.0119, |
| "step": 250 |
| }, |
| { |
| "epoch": 1.0963503649635036, |
| "grad_norm": 0.754559338092804, |
| "learning_rate": 4.827076597351403e-06, |
| "loss": 0.011, |
| "step": 251 |
| }, |
| { |
| "epoch": 1.1007299270072992, |
| "grad_norm": 0.8147054314613342, |
| "learning_rate": 4.824805857748831e-06, |
| "loss": 0.0098, |
| "step": 252 |
| }, |
| { |
| "epoch": 1.1051094890510949, |
| "grad_norm": 0.814437985420227, |
| "learning_rate": 4.82252084736638e-06, |
| "loss": 0.0077, |
| "step": 253 |
| }, |
| { |
| "epoch": 1.1094890510948905, |
| "grad_norm": 0.7731255888938904, |
| "learning_rate": 4.820221580230545e-06, |
| "loss": 0.0129, |
| "step": 254 |
| }, |
| { |
| "epoch": 1.1138686131386861, |
| "grad_norm": 0.7589200139045715, |
| "learning_rate": 4.8179080704553386e-06, |
| "loss": 0.0095, |
| "step": 255 |
| }, |
| { |
| "epoch": 1.1182481751824818, |
| "grad_norm": 0.455625057220459, |
| "learning_rate": 4.815580332242199e-06, |
| "loss": 0.0088, |
| "step": 256 |
| }, |
| { |
| "epoch": 1.1226277372262774, |
| "grad_norm": 0.51591956615448, |
| "learning_rate": 4.8132383798799075e-06, |
| "loss": 0.0071, |
| "step": 257 |
| }, |
| { |
| "epoch": 1.127007299270073, |
| "grad_norm": 0.6024675965309143, |
| "learning_rate": 4.810882227744495e-06, |
| "loss": 0.0108, |
| "step": 258 |
| }, |
| { |
| "epoch": 1.1313868613138687, |
| "grad_norm": 0.6686123609542847, |
| "learning_rate": 4.808511890299163e-06, |
| "loss": 0.0139, |
| "step": 259 |
| }, |
| { |
| "epoch": 1.1357664233576643, |
| "grad_norm": 0.7872790694236755, |
| "learning_rate": 4.806127382094184e-06, |
| "loss": 0.0113, |
| "step": 260 |
| }, |
| { |
| "epoch": 1.14014598540146, |
| "grad_norm": 0.6551967263221741, |
| "learning_rate": 4.803728717766822e-06, |
| "loss": 0.0069, |
| "step": 261 |
| }, |
| { |
| "epoch": 1.1445255474452556, |
| "grad_norm": 0.7421084642410278, |
| "learning_rate": 4.801315912041232e-06, |
| "loss": 0.0083, |
| "step": 262 |
| }, |
| { |
| "epoch": 1.148905109489051, |
| "grad_norm": 0.6349561810493469, |
| "learning_rate": 4.798888979728382e-06, |
| "loss": 0.0097, |
| "step": 263 |
| }, |
| { |
| "epoch": 1.1532846715328466, |
| "grad_norm": 0.6274579167366028, |
| "learning_rate": 4.796447935725954e-06, |
| "loss": 0.0089, |
| "step": 264 |
| }, |
| { |
| "epoch": 1.1576642335766423, |
| "grad_norm": 0.5055127739906311, |
| "learning_rate": 4.793992795018253e-06, |
| "loss": 0.0062, |
| "step": 265 |
| }, |
| { |
| "epoch": 1.162043795620438, |
| "grad_norm": 1.1284935474395752, |
| "learning_rate": 4.791523572676115e-06, |
| "loss": 0.0118, |
| "step": 266 |
| }, |
| { |
| "epoch": 1.1664233576642336, |
| "grad_norm": 0.6343486905097961, |
| "learning_rate": 4.789040283856822e-06, |
| "loss": 0.0058, |
| "step": 267 |
| }, |
| { |
| "epoch": 1.1708029197080292, |
| "grad_norm": 0.9384168982505798, |
| "learning_rate": 4.7865429438039955e-06, |
| "loss": 0.0096, |
| "step": 268 |
| }, |
| { |
| "epoch": 1.1751824817518248, |
| "grad_norm": 0.879307746887207, |
| "learning_rate": 4.784031567847515e-06, |
| "loss": 0.0129, |
| "step": 269 |
| }, |
| { |
| "epoch": 1.1795620437956205, |
| "grad_norm": 0.5268783569335938, |
| "learning_rate": 4.781506171403416e-06, |
| "loss": 0.0073, |
| "step": 270 |
| }, |
| { |
| "epoch": 1.183941605839416, |
| "grad_norm": 1.332766056060791, |
| "learning_rate": 4.778966769973802e-06, |
| "loss": 0.0129, |
| "step": 271 |
| }, |
| { |
| "epoch": 1.1883211678832117, |
| "grad_norm": 0.7192438244819641, |
| "learning_rate": 4.7764133791467434e-06, |
| "loss": 0.0083, |
| "step": 272 |
| }, |
| { |
| "epoch": 1.1927007299270074, |
| "grad_norm": 0.5047981142997742, |
| "learning_rate": 4.773846014596185e-06, |
| "loss": 0.0057, |
| "step": 273 |
| }, |
| { |
| "epoch": 1.197080291970803, |
| "grad_norm": 0.5075733661651611, |
| "learning_rate": 4.7712646920818486e-06, |
| "loss": 0.0098, |
| "step": 274 |
| }, |
| { |
| "epoch": 1.2014598540145984, |
| "grad_norm": 0.5874909162521362, |
| "learning_rate": 4.7686694274491375e-06, |
| "loss": 0.0072, |
| "step": 275 |
| }, |
| { |
| "epoch": 1.205839416058394, |
| "grad_norm": 0.511114239692688, |
| "learning_rate": 4.766060236629037e-06, |
| "loss": 0.0058, |
| "step": 276 |
| }, |
| { |
| "epoch": 1.2102189781021897, |
| "grad_norm": 0.5427272915840149, |
| "learning_rate": 4.763437135638021e-06, |
| "loss": 0.0094, |
| "step": 277 |
| }, |
| { |
| "epoch": 1.2145985401459853, |
| "grad_norm": 0.6207345724105835, |
| "learning_rate": 4.760800140577947e-06, |
| "loss": 0.0117, |
| "step": 278 |
| }, |
| { |
| "epoch": 1.218978102189781, |
| "grad_norm": 0.9132710695266724, |
| "learning_rate": 4.758149267635963e-06, |
| "loss": 0.0085, |
| "step": 279 |
| }, |
| { |
| "epoch": 1.2233576642335766, |
| "grad_norm": 0.500217080116272, |
| "learning_rate": 4.755484533084407e-06, |
| "loss": 0.01, |
| "step": 280 |
| }, |
| { |
| "epoch": 1.2277372262773723, |
| "grad_norm": 0.38535866141319275, |
| "learning_rate": 4.7528059532807045e-06, |
| "loss": 0.0038, |
| "step": 281 |
| }, |
| { |
| "epoch": 1.2321167883211679, |
| "grad_norm": 0.5505772233009338, |
| "learning_rate": 4.750113544667271e-06, |
| "loss": 0.0064, |
| "step": 282 |
| }, |
| { |
| "epoch": 1.2364963503649635, |
| "grad_norm": 0.5370091795921326, |
| "learning_rate": 4.747407323771408e-06, |
| "loss": 0.0083, |
| "step": 283 |
| }, |
| { |
| "epoch": 1.2408759124087592, |
| "grad_norm": 0.6680497527122498, |
| "learning_rate": 4.744687307205207e-06, |
| "loss": 0.006, |
| "step": 284 |
| }, |
| { |
| "epoch": 1.2452554744525548, |
| "grad_norm": 0.5799117088317871, |
| "learning_rate": 4.74195351166544e-06, |
| "loss": 0.0067, |
| "step": 285 |
| }, |
| { |
| "epoch": 1.2496350364963504, |
| "grad_norm": 0.3809143304824829, |
| "learning_rate": 4.739205953933464e-06, |
| "loss": 0.0081, |
| "step": 286 |
| }, |
| { |
| "epoch": 1.254014598540146, |
| "grad_norm": 0.8633838891983032, |
| "learning_rate": 4.736444650875114e-06, |
| "loss": 0.0083, |
| "step": 287 |
| }, |
| { |
| "epoch": 1.2583941605839417, |
| "grad_norm": 0.4796256124973297, |
| "learning_rate": 4.7336696194405995e-06, |
| "loss": 0.0083, |
| "step": 288 |
| }, |
| { |
| "epoch": 1.2627737226277373, |
| "grad_norm": 0.8990418314933777, |
| "learning_rate": 4.730880876664402e-06, |
| "loss": 0.0053, |
| "step": 289 |
| }, |
| { |
| "epoch": 1.2671532846715328, |
| "grad_norm": 0.21372799575328827, |
| "learning_rate": 4.72807843966517e-06, |
| "loss": 0.0042, |
| "step": 290 |
| }, |
| { |
| "epoch": 1.2715328467153284, |
| "grad_norm": 1.0377510786056519, |
| "learning_rate": 4.725262325645615e-06, |
| "loss": 0.0083, |
| "step": 291 |
| }, |
| { |
| "epoch": 1.275912408759124, |
| "grad_norm": 0.29527121782302856, |
| "learning_rate": 4.722432551892402e-06, |
| "loss": 0.0023, |
| "step": 292 |
| }, |
| { |
| "epoch": 1.2802919708029197, |
| "grad_norm": 1.5753306150436401, |
| "learning_rate": 4.719589135776048e-06, |
| "loss": 0.0089, |
| "step": 293 |
| }, |
| { |
| "epoch": 1.2846715328467153, |
| "grad_norm": 0.3794252574443817, |
| "learning_rate": 4.716732094750813e-06, |
| "loss": 0.003, |
| "step": 294 |
| }, |
| { |
| "epoch": 1.289051094890511, |
| "grad_norm": 0.5407822132110596, |
| "learning_rate": 4.7138614463545926e-06, |
| "loss": 0.0075, |
| "step": 295 |
| }, |
| { |
| "epoch": 1.2934306569343066, |
| "grad_norm": 0.8722830414772034, |
| "learning_rate": 4.710977208208812e-06, |
| "loss": 0.0051, |
| "step": 296 |
| }, |
| { |
| "epoch": 1.2978102189781022, |
| "grad_norm": 0.6819527745246887, |
| "learning_rate": 4.708079398018316e-06, |
| "loss": 0.0094, |
| "step": 297 |
| }, |
| { |
| "epoch": 1.3021897810218979, |
| "grad_norm": 0.7198041677474976, |
| "learning_rate": 4.7051680335712626e-06, |
| "loss": 0.0068, |
| "step": 298 |
| }, |
| { |
| "epoch": 1.3065693430656935, |
| "grad_norm": 0.467638224363327, |
| "learning_rate": 4.70224313273901e-06, |
| "loss": 0.0059, |
| "step": 299 |
| }, |
| { |
| "epoch": 1.310948905109489, |
| "grad_norm": 0.4593437612056732, |
| "learning_rate": 4.699304713476009e-06, |
| "loss": 0.0039, |
| "step": 300 |
| }, |
| { |
| "epoch": 1.3153284671532846, |
| "grad_norm": 0.5790926814079285, |
| "learning_rate": 4.696352793819698e-06, |
| "loss": 0.0057, |
| "step": 301 |
| }, |
| { |
| "epoch": 1.3197080291970802, |
| "grad_norm": 0.3413192331790924, |
| "learning_rate": 4.693387391890382e-06, |
| "loss": 0.0055, |
| "step": 302 |
| }, |
| { |
| "epoch": 1.3240875912408758, |
| "grad_norm": 0.5049291849136353, |
| "learning_rate": 4.690408525891129e-06, |
| "loss": 0.0061, |
| "step": 303 |
| }, |
| { |
| "epoch": 1.3284671532846715, |
| "grad_norm": 0.25111323595046997, |
| "learning_rate": 4.687416214107655e-06, |
| "loss": 0.0041, |
| "step": 304 |
| }, |
| { |
| "epoch": 1.332846715328467, |
| "grad_norm": 0.5559152364730835, |
| "learning_rate": 4.684410474908214e-06, |
| "loss": 0.0093, |
| "step": 305 |
| }, |
| { |
| "epoch": 1.3372262773722627, |
| "grad_norm": 0.3842668831348419, |
| "learning_rate": 4.681391326743484e-06, |
| "loss": 0.0051, |
| "step": 306 |
| }, |
| { |
| "epoch": 1.3416058394160584, |
| "grad_norm": 1.6264209747314453, |
| "learning_rate": 4.67835878814645e-06, |
| "loss": 0.0063, |
| "step": 307 |
| }, |
| { |
| "epoch": 1.345985401459854, |
| "grad_norm": 0.5829497575759888, |
| "learning_rate": 4.6753128777323e-06, |
| "loss": 0.0054, |
| "step": 308 |
| }, |
| { |
| "epoch": 1.3503649635036497, |
| "grad_norm": 0.6949307322502136, |
| "learning_rate": 4.6722536141982995e-06, |
| "loss": 0.0055, |
| "step": 309 |
| }, |
| { |
| "epoch": 1.3547445255474453, |
| "grad_norm": 0.6198911070823669, |
| "learning_rate": 4.669181016323686e-06, |
| "loss": 0.0063, |
| "step": 310 |
| }, |
| { |
| "epoch": 1.359124087591241, |
| "grad_norm": 0.4557003080844879, |
| "learning_rate": 4.666095102969545e-06, |
| "loss": 0.0053, |
| "step": 311 |
| }, |
| { |
| "epoch": 1.3635036496350366, |
| "grad_norm": 0.7198585271835327, |
| "learning_rate": 4.662995893078702e-06, |
| "loss": 0.0048, |
| "step": 312 |
| }, |
| { |
| "epoch": 1.3678832116788322, |
| "grad_norm": 0.4380558431148529, |
| "learning_rate": 4.659883405675604e-06, |
| "loss": 0.0057, |
| "step": 313 |
| }, |
| { |
| "epoch": 1.3722627737226278, |
| "grad_norm": 0.986754298210144, |
| "learning_rate": 4.656757659866199e-06, |
| "loss": 0.0091, |
| "step": 314 |
| }, |
| { |
| "epoch": 1.3766423357664235, |
| "grad_norm": 1.1282256841659546, |
| "learning_rate": 4.6536186748378236e-06, |
| "loss": 0.0058, |
| "step": 315 |
| }, |
| { |
| "epoch": 1.3810218978102191, |
| "grad_norm": 0.3973119854927063, |
| "learning_rate": 4.6504664698590795e-06, |
| "loss": 0.0048, |
| "step": 316 |
| }, |
| { |
| "epoch": 1.3854014598540145, |
| "grad_norm": 0.4406156837940216, |
| "learning_rate": 4.647301064279725e-06, |
| "loss": 0.0039, |
| "step": 317 |
| }, |
| { |
| "epoch": 1.3897810218978102, |
| "grad_norm": 0.8249232172966003, |
| "learning_rate": 4.644122477530545e-06, |
| "loss": 0.0084, |
| "step": 318 |
| }, |
| { |
| "epoch": 1.3941605839416058, |
| "grad_norm": 1.4877322912216187, |
| "learning_rate": 4.640930729123237e-06, |
| "loss": 0.0054, |
| "step": 319 |
| }, |
| { |
| "epoch": 1.3985401459854014, |
| "grad_norm": 0.4890510141849518, |
| "learning_rate": 4.6377258386502956e-06, |
| "loss": 0.0021, |
| "step": 320 |
| }, |
| { |
| "epoch": 1.402919708029197, |
| "grad_norm": 0.36471042037010193, |
| "learning_rate": 4.634507825784882e-06, |
| "loss": 0.004, |
| "step": 321 |
| }, |
| { |
| "epoch": 1.4072992700729927, |
| "grad_norm": 1.1714568138122559, |
| "learning_rate": 4.631276710280713e-06, |
| "loss": 0.0079, |
| "step": 322 |
| }, |
| { |
| "epoch": 1.4116788321167884, |
| "grad_norm": 0.509325385093689, |
| "learning_rate": 4.628032511971934e-06, |
| "loss": 0.0027, |
| "step": 323 |
| }, |
| { |
| "epoch": 1.416058394160584, |
| "grad_norm": 0.34730231761932373, |
| "learning_rate": 4.624775250772999e-06, |
| "loss": 0.004, |
| "step": 324 |
| }, |
| { |
| "epoch": 1.4204379562043796, |
| "grad_norm": 0.4304009974002838, |
| "learning_rate": 4.6215049466785484e-06, |
| "loss": 0.0046, |
| "step": 325 |
| }, |
| { |
| "epoch": 1.4248175182481753, |
| "grad_norm": 0.721092700958252, |
| "learning_rate": 4.618221619763287e-06, |
| "loss": 0.0042, |
| "step": 326 |
| }, |
| { |
| "epoch": 1.4291970802919707, |
| "grad_norm": 0.9019221067428589, |
| "learning_rate": 4.6149252901818585e-06, |
| "loss": 0.008, |
| "step": 327 |
| }, |
| { |
| "epoch": 1.4335766423357663, |
| "grad_norm": 3.142669439315796, |
| "learning_rate": 4.611615978168725e-06, |
| "loss": 0.0053, |
| "step": 328 |
| }, |
| { |
| "epoch": 1.437956204379562, |
| "grad_norm": 0.8218545317649841, |
| "learning_rate": 4.608293704038039e-06, |
| "loss": 0.007, |
| "step": 329 |
| }, |
| { |
| "epoch": 1.4423357664233576, |
| "grad_norm": 0.49122154712677, |
| "learning_rate": 4.604958488183523e-06, |
| "loss": 0.0056, |
| "step": 330 |
| }, |
| { |
| "epoch": 1.4467153284671532, |
| "grad_norm": 0.7947913408279419, |
| "learning_rate": 4.6016103510783405e-06, |
| "loss": 0.0069, |
| "step": 331 |
| }, |
| { |
| "epoch": 1.4510948905109489, |
| "grad_norm": 0.38262632489204407, |
| "learning_rate": 4.598249313274972e-06, |
| "loss": 0.0054, |
| "step": 332 |
| }, |
| { |
| "epoch": 1.4554744525547445, |
| "grad_norm": 0.7605669498443604, |
| "learning_rate": 4.59487539540509e-06, |
| "loss": 0.0074, |
| "step": 333 |
| }, |
| { |
| "epoch": 1.4598540145985401, |
| "grad_norm": 0.4355056583881378, |
| "learning_rate": 4.591488618179428e-06, |
| "loss": 0.0027, |
| "step": 334 |
| }, |
| { |
| "epoch": 1.4642335766423358, |
| "grad_norm": 0.4696539640426636, |
| "learning_rate": 4.58808900238766e-06, |
| "loss": 0.0063, |
| "step": 335 |
| }, |
| { |
| "epoch": 1.4686131386861314, |
| "grad_norm": 0.4078298807144165, |
| "learning_rate": 4.584676568898267e-06, |
| "loss": 0.0039, |
| "step": 336 |
| }, |
| { |
| "epoch": 1.472992700729927, |
| "grad_norm": 0.22500784695148468, |
| "learning_rate": 4.581251338658412e-06, |
| "loss": 0.0027, |
| "step": 337 |
| }, |
| { |
| "epoch": 1.4773722627737227, |
| "grad_norm": 0.28224533796310425, |
| "learning_rate": 4.577813332693812e-06, |
| "loss": 0.0037, |
| "step": 338 |
| }, |
| { |
| "epoch": 1.4817518248175183, |
| "grad_norm": 0.4234824478626251, |
| "learning_rate": 4.574362572108604e-06, |
| "loss": 0.0057, |
| "step": 339 |
| }, |
| { |
| "epoch": 1.486131386861314, |
| "grad_norm": 0.4610466957092285, |
| "learning_rate": 4.570899078085223e-06, |
| "loss": 0.0033, |
| "step": 340 |
| }, |
| { |
| "epoch": 1.4905109489051096, |
| "grad_norm": 0.8538670539855957, |
| "learning_rate": 4.567422871884265e-06, |
| "loss": 0.0044, |
| "step": 341 |
| }, |
| { |
| "epoch": 1.4948905109489052, |
| "grad_norm": 0.4335832893848419, |
| "learning_rate": 4.563933974844361e-06, |
| "loss": 0.0041, |
| "step": 342 |
| }, |
| { |
| "epoch": 1.4992700729927007, |
| "grad_norm": 0.4888335168361664, |
| "learning_rate": 4.560432408382045e-06, |
| "loss": 0.003, |
| "step": 343 |
| }, |
| { |
| "epoch": 1.5036496350364965, |
| "grad_norm": 0.545806884765625, |
| "learning_rate": 4.5569181939916195e-06, |
| "loss": 0.0062, |
| "step": 344 |
| }, |
| { |
| "epoch": 1.508029197080292, |
| "grad_norm": 0.7364339828491211, |
| "learning_rate": 4.553391353245029e-06, |
| "loss": 0.0068, |
| "step": 345 |
| }, |
| { |
| "epoch": 1.5124087591240876, |
| "grad_norm": 0.7074061036109924, |
| "learning_rate": 4.549851907791722e-06, |
| "loss": 0.0034, |
| "step": 346 |
| }, |
| { |
| "epoch": 1.5167883211678832, |
| "grad_norm": 0.39756596088409424, |
| "learning_rate": 4.546299879358524e-06, |
| "loss": 0.0032, |
| "step": 347 |
| }, |
| { |
| "epoch": 1.5211678832116788, |
| "grad_norm": 0.6966583728790283, |
| "learning_rate": 4.542735289749498e-06, |
| "loss": 0.0013, |
| "step": 348 |
| }, |
| { |
| "epoch": 1.5255474452554745, |
| "grad_norm": 0.19892163574695587, |
| "learning_rate": 4.5391581608458144e-06, |
| "loss": 0.0011, |
| "step": 349 |
| }, |
| { |
| "epoch": 1.5299270072992701, |
| "grad_norm": 0.718493640422821, |
| "learning_rate": 4.535568514605617e-06, |
| "loss": 0.0026, |
| "step": 350 |
| }, |
| { |
| "epoch": 1.5343065693430655, |
| "grad_norm": 0.8941331505775452, |
| "learning_rate": 4.5319663730638865e-06, |
| "loss": 0.0034, |
| "step": 351 |
| }, |
| { |
| "epoch": 1.5386861313868612, |
| "grad_norm": 0.33956244587898254, |
| "learning_rate": 4.528351758332303e-06, |
| "loss": 0.002, |
| "step": 352 |
| }, |
| { |
| "epoch": 1.5430656934306568, |
| "grad_norm": 0.557651937007904, |
| "learning_rate": 4.5247246925991185e-06, |
| "loss": 0.0013, |
| "step": 353 |
| }, |
| { |
| "epoch": 1.5474452554744524, |
| "grad_norm": 0.7165636420249939, |
| "learning_rate": 4.5210851981290096e-06, |
| "loss": 0.003, |
| "step": 354 |
| }, |
| { |
| "epoch": 1.551824817518248, |
| "grad_norm": 0.36456218361854553, |
| "learning_rate": 4.5174332972629505e-06, |
| "loss": 0.0022, |
| "step": 355 |
| }, |
| { |
| "epoch": 1.5562043795620437, |
| "grad_norm": 0.1896594613790512, |
| "learning_rate": 4.5137690124180714e-06, |
| "loss": 0.0056, |
| "step": 356 |
| }, |
| { |
| "epoch": 1.5605839416058394, |
| "grad_norm": 0.6159863471984863, |
| "learning_rate": 4.510092366087518e-06, |
| "loss": 0.0057, |
| "step": 357 |
| }, |
| { |
| "epoch": 1.564963503649635, |
| "grad_norm": 1.0295354127883911, |
| "learning_rate": 4.506403380840321e-06, |
| "loss": 0.0011, |
| "step": 358 |
| }, |
| { |
| "epoch": 1.5693430656934306, |
| "grad_norm": 0.33694684505462646, |
| "learning_rate": 4.50270207932125e-06, |
| "loss": 0.0024, |
| "step": 359 |
| }, |
| { |
| "epoch": 1.5737226277372263, |
| "grad_norm": 0.8961917757987976, |
| "learning_rate": 4.498988484250681e-06, |
| "loss": 0.0058, |
| "step": 360 |
| }, |
| { |
| "epoch": 1.578102189781022, |
| "grad_norm": 1.736559510231018, |
| "learning_rate": 4.4952626184244504e-06, |
| "loss": 0.006, |
| "step": 361 |
| }, |
| { |
| "epoch": 1.5824817518248175, |
| "grad_norm": 0.41748425364494324, |
| "learning_rate": 4.491524504713722e-06, |
| "loss": 0.0017, |
| "step": 362 |
| }, |
| { |
| "epoch": 1.5868613138686132, |
| "grad_norm": 0.501815140247345, |
| "learning_rate": 4.487774166064839e-06, |
| "loss": 0.0018, |
| "step": 363 |
| }, |
| { |
| "epoch": 1.5912408759124088, |
| "grad_norm": 0.4359874427318573, |
| "learning_rate": 4.48401162549919e-06, |
| "loss": 0.0044, |
| "step": 364 |
| }, |
| { |
| "epoch": 1.5956204379562045, |
| "grad_norm": 0.3699054718017578, |
| "learning_rate": 4.480236906113066e-06, |
| "loss": 0.0036, |
| "step": 365 |
| }, |
| { |
| "epoch": 1.6, |
| "grad_norm": 0.5684164762496948, |
| "learning_rate": 4.476450031077512e-06, |
| "loss": 0.0023, |
| "step": 366 |
| }, |
| { |
| "epoch": 1.6043795620437957, |
| "grad_norm": 0.6451728343963623, |
| "learning_rate": 4.4726510236381956e-06, |
| "loss": 0.0044, |
| "step": 367 |
| }, |
| { |
| "epoch": 1.6087591240875914, |
| "grad_norm": 2.3887782096862793, |
| "learning_rate": 4.468839907115259e-06, |
| "loss": 0.0059, |
| "step": 368 |
| }, |
| { |
| "epoch": 1.613138686131387, |
| "grad_norm": 0.6304333806037903, |
| "learning_rate": 4.465016704903171e-06, |
| "loss": 0.0023, |
| "step": 369 |
| }, |
| { |
| "epoch": 1.6175182481751826, |
| "grad_norm": 0.38788676261901855, |
| "learning_rate": 4.461181440470592e-06, |
| "loss": 0.0027, |
| "step": 370 |
| }, |
| { |
| "epoch": 1.621897810218978, |
| "grad_norm": 0.3805489242076874, |
| "learning_rate": 4.457334137360226e-06, |
| "loss": 0.0012, |
| "step": 371 |
| }, |
| { |
| "epoch": 1.6262773722627737, |
| "grad_norm": 0.3548617660999298, |
| "learning_rate": 4.453474819188676e-06, |
| "loss": 0.0032, |
| "step": 372 |
| }, |
| { |
| "epoch": 1.6306569343065693, |
| "grad_norm": 0.8332701921463013, |
| "learning_rate": 4.449603509646297e-06, |
| "loss": 0.0028, |
| "step": 373 |
| }, |
| { |
| "epoch": 1.635036496350365, |
| "grad_norm": 0.7843290567398071, |
| "learning_rate": 4.445720232497055e-06, |
| "loss": 0.0037, |
| "step": 374 |
| }, |
| { |
| "epoch": 1.6394160583941606, |
| "grad_norm": 0.7074784636497498, |
| "learning_rate": 4.44182501157838e-06, |
| "loss": 0.003, |
| "step": 375 |
| }, |
| { |
| "epoch": 1.6437956204379562, |
| "grad_norm": 0.6076835989952087, |
| "learning_rate": 4.4379178708010155e-06, |
| "loss": 0.0019, |
| "step": 376 |
| }, |
| { |
| "epoch": 1.6481751824817519, |
| "grad_norm": 0.5793138146400452, |
| "learning_rate": 4.433998834148877e-06, |
| "loss": 0.0036, |
| "step": 377 |
| }, |
| { |
| "epoch": 1.6525547445255473, |
| "grad_norm": 0.7881670594215393, |
| "learning_rate": 4.430067925678902e-06, |
| "loss": 0.0025, |
| "step": 378 |
| }, |
| { |
| "epoch": 1.656934306569343, |
| "grad_norm": 0.24594959616661072, |
| "learning_rate": 4.426125169520903e-06, |
| "loss": 0.0022, |
| "step": 379 |
| }, |
| { |
| "epoch": 1.6613138686131386, |
| "grad_norm": 0.2806392312049866, |
| "learning_rate": 4.42217058987742e-06, |
| "loss": 0.0005, |
| "step": 380 |
| }, |
| { |
| "epoch": 1.6656934306569342, |
| "grad_norm": 0.4979081153869629, |
| "learning_rate": 4.418204211023569e-06, |
| "loss": 0.0021, |
| "step": 381 |
| }, |
| { |
| "epoch": 1.6700729927007298, |
| "grad_norm": 0.42502567172050476, |
| "learning_rate": 4.4142260573068995e-06, |
| "loss": 0.0053, |
| "step": 382 |
| }, |
| { |
| "epoch": 1.6744525547445255, |
| "grad_norm": 1.1811860799789429, |
| "learning_rate": 4.410236153147235e-06, |
| "loss": 0.0026, |
| "step": 383 |
| }, |
| { |
| "epoch": 1.6788321167883211, |
| "grad_norm": 0.4582519829273224, |
| "learning_rate": 4.4062345230365345e-06, |
| "loss": 0.0024, |
| "step": 384 |
| }, |
| { |
| "epoch": 1.6832116788321168, |
| "grad_norm": 0.30464282631874084, |
| "learning_rate": 4.402221191538733e-06, |
| "loss": 0.0055, |
| "step": 385 |
| }, |
| { |
| "epoch": 1.6875912408759124, |
| "grad_norm": 0.22526738047599792, |
| "learning_rate": 4.3981961832895945e-06, |
| "loss": 0.0003, |
| "step": 386 |
| }, |
| { |
| "epoch": 1.691970802919708, |
| "grad_norm": 0.32826468348503113, |
| "learning_rate": 4.394159522996564e-06, |
| "loss": 0.0009, |
| "step": 387 |
| }, |
| { |
| "epoch": 1.6963503649635037, |
| "grad_norm": 0.5943058133125305, |
| "learning_rate": 4.390111235438606e-06, |
| "loss": 0.0028, |
| "step": 388 |
| }, |
| { |
| "epoch": 1.7007299270072993, |
| "grad_norm": 1.7098802328109741, |
| "learning_rate": 4.3860513454660666e-06, |
| "loss": 0.0035, |
| "step": 389 |
| }, |
| { |
| "epoch": 1.705109489051095, |
| "grad_norm": 0.36092230677604675, |
| "learning_rate": 4.381979878000506e-06, |
| "loss": 0.0037, |
| "step": 390 |
| }, |
| { |
| "epoch": 1.7094890510948906, |
| "grad_norm": 0.2771202027797699, |
| "learning_rate": 4.377896858034557e-06, |
| "loss": 0.0018, |
| "step": 391 |
| }, |
| { |
| "epoch": 1.7138686131386862, |
| "grad_norm": 0.12323533743619919, |
| "learning_rate": 4.373802310631765e-06, |
| "loss": 0.0008, |
| "step": 392 |
| }, |
| { |
| "epoch": 1.7182481751824819, |
| "grad_norm": 0.19630667567253113, |
| "learning_rate": 4.3696962609264375e-06, |
| "loss": 0.0008, |
| "step": 393 |
| }, |
| { |
| "epoch": 1.7226277372262775, |
| "grad_norm": 0.4139691889286041, |
| "learning_rate": 4.365578734123489e-06, |
| "loss": 0.0031, |
| "step": 394 |
| }, |
| { |
| "epoch": 1.7270072992700731, |
| "grad_norm": 0.6594070196151733, |
| "learning_rate": 4.3614497554982845e-06, |
| "loss": 0.0044, |
| "step": 395 |
| }, |
| { |
| "epoch": 1.7313868613138688, |
| "grad_norm": 0.2723977863788605, |
| "learning_rate": 4.357309350396488e-06, |
| "loss": 0.0018, |
| "step": 396 |
| }, |
| { |
| "epoch": 1.7357664233576642, |
| "grad_norm": 0.16032417118549347, |
| "learning_rate": 4.3531575442339025e-06, |
| "loss": 0.0005, |
| "step": 397 |
| }, |
| { |
| "epoch": 1.7401459854014598, |
| "grad_norm": 0.3799298107624054, |
| "learning_rate": 4.348994362496316e-06, |
| "loss": 0.006, |
| "step": 398 |
| }, |
| { |
| "epoch": 1.7445255474452555, |
| "grad_norm": 0.28333285450935364, |
| "learning_rate": 4.344819830739349e-06, |
| "loss": 0.0015, |
| "step": 399 |
| }, |
| { |
| "epoch": 1.748905109489051, |
| "grad_norm": 0.3942627012729645, |
| "learning_rate": 4.34063397458829e-06, |
| "loss": 0.0018, |
| "step": 400 |
| }, |
| { |
| "epoch": 1.7532846715328467, |
| "grad_norm": 0.8048702478408813, |
| "learning_rate": 4.336436819737942e-06, |
| "loss": 0.0021, |
| "step": 401 |
| }, |
| { |
| "epoch": 1.7576642335766424, |
| "grad_norm": 0.1157551184296608, |
| "learning_rate": 4.332228391952469e-06, |
| "loss": 0.0009, |
| "step": 402 |
| }, |
| { |
| "epoch": 1.762043795620438, |
| "grad_norm": 0.18697626888751984, |
| "learning_rate": 4.328008717065228e-06, |
| "loss": 0.0031, |
| "step": 403 |
| }, |
| { |
| "epoch": 1.7664233576642334, |
| "grad_norm": 0.6587929129600525, |
| "learning_rate": 4.323777820978622e-06, |
| "loss": 0.0011, |
| "step": 404 |
| }, |
| { |
| "epoch": 1.770802919708029, |
| "grad_norm": 0.40322232246398926, |
| "learning_rate": 4.319535729663929e-06, |
| "loss": 0.0013, |
| "step": 405 |
| }, |
| { |
| "epoch": 1.7751824817518247, |
| "grad_norm": 0.33533793687820435, |
| "learning_rate": 4.315282469161156e-06, |
| "loss": 0.0008, |
| "step": 406 |
| }, |
| { |
| "epoch": 1.7795620437956203, |
| "grad_norm": 0.2024499624967575, |
| "learning_rate": 4.3110180655788645e-06, |
| "loss": 0.0022, |
| "step": 407 |
| }, |
| { |
| "epoch": 1.783941605839416, |
| "grad_norm": 0.5895872116088867, |
| "learning_rate": 4.306742545094022e-06, |
| "loss": 0.0019, |
| "step": 408 |
| }, |
| { |
| "epoch": 1.7883211678832116, |
| "grad_norm": 0.3792962431907654, |
| "learning_rate": 4.3024559339518355e-06, |
| "loss": 0.0017, |
| "step": 409 |
| }, |
| { |
| "epoch": 1.7927007299270072, |
| "grad_norm": 0.7945428490638733, |
| "learning_rate": 4.298158258465593e-06, |
| "loss": 0.0027, |
| "step": 410 |
| }, |
| { |
| "epoch": 1.7970802919708029, |
| "grad_norm": 0.37964075803756714, |
| "learning_rate": 4.2938495450164984e-06, |
| "loss": 0.0014, |
| "step": 411 |
| }, |
| { |
| "epoch": 1.8014598540145985, |
| "grad_norm": 0.08326616883277893, |
| "learning_rate": 4.289529820053515e-06, |
| "loss": 0.0005, |
| "step": 412 |
| }, |
| { |
| "epoch": 1.8058394160583942, |
| "grad_norm": 0.14445550739765167, |
| "learning_rate": 4.285199110093198e-06, |
| "loss": 0.0021, |
| "step": 413 |
| }, |
| { |
| "epoch": 1.8102189781021898, |
| "grad_norm": 0.24620558321475983, |
| "learning_rate": 4.280857441719533e-06, |
| "loss": 0.0007, |
| "step": 414 |
| }, |
| { |
| "epoch": 1.8145985401459854, |
| "grad_norm": 0.2617506980895996, |
| "learning_rate": 4.276504841583778e-06, |
| "loss": 0.0011, |
| "step": 415 |
| }, |
| { |
| "epoch": 1.818978102189781, |
| "grad_norm": 0.22467154264450073, |
| "learning_rate": 4.27214133640429e-06, |
| "loss": 0.0006, |
| "step": 416 |
| }, |
| { |
| "epoch": 1.8233576642335767, |
| "grad_norm": 0.25831958651542664, |
| "learning_rate": 4.267766952966369e-06, |
| "loss": 0.0029, |
| "step": 417 |
| }, |
| { |
| "epoch": 1.8277372262773723, |
| "grad_norm": 0.30368125438690186, |
| "learning_rate": 4.263381718122092e-06, |
| "loss": 0.0016, |
| "step": 418 |
| }, |
| { |
| "epoch": 1.832116788321168, |
| "grad_norm": 0.6697282195091248, |
| "learning_rate": 4.258985658790144e-06, |
| "loss": 0.0044, |
| "step": 419 |
| }, |
| { |
| "epoch": 1.8364963503649636, |
| "grad_norm": 0.4912242293357849, |
| "learning_rate": 4.25457880195566e-06, |
| "loss": 0.0014, |
| "step": 420 |
| }, |
| { |
| "epoch": 1.8408759124087593, |
| "grad_norm": 0.17477519810199738, |
| "learning_rate": 4.2501611746700526e-06, |
| "loss": 0.0002, |
| "step": 421 |
| }, |
| { |
| "epoch": 1.845255474452555, |
| "grad_norm": 0.09962823987007141, |
| "learning_rate": 4.245732804050848e-06, |
| "loss": 0.0009, |
| "step": 422 |
| }, |
| { |
| "epoch": 1.8496350364963505, |
| "grad_norm": 0.5256549119949341, |
| "learning_rate": 4.241293717281523e-06, |
| "loss": 0.0005, |
| "step": 423 |
| }, |
| { |
| "epoch": 1.854014598540146, |
| "grad_norm": 0.1596180498600006, |
| "learning_rate": 4.236843941611332e-06, |
| "loss": 0.001, |
| "step": 424 |
| }, |
| { |
| "epoch": 1.8583941605839416, |
| "grad_norm": 0.3437536656856537, |
| "learning_rate": 4.232383504355147e-06, |
| "loss": 0.002, |
| "step": 425 |
| }, |
| { |
| "epoch": 1.8627737226277372, |
| "grad_norm": 0.32742857933044434, |
| "learning_rate": 4.227912432893282e-06, |
| "loss": 0.0018, |
| "step": 426 |
| }, |
| { |
| "epoch": 1.8671532846715329, |
| "grad_norm": 0.5527262091636658, |
| "learning_rate": 4.223430754671331e-06, |
| "loss": 0.0004, |
| "step": 427 |
| }, |
| { |
| "epoch": 1.8715328467153285, |
| "grad_norm": 0.11191878467798233, |
| "learning_rate": 4.218938497199996e-06, |
| "loss": 0.0003, |
| "step": 428 |
| }, |
| { |
| "epoch": 1.8759124087591241, |
| "grad_norm": 0.09846347570419312, |
| "learning_rate": 4.214435688054922e-06, |
| "loss": 0.0004, |
| "step": 429 |
| }, |
| { |
| "epoch": 1.8802919708029195, |
| "grad_norm": 0.16991831362247467, |
| "learning_rate": 4.209922354876523e-06, |
| "loss": 0.0008, |
| "step": 430 |
| }, |
| { |
| "epoch": 1.8846715328467152, |
| "grad_norm": 0.126469686627388, |
| "learning_rate": 4.2053985253698155e-06, |
| "loss": 0.0004, |
| "step": 431 |
| }, |
| { |
| "epoch": 1.8890510948905108, |
| "grad_norm": 0.3232942521572113, |
| "learning_rate": 4.200864227304247e-06, |
| "loss": 0.0022, |
| "step": 432 |
| }, |
| { |
| "epoch": 1.8934306569343065, |
| "grad_norm": 0.3737439811229706, |
| "learning_rate": 4.196319488513528e-06, |
| "loss": 0.0017, |
| "step": 433 |
| }, |
| { |
| "epoch": 1.897810218978102, |
| "grad_norm": 0.14488628506660461, |
| "learning_rate": 4.191764336895455e-06, |
| "loss": 0.0002, |
| "step": 434 |
| }, |
| { |
| "epoch": 1.9021897810218977, |
| "grad_norm": 0.16040323674678802, |
| "learning_rate": 4.187198800411748e-06, |
| "loss": 0.0005, |
| "step": 435 |
| }, |
| { |
| "epoch": 1.9065693430656934, |
| "grad_norm": 0.19812235236167908, |
| "learning_rate": 4.182622907087872e-06, |
| "loss": 0.0002, |
| "step": 436 |
| }, |
| { |
| "epoch": 1.910948905109489, |
| "grad_norm": 0.059883181005716324, |
| "learning_rate": 4.178036685012869e-06, |
| "loss": 0.0005, |
| "step": 437 |
| }, |
| { |
| "epoch": 1.9153284671532846, |
| "grad_norm": 0.2905563414096832, |
| "learning_rate": 4.1734401623391794e-06, |
| "loss": 0.001, |
| "step": 438 |
| }, |
| { |
| "epoch": 1.9197080291970803, |
| "grad_norm": 0.38163650035858154, |
| "learning_rate": 4.168833367282479e-06, |
| "loss": 0.0007, |
| "step": 439 |
| }, |
| { |
| "epoch": 1.924087591240876, |
| "grad_norm": 0.04273957014083862, |
| "learning_rate": 4.164216328121499e-06, |
| "loss": 0.0001, |
| "step": 440 |
| }, |
| { |
| "epoch": 1.9284671532846716, |
| "grad_norm": 0.02129952795803547, |
| "learning_rate": 4.15958907319785e-06, |
| "loss": 0.0001, |
| "step": 441 |
| }, |
| { |
| "epoch": 1.9328467153284672, |
| "grad_norm": 0.016533153131604195, |
| "learning_rate": 4.154951630915859e-06, |
| "loss": 0.0001, |
| "step": 442 |
| }, |
| { |
| "epoch": 1.9372262773722628, |
| "grad_norm": 0.11019770801067352, |
| "learning_rate": 4.150304029742381e-06, |
| "loss": 0.0002, |
| "step": 443 |
| }, |
| { |
| "epoch": 1.9416058394160585, |
| "grad_norm": 0.05474651977419853, |
| "learning_rate": 4.145646298206636e-06, |
| "loss": 0.0002, |
| "step": 444 |
| }, |
| { |
| "epoch": 1.945985401459854, |
| "grad_norm": 0.10142989456653595, |
| "learning_rate": 4.1409784649000255e-06, |
| "loss": 0.0001, |
| "step": 445 |
| }, |
| { |
| "epoch": 1.9503649635036497, |
| "grad_norm": 0.21639519929885864, |
| "learning_rate": 4.136300558475962e-06, |
| "loss": 0.0019, |
| "step": 446 |
| }, |
| { |
| "epoch": 1.9547445255474454, |
| "grad_norm": 0.45263969898223877, |
| "learning_rate": 4.131612607649694e-06, |
| "loss": 0.0034, |
| "step": 447 |
| }, |
| { |
| "epoch": 1.959124087591241, |
| "grad_norm": 0.38673898577690125, |
| "learning_rate": 4.126914641198123e-06, |
| "loss": 0.0005, |
| "step": 448 |
| }, |
| { |
| "epoch": 1.9635036496350367, |
| "grad_norm": 0.29815611243247986, |
| "learning_rate": 4.1222066879596344e-06, |
| "loss": 0.0004, |
| "step": 449 |
| }, |
| { |
| "epoch": 1.967883211678832, |
| "grad_norm": 0.029003242030739784, |
| "learning_rate": 4.1174887768339165e-06, |
| "loss": 0.0002, |
| "step": 450 |
| }, |
| { |
| "epoch": 1.9722627737226277, |
| "grad_norm": 0.21172675490379333, |
| "learning_rate": 4.112760936781783e-06, |
| "loss": 0.0002, |
| "step": 451 |
| }, |
| { |
| "epoch": 1.9766423357664233, |
| "grad_norm": 0.27625802159309387, |
| "learning_rate": 4.108023196824998e-06, |
| "loss": 0.0038, |
| "step": 452 |
| }, |
| { |
| "epoch": 1.981021897810219, |
| "grad_norm": 0.26207876205444336, |
| "learning_rate": 4.103275586046095e-06, |
| "loss": 0.0002, |
| "step": 453 |
| }, |
| { |
| "epoch": 1.9854014598540146, |
| "grad_norm": 0.6478922367095947, |
| "learning_rate": 4.098518133588198e-06, |
| "loss": 0.0015, |
| "step": 454 |
| }, |
| { |
| "epoch": 1.9897810218978103, |
| "grad_norm": 0.10226385295391083, |
| "learning_rate": 4.093750868654845e-06, |
| "loss": 0.0005, |
| "step": 455 |
| }, |
| { |
| "epoch": 1.994160583941606, |
| "grad_norm": 0.18696191906929016, |
| "learning_rate": 4.088973820509811e-06, |
| "loss": 0.0004, |
| "step": 456 |
| }, |
| { |
| "epoch": 1.9985401459854013, |
| "grad_norm": 0.26377999782562256, |
| "learning_rate": 4.0841870184769184e-06, |
| "loss": 0.0006, |
| "step": 457 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 0.26377999782562256, |
| "learning_rate": 4.079390491939868e-06, |
| "loss": 0.0002, |
| "step": 458 |
| }, |
| { |
| "epoch": 2.0043795620437956, |
| "grad_norm": 0.1395256221294403, |
| "learning_rate": 4.074584270342057e-06, |
| "loss": 0.0001, |
| "step": 459 |
| }, |
| { |
| "epoch": 2.0087591240875913, |
| "grad_norm": 0.028063921257853508, |
| "learning_rate": 4.069768383186388e-06, |
| "loss": 0.0001, |
| "step": 460 |
| }, |
| { |
| "epoch": 2.013138686131387, |
| "grad_norm": 0.12402255833148956, |
| "learning_rate": 4.064942860035102e-06, |
| "loss": 0.0008, |
| "step": 461 |
| }, |
| { |
| "epoch": 2.0175182481751825, |
| "grad_norm": 1.1348239183425903, |
| "learning_rate": 4.060107730509587e-06, |
| "loss": 0.0002, |
| "step": 462 |
| }, |
| { |
| "epoch": 2.021897810218978, |
| "grad_norm": 0.08540225028991699, |
| "learning_rate": 4.055263024290201e-06, |
| "loss": 0.0001, |
| "step": 463 |
| }, |
| { |
| "epoch": 2.026277372262774, |
| "grad_norm": 0.021235302090644836, |
| "learning_rate": 4.0504087711160875e-06, |
| "loss": 0.0006, |
| "step": 464 |
| }, |
| { |
| "epoch": 2.0306569343065695, |
| "grad_norm": 0.11977211385965347, |
| "learning_rate": 4.045545000784995e-06, |
| "loss": 0.0001, |
| "step": 465 |
| }, |
| { |
| "epoch": 2.035036496350365, |
| "grad_norm": 0.03654933720827103, |
| "learning_rate": 4.040671743153091e-06, |
| "loss": 0.0, |
| "step": 466 |
| }, |
| { |
| "epoch": 2.0394160583941607, |
| "grad_norm": 0.2099592536687851, |
| "learning_rate": 4.035789028134782e-06, |
| "loss": 0.0015, |
| "step": 467 |
| }, |
| { |
| "epoch": 2.0437956204379564, |
| "grad_norm": 0.3698459267616272, |
| "learning_rate": 4.03089688570253e-06, |
| "loss": 0.0005, |
| "step": 468 |
| }, |
| { |
| "epoch": 2.048175182481752, |
| "grad_norm": 0.12982334196567535, |
| "learning_rate": 4.025995345886663e-06, |
| "loss": 0.0005, |
| "step": 469 |
| }, |
| { |
| "epoch": 2.0525547445255476, |
| "grad_norm": 0.2783415615558624, |
| "learning_rate": 4.021084438775199e-06, |
| "loss": 0.0001, |
| "step": 470 |
| }, |
| { |
| "epoch": 2.0569343065693433, |
| "grad_norm": 0.0721098855137825, |
| "learning_rate": 4.016164194513654e-06, |
| "loss": 0.0001, |
| "step": 471 |
| }, |
| { |
| "epoch": 2.0613138686131385, |
| "grad_norm": 0.013497601263225079, |
| "learning_rate": 4.01123464330486e-06, |
| "loss": 0.0001, |
| "step": 472 |
| }, |
| { |
| "epoch": 2.065693430656934, |
| "grad_norm": 0.025111181661486626, |
| "learning_rate": 4.006295815408781e-06, |
| "loss": 0.0, |
| "step": 473 |
| }, |
| { |
| "epoch": 2.0700729927007298, |
| "grad_norm": 0.01752329058945179, |
| "learning_rate": 4.001347741142327e-06, |
| "loss": 0.0016, |
| "step": 474 |
| }, |
| { |
| "epoch": 2.0744525547445254, |
| "grad_norm": 0.5526646971702576, |
| "learning_rate": 3.996390450879163e-06, |
| "loss": 0.0002, |
| "step": 475 |
| }, |
| { |
| "epoch": 2.078832116788321, |
| "grad_norm": 0.3007132112979889, |
| "learning_rate": 3.9914239750495276e-06, |
| "loss": 0.0003, |
| "step": 476 |
| }, |
| { |
| "epoch": 2.0832116788321167, |
| "grad_norm": 0.42960840463638306, |
| "learning_rate": 3.986448344140047e-06, |
| "loss": 0.0011, |
| "step": 477 |
| }, |
| { |
| "epoch": 2.0875912408759123, |
| "grad_norm": 0.396423876285553, |
| "learning_rate": 3.9814635886935425e-06, |
| "loss": 0.0001, |
| "step": 478 |
| }, |
| { |
| "epoch": 2.091970802919708, |
| "grad_norm": 0.04294263571500778, |
| "learning_rate": 3.976469739308849e-06, |
| "loss": 0.0001, |
| "step": 479 |
| }, |
| { |
| "epoch": 2.0963503649635036, |
| "grad_norm": 0.04891116917133331, |
| "learning_rate": 3.971466826640623e-06, |
| "loss": 0.0, |
| "step": 480 |
| }, |
| { |
| "epoch": 2.100729927007299, |
| "grad_norm": 0.028048237785696983, |
| "learning_rate": 3.966454881399155e-06, |
| "loss": 0.0002, |
| "step": 481 |
| }, |
| { |
| "epoch": 2.105109489051095, |
| "grad_norm": 0.08178327232599258, |
| "learning_rate": 3.961433934350183e-06, |
| "loss": 0.0001, |
| "step": 482 |
| }, |
| { |
| "epoch": 2.1094890510948905, |
| "grad_norm": 0.1565946638584137, |
| "learning_rate": 3.956404016314703e-06, |
| "loss": 0.0, |
| "step": 483 |
| }, |
| { |
| "epoch": 2.113868613138686, |
| "grad_norm": 0.017099319025874138, |
| "learning_rate": 3.951365158168778e-06, |
| "loss": 0.0, |
| "step": 484 |
| }, |
| { |
| "epoch": 2.1182481751824818, |
| "grad_norm": 0.013942725956439972, |
| "learning_rate": 3.9463173908433505e-06, |
| "loss": 0.0001, |
| "step": 485 |
| }, |
| { |
| "epoch": 2.1226277372262774, |
| "grad_norm": 0.031127380207180977, |
| "learning_rate": 3.94126074532405e-06, |
| "loss": 0.0001, |
| "step": 486 |
| }, |
| { |
| "epoch": 2.127007299270073, |
| "grad_norm": 0.0673493817448616, |
| "learning_rate": 3.936195252651008e-06, |
| "loss": 0.0, |
| "step": 487 |
| }, |
| { |
| "epoch": 2.1313868613138687, |
| "grad_norm": 0.009533442556858063, |
| "learning_rate": 3.931120943918661e-06, |
| "loss": 0.0001, |
| "step": 488 |
| }, |
| { |
| "epoch": 2.1357664233576643, |
| "grad_norm": 0.2884984016418457, |
| "learning_rate": 3.9260378502755644e-06, |
| "loss": 0.0, |
| "step": 489 |
| }, |
| { |
| "epoch": 2.14014598540146, |
| "grad_norm": 0.012546413578093052, |
| "learning_rate": 3.9209460029242e-06, |
| "loss": 0.0002, |
| "step": 490 |
| }, |
| { |
| "epoch": 2.1445255474452556, |
| "grad_norm": 0.06351834535598755, |
| "learning_rate": 3.915845433120781e-06, |
| "loss": 0.0001, |
| "step": 491 |
| }, |
| { |
| "epoch": 2.1489051094890512, |
| "grad_norm": 0.06691469997167587, |
| "learning_rate": 3.910736172175066e-06, |
| "loss": 0.0002, |
| "step": 492 |
| }, |
| { |
| "epoch": 2.153284671532847, |
| "grad_norm": 0.13160394132137299, |
| "learning_rate": 3.905618251450165e-06, |
| "loss": 0.0002, |
| "step": 493 |
| }, |
| { |
| "epoch": 2.1576642335766425, |
| "grad_norm": 0.13259641826152802, |
| "learning_rate": 3.900491702362344e-06, |
| "loss": 0.0001, |
| "step": 494 |
| }, |
| { |
| "epoch": 2.162043795620438, |
| "grad_norm": 0.0303142461925745, |
| "learning_rate": 3.895356556380833e-06, |
| "loss": 0.0, |
| "step": 495 |
| }, |
| { |
| "epoch": 2.1664233576642338, |
| "grad_norm": 0.013111322186887264, |
| "learning_rate": 3.890212845027637e-06, |
| "loss": 0.0001, |
| "step": 496 |
| }, |
| { |
| "epoch": 2.170802919708029, |
| "grad_norm": 0.10938696563243866, |
| "learning_rate": 3.8850605998773374e-06, |
| "loss": 0.0002, |
| "step": 497 |
| }, |
| { |
| "epoch": 2.1751824817518246, |
| "grad_norm": 0.032018087804317474, |
| "learning_rate": 3.8798998525568985e-06, |
| "loss": 0.0001, |
| "step": 498 |
| }, |
| { |
| "epoch": 2.1795620437956202, |
| "grad_norm": 0.0468863919377327, |
| "learning_rate": 3.87473063474548e-06, |
| "loss": 0.0001, |
| "step": 499 |
| }, |
| { |
| "epoch": 2.183941605839416, |
| "grad_norm": 0.07472304254770279, |
| "learning_rate": 3.869552978174233e-06, |
| "loss": 0.0013, |
| "step": 500 |
| }, |
| { |
| "epoch": 2.1883211678832115, |
| "grad_norm": 1.0564846992492676, |
| "learning_rate": 3.8643669146261105e-06, |
| "loss": 0.0, |
| "step": 501 |
| }, |
| { |
| "epoch": 2.192700729927007, |
| "grad_norm": 0.014464089646935463, |
| "learning_rate": 3.859172475935674e-06, |
| "loss": 0.0003, |
| "step": 502 |
| }, |
| { |
| "epoch": 2.197080291970803, |
| "grad_norm": 0.18908506631851196, |
| "learning_rate": 3.853969693988892e-06, |
| "loss": 0.0, |
| "step": 503 |
| }, |
| { |
| "epoch": 2.2014598540145984, |
| "grad_norm": 0.059129055589437485, |
| "learning_rate": 3.848758600722953e-06, |
| "loss": 0.0001, |
| "step": 504 |
| }, |
| { |
| "epoch": 2.205839416058394, |
| "grad_norm": 0.008117246441543102, |
| "learning_rate": 3.843539228126059e-06, |
| "loss": 0.0017, |
| "step": 505 |
| }, |
| { |
| "epoch": 2.2102189781021897, |
| "grad_norm": 0.8072256445884705, |
| "learning_rate": 3.838311608237239e-06, |
| "loss": 0.0036, |
| "step": 506 |
| }, |
| { |
| "epoch": 2.2145985401459853, |
| "grad_norm": 1.5086314678192139, |
| "learning_rate": 3.833075773146142e-06, |
| "loss": 0.0001, |
| "step": 507 |
| }, |
| { |
| "epoch": 2.218978102189781, |
| "grad_norm": 0.19763076305389404, |
| "learning_rate": 3.827831754992854e-06, |
| "loss": 0.0, |
| "step": 508 |
| }, |
| { |
| "epoch": 2.2233576642335766, |
| "grad_norm": 0.007341600954532623, |
| "learning_rate": 3.822579585967685e-06, |
| "loss": 0.0, |
| "step": 509 |
| }, |
| { |
| "epoch": 2.2277372262773723, |
| "grad_norm": 0.014308220706880093, |
| "learning_rate": 3.817319298310984e-06, |
| "loss": 0.0001, |
| "step": 510 |
| }, |
| { |
| "epoch": 2.232116788321168, |
| "grad_norm": 0.11574184894561768, |
| "learning_rate": 3.812050924312934e-06, |
| "loss": 0.0001, |
| "step": 511 |
| }, |
| { |
| "epoch": 2.2364963503649635, |
| "grad_norm": 0.04201903194189072, |
| "learning_rate": 3.8067744963133555e-06, |
| "loss": 0.0004, |
| "step": 512 |
| }, |
| { |
| "epoch": 2.240875912408759, |
| "grad_norm": 0.2121448516845703, |
| "learning_rate": 3.8014900467015093e-06, |
| "loss": 0.0008, |
| "step": 513 |
| }, |
| { |
| "epoch": 2.245255474452555, |
| "grad_norm": 0.41363075375556946, |
| "learning_rate": 3.7961976079158964e-06, |
| "loss": 0.0014, |
| "step": 514 |
| }, |
| { |
| "epoch": 2.2496350364963504, |
| "grad_norm": 0.0038762835320085287, |
| "learning_rate": 3.79089721244406e-06, |
| "loss": 0.0001, |
| "step": 515 |
| }, |
| { |
| "epoch": 2.254014598540146, |
| "grad_norm": 0.018642032518982887, |
| "learning_rate": 3.785588892822383e-06, |
| "loss": 0.0003, |
| "step": 516 |
| }, |
| { |
| "epoch": 2.2583941605839417, |
| "grad_norm": 0.12630635499954224, |
| "learning_rate": 3.780272681635894e-06, |
| "loss": 0.0, |
| "step": 517 |
| }, |
| { |
| "epoch": 2.2627737226277373, |
| "grad_norm": 0.34310439229011536, |
| "learning_rate": 3.77494861151806e-06, |
| "loss": 0.0013, |
| "step": 518 |
| }, |
| { |
| "epoch": 2.267153284671533, |
| "grad_norm": 0.044213853776454926, |
| "learning_rate": 3.769616715150593e-06, |
| "loss": 0.0005, |
| "step": 519 |
| }, |
| { |
| "epoch": 2.2715328467153286, |
| "grad_norm": 0.17094682157039642, |
| "learning_rate": 3.7642770252632444e-06, |
| "loss": 0.0001, |
| "step": 520 |
| }, |
| { |
| "epoch": 2.2759124087591243, |
| "grad_norm": 0.0257079117000103, |
| "learning_rate": 3.7589295746336074e-06, |
| "loss": 0.0, |
| "step": 521 |
| }, |
| { |
| "epoch": 2.28029197080292, |
| "grad_norm": 0.01195420790463686, |
| "learning_rate": 3.753574396086913e-06, |
| "loss": 0.0001, |
| "step": 522 |
| }, |
| { |
| "epoch": 2.2846715328467155, |
| "grad_norm": 0.426551878452301, |
| "learning_rate": 3.748211522495831e-06, |
| "loss": 0.001, |
| "step": 523 |
| }, |
| { |
| "epoch": 2.289051094890511, |
| "grad_norm": 0.02542410045862198, |
| "learning_rate": 3.742840986780266e-06, |
| "loss": 0.0002, |
| "step": 524 |
| }, |
| { |
| "epoch": 2.293430656934307, |
| "grad_norm": 0.2616007924079895, |
| "learning_rate": 3.737462821907158e-06, |
| "loss": 0.002, |
| "step": 525 |
| }, |
| { |
| "epoch": 2.297810218978102, |
| "grad_norm": 0.1279967725276947, |
| "learning_rate": 3.732077060890277e-06, |
| "loss": 0.0004, |
| "step": 526 |
| }, |
| { |
| "epoch": 2.3021897810218976, |
| "grad_norm": 0.21853570640087128, |
| "learning_rate": 3.7266837367900214e-06, |
| "loss": 0.0011, |
| "step": 527 |
| }, |
| { |
| "epoch": 2.3065693430656933, |
| "grad_norm": 0.21419601142406464, |
| "learning_rate": 3.721282882713218e-06, |
| "loss": 0.0005, |
| "step": 528 |
| }, |
| { |
| "epoch": 2.310948905109489, |
| "grad_norm": 0.2496281862258911, |
| "learning_rate": 3.7158745318129135e-06, |
| "loss": 0.0003, |
| "step": 529 |
| }, |
| { |
| "epoch": 2.3153284671532846, |
| "grad_norm": 0.10131111741065979, |
| "learning_rate": 3.710458717288176e-06, |
| "loss": 0.0003, |
| "step": 530 |
| }, |
| { |
| "epoch": 2.31970802919708, |
| "grad_norm": 0.023186631500720978, |
| "learning_rate": 3.7050354723838855e-06, |
| "loss": 0.0005, |
| "step": 531 |
| }, |
| { |
| "epoch": 2.324087591240876, |
| "grad_norm": 0.3965378403663635, |
| "learning_rate": 3.6996048303905373e-06, |
| "loss": 0.0003, |
| "step": 532 |
| }, |
| { |
| "epoch": 2.3284671532846715, |
| "grad_norm": 0.03234691172838211, |
| "learning_rate": 3.6941668246440323e-06, |
| "loss": 0.0002, |
| "step": 533 |
| }, |
| { |
| "epoch": 2.332846715328467, |
| "grad_norm": 0.14468444883823395, |
| "learning_rate": 3.688721488525471e-06, |
| "loss": 0.0003, |
| "step": 534 |
| }, |
| { |
| "epoch": 2.3372262773722627, |
| "grad_norm": 0.09114562720060349, |
| "learning_rate": 3.683268855460955e-06, |
| "loss": 0.0, |
| "step": 535 |
| }, |
| { |
| "epoch": 2.3416058394160584, |
| "grad_norm": 0.011452419683337212, |
| "learning_rate": 3.6778089589213756e-06, |
| "loss": 0.0, |
| "step": 536 |
| }, |
| { |
| "epoch": 2.345985401459854, |
| "grad_norm": 0.026728764176368713, |
| "learning_rate": 3.6723418324222126e-06, |
| "loss": 0.0, |
| "step": 537 |
| }, |
| { |
| "epoch": 2.3503649635036497, |
| "grad_norm": 0.010162679478526115, |
| "learning_rate": 3.666867509523325e-06, |
| "loss": 0.0003, |
| "step": 538 |
| }, |
| { |
| "epoch": 2.3547445255474453, |
| "grad_norm": 0.2717076241970062, |
| "learning_rate": 3.661386023828749e-06, |
| "loss": 0.0002, |
| "step": 539 |
| }, |
| { |
| "epoch": 2.359124087591241, |
| "grad_norm": 0.17123937606811523, |
| "learning_rate": 3.6558974089864875e-06, |
| "loss": 0.0, |
| "step": 540 |
| }, |
| { |
| "epoch": 2.3635036496350366, |
| "grad_norm": 0.05525152385234833, |
| "learning_rate": 3.650401698688305e-06, |
| "loss": 0.0001, |
| "step": 541 |
| }, |
| { |
| "epoch": 2.367883211678832, |
| "grad_norm": 0.0844765231013298, |
| "learning_rate": 3.644898926669524e-06, |
| "loss": 0.0001, |
| "step": 542 |
| }, |
| { |
| "epoch": 2.372262773722628, |
| "grad_norm": 0.05303291603922844, |
| "learning_rate": 3.6393891267088132e-06, |
| "loss": 0.0, |
| "step": 543 |
| }, |
| { |
| "epoch": 2.3766423357664235, |
| "grad_norm": 0.02489842288196087, |
| "learning_rate": 3.633872332627983e-06, |
| "loss": 0.0014, |
| "step": 544 |
| }, |
| { |
| "epoch": 2.381021897810219, |
| "grad_norm": 0.4054766893386841, |
| "learning_rate": 3.628348578291776e-06, |
| "loss": 0.0, |
| "step": 545 |
| }, |
| { |
| "epoch": 2.3854014598540147, |
| "grad_norm": 0.2564426064491272, |
| "learning_rate": 3.6228178976076626e-06, |
| "loss": 0.0005, |
| "step": 546 |
| }, |
| { |
| "epoch": 2.3897810218978104, |
| "grad_norm": 0.4229254722595215, |
| "learning_rate": 3.6172803245256283e-06, |
| "loss": 0.0001, |
| "step": 547 |
| }, |
| { |
| "epoch": 2.394160583941606, |
| "grad_norm": 0.05393270403146744, |
| "learning_rate": 3.611735893037967e-06, |
| "loss": 0.0, |
| "step": 548 |
| }, |
| { |
| "epoch": 2.398540145985401, |
| "grad_norm": 0.004804346710443497, |
| "learning_rate": 3.6061846371790754e-06, |
| "loss": 0.0001, |
| "step": 549 |
| }, |
| { |
| "epoch": 2.402919708029197, |
| "grad_norm": 0.2905990481376648, |
| "learning_rate": 3.6006265910252393e-06, |
| "loss": 0.0, |
| "step": 550 |
| }, |
| { |
| "epoch": 2.4072992700729925, |
| "grad_norm": 0.05537007004022598, |
| "learning_rate": 3.5950617886944272e-06, |
| "loss": 0.0001, |
| "step": 551 |
| }, |
| { |
| "epoch": 2.411678832116788, |
| "grad_norm": 0.006207960192114115, |
| "learning_rate": 3.5894902643460807e-06, |
| "loss": 0.0, |
| "step": 552 |
| }, |
| { |
| "epoch": 2.4160583941605838, |
| "grad_norm": 0.14528508484363556, |
| "learning_rate": 3.5839120521809036e-06, |
| "loss": 0.0003, |
| "step": 553 |
| }, |
| { |
| "epoch": 2.4204379562043794, |
| "grad_norm": 0.007411749102175236, |
| "learning_rate": 3.578327186440654e-06, |
| "loss": 0.0002, |
| "step": 554 |
| }, |
| { |
| "epoch": 2.424817518248175, |
| "grad_norm": 0.1338014453649521, |
| "learning_rate": 3.5727357014079306e-06, |
| "loss": 0.0001, |
| "step": 555 |
| }, |
| { |
| "epoch": 2.4291970802919707, |
| "grad_norm": 0.0037137684412300587, |
| "learning_rate": 3.5671376314059676e-06, |
| "loss": 0.0, |
| "step": 556 |
| }, |
| { |
| "epoch": 2.4335766423357663, |
| "grad_norm": 0.26890304684638977, |
| "learning_rate": 3.561533010798418e-06, |
| "loss": 0.0004, |
| "step": 557 |
| }, |
| { |
| "epoch": 2.437956204379562, |
| "grad_norm": 0.10123980045318604, |
| "learning_rate": 3.555921873989148e-06, |
| "loss": 0.0, |
| "step": 558 |
| }, |
| { |
| "epoch": 2.4423357664233576, |
| "grad_norm": 0.033905990421772, |
| "learning_rate": 3.5503042554220206e-06, |
| "loss": 0.0001, |
| "step": 559 |
| }, |
| { |
| "epoch": 2.4467153284671532, |
| "grad_norm": 0.2706269323825836, |
| "learning_rate": 3.5446801895806904e-06, |
| "loss": 0.0003, |
| "step": 560 |
| }, |
| { |
| "epoch": 2.451094890510949, |
| "grad_norm": 0.017258645966649055, |
| "learning_rate": 3.539049710988386e-06, |
| "loss": 0.0, |
| "step": 561 |
| }, |
| { |
| "epoch": 2.4554744525547445, |
| "grad_norm": 0.010611895471811295, |
| "learning_rate": 3.5334128542077007e-06, |
| "loss": 0.0, |
| "step": 562 |
| }, |
| { |
| "epoch": 2.45985401459854, |
| "grad_norm": 0.0026555017102509737, |
| "learning_rate": 3.527769653840381e-06, |
| "loss": 0.0, |
| "step": 563 |
| }, |
| { |
| "epoch": 2.4642335766423358, |
| "grad_norm": 0.15091587603092194, |
| "learning_rate": 3.5221201445271136e-06, |
| "loss": 0.0001, |
| "step": 564 |
| }, |
| { |
| "epoch": 2.4686131386861314, |
| "grad_norm": 0.05164916068315506, |
| "learning_rate": 3.5164643609473115e-06, |
| "loss": 0.0004, |
| "step": 565 |
| }, |
| { |
| "epoch": 2.472992700729927, |
| "grad_norm": 0.48653078079223633, |
| "learning_rate": 3.5108023378189036e-06, |
| "loss": 0.0, |
| "step": 566 |
| }, |
| { |
| "epoch": 2.4773722627737227, |
| "grad_norm": 0.015059034340083599, |
| "learning_rate": 3.5051341098981184e-06, |
| "loss": 0.0, |
| "step": 567 |
| }, |
| { |
| "epoch": 2.4817518248175183, |
| "grad_norm": 0.013922065496444702, |
| "learning_rate": 3.499459711979274e-06, |
| "loss": 0.0, |
| "step": 568 |
| }, |
| { |
| "epoch": 2.486131386861314, |
| "grad_norm": 0.2496362030506134, |
| "learning_rate": 3.493779178894561e-06, |
| "loss": 0.0004, |
| "step": 569 |
| }, |
| { |
| "epoch": 2.4905109489051096, |
| "grad_norm": 0.07800492644309998, |
| "learning_rate": 3.488092545513833e-06, |
| "loss": 0.0, |
| "step": 570 |
| }, |
| { |
| "epoch": 2.4948905109489052, |
| "grad_norm": 0.613246738910675, |
| "learning_rate": 3.4823998467443886e-06, |
| "loss": 0.001, |
| "step": 571 |
| }, |
| { |
| "epoch": 2.499270072992701, |
| "grad_norm": 0.5939133167266846, |
| "learning_rate": 3.4767011175307596e-06, |
| "loss": 0.0005, |
| "step": 572 |
| }, |
| { |
| "epoch": 2.5036496350364965, |
| "grad_norm": 0.02257866971194744, |
| "learning_rate": 3.4709963928544952e-06, |
| "loss": 0.0001, |
| "step": 573 |
| }, |
| { |
| "epoch": 2.508029197080292, |
| "grad_norm": 0.31688934564590454, |
| "learning_rate": 3.4652857077339464e-06, |
| "loss": 0.0016, |
| "step": 574 |
| }, |
| { |
| "epoch": 2.512408759124088, |
| "grad_norm": 0.24091926217079163, |
| "learning_rate": 3.459569097224054e-06, |
| "loss": 0.0, |
| "step": 575 |
| }, |
| { |
| "epoch": 2.5167883211678834, |
| "grad_norm": 0.0038259460125118494, |
| "learning_rate": 3.4538465964161315e-06, |
| "loss": 0.0009, |
| "step": 576 |
| }, |
| { |
| "epoch": 2.521167883211679, |
| "grad_norm": 0.8010972738265991, |
| "learning_rate": 3.448118240437649e-06, |
| "loss": 0.0, |
| "step": 577 |
| }, |
| { |
| "epoch": 2.5255474452554747, |
| "grad_norm": 0.10152903199195862, |
| "learning_rate": 3.442384064452019e-06, |
| "loss": 0.0, |
| "step": 578 |
| }, |
| { |
| "epoch": 2.5299270072992703, |
| "grad_norm": 0.0037650642916560173, |
| "learning_rate": 3.4366441036583803e-06, |
| "loss": 0.0, |
| "step": 579 |
| }, |
| { |
| "epoch": 2.5343065693430655, |
| "grad_norm": 0.002322736894711852, |
| "learning_rate": 3.4308983932913806e-06, |
| "loss": 0.0, |
| "step": 580 |
| }, |
| { |
| "epoch": 2.538686131386861, |
| "grad_norm": 0.010998697020113468, |
| "learning_rate": 3.4251469686209626e-06, |
| "loss": 0.0, |
| "step": 581 |
| }, |
| { |
| "epoch": 2.543065693430657, |
| "grad_norm": 0.009687647223472595, |
| "learning_rate": 3.419389864952145e-06, |
| "loss": 0.0, |
| "step": 582 |
| }, |
| { |
| "epoch": 2.5474452554744524, |
| "grad_norm": 0.003304305486381054, |
| "learning_rate": 3.413627117624808e-06, |
| "loss": 0.0001, |
| "step": 583 |
| }, |
| { |
| "epoch": 2.551824817518248, |
| "grad_norm": 0.04525240138173103, |
| "learning_rate": 3.4078587620134747e-06, |
| "loss": 0.0, |
| "step": 584 |
| }, |
| { |
| "epoch": 2.5562043795620437, |
| "grad_norm": 0.14142946898937225, |
| "learning_rate": 3.4020848335270946e-06, |
| "loss": 0.0013, |
| "step": 585 |
| }, |
| { |
| "epoch": 2.5605839416058394, |
| "grad_norm": 0.18268825113773346, |
| "learning_rate": 3.3963053676088253e-06, |
| "loss": 0.0, |
| "step": 586 |
| }, |
| { |
| "epoch": 2.564963503649635, |
| "grad_norm": 0.07787997275590897, |
| "learning_rate": 3.390520399735818e-06, |
| "loss": 0.0008, |
| "step": 587 |
| }, |
| { |
| "epoch": 2.5693430656934306, |
| "grad_norm": 0.6478791832923889, |
| "learning_rate": 3.3847299654189947e-06, |
| "loss": 0.0, |
| "step": 588 |
| }, |
| { |
| "epoch": 2.5737226277372263, |
| "grad_norm": 0.029990263283252716, |
| "learning_rate": 3.3789341002028364e-06, |
| "loss": 0.0, |
| "step": 589 |
| }, |
| { |
| "epoch": 2.578102189781022, |
| "grad_norm": 0.029525484889745712, |
| "learning_rate": 3.3731328396651586e-06, |
| "loss": 0.0, |
| "step": 590 |
| }, |
| { |
| "epoch": 2.5824817518248175, |
| "grad_norm": 0.008475772105157375, |
| "learning_rate": 3.3673262194168976e-06, |
| "loss": 0.0, |
| "step": 591 |
| }, |
| { |
| "epoch": 2.586861313868613, |
| "grad_norm": 0.013320226222276688, |
| "learning_rate": 3.3615142751018893e-06, |
| "loss": 0.0, |
| "step": 592 |
| }, |
| { |
| "epoch": 2.591240875912409, |
| "grad_norm": 0.009780370630323887, |
| "learning_rate": 3.3556970423966515e-06, |
| "loss": 0.0, |
| "step": 593 |
| }, |
| { |
| "epoch": 2.5956204379562045, |
| "grad_norm": 0.015174774453043938, |
| "learning_rate": 3.349874557010166e-06, |
| "loss": 0.0, |
| "step": 594 |
| }, |
| { |
| "epoch": 2.6, |
| "grad_norm": 0.011269648559391499, |
| "learning_rate": 3.3440468546836564e-06, |
| "loss": 0.0, |
| "step": 595 |
| }, |
| { |
| "epoch": 2.6043795620437957, |
| "grad_norm": 0.014948060736060143, |
| "learning_rate": 3.3382139711903707e-06, |
| "loss": 0.0, |
| "step": 596 |
| }, |
| { |
| "epoch": 2.6087591240875914, |
| "grad_norm": 0.005212848540395498, |
| "learning_rate": 3.3323759423353618e-06, |
| "loss": 0.0001, |
| "step": 597 |
| }, |
| { |
| "epoch": 2.613138686131387, |
| "grad_norm": 0.3330608308315277, |
| "learning_rate": 3.3265328039552676e-06, |
| "loss": 0.0013, |
| "step": 598 |
| }, |
| { |
| "epoch": 2.6175182481751826, |
| "grad_norm": 0.01612473465502262, |
| "learning_rate": 3.320684591918089e-06, |
| "loss": 0.002, |
| "step": 599 |
| }, |
| { |
| "epoch": 2.621897810218978, |
| "grad_norm": 1.6749413013458252, |
| "learning_rate": 3.3148313421229743e-06, |
| "loss": 0.0, |
| "step": 600 |
| }, |
| { |
| "epoch": 2.6262773722627735, |
| "grad_norm": 0.007137598004192114, |
| "learning_rate": 3.308973090499994e-06, |
| "loss": 0.0003, |
| "step": 601 |
| }, |
| { |
| "epoch": 2.630656934306569, |
| "grad_norm": 0.2040940374135971, |
| "learning_rate": 3.303109873009922e-06, |
| "loss": 0.0, |
| "step": 602 |
| }, |
| { |
| "epoch": 2.6350364963503647, |
| "grad_norm": 0.016712144017219543, |
| "learning_rate": 3.297241725644016e-06, |
| "loss": 0.0, |
| "step": 603 |
| }, |
| { |
| "epoch": 2.6394160583941604, |
| "grad_norm": 0.05457067862153053, |
| "learning_rate": 3.2913686844237963e-06, |
| "loss": 0.0002, |
| "step": 604 |
| }, |
| { |
| "epoch": 2.643795620437956, |
| "grad_norm": 0.05992881581187248, |
| "learning_rate": 3.2854907854008224e-06, |
| "loss": 0.0018, |
| "step": 605 |
| }, |
| { |
| "epoch": 2.6481751824817517, |
| "grad_norm": 0.2480854094028473, |
| "learning_rate": 3.2796080646564738e-06, |
| "loss": 0.0, |
| "step": 606 |
| }, |
| { |
| "epoch": 2.6525547445255473, |
| "grad_norm": 0.009658033028244972, |
| "learning_rate": 3.273720558301729e-06, |
| "loss": 0.0003, |
| "step": 607 |
| }, |
| { |
| "epoch": 2.656934306569343, |
| "grad_norm": 0.06701422482728958, |
| "learning_rate": 3.267828302476942e-06, |
| "loss": 0.0001, |
| "step": 608 |
| }, |
| { |
| "epoch": 2.6613138686131386, |
| "grad_norm": 0.04225367307662964, |
| "learning_rate": 3.2619313333516213e-06, |
| "loss": 0.0001, |
| "step": 609 |
| }, |
| { |
| "epoch": 2.665693430656934, |
| "grad_norm": 0.01937365159392357, |
| "learning_rate": 3.2560296871242085e-06, |
| "loss": 0.0, |
| "step": 610 |
| }, |
| { |
| "epoch": 2.67007299270073, |
| "grad_norm": 0.010443875566124916, |
| "learning_rate": 3.2501234000218558e-06, |
| "loss": 0.0001, |
| "step": 611 |
| }, |
| { |
| "epoch": 2.6744525547445255, |
| "grad_norm": 0.039485372602939606, |
| "learning_rate": 3.2442125083002014e-06, |
| "loss": 0.0001, |
| "step": 612 |
| }, |
| { |
| "epoch": 2.678832116788321, |
| "grad_norm": 0.01544400304555893, |
| "learning_rate": 3.238297048243151e-06, |
| "loss": 0.0, |
| "step": 613 |
| }, |
| { |
| "epoch": 2.6832116788321168, |
| "grad_norm": 0.014269383624196053, |
| "learning_rate": 3.2323770561626523e-06, |
| "loss": 0.0, |
| "step": 614 |
| }, |
| { |
| "epoch": 2.6875912408759124, |
| "grad_norm": 0.00998217985033989, |
| "learning_rate": 3.2264525683984717e-06, |
| "loss": 0.0001, |
| "step": 615 |
| }, |
| { |
| "epoch": 2.691970802919708, |
| "grad_norm": 0.0631580650806427, |
| "learning_rate": 3.2205236213179736e-06, |
| "loss": 0.0001, |
| "step": 616 |
| }, |
| { |
| "epoch": 2.6963503649635037, |
| "grad_norm": 0.022673817351460457, |
| "learning_rate": 3.2145902513158963e-06, |
| "loss": 0.0034, |
| "step": 617 |
| }, |
| { |
| "epoch": 2.7007299270072993, |
| "grad_norm": 0.24634726345539093, |
| "learning_rate": 3.2086524948141263e-06, |
| "loss": 0.0, |
| "step": 618 |
| }, |
| { |
| "epoch": 2.705109489051095, |
| "grad_norm": 0.018728157505393028, |
| "learning_rate": 3.2027103882614772e-06, |
| "loss": 0.0001, |
| "step": 619 |
| }, |
| { |
| "epoch": 2.7094890510948906, |
| "grad_norm": 1.7468042373657227, |
| "learning_rate": 3.1967639681334668e-06, |
| "loss": 0.0002, |
| "step": 620 |
| }, |
| { |
| "epoch": 2.713868613138686, |
| "grad_norm": 0.014051638543605804, |
| "learning_rate": 3.1908132709320895e-06, |
| "loss": 0.0003, |
| "step": 621 |
| }, |
| { |
| "epoch": 2.718248175182482, |
| "grad_norm": 0.1467590481042862, |
| "learning_rate": 3.1848583331855952e-06, |
| "loss": 0.0, |
| "step": 622 |
| }, |
| { |
| "epoch": 2.7226277372262775, |
| "grad_norm": 0.0301121287047863, |
| "learning_rate": 3.178899191448266e-06, |
| "loss": 0.0001, |
| "step": 623 |
| }, |
| { |
| "epoch": 2.727007299270073, |
| "grad_norm": 0.011421700939536095, |
| "learning_rate": 3.1729358823001873e-06, |
| "loss": 0.0, |
| "step": 624 |
| }, |
| { |
| "epoch": 2.7313868613138688, |
| "grad_norm": 0.018829964101314545, |
| "learning_rate": 3.1669684423470277e-06, |
| "loss": 0.0001, |
| "step": 625 |
| }, |
| { |
| "epoch": 2.7357664233576644, |
| "grad_norm": 0.2821716070175171, |
| "learning_rate": 3.1609969082198124e-06, |
| "loss": 0.0001, |
| "step": 626 |
| }, |
| { |
| "epoch": 2.74014598540146, |
| "grad_norm": 0.03946685045957565, |
| "learning_rate": 3.155021316574699e-06, |
| "loss": 0.0001, |
| "step": 627 |
| }, |
| { |
| "epoch": 2.7445255474452557, |
| "grad_norm": 0.03428944945335388, |
| "learning_rate": 3.1490417040927513e-06, |
| "loss": 0.0001, |
| "step": 628 |
| }, |
| { |
| "epoch": 2.7489051094890513, |
| "grad_norm": 0.0415036678314209, |
| "learning_rate": 3.143058107479716e-06, |
| "loss": 0.0001, |
| "step": 629 |
| }, |
| { |
| "epoch": 2.753284671532847, |
| "grad_norm": 0.011763577349483967, |
| "learning_rate": 3.1370705634657953e-06, |
| "loss": 0.0005, |
| "step": 630 |
| }, |
| { |
| "epoch": 2.7576642335766426, |
| "grad_norm": 0.2982889413833618, |
| "learning_rate": 3.1310791088054225e-06, |
| "loss": 0.0001, |
| "step": 631 |
| }, |
| { |
| "epoch": 2.7620437956204382, |
| "grad_norm": 0.07167187333106995, |
| "learning_rate": 3.1250837802770378e-06, |
| "loss": 0.0001, |
| "step": 632 |
| }, |
| { |
| "epoch": 2.7664233576642334, |
| "grad_norm": 0.009533251635730267, |
| "learning_rate": 3.1190846146828587e-06, |
| "loss": 0.0, |
| "step": 633 |
| }, |
| { |
| "epoch": 2.770802919708029, |
| "grad_norm": 0.09512810409069061, |
| "learning_rate": 3.1130816488486582e-06, |
| "loss": 0.0003, |
| "step": 634 |
| }, |
| { |
| "epoch": 2.7751824817518247, |
| "grad_norm": 0.024417169392108917, |
| "learning_rate": 3.1070749196235366e-06, |
| "loss": 0.0001, |
| "step": 635 |
| }, |
| { |
| "epoch": 2.7795620437956203, |
| "grad_norm": 0.026550231501460075, |
| "learning_rate": 3.1010644638796956e-06, |
| "loss": 0.0002, |
| "step": 636 |
| }, |
| { |
| "epoch": 2.783941605839416, |
| "grad_norm": 0.04400951415300369, |
| "learning_rate": 3.0950503185122116e-06, |
| "loss": 0.0001, |
| "step": 637 |
| }, |
| { |
| "epoch": 2.7883211678832116, |
| "grad_norm": 0.037575673311948776, |
| "learning_rate": 3.0890325204388107e-06, |
| "loss": 0.0001, |
| "step": 638 |
| }, |
| { |
| "epoch": 2.7927007299270072, |
| "grad_norm": 0.017704375088214874, |
| "learning_rate": 3.083011106599641e-06, |
| "loss": 0.0, |
| "step": 639 |
| }, |
| { |
| "epoch": 2.797080291970803, |
| "grad_norm": 0.5097067356109619, |
| "learning_rate": 3.0769861139570446e-06, |
| "loss": 0.0002, |
| "step": 640 |
| }, |
| { |
| "epoch": 2.8014598540145985, |
| "grad_norm": 0.03586648404598236, |
| "learning_rate": 3.0709575794953333e-06, |
| "loss": 0.0001, |
| "step": 641 |
| }, |
| { |
| "epoch": 2.805839416058394, |
| "grad_norm": 0.08737017959356308, |
| "learning_rate": 3.06492554022056e-06, |
| "loss": 0.0001, |
| "step": 642 |
| }, |
| { |
| "epoch": 2.81021897810219, |
| "grad_norm": 0.03558499738574028, |
| "learning_rate": 3.0588900331602915e-06, |
| "loss": 0.0, |
| "step": 643 |
| }, |
| { |
| "epoch": 2.8145985401459854, |
| "grad_norm": 0.015121969394385815, |
| "learning_rate": 3.0528510953633824e-06, |
| "loss": 0.0, |
| "step": 644 |
| }, |
| { |
| "epoch": 2.818978102189781, |
| "grad_norm": 0.005024883430451155, |
| "learning_rate": 3.046808763899745e-06, |
| "loss": 0.0, |
| "step": 645 |
| }, |
| { |
| "epoch": 2.8233576642335767, |
| "grad_norm": 0.10004691034555435, |
| "learning_rate": 3.0407630758601257e-06, |
| "loss": 0.0002, |
| "step": 646 |
| }, |
| { |
| "epoch": 2.8277372262773723, |
| "grad_norm": 0.0034653018228709698, |
| "learning_rate": 3.034714068355874e-06, |
| "loss": 0.0, |
| "step": 647 |
| }, |
| { |
| "epoch": 2.832116788321168, |
| "grad_norm": 0.005470994859933853, |
| "learning_rate": 3.0286617785187157e-06, |
| "loss": 0.0, |
| "step": 648 |
| }, |
| { |
| "epoch": 2.8364963503649636, |
| "grad_norm": 0.004967759363353252, |
| "learning_rate": 3.022606243500526e-06, |
| "loss": 0.0, |
| "step": 649 |
| }, |
| { |
| "epoch": 2.8408759124087593, |
| "grad_norm": 0.005472187884151936, |
| "learning_rate": 3.0165475004730994e-06, |
| "loss": 0.0, |
| "step": 650 |
| }, |
| { |
| "epoch": 2.845255474452555, |
| "grad_norm": 0.02719656005501747, |
| "learning_rate": 3.0104855866279244e-06, |
| "loss": 0.0, |
| "step": 651 |
| }, |
| { |
| "epoch": 2.8496350364963505, |
| "grad_norm": 0.006005981471389532, |
| "learning_rate": 3.0044205391759517e-06, |
| "loss": 0.0001, |
| "step": 652 |
| }, |
| { |
| "epoch": 2.8540145985401457, |
| "grad_norm": 0.07548055797815323, |
| "learning_rate": 2.9983523953473697e-06, |
| "loss": 0.0001, |
| "step": 653 |
| }, |
| { |
| "epoch": 2.8583941605839414, |
| "grad_norm": 0.022410407662391663, |
| "learning_rate": 2.9922811923913712e-06, |
| "loss": 0.0, |
| "step": 654 |
| }, |
| { |
| "epoch": 2.862773722627737, |
| "grad_norm": 0.0035277260467410088, |
| "learning_rate": 2.9862069675759297e-06, |
| "loss": 0.0, |
| "step": 655 |
| }, |
| { |
| "epoch": 2.8671532846715326, |
| "grad_norm": 0.00670978520065546, |
| "learning_rate": 2.980129758187567e-06, |
| "loss": 0.0, |
| "step": 656 |
| }, |
| { |
| "epoch": 2.8715328467153283, |
| "grad_norm": 0.0031098946928977966, |
| "learning_rate": 2.974049601531126e-06, |
| "loss": 0.0, |
| "step": 657 |
| }, |
| { |
| "epoch": 2.875912408759124, |
| "grad_norm": 0.013540991581976414, |
| "learning_rate": 2.9679665349295417e-06, |
| "loss": 0.0, |
| "step": 658 |
| }, |
| { |
| "epoch": 2.8802919708029195, |
| "grad_norm": 0.015702638775110245, |
| "learning_rate": 2.9618805957236113e-06, |
| "loss": 0.0, |
| "step": 659 |
| }, |
| { |
| "epoch": 2.884671532846715, |
| "grad_norm": 0.03669784590601921, |
| "learning_rate": 2.955791821271766e-06, |
| "loss": 0.0, |
| "step": 660 |
| }, |
| { |
| "epoch": 2.889051094890511, |
| "grad_norm": 0.011530515737831593, |
| "learning_rate": 2.9497002489498394e-06, |
| "loss": 0.0, |
| "step": 661 |
| }, |
| { |
| "epoch": 2.8934306569343065, |
| "grad_norm": 0.004896043334156275, |
| "learning_rate": 2.9436059161508425e-06, |
| "loss": 0.0, |
| "step": 662 |
| }, |
| { |
| "epoch": 2.897810218978102, |
| "grad_norm": 0.006100150756537914, |
| "learning_rate": 2.9375088602847303e-06, |
| "loss": 0.0, |
| "step": 663 |
| }, |
| { |
| "epoch": 2.9021897810218977, |
| "grad_norm": 0.0026638389099389315, |
| "learning_rate": 2.931409118778172e-06, |
| "loss": 0.0, |
| "step": 664 |
| }, |
| { |
| "epoch": 2.9065693430656934, |
| "grad_norm": 0.03918790817260742, |
| "learning_rate": 2.9253067290743237e-06, |
| "loss": 0.0001, |
| "step": 665 |
| }, |
| { |
| "epoch": 2.910948905109489, |
| "grad_norm": 0.7945866584777832, |
| "learning_rate": 2.9192017286325975e-06, |
| "loss": 0.0, |
| "step": 666 |
| }, |
| { |
| "epoch": 2.9153284671532846, |
| "grad_norm": 0.004381685517728329, |
| "learning_rate": 2.913094154928431e-06, |
| "loss": 0.0, |
| "step": 667 |
| }, |
| { |
| "epoch": 2.9197080291970803, |
| "grad_norm": 0.006551735103130341, |
| "learning_rate": 2.9069840454530583e-06, |
| "loss": 0.0, |
| "step": 668 |
| }, |
| { |
| "epoch": 2.924087591240876, |
| "grad_norm": 0.003194763557985425, |
| "learning_rate": 2.900871437713279e-06, |
| "loss": 0.0002, |
| "step": 669 |
| }, |
| { |
| "epoch": 2.9284671532846716, |
| "grad_norm": 0.05487615615129471, |
| "learning_rate": 2.894756369231228e-06, |
| "loss": 0.0, |
| "step": 670 |
| }, |
| { |
| "epoch": 2.932846715328467, |
| "grad_norm": 0.008219748735427856, |
| "learning_rate": 2.888638877544146e-06, |
| "loss": 0.0, |
| "step": 671 |
| }, |
| { |
| "epoch": 2.937226277372263, |
| "grad_norm": 0.0017953907372429967, |
| "learning_rate": 2.8825190002041475e-06, |
| "loss": 0.0, |
| "step": 672 |
| }, |
| { |
| "epoch": 2.9416058394160585, |
| "grad_norm": 0.00900351069867611, |
| "learning_rate": 2.8763967747779926e-06, |
| "loss": 0.0, |
| "step": 673 |
| }, |
| { |
| "epoch": 2.945985401459854, |
| "grad_norm": 0.0021545395720750093, |
| "learning_rate": 2.8702722388468544e-06, |
| "loss": 0.0, |
| "step": 674 |
| }, |
| { |
| "epoch": 2.9503649635036497, |
| "grad_norm": 0.01696695201098919, |
| "learning_rate": 2.864145430006089e-06, |
| "loss": 0.0, |
| "step": 675 |
| }, |
| { |
| "epoch": 2.9547445255474454, |
| "grad_norm": 0.018733413890004158, |
| "learning_rate": 2.858016385865004e-06, |
| "loss": 0.0001, |
| "step": 676 |
| }, |
| { |
| "epoch": 2.959124087591241, |
| "grad_norm": 0.0550026074051857, |
| "learning_rate": 2.85188514404663e-06, |
| "loss": 0.0, |
| "step": 677 |
| }, |
| { |
| "epoch": 2.9635036496350367, |
| "grad_norm": 0.00857064314186573, |
| "learning_rate": 2.845751742187487e-06, |
| "loss": 0.0, |
| "step": 678 |
| }, |
| { |
| "epoch": 2.9678832116788323, |
| "grad_norm": 0.008322341367602348, |
| "learning_rate": 2.839616217937354e-06, |
| "loss": 0.0, |
| "step": 679 |
| }, |
| { |
| "epoch": 2.972262773722628, |
| "grad_norm": 0.001857755589298904, |
| "learning_rate": 2.833478608959038e-06, |
| "loss": 0.0, |
| "step": 680 |
| }, |
| { |
| "epoch": 2.9766423357664236, |
| "grad_norm": 0.014696883969008923, |
| "learning_rate": 2.827338952928146e-06, |
| "loss": 0.0, |
| "step": 681 |
| }, |
| { |
| "epoch": 2.981021897810219, |
| "grad_norm": 0.0031583639793097973, |
| "learning_rate": 2.821197287532847e-06, |
| "loss": 0.0, |
| "step": 682 |
| }, |
| { |
| "epoch": 2.985401459854015, |
| "grad_norm": 0.0016461616614833474, |
| "learning_rate": 2.8150536504736457e-06, |
| "loss": 0.0, |
| "step": 683 |
| }, |
| { |
| "epoch": 2.9897810218978105, |
| "grad_norm": 0.0036859370302408934, |
| "learning_rate": 2.8089080794631514e-06, |
| "loss": 0.0, |
| "step": 684 |
| }, |
| { |
| "epoch": 2.994160583941606, |
| "grad_norm": 0.0019211465260013938, |
| "learning_rate": 2.8027606122258435e-06, |
| "loss": 0.0, |
| "step": 685 |
| }, |
| { |
| "epoch": 2.9985401459854013, |
| "grad_norm": 0.001531517249532044, |
| "learning_rate": 2.79661128649784e-06, |
| "loss": 0.0, |
| "step": 686 |
| }, |
| { |
| "epoch": 3.0, |
| "grad_norm": 0.002470475621521473, |
| "learning_rate": 2.7904601400266706e-06, |
| "loss": 0.0, |
| "step": 687 |
| }, |
| { |
| "epoch": 3.0043795620437956, |
| "grad_norm": 0.017151040956377983, |
| "learning_rate": 2.784307210571039e-06, |
| "loss": 0.0, |
| "step": 688 |
| }, |
| { |
| "epoch": 3.0087591240875913, |
| "grad_norm": 0.0010016716551035643, |
| "learning_rate": 2.7781525359005945e-06, |
| "loss": 0.0, |
| "step": 689 |
| }, |
| { |
| "epoch": 3.013138686131387, |
| "grad_norm": 0.0022327261976897717, |
| "learning_rate": 2.771996153795699e-06, |
| "loss": 0.0, |
| "step": 690 |
| }, |
| { |
| "epoch": 3.0175182481751825, |
| "grad_norm": 0.001771376933902502, |
| "learning_rate": 2.7658381020471965e-06, |
| "loss": 0.0, |
| "step": 691 |
| }, |
| { |
| "epoch": 3.021897810218978, |
| "grad_norm": 0.0039240955375134945, |
| "learning_rate": 2.7596784184561788e-06, |
| "loss": 0.0, |
| "step": 692 |
| }, |
| { |
| "epoch": 3.026277372262774, |
| "grad_norm": 0.002869370160624385, |
| "learning_rate": 2.7535171408337556e-06, |
| "loss": 0.0, |
| "step": 693 |
| }, |
| { |
| "epoch": 3.0306569343065695, |
| "grad_norm": 0.01324407197535038, |
| "learning_rate": 2.7473543070008213e-06, |
| "loss": 0.0, |
| "step": 694 |
| }, |
| { |
| "epoch": 3.035036496350365, |
| "grad_norm": 0.015128228813409805, |
| "learning_rate": 2.7411899547878223e-06, |
| "loss": 0.0, |
| "step": 695 |
| }, |
| { |
| "epoch": 3.0394160583941607, |
| "grad_norm": 0.0010945587418973446, |
| "learning_rate": 2.7350241220345273e-06, |
| "loss": 0.0, |
| "step": 696 |
| }, |
| { |
| "epoch": 3.0437956204379564, |
| "grad_norm": 0.006534193176776171, |
| "learning_rate": 2.7288568465897918e-06, |
| "loss": 0.0, |
| "step": 697 |
| }, |
| { |
| "epoch": 3.048175182481752, |
| "grad_norm": 0.0015636439202353358, |
| "learning_rate": 2.722688166311328e-06, |
| "loss": 0.0, |
| "step": 698 |
| }, |
| { |
| "epoch": 3.0525547445255476, |
| "grad_norm": 0.0023577925749123096, |
| "learning_rate": 2.7165181190654705e-06, |
| "loss": 0.0, |
| "step": 699 |
| }, |
| { |
| "epoch": 3.0569343065693433, |
| "grad_norm": 0.0021843581926077604, |
| "learning_rate": 2.7103467427269466e-06, |
| "loss": 0.0, |
| "step": 700 |
| }, |
| { |
| "epoch": 3.0613138686131385, |
| "grad_norm": 0.0018377688247710466, |
| "learning_rate": 2.704174075178641e-06, |
| "loss": 0.0, |
| "step": 701 |
| }, |
| { |
| "epoch": 3.065693430656934, |
| "grad_norm": 0.0018055844120681286, |
| "learning_rate": 2.6980001543113653e-06, |
| "loss": 0.0, |
| "step": 702 |
| }, |
| { |
| "epoch": 3.0700729927007298, |
| "grad_norm": 0.0028426465578377247, |
| "learning_rate": 2.691825018023624e-06, |
| "loss": 0.0, |
| "step": 703 |
| }, |
| { |
| "epoch": 3.0744525547445254, |
| "grad_norm": 0.006463038269430399, |
| "learning_rate": 2.6856487042213825e-06, |
| "loss": 0.0, |
| "step": 704 |
| }, |
| { |
| "epoch": 3.078832116788321, |
| "grad_norm": 0.0850614532828331, |
| "learning_rate": 2.6794712508178345e-06, |
| "loss": 0.0001, |
| "step": 705 |
| }, |
| { |
| "epoch": 3.0832116788321167, |
| "grad_norm": 0.0032790934201329947, |
| "learning_rate": 2.673292695733169e-06, |
| "loss": 0.0, |
| "step": 706 |
| }, |
| { |
| "epoch": 3.0875912408759123, |
| "grad_norm": 0.0014041299000382423, |
| "learning_rate": 2.6671130768943375e-06, |
| "loss": 0.0, |
| "step": 707 |
| }, |
| { |
| "epoch": 3.091970802919708, |
| "grad_norm": 0.0034296936355531216, |
| "learning_rate": 2.660932432234823e-06, |
| "loss": 0.0, |
| "step": 708 |
| }, |
| { |
| "epoch": 3.0963503649635036, |
| "grad_norm": 0.0021895640529692173, |
| "learning_rate": 2.654750799694402e-06, |
| "loss": 0.0, |
| "step": 709 |
| }, |
| { |
| "epoch": 3.100729927007299, |
| "grad_norm": 0.0013117026537656784, |
| "learning_rate": 2.648568217218919e-06, |
| "loss": 0.0, |
| "step": 710 |
| }, |
| { |
| "epoch": 3.105109489051095, |
| "grad_norm": 0.0008849436999298632, |
| "learning_rate": 2.6423847227600462e-06, |
| "loss": 0.0, |
| "step": 711 |
| }, |
| { |
| "epoch": 3.1094890510948905, |
| "grad_norm": 0.015677910298109055, |
| "learning_rate": 2.636200354275057e-06, |
| "loss": 0.0, |
| "step": 712 |
| }, |
| { |
| "epoch": 3.113868613138686, |
| "grad_norm": 0.0022430643439292908, |
| "learning_rate": 2.630015149726588e-06, |
| "loss": 0.0, |
| "step": 713 |
| }, |
| { |
| "epoch": 3.1182481751824818, |
| "grad_norm": 0.0011902175610885024, |
| "learning_rate": 2.6238291470824084e-06, |
| "loss": 0.0, |
| "step": 714 |
| }, |
| { |
| "epoch": 3.1226277372262774, |
| "grad_norm": 0.002818114822730422, |
| "learning_rate": 2.6176423843151866e-06, |
| "loss": 0.0, |
| "step": 715 |
| }, |
| { |
| "epoch": 3.127007299270073, |
| "grad_norm": 0.0025780019350349903, |
| "learning_rate": 2.6114548994022575e-06, |
| "loss": 0.0, |
| "step": 716 |
| }, |
| { |
| "epoch": 3.1313868613138687, |
| "grad_norm": 0.008761986158788204, |
| "learning_rate": 2.6052667303253886e-06, |
| "loss": 0.0, |
| "step": 717 |
| }, |
| { |
| "epoch": 3.1357664233576643, |
| "grad_norm": 0.0025233717169612646, |
| "learning_rate": 2.5990779150705454e-06, |
| "loss": 0.0, |
| "step": 718 |
| }, |
| { |
| "epoch": 3.14014598540146, |
| "grad_norm": 0.0033938682172447443, |
| "learning_rate": 2.5928884916276638e-06, |
| "loss": 0.0, |
| "step": 719 |
| }, |
| { |
| "epoch": 3.1445255474452556, |
| "grad_norm": 0.026820925995707512, |
| "learning_rate": 2.586698497990409e-06, |
| "loss": 0.0001, |
| "step": 720 |
| }, |
| { |
| "epoch": 3.1489051094890512, |
| "grad_norm": 0.0023744499776512384, |
| "learning_rate": 2.5805079721559496e-06, |
| "loss": 0.0, |
| "step": 721 |
| }, |
| { |
| "epoch": 3.153284671532847, |
| "grad_norm": 0.001529409782961011, |
| "learning_rate": 2.574316952124718e-06, |
| "loss": 0.0, |
| "step": 722 |
| }, |
| { |
| "epoch": 3.1576642335766425, |
| "grad_norm": 0.005077557172626257, |
| "learning_rate": 2.5681254759001828e-06, |
| "loss": 0.0, |
| "step": 723 |
| }, |
| { |
| "epoch": 3.162043795620438, |
| "grad_norm": 0.002494523301720619, |
| "learning_rate": 2.561933581488612e-06, |
| "loss": 0.0, |
| "step": 724 |
| }, |
| { |
| "epoch": 3.1664233576642338, |
| "grad_norm": 0.0017919996753335, |
| "learning_rate": 2.55574130689884e-06, |
| "loss": 0.0, |
| "step": 725 |
| }, |
| { |
| "epoch": 3.170802919708029, |
| "grad_norm": 0.0017449480947107077, |
| "learning_rate": 2.549548690142036e-06, |
| "loss": 0.0, |
| "step": 726 |
| }, |
| { |
| "epoch": 3.1751824817518246, |
| "grad_norm": 0.0017062796978279948, |
| "learning_rate": 2.5433557692314687e-06, |
| "loss": 0.0, |
| "step": 727 |
| }, |
| { |
| "epoch": 3.1795620437956202, |
| "grad_norm": 0.002210992621257901, |
| "learning_rate": 2.5371625821822743e-06, |
| "loss": 0.0, |
| "step": 728 |
| }, |
| { |
| "epoch": 3.183941605839416, |
| "grad_norm": 0.0025367604102939367, |
| "learning_rate": 2.530969167011222e-06, |
| "loss": 0.0, |
| "step": 729 |
| }, |
| { |
| "epoch": 3.1883211678832115, |
| "grad_norm": 0.0028825236950069666, |
| "learning_rate": 2.5247755617364826e-06, |
| "loss": 0.0, |
| "step": 730 |
| }, |
| { |
| "epoch": 3.192700729927007, |
| "grad_norm": 0.0011007302673533559, |
| "learning_rate": 2.5185818043773942e-06, |
| "loss": 0.0, |
| "step": 731 |
| }, |
| { |
| "epoch": 3.197080291970803, |
| "grad_norm": 0.008204109966754913, |
| "learning_rate": 2.5123879329542255e-06, |
| "loss": 0.0, |
| "step": 732 |
| }, |
| { |
| "epoch": 3.2014598540145984, |
| "grad_norm": 0.001251939800567925, |
| "learning_rate": 2.5061939854879485e-06, |
| "loss": 0.0, |
| "step": 733 |
| }, |
| { |
| "epoch": 3.205839416058394, |
| "grad_norm": 0.0024482880253344774, |
| "learning_rate": 2.5e-06, |
| "loss": 0.0, |
| "step": 734 |
| }, |
| { |
| "epoch": 3.2102189781021897, |
| "grad_norm": 0.002128303749486804, |
| "learning_rate": 2.4938060145120523e-06, |
| "loss": 0.0, |
| "step": 735 |
| }, |
| { |
| "epoch": 3.2145985401459853, |
| "grad_norm": 0.0013024493819102645, |
| "learning_rate": 2.4876120670457753e-06, |
| "loss": 0.0, |
| "step": 736 |
| }, |
| { |
| "epoch": 3.218978102189781, |
| "grad_norm": 0.0012688646093010902, |
| "learning_rate": 2.481418195622607e-06, |
| "loss": 0.0, |
| "step": 737 |
| }, |
| { |
| "epoch": 3.2233576642335766, |
| "grad_norm": 0.0021316749043762684, |
| "learning_rate": 2.475224438263518e-06, |
| "loss": 0.0, |
| "step": 738 |
| }, |
| { |
| "epoch": 3.2277372262773723, |
| "grad_norm": 0.0019681300036609173, |
| "learning_rate": 2.469030832988779e-06, |
| "loss": 0.0, |
| "step": 739 |
| }, |
| { |
| "epoch": 3.232116788321168, |
| "grad_norm": 0.0013115162728354335, |
| "learning_rate": 2.4628374178177274e-06, |
| "loss": 0.0, |
| "step": 740 |
| }, |
| { |
| "epoch": 3.2364963503649635, |
| "grad_norm": 0.002466258592903614, |
| "learning_rate": 2.4566442307685325e-06, |
| "loss": 0.0, |
| "step": 741 |
| }, |
| { |
| "epoch": 3.240875912408759, |
| "grad_norm": 0.005171903874725103, |
| "learning_rate": 2.450451309857965e-06, |
| "loss": 0.0, |
| "step": 742 |
| }, |
| { |
| "epoch": 3.245255474452555, |
| "grad_norm": 0.0015806729206815362, |
| "learning_rate": 2.4442586931011607e-06, |
| "loss": 0.0, |
| "step": 743 |
| }, |
| { |
| "epoch": 3.2496350364963504, |
| "grad_norm": 0.0023869157303124666, |
| "learning_rate": 2.438066418511389e-06, |
| "loss": 0.0, |
| "step": 744 |
| }, |
| { |
| "epoch": 3.254014598540146, |
| "grad_norm": 0.0015038090059533715, |
| "learning_rate": 2.431874524099818e-06, |
| "loss": 0.0, |
| "step": 745 |
| }, |
| { |
| "epoch": 3.2583941605839417, |
| "grad_norm": 0.0022007508669048548, |
| "learning_rate": 2.4256830478752823e-06, |
| "loss": 0.0, |
| "step": 746 |
| }, |
| { |
| "epoch": 3.2627737226277373, |
| "grad_norm": 0.000799484783783555, |
| "learning_rate": 2.419492027844051e-06, |
| "loss": 0.0, |
| "step": 747 |
| }, |
| { |
| "epoch": 3.267153284671533, |
| "grad_norm": 0.0014924613060429692, |
| "learning_rate": 2.413301502009591e-06, |
| "loss": 0.0, |
| "step": 748 |
| }, |
| { |
| "epoch": 3.2715328467153286, |
| "grad_norm": 0.0013423648197203875, |
| "learning_rate": 2.4071115083723367e-06, |
| "loss": 0.0, |
| "step": 749 |
| }, |
| { |
| "epoch": 3.2759124087591243, |
| "grad_norm": 0.0016152571188285947, |
| "learning_rate": 2.4009220849294546e-06, |
| "loss": 0.0, |
| "step": 750 |
| }, |
| { |
| "epoch": 3.28029197080292, |
| "grad_norm": 0.002237207954749465, |
| "learning_rate": 2.394733269674612e-06, |
| "loss": 0.0, |
| "step": 751 |
| }, |
| { |
| "epoch": 3.2846715328467155, |
| "grad_norm": 0.0030521079897880554, |
| "learning_rate": 2.388545100597743e-06, |
| "loss": 0.0, |
| "step": 752 |
| }, |
| { |
| "epoch": 3.289051094890511, |
| "grad_norm": 0.0032652418594807386, |
| "learning_rate": 2.3823576156848138e-06, |
| "loss": 0.0, |
| "step": 753 |
| }, |
| { |
| "epoch": 3.293430656934307, |
| "grad_norm": 0.0014183277962729335, |
| "learning_rate": 2.3761708529175924e-06, |
| "loss": 0.0, |
| "step": 754 |
| }, |
| { |
| "epoch": 3.297810218978102, |
| "grad_norm": 0.0078318752348423, |
| "learning_rate": 2.3699848502734126e-06, |
| "loss": 0.0, |
| "step": 755 |
| }, |
| { |
| "epoch": 3.3021897810218976, |
| "grad_norm": 0.003864576341584325, |
| "learning_rate": 2.3637996457249433e-06, |
| "loss": 0.0, |
| "step": 756 |
| }, |
| { |
| "epoch": 3.3065693430656933, |
| "grad_norm": 0.002102078404277563, |
| "learning_rate": 2.357615277239954e-06, |
| "loss": 0.0, |
| "step": 757 |
| }, |
| { |
| "epoch": 3.310948905109489, |
| "grad_norm": 0.0014949225587770343, |
| "learning_rate": 2.3514317827810816e-06, |
| "loss": 0.0, |
| "step": 758 |
| }, |
| { |
| "epoch": 3.3153284671532846, |
| "grad_norm": 0.00100362254306674, |
| "learning_rate": 2.3452492003055987e-06, |
| "loss": 0.0, |
| "step": 759 |
| }, |
| { |
| "epoch": 3.31970802919708, |
| "grad_norm": 0.2444867193698883, |
| "learning_rate": 2.3390675677651778e-06, |
| "loss": 0.0011, |
| "step": 760 |
| }, |
| { |
| "epoch": 3.324087591240876, |
| "grad_norm": 0.0020129296462982893, |
| "learning_rate": 2.332886923105663e-06, |
| "loss": 0.0, |
| "step": 761 |
| }, |
| { |
| "epoch": 3.3284671532846715, |
| "grad_norm": 0.22737674415111542, |
| "learning_rate": 2.326707304266832e-06, |
| "loss": 0.0006, |
| "step": 762 |
| }, |
| { |
| "epoch": 3.332846715328467, |
| "grad_norm": 0.008498843759298325, |
| "learning_rate": 2.3205287491821663e-06, |
| "loss": 0.0, |
| "step": 763 |
| }, |
| { |
| "epoch": 3.3372262773722627, |
| "grad_norm": 0.05480220168828964, |
| "learning_rate": 2.3143512957786184e-06, |
| "loss": 0.0001, |
| "step": 764 |
| }, |
| { |
| "epoch": 3.3416058394160584, |
| "grad_norm": 0.0011567015899345279, |
| "learning_rate": 2.308174981976377e-06, |
| "loss": 0.0, |
| "step": 765 |
| }, |
| { |
| "epoch": 3.345985401459854, |
| "grad_norm": 0.0017249404918402433, |
| "learning_rate": 2.301999845688635e-06, |
| "loss": 0.0, |
| "step": 766 |
| }, |
| { |
| "epoch": 3.3503649635036497, |
| "grad_norm": 0.04111481457948685, |
| "learning_rate": 2.2958259248213595e-06, |
| "loss": 0.0, |
| "step": 767 |
| }, |
| { |
| "epoch": 3.3547445255474453, |
| "grad_norm": 0.0038588643074035645, |
| "learning_rate": 2.2896532572730534e-06, |
| "loss": 0.0, |
| "step": 768 |
| }, |
| { |
| "epoch": 3.359124087591241, |
| "grad_norm": 0.001527044572867453, |
| "learning_rate": 2.2834818809345295e-06, |
| "loss": 0.0, |
| "step": 769 |
| }, |
| { |
| "epoch": 3.3635036496350366, |
| "grad_norm": 0.009431148879230022, |
| "learning_rate": 2.2773118336886723e-06, |
| "loss": 0.0, |
| "step": 770 |
| }, |
| { |
| "epoch": 3.367883211678832, |
| "grad_norm": 0.0027088054921478033, |
| "learning_rate": 2.271143153410208e-06, |
| "loss": 0.0, |
| "step": 771 |
| }, |
| { |
| "epoch": 3.372262773722628, |
| "grad_norm": 0.002571119461208582, |
| "learning_rate": 2.264975877965473e-06, |
| "loss": 0.0, |
| "step": 772 |
| }, |
| { |
| "epoch": 3.3766423357664235, |
| "grad_norm": 0.0013004938373342156, |
| "learning_rate": 2.258810045212178e-06, |
| "loss": 0.0, |
| "step": 773 |
| }, |
| { |
| "epoch": 3.381021897810219, |
| "grad_norm": 0.0035143231507390738, |
| "learning_rate": 2.2526456929991795e-06, |
| "loss": 0.0, |
| "step": 774 |
| }, |
| { |
| "epoch": 3.3854014598540147, |
| "grad_norm": 0.0021215202286839485, |
| "learning_rate": 2.2464828591662452e-06, |
| "loss": 0.0, |
| "step": 775 |
| }, |
| { |
| "epoch": 3.3897810218978104, |
| "grad_norm": 0.004478083923459053, |
| "learning_rate": 2.240321581543822e-06, |
| "loss": 0.0, |
| "step": 776 |
| }, |
| { |
| "epoch": 3.394160583941606, |
| "grad_norm": 0.0010630427859723568, |
| "learning_rate": 2.2341618979528044e-06, |
| "loss": 0.0, |
| "step": 777 |
| }, |
| { |
| "epoch": 3.398540145985401, |
| "grad_norm": 0.010118206031620502, |
| "learning_rate": 2.2280038462043017e-06, |
| "loss": 0.0, |
| "step": 778 |
| }, |
| { |
| "epoch": 3.402919708029197, |
| "grad_norm": 0.005418050102889538, |
| "learning_rate": 2.2218474640994064e-06, |
| "loss": 0.0, |
| "step": 779 |
| }, |
| { |
| "epoch": 3.4072992700729925, |
| "grad_norm": 0.00403578719124198, |
| "learning_rate": 2.215692789428962e-06, |
| "loss": 0.0, |
| "step": 780 |
| }, |
| { |
| "epoch": 3.411678832116788, |
| "grad_norm": 0.004099252633750439, |
| "learning_rate": 2.20953985997333e-06, |
| "loss": 0.0, |
| "step": 781 |
| }, |
| { |
| "epoch": 3.4160583941605838, |
| "grad_norm": 0.0032652115914970636, |
| "learning_rate": 2.2033887135021605e-06, |
| "loss": 0.0, |
| "step": 782 |
| }, |
| { |
| "epoch": 3.4204379562043794, |
| "grad_norm": 0.002060306491330266, |
| "learning_rate": 2.1972393877741578e-06, |
| "loss": 0.0, |
| "step": 783 |
| }, |
| { |
| "epoch": 3.424817518248175, |
| "grad_norm": 0.005281209945678711, |
| "learning_rate": 2.191091920536849e-06, |
| "loss": 0.0, |
| "step": 784 |
| }, |
| { |
| "epoch": 3.4291970802919707, |
| "grad_norm": 0.006397861056029797, |
| "learning_rate": 2.1849463495263547e-06, |
| "loss": 0.0, |
| "step": 785 |
| }, |
| { |
| "epoch": 3.4335766423357663, |
| "grad_norm": 0.008003026247024536, |
| "learning_rate": 2.1788027124671542e-06, |
| "loss": 0.0, |
| "step": 786 |
| }, |
| { |
| "epoch": 3.437956204379562, |
| "grad_norm": 0.005707223899662495, |
| "learning_rate": 2.1726610470718553e-06, |
| "loss": 0.0, |
| "step": 787 |
| }, |
| { |
| "epoch": 3.4423357664233576, |
| "grad_norm": 0.007127422373741865, |
| "learning_rate": 2.166521391040963e-06, |
| "loss": 0.0, |
| "step": 788 |
| }, |
| { |
| "epoch": 3.4467153284671532, |
| "grad_norm": 0.0022657839581370354, |
| "learning_rate": 2.1603837820626478e-06, |
| "loss": 0.0, |
| "step": 789 |
| }, |
| { |
| "epoch": 3.451094890510949, |
| "grad_norm": 0.004881497472524643, |
| "learning_rate": 2.1542482578125148e-06, |
| "loss": 0.0, |
| "step": 790 |
| }, |
| { |
| "epoch": 3.4554744525547445, |
| "grad_norm": 0.0031092618592083454, |
| "learning_rate": 2.1481148559533703e-06, |
| "loss": 0.0, |
| "step": 791 |
| }, |
| { |
| "epoch": 3.45985401459854, |
| "grad_norm": 0.010201151482760906, |
| "learning_rate": 2.1419836141349964e-06, |
| "loss": 0.0, |
| "step": 792 |
| }, |
| { |
| "epoch": 3.4642335766423358, |
| "grad_norm": 0.0022016458678990602, |
| "learning_rate": 2.1358545699939114e-06, |
| "loss": 0.0, |
| "step": 793 |
| }, |
| { |
| "epoch": 3.4686131386861314, |
| "grad_norm": 0.001897580805234611, |
| "learning_rate": 2.129727761153146e-06, |
| "loss": 0.0, |
| "step": 794 |
| }, |
| { |
| "epoch": 3.472992700729927, |
| "grad_norm": 0.0018193572759628296, |
| "learning_rate": 2.1236032252220074e-06, |
| "loss": 0.0, |
| "step": 795 |
| }, |
| { |
| "epoch": 3.4773722627737227, |
| "grad_norm": 0.0025425944477319717, |
| "learning_rate": 2.117480999795853e-06, |
| "loss": 0.0, |
| "step": 796 |
| }, |
| { |
| "epoch": 3.4817518248175183, |
| "grad_norm": 0.0025497935712337494, |
| "learning_rate": 2.1113611224558545e-06, |
| "loss": 0.0, |
| "step": 797 |
| }, |
| { |
| "epoch": 3.486131386861314, |
| "grad_norm": 0.002034541452303529, |
| "learning_rate": 2.1052436307687725e-06, |
| "loss": 0.0, |
| "step": 798 |
| }, |
| { |
| "epoch": 3.4905109489051096, |
| "grad_norm": 0.004022255074232817, |
| "learning_rate": 2.0991285622867215e-06, |
| "loss": 0.0, |
| "step": 799 |
| }, |
| { |
| "epoch": 3.4948905109489052, |
| "grad_norm": 0.003748381743207574, |
| "learning_rate": 2.093015954546942e-06, |
| "loss": 0.0, |
| "step": 800 |
| }, |
| { |
| "epoch": 3.499270072992701, |
| "grad_norm": 0.005397303961217403, |
| "learning_rate": 2.0869058450715694e-06, |
| "loss": 0.0, |
| "step": 801 |
| }, |
| { |
| "epoch": 3.5036496350364965, |
| "grad_norm": 0.0028435164131224155, |
| "learning_rate": 2.0807982713674037e-06, |
| "loss": 0.0, |
| "step": 802 |
| }, |
| { |
| "epoch": 3.508029197080292, |
| "grad_norm": 0.01242138259112835, |
| "learning_rate": 2.074693270925677e-06, |
| "loss": 0.0, |
| "step": 803 |
| }, |
| { |
| "epoch": 3.512408759124088, |
| "grad_norm": 0.08091861754655838, |
| "learning_rate": 2.068590881221829e-06, |
| "loss": 0.0002, |
| "step": 804 |
| }, |
| { |
| "epoch": 3.5167883211678834, |
| "grad_norm": 0.007172738667577505, |
| "learning_rate": 2.062491139715271e-06, |
| "loss": 0.0, |
| "step": 805 |
| }, |
| { |
| "epoch": 3.521167883211679, |
| "grad_norm": 0.0017966198502108455, |
| "learning_rate": 2.056394083849158e-06, |
| "loss": 0.0, |
| "step": 806 |
| }, |
| { |
| "epoch": 3.5255474452554747, |
| "grad_norm": 0.0038727717474102974, |
| "learning_rate": 2.0502997510501614e-06, |
| "loss": 0.0, |
| "step": 807 |
| }, |
| { |
| "epoch": 3.5299270072992703, |
| "grad_norm": 0.004094596020877361, |
| "learning_rate": 2.0442081787282354e-06, |
| "loss": 0.0, |
| "step": 808 |
| }, |
| { |
| "epoch": 3.5343065693430655, |
| "grad_norm": 0.006543958559632301, |
| "learning_rate": 2.03811940427639e-06, |
| "loss": 0.0, |
| "step": 809 |
| }, |
| { |
| "epoch": 3.538686131386861, |
| "grad_norm": 0.0020556284580379725, |
| "learning_rate": 2.0320334650704595e-06, |
| "loss": 0.0, |
| "step": 810 |
| }, |
| { |
| "epoch": 3.543065693430657, |
| "grad_norm": 0.0070542385801672935, |
| "learning_rate": 2.025950398468875e-06, |
| "loss": 0.0, |
| "step": 811 |
| }, |
| { |
| "epoch": 3.5474452554744524, |
| "grad_norm": 0.017733529210090637, |
| "learning_rate": 2.0198702418124345e-06, |
| "loss": 0.0, |
| "step": 812 |
| }, |
| { |
| "epoch": 3.551824817518248, |
| "grad_norm": 0.009895303286612034, |
| "learning_rate": 2.013793032424072e-06, |
| "loss": 0.0, |
| "step": 813 |
| }, |
| { |
| "epoch": 3.5562043795620437, |
| "grad_norm": 0.001627814257517457, |
| "learning_rate": 2.007718807608629e-06, |
| "loss": 0.0, |
| "step": 814 |
| }, |
| { |
| "epoch": 3.5605839416058394, |
| "grad_norm": 0.005018120631575584, |
| "learning_rate": 2.0016476046526308e-06, |
| "loss": 0.0, |
| "step": 815 |
| }, |
| { |
| "epoch": 3.564963503649635, |
| "grad_norm": 0.0010407187510281801, |
| "learning_rate": 1.995579460824048e-06, |
| "loss": 0.0, |
| "step": 816 |
| }, |
| { |
| "epoch": 3.5693430656934306, |
| "grad_norm": 0.0016328482888638973, |
| "learning_rate": 1.989514413372076e-06, |
| "loss": 0.0, |
| "step": 817 |
| }, |
| { |
| "epoch": 3.5737226277372263, |
| "grad_norm": 0.007556082680821419, |
| "learning_rate": 1.983452499526901e-06, |
| "loss": 0.0, |
| "step": 818 |
| }, |
| { |
| "epoch": 3.578102189781022, |
| "grad_norm": 0.0021091431844979525, |
| "learning_rate": 1.9773937564994747e-06, |
| "loss": 0.0, |
| "step": 819 |
| }, |
| { |
| "epoch": 3.5824817518248175, |
| "grad_norm": 0.0016330553917214274, |
| "learning_rate": 1.9713382214812847e-06, |
| "loss": 0.0, |
| "step": 820 |
| }, |
| { |
| "epoch": 3.586861313868613, |
| "grad_norm": 0.0024340248201042414, |
| "learning_rate": 1.9652859316441266e-06, |
| "loss": 0.0, |
| "step": 821 |
| }, |
| { |
| "epoch": 3.591240875912409, |
| "grad_norm": 0.001374023617245257, |
| "learning_rate": 1.9592369241398747e-06, |
| "loss": 0.0, |
| "step": 822 |
| }, |
| { |
| "epoch": 3.5956204379562045, |
| "grad_norm": 0.0006934819975867867, |
| "learning_rate": 1.9531912361002554e-06, |
| "loss": 0.0, |
| "step": 823 |
| }, |
| { |
| "epoch": 3.6, |
| "grad_norm": 0.0039897337555885315, |
| "learning_rate": 1.9471489046366184e-06, |
| "loss": 0.0, |
| "step": 824 |
| }, |
| { |
| "epoch": 3.6043795620437957, |
| "grad_norm": 0.002902815816923976, |
| "learning_rate": 1.941109966839709e-06, |
| "loss": 0.0, |
| "step": 825 |
| }, |
| { |
| "epoch": 3.6087591240875914, |
| "grad_norm": 0.009976208209991455, |
| "learning_rate": 1.9350744597794407e-06, |
| "loss": 0.0, |
| "step": 826 |
| }, |
| { |
| "epoch": 3.613138686131387, |
| "grad_norm": 0.0016394915292039514, |
| "learning_rate": 1.929042420504667e-06, |
| "loss": 0.0, |
| "step": 827 |
| }, |
| { |
| "epoch": 3.6175182481751826, |
| "grad_norm": 0.011417590081691742, |
| "learning_rate": 1.923013886042956e-06, |
| "loss": 0.0, |
| "step": 828 |
| }, |
| { |
| "epoch": 3.621897810218978, |
| "grad_norm": 0.006547160446643829, |
| "learning_rate": 1.91698889340036e-06, |
| "loss": 0.0, |
| "step": 829 |
| }, |
| { |
| "epoch": 3.6262773722627735, |
| "grad_norm": 0.0013417567824944854, |
| "learning_rate": 1.9109674795611897e-06, |
| "loss": 0.0, |
| "step": 830 |
| }, |
| { |
| "epoch": 3.630656934306569, |
| "grad_norm": 0.0021602141205221415, |
| "learning_rate": 1.9049496814877894e-06, |
| "loss": 0.0, |
| "step": 831 |
| }, |
| { |
| "epoch": 3.6350364963503647, |
| "grad_norm": 0.004333204589784145, |
| "learning_rate": 1.8989355361203057e-06, |
| "loss": 0.0, |
| "step": 832 |
| }, |
| { |
| "epoch": 3.6394160583941604, |
| "grad_norm": 0.0013988579157739878, |
| "learning_rate": 1.892925080376465e-06, |
| "loss": 0.0, |
| "step": 833 |
| }, |
| { |
| "epoch": 3.643795620437956, |
| "grad_norm": 0.0024188708048313856, |
| "learning_rate": 1.886918351151343e-06, |
| "loss": 0.0, |
| "step": 834 |
| }, |
| { |
| "epoch": 3.6481751824817517, |
| "grad_norm": 0.0826224833726883, |
| "learning_rate": 1.8809153853171428e-06, |
| "loss": 0.0, |
| "step": 835 |
| }, |
| { |
| "epoch": 3.6525547445255473, |
| "grad_norm": 0.0022762177977710962, |
| "learning_rate": 1.8749162197229626e-06, |
| "loss": 0.0, |
| "step": 836 |
| }, |
| { |
| "epoch": 3.656934306569343, |
| "grad_norm": 0.0012058281572535634, |
| "learning_rate": 1.8689208911945771e-06, |
| "loss": 0.0, |
| "step": 837 |
| }, |
| { |
| "epoch": 3.6613138686131386, |
| "grad_norm": 0.0028202789835631847, |
| "learning_rate": 1.8629294365342049e-06, |
| "loss": 0.0, |
| "step": 838 |
| }, |
| { |
| "epoch": 3.665693430656934, |
| "grad_norm": 0.0023832700680941343, |
| "learning_rate": 1.8569418925202841e-06, |
| "loss": 0.0, |
| "step": 839 |
| }, |
| { |
| "epoch": 3.67007299270073, |
| "grad_norm": 0.0048092082142829895, |
| "learning_rate": 1.8509582959072487e-06, |
| "loss": 0.0, |
| "step": 840 |
| }, |
| { |
| "epoch": 3.6744525547445255, |
| "grad_norm": 0.004336558748036623, |
| "learning_rate": 1.8449786834253016e-06, |
| "loss": 0.0, |
| "step": 841 |
| }, |
| { |
| "epoch": 3.678832116788321, |
| "grad_norm": 0.008016410283744335, |
| "learning_rate": 1.8390030917801883e-06, |
| "loss": 0.0, |
| "step": 842 |
| }, |
| { |
| "epoch": 3.6832116788321168, |
| "grad_norm": 0.003487163921818137, |
| "learning_rate": 1.8330315576529733e-06, |
| "loss": 0.0, |
| "step": 843 |
| }, |
| { |
| "epoch": 3.6875912408759124, |
| "grad_norm": 0.005607697181403637, |
| "learning_rate": 1.8270641176998138e-06, |
| "loss": 0.0, |
| "step": 844 |
| }, |
| { |
| "epoch": 3.691970802919708, |
| "grad_norm": 0.013032430782914162, |
| "learning_rate": 1.8211008085517348e-06, |
| "loss": 0.0, |
| "step": 845 |
| }, |
| { |
| "epoch": 3.6963503649635037, |
| "grad_norm": 0.0017879578517749906, |
| "learning_rate": 1.815141666814405e-06, |
| "loss": 0.0, |
| "step": 846 |
| }, |
| { |
| "epoch": 3.7007299270072993, |
| "grad_norm": 0.0024302792735397816, |
| "learning_rate": 1.809186729067911e-06, |
| "loss": 0.0, |
| "step": 847 |
| }, |
| { |
| "epoch": 3.705109489051095, |
| "grad_norm": 0.0009780308464542031, |
| "learning_rate": 1.8032360318665337e-06, |
| "loss": 0.0, |
| "step": 848 |
| }, |
| { |
| "epoch": 3.7094890510948906, |
| "grad_norm": 0.19592009484767914, |
| "learning_rate": 1.797289611738523e-06, |
| "loss": 0.0019, |
| "step": 849 |
| }, |
| { |
| "epoch": 3.713868613138686, |
| "grad_norm": 0.002372670453041792, |
| "learning_rate": 1.7913475051858746e-06, |
| "loss": 0.0, |
| "step": 850 |
| }, |
| { |
| "epoch": 3.718248175182482, |
| "grad_norm": 0.002017020247876644, |
| "learning_rate": 1.7854097486841043e-06, |
| "loss": 0.0, |
| "step": 851 |
| }, |
| { |
| "epoch": 3.7226277372262775, |
| "grad_norm": 0.0022830108646303415, |
| "learning_rate": 1.7794763786820268e-06, |
| "loss": 0.0, |
| "step": 852 |
| }, |
| { |
| "epoch": 3.727007299270073, |
| "grad_norm": 0.002295683603733778, |
| "learning_rate": 1.7735474316015294e-06, |
| "loss": 0.0, |
| "step": 853 |
| }, |
| { |
| "epoch": 3.7313868613138688, |
| "grad_norm": 0.001965237082913518, |
| "learning_rate": 1.767622943837349e-06, |
| "loss": 0.0, |
| "step": 854 |
| }, |
| { |
| "epoch": 3.7357664233576644, |
| "grad_norm": 0.0012728808214887977, |
| "learning_rate": 1.7617029517568502e-06, |
| "loss": 0.0, |
| "step": 855 |
| }, |
| { |
| "epoch": 3.74014598540146, |
| "grad_norm": 0.0020083924755454063, |
| "learning_rate": 1.7557874916997996e-06, |
| "loss": 0.0, |
| "step": 856 |
| }, |
| { |
| "epoch": 3.7445255474452557, |
| "grad_norm": 0.0020820496138185263, |
| "learning_rate": 1.7498765999781455e-06, |
| "loss": 0.0, |
| "step": 857 |
| }, |
| { |
| "epoch": 3.7489051094890513, |
| "grad_norm": 0.0020251362584531307, |
| "learning_rate": 1.7439703128757923e-06, |
| "loss": 0.0, |
| "step": 858 |
| }, |
| { |
| "epoch": 3.753284671532847, |
| "grad_norm": 0.0015859343111515045, |
| "learning_rate": 1.7380686666483793e-06, |
| "loss": 0.0, |
| "step": 859 |
| }, |
| { |
| "epoch": 3.7576642335766426, |
| "grad_norm": 0.001591153210029006, |
| "learning_rate": 1.7321716975230588e-06, |
| "loss": 0.0, |
| "step": 860 |
| }, |
| { |
| "epoch": 3.7620437956204382, |
| "grad_norm": 0.0018416156526654959, |
| "learning_rate": 1.7262794416982717e-06, |
| "loss": 0.0, |
| "step": 861 |
| }, |
| { |
| "epoch": 3.7664233576642334, |
| "grad_norm": 0.0017293720738962293, |
| "learning_rate": 1.7203919353435269e-06, |
| "loss": 0.0, |
| "step": 862 |
| }, |
| { |
| "epoch": 3.770802919708029, |
| "grad_norm": 0.0010803790064528584, |
| "learning_rate": 1.7145092145991786e-06, |
| "loss": 0.0, |
| "step": 863 |
| }, |
| { |
| "epoch": 3.7751824817518247, |
| "grad_norm": 0.002193450927734375, |
| "learning_rate": 1.7086313155762046e-06, |
| "loss": 0.0, |
| "step": 864 |
| }, |
| { |
| "epoch": 3.7795620437956203, |
| "grad_norm": 0.11664100736379623, |
| "learning_rate": 1.7027582743559845e-06, |
| "loss": 0.0008, |
| "step": 865 |
| }, |
| { |
| "epoch": 3.783941605839416, |
| "grad_norm": 0.0010278144618496299, |
| "learning_rate": 1.696890126990079e-06, |
| "loss": 0.0, |
| "step": 866 |
| }, |
| { |
| "epoch": 3.7883211678832116, |
| "grad_norm": 0.0027571087703108788, |
| "learning_rate": 1.691026909500007e-06, |
| "loss": 0.0, |
| "step": 867 |
| }, |
| { |
| "epoch": 3.7927007299270072, |
| "grad_norm": 0.001730642979964614, |
| "learning_rate": 1.6851686578770263e-06, |
| "loss": 0.0, |
| "step": 868 |
| }, |
| { |
| "epoch": 3.797080291970803, |
| "grad_norm": 0.0017755437875166535, |
| "learning_rate": 1.6793154080819112e-06, |
| "loss": 0.0, |
| "step": 869 |
| }, |
| { |
| "epoch": 3.8014598540145985, |
| "grad_norm": 0.0018059754511341453, |
| "learning_rate": 1.6734671960447335e-06, |
| "loss": 0.0, |
| "step": 870 |
| }, |
| { |
| "epoch": 3.805839416058394, |
| "grad_norm": 0.0010772132081910968, |
| "learning_rate": 1.6676240576646389e-06, |
| "loss": 0.0, |
| "step": 871 |
| }, |
| { |
| "epoch": 3.81021897810219, |
| "grad_norm": 0.0013907999964430928, |
| "learning_rate": 1.66178602880963e-06, |
| "loss": 0.0, |
| "step": 872 |
| }, |
| { |
| "epoch": 3.8145985401459854, |
| "grad_norm": 0.0017574954545125365, |
| "learning_rate": 1.655953145316344e-06, |
| "loss": 0.0, |
| "step": 873 |
| }, |
| { |
| "epoch": 3.818978102189781, |
| "grad_norm": 0.0023797417525202036, |
| "learning_rate": 1.6501254429898345e-06, |
| "loss": 0.0, |
| "step": 874 |
| }, |
| { |
| "epoch": 3.8233576642335767, |
| "grad_norm": 0.0010705847525969148, |
| "learning_rate": 1.6443029576033493e-06, |
| "loss": 0.0, |
| "step": 875 |
| }, |
| { |
| "epoch": 3.8277372262773723, |
| "grad_norm": 0.0013967688428238034, |
| "learning_rate": 1.6384857248981117e-06, |
| "loss": 0.0, |
| "step": 876 |
| }, |
| { |
| "epoch": 3.832116788321168, |
| "grad_norm": 0.0027486851904541254, |
| "learning_rate": 1.6326737805831039e-06, |
| "loss": 0.0, |
| "step": 877 |
| }, |
| { |
| "epoch": 3.8364963503649636, |
| "grad_norm": 0.0009487051866017282, |
| "learning_rate": 1.6268671603348428e-06, |
| "loss": 0.0, |
| "step": 878 |
| }, |
| { |
| "epoch": 3.8408759124087593, |
| "grad_norm": 0.0026726871728897095, |
| "learning_rate": 1.621065899797165e-06, |
| "loss": 0.0, |
| "step": 879 |
| }, |
| { |
| "epoch": 3.845255474452555, |
| "grad_norm": 0.0028086488600820303, |
| "learning_rate": 1.6152700345810063e-06, |
| "loss": 0.0, |
| "step": 880 |
| }, |
| { |
| "epoch": 3.8496350364963505, |
| "grad_norm": 0.005458397325128317, |
| "learning_rate": 1.6094796002641836e-06, |
| "loss": 0.0, |
| "step": 881 |
| }, |
| { |
| "epoch": 3.8540145985401457, |
| "grad_norm": 0.0014935546787455678, |
| "learning_rate": 1.6036946323911753e-06, |
| "loss": 0.0, |
| "step": 882 |
| }, |
| { |
| "epoch": 3.8583941605839414, |
| "grad_norm": 0.002109678229317069, |
| "learning_rate": 1.5979151664729063e-06, |
| "loss": 0.0, |
| "step": 883 |
| }, |
| { |
| "epoch": 3.862773722627737, |
| "grad_norm": 0.0017332076095044613, |
| "learning_rate": 1.5921412379865259e-06, |
| "loss": 0.0, |
| "step": 884 |
| }, |
| { |
| "epoch": 3.8671532846715326, |
| "grad_norm": 0.009105571545660496, |
| "learning_rate": 1.5863728823751922e-06, |
| "loss": 0.0, |
| "step": 885 |
| }, |
| { |
| "epoch": 3.8715328467153283, |
| "grad_norm": 0.0020738227758556604, |
| "learning_rate": 1.5806101350478552e-06, |
| "loss": 0.0, |
| "step": 886 |
| }, |
| { |
| "epoch": 3.875912408759124, |
| "grad_norm": 0.0010305409086868167, |
| "learning_rate": 1.5748530313790379e-06, |
| "loss": 0.0, |
| "step": 887 |
| }, |
| { |
| "epoch": 3.8802919708029195, |
| "grad_norm": 0.002297050319612026, |
| "learning_rate": 1.5691016067086198e-06, |
| "loss": 0.0, |
| "step": 888 |
| }, |
| { |
| "epoch": 3.884671532846715, |
| "grad_norm": 0.002647539833560586, |
| "learning_rate": 1.5633558963416203e-06, |
| "loss": 0.0, |
| "step": 889 |
| }, |
| { |
| "epoch": 3.889051094890511, |
| "grad_norm": 0.0012281707022339106, |
| "learning_rate": 1.5576159355479814e-06, |
| "loss": 0.0, |
| "step": 890 |
| }, |
| { |
| "epoch": 3.8934306569343065, |
| "grad_norm": 0.0018704022513702512, |
| "learning_rate": 1.5518817595623514e-06, |
| "loss": 0.0, |
| "step": 891 |
| }, |
| { |
| "epoch": 3.897810218978102, |
| "grad_norm": 0.001662683323957026, |
| "learning_rate": 1.546153403583869e-06, |
| "loss": 0.0, |
| "step": 892 |
| }, |
| { |
| "epoch": 3.9021897810218977, |
| "grad_norm": 0.0023420508950948715, |
| "learning_rate": 1.540430902775946e-06, |
| "loss": 0.0, |
| "step": 893 |
| }, |
| { |
| "epoch": 3.9065693430656934, |
| "grad_norm": 0.0019655104260891676, |
| "learning_rate": 1.534714292266054e-06, |
| "loss": 0.0, |
| "step": 894 |
| }, |
| { |
| "epoch": 3.910948905109489, |
| "grad_norm": 0.0009503703331574798, |
| "learning_rate": 1.5290036071455056e-06, |
| "loss": 0.0, |
| "step": 895 |
| }, |
| { |
| "epoch": 3.9153284671532846, |
| "grad_norm": 0.04980412498116493, |
| "learning_rate": 1.5232988824692406e-06, |
| "loss": 0.0, |
| "step": 896 |
| }, |
| { |
| "epoch": 3.9197080291970803, |
| "grad_norm": 0.0007013222202658653, |
| "learning_rate": 1.5176001532556118e-06, |
| "loss": 0.0, |
| "step": 897 |
| }, |
| { |
| "epoch": 3.924087591240876, |
| "grad_norm": 0.0010505859972909093, |
| "learning_rate": 1.511907454486168e-06, |
| "loss": 0.0, |
| "step": 898 |
| }, |
| { |
| "epoch": 3.9284671532846716, |
| "grad_norm": 0.0009482012246735394, |
| "learning_rate": 1.5062208211054398e-06, |
| "loss": 0.0, |
| "step": 899 |
| }, |
| { |
| "epoch": 3.932846715328467, |
| "grad_norm": 0.004321451764553785, |
| "learning_rate": 1.5005402880207272e-06, |
| "loss": 0.0, |
| "step": 900 |
| }, |
| { |
| "epoch": 3.937226277372263, |
| "grad_norm": 0.00210862560197711, |
| "learning_rate": 1.4948658901018826e-06, |
| "loss": 0.0, |
| "step": 901 |
| }, |
| { |
| "epoch": 3.9416058394160585, |
| "grad_norm": 0.00250151171348989, |
| "learning_rate": 1.4891976621810972e-06, |
| "loss": 0.0, |
| "step": 902 |
| }, |
| { |
| "epoch": 3.945985401459854, |
| "grad_norm": 0.0026350142434239388, |
| "learning_rate": 1.483535639052689e-06, |
| "loss": 0.0, |
| "step": 903 |
| }, |
| { |
| "epoch": 3.9503649635036497, |
| "grad_norm": 0.004716258030384779, |
| "learning_rate": 1.4778798554728866e-06, |
| "loss": 0.0, |
| "step": 904 |
| }, |
| { |
| "epoch": 3.9547445255474454, |
| "grad_norm": 0.0018248335691168904, |
| "learning_rate": 1.4722303461596192e-06, |
| "loss": 0.0, |
| "step": 905 |
| }, |
| { |
| "epoch": 3.959124087591241, |
| "grad_norm": 0.013006918132305145, |
| "learning_rate": 1.4665871457922997e-06, |
| "loss": 0.0, |
| "step": 906 |
| }, |
| { |
| "epoch": 3.9635036496350367, |
| "grad_norm": 0.0023916151840239763, |
| "learning_rate": 1.4609502890116146e-06, |
| "loss": 0.0, |
| "step": 907 |
| }, |
| { |
| "epoch": 3.9678832116788323, |
| "grad_norm": 0.001576004782691598, |
| "learning_rate": 1.4553198104193094e-06, |
| "loss": 0.0, |
| "step": 908 |
| }, |
| { |
| "epoch": 3.972262773722628, |
| "grad_norm": 0.0026557703968137503, |
| "learning_rate": 1.4496957445779792e-06, |
| "loss": 0.0, |
| "step": 909 |
| }, |
| { |
| "epoch": 3.9766423357664236, |
| "grad_norm": 0.002321658656001091, |
| "learning_rate": 1.4440781260108521e-06, |
| "loss": 0.0, |
| "step": 910 |
| }, |
| { |
| "epoch": 3.981021897810219, |
| "grad_norm": 0.0010188892483711243, |
| "learning_rate": 1.438466989201583e-06, |
| "loss": 0.0, |
| "step": 911 |
| }, |
| { |
| "epoch": 3.985401459854015, |
| "grad_norm": 0.0009046494378708303, |
| "learning_rate": 1.4328623685940335e-06, |
| "loss": 0.0, |
| "step": 912 |
| }, |
| { |
| "epoch": 3.9897810218978105, |
| "grad_norm": 0.0014007722493261099, |
| "learning_rate": 1.4272642985920705e-06, |
| "loss": 0.0, |
| "step": 913 |
| }, |
| { |
| "epoch": 3.994160583941606, |
| "grad_norm": 0.0011092593194916844, |
| "learning_rate": 1.4216728135593478e-06, |
| "loss": 0.0, |
| "step": 914 |
| }, |
| { |
| "epoch": 3.9985401459854013, |
| "grad_norm": 0.07808300852775574, |
| "learning_rate": 1.4160879478190975e-06, |
| "loss": 0.0003, |
| "step": 915 |
| }, |
| { |
| "epoch": 4.0, |
| "grad_norm": 0.07808300852775574, |
| "learning_rate": 1.4105097356539204e-06, |
| "loss": 0.0, |
| "step": 916 |
| }, |
| { |
| "epoch": 4.004379562043796, |
| "grad_norm": 0.006101331207901239, |
| "learning_rate": 1.404938211305574e-06, |
| "loss": 0.0, |
| "step": 917 |
| }, |
| { |
| "epoch": 4.008759124087591, |
| "grad_norm": 0.005551626440137625, |
| "learning_rate": 1.399373408974762e-06, |
| "loss": 0.0, |
| "step": 918 |
| }, |
| { |
| "epoch": 4.013138686131387, |
| "grad_norm": 0.001011027256026864, |
| "learning_rate": 1.3938153628209258e-06, |
| "loss": 0.0, |
| "step": 919 |
| }, |
| { |
| "epoch": 4.0175182481751825, |
| "grad_norm": 0.0016165445558726788, |
| "learning_rate": 1.388264106962034e-06, |
| "loss": 0.0, |
| "step": 920 |
| }, |
| { |
| "epoch": 4.021897810218978, |
| "grad_norm": 0.001529879285953939, |
| "learning_rate": 1.3827196754743732e-06, |
| "loss": 0.0, |
| "step": 921 |
| }, |
| { |
| "epoch": 4.026277372262774, |
| "grad_norm": 0.004560156259685755, |
| "learning_rate": 1.3771821023923385e-06, |
| "loss": 0.0, |
| "step": 922 |
| }, |
| { |
| "epoch": 4.0306569343065695, |
| "grad_norm": 0.0005666704964824021, |
| "learning_rate": 1.371651421708225e-06, |
| "loss": 0.0, |
| "step": 923 |
| }, |
| { |
| "epoch": 4.035036496350365, |
| "grad_norm": 0.0015721054514870048, |
| "learning_rate": 1.3661276673720184e-06, |
| "loss": 0.0, |
| "step": 924 |
| }, |
| { |
| "epoch": 4.039416058394161, |
| "grad_norm": 0.0012247064150869846, |
| "learning_rate": 1.3606108732911882e-06, |
| "loss": 0.0, |
| "step": 925 |
| }, |
| { |
| "epoch": 4.043795620437956, |
| "grad_norm": 0.0029379287734627724, |
| "learning_rate": 1.3551010733304772e-06, |
| "loss": 0.0, |
| "step": 926 |
| }, |
| { |
| "epoch": 4.048175182481752, |
| "grad_norm": 0.0017358518671244383, |
| "learning_rate": 1.3495983013116953e-06, |
| "loss": 0.0, |
| "step": 927 |
| }, |
| { |
| "epoch": 4.052554744525548, |
| "grad_norm": 0.005259380210191011, |
| "learning_rate": 1.344102591013513e-06, |
| "loss": 0.0, |
| "step": 928 |
| }, |
| { |
| "epoch": 4.056934306569343, |
| "grad_norm": 0.002643146552145481, |
| "learning_rate": 1.338613976171251e-06, |
| "loss": 0.0, |
| "step": 929 |
| }, |
| { |
| "epoch": 4.061313868613139, |
| "grad_norm": 0.003910914063453674, |
| "learning_rate": 1.3331324904766747e-06, |
| "loss": 0.0, |
| "step": 930 |
| }, |
| { |
| "epoch": 4.065693430656935, |
| "grad_norm": 0.0017648044740781188, |
| "learning_rate": 1.3276581675777878e-06, |
| "loss": 0.0, |
| "step": 931 |
| }, |
| { |
| "epoch": 4.07007299270073, |
| "grad_norm": 0.0062748463824391365, |
| "learning_rate": 1.3221910410786248e-06, |
| "loss": 0.0, |
| "step": 932 |
| }, |
| { |
| "epoch": 4.074452554744526, |
| "grad_norm": 0.0033638945315033197, |
| "learning_rate": 1.3167311445390458e-06, |
| "loss": 0.0, |
| "step": 933 |
| }, |
| { |
| "epoch": 4.0788321167883215, |
| "grad_norm": 0.0043069347739219666, |
| "learning_rate": 1.3112785114745296e-06, |
| "loss": 0.0, |
| "step": 934 |
| }, |
| { |
| "epoch": 4.083211678832117, |
| "grad_norm": 0.003385148011147976, |
| "learning_rate": 1.3058331753559688e-06, |
| "loss": 0.0, |
| "step": 935 |
| }, |
| { |
| "epoch": 4.087591240875913, |
| "grad_norm": 0.002727649873122573, |
| "learning_rate": 1.300395169609463e-06, |
| "loss": 0.0, |
| "step": 936 |
| }, |
| { |
| "epoch": 4.091970802919708, |
| "grad_norm": 0.0012481784215196967, |
| "learning_rate": 1.2949645276161149e-06, |
| "loss": 0.0, |
| "step": 937 |
| }, |
| { |
| "epoch": 4.096350364963504, |
| "grad_norm": 0.0026758962776511908, |
| "learning_rate": 1.2895412827118252e-06, |
| "loss": 0.0, |
| "step": 938 |
| }, |
| { |
| "epoch": 4.1007299270073, |
| "grad_norm": 0.0016837861621752381, |
| "learning_rate": 1.2841254681870869e-06, |
| "loss": 0.0, |
| "step": 939 |
| }, |
| { |
| "epoch": 4.105109489051095, |
| "grad_norm": 0.0034886333160102367, |
| "learning_rate": 1.2787171172867826e-06, |
| "loss": 0.0, |
| "step": 940 |
| }, |
| { |
| "epoch": 4.109489051094891, |
| "grad_norm": 0.001255241222679615, |
| "learning_rate": 1.273316263209979e-06, |
| "loss": 0.0, |
| "step": 941 |
| }, |
| { |
| "epoch": 4.113868613138687, |
| "grad_norm": 0.0037068205419927835, |
| "learning_rate": 1.2679229391097243e-06, |
| "loss": 0.0, |
| "step": 942 |
| }, |
| { |
| "epoch": 4.118248175182481, |
| "grad_norm": 0.0017508207820355892, |
| "learning_rate": 1.2625371780928428e-06, |
| "loss": 0.0, |
| "step": 943 |
| }, |
| { |
| "epoch": 4.122627737226277, |
| "grad_norm": 0.00240209954790771, |
| "learning_rate": 1.2571590132197344e-06, |
| "loss": 0.0, |
| "step": 944 |
| }, |
| { |
| "epoch": 4.127007299270073, |
| "grad_norm": 0.001833326299674809, |
| "learning_rate": 1.25178847750417e-06, |
| "loss": 0.0, |
| "step": 945 |
| }, |
| { |
| "epoch": 4.131386861313868, |
| "grad_norm": 0.001805648673325777, |
| "learning_rate": 1.2464256039130876e-06, |
| "loss": 0.0, |
| "step": 946 |
| }, |
| { |
| "epoch": 4.135766423357664, |
| "grad_norm": 0.000977763906121254, |
| "learning_rate": 1.2410704253663932e-06, |
| "loss": 0.0, |
| "step": 947 |
| }, |
| { |
| "epoch": 4.1401459854014595, |
| "grad_norm": 0.001024547847919166, |
| "learning_rate": 1.235722974736756e-06, |
| "loss": 0.0, |
| "step": 948 |
| }, |
| { |
| "epoch": 4.144525547445255, |
| "grad_norm": 0.00856176670640707, |
| "learning_rate": 1.2303832848494076e-06, |
| "loss": 0.0, |
| "step": 949 |
| }, |
| { |
| "epoch": 4.148905109489051, |
| "grad_norm": 0.0012515663402155042, |
| "learning_rate": 1.2250513884819403e-06, |
| "loss": 0.0, |
| "step": 950 |
| }, |
| { |
| "epoch": 4.153284671532846, |
| "grad_norm": 0.0014503548154607415, |
| "learning_rate": 1.2197273183641068e-06, |
| "loss": 0.0, |
| "step": 951 |
| }, |
| { |
| "epoch": 4.157664233576642, |
| "grad_norm": 0.0015921717276796699, |
| "learning_rate": 1.2144111071776174e-06, |
| "loss": 0.0, |
| "step": 952 |
| }, |
| { |
| "epoch": 4.162043795620438, |
| "grad_norm": 0.0009129928075708449, |
| "learning_rate": 1.2091027875559408e-06, |
| "loss": 0.0, |
| "step": 953 |
| }, |
| { |
| "epoch": 4.166423357664233, |
| "grad_norm": 0.03934188932180405, |
| "learning_rate": 1.203802392084104e-06, |
| "loss": 0.0001, |
| "step": 954 |
| }, |
| { |
| "epoch": 4.170802919708029, |
| "grad_norm": 0.003913360647857189, |
| "learning_rate": 1.1985099532984917e-06, |
| "loss": 0.0, |
| "step": 955 |
| }, |
| { |
| "epoch": 4.175182481751825, |
| "grad_norm": 0.0021251190919429064, |
| "learning_rate": 1.1932255036866457e-06, |
| "loss": 0.0, |
| "step": 956 |
| }, |
| { |
| "epoch": 4.17956204379562, |
| "grad_norm": 0.0021929247304797173, |
| "learning_rate": 1.1879490756870673e-06, |
| "loss": 0.0, |
| "step": 957 |
| }, |
| { |
| "epoch": 4.183941605839416, |
| "grad_norm": 0.0033020619302988052, |
| "learning_rate": 1.1826807016890172e-06, |
| "loss": 0.0, |
| "step": 958 |
| }, |
| { |
| "epoch": 4.1883211678832115, |
| "grad_norm": 0.0023623050656169653, |
| "learning_rate": 1.177420414032316e-06, |
| "loss": 0.0, |
| "step": 959 |
| }, |
| { |
| "epoch": 4.192700729927007, |
| "grad_norm": 0.001533759874291718, |
| "learning_rate": 1.1721682450071476e-06, |
| "loss": 0.0, |
| "step": 960 |
| }, |
| { |
| "epoch": 4.197080291970803, |
| "grad_norm": 0.004321869928389788, |
| "learning_rate": 1.1669242268538588e-06, |
| "loss": 0.0, |
| "step": 961 |
| }, |
| { |
| "epoch": 4.201459854014598, |
| "grad_norm": 0.0024329880252480507, |
| "learning_rate": 1.161688391762763e-06, |
| "loss": 0.0, |
| "step": 962 |
| }, |
| { |
| "epoch": 4.205839416058394, |
| "grad_norm": 0.0015800101682543755, |
| "learning_rate": 1.1564607718739419e-06, |
| "loss": 0.0, |
| "step": 963 |
| }, |
| { |
| "epoch": 4.21021897810219, |
| "grad_norm": 0.00098977901507169, |
| "learning_rate": 1.151241399277048e-06, |
| "loss": 0.0, |
| "step": 964 |
| }, |
| { |
| "epoch": 4.214598540145985, |
| "grad_norm": 0.0004697853000834584, |
| "learning_rate": 1.1460303060111084e-06, |
| "loss": 0.0, |
| "step": 965 |
| }, |
| { |
| "epoch": 4.218978102189781, |
| "grad_norm": 0.00686879875138402, |
| "learning_rate": 1.1408275240643273e-06, |
| "loss": 0.0, |
| "step": 966 |
| }, |
| { |
| "epoch": 4.223357664233577, |
| "grad_norm": 0.0036535828839987516, |
| "learning_rate": 1.1356330853738905e-06, |
| "loss": 0.0, |
| "step": 967 |
| }, |
| { |
| "epoch": 4.227737226277372, |
| "grad_norm": 0.004695031326264143, |
| "learning_rate": 1.1304470218257684e-06, |
| "loss": 0.0, |
| "step": 968 |
| }, |
| { |
| "epoch": 4.232116788321168, |
| "grad_norm": 0.000920642982237041, |
| "learning_rate": 1.1252693652545211e-06, |
| "loss": 0.0, |
| "step": 969 |
| }, |
| { |
| "epoch": 4.2364963503649635, |
| "grad_norm": 0.001294864108785987, |
| "learning_rate": 1.1201001474431023e-06, |
| "loss": 0.0, |
| "step": 970 |
| }, |
| { |
| "epoch": 4.240875912408759, |
| "grad_norm": 0.0016395091079175472, |
| "learning_rate": 1.114939400122664e-06, |
| "loss": 0.0, |
| "step": 971 |
| }, |
| { |
| "epoch": 4.245255474452555, |
| "grad_norm": 0.00123142811935395, |
| "learning_rate": 1.1097871549723628e-06, |
| "loss": 0.0, |
| "step": 972 |
| }, |
| { |
| "epoch": 4.24963503649635, |
| "grad_norm": 0.0019211244070902467, |
| "learning_rate": 1.104643443619167e-06, |
| "loss": 0.0, |
| "step": 973 |
| }, |
| { |
| "epoch": 4.254014598540146, |
| "grad_norm": 0.0006962378975003958, |
| "learning_rate": 1.0995082976376565e-06, |
| "loss": 0.0, |
| "step": 974 |
| }, |
| { |
| "epoch": 4.258394160583942, |
| "grad_norm": 0.003143315203487873, |
| "learning_rate": 1.094381748549835e-06, |
| "loss": 0.0, |
| "step": 975 |
| }, |
| { |
| "epoch": 4.262773722627737, |
| "grad_norm": 0.0016434816643595695, |
| "learning_rate": 1.089263827824934e-06, |
| "loss": 0.0, |
| "step": 976 |
| }, |
| { |
| "epoch": 4.267153284671533, |
| "grad_norm": 0.0007677828543819487, |
| "learning_rate": 1.08415456687922e-06, |
| "loss": 0.0, |
| "step": 977 |
| }, |
| { |
| "epoch": 4.271532846715329, |
| "grad_norm": 0.0011101458221673965, |
| "learning_rate": 1.079053997075801e-06, |
| "loss": 0.0, |
| "step": 978 |
| }, |
| { |
| "epoch": 4.275912408759124, |
| "grad_norm": 0.0017435256158933043, |
| "learning_rate": 1.0739621497244358e-06, |
| "loss": 0.0, |
| "step": 979 |
| }, |
| { |
| "epoch": 4.28029197080292, |
| "grad_norm": 0.0011316316667944193, |
| "learning_rate": 1.068879056081339e-06, |
| "loss": 0.0001, |
| "step": 980 |
| }, |
| { |
| "epoch": 4.2846715328467155, |
| "grad_norm": 0.04685957357287407, |
| "learning_rate": 1.063804747348992e-06, |
| "loss": 0.0, |
| "step": 981 |
| }, |
| { |
| "epoch": 4.289051094890511, |
| "grad_norm": 0.001629164908081293, |
| "learning_rate": 1.0587392546759499e-06, |
| "loss": 0.0, |
| "step": 982 |
| }, |
| { |
| "epoch": 4.293430656934307, |
| "grad_norm": 0.0011207193601876497, |
| "learning_rate": 1.05368260915665e-06, |
| "loss": 0.0, |
| "step": 983 |
| }, |
| { |
| "epoch": 4.2978102189781024, |
| "grad_norm": 0.0013356918934732676, |
| "learning_rate": 1.048634841831222e-06, |
| "loss": 0.0, |
| "step": 984 |
| }, |
| { |
| "epoch": 4.302189781021898, |
| "grad_norm": 0.0032078237272799015, |
| "learning_rate": 1.0435959836852969e-06, |
| "loss": 0.0, |
| "step": 985 |
| }, |
| { |
| "epoch": 4.306569343065694, |
| "grad_norm": 0.0007658989634364843, |
| "learning_rate": 1.038566065649817e-06, |
| "loss": 0.0, |
| "step": 986 |
| }, |
| { |
| "epoch": 4.310948905109489, |
| "grad_norm": 0.0034092357382178307, |
| "learning_rate": 1.0335451186008455e-06, |
| "loss": 0.0, |
| "step": 987 |
| }, |
| { |
| "epoch": 4.315328467153285, |
| "grad_norm": 0.0037303243298083544, |
| "learning_rate": 1.0285331733593778e-06, |
| "loss": 0.0, |
| "step": 988 |
| }, |
| { |
| "epoch": 4.319708029197081, |
| "grad_norm": 0.0008536482346244156, |
| "learning_rate": 1.0235302606911515e-06, |
| "loss": 0.0, |
| "step": 989 |
| }, |
| { |
| "epoch": 4.324087591240876, |
| "grad_norm": 0.0019189509330317378, |
| "learning_rate": 1.0185364113064577e-06, |
| "loss": 0.0, |
| "step": 990 |
| }, |
| { |
| "epoch": 4.328467153284672, |
| "grad_norm": 0.0015431938227266073, |
| "learning_rate": 1.0135516558599538e-06, |
| "loss": 0.0, |
| "step": 991 |
| }, |
| { |
| "epoch": 4.3328467153284675, |
| "grad_norm": 0.0006807130994275212, |
| "learning_rate": 1.0085760249504728e-06, |
| "loss": 0.0, |
| "step": 992 |
| }, |
| { |
| "epoch": 4.337226277372263, |
| "grad_norm": 0.0010325718903914094, |
| "learning_rate": 1.0036095491208378e-06, |
| "loss": 0.0, |
| "step": 993 |
| }, |
| { |
| "epoch": 4.341605839416058, |
| "grad_norm": 0.00216138968244195, |
| "learning_rate": 9.986522588576737e-07, |
| "loss": 0.0, |
| "step": 994 |
| }, |
| { |
| "epoch": 4.3459854014598545, |
| "grad_norm": 0.0012068209471181035, |
| "learning_rate": 9.937041845912187e-07, |
| "loss": 0.0, |
| "step": 995 |
| }, |
| { |
| "epoch": 4.350364963503649, |
| "grad_norm": 0.0013215959770604968, |
| "learning_rate": 9.887653566951405e-07, |
| "loss": 0.0, |
| "step": 996 |
| }, |
| { |
| "epoch": 4.354744525547445, |
| "grad_norm": 0.0018075024709105492, |
| "learning_rate": 9.83835805486347e-07, |
| "loss": 0.0, |
| "step": 997 |
| }, |
| { |
| "epoch": 4.3591240875912405, |
| "grad_norm": 0.0016379299340769649, |
| "learning_rate": 9.789155612248016e-07, |
| "loss": 0.0, |
| "step": 998 |
| }, |
| { |
| "epoch": 4.363503649635036, |
| "grad_norm": 0.001572887529619038, |
| "learning_rate": 9.74004654113337e-07, |
| "loss": 0.0, |
| "step": 999 |
| }, |
| { |
| "epoch": 4.367883211678832, |
| "grad_norm": 0.0007369730155915022, |
| "learning_rate": 9.691031142974708e-07, |
| "loss": 0.0, |
| "step": 1000 |
| }, |
| { |
| "epoch": 4.372262773722627, |
| "grad_norm": 0.0035350644029676914, |
| "learning_rate": 9.642109718652184e-07, |
| "loss": 0.0, |
| "step": 1001 |
| }, |
| { |
| "epoch": 4.376642335766423, |
| "grad_norm": 0.0012651537545025349, |
| "learning_rate": 9.5932825684691e-07, |
| "loss": 0.0, |
| "step": 1002 |
| }, |
| { |
| "epoch": 4.381021897810219, |
| "grad_norm": 0.001015955931507051, |
| "learning_rate": 9.544549992150065e-07, |
| "loss": 0.0, |
| "step": 1003 |
| }, |
| { |
| "epoch": 4.385401459854014, |
| "grad_norm": 0.005865906830877066, |
| "learning_rate": 9.495912288839134e-07, |
| "loss": 0.0, |
| "step": 1004 |
| }, |
| { |
| "epoch": 4.38978102189781, |
| "grad_norm": 0.001037195441313088, |
| "learning_rate": 9.447369757098002e-07, |
| "loss": 0.0, |
| "step": 1005 |
| }, |
| { |
| "epoch": 4.394160583941606, |
| "grad_norm": 0.0032373452559113503, |
| "learning_rate": 9.398922694904139e-07, |
| "loss": 0.0, |
| "step": 1006 |
| }, |
| { |
| "epoch": 4.398540145985401, |
| "grad_norm": 0.0006133022252470255, |
| "learning_rate": 9.350571399648989e-07, |
| "loss": 0.0, |
| "step": 1007 |
| }, |
| { |
| "epoch": 4.402919708029197, |
| "grad_norm": 0.00048491251072846353, |
| "learning_rate": 9.302316168136124e-07, |
| "loss": 0.0, |
| "step": 1008 |
| }, |
| { |
| "epoch": 4.4072992700729925, |
| "grad_norm": 0.004875269718468189, |
| "learning_rate": 9.254157296579438e-07, |
| "loss": 0.0, |
| "step": 1009 |
| }, |
| { |
| "epoch": 4.411678832116788, |
| "grad_norm": 0.0012481579324230552, |
| "learning_rate": 9.206095080601318e-07, |
| "loss": 0.0, |
| "step": 1010 |
| }, |
| { |
| "epoch": 4.416058394160584, |
| "grad_norm": 0.0010592174949124455, |
| "learning_rate": 9.158129815230826e-07, |
| "loss": 0.0, |
| "step": 1011 |
| }, |
| { |
| "epoch": 4.420437956204379, |
| "grad_norm": 0.002346326829865575, |
| "learning_rate": 9.110261794901904e-07, |
| "loss": 0.0001, |
| "step": 1012 |
| }, |
| { |
| "epoch": 4.424817518248175, |
| "grad_norm": 0.032931648194789886, |
| "learning_rate": 9.062491313451555e-07, |
| "loss": 0.0, |
| "step": 1013 |
| }, |
| { |
| "epoch": 4.429197080291971, |
| "grad_norm": 0.0009940835880115628, |
| "learning_rate": 9.014818664118033e-07, |
| "loss": 0.0, |
| "step": 1014 |
| }, |
| { |
| "epoch": 4.433576642335766, |
| "grad_norm": 0.0009703072137199342, |
| "learning_rate": 8.967244139539063e-07, |
| "loss": 0.0, |
| "step": 1015 |
| }, |
| { |
| "epoch": 4.437956204379562, |
| "grad_norm": 0.0008442693506367505, |
| "learning_rate": 8.919768031750025e-07, |
| "loss": 0.0, |
| "step": 1016 |
| }, |
| { |
| "epoch": 4.442335766423358, |
| "grad_norm": 0.0013341221492737532, |
| "learning_rate": 8.872390632182176e-07, |
| "loss": 0.0, |
| "step": 1017 |
| }, |
| { |
| "epoch": 4.446715328467153, |
| "grad_norm": 0.0010671225609257817, |
| "learning_rate": 8.825112231660838e-07, |
| "loss": 0.0, |
| "step": 1018 |
| }, |
| { |
| "epoch": 4.451094890510949, |
| "grad_norm": 0.0017634209943935275, |
| "learning_rate": 8.777933120403656e-07, |
| "loss": 0.0, |
| "step": 1019 |
| }, |
| { |
| "epoch": 4.4554744525547445, |
| "grad_norm": 0.0034736371599137783, |
| "learning_rate": 8.730853588018773e-07, |
| "loss": 0.0, |
| "step": 1020 |
| }, |
| { |
| "epoch": 4.45985401459854, |
| "grad_norm": 0.0010752341477200389, |
| "learning_rate": 8.683873923503064e-07, |
| "loss": 0.0, |
| "step": 1021 |
| }, |
| { |
| "epoch": 4.464233576642336, |
| "grad_norm": 0.0011095002992078662, |
| "learning_rate": 8.636994415240377e-07, |
| "loss": 0.0, |
| "step": 1022 |
| }, |
| { |
| "epoch": 4.468613138686131, |
| "grad_norm": 0.0019367806380614638, |
| "learning_rate": 8.59021535099975e-07, |
| "loss": 0.0, |
| "step": 1023 |
| }, |
| { |
| "epoch": 4.472992700729927, |
| "grad_norm": 0.0009969058446586132, |
| "learning_rate": 8.543537017933642e-07, |
| "loss": 0.0, |
| "step": 1024 |
| }, |
| { |
| "epoch": 4.477372262773723, |
| "grad_norm": 0.0006511294632218778, |
| "learning_rate": 8.496959702576188e-07, |
| "loss": 0.0, |
| "step": 1025 |
| }, |
| { |
| "epoch": 4.481751824817518, |
| "grad_norm": 0.00075997767271474, |
| "learning_rate": 8.45048369084141e-07, |
| "loss": 0.0, |
| "step": 1026 |
| }, |
| { |
| "epoch": 4.486131386861314, |
| "grad_norm": 0.0008069901959970593, |
| "learning_rate": 8.404109268021493e-07, |
| "loss": 0.0, |
| "step": 1027 |
| }, |
| { |
| "epoch": 4.49051094890511, |
| "grad_norm": 0.03889809548854828, |
| "learning_rate": 8.357836718785018e-07, |
| "loss": 0.0001, |
| "step": 1028 |
| }, |
| { |
| "epoch": 4.494890510948905, |
| "grad_norm": 0.001798073179088533, |
| "learning_rate": 8.31166632717521e-07, |
| "loss": 0.0, |
| "step": 1029 |
| }, |
| { |
| "epoch": 4.499270072992701, |
| "grad_norm": 0.0023565480951219797, |
| "learning_rate": 8.265598376608211e-07, |
| "loss": 0.0, |
| "step": 1030 |
| }, |
| { |
| "epoch": 4.5036496350364965, |
| "grad_norm": 0.0009423934388905764, |
| "learning_rate": 8.219633149871322e-07, |
| "loss": 0.0, |
| "step": 1031 |
| }, |
| { |
| "epoch": 4.508029197080292, |
| "grad_norm": 0.001052824198268354, |
| "learning_rate": 8.17377092912128e-07, |
| "loss": 0.0001, |
| "step": 1032 |
| }, |
| { |
| "epoch": 4.512408759124088, |
| "grad_norm": 0.03564167395234108, |
| "learning_rate": 8.128011995882531e-07, |
| "loss": 0.0, |
| "step": 1033 |
| }, |
| { |
| "epoch": 4.516788321167883, |
| "grad_norm": 0.002029201714321971, |
| "learning_rate": 8.082356631045462e-07, |
| "loss": 0.0, |
| "step": 1034 |
| }, |
| { |
| "epoch": 4.521167883211679, |
| "grad_norm": 0.0014048311859369278, |
| "learning_rate": 8.036805114864737e-07, |
| "loss": 0.0, |
| "step": 1035 |
| }, |
| { |
| "epoch": 4.525547445255475, |
| "grad_norm": 0.0015611151466146111, |
| "learning_rate": 7.991357726957541e-07, |
| "loss": 0.0, |
| "step": 1036 |
| }, |
| { |
| "epoch": 4.52992700729927, |
| "grad_norm": 0.0011675754794850945, |
| "learning_rate": 7.946014746301859e-07, |
| "loss": 0.0, |
| "step": 1037 |
| }, |
| { |
| "epoch": 4.534306569343066, |
| "grad_norm": 0.0007950099534355104, |
| "learning_rate": 7.900776451234784e-07, |
| "loss": 0.0, |
| "step": 1038 |
| }, |
| { |
| "epoch": 4.538686131386862, |
| "grad_norm": 0.010591431520879269, |
| "learning_rate": 7.855643119450795e-07, |
| "loss": 0.0, |
| "step": 1039 |
| }, |
| { |
| "epoch": 4.543065693430657, |
| "grad_norm": 0.0014281709445640445, |
| "learning_rate": 7.810615028000045e-07, |
| "loss": 0.0, |
| "step": 1040 |
| }, |
| { |
| "epoch": 4.547445255474453, |
| "grad_norm": 0.001287741237320006, |
| "learning_rate": 7.7656924532867e-07, |
| "loss": 0.0, |
| "step": 1041 |
| }, |
| { |
| "epoch": 4.5518248175182485, |
| "grad_norm": 0.0012502416502684355, |
| "learning_rate": 7.720875671067188e-07, |
| "loss": 0.0, |
| "step": 1042 |
| }, |
| { |
| "epoch": 4.556204379562044, |
| "grad_norm": 0.028069162741303444, |
| "learning_rate": 7.676164956448534e-07, |
| "loss": 0.0, |
| "step": 1043 |
| }, |
| { |
| "epoch": 4.56058394160584, |
| "grad_norm": 0.0010672607459127903, |
| "learning_rate": 7.63156058388668e-07, |
| "loss": 0.0, |
| "step": 1044 |
| }, |
| { |
| "epoch": 4.5649635036496345, |
| "grad_norm": 0.0010177737567573786, |
| "learning_rate": 7.587062827184777e-07, |
| "loss": 0.0, |
| "step": 1045 |
| }, |
| { |
| "epoch": 4.569343065693431, |
| "grad_norm": 0.0007165819988586009, |
| "learning_rate": 7.542671959491523e-07, |
| "loss": 0.0, |
| "step": 1046 |
| }, |
| { |
| "epoch": 4.573722627737226, |
| "grad_norm": 0.00132820475846529, |
| "learning_rate": 7.498388253299483e-07, |
| "loss": 0.0001, |
| "step": 1047 |
| }, |
| { |
| "epoch": 4.578102189781022, |
| "grad_norm": 0.03435146063566208, |
| "learning_rate": 7.454211980443404e-07, |
| "loss": 0.0, |
| "step": 1048 |
| }, |
| { |
| "epoch": 4.582481751824817, |
| "grad_norm": 0.0011477185180410743, |
| "learning_rate": 7.410143412098561e-07, |
| "loss": 0.0, |
| "step": 1049 |
| }, |
| { |
| "epoch": 4.586861313868614, |
| "grad_norm": 0.0019768939819186926, |
| "learning_rate": 7.366182818779089e-07, |
| "loss": 0.0, |
| "step": 1050 |
| }, |
| { |
| "epoch": 4.591240875912408, |
| "grad_norm": 0.0011894172057509422, |
| "learning_rate": 7.322330470336314e-07, |
| "loss": 0.0, |
| "step": 1051 |
| }, |
| { |
| "epoch": 4.595620437956204, |
| "grad_norm": 0.0013547460548579693, |
| "learning_rate": 7.278586635957108e-07, |
| "loss": 0.0, |
| "step": 1052 |
| }, |
| { |
| "epoch": 4.6, |
| "grad_norm": 0.000404223712394014, |
| "learning_rate": 7.234951584162225e-07, |
| "loss": 0.0, |
| "step": 1053 |
| }, |
| { |
| "epoch": 4.604379562043795, |
| "grad_norm": 0.0006415692623704672, |
| "learning_rate": 7.191425582804667e-07, |
| "loss": 0.0, |
| "step": 1054 |
| }, |
| { |
| "epoch": 4.608759124087591, |
| "grad_norm": 0.0017907213186845183, |
| "learning_rate": 7.148008899068029e-07, |
| "loss": 0.0, |
| "step": 1055 |
| }, |
| { |
| "epoch": 4.613138686131387, |
| "grad_norm": 0.0004747869388666004, |
| "learning_rate": 7.104701799464856e-07, |
| "loss": 0.0, |
| "step": 1056 |
| }, |
| { |
| "epoch": 4.617518248175182, |
| "grad_norm": 0.0015843930887058377, |
| "learning_rate": 7.061504549835022e-07, |
| "loss": 0.0, |
| "step": 1057 |
| }, |
| { |
| "epoch": 4.621897810218978, |
| "grad_norm": 0.0016891838749870658, |
| "learning_rate": 7.018417415344081e-07, |
| "loss": 0.0, |
| "step": 1058 |
| }, |
| { |
| "epoch": 4.6262773722627735, |
| "grad_norm": 0.0012868677731603384, |
| "learning_rate": 6.975440660481652e-07, |
| "loss": 0.0, |
| "step": 1059 |
| }, |
| { |
| "epoch": 4.630656934306569, |
| "grad_norm": 0.0010564913973212242, |
| "learning_rate": 6.93257454905979e-07, |
| "loss": 0.0, |
| "step": 1060 |
| }, |
| { |
| "epoch": 4.635036496350365, |
| "grad_norm": 0.0006765465950593352, |
| "learning_rate": 6.889819344211365e-07, |
| "loss": 0.0, |
| "step": 1061 |
| }, |
| { |
| "epoch": 4.63941605839416, |
| "grad_norm": 0.0005323990480974317, |
| "learning_rate": 6.847175308388451e-07, |
| "loss": 0.0, |
| "step": 1062 |
| }, |
| { |
| "epoch": 4.643795620437956, |
| "grad_norm": 0.0019071334972977638, |
| "learning_rate": 6.8046427033607e-07, |
| "loss": 0.0, |
| "step": 1063 |
| }, |
| { |
| "epoch": 4.648175182481752, |
| "grad_norm": 0.019145332276821136, |
| "learning_rate": 6.762221790213782e-07, |
| "loss": 0.0, |
| "step": 1064 |
| }, |
| { |
| "epoch": 4.652554744525547, |
| "grad_norm": 0.0018211206188425422, |
| "learning_rate": 6.719912829347716e-07, |
| "loss": 0.0, |
| "step": 1065 |
| }, |
| { |
| "epoch": 4.656934306569343, |
| "grad_norm": 0.0008773288573138416, |
| "learning_rate": 6.677716080475314e-07, |
| "loss": 0.0, |
| "step": 1066 |
| }, |
| { |
| "epoch": 4.661313868613139, |
| "grad_norm": 0.0015226315008476377, |
| "learning_rate": 6.635631802620576e-07, |
| "loss": 0.0, |
| "step": 1067 |
| }, |
| { |
| "epoch": 4.665693430656934, |
| "grad_norm": 0.00191792962141335, |
| "learning_rate": 6.593660254117104e-07, |
| "loss": 0.0, |
| "step": 1068 |
| }, |
| { |
| "epoch": 4.67007299270073, |
| "grad_norm": 0.0018758567748591304, |
| "learning_rate": 6.55180169260651e-07, |
| "loss": 0.0, |
| "step": 1069 |
| }, |
| { |
| "epoch": 4.6744525547445255, |
| "grad_norm": 0.002108269138261676, |
| "learning_rate": 6.510056375036841e-07, |
| "loss": 0.0, |
| "step": 1070 |
| }, |
| { |
| "epoch": 4.678832116788321, |
| "grad_norm": 0.0009355146321468055, |
| "learning_rate": 6.468424557660988e-07, |
| "loss": 0.0, |
| "step": 1071 |
| }, |
| { |
| "epoch": 4.683211678832117, |
| "grad_norm": 0.0013250872725620866, |
| "learning_rate": 6.42690649603513e-07, |
| "loss": 0.0, |
| "step": 1072 |
| }, |
| { |
| "epoch": 4.687591240875912, |
| "grad_norm": 0.0017062638653442264, |
| "learning_rate": 6.385502445017161e-07, |
| "loss": 0.0, |
| "step": 1073 |
| }, |
| { |
| "epoch": 4.691970802919708, |
| "grad_norm": 0.00145150872413069, |
| "learning_rate": 6.34421265876512e-07, |
| "loss": 0.0, |
| "step": 1074 |
| }, |
| { |
| "epoch": 4.696350364963504, |
| "grad_norm": 0.0006055146222934127, |
| "learning_rate": 6.303037390735634e-07, |
| "loss": 0.0, |
| "step": 1075 |
| }, |
| { |
| "epoch": 4.700729927007299, |
| "grad_norm": 0.0014130697818472981, |
| "learning_rate": 6.261976893682362e-07, |
| "loss": 0.0, |
| "step": 1076 |
| }, |
| { |
| "epoch": 4.705109489051095, |
| "grad_norm": 0.002536613028496504, |
| "learning_rate": 6.221031419654445e-07, |
| "loss": 0.0, |
| "step": 1077 |
| }, |
| { |
| "epoch": 4.709489051094891, |
| "grad_norm": 0.02027207612991333, |
| "learning_rate": 6.18020121999495e-07, |
| "loss": 0.0, |
| "step": 1078 |
| }, |
| { |
| "epoch": 4.713868613138686, |
| "grad_norm": 0.0010621851542964578, |
| "learning_rate": 6.139486545339346e-07, |
| "loss": 0.0, |
| "step": 1079 |
| }, |
| { |
| "epoch": 4.718248175182482, |
| "grad_norm": 0.0038213583175092936, |
| "learning_rate": 6.098887645613944e-07, |
| "loss": 0.0, |
| "step": 1080 |
| }, |
| { |
| "epoch": 4.7226277372262775, |
| "grad_norm": 0.0009431196376681328, |
| "learning_rate": 6.058404770034373e-07, |
| "loss": 0.0, |
| "step": 1081 |
| }, |
| { |
| "epoch": 4.727007299270073, |
| "grad_norm": 0.013071099296212196, |
| "learning_rate": 6.01803816710406e-07, |
| "loss": 0.0, |
| "step": 1082 |
| }, |
| { |
| "epoch": 4.731386861313869, |
| "grad_norm": 0.0007845235522836447, |
| "learning_rate": 5.977788084612684e-07, |
| "loss": 0.0, |
| "step": 1083 |
| }, |
| { |
| "epoch": 4.735766423357664, |
| "grad_norm": 0.0016089725540950894, |
| "learning_rate": 5.937654769634666e-07, |
| "loss": 0.0, |
| "step": 1084 |
| }, |
| { |
| "epoch": 4.74014598540146, |
| "grad_norm": 0.0016838692827150226, |
| "learning_rate": 5.897638468527652e-07, |
| "loss": 0.0, |
| "step": 1085 |
| }, |
| { |
| "epoch": 4.744525547445256, |
| "grad_norm": 0.004209183622151613, |
| "learning_rate": 5.857739426931014e-07, |
| "loss": 0.0, |
| "step": 1086 |
| }, |
| { |
| "epoch": 4.748905109489051, |
| "grad_norm": 0.001283829566091299, |
| "learning_rate": 5.817957889764308e-07, |
| "loss": 0.0, |
| "step": 1087 |
| }, |
| { |
| "epoch": 4.753284671532847, |
| "grad_norm": 0.0008760584751144052, |
| "learning_rate": 5.778294101225807e-07, |
| "loss": 0.0, |
| "step": 1088 |
| }, |
| { |
| "epoch": 4.757664233576643, |
| "grad_norm": 0.0006950758979655802, |
| "learning_rate": 5.738748304790973e-07, |
| "loss": 0.0, |
| "step": 1089 |
| }, |
| { |
| "epoch": 4.762043795620438, |
| "grad_norm": 0.01165073737502098, |
| "learning_rate": 5.699320743210985e-07, |
| "loss": 0.0, |
| "step": 1090 |
| }, |
| { |
| "epoch": 4.766423357664234, |
| "grad_norm": 0.0010371040552854538, |
| "learning_rate": 5.660011658511235e-07, |
| "loss": 0.0, |
| "step": 1091 |
| }, |
| { |
| "epoch": 4.7708029197080295, |
| "grad_norm": 0.045324090868234634, |
| "learning_rate": 5.62082129198985e-07, |
| "loss": 0.0001, |
| "step": 1092 |
| }, |
| { |
| "epoch": 4.775182481751825, |
| "grad_norm": 0.0012658090563490987, |
| "learning_rate": 5.581749884216206e-07, |
| "loss": 0.0, |
| "step": 1093 |
| }, |
| { |
| "epoch": 4.779562043795621, |
| "grad_norm": 0.0010206351289525628, |
| "learning_rate": 5.542797675029452e-07, |
| "loss": 0.0, |
| "step": 1094 |
| }, |
| { |
| "epoch": 4.783941605839416, |
| "grad_norm": 0.0007952626910991967, |
| "learning_rate": 5.503964903537037e-07, |
| "loss": 0.0, |
| "step": 1095 |
| }, |
| { |
| "epoch": 4.788321167883212, |
| "grad_norm": 0.0005661824834533036, |
| "learning_rate": 5.465251808113247e-07, |
| "loss": 0.0, |
| "step": 1096 |
| }, |
| { |
| "epoch": 4.792700729927008, |
| "grad_norm": 0.0009383410215377808, |
| "learning_rate": 5.426658626397741e-07, |
| "loss": 0.0, |
| "step": 1097 |
| }, |
| { |
| "epoch": 4.797080291970802, |
| "grad_norm": 0.0022159917280077934, |
| "learning_rate": 5.388185595294082e-07, |
| "loss": 0.0, |
| "step": 1098 |
| }, |
| { |
| "epoch": 4.801459854014599, |
| "grad_norm": 0.0009989173850044608, |
| "learning_rate": 5.349832950968298e-07, |
| "loss": 0.0, |
| "step": 1099 |
| }, |
| { |
| "epoch": 4.805839416058394, |
| "grad_norm": 0.0011754210572689772, |
| "learning_rate": 5.311600928847419e-07, |
| "loss": 0.0, |
| "step": 1100 |
| }, |
| { |
| "epoch": 4.81021897810219, |
| "grad_norm": 0.00188217475079, |
| "learning_rate": 5.273489763618042e-07, |
| "loss": 0.0, |
| "step": 1101 |
| }, |
| { |
| "epoch": 4.814598540145985, |
| "grad_norm": 0.0011118212714791298, |
| "learning_rate": 5.235499689224885e-07, |
| "loss": 0.0, |
| "step": 1102 |
| }, |
| { |
| "epoch": 4.8189781021897815, |
| "grad_norm": 0.0011680370662361383, |
| "learning_rate": 5.197630938869352e-07, |
| "loss": 0.0, |
| "step": 1103 |
| }, |
| { |
| "epoch": 4.823357664233576, |
| "grad_norm": 0.0036738826893270016, |
| "learning_rate": 5.159883745008098e-07, |
| "loss": 0.0, |
| "step": 1104 |
| }, |
| { |
| "epoch": 4.827737226277372, |
| "grad_norm": 0.0012279701186344028, |
| "learning_rate": 5.122258339351616e-07, |
| "loss": 0.0, |
| "step": 1105 |
| }, |
| { |
| "epoch": 4.8321167883211675, |
| "grad_norm": 0.0037098790053278208, |
| "learning_rate": 5.084754952862788e-07, |
| "loss": 0.0, |
| "step": 1106 |
| }, |
| { |
| "epoch": 4.836496350364963, |
| "grad_norm": 0.0007585727726109326, |
| "learning_rate": 5.047373815755497e-07, |
| "loss": 0.0001, |
| "step": 1107 |
| }, |
| { |
| "epoch": 4.840875912408759, |
| "grad_norm": 0.04299236834049225, |
| "learning_rate": 5.010115157493198e-07, |
| "loss": 0.0, |
| "step": 1108 |
| }, |
| { |
| "epoch": 4.8452554744525544, |
| "grad_norm": 0.0015128895174711943, |
| "learning_rate": 4.972979206787503e-07, |
| "loss": 0.0, |
| "step": 1109 |
| }, |
| { |
| "epoch": 4.84963503649635, |
| "grad_norm": 0.0028542019426822662, |
| "learning_rate": 4.935966191596799e-07, |
| "loss": 0.0, |
| "step": 1110 |
| }, |
| { |
| "epoch": 4.854014598540146, |
| "grad_norm": 0.0013797401916235685, |
| "learning_rate": 4.899076339124828e-07, |
| "loss": 0.0, |
| "step": 1111 |
| }, |
| { |
| "epoch": 4.858394160583941, |
| "grad_norm": 0.0017670565284788609, |
| "learning_rate": 4.862309875819299e-07, |
| "loss": 0.0, |
| "step": 1112 |
| }, |
| { |
| "epoch": 4.862773722627737, |
| "grad_norm": 0.0010623923735693097, |
| "learning_rate": 4.825667027370498e-07, |
| "loss": 0.0, |
| "step": 1113 |
| }, |
| { |
| "epoch": 4.867153284671533, |
| "grad_norm": 0.0031603036914020777, |
| "learning_rate": 4.78914801870991e-07, |
| "loss": 0.0, |
| "step": 1114 |
| }, |
| { |
| "epoch": 4.871532846715328, |
| "grad_norm": 0.0019082998624071479, |
| "learning_rate": 4.7527530740088256e-07, |
| "loss": 0.0, |
| "step": 1115 |
| }, |
| { |
| "epoch": 4.875912408759124, |
| "grad_norm": 0.02687995322048664, |
| "learning_rate": 4.716482416676974e-07, |
| "loss": 0.0, |
| "step": 1116 |
| }, |
| { |
| "epoch": 4.8802919708029195, |
| "grad_norm": 0.0006433060625568032, |
| "learning_rate": 4.680336269361147e-07, |
| "loss": 0.0, |
| "step": 1117 |
| }, |
| { |
| "epoch": 4.884671532846715, |
| "grad_norm": 0.0005254243151284754, |
| "learning_rate": 4.644314853943835e-07, |
| "loss": 0.0, |
| "step": 1118 |
| }, |
| { |
| "epoch": 4.889051094890511, |
| "grad_norm": 0.002319559222087264, |
| "learning_rate": 4.608418391541861e-07, |
| "loss": 0.0, |
| "step": 1119 |
| }, |
| { |
| "epoch": 4.8934306569343065, |
| "grad_norm": 0.0019121607765555382, |
| "learning_rate": 4.572647102505029e-07, |
| "loss": 0.0, |
| "step": 1120 |
| }, |
| { |
| "epoch": 4.897810218978102, |
| "grad_norm": 0.00158763921353966, |
| "learning_rate": 4.5370012064147706e-07, |
| "loss": 0.0, |
| "step": 1121 |
| }, |
| { |
| "epoch": 4.902189781021898, |
| "grad_norm": 0.0010019956389442086, |
| "learning_rate": 4.501480922082788e-07, |
| "loss": 0.0, |
| "step": 1122 |
| }, |
| { |
| "epoch": 4.906569343065693, |
| "grad_norm": 0.002621067687869072, |
| "learning_rate": 4.466086467549724e-07, |
| "loss": 0.0, |
| "step": 1123 |
| }, |
| { |
| "epoch": 4.910948905109489, |
| "grad_norm": 0.0012446879409253597, |
| "learning_rate": 4.430818060083816e-07, |
| "loss": 0.0, |
| "step": 1124 |
| }, |
| { |
| "epoch": 4.915328467153285, |
| "grad_norm": 0.005736443214118481, |
| "learning_rate": 4.395675916179562e-07, |
| "loss": 0.0, |
| "step": 1125 |
| }, |
| { |
| "epoch": 4.91970802919708, |
| "grad_norm": 0.000912757939659059, |
| "learning_rate": 4.360660251556395e-07, |
| "loss": 0.0, |
| "step": 1126 |
| }, |
| { |
| "epoch": 4.924087591240876, |
| "grad_norm": 0.0015029318165034056, |
| "learning_rate": 4.3257712811573565e-07, |
| "loss": 0.0, |
| "step": 1127 |
| }, |
| { |
| "epoch": 4.9284671532846716, |
| "grad_norm": 0.0006483580800704658, |
| "learning_rate": 4.2910092191477764e-07, |
| "loss": 0.0, |
| "step": 1128 |
| }, |
| { |
| "epoch": 4.932846715328467, |
| "grad_norm": 0.0012757584918290377, |
| "learning_rate": 4.256374278913963e-07, |
| "loss": 0.0, |
| "step": 1129 |
| }, |
| { |
| "epoch": 4.937226277372263, |
| "grad_norm": 0.0012854667147621512, |
| "learning_rate": 4.221866673061889e-07, |
| "loss": 0.0, |
| "step": 1130 |
| }, |
| { |
| "epoch": 4.9416058394160585, |
| "grad_norm": 0.0008587121847085655, |
| "learning_rate": 4.187486613415878e-07, |
| "loss": 0.0, |
| "step": 1131 |
| }, |
| { |
| "epoch": 4.945985401459854, |
| "grad_norm": 0.0009980755858123302, |
| "learning_rate": 4.1532343110173315e-07, |
| "loss": 0.0, |
| "step": 1132 |
| }, |
| { |
| "epoch": 4.95036496350365, |
| "grad_norm": 0.000794942257925868, |
| "learning_rate": 4.1191099761234034e-07, |
| "loss": 0.0, |
| "step": 1133 |
| }, |
| { |
| "epoch": 4.954744525547445, |
| "grad_norm": 0.0011009223526343703, |
| "learning_rate": 4.085113818205724e-07, |
| "loss": 0.0, |
| "step": 1134 |
| }, |
| { |
| "epoch": 4.959124087591241, |
| "grad_norm": 0.000590562995057553, |
| "learning_rate": 4.051246045949106e-07, |
| "loss": 0.0, |
| "step": 1135 |
| }, |
| { |
| "epoch": 4.963503649635037, |
| "grad_norm": 0.0022269401233643293, |
| "learning_rate": 4.0175068672502786e-07, |
| "loss": 0.0, |
| "step": 1136 |
| }, |
| { |
| "epoch": 4.967883211678832, |
| "grad_norm": 0.000957436568569392, |
| "learning_rate": 3.983896489216596e-07, |
| "loss": 0.0, |
| "step": 1137 |
| }, |
| { |
| "epoch": 4.972262773722628, |
| "grad_norm": 0.0010053800651803613, |
| "learning_rate": 3.9504151181647687e-07, |
| "loss": 0.0, |
| "step": 1138 |
| }, |
| { |
| "epoch": 4.976642335766424, |
| "grad_norm": 0.006390800233930349, |
| "learning_rate": 3.917062959619611e-07, |
| "loss": 0.0, |
| "step": 1139 |
| }, |
| { |
| "epoch": 4.981021897810219, |
| "grad_norm": 0.0010258725378662348, |
| "learning_rate": 3.8838402183127566e-07, |
| "loss": 0.0, |
| "step": 1140 |
| }, |
| { |
| "epoch": 4.985401459854015, |
| "grad_norm": 0.0006171112763695419, |
| "learning_rate": 3.8507470981814206e-07, |
| "loss": 0.0, |
| "step": 1141 |
| }, |
| { |
| "epoch": 4.9897810218978105, |
| "grad_norm": 0.0014847249258309603, |
| "learning_rate": 3.8177838023671365e-07, |
| "loss": 0.0, |
| "step": 1142 |
| }, |
| { |
| "epoch": 4.994160583941606, |
| "grad_norm": 0.002058738609775901, |
| "learning_rate": 3.784950533214521e-07, |
| "loss": 0.0, |
| "step": 1143 |
| }, |
| { |
| "epoch": 4.998540145985402, |
| "grad_norm": 0.01912631094455719, |
| "learning_rate": 3.7522474922700167e-07, |
| "loss": 0.0, |
| "step": 1144 |
| }, |
| { |
| "epoch": 5.0, |
| "grad_norm": 0.01912631094455719, |
| "learning_rate": 3.7196748802806634e-07, |
| "loss": 0.0, |
| "step": 1145 |
| }, |
| { |
| "epoch": 5.004379562043796, |
| "grad_norm": 0.011355338618159294, |
| "learning_rate": 3.6872328971928715e-07, |
| "loss": 0.0, |
| "step": 1146 |
| }, |
| { |
| "epoch": 5.008759124087591, |
| "grad_norm": 0.0012391695054247975, |
| "learning_rate": 3.6549217421511796e-07, |
| "loss": 0.0, |
| "step": 1147 |
| }, |
| { |
| "epoch": 5.013138686131387, |
| "grad_norm": 0.001630589715205133, |
| "learning_rate": 3.622741613497047e-07, |
| "loss": 0.0, |
| "step": 1148 |
| }, |
| { |
| "epoch": 5.0175182481751825, |
| "grad_norm": 0.008778959512710571, |
| "learning_rate": 3.5906927087676264e-07, |
| "loss": 0.0, |
| "step": 1149 |
| }, |
| { |
| "epoch": 5.021897810218978, |
| "grad_norm": 0.0007508657872676849, |
| "learning_rate": 3.558775224694558e-07, |
| "loss": 0.0, |
| "step": 1150 |
| }, |
| { |
| "epoch": 5.026277372262774, |
| "grad_norm": 0.03633660823106766, |
| "learning_rate": 3.5269893572027566e-07, |
| "loss": 0.0001, |
| "step": 1151 |
| }, |
| { |
| "epoch": 5.0306569343065695, |
| "grad_norm": 0.0021347696892917156, |
| "learning_rate": 3.495335301409206e-07, |
| "loss": 0.0, |
| "step": 1152 |
| }, |
| { |
| "epoch": 5.035036496350365, |
| "grad_norm": 0.001819720957428217, |
| "learning_rate": 3.4638132516217753e-07, |
| "loss": 0.0, |
| "step": 1153 |
| }, |
| { |
| "epoch": 5.039416058394161, |
| "grad_norm": 0.001548740197904408, |
| "learning_rate": 3.432423401338014e-07, |
| "loss": 0.0, |
| "step": 1154 |
| }, |
| { |
| "epoch": 5.043795620437956, |
| "grad_norm": 0.00336814159527421, |
| "learning_rate": 3.401165943243964e-07, |
| "loss": 0.0, |
| "step": 1155 |
| }, |
| { |
| "epoch": 5.048175182481752, |
| "grad_norm": 0.0037251547910273075, |
| "learning_rate": 3.3700410692129813e-07, |
| "loss": 0.0, |
| "step": 1156 |
| }, |
| { |
| "epoch": 5.052554744525548, |
| "grad_norm": 0.0006045170594006777, |
| "learning_rate": 3.3390489703045594e-07, |
| "loss": 0.0, |
| "step": 1157 |
| }, |
| { |
| "epoch": 5.056934306569343, |
| "grad_norm": 0.0013149244477972388, |
| "learning_rate": 3.308189836763151e-07, |
| "loss": 0.0, |
| "step": 1158 |
| }, |
| { |
| "epoch": 5.061313868613139, |
| "grad_norm": 0.0015525494236499071, |
| "learning_rate": 3.2774638580170077e-07, |
| "loss": 0.0, |
| "step": 1159 |
| }, |
| { |
| "epoch": 5.065693430656935, |
| "grad_norm": 0.0024911444634199142, |
| "learning_rate": 3.246871222677006e-07, |
| "loss": 0.0, |
| "step": 1160 |
| }, |
| { |
| "epoch": 5.07007299270073, |
| "grad_norm": 0.0009148967801593244, |
| "learning_rate": 3.216412118535503e-07, |
| "loss": 0.0, |
| "step": 1161 |
| }, |
| { |
| "epoch": 5.074452554744526, |
| "grad_norm": 0.0009300839155912399, |
| "learning_rate": 3.186086732565172e-07, |
| "loss": 0.0, |
| "step": 1162 |
| }, |
| { |
| "epoch": 5.0788321167883215, |
| "grad_norm": 0.0013003420317545533, |
| "learning_rate": 3.155895250917862e-07, |
| "loss": 0.0, |
| "step": 1163 |
| }, |
| { |
| "epoch": 5.083211678832117, |
| "grad_norm": 0.001971049001440406, |
| "learning_rate": 3.1258378589234527e-07, |
| "loss": 0.0, |
| "step": 1164 |
| }, |
| { |
| "epoch": 5.087591240875913, |
| "grad_norm": 0.004845777060836554, |
| "learning_rate": 3.0959147410887154e-07, |
| "loss": 0.0, |
| "step": 1165 |
| }, |
| { |
| "epoch": 5.091970802919708, |
| "grad_norm": 0.0011784955859184265, |
| "learning_rate": 3.066126081096185e-07, |
| "loss": 0.0, |
| "step": 1166 |
| }, |
| { |
| "epoch": 5.096350364963504, |
| "grad_norm": 0.0011790624121204019, |
| "learning_rate": 3.0364720618030253e-07, |
| "loss": 0.0, |
| "step": 1167 |
| }, |
| { |
| "epoch": 5.1007299270073, |
| "grad_norm": 0.0020030320156365633, |
| "learning_rate": 3.006952865239912e-07, |
| "loss": 0.0, |
| "step": 1168 |
| }, |
| { |
| "epoch": 5.105109489051095, |
| "grad_norm": 0.0009937508730217814, |
| "learning_rate": 2.977568672609915e-07, |
| "loss": 0.0, |
| "step": 1169 |
| }, |
| { |
| "epoch": 5.109489051094891, |
| "grad_norm": 0.0011130995117127895, |
| "learning_rate": 2.948319664287383e-07, |
| "loss": 0.0, |
| "step": 1170 |
| }, |
| { |
| "epoch": 5.113868613138687, |
| "grad_norm": 0.0005643954500555992, |
| "learning_rate": 2.919206019816842e-07, |
| "loss": 0.0, |
| "step": 1171 |
| }, |
| { |
| "epoch": 5.118248175182481, |
| "grad_norm": 0.001304757664911449, |
| "learning_rate": 2.8902279179118837e-07, |
| "loss": 0.0, |
| "step": 1172 |
| }, |
| { |
| "epoch": 5.122627737226277, |
| "grad_norm": 0.0014303921489045024, |
| "learning_rate": 2.861385536454078e-07, |
| "loss": 0.0, |
| "step": 1173 |
| }, |
| { |
| "epoch": 5.127007299270073, |
| "grad_norm": 0.0007192412158474326, |
| "learning_rate": 2.8326790524918766e-07, |
| "loss": 0.0, |
| "step": 1174 |
| }, |
| { |
| "epoch": 5.131386861313868, |
| "grad_norm": 0.0014316333690658212, |
| "learning_rate": 2.804108642239525e-07, |
| "loss": 0.0, |
| "step": 1175 |
| }, |
| { |
| "epoch": 5.135766423357664, |
| "grad_norm": 0.0007020977209322155, |
| "learning_rate": 2.7756744810759825e-07, |
| "loss": 0.0, |
| "step": 1176 |
| }, |
| { |
| "epoch": 5.1401459854014595, |
| "grad_norm": 0.0013175703352317214, |
| "learning_rate": 2.747376743543853e-07, |
| "loss": 0.0, |
| "step": 1177 |
| }, |
| { |
| "epoch": 5.144525547445255, |
| "grad_norm": 0.0023852188605815172, |
| "learning_rate": 2.719215603348299e-07, |
| "loss": 0.0, |
| "step": 1178 |
| }, |
| { |
| "epoch": 5.148905109489051, |
| "grad_norm": 0.0019143536919727921, |
| "learning_rate": 2.691191233355986e-07, |
| "loss": 0.0, |
| "step": 1179 |
| }, |
| { |
| "epoch": 5.153284671532846, |
| "grad_norm": 0.0012996941804885864, |
| "learning_rate": 2.6633038055940113e-07, |
| "loss": 0.0, |
| "step": 1180 |
| }, |
| { |
| "epoch": 5.157664233576642, |
| "grad_norm": 0.0014933161437511444, |
| "learning_rate": 2.635553491248863e-07, |
| "loss": 0.0, |
| "step": 1181 |
| }, |
| { |
| "epoch": 5.162043795620438, |
| "grad_norm": 0.0007987873977981508, |
| "learning_rate": 2.6079404606653593e-07, |
| "loss": 0.0, |
| "step": 1182 |
| }, |
| { |
| "epoch": 5.166423357664233, |
| "grad_norm": 0.0007068207487463951, |
| "learning_rate": 2.580464883345599e-07, |
| "loss": 0.0, |
| "step": 1183 |
| }, |
| { |
| "epoch": 5.170802919708029, |
| "grad_norm": 0.002916624303907156, |
| "learning_rate": 2.5531269279479325e-07, |
| "loss": 0.0, |
| "step": 1184 |
| }, |
| { |
| "epoch": 5.175182481751825, |
| "grad_norm": 0.0009090284002013505, |
| "learning_rate": 2.5259267622859183e-07, |
| "loss": 0.0, |
| "step": 1185 |
| }, |
| { |
| "epoch": 5.17956204379562, |
| "grad_norm": 0.002359941368922591, |
| "learning_rate": 2.498864553327296e-07, |
| "loss": 0.0, |
| "step": 1186 |
| }, |
| { |
| "epoch": 5.183941605839416, |
| "grad_norm": 0.0005083808209747076, |
| "learning_rate": 2.471940467192957e-07, |
| "loss": 0.0, |
| "step": 1187 |
| }, |
| { |
| "epoch": 5.1883211678832115, |
| "grad_norm": 0.0010856224689632654, |
| "learning_rate": 2.4451546691559305e-07, |
| "loss": 0.0, |
| "step": 1188 |
| }, |
| { |
| "epoch": 5.192700729927007, |
| "grad_norm": 0.001703603775240481, |
| "learning_rate": 2.4185073236403705e-07, |
| "loss": 0.0, |
| "step": 1189 |
| }, |
| { |
| "epoch": 5.197080291970803, |
| "grad_norm": 0.0008468633168376982, |
| "learning_rate": 2.391998594220535e-07, |
| "loss": 0.0001, |
| "step": 1190 |
| }, |
| { |
| "epoch": 5.201459854014598, |
| "grad_norm": 0.034505490213632584, |
| "learning_rate": 2.3656286436197967e-07, |
| "loss": 0.0, |
| "step": 1191 |
| }, |
| { |
| "epoch": 5.205839416058394, |
| "grad_norm": 0.000597065722104162, |
| "learning_rate": 2.3393976337096335e-07, |
| "loss": 0.0, |
| "step": 1192 |
| }, |
| { |
| "epoch": 5.21021897810219, |
| "grad_norm": 0.0011547644389793277, |
| "learning_rate": 2.3133057255086354e-07, |
| "loss": 0.0, |
| "step": 1193 |
| }, |
| { |
| "epoch": 5.214598540145985, |
| "grad_norm": 0.0008872752077877522, |
| "learning_rate": 2.2873530791815202e-07, |
| "loss": 0.0, |
| "step": 1194 |
| }, |
| { |
| "epoch": 5.218978102189781, |
| "grad_norm": 0.0013213708298280835, |
| "learning_rate": 2.2615398540381584e-07, |
| "loss": 0.0, |
| "step": 1195 |
| }, |
| { |
| "epoch": 5.223357664233577, |
| "grad_norm": 0.003217589110136032, |
| "learning_rate": 2.235866208532572e-07, |
| "loss": 0.0, |
| "step": 1196 |
| }, |
| { |
| "epoch": 5.227737226277372, |
| "grad_norm": 0.0014715356519445777, |
| "learning_rate": 2.2103323002619858e-07, |
| "loss": 0.0, |
| "step": 1197 |
| }, |
| { |
| "epoch": 5.232116788321168, |
| "grad_norm": 0.0017946857260540128, |
| "learning_rate": 2.1849382859658458e-07, |
| "loss": 0.0, |
| "step": 1198 |
| }, |
| { |
| "epoch": 5.2364963503649635, |
| "grad_norm": 0.0012165546650066972, |
| "learning_rate": 2.1596843215248602e-07, |
| "loss": 0.0, |
| "step": 1199 |
| }, |
| { |
| "epoch": 5.240875912408759, |
| "grad_norm": 0.0018216408789157867, |
| "learning_rate": 2.1345705619600503e-07, |
| "loss": 0.0, |
| "step": 1200 |
| }, |
| { |
| "epoch": 5.245255474452555, |
| "grad_norm": 0.0007338838768191636, |
| "learning_rate": 2.109597161431784e-07, |
| "loss": 0.0, |
| "step": 1201 |
| }, |
| { |
| "epoch": 5.24963503649635, |
| "grad_norm": 0.0010004921350628138, |
| "learning_rate": 2.0847642732388457e-07, |
| "loss": 0.0, |
| "step": 1202 |
| }, |
| { |
| "epoch": 5.254014598540146, |
| "grad_norm": 0.001378802815452218, |
| "learning_rate": 2.0600720498174769e-07, |
| "loss": 0.0, |
| "step": 1203 |
| }, |
| { |
| "epoch": 5.258394160583942, |
| "grad_norm": 0.0024660390336066484, |
| "learning_rate": 2.0355206427404628e-07, |
| "loss": 0.0, |
| "step": 1204 |
| }, |
| { |
| "epoch": 5.262773722627737, |
| "grad_norm": 0.0016563908429816365, |
| "learning_rate": 2.0111102027161795e-07, |
| "loss": 0.0, |
| "step": 1205 |
| }, |
| { |
| "epoch": 5.267153284671533, |
| "grad_norm": 0.0008946182206273079, |
| "learning_rate": 1.986840879587687e-07, |
| "loss": 0.0, |
| "step": 1206 |
| }, |
| { |
| "epoch": 5.271532846715329, |
| "grad_norm": 0.000675349379889667, |
| "learning_rate": 1.9627128223317942e-07, |
| "loss": 0.0, |
| "step": 1207 |
| }, |
| { |
| "epoch": 5.275912408759124, |
| "grad_norm": 0.0006476012640632689, |
| "learning_rate": 1.9387261790581618e-07, |
| "loss": 0.0001, |
| "step": 1208 |
| }, |
| { |
| "epoch": 5.28029197080292, |
| "grad_norm": 0.041956543922424316, |
| "learning_rate": 1.9148810970083726e-07, |
| "loss": 0.0, |
| "step": 1209 |
| }, |
| { |
| "epoch": 5.2846715328467155, |
| "grad_norm": 0.0009982686024159193, |
| "learning_rate": 1.8911777225550458e-07, |
| "loss": 0.0, |
| "step": 1210 |
| }, |
| { |
| "epoch": 5.289051094890511, |
| "grad_norm": 0.0013825714122503996, |
| "learning_rate": 1.8676162012009309e-07, |
| "loss": 0.0, |
| "step": 1211 |
| }, |
| { |
| "epoch": 5.293430656934307, |
| "grad_norm": 0.0012482904130592942, |
| "learning_rate": 1.844196677578011e-07, |
| "loss": 0.0, |
| "step": 1212 |
| }, |
| { |
| "epoch": 5.2978102189781024, |
| "grad_norm": 0.00040908699156716466, |
| "learning_rate": 1.8209192954466158e-07, |
| "loss": 0.0, |
| "step": 1213 |
| }, |
| { |
| "epoch": 5.302189781021898, |
| "grad_norm": 0.0009465290931984782, |
| "learning_rate": 1.797784197694552e-07, |
| "loss": 0.0, |
| "step": 1214 |
| }, |
| { |
| "epoch": 5.306569343065694, |
| "grad_norm": 0.0031450926326215267, |
| "learning_rate": 1.7747915263362087e-07, |
| "loss": 0.0, |
| "step": 1215 |
| }, |
| { |
| "epoch": 5.310948905109489, |
| "grad_norm": 0.0004483767843339592, |
| "learning_rate": 1.7519414225116937e-07, |
| "loss": 0.0, |
| "step": 1216 |
| }, |
| { |
| "epoch": 5.315328467153285, |
| "grad_norm": 0.001726379618048668, |
| "learning_rate": 1.7292340264859681e-07, |
| "loss": 0.0, |
| "step": 1217 |
| }, |
| { |
| "epoch": 5.319708029197081, |
| "grad_norm": 0.0006137871532700956, |
| "learning_rate": 1.706669477647982e-07, |
| "loss": 0.0, |
| "step": 1218 |
| }, |
| { |
| "epoch": 5.324087591240876, |
| "grad_norm": 0.0042232791893184185, |
| "learning_rate": 1.684247914509826e-07, |
| "loss": 0.0, |
| "step": 1219 |
| }, |
| { |
| "epoch": 5.328467153284672, |
| "grad_norm": 0.00042099610436707735, |
| "learning_rate": 1.6619694747058658e-07, |
| "loss": 0.0, |
| "step": 1220 |
| }, |
| { |
| "epoch": 5.3328467153284675, |
| "grad_norm": 0.0008045814465731382, |
| "learning_rate": 1.63983429499191e-07, |
| "loss": 0.0, |
| "step": 1221 |
| }, |
| { |
| "epoch": 5.337226277372263, |
| "grad_norm": 0.000947158201597631, |
| "learning_rate": 1.6178425112443774e-07, |
| "loss": 0.0, |
| "step": 1222 |
| }, |
| { |
| "epoch": 5.341605839416058, |
| "grad_norm": 0.0012920291628688574, |
| "learning_rate": 1.5959942584594462e-07, |
| "loss": 0.0, |
| "step": 1223 |
| }, |
| { |
| "epoch": 5.3459854014598545, |
| "grad_norm": 0.0009263503015972674, |
| "learning_rate": 1.5742896707522241e-07, |
| "loss": 0.0, |
| "step": 1224 |
| }, |
| { |
| "epoch": 5.350364963503649, |
| "grad_norm": 0.011447799392044544, |
| "learning_rate": 1.55272888135595e-07, |
| "loss": 0.0, |
| "step": 1225 |
| }, |
| { |
| "epoch": 5.354744525547445, |
| "grad_norm": 0.0012064941693097353, |
| "learning_rate": 1.5313120226211452e-07, |
| "loss": 0.0, |
| "step": 1226 |
| }, |
| { |
| "epoch": 5.3591240875912405, |
| "grad_norm": 0.0035175024531781673, |
| "learning_rate": 1.51003922601482e-07, |
| "loss": 0.0, |
| "step": 1227 |
| }, |
| { |
| "epoch": 5.363503649635036, |
| "grad_norm": 0.0019178701331838965, |
| "learning_rate": 1.4889106221196686e-07, |
| "loss": 0.0, |
| "step": 1228 |
| }, |
| { |
| "epoch": 5.367883211678832, |
| "grad_norm": 0.0013161320239305496, |
| "learning_rate": 1.4679263406332466e-07, |
| "loss": 0.0, |
| "step": 1229 |
| }, |
| { |
| "epoch": 5.372262773722627, |
| "grad_norm": 0.0006836819229647517, |
| "learning_rate": 1.447086510367199e-07, |
| "loss": 0.0, |
| "step": 1230 |
| }, |
| { |
| "epoch": 5.376642335766423, |
| "grad_norm": 0.000652970396913588, |
| "learning_rate": 1.4263912592464596e-07, |
| "loss": 0.0, |
| "step": 1231 |
| }, |
| { |
| "epoch": 5.381021897810219, |
| "grad_norm": 0.0011560834245756269, |
| "learning_rate": 1.4058407143084596e-07, |
| "loss": 0.0, |
| "step": 1232 |
| }, |
| { |
| "epoch": 5.385401459854014, |
| "grad_norm": 0.000645961205009371, |
| "learning_rate": 1.3854350017023622e-07, |
| "loss": 0.0, |
| "step": 1233 |
| }, |
| { |
| "epoch": 5.38978102189781, |
| "grad_norm": 0.0012333478080108762, |
| "learning_rate": 1.365174246688275e-07, |
| "loss": 0.0, |
| "step": 1234 |
| }, |
| { |
| "epoch": 5.394160583941606, |
| "grad_norm": 0.0014514855574816465, |
| "learning_rate": 1.3450585736364846e-07, |
| "loss": 0.0, |
| "step": 1235 |
| }, |
| { |
| "epoch": 5.398540145985401, |
| "grad_norm": 0.003036617999896407, |
| "learning_rate": 1.325088106026695e-07, |
| "loss": 0.0, |
| "step": 1236 |
| }, |
| { |
| "epoch": 5.402919708029197, |
| "grad_norm": 0.0006957058794796467, |
| "learning_rate": 1.305262966447274e-07, |
| "loss": 0.0, |
| "step": 1237 |
| }, |
| { |
| "epoch": 5.4072992700729925, |
| "grad_norm": 0.0009868113556876779, |
| "learning_rate": 1.2855832765944903e-07, |
| "loss": 0.0, |
| "step": 1238 |
| }, |
| { |
| "epoch": 5.411678832116788, |
| "grad_norm": 0.0008344174129888415, |
| "learning_rate": 1.266049157271773e-07, |
| "loss": 0.0, |
| "step": 1239 |
| }, |
| { |
| "epoch": 5.416058394160584, |
| "grad_norm": 0.001987181603908539, |
| "learning_rate": 1.2466607283889736e-07, |
| "loss": 0.0, |
| "step": 1240 |
| }, |
| { |
| "epoch": 5.420437956204379, |
| "grad_norm": 0.0010780440643429756, |
| "learning_rate": 1.227418108961617e-07, |
| "loss": 0.0, |
| "step": 1241 |
| }, |
| { |
| "epoch": 5.424817518248175, |
| "grad_norm": 0.0006175651215016842, |
| "learning_rate": 1.2083214171101893e-07, |
| "loss": 0.0, |
| "step": 1242 |
| }, |
| { |
| "epoch": 5.429197080291971, |
| "grad_norm": 0.0004599309468176216, |
| "learning_rate": 1.1893707700593948e-07, |
| "loss": 0.0, |
| "step": 1243 |
| }, |
| { |
| "epoch": 5.433576642335766, |
| "grad_norm": 0.0009555256110616028, |
| "learning_rate": 1.1705662841374422e-07, |
| "loss": 0.0, |
| "step": 1244 |
| }, |
| { |
| "epoch": 5.437956204379562, |
| "grad_norm": 0.0016692888457328081, |
| "learning_rate": 1.1519080747753487e-07, |
| "loss": 0.0, |
| "step": 1245 |
| }, |
| { |
| "epoch": 5.442335766423358, |
| "grad_norm": 0.001181193278171122, |
| "learning_rate": 1.1333962565061973e-07, |
| "loss": 0.0, |
| "step": 1246 |
| }, |
| { |
| "epoch": 5.446715328467153, |
| "grad_norm": 0.0008872969192452729, |
| "learning_rate": 1.1150309429644623e-07, |
| "loss": 0.0, |
| "step": 1247 |
| }, |
| { |
| "epoch": 5.451094890510949, |
| "grad_norm": 0.0013513396261259913, |
| "learning_rate": 1.0968122468852954e-07, |
| "loss": 0.0, |
| "step": 1248 |
| }, |
| { |
| "epoch": 5.4554744525547445, |
| "grad_norm": 0.001052756910212338, |
| "learning_rate": 1.0787402801038404e-07, |
| "loss": 0.0, |
| "step": 1249 |
| }, |
| { |
| "epoch": 5.45985401459854, |
| "grad_norm": 0.002058651763945818, |
| "learning_rate": 1.0608151535545536e-07, |
| "loss": 0.0, |
| "step": 1250 |
| }, |
| { |
| "epoch": 5.464233576642336, |
| "grad_norm": 0.0017638428835198283, |
| "learning_rate": 1.0430369772705035e-07, |
| "loss": 0.0, |
| "step": 1251 |
| }, |
| { |
| "epoch": 5.468613138686131, |
| "grad_norm": 0.0007398691959679127, |
| "learning_rate": 1.0254058603827138e-07, |
| "loss": 0.0, |
| "step": 1252 |
| }, |
| { |
| "epoch": 5.472992700729927, |
| "grad_norm": 0.002057944890111685, |
| "learning_rate": 1.0079219111194888e-07, |
| "loss": 0.0, |
| "step": 1253 |
| }, |
| { |
| "epoch": 5.477372262773723, |
| "grad_norm": 0.001300388597883284, |
| "learning_rate": 9.905852368057384e-08, |
| "loss": 0.0, |
| "step": 1254 |
| }, |
| { |
| "epoch": 5.481751824817518, |
| "grad_norm": 0.0007523080566897988, |
| "learning_rate": 9.733959438623374e-08, |
| "loss": 0.0001, |
| "step": 1255 |
| }, |
| { |
| "epoch": 5.486131386861314, |
| "grad_norm": 0.027787882834672928, |
| "learning_rate": 9.563541378054569e-08, |
| "loss": 0.0, |
| "step": 1256 |
| }, |
| { |
| "epoch": 5.49051094890511, |
| "grad_norm": 0.0009588192915543914, |
| "learning_rate": 9.394599232459223e-08, |
| "loss": 0.0, |
| "step": 1257 |
| }, |
| { |
| "epoch": 5.494890510948905, |
| "grad_norm": 0.0010805552592501044, |
| "learning_rate": 9.227134038885755e-08, |
| "loss": 0.0, |
| "step": 1258 |
| }, |
| { |
| "epoch": 5.499270072992701, |
| "grad_norm": 0.009671115316450596, |
| "learning_rate": 9.06114682531628e-08, |
| "loss": 0.0, |
| "step": 1259 |
| }, |
| { |
| "epoch": 5.5036496350364965, |
| "grad_norm": 0.0009569254470989108, |
| "learning_rate": 8.896638610660397e-08, |
| "loss": 0.0, |
| "step": 1260 |
| }, |
| { |
| "epoch": 5.508029197080292, |
| "grad_norm": 0.0019343396415933967, |
| "learning_rate": 8.733610404748905e-08, |
| "loss": 0.0, |
| "step": 1261 |
| }, |
| { |
| "epoch": 5.512408759124088, |
| "grad_norm": 0.0109703429043293, |
| "learning_rate": 8.572063208327569e-08, |
| "loss": 0.0, |
| "step": 1262 |
| }, |
| { |
| "epoch": 5.516788321167883, |
| "grad_norm": 0.0006216384354047477, |
| "learning_rate": 8.411998013051037e-08, |
| "loss": 0.0, |
| "step": 1263 |
| }, |
| { |
| "epoch": 5.521167883211679, |
| "grad_norm": 0.0008817921043373644, |
| "learning_rate": 8.253415801476649e-08, |
| "loss": 0.0, |
| "step": 1264 |
| }, |
| { |
| "epoch": 5.525547445255475, |
| "grad_norm": 0.0004825249488931149, |
| "learning_rate": 8.096317547058557e-08, |
| "loss": 0.0, |
| "step": 1265 |
| }, |
| { |
| "epoch": 5.52992700729927, |
| "grad_norm": 0.0008048586314544082, |
| "learning_rate": 7.940704214141615e-08, |
| "loss": 0.0, |
| "step": 1266 |
| }, |
| { |
| "epoch": 5.534306569343066, |
| "grad_norm": 0.024905206635594368, |
| "learning_rate": 7.786576757955522e-08, |
| "loss": 0.0, |
| "step": 1267 |
| }, |
| { |
| "epoch": 5.538686131386862, |
| "grad_norm": 0.0006357874954119325, |
| "learning_rate": 7.633936124608998e-08, |
| "loss": 0.0, |
| "step": 1268 |
| }, |
| { |
| "epoch": 5.543065693430657, |
| "grad_norm": 0.0006507908110506833, |
| "learning_rate": 7.482783251083869e-08, |
| "loss": 0.0, |
| "step": 1269 |
| }, |
| { |
| "epoch": 5.547445255474453, |
| "grad_norm": 0.000882003630977124, |
| "learning_rate": 7.333119065229377e-08, |
| "loss": 0.0, |
| "step": 1270 |
| }, |
| { |
| "epoch": 5.5518248175182485, |
| "grad_norm": 0.0013504737289622426, |
| "learning_rate": 7.18494448575649e-08, |
| "loss": 0.0, |
| "step": 1271 |
| }, |
| { |
| "epoch": 5.556204379562044, |
| "grad_norm": 0.0021828978788107634, |
| "learning_rate": 7.038260422232296e-08, |
| "loss": 0.0, |
| "step": 1272 |
| }, |
| { |
| "epoch": 5.56058394160584, |
| "grad_norm": 0.0014103087596595287, |
| "learning_rate": 6.893067775074313e-08, |
| "loss": 0.0, |
| "step": 1273 |
| }, |
| { |
| "epoch": 5.5649635036496345, |
| "grad_norm": 0.0029129632748663425, |
| "learning_rate": 6.749367435545023e-08, |
| "loss": 0.0, |
| "step": 1274 |
| }, |
| { |
| "epoch": 5.569343065693431, |
| "grad_norm": 0.0020228680223226547, |
| "learning_rate": 6.607160285746511e-08, |
| "loss": 0.0, |
| "step": 1275 |
| }, |
| { |
| "epoch": 5.573722627737226, |
| "grad_norm": 0.0017138279508799314, |
| "learning_rate": 6.466447198614805e-08, |
| "loss": 0.0, |
| "step": 1276 |
| }, |
| { |
| "epoch": 5.578102189781022, |
| "grad_norm": 0.0009150414261966944, |
| "learning_rate": 6.327229037914717e-08, |
| "loss": 0.0, |
| "step": 1277 |
| }, |
| { |
| "epoch": 5.582481751824817, |
| "grad_norm": 0.0011616607662290335, |
| "learning_rate": 6.189506658234506e-08, |
| "loss": 0.0, |
| "step": 1278 |
| }, |
| { |
| "epoch": 5.586861313868614, |
| "grad_norm": 0.0004247005272191018, |
| "learning_rate": 6.053280904980558e-08, |
| "loss": 0.0, |
| "step": 1279 |
| }, |
| { |
| "epoch": 5.591240875912408, |
| "grad_norm": 0.0024173003621399403, |
| "learning_rate": 5.91855261437227e-08, |
| "loss": 0.0, |
| "step": 1280 |
| }, |
| { |
| "epoch": 5.595620437956204, |
| "grad_norm": 0.0015381041448563337, |
| "learning_rate": 5.785322613436895e-08, |
| "loss": 0.0, |
| "step": 1281 |
| }, |
| { |
| "epoch": 5.6, |
| "grad_norm": 0.0030655914451926947, |
| "learning_rate": 5.653591720004431e-08, |
| "loss": 0.0, |
| "step": 1282 |
| }, |
| { |
| "epoch": 5.604379562043795, |
| "grad_norm": 0.0009076350834220648, |
| "learning_rate": 5.5233607427026824e-08, |
| "loss": 0.0, |
| "step": 1283 |
| }, |
| { |
| "epoch": 5.608759124087591, |
| "grad_norm": 0.0009122485644184053, |
| "learning_rate": 5.3946304809521785e-08, |
| "loss": 0.0, |
| "step": 1284 |
| }, |
| { |
| "epoch": 5.613138686131387, |
| "grad_norm": 0.0005465763388201594, |
| "learning_rate": 5.267401724961374e-08, |
| "loss": 0.0, |
| "step": 1285 |
| }, |
| { |
| "epoch": 5.617518248175182, |
| "grad_norm": 0.001951449317857623, |
| "learning_rate": 5.141675255721762e-08, |
| "loss": 0.0, |
| "step": 1286 |
| }, |
| { |
| "epoch": 5.621897810218978, |
| "grad_norm": 0.003755422541871667, |
| "learning_rate": 5.017451845003074e-08, |
| "loss": 0.0, |
| "step": 1287 |
| }, |
| { |
| "epoch": 5.6262773722627735, |
| "grad_norm": 0.0010572342434898019, |
| "learning_rate": 4.8947322553485055e-08, |
| "loss": 0.0, |
| "step": 1288 |
| }, |
| { |
| "epoch": 5.630656934306569, |
| "grad_norm": 0.001004064455628395, |
| "learning_rate": 4.773517240070108e-08, |
| "loss": 0.0, |
| "step": 1289 |
| }, |
| { |
| "epoch": 5.635036496350365, |
| "grad_norm": 0.0010041017085313797, |
| "learning_rate": 4.6538075432441266e-08, |
| "loss": 0.0, |
| "step": 1290 |
| }, |
| { |
| "epoch": 5.63941605839416, |
| "grad_norm": 0.001073719235137105, |
| "learning_rate": 4.535603899706448e-08, |
| "loss": 0.0, |
| "step": 1291 |
| }, |
| { |
| "epoch": 5.643795620437956, |
| "grad_norm": 0.0008774212910793722, |
| "learning_rate": 4.41890703504802e-08, |
| "loss": 0.0, |
| "step": 1292 |
| }, |
| { |
| "epoch": 5.648175182481752, |
| "grad_norm": 0.0017952339258044958, |
| "learning_rate": 4.303717665610524e-08, |
| "loss": 0.0, |
| "step": 1293 |
| }, |
| { |
| "epoch": 5.652554744525547, |
| "grad_norm": 0.0007458472391590476, |
| "learning_rate": 4.190036498481875e-08, |
| "loss": 0.0, |
| "step": 1294 |
| }, |
| { |
| "epoch": 5.656934306569343, |
| "grad_norm": 0.001087260665372014, |
| "learning_rate": 4.077864231491896e-08, |
| "loss": 0.0, |
| "step": 1295 |
| }, |
| { |
| "epoch": 5.661313868613139, |
| "grad_norm": 0.004731070715934038, |
| "learning_rate": 3.9672015532081223e-08, |
| "loss": 0.0, |
| "step": 1296 |
| }, |
| { |
| "epoch": 5.665693430656934, |
| "grad_norm": 0.0019679393153637648, |
| "learning_rate": 3.8580491429314195e-08, |
| "loss": 0.0, |
| "step": 1297 |
| }, |
| { |
| "epoch": 5.67007299270073, |
| "grad_norm": 0.0009237058111466467, |
| "learning_rate": 3.75040767069193e-08, |
| "loss": 0.0, |
| "step": 1298 |
| }, |
| { |
| "epoch": 5.6744525547445255, |
| "grad_norm": 0.0008307588868774474, |
| "learning_rate": 3.6442777972449664e-08, |
| "loss": 0.0, |
| "step": 1299 |
| }, |
| { |
| "epoch": 5.678832116788321, |
| "grad_norm": 0.0010199848329648376, |
| "learning_rate": 3.5396601740669e-08, |
| "loss": 0.0, |
| "step": 1300 |
| }, |
| { |
| "epoch": 5.683211678832117, |
| "grad_norm": 0.002469507744535804, |
| "learning_rate": 3.436555443351142e-08, |
| "loss": 0.0, |
| "step": 1301 |
| }, |
| { |
| "epoch": 5.687591240875912, |
| "grad_norm": 0.0011531668715178967, |
| "learning_rate": 3.334964238004279e-08, |
| "loss": 0.0, |
| "step": 1302 |
| }, |
| { |
| "epoch": 5.691970802919708, |
| "grad_norm": 0.0012597210006788373, |
| "learning_rate": 3.23488718164211e-08, |
| "loss": 0.0, |
| "step": 1303 |
| }, |
| { |
| "epoch": 5.696350364963504, |
| "grad_norm": 0.0011917087249457836, |
| "learning_rate": 3.136324888585951e-08, |
| "loss": 0.0, |
| "step": 1304 |
| }, |
| { |
| "epoch": 5.700729927007299, |
| "grad_norm": 0.00046976955491118133, |
| "learning_rate": 3.039277963858667e-08, |
| "loss": 0.0, |
| "step": 1305 |
| }, |
| { |
| "epoch": 5.705109489051095, |
| "grad_norm": 0.0007437043241225183, |
| "learning_rate": 2.943747003181091e-08, |
| "loss": 0.0, |
| "step": 1306 |
| }, |
| { |
| "epoch": 5.709489051094891, |
| "grad_norm": 0.00099862867500633, |
| "learning_rate": 2.8497325929683628e-08, |
| "loss": 0.0, |
| "step": 1307 |
| }, |
| { |
| "epoch": 5.713868613138686, |
| "grad_norm": 0.0008740422781556845, |
| "learning_rate": 2.7572353103262617e-08, |
| "loss": 0.0, |
| "step": 1308 |
| }, |
| { |
| "epoch": 5.718248175182482, |
| "grad_norm": 0.0023886568378657103, |
| "learning_rate": 2.666255723047767e-08, |
| "loss": 0.0, |
| "step": 1309 |
| }, |
| { |
| "epoch": 5.7226277372262775, |
| "grad_norm": 0.0013786336639896035, |
| "learning_rate": 2.5767943896094215e-08, |
| "loss": 0.0, |
| "step": 1310 |
| }, |
| { |
| "epoch": 5.727007299270073, |
| "grad_norm": 0.00462157791480422, |
| "learning_rate": 2.488851859168112e-08, |
| "loss": 0.0, |
| "step": 1311 |
| }, |
| { |
| "epoch": 5.731386861313869, |
| "grad_norm": 0.0016167645808309317, |
| "learning_rate": 2.4024286715574885e-08, |
| "loss": 0.0, |
| "step": 1312 |
| }, |
| { |
| "epoch": 5.735766423357664, |
| "grad_norm": 0.0006235836772248149, |
| "learning_rate": 2.3175253572847445e-08, |
| "loss": 0.0, |
| "step": 1313 |
| }, |
| { |
| "epoch": 5.74014598540146, |
| "grad_norm": 0.0011521874694153666, |
| "learning_rate": 2.2341424375274256e-08, |
| "loss": 0.0, |
| "step": 1314 |
| }, |
| { |
| "epoch": 5.744525547445256, |
| "grad_norm": 0.0009948118822649121, |
| "learning_rate": 2.152280424130071e-08, |
| "loss": 0.0, |
| "step": 1315 |
| }, |
| { |
| "epoch": 5.748905109489051, |
| "grad_norm": 0.0005155662656761706, |
| "learning_rate": 2.0719398196012708e-08, |
| "loss": 0.0, |
| "step": 1316 |
| }, |
| { |
| "epoch": 5.753284671532847, |
| "grad_norm": 0.0015854033408686519, |
| "learning_rate": 1.9931211171103358e-08, |
| "loss": 0.0, |
| "step": 1317 |
| }, |
| { |
| "epoch": 5.757664233576643, |
| "grad_norm": 0.0021805285941809416, |
| "learning_rate": 1.9158248004845503e-08, |
| "loss": 0.0, |
| "step": 1318 |
| }, |
| { |
| "epoch": 5.762043795620438, |
| "grad_norm": 0.0006334164645522833, |
| "learning_rate": 1.8400513442059786e-08, |
| "loss": 0.0, |
| "step": 1319 |
| }, |
| { |
| "epoch": 5.766423357664234, |
| "grad_norm": 0.0010131029412150383, |
| "learning_rate": 1.7658012134086357e-08, |
| "loss": 0.0, |
| "step": 1320 |
| }, |
| { |
| "epoch": 5.7708029197080295, |
| "grad_norm": 0.0006262679817155004, |
| "learning_rate": 1.6930748638756268e-08, |
| "loss": 0.0, |
| "step": 1321 |
| }, |
| { |
| "epoch": 5.775182481751825, |
| "grad_norm": 0.0005879150703549385, |
| "learning_rate": 1.6218727420364287e-08, |
| "loss": 0.0, |
| "step": 1322 |
| }, |
| { |
| "epoch": 5.779562043795621, |
| "grad_norm": 0.0279290359467268, |
| "learning_rate": 1.5521952849639476e-08, |
| "loss": 0.0001, |
| "step": 1323 |
| }, |
| { |
| "epoch": 5.783941605839416, |
| "grad_norm": 0.0004460141935851425, |
| "learning_rate": 1.4840429203720751e-08, |
| "loss": 0.0, |
| "step": 1324 |
| }, |
| { |
| "epoch": 5.788321167883212, |
| "grad_norm": 0.0011242893524467945, |
| "learning_rate": 1.4174160666128866e-08, |
| "loss": 0.0, |
| "step": 1325 |
| }, |
| { |
| "epoch": 5.792700729927008, |
| "grad_norm": 0.003552220528945327, |
| "learning_rate": 1.3523151326741702e-08, |
| "loss": 0.0, |
| "step": 1326 |
| }, |
| { |
| "epoch": 5.797080291970802, |
| "grad_norm": 0.0008795269532129169, |
| "learning_rate": 1.2887405181768175e-08, |
| "loss": 0.0, |
| "step": 1327 |
| }, |
| { |
| "epoch": 5.801459854014599, |
| "grad_norm": 0.0008025469724088907, |
| "learning_rate": 1.2266926133725488e-08, |
| "loss": 0.0, |
| "step": 1328 |
| }, |
| { |
| "epoch": 5.805839416058394, |
| "grad_norm": 0.0005717664025723934, |
| "learning_rate": 1.1661717991412747e-08, |
| "loss": 0.0, |
| "step": 1329 |
| }, |
| { |
| "epoch": 5.81021897810219, |
| "grad_norm": 0.00190149643458426, |
| "learning_rate": 1.1071784469889602e-08, |
| "loss": 0.0, |
| "step": 1330 |
| }, |
| { |
| "epoch": 5.814598540145985, |
| "grad_norm": 0.0011143183801323175, |
| "learning_rate": 1.0497129190452926e-08, |
| "loss": 0.0, |
| "step": 1331 |
| }, |
| { |
| "epoch": 5.8189781021897815, |
| "grad_norm": 0.001196785713545978, |
| "learning_rate": 9.937755680613781e-09, |
| "loss": 0.0, |
| "step": 1332 |
| }, |
| { |
| "epoch": 5.823357664233576, |
| "grad_norm": 0.0009675221517682076, |
| "learning_rate": 9.393667374076875e-09, |
| "loss": 0.0, |
| "step": 1333 |
| }, |
| { |
| "epoch": 5.827737226277372, |
| "grad_norm": 0.0017407843843102455, |
| "learning_rate": 8.864867610718364e-09, |
| "loss": 0.0, |
| "step": 1334 |
| }, |
| { |
| "epoch": 5.8321167883211675, |
| "grad_norm": 0.0015355003997683525, |
| "learning_rate": 8.351359636566414e-09, |
| "loss": 0.0, |
| "step": 1335 |
| }, |
| { |
| "epoch": 5.836496350364963, |
| "grad_norm": 0.0009656071197241545, |
| "learning_rate": 7.853146603780948e-09, |
| "loss": 0.0, |
| "step": 1336 |
| }, |
| { |
| "epoch": 5.840875912408759, |
| "grad_norm": 0.0008687431691214442, |
| "learning_rate": 7.370231570633657e-09, |
| "loss": 0.0, |
| "step": 1337 |
| }, |
| { |
| "epoch": 5.8452554744525544, |
| "grad_norm": 0.021104618906974792, |
| "learning_rate": 6.902617501489961e-09, |
| "loss": 0.0001, |
| "step": 1338 |
| }, |
| { |
| "epoch": 5.84963503649635, |
| "grad_norm": 0.0011317721800878644, |
| "learning_rate": 6.45030726679069e-09, |
| "loss": 0.0, |
| "step": 1339 |
| }, |
| { |
| "epoch": 5.854014598540146, |
| "grad_norm": 0.001694355858489871, |
| "learning_rate": 6.0133036430343185e-09, |
| "loss": 0.0, |
| "step": 1340 |
| }, |
| { |
| "epoch": 5.858394160583941, |
| "grad_norm": 0.0029166857711970806, |
| "learning_rate": 5.591609312759761e-09, |
| "loss": 0.0, |
| "step": 1341 |
| }, |
| { |
| "epoch": 5.862773722627737, |
| "grad_norm": 0.0015766709111630917, |
| "learning_rate": 5.185226864530546e-09, |
| "loss": 0.0, |
| "step": 1342 |
| }, |
| { |
| "epoch": 5.867153284671533, |
| "grad_norm": 0.001368473982438445, |
| "learning_rate": 4.794158792917613e-09, |
| "loss": 0.0, |
| "step": 1343 |
| }, |
| { |
| "epoch": 5.871532846715328, |
| "grad_norm": 0.0007716699037700891, |
| "learning_rate": 4.4184074984859884e-09, |
| "loss": 0.0, |
| "step": 1344 |
| }, |
| { |
| "epoch": 5.875912408759124, |
| "grad_norm": 0.0015096355928108096, |
| "learning_rate": 4.057975287778404e-09, |
| "loss": 0.0, |
| "step": 1345 |
| }, |
| { |
| "epoch": 5.8802919708029195, |
| "grad_norm": 0.0010505887912586331, |
| "learning_rate": 3.7128643733011506e-09, |
| "loss": 0.0, |
| "step": 1346 |
| }, |
| { |
| "epoch": 5.884671532846715, |
| "grad_norm": 0.0009699283400550485, |
| "learning_rate": 3.383076873511859e-09, |
| "loss": 0.0, |
| "step": 1347 |
| }, |
| { |
| "epoch": 5.889051094890511, |
| "grad_norm": 0.011337986215949059, |
| "learning_rate": 3.0686148128050707e-09, |
| "loss": 0.0, |
| "step": 1348 |
| }, |
| { |
| "epoch": 5.8934306569343065, |
| "grad_norm": 0.0008212627726607025, |
| "learning_rate": 2.7694801215011334e-09, |
| "loss": 0.0, |
| "step": 1349 |
| }, |
| { |
| "epoch": 5.897810218978102, |
| "grad_norm": 0.0007408977835439146, |
| "learning_rate": 2.485674635832602e-09, |
| "loss": 0.0, |
| "step": 1350 |
| }, |
| { |
| "epoch": 5.902189781021898, |
| "grad_norm": 0.0029882804956287146, |
| "learning_rate": 2.2172000979345244e-09, |
| "loss": 0.0, |
| "step": 1351 |
| }, |
| { |
| "epoch": 5.906569343065693, |
| "grad_norm": 0.0008590557845309377, |
| "learning_rate": 1.9640581558330595e-09, |
| "loss": 0.0, |
| "step": 1352 |
| }, |
| { |
| "epoch": 5.910948905109489, |
| "grad_norm": 0.0009516619611531496, |
| "learning_rate": 1.7262503634360438e-09, |
| "loss": 0.0, |
| "step": 1353 |
| }, |
| { |
| "epoch": 5.915328467153285, |
| "grad_norm": 0.029193971306085587, |
| "learning_rate": 1.5037781805218866e-09, |
| "loss": 0.0001, |
| "step": 1354 |
| }, |
| { |
| "epoch": 5.91970802919708, |
| "grad_norm": 0.0010070932330563664, |
| "learning_rate": 1.2966429727323537e-09, |
| "loss": 0.0, |
| "step": 1355 |
| }, |
| { |
| "epoch": 5.924087591240876, |
| "grad_norm": 0.0013591903261840343, |
| "learning_rate": 1.1048460115634096e-09, |
| "loss": 0.0, |
| "step": 1356 |
| }, |
| { |
| "epoch": 5.9284671532846716, |
| "grad_norm": 0.0009585371590219438, |
| "learning_rate": 9.28388474357167e-10, |
| "loss": 0.0, |
| "step": 1357 |
| }, |
| { |
| "epoch": 5.932846715328467, |
| "grad_norm": 0.0010509295389056206, |
| "learning_rate": 7.672714442952256e-10, |
| "loss": 0.0, |
| "step": 1358 |
| }, |
| { |
| "epoch": 5.937226277372263, |
| "grad_norm": 0.012163963168859482, |
| "learning_rate": 6.214959103914564e-10, |
| "loss": 0.0, |
| "step": 1359 |
| }, |
| { |
| "epoch": 5.9416058394160585, |
| "grad_norm": 0.001641071867197752, |
| "learning_rate": 4.910627674867274e-10, |
| "loss": 0.0, |
| "step": 1360 |
| }, |
| { |
| "epoch": 5.945985401459854, |
| "grad_norm": 0.0007686957251280546, |
| "learning_rate": 3.759728162422427e-10, |
| "loss": 0.0, |
| "step": 1361 |
| }, |
| { |
| "epoch": 5.95036496350365, |
| "grad_norm": 0.00218059029430151, |
| "learning_rate": 2.7622676313565634e-10, |
| "loss": 0.0, |
| "step": 1362 |
| }, |
| { |
| "epoch": 5.954744525547445, |
| "grad_norm": 0.0017245035851374269, |
| "learning_rate": 1.9182522045690932e-10, |
| "loss": 0.0, |
| "step": 1363 |
| }, |
| { |
| "epoch": 5.959124087591241, |
| "grad_norm": 0.0005697187152691185, |
| "learning_rate": 1.2276870630267833e-10, |
| "loss": 0.0, |
| "step": 1364 |
| }, |
| { |
| "epoch": 5.963503649635037, |
| "grad_norm": 0.0009431016515009105, |
| "learning_rate": 6.90576445755431e-11, |
| "loss": 0.0, |
| "step": 1365 |
| }, |
| { |
| "epoch": 5.967883211678832, |
| "grad_norm": 0.0006408031331375241, |
| "learning_rate": 3.069236497982297e-11, |
| "loss": 0.0, |
| "step": 1366 |
| }, |
| { |
| "epoch": 5.972262773722628, |
| "grad_norm": 0.0016575654735788703, |
| "learning_rate": 7.67310302018931e-12, |
| "loss": 0.0, |
| "step": 1367 |
| }, |
| { |
| "epoch": 5.976642335766424, |
| "grad_norm": 0.0007427773089148104, |
| "learning_rate": 0.0, |
| "loss": 0.0, |
| "step": 1368 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 1368, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 6, |
| "save_steps": 228, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 3.445935060497203e+18, |
| "train_batch_size": 4, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|