{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.994160583941606,
  "eval_steps": 500,
  "global_step": 456,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.004379562043795621,
      "grad_norm": 34.64235305786133,
      "learning_rate": 5.0000000000000004e-08,
      "loss": 2.6583,
      "step": 1
    },
    {
      "epoch": 0.008759124087591242,
      "grad_norm": 33.89678192138672,
      "learning_rate": 1.0000000000000001e-07,
      "loss": 2.5074,
      "step": 2
    },
    {
      "epoch": 0.013138686131386862,
      "grad_norm": 35.2148551940918,
      "learning_rate": 1.5000000000000002e-07,
      "loss": 2.7094,
      "step": 3
    },
    {
      "epoch": 0.017518248175182483,
      "grad_norm": 35.11457061767578,
      "learning_rate": 2.0000000000000002e-07,
      "loss": 2.7266,
      "step": 4
    },
    {
      "epoch": 0.021897810218978103,
      "grad_norm": 35.70753479003906,
      "learning_rate": 2.5000000000000004e-07,
      "loss": 2.7442,
      "step": 5
    },
    {
      "epoch": 0.026277372262773723,
      "grad_norm": 34.34943771362305,
      "learning_rate": 3.0000000000000004e-07,
      "loss": 2.5578,
      "step": 6
    },
    {
      "epoch": 0.030656934306569343,
      "grad_norm": 34.31540298461914,
      "learning_rate": 3.5000000000000004e-07,
      "loss": 2.5893,
      "step": 7
    },
    {
      "epoch": 0.035036496350364967,
      "grad_norm": 32.545223236083984,
      "learning_rate": 4.0000000000000003e-07,
      "loss": 2.5039,
      "step": 8
    },
    {
      "epoch": 0.03941605839416058,
      "grad_norm": 35.70431137084961,
      "learning_rate": 4.5000000000000003e-07,
      "loss": 2.6719,
      "step": 9
    },
    {
      "epoch": 0.043795620437956206,
      "grad_norm": 34.14265441894531,
      "learning_rate": 5.000000000000001e-07,
      "loss": 2.5764,
      "step": 10
    },
    {
      "epoch": 0.04817518248175182,
      "grad_norm": 32.08097839355469,
      "learning_rate": 5.5e-07,
      "loss": 2.4564,
      "step": 11
    },
    {
      "epoch": 0.052554744525547446,
      "grad_norm": 32.66060256958008,
      "learning_rate": 6.000000000000001e-07,
      "loss": 2.458,
      "step": 12
    },
    {
      "epoch": 0.05693430656934306,
      "grad_norm": 33.21636962890625,
      "learning_rate": 6.5e-07,
      "loss": 2.4835,
      "step": 13
    },
    {
      "epoch": 0.061313868613138686,
      "grad_norm": 33.92257308959961,
      "learning_rate": 7.000000000000001e-07,
      "loss": 2.4288,
      "step": 14
    },
    {
      "epoch": 0.06569343065693431,
      "grad_norm": 32.19805145263672,
      "learning_rate": 7.5e-07,
      "loss": 2.2411,
      "step": 15
    },
    {
      "epoch": 0.07007299270072993,
      "grad_norm": 32.355220794677734,
      "learning_rate": 8.000000000000001e-07,
      "loss": 2.1597,
      "step": 16
    },
    {
      "epoch": 0.07445255474452554,
      "grad_norm": 33.08480453491211,
      "learning_rate": 8.500000000000001e-07,
      "loss": 2.1377,
      "step": 17
    },
    {
      "epoch": 0.07883211678832117,
      "grad_norm": 33.459957122802734,
      "learning_rate": 9.000000000000001e-07,
      "loss": 2.0306,
      "step": 18
    },
    {
      "epoch": 0.08321167883211679,
      "grad_norm": 32.897315979003906,
      "learning_rate": 9.500000000000001e-07,
      "loss": 1.8697,
      "step": 19
    },
    {
      "epoch": 0.08759124087591241,
      "grad_norm": 33.81785202026367,
      "learning_rate": 1.0000000000000002e-06,
      "loss": 1.8147,
      "step": 20
    },
    {
      "epoch": 0.09197080291970802,
      "grad_norm": 32.52595520019531,
      "learning_rate": 1.0500000000000001e-06,
      "loss": 1.6526,
      "step": 21
    },
    {
      "epoch": 0.09635036496350365,
      "grad_norm": 34.09442138671875,
      "learning_rate": 1.1e-06,
      "loss": 1.6127,
      "step": 22
    },
    {
      "epoch": 0.10072992700729927,
      "grad_norm": 30.89822769165039,
      "learning_rate": 1.1500000000000002e-06,
      "loss": 1.3872,
      "step": 23
    },
    {
      "epoch": 0.10510948905109489,
      "grad_norm": 29.566524505615234,
      "learning_rate": 1.2000000000000002e-06,
      "loss": 1.2755,
      "step": 24
    },
    {
      "epoch": 0.10948905109489052,
      "grad_norm": 28.26628875732422,
      "learning_rate": 1.25e-06,
      "loss": 1.1409,
      "step": 25
    },
    {
      "epoch": 0.11386861313868613,
      "grad_norm": 30.7103328704834,
      "learning_rate": 1.3e-06,
      "loss": 0.966,
      "step": 26
    },
    {
      "epoch": 0.11824817518248175,
      "grad_norm": 28.975385665893555,
      "learning_rate": 1.3500000000000002e-06,
      "loss": 0.7579,
      "step": 27
    },
    {
      "epoch": 0.12262773722627737,
      "grad_norm": 26.821529388427734,
      "learning_rate": 1.4000000000000001e-06,
      "loss": 0.6013,
      "step": 28
    },
    {
      "epoch": 0.12700729927007298,
      "grad_norm": 23.804439544677734,
      "learning_rate": 1.45e-06,
      "loss": 0.4978,
      "step": 29
    },
    {
      "epoch": 0.13138686131386862,
      "grad_norm": 21.404451370239258,
      "learning_rate": 1.5e-06,
      "loss": 0.3926,
      "step": 30
    },
    {
      "epoch": 0.13576642335766423,
      "grad_norm": 17.63161849975586,
      "learning_rate": 1.5500000000000002e-06,
      "loss": 0.2568,
      "step": 31
    },
    {
      "epoch": 0.14014598540145987,
      "grad_norm": 10.998854637145996,
      "learning_rate": 1.6000000000000001e-06,
      "loss": 0.2373,
      "step": 32
    },
    {
      "epoch": 0.14452554744525548,
      "grad_norm": 6.9544997215271,
      "learning_rate": 1.6500000000000003e-06,
      "loss": 0.1689,
      "step": 33
    },
    {
      "epoch": 0.14890510948905109,
      "grad_norm": 5.1013102531433105,
      "learning_rate": 1.7000000000000002e-06,
      "loss": 0.1471,
      "step": 34
    },
    {
      "epoch": 0.15328467153284672,
      "grad_norm": 4.501709461212158,
      "learning_rate": 1.75e-06,
      "loss": 0.132,
      "step": 35
    },
    {
      "epoch": 0.15766423357664233,
      "grad_norm": 3.198529005050659,
      "learning_rate": 1.8000000000000001e-06,
      "loss": 0.1065,
      "step": 36
    },
    {
      "epoch": 0.16204379562043797,
      "grad_norm": 3.2325005531311035,
      "learning_rate": 1.85e-06,
      "loss": 0.0907,
      "step": 37
    },
    {
      "epoch": 0.16642335766423358,
      "grad_norm": 1.5125375986099243,
      "learning_rate": 1.9000000000000002e-06,
      "loss": 0.0782,
      "step": 38
    },
    {
      "epoch": 0.1708029197080292,
      "grad_norm": 1.9160635471343994,
      "learning_rate": 1.9500000000000004e-06,
      "loss": 0.0852,
      "step": 39
    },
    {
      "epoch": 0.17518248175182483,
      "grad_norm": 1.6062333583831787,
      "learning_rate": 2.0000000000000003e-06,
      "loss": 0.074,
      "step": 40
    },
    {
      "epoch": 0.17956204379562044,
      "grad_norm": 1.5675855875015259,
      "learning_rate": 2.05e-06,
      "loss": 0.0704,
      "step": 41
    },
    {
      "epoch": 0.18394160583941604,
      "grad_norm": 1.440182089805603,
      "learning_rate": 2.1000000000000002e-06,
      "loss": 0.0674,
      "step": 42
    },
    {
      "epoch": 0.18832116788321168,
      "grad_norm": 1.1466726064682007,
      "learning_rate": 2.15e-06,
      "loss": 0.0702,
      "step": 43
    },
    {
      "epoch": 0.1927007299270073,
      "grad_norm": 1.2195515632629395,
      "learning_rate": 2.2e-06,
      "loss": 0.0723,
      "step": 44
    },
    {
      "epoch": 0.19708029197080293,
      "grad_norm": 1.743561029434204,
      "learning_rate": 2.25e-06,
      "loss": 0.0875,
      "step": 45
    },
    {
      "epoch": 0.20145985401459854,
      "grad_norm": 0.9764343500137329,
      "learning_rate": 2.3000000000000004e-06,
      "loss": 0.062,
      "step": 46
    },
    {
      "epoch": 0.20583941605839415,
      "grad_norm": 0.8891277313232422,
      "learning_rate": 2.35e-06,
      "loss": 0.0576,
      "step": 47
    },
    {
      "epoch": 0.21021897810218979,
      "grad_norm": 0.9648666977882385,
      "learning_rate": 2.4000000000000003e-06,
      "loss": 0.0656,
      "step": 48
    },
    {
      "epoch": 0.2145985401459854,
      "grad_norm": 0.784566342830658,
      "learning_rate": 2.4500000000000003e-06,
      "loss": 0.0548,
      "step": 49
    },
    {
      "epoch": 0.21897810218978103,
      "grad_norm": 0.9402966499328613,
      "learning_rate": 2.5e-06,
      "loss": 0.0626,
      "step": 50
    },
    {
      "epoch": 0.22335766423357664,
      "grad_norm": 1.3284685611724854,
      "learning_rate": 2.55e-06,
      "loss": 0.0632,
      "step": 51
    },
    {
      "epoch": 0.22773722627737225,
      "grad_norm": 1.0913968086242676,
      "learning_rate": 2.6e-06,
      "loss": 0.0675,
      "step": 52
    },
    {
      "epoch": 0.2321167883211679,
      "grad_norm": 1.1069140434265137,
      "learning_rate": 2.6500000000000005e-06,
      "loss": 0.0541,
      "step": 53
    },
    {
      "epoch": 0.2364963503649635,
      "grad_norm": 0.8529757857322693,
      "learning_rate": 2.7000000000000004e-06,
      "loss": 0.0657,
      "step": 54
    },
    {
      "epoch": 0.24087591240875914,
      "grad_norm": 0.7182446718215942,
      "learning_rate": 2.7500000000000004e-06,
      "loss": 0.0607,
      "step": 55
    },
    {
      "epoch": 0.24525547445255474,
      "grad_norm": 1.0538653135299683,
      "learning_rate": 2.8000000000000003e-06,
      "loss": 0.0556,
      "step": 56
    },
    {
      "epoch": 0.24963503649635035,
      "grad_norm": 1.2083594799041748,
      "learning_rate": 2.85e-06,
      "loss": 0.0532,
      "step": 57
    },
    {
      "epoch": 0.25401459854014596,
      "grad_norm": 0.8183572888374329,
      "learning_rate": 2.9e-06,
      "loss": 0.0529,
      "step": 58
    },
    {
      "epoch": 0.2583941605839416,
      "grad_norm": 0.9014842510223389,
      "learning_rate": 2.95e-06,
      "loss": 0.0601,
      "step": 59
    },
    {
      "epoch": 0.26277372262773724,
      "grad_norm": 0.9017247557640076,
      "learning_rate": 3e-06,
      "loss": 0.0584,
      "step": 60
    },
    {
      "epoch": 0.2671532846715328,
      "grad_norm": 1.1078683137893677,
      "learning_rate": 3.05e-06,
      "loss": 0.0635,
      "step": 61
    },
    {
      "epoch": 0.27153284671532846,
      "grad_norm": 1.174526572227478,
      "learning_rate": 3.1000000000000004e-06,
      "loss": 0.0523,
      "step": 62
    },
    {
      "epoch": 0.2759124087591241,
      "grad_norm": 0.9296770095825195,
      "learning_rate": 3.1500000000000003e-06,
      "loss": 0.0588,
      "step": 63
    },
    {
      "epoch": 0.28029197080291973,
      "grad_norm": 0.8549372553825378,
      "learning_rate": 3.2000000000000003e-06,
      "loss": 0.0639,
      "step": 64
    },
    {
      "epoch": 0.2846715328467153,
      "grad_norm": 0.8956279158592224,
      "learning_rate": 3.2500000000000002e-06,
      "loss": 0.059,
      "step": 65
    },
    {
      "epoch": 0.28905109489051095,
      "grad_norm": 0.7937710285186768,
      "learning_rate": 3.3000000000000006e-06,
      "loss": 0.0579,
      "step": 66
    },
    {
      "epoch": 0.2934306569343066,
      "grad_norm": 0.7786620855331421,
      "learning_rate": 3.3500000000000005e-06,
      "loss": 0.0586,
      "step": 67
    },
    {
      "epoch": 0.29781021897810217,
      "grad_norm": 0.7562637329101562,
      "learning_rate": 3.4000000000000005e-06,
      "loss": 0.046,
      "step": 68
    },
    {
      "epoch": 0.3021897810218978,
      "grad_norm": 0.8958250880241394,
      "learning_rate": 3.45e-06,
      "loss": 0.0566,
      "step": 69
    },
    {
      "epoch": 0.30656934306569344,
      "grad_norm": 0.9434528946876526,
      "learning_rate": 3.5e-06,
      "loss": 0.0548,
      "step": 70
    },
    {
      "epoch": 0.310948905109489,
      "grad_norm": 1.0564453601837158,
      "learning_rate": 3.5500000000000003e-06,
      "loss": 0.0529,
      "step": 71
    },
    {
      "epoch": 0.31532846715328466,
      "grad_norm": 0.896443247795105,
      "learning_rate": 3.6000000000000003e-06,
      "loss": 0.0517,
      "step": 72
    },
    {
      "epoch": 0.3197080291970803,
      "grad_norm": 1.1364223957061768,
      "learning_rate": 3.65e-06,
      "loss": 0.0489,
      "step": 73
    },
    {
      "epoch": 0.32408759124087594,
      "grad_norm": 1.1319010257720947,
      "learning_rate": 3.7e-06,
      "loss": 0.0548,
      "step": 74
    },
    {
      "epoch": 0.3284671532846715,
      "grad_norm": 0.9694503545761108,
      "learning_rate": 3.7500000000000005e-06,
      "loss": 0.0525,
      "step": 75
    },
    {
      "epoch": 0.33284671532846716,
      "grad_norm": 0.8128111958503723,
      "learning_rate": 3.8000000000000005e-06,
      "loss": 0.0566,
      "step": 76
    },
    {
      "epoch": 0.3372262773722628,
      "grad_norm": 0.9068273901939392,
      "learning_rate": 3.85e-06,
      "loss": 0.0475,
      "step": 77
    },
    {
      "epoch": 0.3416058394160584,
      "grad_norm": 0.9689438343048096,
      "learning_rate": 3.900000000000001e-06,
      "loss": 0.048,
      "step": 78
    },
    {
      "epoch": 0.345985401459854,
      "grad_norm": 0.940131664276123,
      "learning_rate": 3.95e-06,
      "loss": 0.0567,
      "step": 79
    },
    {
      "epoch": 0.35036496350364965,
      "grad_norm": 0.8836082220077515,
      "learning_rate": 4.000000000000001e-06,
      "loss": 0.0542,
      "step": 80
    },
    {
      "epoch": 0.35474452554744523,
      "grad_norm": 0.9325949549674988,
      "learning_rate": 4.05e-06,
      "loss": 0.0551,
      "step": 81
    },
    {
      "epoch": 0.35912408759124087,
      "grad_norm": 0.8954764008522034,
      "learning_rate": 4.1e-06,
      "loss": 0.0517,
      "step": 82
    },
    {
      "epoch": 0.3635036496350365,
      "grad_norm": 0.6444959044456482,
      "learning_rate": 4.15e-06,
      "loss": 0.0434,
      "step": 83
    },
    {
      "epoch": 0.3678832116788321,
      "grad_norm": 0.9097581505775452,
      "learning_rate": 4.2000000000000004e-06,
      "loss": 0.0471,
      "step": 84
    },
    {
      "epoch": 0.3722627737226277,
      "grad_norm": 0.849006712436676,
      "learning_rate": 4.25e-06,
      "loss": 0.0529,
      "step": 85
    },
    {
      "epoch": 0.37664233576642336,
      "grad_norm": 0.8611392378807068,
      "learning_rate": 4.3e-06,
      "loss": 0.0513,
      "step": 86
    },
    {
      "epoch": 0.381021897810219,
      "grad_norm": 0.7885357737541199,
      "learning_rate": 4.350000000000001e-06,
      "loss": 0.0523,
      "step": 87
    },
    {
      "epoch": 0.3854014598540146,
      "grad_norm": 0.7642116546630859,
      "learning_rate": 4.4e-06,
      "loss": 0.0407,
      "step": 88
    },
    {
      "epoch": 0.3897810218978102,
      "grad_norm": 0.8920945525169373,
      "learning_rate": 4.450000000000001e-06,
      "loss": 0.0485,
      "step": 89
    },
    {
      "epoch": 0.39416058394160586,
      "grad_norm": 0.9801046848297119,
      "learning_rate": 4.5e-06,
      "loss": 0.0404,
      "step": 90
    },
    {
      "epoch": 0.39854014598540144,
      "grad_norm": 1.0874953269958496,
      "learning_rate": 4.5500000000000005e-06,
      "loss": 0.0588,
      "step": 91
    },
    {
      "epoch": 0.4029197080291971,
      "grad_norm": 0.9019029140472412,
      "learning_rate": 4.600000000000001e-06,
      "loss": 0.0466,
      "step": 92
    },
    {
      "epoch": 0.4072992700729927,
      "grad_norm": 0.7258988618850708,
      "learning_rate": 4.65e-06,
      "loss": 0.0493,
      "step": 93
    },
    {
      "epoch": 0.4116788321167883,
      "grad_norm": 1.103407859802246,
      "learning_rate": 4.7e-06,
      "loss": 0.0495,
      "step": 94
    },
    {
      "epoch": 0.41605839416058393,
      "grad_norm": 0.751805305480957,
      "learning_rate": 4.75e-06,
      "loss": 0.0484,
      "step": 95
    },
    {
      "epoch": 0.42043795620437957,
      "grad_norm": 0.7717764973640442,
      "learning_rate": 4.800000000000001e-06,
      "loss": 0.0447,
      "step": 96
    },
    {
      "epoch": 0.4248175182481752,
      "grad_norm": 0.7147190570831299,
      "learning_rate": 4.85e-06,
      "loss": 0.0523,
      "step": 97
    },
    {
      "epoch": 0.4291970802919708,
      "grad_norm": 0.9990110993385315,
      "learning_rate": 4.9000000000000005e-06,
      "loss": 0.0454,
      "step": 98
    },
    {
      "epoch": 0.4335766423357664,
      "grad_norm": 0.7766187191009521,
      "learning_rate": 4.95e-06,
      "loss": 0.0472,
      "step": 99
    },
    {
      "epoch": 0.43795620437956206,
      "grad_norm": 0.7124347686767578,
      "learning_rate": 5e-06,
      "loss": 0.0473,
      "step": 100
    },
    {
      "epoch": 0.44233576642335765,
      "grad_norm": 0.9340270757675171,
      "learning_rate": 4.99999232689698e-06,
      "loss": 0.0499,
      "step": 101
    },
    {
      "epoch": 0.4467153284671533,
      "grad_norm": 0.7429985404014587,
      "learning_rate": 4.999969307635021e-06,
      "loss": 0.042,
      "step": 102
    },
    {
      "epoch": 0.4510948905109489,
      "grad_norm": 0.9131317138671875,
      "learning_rate": 4.999930942355425e-06,
      "loss": 0.0519,
      "step": 103
    },
    {
      "epoch": 0.4554744525547445,
      "grad_norm": 0.9970843195915222,
      "learning_rate": 4.999877231293698e-06,
      "loss": 0.0428,
      "step": 104
    },
    {
      "epoch": 0.45985401459854014,
      "grad_norm": 0.7625145316123962,
      "learning_rate": 4.999808174779543e-06,
      "loss": 0.0442,
      "step": 105
    },
    {
      "epoch": 0.4642335766423358,
      "grad_norm": 0.6059474945068359,
      "learning_rate": 4.999723773236865e-06,
      "loss": 0.0456,
      "step": 106
    },
    {
      "epoch": 0.4686131386861314,
      "grad_norm": 0.6798833608627319,
      "learning_rate": 4.999624027183758e-06,
      "loss": 0.0408,
      "step": 107
    },
    {
      "epoch": 0.472992700729927,
      "grad_norm": 1.0250803232192993,
      "learning_rate": 4.999508937232514e-06,
      "loss": 0.0471,
      "step": 108
    },
    {
      "epoch": 0.47737226277372263,
      "grad_norm": 0.8457198739051819,
      "learning_rate": 4.999378504089609e-06,
      "loss": 0.0425,
      "step": 109
    },
    {
      "epoch": 0.48175182481751827,
      "grad_norm": 0.9417868852615356,
      "learning_rate": 4.999232728555705e-06,
      "loss": 0.0388,
      "step": 110
    },
    {
      "epoch": 0.48613138686131385,
      "grad_norm": 0.8558921813964844,
      "learning_rate": 4.999071611525643e-06,
      "loss": 0.0423,
      "step": 111
    },
    {
      "epoch": 0.4905109489051095,
      "grad_norm": 0.7070104479789734,
      "learning_rate": 4.998895153988437e-06,
      "loss": 0.0354,
      "step": 112
    },
    {
      "epoch": 0.4948905109489051,
      "grad_norm": 0.8162719011306763,
      "learning_rate": 4.998703357027268e-06,
      "loss": 0.0465,
      "step": 113
    },
    {
      "epoch": 0.4992700729927007,
      "grad_norm": 0.9140358567237854,
      "learning_rate": 4.998496221819479e-06,
      "loss": 0.0457,
      "step": 114
    },
    {
      "epoch": 0.5036496350364964,
      "grad_norm": 0.6447531580924988,
      "learning_rate": 4.998273749636564e-06,
      "loss": 0.039,
      "step": 115
    },
    {
      "epoch": 0.5080291970802919,
      "grad_norm": 0.9157156944274902,
      "learning_rate": 4.998035941844167e-06,
      "loss": 0.0469,
      "step": 116
    },
    {
      "epoch": 0.5124087591240876,
      "grad_norm": 0.7706230878829956,
      "learning_rate": 4.997782799902065e-06,
      "loss": 0.0325,
      "step": 117
    },
    {
      "epoch": 0.5167883211678832,
      "grad_norm": 0.9391443729400635,
      "learning_rate": 4.997514325364168e-06,
      "loss": 0.0397,
      "step": 118
    },
    {
      "epoch": 0.5211678832116788,
      "grad_norm": 1.0085054636001587,
      "learning_rate": 4.997230519878499e-06,
      "loss": 0.0403,
      "step": 119
    },
    {
      "epoch": 0.5255474452554745,
      "grad_norm": 1.8318824768066406,
      "learning_rate": 4.996931385187195e-06,
      "loss": 0.0463,
      "step": 120
    },
    {
      "epoch": 0.5299270072992701,
      "grad_norm": 1.0216630697250366,
      "learning_rate": 4.9966169231264885e-06,
      "loss": 0.0406,
      "step": 121
    },
    {
      "epoch": 0.5343065693430656,
      "grad_norm": 1.4819082021713257,
      "learning_rate": 4.9962871356267e-06,
      "loss": 0.0485,
      "step": 122
    },
    {
      "epoch": 0.5386861313868613,
      "grad_norm": 0.9435060024261475,
      "learning_rate": 4.995942024712222e-06,
      "loss": 0.04,
      "step": 123
    },
    {
      "epoch": 0.5430656934306569,
      "grad_norm": 0.7887905240058899,
      "learning_rate": 4.995581592501514e-06,
      "loss": 0.0397,
      "step": 124
    },
    {
      "epoch": 0.5474452554744526,
      "grad_norm": 0.8321148753166199,
      "learning_rate": 4.995205841207082e-06,
      "loss": 0.0413,
      "step": 125
    },
    {
      "epoch": 0.5518248175182482,
      "grad_norm": 1.0303553342819214,
      "learning_rate": 4.99481477313547e-06,
      "loss": 0.0422,
      "step": 126
    },
    {
      "epoch": 0.5562043795620438,
      "grad_norm": 0.7056427001953125,
      "learning_rate": 4.994408390687241e-06,
      "loss": 0.0362,
      "step": 127
    },
    {
      "epoch": 0.5605839416058395,
      "grad_norm": 0.9762740135192871,
      "learning_rate": 4.993986696356966e-06,
      "loss": 0.0385,
      "step": 128
    },
    {
      "epoch": 0.564963503649635,
      "grad_norm": 0.9447624683380127,
      "learning_rate": 4.9935496927332095e-06,
      "loss": 0.0402,
      "step": 129
    },
    {
      "epoch": 0.5693430656934306,
      "grad_norm": 0.6106760501861572,
      "learning_rate": 4.993097382498511e-06,
      "loss": 0.0319,
      "step": 130
    },
    {
      "epoch": 0.5737226277372263,
      "grad_norm": 1.0554594993591309,
      "learning_rate": 4.992629768429367e-06,
      "loss": 0.0437,
      "step": 131
    },
    {
      "epoch": 0.5781021897810219,
      "grad_norm": 1.066218376159668,
      "learning_rate": 4.992146853396219e-06,
      "loss": 0.0382,
      "step": 132
    },
    {
      "epoch": 0.5824817518248175,
      "grad_norm": 0.7517623901367188,
      "learning_rate": 4.991648640363434e-06,
      "loss": 0.0317,
      "step": 133
    },
    {
      "epoch": 0.5868613138686132,
      "grad_norm": 0.8136976957321167,
      "learning_rate": 4.991135132389282e-06,
      "loss": 0.0339,
      "step": 134
    },
    {
      "epoch": 0.5912408759124088,
      "grad_norm": 0.9254240989685059,
      "learning_rate": 4.990606332625923e-06,
      "loss": 0.0413,
      "step": 135
    },
    {
      "epoch": 0.5956204379562043,
      "grad_norm": 0.6778447031974792,
      "learning_rate": 4.990062244319387e-06,
      "loss": 0.0377,
      "step": 136
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.1036059856414795,
      "learning_rate": 4.989502870809547e-06,
      "loss": 0.0376,
      "step": 137
    },
    {
      "epoch": 0.6043795620437956,
      "grad_norm": 0.8054158091545105,
      "learning_rate": 4.988928215530111e-06,
      "loss": 0.0367,
      "step": 138
    },
    {
      "epoch": 0.6087591240875913,
      "grad_norm": 0.9227175116539001,
      "learning_rate": 4.988338282008588e-06,
      "loss": 0.0374,
      "step": 139
    },
    {
      "epoch": 0.6131386861313869,
      "grad_norm": 0.8502228260040283,
      "learning_rate": 4.9877330738662755e-06,
      "loss": 0.0384,
      "step": 140
    },
    {
      "epoch": 0.6175182481751825,
      "grad_norm": 0.684752881526947,
      "learning_rate": 4.987112594818232e-06,
      "loss": 0.0366,
      "step": 141
    },
    {
      "epoch": 0.621897810218978,
      "grad_norm": 0.7456391453742981,
      "learning_rate": 4.9864768486732585e-06,
      "loss": 0.037,
      "step": 142
    },
    {
      "epoch": 0.6262773722627737,
      "grad_norm": 0.6797431111335754,
      "learning_rate": 4.985825839333872e-06,
      "loss": 0.0325,
      "step": 143
    },
    {
      "epoch": 0.6306569343065693,
      "grad_norm": 0.8098205924034119,
      "learning_rate": 4.985159570796279e-06,
      "loss": 0.0343,
      "step": 144
    },
    {
      "epoch": 0.635036496350365,
      "grad_norm": 0.8089592456817627,
      "learning_rate": 4.984478047150361e-06,
      "loss": 0.026,
      "step": 145
    },
    {
      "epoch": 0.6394160583941606,
      "grad_norm": 0.9282512664794922,
      "learning_rate": 4.983781272579637e-06,
      "loss": 0.0334,
      "step": 146
    },
    {
      "epoch": 0.6437956204379562,
      "grad_norm": 0.802608072757721,
      "learning_rate": 4.9830692513612445e-06,
      "loss": 0.0259,
      "step": 147
    },
    {
      "epoch": 0.6481751824817519,
      "grad_norm": 1.3046361207962036,
      "learning_rate": 4.982341987865914e-06,
      "loss": 0.045,
      "step": 148
    },
    {
      "epoch": 0.6525547445255474,
      "grad_norm": 1.0812411308288574,
      "learning_rate": 4.9815994865579405e-06,
      "loss": 0.0329,
      "step": 149
    },
    {
      "epoch": 0.656934306569343,
      "grad_norm": 0.7856137156486511,
      "learning_rate": 4.980841751995155e-06,
      "loss": 0.0341,
      "step": 150
    },
    {
      "epoch": 0.6613138686131387,
      "grad_norm": 1.0517083406448364,
      "learning_rate": 4.980068788828897e-06,
      "loss": 0.0299,
      "step": 151
    },
    {
      "epoch": 0.6656934306569343,
      "grad_norm": 0.6148231029510498,
      "learning_rate": 4.979280601803988e-06,
      "loss": 0.0304,
      "step": 152
    },
    {
      "epoch": 0.67007299270073,
      "grad_norm": 0.7572031021118164,
      "learning_rate": 4.9784771957586995e-06,
      "loss": 0.0309,
      "step": 153
    },
    {
      "epoch": 0.6744525547445256,
      "grad_norm": 2.0948777198791504,
      "learning_rate": 4.977658575624727e-06,
      "loss": 0.0307,
      "step": 154
    },
    {
      "epoch": 0.6788321167883211,
      "grad_norm": 0.624940037727356,
      "learning_rate": 4.976824746427153e-06,
      "loss": 0.03,
      "step": 155
    },
    {
      "epoch": 0.6832116788321168,
      "grad_norm": 0.8346346616744995,
      "learning_rate": 4.975975713284426e-06,
      "loss": 0.036,
      "step": 156
    },
    {
      "epoch": 0.6875912408759124,
      "grad_norm": 0.742098867893219,
      "learning_rate": 4.975111481408319e-06,
      "loss": 0.0325,
      "step": 157
    },
    {
      "epoch": 0.691970802919708,
      "grad_norm": 0.8000304102897644,
      "learning_rate": 4.9742320561039055e-06,
      "loss": 0.0332,
      "step": 158
    },
    {
      "epoch": 0.6963503649635037,
      "grad_norm": 1.063854694366455,
      "learning_rate": 4.973337442769523e-06,
      "loss": 0.0366,
      "step": 159
    },
    {
      "epoch": 0.7007299270072993,
      "grad_norm": 0.965560257434845,
      "learning_rate": 4.972427646896738e-06,
      "loss": 0.0331,
      "step": 160
    },
    {
      "epoch": 0.7051094890510949,
      "grad_norm": 1.5070244073867798,
      "learning_rate": 4.971502674070317e-06,
      "loss": 0.0446,
      "step": 161
    },
    {
      "epoch": 0.7094890510948905,
      "grad_norm": 0.8810545206069946,
      "learning_rate": 4.970562529968189e-06,
      "loss": 0.0299,
      "step": 162
    },
    {
      "epoch": 0.7138686131386861,
      "grad_norm": 0.7683446407318115,
      "learning_rate": 4.969607220361414e-06,
      "loss": 0.0244,
      "step": 163
    },
    {
      "epoch": 0.7182481751824817,
      "grad_norm": 0.7444891929626465,
      "learning_rate": 4.968636751114141e-06,
      "loss": 0.0338,
      "step": 164
    },
    {
      "epoch": 0.7226277372262774,
      "grad_norm": 0.7077688574790955,
      "learning_rate": 4.96765112818358e-06,
      "loss": 0.0285,
      "step": 165
    },
    {
      "epoch": 0.727007299270073,
      "grad_norm": 0.5648500919342041,
      "learning_rate": 4.9666503576199574e-06,
      "loss": 0.026,
      "step": 166
    },
    {
      "epoch": 0.7313868613138687,
      "grad_norm": 0.763556718826294,
      "learning_rate": 4.965634445566489e-06,
      "loss": 0.0299,
      "step": 167
    },
    {
      "epoch": 0.7357664233576642,
      "grad_norm": 0.6892725825309753,
      "learning_rate": 4.9646033982593315e-06,
      "loss": 0.023,
      "step": 168
    },
    {
      "epoch": 0.7401459854014598,
      "grad_norm": 1.0332573652267456,
      "learning_rate": 4.963557222027551e-06,
      "loss": 0.0313,
      "step": 169
    },
    {
      "epoch": 0.7445255474452555,
      "grad_norm": 1.214428424835205,
      "learning_rate": 4.962495923293081e-06,
      "loss": 0.027,
      "step": 170
    },
    {
      "epoch": 0.7489051094890511,
      "grad_norm": 0.9823130965232849,
      "learning_rate": 4.961419508570686e-06,
      "loss": 0.0231,
      "step": 171
    },
    {
      "epoch": 0.7532846715328467,
      "grad_norm": 1.2535115480422974,
      "learning_rate": 4.960327984467919e-06,
      "loss": 0.0326,
      "step": 172
    },
    {
      "epoch": 0.7576642335766424,
      "grad_norm": 0.9383441209793091,
      "learning_rate": 4.959221357685081e-06,
      "loss": 0.0286,
      "step": 173
    },
    {
      "epoch": 0.762043795620438,
      "grad_norm": 1.0426976680755615,
      "learning_rate": 4.958099635015182e-06,
      "loss": 0.0298,
      "step": 174
    },
    {
      "epoch": 0.7664233576642335,
      "grad_norm": 0.9159742593765259,
      "learning_rate": 4.956962823343895e-06,
      "loss": 0.025,
      "step": 175
    },
    {
      "epoch": 0.7708029197080292,
      "grad_norm": 0.8746912479400635,
      "learning_rate": 4.95581092964952e-06,
      "loss": 0.0299,
      "step": 176
    },
    {
      "epoch": 0.7751824817518248,
      "grad_norm": 0.9875199198722839,
      "learning_rate": 4.954643961002936e-06,
      "loss": 0.0309,
      "step": 177
    },
    {
      "epoch": 0.7795620437956204,
      "grad_norm": 0.7389516234397888,
      "learning_rate": 4.953461924567559e-06,
      "loss": 0.0291,
      "step": 178
    },
    {
      "epoch": 0.7839416058394161,
      "grad_norm": 0.790238082408905,
      "learning_rate": 4.952264827599299e-06,
      "loss": 0.0236,
      "step": 179
    },
    {
      "epoch": 0.7883211678832117,
      "grad_norm": 0.6766819953918457,
      "learning_rate": 4.951052677446515e-06,
      "loss": 0.0238,
      "step": 180
    },
    {
      "epoch": 0.7927007299270074,
      "grad_norm": 0.8832846283912659,
      "learning_rate": 4.94982548154997e-06,
      "loss": 0.0259,
      "step": 181
    },
    {
      "epoch": 0.7970802919708029,
      "grad_norm": 0.7298055291175842,
      "learning_rate": 4.948583247442783e-06,
      "loss": 0.023,
      "step": 182
    },
    {
      "epoch": 0.8014598540145985,
      "grad_norm": 0.911920428276062,
      "learning_rate": 4.947325982750387e-06,
      "loss": 0.0272,
      "step": 183
    },
    {
      "epoch": 0.8058394160583942,
      "grad_norm": 0.9145316481590271,
      "learning_rate": 4.946053695190479e-06,
      "loss": 0.0248,
      "step": 184
    },
    {
      "epoch": 0.8102189781021898,
      "grad_norm": 0.8759565353393555,
      "learning_rate": 4.9447663925729735e-06,
      "loss": 0.0263,
      "step": 185
    },
    {
      "epoch": 0.8145985401459854,
      "grad_norm": 1.1927592754364014,
      "learning_rate": 4.943464082799956e-06,
      "loss": 0.0305,
      "step": 186
    },
    {
      "epoch": 0.8189781021897811,
      "grad_norm": 0.752566933631897,
      "learning_rate": 4.942146773865631e-06,
      "loss": 0.0247,
      "step": 187
    },
    {
      "epoch": 0.8233576642335766,
      "grad_norm": 1.1121447086334229,
      "learning_rate": 4.940814473856278e-06,
      "loss": 0.0293,
      "step": 188
    },
    {
      "epoch": 0.8277372262773722,
      "grad_norm": 1.0319955348968506,
      "learning_rate": 4.939467190950195e-06,
      "loss": 0.0247,
      "step": 189
    },
    {
      "epoch": 0.8321167883211679,
      "grad_norm": 0.7960589528083801,
      "learning_rate": 4.938104933417655e-06,
      "loss": 0.0232,
      "step": 190
    },
    {
      "epoch": 0.8364963503649635,
      "grad_norm": 0.593197226524353,
      "learning_rate": 4.936727709620853e-06,
      "loss": 0.0232,
      "step": 191
    },
    {
      "epoch": 0.8408759124087591,
      "grad_norm": 0.6710584759712219,
      "learning_rate": 4.9353355280138525e-06,
      "loss": 0.0278,
      "step": 192
    },
    {
      "epoch": 0.8452554744525548,
      "grad_norm": 0.7627159357070923,
      "learning_rate": 4.933928397142535e-06,
      "loss": 0.0291,
      "step": 193
    },
    {
      "epoch": 0.8496350364963504,
      "grad_norm": 0.4998359680175781,
      "learning_rate": 4.93250632564455e-06,
      "loss": 0.018,
      "step": 194
    },
    {
      "epoch": 0.8540145985401459,
      "grad_norm": 0.8028760552406311,
      "learning_rate": 4.931069322249258e-06,
      "loss": 0.0193,
      "step": 195
    },
    {
      "epoch": 0.8583941605839416,
      "grad_norm": 0.6061640977859497,
      "learning_rate": 4.929617395777678e-06,
      "loss": 0.0142,
      "step": 196
    },
    {
      "epoch": 0.8627737226277372,
      "grad_norm": 0.5901748538017273,
      "learning_rate": 4.928150555142436e-06,
      "loss": 0.0177,
      "step": 197
    },
    {
      "epoch": 0.8671532846715329,
      "grad_norm": 0.7800254225730896,
      "learning_rate": 4.926668809347707e-06,
      "loss": 0.0264,
      "step": 198
    },
    {
      "epoch": 0.8715328467153285,
      "grad_norm": 0.9308339357376099,
      "learning_rate": 4.925172167489162e-06,
      "loss": 0.0247,
      "step": 199
    },
    {
      "epoch": 0.8759124087591241,
      "grad_norm": 0.9651213884353638,
      "learning_rate": 4.923660638753911e-06,
      "loss": 0.0216,
      "step": 200
    },
    {
      "epoch": 0.8802919708029197,
      "grad_norm": 1.1258251667022705,
      "learning_rate": 4.9221342324204455e-06,
      "loss": 0.0249,
      "step": 201
    },
    {
      "epoch": 0.8846715328467153,
      "grad_norm": 1.0175387859344482,
      "learning_rate": 4.9205929578585845e-06,
      "loss": 0.0201,
      "step": 202
    },
    {
      "epoch": 0.8890510948905109,
      "grad_norm": 1.5190610885620117,
      "learning_rate": 4.9190368245294155e-06,
      "loss": 0.0319,
      "step": 203
    },
    {
      "epoch": 0.8934306569343066,
      "grad_norm": 0.9947767853736877,
      "learning_rate": 4.917465841985234e-06,
      "loss": 0.0228,
      "step": 204
    },
    {
      "epoch": 0.8978102189781022,
      "grad_norm": 0.6416967511177063,
      "learning_rate": 4.91588001986949e-06,
      "loss": 0.0198,
      "step": 205
    },
    {
      "epoch": 0.9021897810218978,
      "grad_norm": 0.6980161666870117,
      "learning_rate": 4.914279367916724e-06,
      "loss": 0.0172,
      "step": 206
    },
    {
      "epoch": 0.9065693430656935,
      "grad_norm": 0.5301483869552612,
      "learning_rate": 4.912663895952511e-06,
      "loss": 0.0208,
      "step": 207
    },
    {
      "epoch": 0.910948905109489,
      "grad_norm": 0.6047857999801636,
      "learning_rate": 4.911033613893397e-06,
      "loss": 0.0227,
      "step": 208
    },
    {
      "epoch": 0.9153284671532846,
      "grad_norm": 0.6069537997245789,
      "learning_rate": 4.909388531746837e-06,
      "loss": 0.0195,
      "step": 209
    },
    {
      "epoch": 0.9197080291970803,
      "grad_norm": 0.6859843730926514,
      "learning_rate": 4.907728659611143e-06,
      "loss": 0.0244,
      "step": 210
    },
    {
      "epoch": 0.9240875912408759,
      "grad_norm": 0.6074005365371704,
      "learning_rate": 4.906054007675408e-06,
      "loss": 0.0195,
      "step": 211
    },
    {
      "epoch": 0.9284671532846716,
      "grad_norm": 1.1983692646026611,
      "learning_rate": 4.9043645862194545e-06,
      "loss": 0.023,
      "step": 212
    },
    {
      "epoch": 0.9328467153284672,
      "grad_norm": 0.8806214928627014,
      "learning_rate": 4.902660405613767e-06,
      "loss": 0.0243,
      "step": 213
    },
    {
      "epoch": 0.9372262773722628,
      "grad_norm": 0.6523962616920471,
      "learning_rate": 4.900941476319426e-06,
      "loss": 0.016,
      "step": 214
    },
    {
      "epoch": 0.9416058394160584,
      "grad_norm": 0.5673899054527283,
      "learning_rate": 4.899207808888051e-06,
      "loss": 0.0158,
      "step": 215
    },
    {
      "epoch": 0.945985401459854,
      "grad_norm": 0.9643133282661438,
      "learning_rate": 4.897459413961729e-06,
      "loss": 0.0194,
      "step": 216
    },
    {
      "epoch": 0.9503649635036496,
      "grad_norm": 0.6007612347602844,
      "learning_rate": 4.8956963022729495e-06,
      "loss": 0.0187,
      "step": 217
    },
    {
      "epoch": 0.9547445255474453,
      "grad_norm": 0.968173623085022,
      "learning_rate": 4.893918484644545e-06,
      "loss": 0.0223,
      "step": 218
    },
    {
      "epoch": 0.9591240875912409,
      "grad_norm": 0.6649457216262817,
      "learning_rate": 4.892125971989616e-06,
      "loss": 0.0205,
      "step": 219
    },
    {
      "epoch": 0.9635036496350365,
      "grad_norm": 0.48259082436561584,
      "learning_rate": 4.890318775311471e-06,
      "loss": 0.0121,
      "step": 220
    },
    {
      "epoch": 0.9678832116788321,
      "grad_norm": 0.8284991383552551,
      "learning_rate": 4.888496905703554e-06,
      "loss": 0.0176,
      "step": 221
    },
    {
      "epoch": 0.9722627737226277,
      "grad_norm": 0.5141683220863342,
      "learning_rate": 4.8866603743493805e-06,
      "loss": 0.0154,
      "step": 222
    },
    {
      "epoch": 0.9766423357664233,
      "grad_norm": 1.0223891735076904,
      "learning_rate": 4.884809192522466e-06,
      "loss": 0.0111,
      "step": 223
    },
    {
      "epoch": 0.981021897810219,
      "grad_norm": 0.844782292842865,
      "learning_rate": 4.882943371586256e-06,
      "loss": 0.016,
      "step": 224
    },
    {
      "epoch": 0.9854014598540146,
      "grad_norm": 0.6978311538696289,
      "learning_rate": 4.881062922994061e-06,
      "loss": 0.0129,
      "step": 225
    },
    {
      "epoch": 0.9897810218978103,
      "grad_norm": 0.8764100074768066,
      "learning_rate": 4.879167858288982e-06,
      "loss": 0.0213,
      "step": 226
    },
    {
      "epoch": 0.9941605839416059,
      "grad_norm": 1.0449023246765137,
      "learning_rate": 4.877258189103839e-06,
      "loss": 0.015,
      "step": 227
    },
    {
      "epoch": 0.9985401459854014,
      "grad_norm": 0.7534664869308472,
      "learning_rate": 4.875333927161104e-06,
      "loss": 0.0144,
      "step": 228
    },
    {
      "epoch": 1.0,
      "grad_norm": 0.7534664869308472,
      "learning_rate": 4.8733950842728236e-06,
      "loss": 0.0186,
      "step": 229
    },
    {
      "epoch": 1.0043795620437956,
      "grad_norm": 1.4982736110687256,
      "learning_rate": 4.871441672340551e-06,
      "loss": 0.0126,
      "step": 230
    },
    {
      "epoch": 1.0087591240875913,
      "grad_norm": 1.206292986869812,
      "learning_rate": 4.869473703355273e-06,
      "loss": 0.0165,
      "step": 231
    },
    {
      "epoch": 1.013138686131387,
      "grad_norm": 0.4586186408996582,
      "learning_rate": 4.867491189397331e-06,
      "loss": 0.0089,
      "step": 232
    },
    {
      "epoch": 1.0175182481751825,
      "grad_norm": 0.5647240281105042,
      "learning_rate": 4.8654941426363525e-06,
      "loss": 0.0122,
      "step": 233
    },
    {
      "epoch": 1.0218978102189782,
      "grad_norm": 0.6478530764579773,
      "learning_rate": 4.863482575331173e-06,
      "loss": 0.012,
      "step": 234
    },
    {
      "epoch": 1.0262773722627738,
      "grad_norm": 0.48696213960647583,
      "learning_rate": 4.861456499829764e-06,
      "loss": 0.0092,
      "step": 235
    },
    {
      "epoch": 1.0306569343065692,
      "grad_norm": 0.6736640334129333,
      "learning_rate": 4.859415928569154e-06,
      "loss": 0.0149,
      "step": 236
    },
    {
      "epoch": 1.0350364963503649,
      "grad_norm": 0.6518754363059998,
      "learning_rate": 4.857360874075355e-06,
      "loss": 0.0085,
      "step": 237
    },
    {
      "epoch": 1.0394160583941605,
      "grad_norm": 0.5145443677902222,
      "learning_rate": 4.855291348963281e-06,
      "loss": 0.0102,
      "step": 238
    },
    {
      "epoch": 1.0437956204379562,
      "grad_norm": 0.5647151470184326,
      "learning_rate": 4.853207365936676e-06,
      "loss": 0.0065,
      "step": 239
    },
    {
      "epoch": 1.0481751824817518,
      "grad_norm": 0.46668219566345215,
      "learning_rate": 4.8511089377880334e-06,
      "loss": 0.0081,
      "step": 240
    },
    {
      "epoch": 1.0525547445255474,
      "grad_norm": 0.9103809595108032,
      "learning_rate": 4.848996077398518e-06,
      "loss": 0.0107,
      "step": 241
    },
    {
      "epoch": 1.056934306569343,
      "grad_norm": 0.5947101712226868,
      "learning_rate": 4.8468687977378855e-06,
      "loss": 0.0095,
      "step": 242
    },
    {
      "epoch": 1.0613138686131387,
      "grad_norm": 0.7154219150543213,
      "learning_rate": 4.844727111864405e-06,
      "loss": 0.0097,
      "step": 243
    },
    {
      "epoch": 1.0656934306569343,
      "grad_norm": 0.9023681282997131,
      "learning_rate": 4.842571032924778e-06,
      "loss": 0.0105,
      "step": 244
    },
    {
      "epoch": 1.07007299270073,
      "grad_norm": 0.6020027995109558,
      "learning_rate": 4.840400574154056e-06,
      "loss": 0.0065,
      "step": 245
    },
    {
      "epoch": 1.0744525547445256,
      "grad_norm": 0.7602945566177368,
      "learning_rate": 4.838215748875562e-06,
      "loss": 0.0121,
      "step": 246
    },
    {
      "epoch": 1.0788321167883212,
      "grad_norm": 0.8768120408058167,
      "learning_rate": 4.83601657050081e-06,
      "loss": 0.0146,
      "step": 247
    },
    {
      "epoch": 1.0832116788321169,
      "grad_norm": 0.7482877373695374,
      "learning_rate": 4.833803052529414e-06,
      "loss": 0.0076,
      "step": 248
    },
    {
      "epoch": 1.0875912408759123,
      "grad_norm": 0.4619101881980896,
      "learning_rate": 4.831575208549018e-06,
      "loss": 0.0114,
      "step": 249
    },
    {
      "epoch": 1.091970802919708,
      "grad_norm": 0.7442188262939453,
      "learning_rate": 4.829333052235202e-06,
      "loss": 0.0119,
      "step": 250
    },
    {
      "epoch": 1.0963503649635036,
      "grad_norm": 0.754559338092804,
      "learning_rate": 4.827076597351403e-06,
      "loss": 0.011,
      "step": 251
    },
    {
      "epoch": 1.1007299270072992,
      "grad_norm": 0.8147054314613342,
      "learning_rate": 4.824805857748831e-06,
      "loss": 0.0098,
      "step": 252
    },
    {
      "epoch": 1.1051094890510949,
      "grad_norm": 0.814437985420227,
      "learning_rate": 4.82252084736638e-06,
      "loss": 0.0077,
      "step": 253
    },
    {
      "epoch": 1.1094890510948905,
      "grad_norm": 0.7731255888938904,
      "learning_rate": 4.820221580230545e-06,
      "loss": 0.0129,
      "step": 254
    },
    {
      "epoch": 1.1138686131386861,
      "grad_norm": 0.7589200139045715,
      "learning_rate": 4.8179080704553386e-06,
      "loss": 0.0095,
      "step": 255
    },
    {
      "epoch": 1.1182481751824818,
      "grad_norm": 0.455625057220459,
      "learning_rate": 4.815580332242199e-06,
      "loss": 0.0088,
      "step": 256
    },
    {
      "epoch": 1.1226277372262774,
      "grad_norm": 0.51591956615448,
      "learning_rate": 4.8132383798799075e-06,
      "loss": 0.0071,
      "step": 257
    },
    {
      "epoch": 1.127007299270073,
      "grad_norm": 0.6024675965309143,
      "learning_rate": 4.810882227744495e-06,
      "loss": 0.0108,
      "step": 258
    },
    {
      "epoch": 1.1313868613138687,
      "grad_norm": 0.6686123609542847,
      "learning_rate": 4.808511890299163e-06,
      "loss": 0.0139,
      "step": 259
    },
    {
      "epoch": 1.1357664233576643,
      "grad_norm": 0.7872790694236755,
      "learning_rate": 4.806127382094184e-06,
      "loss": 0.0113,
      "step": 260
    },
    {
      "epoch": 1.14014598540146,
      "grad_norm": 0.6551967263221741,
      "learning_rate": 4.803728717766822e-06,
      "loss": 0.0069,
      "step": 261
    },
    {
      "epoch": 1.1445255474452556,
      "grad_norm": 0.7421084642410278,
      "learning_rate": 4.801315912041232e-06,
      "loss": 0.0083,
      "step": 262
    },
    {
      "epoch": 1.148905109489051,
      "grad_norm": 0.6349561810493469,
      "learning_rate": 4.798888979728382e-06,
      "loss": 0.0097,
      "step": 263
    },
    {
      "epoch": 1.1532846715328466,
      "grad_norm": 0.6274579167366028,
      "learning_rate": 4.796447935725954e-06,
      "loss": 0.0089,
      "step": 264
    },
    {
      "epoch": 1.1576642335766423,
      "grad_norm": 0.5055127739906311,
      "learning_rate": 4.793992795018253e-06,
      "loss": 0.0062,
      "step": 265
    },
    {
      "epoch": 1.162043795620438,
      "grad_norm": 1.1284935474395752,
      "learning_rate": 4.791523572676115e-06,
      "loss": 0.0118,
      "step": 266
    },
    {
      "epoch": 1.1664233576642336,
      "grad_norm": 0.6343486905097961,
      "learning_rate": 4.789040283856822e-06,
      "loss": 0.0058,
      "step": 267
    },
    {
      "epoch": 1.1708029197080292,
      "grad_norm": 0.9384168982505798,
      "learning_rate": 4.7865429438039955e-06,
      "loss": 0.0096,
      "step": 268
    },
    {
      "epoch": 1.1751824817518248,
      "grad_norm": 0.879307746887207,
      "learning_rate": 4.784031567847515e-06,
      "loss": 0.0129,
      "step": 269
    },
    {
      "epoch": 1.1795620437956205,
      "grad_norm": 0.5268783569335938,
      "learning_rate": 4.781506171403416e-06,
      "loss": 0.0073,
      "step": 270
    },
    {
      "epoch": 1.183941605839416,
      "grad_norm": 1.332766056060791,
      "learning_rate": 4.778966769973802e-06,
      "loss": 0.0129,
      "step": 271
    },
    {
      "epoch": 1.1883211678832117,
      "grad_norm": 0.7192438244819641,
      "learning_rate": 4.7764133791467434e-06,
      "loss": 0.0083,
      "step": 272
    },
    {
      "epoch": 1.1927007299270074,
      "grad_norm": 0.5047981142997742,
      "learning_rate": 4.773846014596185e-06,
      "loss": 0.0057,
      "step": 273
    },
    {
      "epoch": 1.197080291970803,
      "grad_norm": 0.5075733661651611,
      "learning_rate": 4.7712646920818486e-06,
      "loss": 0.0098,
      "step": 274
    },
    {
      "epoch": 1.2014598540145984,
      "grad_norm": 0.5874909162521362,
      "learning_rate": 4.7686694274491375e-06,
      "loss": 0.0072,
      "step": 275
    },
    {
      "epoch": 1.205839416058394,
      "grad_norm": 0.511114239692688,
      "learning_rate": 4.766060236629037e-06,
      "loss": 0.0058,
      "step": 276
    },
    {
      "epoch": 1.2102189781021897,
      "grad_norm": 0.5427272915840149,
      "learning_rate": 4.763437135638021e-06,
      "loss": 0.0094,
      "step": 277
    },
    {
      "epoch": 1.2145985401459853,
      "grad_norm": 0.6207345724105835,
      "learning_rate": 4.760800140577947e-06,
      "loss": 0.0117,
      "step": 278
    },
    {
      "epoch": 1.218978102189781,
      "grad_norm": 0.9132710695266724,
      "learning_rate": 4.758149267635963e-06,
      "loss": 0.0085,
      "step": 279
    },
    {
      "epoch": 1.2233576642335766,
      "grad_norm": 0.500217080116272,
      "learning_rate": 4.755484533084407e-06,
      "loss": 0.01,
      "step": 280
    },
    {
      "epoch": 1.2277372262773723,
      "grad_norm": 0.38535866141319275,
      "learning_rate": 4.7528059532807045e-06,
      "loss": 0.0038,
      "step": 281
    },
    {
      "epoch": 1.2321167883211679,
      "grad_norm": 0.5505772233009338,
      "learning_rate": 4.750113544667271e-06,
      "loss": 0.0064,
      "step": 282
    },
    {
      "epoch": 1.2364963503649635,
      "grad_norm": 0.5370091795921326,
      "learning_rate": 4.747407323771408e-06,
      "loss": 0.0083,
      "step": 283
    },
    {
      "epoch": 1.2408759124087592,
      "grad_norm": 0.6680497527122498,
      "learning_rate": 4.744687307205207e-06,
      "loss": 0.006,
      "step": 284
    },
    {
      "epoch": 1.2452554744525548,
      "grad_norm": 0.5799117088317871,
      "learning_rate": 4.74195351166544e-06,
      "loss": 0.0067,
      "step": 285
    },
    {
      "epoch": 1.2496350364963504,
      "grad_norm": 0.3809143304824829,
      "learning_rate": 4.739205953933464e-06,
      "loss": 0.0081,
      "step": 286
    },
    {
      "epoch": 1.254014598540146,
      "grad_norm": 0.8633838891983032,
      "learning_rate": 4.736444650875114e-06,
      "loss": 0.0083,
      "step": 287
    },
    {
      "epoch": 1.2583941605839417,
      "grad_norm": 0.4796256124973297,
      "learning_rate": 4.7336696194405995e-06,
      "loss": 0.0083,
      "step": 288
    },
    {
      "epoch": 1.2627737226277373,
      "grad_norm": 0.8990418314933777,
      "learning_rate": 4.730880876664402e-06,
      "loss": 0.0053,
      "step": 289
    },
    {
      "epoch": 1.2671532846715328,
      "grad_norm": 0.21372799575328827,
      "learning_rate": 4.72807843966517e-06,
      "loss": 0.0042,
      "step": 290
    },
    {
      "epoch": 1.2715328467153284,
      "grad_norm": 1.0377510786056519,
      "learning_rate": 4.725262325645615e-06,
      "loss": 0.0083,
      "step": 291
    },
    {
      "epoch": 1.275912408759124,
      "grad_norm": 0.29527121782302856,
      "learning_rate": 4.722432551892402e-06,
      "loss": 0.0023,
      "step": 292
    },
    {
      "epoch": 1.2802919708029197,
      "grad_norm": 1.5753306150436401,
      "learning_rate": 4.719589135776048e-06,
      "loss": 0.0089,
      "step": 293
    },
    {
      "epoch": 1.2846715328467153,
      "grad_norm": 0.3794252574443817,
      "learning_rate": 4.716732094750813e-06,
      "loss": 0.003,
      "step": 294
    },
    {
      "epoch": 1.289051094890511,
      "grad_norm": 0.5407822132110596,
      "learning_rate": 4.7138614463545926e-06,
      "loss": 0.0075,
      "step": 295
    },
    {
      "epoch": 1.2934306569343066,
      "grad_norm": 0.8722830414772034,
      "learning_rate": 4.710977208208812e-06,
      "loss": 0.0051,
      "step": 296
    },
    {
      "epoch": 1.2978102189781022,
      "grad_norm": 0.6819527745246887,
      "learning_rate": 4.708079398018316e-06,
      "loss": 0.0094,
      "step": 297
    },
    {
      "epoch": 1.3021897810218979,
      "grad_norm": 0.7198041677474976,
      "learning_rate": 4.7051680335712626e-06,
      "loss": 0.0068,
      "step": 298
    },
    {
      "epoch": 1.3065693430656935,
      "grad_norm": 0.467638224363327,
      "learning_rate": 4.70224313273901e-06,
      "loss": 0.0059,
      "step": 299
    },
    {
      "epoch": 1.310948905109489,
      "grad_norm": 0.4593437612056732,
      "learning_rate": 4.699304713476009e-06,
      "loss": 0.0039,
      "step": 300
    },
    {
      "epoch": 1.3153284671532846,
      "grad_norm": 0.5790926814079285,
      "learning_rate": 4.696352793819698e-06,
      "loss": 0.0057,
      "step": 301
    },
    {
      "epoch": 1.3197080291970802,
      "grad_norm": 0.3413192331790924,
      "learning_rate": 4.693387391890382e-06,
      "loss": 0.0055,
      "step": 302
    },
    {
      "epoch": 1.3240875912408758,
      "grad_norm": 0.5049291849136353,
      "learning_rate": 4.690408525891129e-06,
      "loss": 0.0061,
      "step": 303
    },
    {
      "epoch": 1.3284671532846715,
      "grad_norm": 0.25111323595046997,
      "learning_rate": 4.687416214107655e-06,
      "loss": 0.0041,
      "step": 304
    },
    {
      "epoch": 1.332846715328467,
      "grad_norm": 0.5559152364730835,
      "learning_rate": 4.684410474908214e-06,
      "loss": 0.0093,
      "step": 305
    },
    {
      "epoch": 1.3372262773722627,
      "grad_norm": 0.3842668831348419,
      "learning_rate": 4.681391326743484e-06,
      "loss": 0.0051,
      "step": 306
    },
    {
      "epoch": 1.3416058394160584,
      "grad_norm": 1.6264209747314453,
      "learning_rate": 4.67835878814645e-06,
      "loss": 0.0063,
      "step": 307
    },
    {
      "epoch": 1.345985401459854,
      "grad_norm": 0.5829497575759888,
      "learning_rate": 4.6753128777323e-06,
      "loss": 0.0054,
      "step": 308
    },
    {
      "epoch": 1.3503649635036497,
      "grad_norm": 0.6949307322502136,
      "learning_rate": 4.6722536141982995e-06,
      "loss": 0.0055,
      "step": 309
    },
    {
      "epoch": 1.3547445255474453,
      "grad_norm": 0.6198911070823669,
      "learning_rate": 4.669181016323686e-06,
      "loss": 0.0063,
      "step": 310
    },
    {
      "epoch": 1.359124087591241,
      "grad_norm": 0.4557003080844879,
      "learning_rate": 4.666095102969545e-06,
      "loss": 0.0053,
      "step": 311
    },
    {
      "epoch": 1.3635036496350366,
      "grad_norm": 0.7198585271835327,
      "learning_rate": 4.662995893078702e-06,
      "loss": 0.0048,
      "step": 312
    },
    {
      "epoch": 1.3678832116788322,
      "grad_norm": 0.4380558431148529,
      "learning_rate": 4.659883405675604e-06,
      "loss": 0.0057,
      "step": 313
    },
    {
      "epoch": 1.3722627737226278,
      "grad_norm": 0.986754298210144,
      "learning_rate": 4.656757659866199e-06,
      "loss": 0.0091,
      "step": 314
    },
    {
      "epoch": 1.3766423357664235,
      "grad_norm": 1.1282256841659546,
      "learning_rate": 4.6536186748378236e-06,
      "loss": 0.0058,
      "step": 315
    },
    {
      "epoch": 1.3810218978102191,
      "grad_norm": 0.3973119854927063,
      "learning_rate": 4.6504664698590795e-06,
      "loss": 0.0048,
      "step": 316
    },
    {
      "epoch": 1.3854014598540145,
      "grad_norm": 0.4406156837940216,
      "learning_rate": 4.647301064279725e-06,
      "loss": 0.0039,
      "step": 317
    },
    {
      "epoch": 1.3897810218978102,
      "grad_norm": 0.8249232172966003,
      "learning_rate": 4.644122477530545e-06,
      "loss": 0.0084,
      "step": 318
    },
    {
      "epoch": 1.3941605839416058,
      "grad_norm": 1.4877322912216187,
      "learning_rate": 4.640930729123237e-06,
      "loss": 0.0054,
      "step": 319
    },
    {
      "epoch": 1.3985401459854014,
      "grad_norm": 0.4890510141849518,
      "learning_rate": 4.6377258386502956e-06,
      "loss": 0.0021,
      "step": 320
    },
    {
      "epoch": 1.402919708029197,
      "grad_norm": 0.36471042037010193,
      "learning_rate": 4.634507825784882e-06,
      "loss": 0.004,
      "step": 321
    },
    {
      "epoch": 1.4072992700729927,
      "grad_norm": 1.1714568138122559,
      "learning_rate": 4.631276710280713e-06,
      "loss": 0.0079,
      "step": 322
    },
    {
      "epoch": 1.4116788321167884,
      "grad_norm": 0.509325385093689,
      "learning_rate": 4.628032511971934e-06,
      "loss": 0.0027,
      "step": 323
    },
    {
      "epoch": 1.416058394160584,
      "grad_norm": 0.34730231761932373,
      "learning_rate": 4.624775250772999e-06,
      "loss": 0.004,
      "step": 324
    },
    {
      "epoch": 1.4204379562043796,
      "grad_norm": 0.4304009974002838,
      "learning_rate": 4.6215049466785484e-06,
      "loss": 0.0046,
      "step": 325
    },
    {
      "epoch": 1.4248175182481753,
      "grad_norm": 0.721092700958252,
      "learning_rate": 4.618221619763287e-06,
      "loss": 0.0042,
      "step": 326
    },
    {
      "epoch": 1.4291970802919707,
      "grad_norm": 0.9019221067428589,
      "learning_rate": 4.6149252901818585e-06,
      "loss": 0.008,
      "step": 327
    },
    {
      "epoch": 1.4335766423357663,
      "grad_norm": 3.142669439315796,
      "learning_rate": 4.611615978168725e-06,
      "loss": 0.0053,
      "step": 328
    },
    {
      "epoch": 1.437956204379562,
      "grad_norm": 0.8218545317649841,
      "learning_rate": 4.608293704038039e-06,
      "loss": 0.007,
      "step": 329
    },
    {
      "epoch": 1.4423357664233576,
      "grad_norm": 0.49122154712677,
      "learning_rate": 4.604958488183523e-06,
      "loss": 0.0056,
      "step": 330
    },
    {
      "epoch": 1.4467153284671532,
      "grad_norm": 0.7947913408279419,
      "learning_rate": 4.6016103510783405e-06,
      "loss": 0.0069,
      "step": 331
    },
    {
      "epoch": 1.4510948905109489,
      "grad_norm": 0.38262632489204407,
      "learning_rate": 4.598249313274972e-06,
      "loss": 0.0054,
      "step": 332
    },
    {
      "epoch": 1.4554744525547445,
      "grad_norm": 0.7605669498443604,
      "learning_rate": 4.59487539540509e-06,
      "loss": 0.0074,
      "step": 333
    },
    {
      "epoch": 1.4598540145985401,
      "grad_norm": 0.4355056583881378,
      "learning_rate": 4.591488618179428e-06,
      "loss": 0.0027,
      "step": 334
    },
    {
      "epoch": 1.4642335766423358,
      "grad_norm": 0.4696539640426636,
      "learning_rate": 4.58808900238766e-06,
      "loss": 0.0063,
      "step": 335
    },
    {
      "epoch": 1.4686131386861314,
      "grad_norm": 0.4078298807144165,
      "learning_rate": 4.584676568898267e-06,
      "loss": 0.0039,
      "step": 336
    },
    {
      "epoch": 1.472992700729927,
      "grad_norm": 0.22500784695148468,
      "learning_rate": 4.581251338658412e-06,
      "loss": 0.0027,
      "step": 337
    },
    {
      "epoch": 1.4773722627737227,
      "grad_norm": 0.28224533796310425,
      "learning_rate": 4.577813332693812e-06,
      "loss": 0.0037,
      "step": 338
    },
    {
      "epoch": 1.4817518248175183,
      "grad_norm": 0.4234824478626251,
      "learning_rate": 4.574362572108604e-06,
      "loss": 0.0057,
      "step": 339
    },
    {
      "epoch": 1.486131386861314,
      "grad_norm": 0.4610466957092285,
      "learning_rate": 4.570899078085223e-06,
      "loss": 0.0033,
      "step": 340
    },
    {
      "epoch": 1.4905109489051096,
      "grad_norm": 0.8538670539855957,
      "learning_rate": 4.567422871884265e-06,
      "loss": 0.0044,
      "step": 341
    },
    {
      "epoch": 1.4948905109489052,
      "grad_norm": 0.4335832893848419,
      "learning_rate": 4.563933974844361e-06,
      "loss": 0.0041,
      "step": 342
    },
    {
      "epoch": 1.4992700729927007,
      "grad_norm": 0.4888335168361664,
      "learning_rate": 4.560432408382045e-06,
      "loss": 0.003,
      "step": 343
    },
    {
      "epoch": 1.5036496350364965,
      "grad_norm": 0.545806884765625,
      "learning_rate": 4.5569181939916195e-06,
| "step": 344 | |
| }, | |
| { | |
| "epoch": 1.508029197080292, | |
| "grad_norm": 0.7364339828491211, | |
| "learning_rate": 4.553391353245029e-06, | |
| "loss": 0.0068, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 1.5124087591240876, | |
| "grad_norm": 0.7074061036109924, | |
| "learning_rate": 4.549851907791722e-06, | |
| "loss": 0.0034, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 1.5167883211678832, | |
| "grad_norm": 0.39756596088409424, | |
| "learning_rate": 4.546299879358524e-06, | |
| "loss": 0.0032, | |
| "step": 347 | |
| }, | |
| { | |
| "epoch": 1.5211678832116788, | |
| "grad_norm": 0.6966583728790283, | |
| "learning_rate": 4.542735289749498e-06, | |
| "loss": 0.0013, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 1.5255474452554745, | |
| "grad_norm": 0.19892163574695587, | |
| "learning_rate": 4.5391581608458144e-06, | |
| "loss": 0.0011, | |
| "step": 349 | |
| }, | |
| { | |
| "epoch": 1.5299270072992701, | |
| "grad_norm": 0.718493640422821, | |
| "learning_rate": 4.535568514605617e-06, | |
| "loss": 0.0026, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 1.5343065693430655, | |
| "grad_norm": 0.8941331505775452, | |
| "learning_rate": 4.5319663730638865e-06, | |
| "loss": 0.0034, | |
| "step": 351 | |
| }, | |
| { | |
| "epoch": 1.5386861313868612, | |
| "grad_norm": 0.33956244587898254, | |
| "learning_rate": 4.528351758332303e-06, | |
| "loss": 0.002, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 1.5430656934306568, | |
| "grad_norm": 0.557651937007904, | |
| "learning_rate": 4.5247246925991185e-06, | |
| "loss": 0.0013, | |
| "step": 353 | |
| }, | |
| { | |
| "epoch": 1.5474452554744524, | |
| "grad_norm": 0.7165636420249939, | |
| "learning_rate": 4.5210851981290096e-06, | |
| "loss": 0.003, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 1.551824817518248, | |
| "grad_norm": 0.36456218361854553, | |
| "learning_rate": 4.5174332972629505e-06, | |
| "loss": 0.0022, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 1.5562043795620437, | |
| "grad_norm": 0.1896594613790512, | |
| "learning_rate": 4.5137690124180714e-06, | |
| "loss": 0.0056, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 1.5605839416058394, | |
| "grad_norm": 0.6159863471984863, | |
| "learning_rate": 4.510092366087518e-06, | |
| "loss": 0.0057, | |
| "step": 357 | |
| }, | |
| { | |
| "epoch": 1.564963503649635, | |
| "grad_norm": 1.0295354127883911, | |
| "learning_rate": 4.506403380840321e-06, | |
| "loss": 0.0011, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 1.5693430656934306, | |
| "grad_norm": 0.33694684505462646, | |
| "learning_rate": 4.50270207932125e-06, | |
| "loss": 0.0024, | |
| "step": 359 | |
| }, | |
| { | |
| "epoch": 1.5737226277372263, | |
| "grad_norm": 0.8961917757987976, | |
| "learning_rate": 4.498988484250681e-06, | |
| "loss": 0.0058, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 1.578102189781022, | |
| "grad_norm": 1.736559510231018, | |
| "learning_rate": 4.4952626184244504e-06, | |
| "loss": 0.006, | |
| "step": 361 | |
| }, | |
| { | |
| "epoch": 1.5824817518248175, | |
| "grad_norm": 0.41748425364494324, | |
| "learning_rate": 4.491524504713722e-06, | |
| "loss": 0.0017, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 1.5868613138686132, | |
| "grad_norm": 0.501815140247345, | |
| "learning_rate": 4.487774166064839e-06, | |
| "loss": 0.0018, | |
| "step": 363 | |
| }, | |
| { | |
| "epoch": 1.5912408759124088, | |
| "grad_norm": 0.4359874427318573, | |
| "learning_rate": 4.48401162549919e-06, | |
| "loss": 0.0044, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 1.5956204379562045, | |
| "grad_norm": 0.3699054718017578, | |
| "learning_rate": 4.480236906113066e-06, | |
| "loss": 0.0036, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "grad_norm": 0.5684164762496948, | |
| "learning_rate": 4.476450031077512e-06, | |
| "loss": 0.0023, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 1.6043795620437957, | |
| "grad_norm": 0.6451728343963623, | |
| "learning_rate": 4.4726510236381956e-06, | |
| "loss": 0.0044, | |
| "step": 367 | |
| }, | |
| { | |
| "epoch": 1.6087591240875914, | |
| "grad_norm": 2.3887782096862793, | |
| "learning_rate": 4.468839907115259e-06, | |
| "loss": 0.0059, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 1.613138686131387, | |
| "grad_norm": 0.6304333806037903, | |
| "learning_rate": 4.465016704903171e-06, | |
| "loss": 0.0023, | |
| "step": 369 | |
| }, | |
| { | |
| "epoch": 1.6175182481751826, | |
| "grad_norm": 0.38788676261901855, | |
| "learning_rate": 4.461181440470592e-06, | |
| "loss": 0.0027, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 1.621897810218978, | |
| "grad_norm": 0.3805489242076874, | |
| "learning_rate": 4.457334137360226e-06, | |
| "loss": 0.0012, | |
| "step": 371 | |
| }, | |
| { | |
| "epoch": 1.6262773722627737, | |
| "grad_norm": 0.3548617660999298, | |
| "learning_rate": 4.453474819188676e-06, | |
| "loss": 0.0032, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 1.6306569343065693, | |
| "grad_norm": 0.8332701921463013, | |
| "learning_rate": 4.449603509646297e-06, | |
| "loss": 0.0028, | |
| "step": 373 | |
| }, | |
| { | |
| "epoch": 1.635036496350365, | |
| "grad_norm": 0.7843290567398071, | |
| "learning_rate": 4.445720232497055e-06, | |
| "loss": 0.0037, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 1.6394160583941606, | |
| "grad_norm": 0.7074784636497498, | |
| "learning_rate": 4.44182501157838e-06, | |
| "loss": 0.003, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 1.6437956204379562, | |
| "grad_norm": 0.6076835989952087, | |
| "learning_rate": 4.4379178708010155e-06, | |
| "loss": 0.0019, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 1.6481751824817519, | |
| "grad_norm": 0.5793138146400452, | |
| "learning_rate": 4.433998834148877e-06, | |
| "loss": 0.0036, | |
| "step": 377 | |
| }, | |
| { | |
| "epoch": 1.6525547445255473, | |
| "grad_norm": 0.7881670594215393, | |
| "learning_rate": 4.430067925678902e-06, | |
| "loss": 0.0025, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 1.656934306569343, | |
| "grad_norm": 0.24594959616661072, | |
| "learning_rate": 4.426125169520903e-06, | |
| "loss": 0.0022, | |
| "step": 379 | |
| }, | |
| { | |
| "epoch": 1.6613138686131386, | |
| "grad_norm": 0.2806392312049866, | |
| "learning_rate": 4.42217058987742e-06, | |
| "loss": 0.0005, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 1.6656934306569342, | |
| "grad_norm": 0.4979081153869629, | |
| "learning_rate": 4.418204211023569e-06, | |
| "loss": 0.0021, | |
| "step": 381 | |
| }, | |
| { | |
| "epoch": 1.6700729927007298, | |
| "grad_norm": 0.42502567172050476, | |
| "learning_rate": 4.4142260573068995e-06, | |
| "loss": 0.0053, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 1.6744525547445255, | |
| "grad_norm": 1.1811860799789429, | |
| "learning_rate": 4.410236153147235e-06, | |
| "loss": 0.0026, | |
| "step": 383 | |
| }, | |
| { | |
| "epoch": 1.6788321167883211, | |
| "grad_norm": 0.4582519829273224, | |
| "learning_rate": 4.4062345230365345e-06, | |
| "loss": 0.0024, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 1.6832116788321168, | |
| "grad_norm": 0.30464282631874084, | |
| "learning_rate": 4.402221191538733e-06, | |
| "loss": 0.0055, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 1.6875912408759124, | |
| "grad_norm": 0.22526738047599792, | |
| "learning_rate": 4.3981961832895945e-06, | |
| "loss": 0.0003, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 1.691970802919708, | |
| "grad_norm": 0.32826468348503113, | |
| "learning_rate": 4.394159522996564e-06, | |
| "loss": 0.0009, | |
| "step": 387 | |
| }, | |
| { | |
| "epoch": 1.6963503649635037, | |
| "grad_norm": 0.5943058133125305, | |
| "learning_rate": 4.390111235438606e-06, | |
| "loss": 0.0028, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 1.7007299270072993, | |
| "grad_norm": 1.7098802328109741, | |
| "learning_rate": 4.3860513454660666e-06, | |
| "loss": 0.0035, | |
| "step": 389 | |
| }, | |
| { | |
| "epoch": 1.705109489051095, | |
| "grad_norm": 0.36092230677604675, | |
| "learning_rate": 4.381979878000506e-06, | |
| "loss": 0.0037, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 1.7094890510948906, | |
| "grad_norm": 0.2771202027797699, | |
| "learning_rate": 4.377896858034557e-06, | |
| "loss": 0.0018, | |
| "step": 391 | |
| }, | |
| { | |
| "epoch": 1.7138686131386862, | |
| "grad_norm": 0.12323533743619919, | |
| "learning_rate": 4.373802310631765e-06, | |
| "loss": 0.0008, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 1.7182481751824819, | |
| "grad_norm": 0.19630667567253113, | |
| "learning_rate": 4.3696962609264375e-06, | |
| "loss": 0.0008, | |
| "step": 393 | |
| }, | |
| { | |
| "epoch": 1.7226277372262775, | |
| "grad_norm": 0.4139691889286041, | |
| "learning_rate": 4.365578734123489e-06, | |
| "loss": 0.0031, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 1.7270072992700731, | |
| "grad_norm": 0.6594070196151733, | |
| "learning_rate": 4.3614497554982845e-06, | |
| "loss": 0.0044, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 1.7313868613138688, | |
| "grad_norm": 0.2723977863788605, | |
| "learning_rate": 4.357309350396488e-06, | |
| "loss": 0.0018, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 1.7357664233576642, | |
| "grad_norm": 0.16032417118549347, | |
| "learning_rate": 4.3531575442339025e-06, | |
| "loss": 0.0005, | |
| "step": 397 | |
| }, | |
| { | |
| "epoch": 1.7401459854014598, | |
| "grad_norm": 0.3799298107624054, | |
| "learning_rate": 4.348994362496316e-06, | |
| "loss": 0.006, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 1.7445255474452555, | |
| "grad_norm": 0.28333285450935364, | |
| "learning_rate": 4.344819830739349e-06, | |
| "loss": 0.0015, | |
| "step": 399 | |
| }, | |
| { | |
| "epoch": 1.748905109489051, | |
| "grad_norm": 0.3942627012729645, | |
| "learning_rate": 4.34063397458829e-06, | |
| "loss": 0.0018, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 1.7532846715328467, | |
| "grad_norm": 0.8048702478408813, | |
| "learning_rate": 4.336436819737942e-06, | |
| "loss": 0.0021, | |
| "step": 401 | |
| }, | |
| { | |
| "epoch": 1.7576642335766424, | |
| "grad_norm": 0.1157551184296608, | |
| "learning_rate": 4.332228391952469e-06, | |
| "loss": 0.0009, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 1.762043795620438, | |
| "grad_norm": 0.18697626888751984, | |
| "learning_rate": 4.328008717065228e-06, | |
| "loss": 0.0031, | |
| "step": 403 | |
| }, | |
| { | |
| "epoch": 1.7664233576642334, | |
| "grad_norm": 0.6587929129600525, | |
| "learning_rate": 4.323777820978622e-06, | |
| "loss": 0.0011, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 1.770802919708029, | |
| "grad_norm": 0.40322232246398926, | |
| "learning_rate": 4.319535729663929e-06, | |
| "loss": 0.0013, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 1.7751824817518247, | |
| "grad_norm": 0.33533793687820435, | |
| "learning_rate": 4.315282469161156e-06, | |
| "loss": 0.0008, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 1.7795620437956203, | |
| "grad_norm": 0.2024499624967575, | |
| "learning_rate": 4.3110180655788645e-06, | |
| "loss": 0.0022, | |
| "step": 407 | |
| }, | |
| { | |
| "epoch": 1.783941605839416, | |
| "grad_norm": 0.5895872116088867, | |
| "learning_rate": 4.306742545094022e-06, | |
| "loss": 0.0019, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 1.7883211678832116, | |
| "grad_norm": 0.3792962431907654, | |
| "learning_rate": 4.3024559339518355e-06, | |
| "loss": 0.0017, | |
| "step": 409 | |
| }, | |
| { | |
| "epoch": 1.7927007299270072, | |
| "grad_norm": 0.7945428490638733, | |
| "learning_rate": 4.298158258465593e-06, | |
| "loss": 0.0027, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 1.7970802919708029, | |
| "grad_norm": 0.37964075803756714, | |
| "learning_rate": 4.2938495450164984e-06, | |
| "loss": 0.0014, | |
| "step": 411 | |
| }, | |
| { | |
| "epoch": 1.8014598540145985, | |
| "grad_norm": 0.08326616883277893, | |
| "learning_rate": 4.289529820053515e-06, | |
| "loss": 0.0005, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 1.8058394160583942, | |
| "grad_norm": 0.14445550739765167, | |
| "learning_rate": 4.285199110093198e-06, | |
| "loss": 0.0021, | |
| "step": 413 | |
| }, | |
| { | |
| "epoch": 1.8102189781021898, | |
| "grad_norm": 0.24620558321475983, | |
| "learning_rate": 4.280857441719533e-06, | |
| "loss": 0.0007, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 1.8145985401459854, | |
| "grad_norm": 0.2617506980895996, | |
| "learning_rate": 4.276504841583778e-06, | |
| "loss": 0.0011, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 1.818978102189781, | |
| "grad_norm": 0.22467154264450073, | |
| "learning_rate": 4.27214133640429e-06, | |
| "loss": 0.0006, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 1.8233576642335767, | |
| "grad_norm": 0.25831958651542664, | |
| "learning_rate": 4.267766952966369e-06, | |
| "loss": 0.0029, | |
| "step": 417 | |
| }, | |
| { | |
| "epoch": 1.8277372262773723, | |
| "grad_norm": 0.30368125438690186, | |
| "learning_rate": 4.263381718122092e-06, | |
| "loss": 0.0016, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 1.832116788321168, | |
| "grad_norm": 0.6697282195091248, | |
| "learning_rate": 4.258985658790144e-06, | |
| "loss": 0.0044, | |
| "step": 419 | |
| }, | |
| { | |
| "epoch": 1.8364963503649636, | |
| "grad_norm": 0.4912242293357849, | |
| "learning_rate": 4.25457880195566e-06, | |
| "loss": 0.0014, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 1.8408759124087593, | |
| "grad_norm": 0.17477519810199738, | |
| "learning_rate": 4.2501611746700526e-06, | |
| "loss": 0.0002, | |
| "step": 421 | |
| }, | |
| { | |
| "epoch": 1.845255474452555, | |
| "grad_norm": 0.09962823987007141, | |
| "learning_rate": 4.245732804050848e-06, | |
| "loss": 0.0009, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 1.8496350364963505, | |
| "grad_norm": 0.5256549119949341, | |
| "learning_rate": 4.241293717281523e-06, | |
| "loss": 0.0005, | |
| "step": 423 | |
| }, | |
| { | |
| "epoch": 1.854014598540146, | |
| "grad_norm": 0.1596180498600006, | |
| "learning_rate": 4.236843941611332e-06, | |
| "loss": 0.001, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 1.8583941605839416, | |
| "grad_norm": 0.3437536656856537, | |
| "learning_rate": 4.232383504355147e-06, | |
| "loss": 0.002, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 1.8627737226277372, | |
| "grad_norm": 0.32742857933044434, | |
| "learning_rate": 4.227912432893282e-06, | |
| "loss": 0.0018, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 1.8671532846715329, | |
| "grad_norm": 0.5527262091636658, | |
| "learning_rate": 4.223430754671331e-06, | |
| "loss": 0.0004, | |
| "step": 427 | |
| }, | |
| { | |
| "epoch": 1.8715328467153285, | |
| "grad_norm": 0.11191878467798233, | |
| "learning_rate": 4.218938497199996e-06, | |
| "loss": 0.0003, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 1.8759124087591241, | |
| "grad_norm": 0.09846347570419312, | |
| "learning_rate": 4.214435688054922e-06, | |
| "loss": 0.0004, | |
| "step": 429 | |
| }, | |
| { | |
| "epoch": 1.8802919708029195, | |
| "grad_norm": 0.16991831362247467, | |
| "learning_rate": 4.209922354876523e-06, | |
| "loss": 0.0008, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 1.8846715328467152, | |
| "grad_norm": 0.126469686627388, | |
| "learning_rate": 4.2053985253698155e-06, | |
| "loss": 0.0004, | |
| "step": 431 | |
| }, | |
| { | |
| "epoch": 1.8890510948905108, | |
| "grad_norm": 0.3232942521572113, | |
| "learning_rate": 4.200864227304247e-06, | |
| "loss": 0.0022, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 1.8934306569343065, | |
| "grad_norm": 0.3737439811229706, | |
| "learning_rate": 4.196319488513528e-06, | |
| "loss": 0.0017, | |
| "step": 433 | |
| }, | |
| { | |
| "epoch": 1.897810218978102, | |
| "grad_norm": 0.14488628506660461, | |
| "learning_rate": 4.191764336895455e-06, | |
| "loss": 0.0002, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 1.9021897810218977, | |
| "grad_norm": 0.16040323674678802, | |
| "learning_rate": 4.187198800411748e-06, | |
| "loss": 0.0005, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 1.9065693430656934, | |
| "grad_norm": 0.19812235236167908, | |
| "learning_rate": 4.182622907087872e-06, | |
| "loss": 0.0002, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 1.910948905109489, | |
| "grad_norm": 0.059883181005716324, | |
| "learning_rate": 4.178036685012869e-06, | |
| "loss": 0.0005, | |
| "step": 437 | |
| }, | |
| { | |
| "epoch": 1.9153284671532846, | |
| "grad_norm": 0.2905563414096832, | |
| "learning_rate": 4.1734401623391794e-06, | |
| "loss": 0.001, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 1.9197080291970803, | |
| "grad_norm": 0.38163650035858154, | |
| "learning_rate": 4.168833367282479e-06, | |
| "loss": 0.0007, | |
| "step": 439 | |
| }, | |
| { | |
| "epoch": 1.924087591240876, | |
| "grad_norm": 0.04273957014083862, | |
| "learning_rate": 4.164216328121499e-06, | |
| "loss": 0.0001, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 1.9284671532846716, | |
| "grad_norm": 0.02129952795803547, | |
| "learning_rate": 4.15958907319785e-06, | |
| "loss": 0.0001, | |
| "step": 441 | |
| }, | |
| { | |
| "epoch": 1.9328467153284672, | |
| "grad_norm": 0.016533153131604195, | |
| "learning_rate": 4.154951630915859e-06, | |
| "loss": 0.0001, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 1.9372262773722628, | |
| "grad_norm": 0.11019770801067352, | |
| "learning_rate": 4.150304029742381e-06, | |
| "loss": 0.0002, | |
| "step": 443 | |
| }, | |
| { | |
| "epoch": 1.9416058394160585, | |
| "grad_norm": 0.05474651977419853, | |
| "learning_rate": 4.145646298206636e-06, | |
| "loss": 0.0002, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 1.945985401459854, | |
| "grad_norm": 0.10142989456653595, | |
| "learning_rate": 4.1409784649000255e-06, | |
| "loss": 0.0001, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 1.9503649635036497, | |
| "grad_norm": 0.21639519929885864, | |
| "learning_rate": 4.136300558475962e-06, | |
| "loss": 0.0019, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 1.9547445255474454, | |
| "grad_norm": 0.45263969898223877, | |
| "learning_rate": 4.131612607649694e-06, | |
| "loss": 0.0034, | |
| "step": 447 | |
| }, | |
| { | |
| "epoch": 1.959124087591241, | |
| "grad_norm": 0.38673898577690125, | |
| "learning_rate": 4.126914641198123e-06, | |
| "loss": 0.0005, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 1.9635036496350367, | |
| "grad_norm": 0.29815611243247986, | |
| "learning_rate": 4.1222066879596344e-06, | |
| "loss": 0.0004, | |
| "step": 449 | |
| }, | |
| { | |
| "epoch": 1.967883211678832, | |
| "grad_norm": 0.029003242030739784, | |
| "learning_rate": 4.1174887768339165e-06, | |
| "loss": 0.0002, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 1.9722627737226277, | |
| "grad_norm": 0.21172675490379333, | |
| "learning_rate": 4.112760936781783e-06, | |
| "loss": 0.0002, | |
| "step": 451 | |
| }, | |
| { | |
| "epoch": 1.9766423357664233, | |
| "grad_norm": 0.27625802159309387, | |
| "learning_rate": 4.108023196824998e-06, | |
| "loss": 0.0038, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 1.981021897810219, | |
| "grad_norm": 0.26207876205444336, | |
| "learning_rate": 4.103275586046095e-06, | |
| "loss": 0.0002, | |
| "step": 453 | |
| }, | |
| { | |
| "epoch": 1.9854014598540146, | |
| "grad_norm": 0.6478922367095947, | |
| "learning_rate": 4.098518133588198e-06, | |
| "loss": 0.0015, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 1.9897810218978103, | |
| "grad_norm": 0.10226385295391083, | |
| "learning_rate": 4.093750868654845e-06, | |
| "loss": 0.0005, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 1.994160583941606, | |
| "grad_norm": 0.18696191906929016, | |
| "learning_rate": 4.088973820509811e-06, | |
| "loss": 0.0004, | |
| "step": 456 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 1368, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 6, | |
| "save_steps": 228, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1.1497677031420723e+18, | |
| "train_batch_size": 4, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |
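
The dump above follows the layout that Hugging Face `Trainer` writes to `trainer_state.json` inside each `checkpoint-*` directory. Below is a minimal sketch of reading the logged history back out and plotting the per-step training loss; the filename, output path, and the `matplotlib` dependency are assumptions for illustration, not anything the dump itself specifies.

```python
import json

import matplotlib.pyplot as plt

# Assumed path: trainer_state.json as saved in a checkpoint directory.
with open("trainer_state.json") as f:
    state = json.load(f)

# Keep only entries that carry a training loss (eval entries, if any, would not).
train_logs = [e for e in state["log_history"] if "loss" in e]
steps = [e["step"] for e in train_logs]
losses = [e["loss"] for e in train_logs]

# Log scale makes the drop from ~2.7 to ~1e-4 readable on one axis.
plt.plot(steps, losses)
plt.xlabel("step")
plt.ylabel("training loss")
plt.yscale("log")
plt.title(f"loss through step {state['global_step']} of {state['max_steps']}")
plt.savefig("loss_curve.png")
```

Because `logging_steps` is 1 in this state, every optimizer step appears in `log_history`, so the plot covers steps 1 through `global_step` (456 here) with no gaps; the same pattern works for `grad_norm` or `learning_rate` by swapping the key.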